From cb1a5bae568450b96c6828f235589cb42ce1dc81 Mon Sep 17 00:00:00 2001
From: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Date: Mon, 22 Jan 2018 18:04:35 +0100
Subject: [PATCH 0001/1678] mac80211_hwsim: add permanent mac address option
 for new radios

If simulation needs predictable permanent mac addresses of hwsim wireless
phy, this patch add the ability to create a new radio with a user defined
permanent mac address. Allowed mac addresses needs to be locally
administrated mac addresses (as also the former fixed 42:* and 02:* were).

To do not break the operation with legacy software using hwsim, the new
address is set twice. The problem here is, the netlink call backs use
wiphy->addresses[1] as identification of a radio and not the proposed
permanent address (wiphy->addresses[0]). This design decision is not
documented in the kernel repo, therefore this patch simply reproduces this,
but with the same address.

Signed-off-by: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
[make pointer const]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 43 +++++++++++++++++++++------
 drivers/net/wireless/mac80211_hwsim.h |  9 +++++-
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 1cf22e62e3dd..12b727ba9dec 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -637,6 +637,7 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = {
 	[HWSIM_ATTR_RADIO_NAME] = { .type = NLA_STRING },
 	[HWSIM_ATTR_NO_VIF] = { .type = NLA_FLAG },
 	[HWSIM_ATTR_FREQ] = { .type = NLA_U32 },
+	[HWSIM_ATTR_PERM_ADDR] = { .type = NLA_UNSPEC, .len = ETH_ALEN },
 };
 
 static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
@@ -2408,6 +2409,7 @@ struct hwsim_new_radio_params {
 	bool destroy_on_close;
 	const char *hwname;
 	bool no_vif;
+	const u8 *perm_addr;
 };
 
 static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb,
@@ -2572,15 +2574,25 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 	skb_queue_head_init(&data->pending);
 
 	SET_IEEE80211_DEV(hw, data->dev);
-	eth_zero_addr(addr);
-	addr[0] = 0x02;
-	addr[3] = idx >> 8;
-	addr[4] = idx;
-	memcpy(data->addresses[0].addr, addr, ETH_ALEN);
-	memcpy(data->addresses[1].addr, addr, ETH_ALEN);
-	data->addresses[1].addr[0] |= 0x40;
-	hw->wiphy->n_addresses = 2;
-	hw->wiphy->addresses = data->addresses;
+	if (!param->perm_addr) {
+		eth_zero_addr(addr);
+		addr[0] = 0x02;
+		addr[3] = idx >> 8;
+		addr[4] = idx;
+		memcpy(data->addresses[0].addr, addr, ETH_ALEN);
+		/* Why need here second address ? */
+		data->addresses[1].addr[0] |= 0x40;
+		memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+		hw->wiphy->n_addresses = 2;
+		hw->wiphy->addresses = data->addresses;
+		/* possible address clash is checked at hash table insertion */
+	} else {
+		memcpy(data->addresses[0].addr, param->perm_addr, ETH_ALEN);
+		/* compatibility with automatically generated mac addr */
+		memcpy(data->addresses[1].addr, param->perm_addr, ETH_ALEN);
+		hw->wiphy->n_addresses = 2;
+		hw->wiphy->addresses = data->addresses;
+	}
 
 	data->channels = param->channels;
 	data->use_chanctx = param->use_chanctx;
@@ -3210,6 +3222,19 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
 		param.regd = hwsim_world_regdom_custom[idx];
 	}
 
+	if (info->attrs[HWSIM_ATTR_PERM_ADDR]) {
+		if (!is_valid_ether_addr(
+				nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]))) {
+			GENL_SET_ERR_MSG(info,"MAC is no valid source addr");
+			NL_SET_BAD_ATTR(info->extack,
+					info->attrs[HWSIM_ATTR_PERM_ADDR]);
+			return -EINVAL;
+		}
+
+
+		param.perm_addr = nla_data(info->attrs[HWSIM_ATTR_PERM_ADDR]);
+	}
+
 	ret = mac80211_hwsim_new_radio(info, &param);
 	kfree(hwname);
 	return ret;
diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h
index a96a79c1eff5..0fe3199f8c72 100644
--- a/drivers/net/wireless/mac80211_hwsim.h
+++ b/drivers/net/wireless/mac80211_hwsim.h
@@ -68,7 +68,12 @@ enum hwsim_tx_control_flags {
  *	%HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
  * @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
  *	returns the radio ID (>= 0) or negative on errors, if successful
- *	then multicast the result
+ *	then multicast the result, uses optional parameter:
+ *	%HWSIM_ATTR_REG_STRICT_REG, %HWSIM_ATTR_SUPPORT_P2P_DEVICE,
+ *	%HWSIM_ATTR_DESTROY_RADIO_ON_CLOSE, %HWSIM_ATTR_CHANNELS,
+ *	%HWSIM_ATTR_NO_VIF, %HWSIM_ATTR_RADIO_NAME, %HWSIM_ATTR_USE_CHANCTX,
+ *	%HWSIM_ATTR_REG_HINT_ALPHA2, %HWSIM_ATTR_REG_CUSTOM_REG,
+ *	%HWSIM_ATTR_PERM_ADDR
  * @HWSIM_CMD_DEL_RADIO: destroy a radio, reply is multicasted
  * @HWSIM_CMD_GET_RADIO: fetch information about existing radios, uses:
  *	%HWSIM_ATTR_RADIO_ID
@@ -126,6 +131,7 @@ enum {
  * @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
  * @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
  *	rates of %HWSIM_ATTR_TX_INFO
+ * @HWSIM_ATTR_PERM_ADDR: permanent mac address of new radio
  * @__HWSIM_ATTR_MAX: enum limit
  */
 
@@ -153,6 +159,7 @@ enum {
 	HWSIM_ATTR_FREQ,
 	HWSIM_ATTR_PAD,
 	HWSIM_ATTR_TX_INFO_FLAGS,
+	HWSIM_ATTR_PERM_ADDR,
 	__HWSIM_ATTR_MAX,
 };
 #define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)

From 301b04040c3ea2512b8936f419efbbaef2645ccb Mon Sep 17 00:00:00 2001
From: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Date: Mon, 22 Jan 2018 18:04:36 +0100
Subject: [PATCH 0002/1678] mac80211_hwsim: add nl_err_msg in hwsim_new_radio
 in netlink case

Add a NL_ERR_MSG in case of creating a radio by a netlink message to give
clear output to the creating process instead of creating only a debug
message in kernel log. The same function is used for the creation while
module load, so keep the old message, although it should never be thrown
while load, because the module controls all mac addresses.

Signed-off-by: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
[remove message, add missing braces]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 12b727ba9dec..6996dad24e71 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2797,8 +2797,11 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 	err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
 				     hwsim_rht_params);
 	if (err < 0) {
-		pr_debug("mac80211_hwsim: radio index %d already present\n",
-			 idx);
+		if (info) {
+			GENL_SET_ERR_MSG(info, "perm addr already present");
+			NL_SET_BAD_ATTR(info->extack,
+					info->attrs[HWSIM_ATTR_PERM_ADDR]);
+		}
 		spin_unlock_bh(&hwsim_radio_lock);
 		goto failed_final_insert;
 	}

From 6335698e24ec11e1324b916177da6721df724dd8 Mon Sep 17 00:00:00 2001
From: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Date: Mon, 22 Jan 2018 18:04:37 +0100
Subject: [PATCH 0003/1678] mac80211_hwsim: add generation count for netlink
 dump operation

Make the dump operation aware of changes on radio list and corresponding
inconsistent dumps. Changed variable name for better understanding.

Signed-off-by: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
[compress ternary operator]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 31 +++++++++++++++++++++------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 6996dad24e71..f0379223ec6d 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -493,6 +493,7 @@ static LIST_HEAD(hwsim_radios);
 static struct workqueue_struct *hwsim_wq;
 static struct rhashtable hwsim_radios_rht;
 static int hwsim_radio_idx;
+static int hwsim_radios_generation = 1;
 
 static struct platform_driver mac80211_hwsim_driver = {
 	.driver = {
@@ -2807,6 +2808,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 	}
 
 	list_add_tail(&data->list, &hwsim_radios);
+	hwsim_radios_generation++;
 	spin_unlock_bh(&hwsim_radio_lock);
 
 	if (idx > 0)
@@ -3277,6 +3279,7 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
 		list_del(&data->list);
 		rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
 				       hwsim_rht_params);
+		hwsim_radios_generation++;
 		spin_unlock_bh(&hwsim_radio_lock);
 		mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
 					 info);
@@ -3333,17 +3336,19 @@ out_err:
 static int hwsim_dump_radio_nl(struct sk_buff *skb,
 			       struct netlink_callback *cb)
 {
-	int idx = cb->args[0];
+	int last_idx = cb->args[0];
 	struct mac80211_hwsim_data *data = NULL;
-	int res;
+	int res = 0;
+	void *hdr;
 
 	spin_lock_bh(&hwsim_radio_lock);
+	cb->seq = hwsim_radios_generation;
 
-	if (idx == hwsim_radio_idx)
+	if (last_idx >= hwsim_radio_idx-1)
 		goto done;
 
 	list_for_each_entry(data, &hwsim_radios, list) {
-		if (data->idx < idx)
+		if (data->idx <= last_idx)
 			continue;
 
 		if (!net_eq(wiphy_net(data->hw->wiphy), sock_net(skb->sk)))
@@ -3356,14 +3361,25 @@ static int hwsim_dump_radio_nl(struct sk_buff *skb,
 		if (res < 0)
 			break;
 
-		idx = data->idx + 1;
+		last_idx = data->idx;
 	}
 
-	cb->args[0] = idx;
+	cb->args[0] = last_idx;
+
+	/* list changed, but no new element sent, set interrupted flag */
+	if (skb->len == 0 && cb->prev_seq && cb->seq != cb->prev_seq) {
+		hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, &hwsim_genl_family,
+				  NLM_F_MULTI, HWSIM_CMD_GET_RADIO);
+		if (!hdr)
+			res = -EMSGSIZE;
+		genl_dump_check_consistent(cb, hdr);
+		genlmsg_end(skb, hdr);
+	}
 
 done:
 	spin_unlock_bh(&hwsim_radio_lock);
-	return skb->len;
+	return res ?: skb->len;
 }
 
 /* Generic Netlink operations array */
@@ -3421,6 +3437,7 @@ static void destroy_radio(struct work_struct *work)
 	struct mac80211_hwsim_data *data =
 		container_of(work, struct mac80211_hwsim_data, destroy_work);
 
+	hwsim_radios_generation++;
 	mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL);
 }
 

From 5037a00992e5fcb3d8509964313565a3dab6697c Mon Sep 17 00:00:00 2001
From: Sunil Dutt <usdutt@qti.qualcomm.com>
Date: Thu, 25 Jan 2018 17:13:37 +0200
Subject: [PATCH 0004/1678] nl80211: Introduce scan flags to emphasize
 requested scan behavior

This commit defines new scan flags (LOW_SPAN, LOW_POWER, HIGH_LATENCY)
to emphasize the requested scan behavior for the driver. These flags
are optional and are mutually exclusive. The implementation of the
respective functionality can be driver/hardware specific.

These flags can be used to control the compromise between how long
a scan takes, how much power it uses, and high accurate/complete
the scan is in finding the BSSs.

Signed-off-by: Sunil Dutt <usdutt@qti.qualcomm.com>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 28 +++++++++++++++++++++++++++-
 net/wireless/nl80211.c       | 13 +++++++++++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c587a61c32bf..6f60503fa617 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4945,6 +4945,9 @@ enum nl80211_feature_flags {
  *	probe request tx deferral and suppression
  * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
  *	value in %NL80211_ATTR_USE_MFP.
+ * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
+ * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
+ * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4972,6 +4975,9 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
 	NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
 	NL80211_EXT_FEATURE_MFP_OPTIONAL,
+	NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
+	NL80211_EXT_FEATURE_LOW_POWER_SCAN,
+	NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -5032,6 +5038,10 @@ enum nl80211_timeout_reason {
  * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN
  * requests.
  *
+ * NL80211_SCAN_FLAG_LOW_SPAN, NL80211_SCAN_FLAG_LOW_POWER, and
+ * NL80211_SCAN_FLAG_HIGH_ACCURACY flags are exclusive of each other, i.e., only
+ * one of them can be used in the request.
+ *
  * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority
  * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning
  * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured
@@ -5059,7 +5069,20 @@ enum nl80211_timeout_reason {
  *	and suppression (if it has received a broadcast Probe Response frame,
  *	Beacon frame or FILS Discovery frame from an AP that the STA considers
  *	a suitable candidate for (re-)association - suitable in terms of
- *	SSID and/or RSSI
+ *	SSID and/or RSSI.
+ * @NL80211_SCAN_FLAG_LOW_SPAN: Span corresponds to the total time taken to
+ *	accomplish the scan. Thus, this flag intends the driver to perform the
+ *	scan request with lesser span/duration. It is specific to the driver
+ *	implementations on how this is accomplished. Scan accuracy may get
+ *	impacted with this flag.
+ * @NL80211_SCAN_FLAG_LOW_POWER: This flag intends the scan attempts to consume
+ *	optimal possible power. Drivers can resort to their specific means to
+ *	optimize the power. Scan accuracy may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_HIGH_ACCURACY: Accuracy here intends to the extent of scan
+ *	results obtained. Thus HIGH_ACCURACY scan flag aims to get maximum
+ *	possible scan results. This flag hints the driver to use the best
+ *	possible scan configuration to improve the accuracy in scanning.
+ *	Latency and power use may get impacted with this flag.
  */
 enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_PRIORITY				= 1<<0,
@@ -5070,6 +5093,9 @@ enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP		= 1<<5,
 	NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE		= 1<<6,
 	NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION	= 1<<7,
+	NL80211_SCAN_FLAG_LOW_SPAN				= 1<<8,
+	NL80211_SCAN_FLAG_LOW_POWER				= 1<<9,
+	NL80211_SCAN_FLAG_HIGH_ACCURACY				= 1<<10,
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ab0c687d0c44..dd249ec9f228 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6715,8 +6715,17 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 
 	*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
 
-	if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
-	    !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+	if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+	     !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
+	    ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
+	     !wiphy_ext_feature_isset(wiphy,
+				      NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
+	    ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
+	     !wiphy_ext_feature_isset(wiphy,
+				      NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
+	    ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
+	     !wiphy_ext_feature_isset(wiphy,
+				      NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
 		return -EOPNOTSUPP;
 
 	if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {

From 40cbfa90218bc570a7959b436b9d48a18c361041 Mon Sep 17 00:00:00 2001
From: Srinivas Dasari <dasaris@qti.qualcomm.com>
Date: Thu, 25 Jan 2018 17:13:38 +0200
Subject: [PATCH 0005/1678] cfg80211/nl80211: Optional authentication offload
 to userspace

This interface allows the host driver to offload the authentication to
user space. This is exclusively defined for host drivers that do not
define separate commands for authentication and association, but rely on
userspace SME (e.g., in wpa_supplicant for the ~WPA_DRIVER_FLAGS_SME
case) for the authentication to happen. This can be used to implement
SAE without full implementation in the kernel/firmware while still being
able to use NL80211_CMD_CONNECT with driver-based BSS selection.

Host driver sends NL80211_CMD_EXTERNAL_AUTH event to start/abort
authentication to the port on which connect is triggered and status
of authentication is further indicated by user space to host
driver through the same command response interface.

User space entities advertise this capability through the
NL80211_ATTR_EXTERNAL_AUTH_SUPP flag in the NL80211_CMD_CONNECT request.
Host drivers shall look at this capability to offload the authentication.

Signed-off-by: Srinivas Dasari <dasaris@qti.qualcomm.com>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
[add socket connection ownership check]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 54 +++++++++++++++++++--
 include/uapi/linux/nl80211.h | 47 ++++++++++++++++++
 net/wireless/nl80211.c       | 94 ++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      | 15 ++++++
 net/wireless/trace.h         | 23 +++++++++
 5 files changed, 230 insertions(+), 3 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 81174f9b8d14..68def3e5b013 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1905,11 +1905,16 @@ struct cfg80211_auth_request {
  * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
  * @ASSOC_REQ_DISABLE_VHT:  Disable VHT
  * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
+ * @CONNECT_REQ_EXTERNAL_AUTH_SUPPORT: User space indicates external
+ *	authentication capability. Drivers can offload authentication to
+ *	userspace if this flag is set. Only applicable for cfg80211_connect()
+ *	request (connect callback).
  */
 enum cfg80211_assoc_req_flags {
-	ASSOC_REQ_DISABLE_HT		= BIT(0),
-	ASSOC_REQ_DISABLE_VHT		= BIT(1),
-	ASSOC_REQ_USE_RRM		= BIT(2),
+	ASSOC_REQ_DISABLE_HT			= BIT(0),
+	ASSOC_REQ_DISABLE_VHT			= BIT(1),
+	ASSOC_REQ_USE_RRM			= BIT(2),
+	CONNECT_REQ_EXTERNAL_AUTH_SUPPORT	= BIT(3),
 };
 
 /**
@@ -2600,6 +2605,33 @@ struct cfg80211_pmk_conf {
 	const u8 *pmk_r0_name;
 };
 
+/**
+ * struct cfg80211_external_auth_params - Trigger External authentication.
+ *
+ * Commonly used across the external auth request and event interfaces.
+ *
+ * @action: action type / trigger for external authentication. Only significant
+ *	for the authentication request event interface (driver to user space).
+ * @bssid: BSSID of the peer with which the authentication has
+ *	to happen. Used by both the authentication request event and
+ *	authentication response command interface.
+ * @ssid: SSID of the AP.  Used by both the authentication request event and
+ *	authentication response command interface.
+ * @key_mgmt_suite: AKM suite of the respective authentication. Used by the
+ *	authentication request event interface.
+ * @status: status code, %WLAN_STATUS_SUCCESS for successful authentication,
+ *	use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you
+ *	the real status code for failures. Used only for the authentication
+ *	response command interface (user space to driver).
+ */
+struct cfg80211_external_auth_params {
+	enum nl80211_external_auth_action action;
+	u8 bssid[ETH_ALEN] __aligned(2);
+	struct cfg80211_ssid ssid;
+	unsigned int key_mgmt_suite;
+	u16 status;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -2923,6 +2955,9 @@ struct cfg80211_pmk_conf {
  *	(invoked with the wireless_dev mutex held)
  * @del_pmk: delete the previously configured PMK for the given authenticator.
  *	(invoked with the wireless_dev mutex held)
+ *
+ * @external_auth: indicates result of offloaded authentication processing from
+ *     user space
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3216,6 +3251,8 @@ struct cfg80211_ops {
 			   const struct cfg80211_pmk_conf *conf);
 	int	(*del_pmk)(struct wiphy *wiphy, struct net_device *dev,
 			   const u8 *aa);
+	int     (*external_auth)(struct wiphy *wiphy, struct net_device *dev,
+				 struct cfg80211_external_auth_params *params);
 };
 
 /*
@@ -6202,6 +6239,17 @@ void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
 
+/**
+ * cfg80211_external_auth_request - userspace request for authentication
+ * @netdev: network device
+ * @params: External authentication parameters
+ * @gfp: allocation flags
+ * Returns: 0 on success, < 0 on error
+ */
+int cfg80211_external_auth_request(struct net_device *netdev,
+				   struct cfg80211_external_auth_params *params,
+				   gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6f60503fa617..c2342456cf16 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -992,6 +992,27 @@
  *
  * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
  *
+ * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
+ *	drivers that do not define separate commands for authentication and
+ *	association, but rely on user space for the authentication to happen.
+ *	This interface acts both as the event request (driver to user space)
+ *	to trigger the authentication and command response (userspace to
+ *	driver) to indicate the authentication status.
+ *
+ *	User space uses the %NL80211_CMD_CONNECT command to the host driver to
+ *	trigger a connection. The host driver selects a BSS and further uses
+ *	this interface to offload only the authentication part to the user
+ *	space. Authentication frames are passed between the driver and user
+ *	space through the %NL80211_CMD_FRAME interface. Host driver proceeds
+ *	further with the association after getting successful authentication
+ *	status. User space indicates the authentication status through
+ *	%NL80211_ATTR_STATUS_CODE attribute in %NL80211_CMD_EXTERNAL_AUTH
+ *	command interface.
+ *
+ *	Host driver reports this status on an authentication failure to the
+ *	user space through the connect result as the user space would have
+ *	initiated the connection through the connect request.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1198,6 +1219,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_RELOAD_REGDB,
 
+	NL80211_CMD_EXTERNAL_AUTH,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2153,6 +2176,16 @@ enum nl80211_commands {
  * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
  * @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
  *
+ * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external
+ *     authentication operation (u32 attribute with an
+ *     &enum nl80211_external_auth_action value). This is used with the
+ *     &NL80211_CMD_EXTERNAL_AUTH request event.
+ * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
+ *     space supports external authentication. This attribute shall be used
+ *     only with %NL80211_CMD_CONNECT request. The driver may offload
+ *     authentication processing to user space if this capability is indicated
+ *     in NL80211_CMD_CONNECT requests from the user space.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2579,6 +2612,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_PMKR0_NAME,
 	NL80211_ATTR_PORT_AUTHORIZED,
 
+	NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+	NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5495,4 +5531,15 @@ enum nl80211_nan_match_attributes {
 	NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
 };
 
+/**
+ * nl80211_external_auth_action - Action to perform with external
+ *     authentication request. Used by NL80211_ATTR_EXTERNAL_AUTH_ACTION.
+ * @NL80211_EXTERNAL_AUTH_START: Start the authentication.
+ * @NL80211_EXTERNAL_AUTH_ABORT: Abort the ongoing authentication.
+ */
+enum nl80211_external_auth_action {
+	NL80211_EXTERNAL_AUTH_START,
+	NL80211_EXTERNAL_AUTH_ABORT,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index dd249ec9f228..aa6b64069c80 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -420,6 +420,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_FILS_CACHE_ID] = { .len = 2 },
 	[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
 	[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
+	[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -9161,6 +9162,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) {
+		if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+			GENL_SET_ERR_MSG(info,
+					 "external auth requires connection ownership");
+			return -EINVAL;
+		}
+		connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT;
+	}
+
 	wdev_lock(dev->ieee80211_ptr);
 
 	err = cfg80211_connect(rdev, dev, &connect, connkeys,
@@ -12469,6 +12479,41 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info)
 	return ret;
 }
 
+static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct cfg80211_external_auth_params params;
+
+	if (rdev->ops->external_auth)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_SSID])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_BSSID])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_STATUS_CODE])
+		return -EINVAL;
+
+	memset(&params, 0, sizeof(params));
+
+	params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+	if (params.ssid.ssid_len == 0 ||
+	    params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+		return -EINVAL;
+	memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
+	       params.ssid.ssid_len);
+
+	memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
+	       ETH_ALEN);
+
+	params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
+
+	return rdev_external_auth(rdev, dev, &params);
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -13364,6 +13409,14 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_EXTERNAL_AUTH,
+		.doit = nl80211_external_auth,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 
 };
 
@@ -15375,6 +15428,47 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
 	nlmsg_free(msg);
 }
 
+int cfg80211_external_auth_request(struct net_device *dev,
+				   struct cfg80211_external_auth_params *params,
+				   gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+
+	if (!wdev->conn_owner_nlportid)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH);
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+	    nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite) ||
+	    nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION,
+			params->action) ||
+	    nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
+	    nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
+		    params->ssid.ssid))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+			wdev->conn_owner_nlportid);
+	return 0;
+
+ nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(cfg80211_external_auth_request);
+
 /* initialisation/exit functions */
 
 int __init nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 0c06240d25af..84f23ae015fc 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1190,4 +1190,19 @@ static inline int rdev_del_pmk(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
+
+static inline int
+rdev_external_auth(struct cfg80211_registered_device *rdev,
+		   struct net_device *dev,
+		   struct cfg80211_external_auth_params *params)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_external_auth(&rdev->wiphy, dev, params);
+	if (rdev->ops->external_auth)
+		ret = rdev->ops->external_auth(&rdev->wiphy, dev, params);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index bcfedd39e7a3..5152938b358d 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2319,6 +2319,29 @@ TRACE_EVENT(rdev_del_pmk,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
 );
 
+TRACE_EVENT(rdev_external_auth,
+	    TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		     struct cfg80211_external_auth_params *params),
+	    TP_ARGS(wiphy, netdev, params),
+	    TP_STRUCT__entry(WIPHY_ENTRY
+			     NETDEV_ENTRY
+			     MAC_ENTRY(bssid)
+			     __array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
+			     __field(u16, status)
+	    ),
+	    TP_fast_assign(WIPHY_ASSIGN;
+			   NETDEV_ASSIGN;
+			   MAC_ASSIGN(bssid, params->bssid);
+			   memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
+			   memcpy(__entry->ssid, params->ssid.ssid,
+				  params->ssid.ssid_len);
+			   __entry->status = params->status;
+	    ),
+	    TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+		      ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
+		      __entry->bssid, __entry->ssid, __entry->status)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/

From 10773a7c09b327d02144c7d181e6544b7015ffc7 Mon Sep 17 00:00:00 2001
From: Srinivas Dasari <dasaris@qti.qualcomm.com>
Date: Thu, 25 Jan 2018 17:13:39 +0200
Subject: [PATCH 0006/1678] nl80211: Allow SAE Authentication for
 NL80211_CMD_CONNECT

This commit allows SAE Authentication for NL80211_CMD_CONNECT
interface, provided host driver advertises the support.

Host drivers may offload the SAE authentication to user space
through NL80211_CMD_EXTERNAL_AUTH interface and thus expect
the user space to advertise support to handle offload through
NL80211_ATTR_EXTERNAL_AUTH_SUPPORT in NL80211_CMD_CONNECT
request. Such drivers should reject the connect request on no
offload support from user space.

Signed-off-by: Srinivas Dasari <dasaris@qti.qualcomm.com>
Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index aa6b64069c80..bdb70fe74e3c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3921,9 +3921,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
 			return false;
 		return true;
 	case NL80211_CMD_CONNECT:
-		/* SAE not supported yet */
-		if (auth_type == NL80211_AUTHTYPE_SAE)
+		if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
+		    auth_type == NL80211_AUTHTYPE_SAE)
 			return false;
+
 		/* FILS with SK PFS or PK not supported yet */
 		if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
 		    auth_type == NL80211_AUTHTYPE_FILS_PK)

From 25b0ba7ebbc5ffa5a64d752b85af78e6e517e3b2 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Fri, 26 Jan 2018 17:14:19 -0800
Subject: [PATCH 0007/1678] mac80211: Add txq flags to debugfs

Might help one figure out why aqm drivers may fail to transmit
frames sometimes.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs_sta.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 444ea8d127fe..4105081dc1df 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -160,12 +160,12 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
 		       sta->cparams.ecn ? "yes" : "no");
 	p += scnprintf(p,
 		       bufsz+buf-p,
-		       "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+		       "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
 		txqi = to_txq_info(sta->sta.txq[i]);
 		p += scnprintf(p, bufsz+buf-p,
-			       "%d %d %u %u %u %u %u %u %u %u %u\n",
+			       "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
 			       txqi->txq.tid,
 			       txqi->txq.ac,
 			       txqi->tin.backlog_bytes,
@@ -176,7 +176,11 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
 			       txqi->tin.overlimit,
 			       txqi->tin.collisions,
 			       txqi->tin.tx_bytes,
-			       txqi->tin.tx_packets);
+			       txqi->tin.tx_packets,
+			       txqi->flags,
+			       txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
+			       txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
+			       txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
 	}
 
 	rcu_read_unlock();

From 466b9936bf93b7ec3bce1dcd493262ff0a8a4f44 Mon Sep 17 00:00:00 2001
From: "tamizhr@codeaurora.org" <tamizhr@codeaurora.org>
Date: Wed, 31 Jan 2018 16:24:49 +0530
Subject: [PATCH 0008/1678] cfg80211: Add support to notify station's opmode
 change to userspace

ht/vht action frames will be sent to AP from station to notify
change of its ht/vht opmode(max bandwidth, smps mode or nss) modified
values. Currently these valuse used by driver/firmware for rate control
algorithm. This patch introduces NL80211_CMD_STA_OPMODE_CHANGED
command to notify those modified/current supported values(max bandwidth,
smps mode, max nss) to userspace application. This will be useful for the
application like steering, which closely monitoring station's capability
changes. Since the application has taken these values during station
association.

Signed-off-by: Tamizh chelvam <tamizhr@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 43 ++++++++++++++++++++++++++++
 include/uapi/linux/nl80211.h | 12 ++++++++
 net/wireless/nl80211.c       | 55 ++++++++++++++++++++++++++++++++++++
 3 files changed, 110 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 68def3e5b013..7d49cd0cf92d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3553,6 +3553,35 @@ enum wiphy_vendor_command_flags {
 	WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2),
 };
 
+/**
+ * enum wiphy_opmode_flag - Station's ht/vht operation mode information flags
+ *
+ * @STA_OPMODE_MAX_BW_CHANGED: Max Bandwidth changed
+ * @STA_OPMODE_SMPS_MODE_CHANGED: SMPS mode changed
+ * @STA_OPMODE_N_SS_CHANGED: max N_SS (number of spatial streams) changed
+ *
+ */
+enum wiphy_opmode_flag {
+	STA_OPMODE_MAX_BW_CHANGED	= BIT(0),
+	STA_OPMODE_SMPS_MODE_CHANGED	= BIT(1),
+	STA_OPMODE_N_SS_CHANGED		= BIT(2),
+};
+
+/**
+ * struct sta_opmode_info - Station's ht/vht operation mode information
+ * @changed: contains value from &enum wiphy_opmode_flag
+ * @smps_mode: New SMPS mode of a station
+ * @bw: new max bandwidth value of a station
+ * @rx_nss: new rx_nss value of a station
+ */
+
+struct sta_opmode_info {
+	u32 changed;
+	u8 smps_mode;
+	u8 bw;
+	u8 rx_nss;
+};
+
 /**
  * struct wiphy_vendor_command - vendor command definition
  * @info: vendor command identifying information, as used in nl80211
@@ -5721,6 +5750,20 @@ void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp);
 void cfg80211_radar_event(struct wiphy *wiphy,
 			  struct cfg80211_chan_def *chandef, gfp_t gfp);
 
+/**
+ * cfg80211_sta_opmode_change_notify - STA's ht/vht operation mode change event
+ * @dev: network device
+ * @mac: MAC address of a station which opmode got modified
+ * @sta_opmode: station's current opmode value
+ * @gfp: context flags
+ *
+ * Driver should call this function when station's opmode modified via action
+ * frame.
+ */
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+				       struct sta_opmode_info *sta_opmode,
+				       gfp_t gfp);
+
 /**
  * cfg80211_cac_event - Channel availability check (CAC) event
  * @netdev: network device
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c2342456cf16..ca3d5a613fc0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1013,6 +1013,11 @@
  *	user space through the connect result as the user space would have
  *	initiated the connection through the connect request.
  *
+ * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's
+ *	ht opmode or vht opmode changes using any of &NL80211_ATTR_SMPS_MODE,
+ *	&NL80211_ATTR_CHANNEL_WIDTH,&NL80211_ATTR_NSS attributes with its
+ *	address(specified in &NL80211_ATTR_MAC).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1221,6 +1226,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_EXTERNAL_AUTH,
 
+	NL80211_CMD_STA_OPMODE_CHANGED,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -2186,6 +2193,9 @@ enum nl80211_commands {
  *     authentication processing to user space if this capability is indicated
  *     in NL80211_CMD_CONNECT requests from the user space.
  *
+ * @NL80211_ATTR_NSS: Station's New/updated  RX_NSS value notified using this
+ *	u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2615,6 +2625,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_EXTERNAL_AUTH_ACTION,
 	NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
 
+	NL80211_ATTR_NSS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bdb70fe74e3c..cc6ec5bab676 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14950,6 +14950,61 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
+				       struct sta_opmode_info *sta_opmode,
+				       gfp_t gfp)
+{
+	struct sk_buff *msg;
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	void *hdr;
+
+	if (WARN_ON(!mac))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
+		goto nla_put_failure;
+
+	if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
+	    nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
+		goto nla_put_failure;
+
+	if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
+	    nla_put_u8(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
+		goto nla_put_failure;
+
+	if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
+	    nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_MLME, gfp);
+
+	return;
+
+nla_put_failure:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
+
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 			   u64 cookie, bool acked, gfp_t gfp)
 {

From ff84e7bfe1763982e1ada390386dbe4739cee1e2 Mon Sep 17 00:00:00 2001
From: "tamizhr@codeaurora.org" <tamizhr@codeaurora.org>
Date: Wed, 31 Jan 2018 16:24:50 +0530
Subject: [PATCH 0009/1678] mac80211: Add support to notify ht/vht opmode
 modification.

This will add support to send an event to a userspace application
whenever station advertise its ht/vht opmode modification through
an action frame.

Signed-off-by: Tamizh chelvam <tamizhr@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c  | 14 ++++++++++++++
 net/mac80211/vht.c |  9 +++++++++
 2 files changed, 23 insertions(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fd580614085b..e755f93ad735 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2848,6 +2848,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		case WLAN_HT_ACTION_SMPS: {
 			struct ieee80211_supported_band *sband;
 			enum ieee80211_smps_mode smps_mode;
+			struct sta_opmode_info sta_opmode = {};
 
 			/* convert to HT capability */
 			switch (mgmt->u.action.u.ht_smps.smps_control) {
@@ -2868,17 +2869,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			if (rx->sta->sta.smps_mode == smps_mode)
 				goto handled;
 			rx->sta->sta.smps_mode = smps_mode;
+			sta_opmode.smps_mode = smps_mode;
+			sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
 
 			sband = rx->local->hw.wiphy->bands[status->band];
 
 			rate_control_rate_update(local, sband, rx->sta,
 						 IEEE80211_RC_SMPS_CHANGED);
+			cfg80211_sta_opmode_change_notify(sdata->dev,
+							  rx->sta->addr,
+							  &sta_opmode,
+							  GFP_KERNEL);
 			goto handled;
 		}
 		case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
 			struct ieee80211_supported_band *sband;
 			u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
 			enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+			struct sta_opmode_info sta_opmode = {};
 
 			/* If it doesn't support 40 MHz it can't change ... */
 			if (!(rx->sta->sta.ht_cap.cap &
@@ -2899,9 +2907,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 
 			rx->sta->sta.bandwidth = new_bw;
 			sband = rx->local->hw.wiphy->bands[status->band];
+			sta_opmode.bw = new_bw;
+			sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
 
 			rate_control_rate_update(local, sband, rx->sta,
 						 IEEE80211_RC_BW_CHANGED);
+			cfg80211_sta_opmode_change_notify(sdata->dev,
+							  rx->sta->addr,
+							  &sta_opmode,
+							  GFP_KERNEL);
 			goto handled;
 		}
 		default:
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index b9276ac849fa..5714dee76b12 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -447,6 +447,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 				  enum nl80211_band band)
 {
 	enum ieee80211_sta_rx_bandwidth new_bw;
+	struct sta_opmode_info sta_opmode = {};
 	u32 changed = 0;
 	u8 nss;
 
@@ -460,7 +461,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 
 	if (sta->sta.rx_nss != nss) {
 		sta->sta.rx_nss = nss;
+		sta_opmode.rx_nss = nss;
 		changed |= IEEE80211_RC_NSS_CHANGED;
+		sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
 	}
 
 	switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
@@ -481,9 +484,15 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 	new_bw = ieee80211_sta_cur_vht_bw(sta);
 	if (new_bw != sta->sta.bandwidth) {
 		sta->sta.bandwidth = new_bw;
+		sta_opmode.bw = new_bw;
 		changed |= IEEE80211_RC_BW_CHANGED;
+		sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
 	}
 
+	if (sta_opmode.changed)
+		cfg80211_sta_opmode_change_notify(sdata->dev, sta->addr,
+						  &sta_opmode, GFP_KERNEL);
+
 	return changed;
 }
 

From 1ce0cec1c14cda7e514fa21b36c0f035203b447d Mon Sep 17 00:00:00 2001
From: Tedd Ho-Jeong An <tedd.an@intel.com>
Date: Mon, 5 Feb 2018 14:20:36 -0800
Subject: [PATCH 0010/1678] Bluetooth: btusb: Add support for Intel Bluetooth
 device 22560 [8087:0026]

The Intel Bluetooth device 22560 family (HarrisonPeak, QnJ, and IcyPeak)
use the same firmware loading mechanism as previous generation,
so include new USB product ID and whitelist the hardware variant.

T:  Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=12   MxCh= 0
D:  Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=8087 ProdID=0026 Rev= 0.01
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  64 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
I:  If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  63 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  63 Ivl=1ms

Signed-off-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 2a55380ad730..098967cb3fc9 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -339,6 +339,7 @@ static const struct usb_device_id blacklist_table[] = {
 
 	/* Intel Bluetooth devices */
 	{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW },
+	{ USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW },
 	{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
 	{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
 	{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
@@ -2057,6 +2058,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	case 0x0c:	/* WsP */
 	case 0x11:	/* JfP */
 	case 0x12:	/* ThP */
+	case 0x13:	/* HrP */
+	case 0x14:	/* QnJ, IcP */
 		break;
 	default:
 		BT_ERR("%s: Unsupported Intel hardware variant (%u)",
@@ -2149,6 +2152,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 		break;
 	case 0x11:	/* JfP */
 	case 0x12:	/* ThP */
+	case 0x13:	/* HrP */
+	case 0x14:	/* QnJ, IcP */
 		snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
 			 le16_to_cpu(ver.hw_variant),
 			 le16_to_cpu(ver.hw_revision),
@@ -2180,6 +2185,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 		break;
 	case 0x11:	/* JfP */
 	case 0x12:	/* ThP */
+	case 0x13:	/* HrP */
+	case 0x14:	/* QnJ, IcP */
 		snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
 			 le16_to_cpu(ver.hw_variant),
 			 le16_to_cpu(ver.hw_revision),

From 8b077bdba66d1d606eb1bdce3db87ff605aa7f1c Mon Sep 17 00:00:00 2001
From: Maxim Zhukov <mussitantesmortem@gmail.com>
Date: Mon, 5 Feb 2018 00:09:43 +0300
Subject: [PATCH 0011/1678] Bluetooth: ath3k: replace hardcode numbers with
 define

Replaced the numbers with a readable define.

Signed-off-by: Maxim Zhukov <mussitantesmortem@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/ath3k.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 204afe66de92..0a5cfea44529 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -227,15 +227,16 @@ static int ath3k_load_firmware(struct usb_device *udev,
 		return -ENOMEM;
 	}
 
-	memcpy(send_buf, firmware->data, 20);
+	memcpy(send_buf, firmware->data, FW_HDR_SIZE);
 	err = usb_control_msg(udev, pipe, USB_REQ_DFU_DNLOAD, USB_TYPE_VENDOR,
-			      0, 0, send_buf, 20, USB_CTRL_SET_TIMEOUT);
+			      0, 0, send_buf, FW_HDR_SIZE,
+			      USB_CTRL_SET_TIMEOUT);
 	if (err < 0) {
 		BT_ERR("Can't change to loading configuration err");
 		goto error;
 	}
-	sent += 20;
-	count -= 20;
+	sent += FW_HDR_SIZE;
+	count -= FW_HDR_SIZE;
 
 	pipe = usb_sndbulkpipe(udev, 0x02);
 

From b3baa2428dece073f7158b2eb616dc69a759ea8e Mon Sep 17 00:00:00 2001
From: Maxim Zhukov <mussitantesmortem@gmail.com>
Date: Mon, 5 Feb 2018 00:09:44 +0300
Subject: [PATCH 0012/1678] Bluetooth: ath3k: do not init variables

Do not need to initialize variables, because further on the code they
fall into the snprintf.

Signed-off-by: Maxim Zhukov <mussitantesmortem@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/ath3k.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 0a5cfea44529..b16c01a0b6d4 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -399,7 +399,7 @@ static int ath3k_set_normal_mode(struct usb_device *udev)
 static int ath3k_load_patch(struct usb_device *udev)
 {
 	unsigned char fw_state;
-	char filename[ATH3K_NAME_LEN] = {0};
+	char filename[ATH3K_NAME_LEN];
 	const struct firmware *firmware;
 	struct ath3k_version fw_version;
 	__u32 pt_rom_version, pt_build_version;
@@ -452,7 +452,7 @@ static int ath3k_load_patch(struct usb_device *udev)
 static int ath3k_load_syscfg(struct usb_device *udev)
 {
 	unsigned char fw_state;
-	char filename[ATH3K_NAME_LEN] = {0};
+	char filename[ATH3K_NAME_LEN];
 	const struct firmware *firmware;
 	struct ath3k_version fw_version;
 	int clk_value, ret;

From f13b3d7e81bd8b40a42c2fe5304b2fc53d638378 Mon Sep 17 00:00:00 2001
From: Maxim Zhukov <mussitantesmortem@gmail.com>
Date: Mon, 5 Feb 2018 00:09:45 +0300
Subject: [PATCH 0013/1678] Bluetooth: ath3k: remove blank line after if

Removed blank line after if.

Signed-off-by: Maxim Zhukov <mussitantesmortem@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/ath3k.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index b16c01a0b6d4..4df5b953a40d 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -523,7 +523,6 @@ static int ath3k_probe(struct usb_interface *intf,
 
 	/* load patch and sysconfig files for AR3012 */
 	if (id->driver_info & BTUSB_ATH3012) {
-
 		/* New firmware with patch and sysconfig files already loaded */
 		if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x0001)
 			return -ENODEV;

From 3d08f43c73c4144517e31f3cf7564f98e81fd28f Mon Sep 17 00:00:00 2001
From: Maxim Zhukov <mussitantesmortem@gmail.com>
Date: Mon, 5 Feb 2018 00:09:46 +0300
Subject: [PATCH 0014/1678] Bluetooth: ath3k: Fix warning: quoted string split
 across lines

This patch avoided the warning:
WARNING: quoted string split across lines
 #355: FILE: drivers/bluetooth/ath3k.c:355:
 +			BT_ERR("Error in firmware loading err = %d,"
 +				"len = %d, size = %d", err, len, size);

This patch fix this issue.

Signed-off-by: Maxim Zhukov <mussitantesmortem@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/ath3k.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 4df5b953a40d..8fe5ec4bb342 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -203,6 +203,12 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ }	/* Terminating entry */
 };
 
+static inline void ath3k_log_failed_loading(int err, int len, int size)
+{
+	BT_ERR("Error in firmware loading err = %d, len = %d, size = %d",
+			err, len, size);
+}
+
 #define USB_REQ_DFU_DNLOAD	1
 #define BULK_SIZE		4096
 #define FW_HDR_SIZE		20
@@ -251,8 +257,7 @@ static int ath3k_load_firmware(struct usb_device *udev,
 					&len, 3000);
 
 		if (err || (len != size)) {
-			BT_ERR("Error in firmware loading err = %d,"
-				"len = %d, size = %d", err, len, size);
+			ath3k_log_failed_loading(err, len, size);
 			goto error;
 		}
 
@@ -351,8 +356,7 @@ static int ath3k_load_fwfile(struct usb_device *udev,
 		err = usb_bulk_msg(udev, pipe, send_buf, size,
 					&len, 3000);
 		if (err || (len != size)) {
-			BT_ERR("Error in firmware loading err = %d,"
-				"len = %d, size = %d", err, len, size);
+			ath3k_log_failed_loading(err, len, size);
 			kfree(send_buf);
 			return err;
 		}

From 0a21963aacfe88fdbfe6166781d4ccd159197bd6 Mon Sep 17 00:00:00 2001
From: Maxim Zhukov <mussitantesmortem@gmail.com>
Date: Mon, 5 Feb 2018 00:09:47 +0300
Subject: [PATCH 0015/1678] Bluetooth: ath3k: fix checkpatch warning

This patch fixed warning:
 WARNING: Prefer using '"%s...", __func__' to using 'ath3k_disconnect', this function's name, in a string
 #568: FILE: drivers/bluetooth/ath3k.c:568:
 +	BT_DBG("ath3k_disconnect intf %p", intf);

Signed-off-by: Maxim Zhukov <mussitantesmortem@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/ath3k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 8fe5ec4bb342..3d7a5c149af3 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -569,7 +569,7 @@ static int ath3k_probe(struct usb_interface *intf,
 
 static void ath3k_disconnect(struct usb_interface *intf)
 {
-	BT_DBG("ath3k_disconnect intf %p", intf);
+	BT_DBG("%s intf %p", __func__, intf);
 }
 
 static struct usb_driver ath3k_driver = {

From 688d6240e0646a56ff8bdffb2310dcdeca354814 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sat, 27 Jan 2018 18:17:38 +0800
Subject: [PATCH 0016/1678] Bluetooth: hci_ath: Replace mdelay with msleep in
 ath_wakeup_ar3k

ath_wakeup_ar3k() is never called from atomic context.

It is only called by ath_hci_uart_work() that is only called in
ath_open() via INIT_WORK().
All of the above functions do not enter atomic context along the way.

Despite never getting called from atomic context, ath_wakeup_ar3k() calls
mdelay() for busy wait.
That is not necessary and can be replaced with msleep to avoid busy wait.

This is found by a static analysis tool named DCNS written by myself.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_ath.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
index 14ae7ee88acb..d568fbd94d6c 100644
--- a/drivers/bluetooth/hci_ath.c
+++ b/drivers/bluetooth/hci_ath.c
@@ -71,12 +71,12 @@ static int ath_wakeup_ar3k(struct tty_struct *tty)
 	/* Clear RTS first */
 	tty->driver->ops->tiocmget(tty);
 	tty->driver->ops->tiocmset(tty, 0x00, TIOCM_RTS);
-	mdelay(20);
+	msleep(20);
 
 	/* Set RTS, wake up board */
 	tty->driver->ops->tiocmget(tty);
 	tty->driver->ops->tiocmset(tty, TIOCM_RTS, 0x00);
-	mdelay(20);
+	msleep(20);
 
 	status = tty->driver->ops->tiocmget(tty);
 	return status;

From 1ebbf046273e50cf3cbf2ec13d520dc74ab454d4 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Fri, 26 Jan 2018 23:57:01 +0800
Subject: [PATCH 0017/1678] Bluetooth: btmrvl_main: Replace GFP_ATOMIC with
 GFP_KERNEL in btmrvl_send_sync_cmd

After checking all possible call chains to btmrvl_send_sync_cmd(),
my tool finds that this function is never called in atomic context,
namely never in an interrupt handler or holding a spinlock.
And it calls wait_event_interruptible_timeout() after bt_skb_alloc(),
so it indicates that btmrvl_send_sync_cmd()
can call function which can sleep.
Thus GFP_ATOMIC is not necessary, and it can be replaced with GFP_KERNEL.

This is found by a static analysis tool named DCNS written by myself.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmrvl_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index b280d466f05b..f6c694a1b9b0 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -183,7 +183,7 @@ static int btmrvl_send_sync_cmd(struct btmrvl_private *priv, u16 opcode,
 		return -EFAULT;
 	}
 
-	skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_ATOMIC);
+	skb = bt_skb_alloc(HCI_COMMAND_HDR_SIZE + len, GFP_KERNEL);
 	if (!skb) {
 		BT_ERR("No free skb");
 		return -ENOMEM;

From 06633ee14d5c9bff3b46be67be2824138c75594a Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sat, 27 Jan 2018 18:03:52 +0800
Subject: [PATCH 0018/1678] Bluetooth: hci_ll: Replace mdelay with msleep in
 download_firmware

download_firmware() is never called from atomic context.

It is only called by ll_setup() that is called only via function pointer
"->setup" used in hci_uart_setup() in drivers/bluetooth/hci_serdev.c and
drivers/bluetooth/hci_ldisc.c. hci_uart_setup() is called only
via function pointer "->setup" used in hci_dev_do_open()
in net/bluetooth/hci_core.c.
All of the above functions do not enter atomic context.

Besides, ll_setup() calls msleep() and hci_dev_do_open calls mutex_lock().
So it indicates that all the above functions call functions that can sleep.

Despite never getting called from atomic context, download_firmware()
calls mdelay() for busy wait.
That is not necessary and can be replaced with msleep to avoid busy wait.

This is found by a static analysis tool named DCNS written by myself.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_ll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 1b4417a623a4..2f30dcad96bd 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -650,7 +650,7 @@ static int download_firmware(struct ll_device *lldev)
 			break;
 		case ACTION_DELAY:	/* sleep */
 			bt_dev_info(lldev->hu.hdev, "sleep command in scr");
-			mdelay(((struct bts_action_delay *)action_ptr)->msec);
+			msleep(((struct bts_action_delay *)action_ptr)->msec);
 			break;
 		}
 		len -= (sizeof(struct bts_action) +

From 62ebdc25c4002e5fc104ab536ce7d99ba76be03f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Rymanowski?= <lukasz.rymanowski@codecoup.pl>
Date: Fri, 9 Feb 2018 18:26:02 +0100
Subject: [PATCH 0019/1678] Bluetooth: Fix incorrect bits for LE states
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes incorrect checks for LE states.
Issues found when doing mgmt tests for scenario
when Linux Kernel should do connectable advertising
while connected.

Signed-off-by: Łukasz Rymanowski <lukasz.rymanowski@codecoup.pl>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3394e6791673..66c0781773df 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,8 +934,8 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
 		/* Slave connection state and connectable mode bit 38
 		 * and scannable bit 21.
 		 */
-		if (connectable && (!(hdev->le_states[4] & 0x01) ||
-				    !(hdev->le_states[2] & 0x40)))
+		if (connectable && (!(hdev->le_states[4] & 0x40) ||
+				    !(hdev->le_states[2] & 0x20)))
 			return false;
 	}
 
@@ -948,7 +948,7 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
 		/* Master connection state and connectable mode bit 35 and
 		 * scannable 19.
 		 */
-		if (connectable && (!(hdev->le_states[4] & 0x10) ||
+		if (connectable && (!(hdev->le_states[4] & 0x08) ||
 				    !(hdev->le_states[2] & 0x08)))
 			return false;
 	}

From fed03fe7e55b7dc16077f672bd9d7bbe92b3a691 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Sun, 11 Feb 2018 12:24:32 -0600
Subject: [PATCH 0020/1678] Bluetooth: btusb: Add device ID for RTL8822BE

The Asus Z370-I contains a Realtek RTL8822BE device with an associated
BT chip using a USB ID of 0b05:185c. This device is added to the driver.

Signed-off-by: Hon Weng Chong <honwchong@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 098967cb3fc9..c8e9ae6b99e1 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -374,6 +374,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3461), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3462), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8822BE Bluetooth devices */
+	{ USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK },
+
 	/* Silicon Wave based devices */
 	{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
 

From 907f84990924bf3a8d248c040dabeb5127ae6938 Mon Sep 17 00:00:00 2001
From: Alex Lu <alex_lu@realsil.com.cn>
Date: Sun, 11 Feb 2018 12:24:33 -0600
Subject: [PATCH 0021/1678] Bluetooth: btrtl: Add RTL8723D and RTL8821C devices

The Bluetooth parts of RTL8723D and RTL8723B share the same lmp
subversion, thus we need to check both lmp subversion and hci revision
to distinguish the two. The same situation is true for RTL8821A and
RTL8821C. Accordingly, the selection code is revised.

To improve maintainability, a new id_table struct is defined, and an
array of such structs is constructed. Adding a new device can thus be
as simple as adding another value to the table.

Signed-off-by: Alex Lu <alex_lu@realsil.com.cn>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btrtl.c | 119 +++++++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 34 deletions(-)

diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 6e2ad748abba..437f080deaab 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -35,6 +35,60 @@
 #define RTL_ROM_LMP_8761A	0x8761
 #define RTL_ROM_LMP_8822B	0x8822
 
+#define IC_MATCH_FL_LMPSUBV	(1 << 0)
+#define IC_MATCH_FL_HCIREV	(1 << 1)
+#define IC_INFO(lmps, hcir) \
+	.match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV, \
+	.lmp_subver = (lmps), \
+	.hci_rev = (hcir)
+
+struct id_table {
+	__u16 match_flags;
+	__u16 lmp_subver;
+	__u16 hci_rev;
+	bool config_needed;
+	char *fw_name;
+	char *cfg_name;
+};
+
+static const struct id_table ic_id_table[] = {
+	/* 8723B */
+	{ IC_INFO(RTL_ROM_LMP_8723B, 0xb),
+	  .config_needed = false,
+	  .fw_name  = "rtl_bt/rtl8723b_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8723b_config.bin" },
+
+	/* 8723D */
+	{ IC_INFO(RTL_ROM_LMP_8723B, 0xd),
+	  .config_needed = true,
+	  .fw_name  = "rtl_bt/rtl8723d_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8723d_config.bin" },
+
+	/* 8821A */
+	{ IC_INFO(RTL_ROM_LMP_8821A, 0xa),
+	  .config_needed = false,
+	  .fw_name  = "rtl_bt/rtl8821a_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8821a_config.bin" },
+
+	/* 8821C */
+	{ IC_INFO(RTL_ROM_LMP_8821A, 0xc),
+	  .config_needed = false,
+	  .fw_name  = "rtl_bt/rtl8821c_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8821c_config.bin" },
+
+	/* 8761A */
+	{ IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+	  .config_needed = false,
+	  .fw_name  = "rtl_bt/rtl8761a_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8761a_config.bin" },
+
+	/* 8822B */
+	{ IC_INFO(RTL_ROM_LMP_8822B, 0xb),
+	  .config_needed = true,
+	  .fw_name  = "rtl_bt/rtl8822b_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8822b_config.bin" },
+	};
+
 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
 {
 	struct rtl_rom_version_evt *rom_version;
@@ -64,9 +118,9 @@ static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
 	return 0;
 }
 
-static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
-				   const struct firmware *fw,
-				   unsigned char **_buf)
+static int rtlbt_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
+				const struct firmware *fw,
+				unsigned char **_buf)
 {
 	const u8 extension_sig[] = { 0x51, 0x04, 0xfd, 0x77 };
 	struct rtl_epatch_header *epatch_info;
@@ -88,6 +142,8 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver,
 		{ RTL_ROM_LMP_8821A, 2 },
 		{ RTL_ROM_LMP_8761A, 3 },
 		{ RTL_ROM_LMP_8822B, 8 },
+		{ RTL_ROM_LMP_8723B, 9 },	/* 8723D */
+		{ RTL_ROM_LMP_8821A, 10 },	/* 8821C */
 	};
 
 	ret = rtl_read_rom_version(hdev, &rom_version);
@@ -320,8 +376,8 @@ out:
 	return ret;
 }
 
-static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
-				const char *fw_name)
+static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 hci_rev,
+				u16 lmp_subver)
 {
 	unsigned char *fw_data = NULL;
 	const struct firmware *fw;
@@ -330,39 +386,40 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
 	u8 *cfg_buff = NULL;
 	u8 *tbuff;
 	char *cfg_name = NULL;
-	bool config_needed = false;
+	char *fw_name = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ic_id_table); i++) {
+		if ((ic_id_table[i].match_flags & IC_MATCH_FL_LMPSUBV) &&
+		    (ic_id_table[i].lmp_subver != lmp_subver))
+			continue;
+		if ((ic_id_table[i].match_flags & IC_MATCH_FL_HCIREV) &&
+		    (ic_id_table[i].hci_rev != hci_rev))
+			continue;
 
-	switch (lmp_subver) {
-	case RTL_ROM_LMP_8723B:
-		cfg_name = "rtl_bt/rtl8723b_config.bin";
-		break;
-	case RTL_ROM_LMP_8821A:
-		cfg_name = "rtl_bt/rtl8821a_config.bin";
-		break;
-	case RTL_ROM_LMP_8761A:
-		cfg_name = "rtl_bt/rtl8761a_config.bin";
-		break;
-	case RTL_ROM_LMP_8822B:
-		cfg_name = "rtl_bt/rtl8822b_config.bin";
-		config_needed = true;
-		break;
-	default:
-		BT_ERR("%s: rtl: no config according to lmp_subver %04x",
-		       hdev->name, lmp_subver);
 		break;
 	}
 
+	if (i >= ARRAY_SIZE(ic_id_table)) {
+		BT_ERR("%s: unknown IC info, lmp subver %04x, hci rev %04x",
+		       hdev->name, lmp_subver, hci_rev);
+		return -EINVAL;
+	}
+
+	cfg_name = ic_id_table[i].cfg_name;
+
 	if (cfg_name) {
 		cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
 		if (cfg_sz < 0) {
 			cfg_sz = 0;
-			if (config_needed)
+			if (ic_id_table[i].config_needed)
 				BT_ERR("Necessary config file %s not found\n",
 				       cfg_name);
 		}
 	} else
 		cfg_sz = 0;
 
+	fw_name = ic_id_table[i].fw_name;
 	bt_dev_info(hdev, "rtl: loading %s", fw_name);
 	ret = request_firmware(&fw, fw_name, &hdev->dev);
 	if (ret < 0) {
@@ -370,7 +427,7 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver,
 		goto err_req_fw;
 	}
 
-	ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
+	ret = rtlbt_parse_firmware(hdev, lmp_subver, fw, &fw_data);
 	if (ret < 0)
 		goto out;
 
@@ -429,7 +486,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 {
 	struct sk_buff *skb;
 	struct hci_rp_read_local_version *resp;
-	u16 lmp_subver;
+	u16 hci_rev, lmp_subver;
 
 	skb = btrtl_read_local_version(hdev);
 	if (IS_ERR(skb))
@@ -441,6 +498,7 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 		    resp->hci_ver, resp->hci_rev,
 		    resp->lmp_ver, resp->lmp_subver);
 
+	hci_rev = le16_to_cpu(resp->hci_rev);
 	lmp_subver = le16_to_cpu(resp->lmp_subver);
 	kfree_skb(skb);
 
@@ -455,17 +513,10 @@ int btrtl_setup_realtek(struct hci_dev *hdev)
 	case RTL_ROM_LMP_3499:
 		return btrtl_setup_rtl8723a(hdev);
 	case RTL_ROM_LMP_8723B:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8723b_fw.bin");
 	case RTL_ROM_LMP_8821A:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8821a_fw.bin");
 	case RTL_ROM_LMP_8761A:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8761a_fw.bin");
 	case RTL_ROM_LMP_8822B:
-		return btrtl_setup_rtl8723b(hdev, lmp_subver,
-					    "rtl_bt/rtl8822b_fw.bin");
+		return btrtl_setup_rtl8723b(hdev, hci_rev, lmp_subver);
 	default:
 		bt_dev_info(hdev, "rtl: assuming no firmware upload needed");
 		return 0;

From 11a350c965cc5ac2f5d0ed1fb336735a0c1a6344 Mon Sep 17 00:00:00 2001
From: Alan Brady <alan.brady@intel.com>
Date: Fri, 29 Dec 2017 08:48:33 -0500
Subject: [PATCH 0022/1678] i40e: fix typo in function description

'bufer' should be 'buffer'

Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 2 +-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e554aa6cf070..ddd4b3046046 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1991,7 +1991,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * @rx_buffer: rx buffer to pull data from
  *
  * This function will clean up the contents of the rx_buffer.  It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
  */
 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
 			       struct i40e_rx_buffer *rx_buffer)
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 357d6051281f..464200f48d91 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1273,7 +1273,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
  * @rx_buffer: rx buffer to pull data from
  *
  * This function will clean up the contents of the rx_buffer.  It will
- * either recycle the bufer or unmap it and free the associated resources.
+ * either recycle the buffer or unmap it and free the associated resources.
  */
 static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
 			       struct i40e_rx_buffer *rx_buffer)

From 40588ca6513729e4de60e49896aab0a7ee09df19 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:49:28 -0500
Subject: [PATCH 0023/1678] i40e/i40evf: Only track one ITR setting per ring
 instead of Tx/Rx

The rings are already split out into Tx and Rx rings so it doesn't make
sense to have any single ring store both a Tx and Rx itr_setting value.
Since that is the case drop the pair in favor of storing just a single ITR
value.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_debugfs.c    | 12 +++----
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 32 +++++++++----------
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 ++++----
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   |  6 ++--
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  3 +-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c |  6 ++--
 drivers/net/ethernet/intel/i40evf/i40e_txrx.h |  3 +-
 .../ethernet/intel/i40evf/i40evf_ethtool.c    | 24 +++++++-------
 .../net/ethernet/intel/i40evf/i40evf_main.c   |  8 ++---
 9 files changed, 53 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 4c3b4243cf65..e9fc51bd6c95 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -315,9 +315,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 i, rx_ring->vsi,
 			 rx_ring->q_vector);
 		dev_info(&pf->pdev->dev,
-			 "    rx_rings[%i]: rx_itr_setting = %d (%s)\n",
-			 i, rx_ring->rx_itr_setting,
-			 ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed");
+			 "    rx_rings[%i]: itr_setting = %d (%s)\n",
+			 i, rx_ring->itr_setting,
+			 ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
 	}
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
 		struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
@@ -366,9 +366,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 "    tx_rings[%i]: DCB tc = %d\n",
 			 i, tx_ring->dcb_tc);
 		dev_info(&pf->pdev->dev,
-			 "    tx_rings[%i]: tx_itr_setting = %d (%s)\n",
-			 i, tx_ring->tx_itr_setting,
-			 ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed");
+			 "    tx_rings[%i]: itr_setting = %d (%s)\n",
+			 i, tx_ring->itr_setting,
+			 ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
 	}
 	rcu_read_unlock();
 	dev_info(&pf->pdev->dev,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2f5bee713fef..0fb60336e64b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev,
 	rx_ring = vsi->rx_rings[queue];
 	tx_ring = vsi->tx_rings[queue];
 
-	if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
 		ec->use_adaptive_rx_coalesce = 1;
 
-	if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
 		ec->use_adaptive_tx_coalesce = 1;
 
-	ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-	ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
 
 	/* we use the _usecs_high to store/set the interrupt rate limit
 	 * that the hardware supports, that almost but not quite
@@ -2315,26 +2315,26 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 
 	intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
 
-	rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
-	tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+	rx_ring->itr_setting = ec->rx_coalesce_usecs;
+	tx_ring->itr_setting = ec->tx_coalesce_usecs;
 
 	if (ec->use_adaptive_rx_coalesce)
-		rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+		rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	else
-		rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+		rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
 
 	if (ec->use_adaptive_tx_coalesce)
-		tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+		tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	else
-		tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+		tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
 
 	q_vector = rx_ring->q_vector;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
 	vector = vsi->base_vector + q_vector->v_idx;
 	wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
 
 	q_vector = tx_ring->q_vector;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
+	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
 	vector = vsi->base_vector + q_vector->v_idx;
 	wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
 
@@ -2364,11 +2364,11 @@ static int __i40e_set_coalesce(struct net_device *netdev,
 		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
 
 	if (queue < 0) {
-		cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
-		cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+		cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+		cur_tx_itr = vsi->tx_rings[0]->itr_setting;
 	} else if (queue < vsi->num_queue_pairs) {
-		cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
-		cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+		cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+		cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
 	} else {
 		netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
 			   vsi->num_queue_pairs - 1);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e31adbc75f9c..005a6d6a20fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3450,11 +3450,11 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 		struct i40e_q_vector *q_vector = vsi->q_vectors[i];
 
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-		q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting);
+		q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
 		q_vector->rx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
 		     q_vector->rx.itr);
-		q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting);
+		q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
 		q_vector->tx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
 		     q_vector->tx.itr);
@@ -3559,10 +3559,10 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 
 	/* set the ITR configuration */
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting);
+	q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
 	q_vector->rx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
-	q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting);
+	q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
 
@@ -10018,7 +10018,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
 		ring->dcb_tc = 0;
 		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
 			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
-		ring->tx_itr_setting = pf->tx_itr_default;
+		ring->itr_setting = pf->tx_itr_default;
 		vsi->tx_rings[i] = ring++;
 
 		if (!i40e_enabled_xdp_vsi(vsi))
@@ -10036,7 +10036,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
 		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
 			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
 		set_ring_xdp(ring);
-		ring->tx_itr_setting = pf->tx_itr_default;
+		ring->itr_setting = pf->tx_itr_default;
 		vsi->xdp_rings[i] = ring++;
 
 setup_rx:
@@ -10049,7 +10049,7 @@ setup_rx:
 		ring->count = vsi->num_desc;
 		ring->size = 0;
 		ring->dcb_tc = 0;
-		ring->rx_itr_setting = pf->rx_itr_default;
+		ring->itr_setting = pf->rx_itr_default;
 		vsi->rx_rings[i] = ring;
 	}
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ddd4b3046046..b9121a87ee46 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2290,12 +2290,12 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 #define INTREG I40E_PFINT_DYN_CTLN
 static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
 {
-	return vsi->rx_rings[idx]->rx_itr_setting;
+	return vsi->rx_rings[idx]->itr_setting;
 }
 
 static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
 {
-	return vsi->tx_rings[idx]->tx_itr_setting;
+	return vsi->tx_rings[idx]->itr_setting;
 }
 
 /**
@@ -2322,7 +2322,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 	/* avoid dynamic calculation if in countdown mode OR if
 	 * all dynamic is disabled
 	 */
-	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+	txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
 	rx_itr_setting = get_rx_itr(vsi, idx);
 	tx_itr_setting = get_tx_itr(vsi, idx);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 701b708628b0..1758dd3bf91b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -382,8 +382,7 @@ struct i40e_ring {
 	 * these values always store the USER setting, and must be converted
 	 * before programming to a register.
 	 */
-	u16 rx_itr_setting;
-	u16 tx_itr_setting;
+	u16 itr_setting;
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 464200f48d91..291130af2985 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1475,14 +1475,14 @@ static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
 {
 	struct i40evf_adapter *adapter = vsi->back;
 
-	return adapter->rx_rings[idx].rx_itr_setting;
+	return adapter->rx_rings[idx].itr_setting;
 }
 
 static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
 {
 	struct i40evf_adapter *adapter = vsi->back;
 
-	return adapter->tx_rings[idx].tx_itr_setting;
+	return adapter->tx_rings[idx].itr_setting;
 }
 
 /**
@@ -1503,7 +1503,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 	/* avoid dynamic calculation if in countdown mode OR if
 	 * all dynamic is disabled
 	 */
-	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+	txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
 	rx_itr_setting = get_rx_itr(vsi, idx);
 	tx_itr_setting = get_tx_itr(vsi, idx);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 7798a6645c3f..038ed0e2acb7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -362,8 +362,7 @@ struct i40e_ring {
 	 * these values always store the USER setting, and must be converted
 	 * before programming to a register.
 	 */
-	u16 rx_itr_setting;
-	u16 tx_itr_setting;
+	u16 itr_setting;
 
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index e2d8aa19d205..11dfdc882934 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev,
 	rx_ring = &adapter->rx_rings[queue];
 	tx_ring = &adapter->tx_rings[queue];
 
-	if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
 		ec->use_adaptive_rx_coalesce = 1;
 
-	if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
 		ec->use_adaptive_tx_coalesce = 1;
 
-	ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-	ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
 
 	return 0;
 }
@@ -519,24 +519,24 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 	struct i40e_q_vector *q_vector;
 	u16 vector;
 
-	rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
-	tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
+	rx_ring->itr_setting = ec->rx_coalesce_usecs;
+	tx_ring->itr_setting = ec->tx_coalesce_usecs;
 
-	rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+	rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	if (!ec->use_adaptive_rx_coalesce)
-		rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
+		rx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
-	tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+	tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	if (!ec->use_adaptive_tx_coalesce)
-		tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
+		tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
 	q_vector = rx_ring->q_vector;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
 	vector = vsi->base_vector + q_vector->v_idx;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
 
 	q_vector = tx_ring->q_vector;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
+	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
 	vector = vsi->base_vector + q_vector->v_idx;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 16989ad2ca90..a5fb540c2637 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -354,7 +354,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 	q_vector->rx.ring = rx_ring;
 	q_vector->rx.count++;
 	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
 	q_vector->ring_mask |= BIT(r_idx);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
@@ -379,7 +379,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
+	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	q_vector->num_ringpairs++;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
@@ -1169,7 +1169,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		tx_ring->netdev = adapter->netdev;
 		tx_ring->dev = &adapter->pdev->dev;
 		tx_ring->count = adapter->tx_desc_count;
-		tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
+		tx_ring->itr_setting = I40E_ITR_TX_DEF;
 		if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
 			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
 
@@ -1178,7 +1178,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 		rx_ring->netdev = adapter->netdev;
 		rx_ring->dev = &adapter->pdev->dev;
 		rx_ring->count = adapter->rx_desc_count;
-		rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
+		rx_ring->itr_setting = I40E_ITR_RX_DEF;
 	}
 
 	adapter->num_active_queues = num_active_queues;

From 71dc371993625b4b1ae26214af74427765bfa3a2 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:49:53 -0500
Subject: [PATCH 0024/1678] i40e/i40evf: Clean up logic for adaptive ITR

The logic for dynamic ITR update is confusing at best as there were odd
paths chosen for how to find the rings associated with a given queue based
on the vector index and other inconsistencies throughout the code.

This patch is an attempt to clean up the logic so that we can more easily
understand what is going on. Specifically if there is a Rx or Tx ring that
is enabled in dynamic mode on the q_vector it is allowed to override the
other side of the interrupt moderation. While it isn't correct all this
patch is doing is cleaning up the logic for now so that when we come
through and fix it we can more easily identify that this is wrong.

The other big change made here is that we replace references to:
	vsi->rx_rings[q_vector->v_idx]->itr_setting
with:
	q_vector->rx.ring->itr_setting

The general idea is we can avoid the long pointer chase since just
accessing q_vector->rx.ring is a single pointer access versus having to
chase down vsi->rx_rings, and then finding the pointer in the array, and
finally chasing down the itr_setting from there.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 55 +++++------------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 59 +++++--------------
 2 files changed, 28 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b9121a87ee46..6b3fe5d26dc1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1016,6 +1016,9 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 	int bytes_per_usec;
 	unsigned int usecs, estimated_usecs;
 
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+		return false;
+
 	if (rc->total_packets == 0 || !rc->itr)
 		return false;
 
@@ -2288,15 +2291,6 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_PFINT_DYN_CTLN
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
-	return vsi->rx_rings[idx]->itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
-	return vsi->tx_rings[idx]->itr_setting;
-}
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -2309,9 +2303,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 {
 	struct i40e_hw *hw = &vsi->back->hw;
 	bool rx = false, tx = false;
-	u32 rxval, txval;
-	int idx = q_vector->v_idx;
-	int rx_itr_setting, tx_itr_setting;
+	u32 txval;
 
 	/* If we don't have MSIX, then we only need to re-enable icr0 */
 	if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2319,29 +2311,15 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		return;
 	}
 
-	/* avoid dynamic calculation if in countdown mode OR if
-	 * all dynamic is disabled
-	 */
 	txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
-	rx_itr_setting = get_rx_itr(vsi, idx);
-	tx_itr_setting = get_tx_itr(vsi, idx);
-
-	if (q_vector->itr_countdown > 0 ||
-	    (!ITR_IS_DYNAMIC(rx_itr_setting) &&
-	     !ITR_IS_DYNAMIC(tx_itr_setting))) {
+	/* avoid dynamic calculation if in countdown mode */
+	if (q_vector->itr_countdown > 0)
 		goto enable_int;
-	}
 
-	if (ITR_IS_DYNAMIC(rx_itr_setting)) {
-		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
-	}
-
-	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
-		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
-	}
+	/* these will return false if dynamic mode is disabled */
+	rx = i40e_set_new_dynamic_itr(&q_vector->rx);
+	tx = i40e_set_new_dynamic_itr(&q_vector->tx);
 
 	if (rx || tx) {
 		/* get the higher of the two ITR adjustments and
@@ -2349,25 +2327,20 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		 * when in adaptive mode (Rx and/or Tx)
 		 */
 		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
+		u32 rxval;
 
 		q_vector->tx.itr = q_vector->rx.itr = itr;
-		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
-		tx = true;
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
-		rx = true;
-	}
 
-	/* only need to enable the interrupt once, but need
-	 * to possibly update both ITR values
-	 */
-	if (rx) {
 		/* set the INTENA_MSK_MASK so that this first write
 		 * won't actually enable the interrupt, instead just
 		 * updating the ITR (it's bit 31 PF and VF)
 		 */
-		rxval |= BIT(31);
+		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr) | BIT(31);
+
 		/* don't check _DOWN because interrupt isn't being enabled */
 		wr32(hw, INTREG(q_vector->reg_idx), rxval);
+
+		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
 	}
 
 enable_int:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 291130af2985..3fd7e9731f49 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -413,6 +413,9 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 	int bytes_per_usec;
 	unsigned int usecs, estimated_usecs;
 
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+		return false;
+
 	if (rc->total_packets == 0 || !rc->itr)
 		return false;
 
@@ -1471,19 +1474,6 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_VFINT_DYN_CTLN1
-static inline int get_rx_itr(struct i40e_vsi *vsi, int idx)
-{
-	struct i40evf_adapter *adapter = vsi->back;
-
-	return adapter->rx_rings[idx].itr_setting;
-}
-
-static inline int get_tx_itr(struct i40e_vsi *vsi, int idx)
-{
-	struct i40evf_adapter *adapter = vsi->back;
-
-	return adapter->tx_rings[idx].itr_setting;
-}
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1496,33 +1486,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 {
 	struct i40e_hw *hw = &vsi->back->hw;
 	bool rx = false, tx = false;
-	u32 rxval, txval;
-	int idx = q_vector->v_idx;
-	int rx_itr_setting, tx_itr_setting;
+	u32 txval;
 
-	/* avoid dynamic calculation if in countdown mode OR if
-	 * all dynamic is disabled
-	 */
 	txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
-	rx_itr_setting = get_rx_itr(vsi, idx);
-	tx_itr_setting = get_tx_itr(vsi, idx);
-
-	if (q_vector->itr_countdown > 0 ||
-	    (!ITR_IS_DYNAMIC(rx_itr_setting) &&
-	     !ITR_IS_DYNAMIC(tx_itr_setting))) {
+	/* avoid dynamic calculation if in countdown mode */
+	if (q_vector->itr_countdown > 0)
 		goto enable_int;
-	}
 
-	if (ITR_IS_DYNAMIC(rx_itr_setting)) {
-		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
-	}
-
-	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
-		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
-	}
+	/* these will return false if dynamic mode is disabled */
+	rx = i40e_set_new_dynamic_itr(&q_vector->rx);
+	tx = i40e_set_new_dynamic_itr(&q_vector->tx);
 
 	if (rx || tx) {
 		/* get the higher of the two ITR adjustments and
@@ -1530,25 +1504,20 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		 * when in adaptive mode (Rx and/or Tx)
 		 */
 		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
+		u32 rxval;
 
 		q_vector->tx.itr = q_vector->rx.itr = itr;
-		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
-		tx = true;
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
-		rx = true;
-	}
 
-	/* only need to enable the interrupt once, but need
-	 * to possibly update both ITR values
-	 */
-	if (rx) {
 		/* set the INTENA_MSK_MASK so that this first write
 		 * won't actually enable the interrupt, instead just
 		 * updating the ITR (it's bit 31 PF and VF)
 		 */
-		rxval |= BIT(31);
+		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr) | BIT(31);
+
 		/* don't check _DOWN because interrupt isn't being enabled */
 		wr32(hw, INTREG(q_vector->reg_idx), rxval);
+
+		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
 	}
 
 enable_int:

From 1fa51a650e1deb50410677f1bd6c0ce17aa48a49 Mon Sep 17 00:00:00 2001
From: Filip Sadowski <filip.sadowski@intel.com>
Date: Fri, 29 Dec 2017 08:50:05 -0500
Subject: [PATCH 0025/1678] i40e: Add delay after EMP reset for firmware to
 recover

This patch adds necessary delay for 4.33 firmware to recover after
EMP reset. Without this patch driver occasionally reinitializes
structures too quickly to communicate with firmware after EMP reset
causing AdminQ to timeout.

Signed-off-by: Filip Sadowski <filip.sadowski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 005a6d6a20fb..a88fdb8bf5f0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9215,6 +9215,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 	}
 	i40e_get_oem_version(&pf->hw);
 
+	if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+	    ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) ||
+	     hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) {
+		/* The following delay is necessary for 4.33 firmware and older
+		 * to recover after EMP reset. 200 ms should suffice but we
+		 * put here 300 ms to be sure that FW is ready to operate
+		 * after reset.
+		 */
+		mdelay(300);
+	}
+
 	/* re-verify the eeprom if we just had an EMP reset */
 	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
 		i40e_verify_eeprom(pf);

From 17b4d25c12abc9c9e9e111ad00968997b246b1c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Jab=C5=82o=C5=84ski?=
 <pawel.jablonski@intel.com>
Date: Fri, 29 Dec 2017 08:50:20 -0500
Subject: [PATCH 0026/1678] i40e: Warn when setting link-down-on-close while in
 MFP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a warning message when the link-down-on-close flag is
setting on. The warning is printed only on MFP devices

Signed-off-by: Paweł Jabłoński <pawel.jablonski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0fb60336e64b..5ca63c5d36b4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4479,6 +4479,12 @@ flags_complete:
 		}
 	}
 
+	if ((changed_flags & pf->flags &
+	     I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+	    (pf->flags & I40E_FLAG_MFP_ENABLED))
+		dev_warn(&pf->pdev->dev,
+			 "Turning on link-down-on-close flag may affect other partitions\n");
+
 	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
 		if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
 			struct i40e_dcbx_config *dcbcfg;

From fe09ed0ee1f57a818e4d7e41b25f16744487e678 Mon Sep 17 00:00:00 2001
From: Alan Brady <alan.brady@intel.com>
Date: Fri, 29 Dec 2017 08:50:34 -0500
Subject: [PATCH 0027/1678] i40e: use changed_flags to check
 I40E_FLAG_DISABLE_FW_LLDP

Currently in i40e_set_priv_flags we use new_flags to check for the
I40E_FLAG_DISABLE_FW_LLDP flag.  This is an issue for a few a reasons.
DISABLE_FW_LLDP is persistent across reboots/driver reloads.  This means
we need some way to detect if FW LLDP is enabled on init.  We do this by
trying to init_dcb and if it fails with EPERM we know LLDP is disabled
in FW.

This could be a problem on older FW versions or NPAR enabled PFs because
there are situations where the FW could disable LLDP, but they do _not_
support using this flag to change it.  If we do end up in this
situation, the flag will be set, then when the user tries to change any
priv flags, the driver thinks the user is trying to disable FW LLDP on a
FW that doesn't support it and essentially forbids any priv flag
changes.

The fix is simple, instead of checking if this flag is set, we should be
checking if the user is trying to _change_ the flag on unsupported FW
versions.

This patch also adds a comment explaining that the cmpxchg is the point
of no return.  Once we put the new flags into pf->flags we can't back
out.

Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5ca63c5d36b4..5ee27358922a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4406,6 +4406,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 	}
 
 flags_complete:
+	changed_flags = orig_flags ^ new_flags;
+
 	/* Before we finalize any flag changes, we need to perform some
 	 * checks to ensure that the changes are supported and safe.
 	 */
@@ -4415,13 +4417,17 @@ flags_complete:
 	    !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
 		return -EOPNOTSUPP;
 
-	/* Disable FW LLDP not supported if NPAR active or if FW
-	 * API version < 1.7
+	/* If the driver detected FW LLDP was disabled on init, this flag could
+	 * be set, however we do not support _changing_ the flag if NPAR is
+	 * enabled or FW API version < 1.7.  There are situations where older
+	 * FW versions/NPAR enabled PFs could disable LLDP, however we _must_
+	 * not allow the user to enable/disable LLDP with this flag on
+	 * unsupported FW versions.
 	 */
-	if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
 		if (pf->hw.func_caps.npar_enable) {
 			dev_warn(&pf->pdev->dev,
-				 "Unable to stop FW LLDP if NPAR active\n");
+				 "Unable to change FW LLDP if NPAR active\n");
 			return -EOPNOTSUPP;
 		}
 
@@ -4429,7 +4435,7 @@ flags_complete:
 		    (pf->hw.aq.api_maj_ver == 1 &&
 		     pf->hw.aq.api_min_ver < 7)) {
 			dev_warn(&pf->pdev->dev,
-				 "FW ver does not support stopping FW LLDP\n");
+				 "FW ver does not support changing FW LLDP\n");
 			return -EOPNOTSUPP;
 		}
 	}
@@ -4439,6 +4445,10 @@ flags_complete:
 	 * something else has modified the flags variable since we copied it
 	 * originally. We'll just punt with an error and log something in the
 	 * message buffer.
+	 *
+	 * This is the point of no return for this function.  We need to have
+	 * checked any discrepancies or misconfigurations and returned
+	 * EOPNOTSUPP before updating pf->flags here.
 	 */
 	if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
 		dev_warn(&pf->pdev->dev,
@@ -4446,8 +4456,6 @@ flags_complete:
 		return -EAGAIN;
 	}
 
-	changed_flags = orig_flags ^ new_flags;
-
 	/* Process any additional changes needed as a result of flag changes.
 	 * The changed_flags value reflects the list of bits that were
 	 * changed in the code above.

From 8b99b1179c27958f51ecfc489af5f459f9408d96 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:50:44 -0500
Subject: [PATCH 0028/1678] i40e/i40evf: Clean-up of bits related to using
 q_vector->reg_idx

This patch is a further clean-up related to the change over to using
q_vector->reg_idx when accessing the ITR registers. Specifically the code
appears to have several other spots where we were computing the register
offset manually and this resulted in errors in a few spots.

Specifically in the i40evf functions for mapping queues to vectors it
appears we may have had an off by 1 error since (v_idx - 1) for the first
q_vector with an index of 0 would result in us returning -1 if I am not
mistaken.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c     | 12 ++++++------
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 12 +++++-------
 drivers/net/ethernet/intel/i40evf/i40evf_main.c    |  6 ++++--
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5ee27358922a..8cc9198ac32f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2311,7 +2311,7 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_q_vector *q_vector;
-	u16 vector, intrl;
+	u16 intrl;
 
 	intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
 
@@ -2330,15 +2330,15 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 
 	q_vector = rx_ring->q_vector;
 	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
-	vector = vsi->base_vector + q_vector->v_idx;
-	wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+	wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, q_vector->reg_idx),
+	     q_vector->rx.itr);
 
 	q_vector = tx_ring->q_vector;
 	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
-	vector = vsi->base_vector + q_vector->v_idx;
-	wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+	wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, q_vector->reg_idx),
+	     q_vector->tx.itr);
 
-	wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl);
+	wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
 	i40e_flush(hw);
 }
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 11dfdc882934..ed5b8ec4d2a2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
 
 /**
  * i40evf_set_itr_per_queue - set ITR values for specific queue
- * @vsi: the VSI to set values for
+ * @adapter: the VF adapter struct to set values for
  * @ec: coalesce settings from ethtool
  * @queue: the queue to modify
  *
@@ -514,10 +514,8 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 {
 	struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
 	struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
-	struct i40e_vsi *vsi = &adapter->vsi;
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_q_vector *q_vector;
-	u16 vector;
 
 	rx_ring->itr_setting = ec->rx_coalesce_usecs;
 	tx_ring->itr_setting = ec->tx_coalesce_usecs;
@@ -532,13 +530,13 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 
 	q_vector = rx_ring->q_vector;
 	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
-	vector = vsi->base_vector + q_vector->v_idx;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+	     q_vector->rx.itr);
 
 	q_vector = tx_ring->q_vector;
 	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
-	vector = vsi->base_vector + q_vector->v_idx;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+	     q_vector->tx.itr);
 
 	i40e_flush(hw);
 }
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index a5fb540c2637..f648e5e97529 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -357,7 +357,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
 	q_vector->ring_mask |= BIT(r_idx);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
+	     q_vector->rx.itr);
 }
 
 /**
@@ -382,7 +383,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	q_vector->num_ringpairs++;
-	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
+	     q_vector->tx.itr);
 }
 
 /**

From 4ff17929e6031bf3377233331223f617015e4ef3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:50:55 -0500
Subject: [PATCH 0029/1678] i40e/i40evf: Don't bother setting the CLEARPBA bit

The CLEARPBA bit in the dynamic interrupt control register actually has
no effect either way on the hardware. As per errata 28 in the XL710
specification update the interrupt is actually cleared any time the
register is written with the INTENA_MSK bit set to 0. As such the act of
toggling the enable bit actually will trigger the interrupt being
cleared and could lead to potential lost events if auto-masking is
not enabled.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 11 ++++++++++-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 11 ++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 6b3fe5d26dc1..ac1fa9e3c04f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2281,8 +2281,17 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 {
 	u32 val;
 
+	/* We don't bother with setting the CLEARPBA bit as the data sheet
+	 * points out doing so is "meaningless since it was already
+	 * auto-cleared". The auto-clearing happens when the interrupt is
+	 * asserted.
+	 *
+	 * Hardware errata 28 for also indicates that writing to a
+	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+	 * an event in the PBA anyway so we need to rely on the automask
+	 * to hold pending events for us until the interrupt is re-enabled
+	 */
 	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-	      I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
 	      (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
 	      (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 3fd7e9731f49..34d898f0adaa 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1464,8 +1464,17 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 {
 	u32 val;
 
+	/* We don't bother with setting the CLEARPBA bit as the data sheet
+	 * points out doing so is "meaningless since it was already
+	 * auto-cleared". The auto-clearing happens when the interrupt is
+	 * asserted.
+	 *
+	 * Hardware errata 28 for also indicates that writing to a
+	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+	 * an event in the PBA anyway so we need to rely on the automask
+	 * to hold pending events for us until the interrupt is re-enabled
+	 */
 	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
-	      I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
 	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
 	      (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
 

From 9b2c45d479d0fb8647c9e83359df69162b5fbe5f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Mon, 12 Feb 2018 20:00:20 +0100
Subject: [PATCH 0030/1678] net: make getname() functions return length rather
 than use int* parameter

Changes since v1:
Added changes in these files:
    drivers/infiniband/hw/usnic/usnic_transport.c
    drivers/staging/lustre/lnet/lnet/lib-socket.c
    drivers/target/iscsi/iscsi_target_login.c
    drivers/vhost/net.c
    fs/dlm/lowcomms.c
    fs/ocfs2/cluster/tcp.c
    security/tomoyo/network.c

Before:
All these functions either return a negative error indicator,
or store length of sockaddr into "int *socklen" parameter
and return zero on success.

"int *socklen" parameter is awkward. For example, if caller does not
care, it still needs to provide on-stack storage for the value
it does not need.

None of the many FOO_getname() functions of various protocols
ever used old value of *socklen. They always just overwrite it.

This change drops this parameter, and makes all these functions, on success,
return length of sockaddr. It's always >= 0 and can be differentiated
from an error.

Tests in callers are changed from "if (err)" to "if (err < 0)", where needed.

rpc_sockname() lost "int buflen" parameter, since its only use was
to be passed to kernel_getsockname() as &buflen and subsequently
not used in any way.

Userspace API is not changed.

    text    data     bss      dec     hex filename
30108430 2633624  873672 33615726 200ef6e vmlinux.before.o
30108109 2633612  873672 33615393 200ee21 vmlinux.o

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: David S. Miller <davem@davemloft.net>
CC: linux-kernel@vger.kernel.org
CC: netdev@vger.kernel.org
CC: linux-bluetooth@vger.kernel.org
CC: linux-decnet-user@lists.sourceforge.net
CC: linux-wireless@vger.kernel.org
CC: linux-rdma@vger.kernel.org
CC: linux-sctp@vger.kernel.org
CC: linux-nfs@vger.kernel.org
CC: linux-x25@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/usnic/usnic_transport.c |  5 ++-
 drivers/isdn/mISDN/socket.c                   |  5 ++-
 drivers/net/ppp/pppoe.c                       |  6 ++--
 drivers/net/ppp/pptp.c                        |  6 ++--
 drivers/scsi/iscsi_tcp.c                      | 14 ++++----
 drivers/soc/qcom/qmi_interface.c              |  3 +-
 drivers/staging/ipx/af_ipx.c                  |  6 ++--
 drivers/staging/irda/net/af_irda.c            |  8 ++---
 drivers/staging/lustre/lnet/lnet/lib-socket.c |  7 ++--
 drivers/target/iscsi/iscsi_target_login.c     | 18 +++++-----
 drivers/vhost/net.c                           |  7 ++--
 fs/dlm/lowcomms.c                             |  7 ++--
 fs/ocfs2/cluster/tcp.c                        |  6 ++--
 include/linux/net.h                           |  8 ++---
 include/net/inet_common.h                     |  2 +-
 include/net/ipv6.h                            |  2 +-
 include/net/sock.h                            |  2 +-
 net/appletalk/ddp.c                           |  5 ++-
 net/atm/pvc.c                                 |  5 ++-
 net/atm/svc.c                                 |  5 ++-
 net/ax25/af_ax25.c                            |  4 +--
 net/bluetooth/hci_sock.c                      |  4 +--
 net/bluetooth/l2cap_sock.c                    |  5 ++-
 net/bluetooth/rfcomm/sock.c                   |  5 ++-
 net/bluetooth/sco.c                           |  5 ++-
 net/can/raw.c                                 |  6 ++--
 net/core/sock.c                               |  5 +--
 net/decnet/af_decnet.c                        |  6 ++--
 net/ipv4/af_inet.c                            |  5 ++-
 net/ipv6/af_inet6.c                           |  5 ++-
 net/iucv/af_iucv.c                            |  5 ++-
 net/l2tp/l2tp_ip.c                            |  5 ++-
 net/l2tp/l2tp_ip6.c                           |  5 ++-
 net/l2tp/l2tp_ppp.c                           |  5 ++-
 net/llc/af_llc.c                              |  5 ++-
 net/netlink/af_netlink.c                      |  5 ++-
 net/netrom/af_netrom.c                        |  9 ++---
 net/nfc/llcp_sock.c                           |  5 ++-
 net/packet/af_packet.c                        | 10 +++---
 net/phonet/socket.c                           |  5 ++-
 net/qrtr/qrtr.c                               |  5 ++-
 net/rds/af_rds.c                              |  5 ++-
 net/rds/tcp.c                                 |  7 ++--
 net/rose/af_rose.c                            |  5 ++-
 net/sctp/ipv6.c                               |  8 ++---
 net/smc/af_smc.c                              | 11 +++---
 net/socket.c                                  | 35 +++++++++----------
 net/sunrpc/clnt.c                             |  6 ++--
 net/sunrpc/svcsock.c                          | 13 ++++---
 net/sunrpc/xprtsock.c                         |  3 +-
 net/tipc/socket.c                             |  5 ++-
 net/unix/af_unix.c                            | 10 +++---
 net/vmw_vsock/af_vsock.c                      |  4 +--
 net/x25/af_x25.c                              |  4 +--
 security/tomoyo/network.c                     |  5 +--
 55 files changed, 159 insertions(+), 203 deletions(-)

diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index de318389a301..67de94343cb4 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -174,14 +174,13 @@ void usnic_transport_put_socket(struct socket *sock)
 int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
 					uint32_t *addr, uint16_t *port)
 {
-	int len;
 	int err;
 	struct sockaddr_in sock_addr;
 
 	err = sock->ops->getname(sock,
 				(struct sockaddr *)&sock_addr,
-				&len, 0);
-	if (err)
+				0);
+	if (err < 0)
 		return err;
 
 	if (sock_addr.sin_family != AF_INET)
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index c5603d1a07d6..1f8f489b4167 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -560,7 +560,7 @@ done:
 
 static int
 data_sock_getname(struct socket *sock, struct sockaddr *addr,
-		  int *addr_len, int peer)
+		  int peer)
 {
 	struct sockaddr_mISDN	*maddr = (struct sockaddr_mISDN *) addr;
 	struct sock		*sk = sock->sk;
@@ -570,14 +570,13 @@ data_sock_getname(struct socket *sock, struct sockaddr *addr,
 
 	lock_sock(sk);
 
-	*addr_len = sizeof(*maddr);
 	maddr->family = AF_ISDN;
 	maddr->dev = _pms(sk)->dev->id;
 	maddr->channel = _pms(sk)->ch.nr;
 	maddr->sapi = _pms(sk)->ch.addr & 0xff;
 	maddr->tei = (_pms(sk)->ch.addr >> 8) & 0xff;
 	release_sock(sk);
-	return 0;
+	return sizeof(*maddr);
 }
 
 static const struct proto_ops data_sock_ops = {
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5aa59f41bf8c..bd89d1c559ce 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -714,7 +714,7 @@ err_put:
 }
 
 static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
-		  int *usockaddr_len, int peer)
+		  int peer)
 {
 	int len = sizeof(struct sockaddr_pppox);
 	struct sockaddr_pppox sp;
@@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	memcpy(uaddr, &sp, len);
 
-	*usockaddr_len = len;
-
-	return 0;
+	return len;
 }
 
 static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 6dde9a0cfe76..8249d46a7844 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
 }
 
 static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
-	int *usockaddr_len, int peer)
+	int peer)
 {
 	int len = sizeof(struct sockaddr_pppox);
 	struct sockaddr_pppox sp;
@@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	memcpy(uaddr, &sp, len);
 
-	*usockaddr_len = len;
-
-	return 0;
+	return len;
 }
 
 static int pptp_release(struct socket *sock)
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 6198559abbd8..0ad00dbf912d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -732,7 +732,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
 	struct sockaddr_in6 addr;
-	int rc, len;
+	int rc;
 
 	switch(param) {
 	case ISCSI_PARAM_CONN_PORT:
@@ -745,12 +745,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
 		}
 		if (param == ISCSI_PARAM_LOCAL_PORT)
 			rc = kernel_getsockname(tcp_sw_conn->sock,
-						(struct sockaddr *)&addr, &len);
+						(struct sockaddr *)&addr);
 		else
 			rc = kernel_getpeername(tcp_sw_conn->sock,
-						(struct sockaddr *)&addr, &len);
+						(struct sockaddr *)&addr);
 		spin_unlock_bh(&conn->session->frwd_lock);
-		if (rc)
+		if (rc < 0)
 			return rc;
 
 		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
@@ -771,7 +771,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
 	struct iscsi_tcp_conn *tcp_conn;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn;
 	struct sockaddr_in6 addr;
-	int rc, len;
+	int rc;
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_IPADDRESS:
@@ -793,9 +793,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
 		}
 
 		rc = kernel_getsockname(tcp_sw_conn->sock,
-					(struct sockaddr *)&addr, &len);
+					(struct sockaddr *)&addr);
 		spin_unlock_bh(&session->frwd_lock);
-		if (rc)
+		if (rc < 0)
 			return rc;
 
 		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
diff --git a/drivers/soc/qcom/qmi_interface.c b/drivers/soc/qcom/qmi_interface.c
index 877611d5c42b..321982277697 100644
--- a/drivers/soc/qcom/qmi_interface.c
+++ b/drivers/soc/qcom/qmi_interface.c
@@ -586,7 +586,6 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
 				      struct sockaddr_qrtr *sq)
 {
 	struct socket *sock;
-	int sl = sizeof(*sq);
 	int ret;
 
 	ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
@@ -594,7 +593,7 @@ static struct socket *qmi_sock_create(struct qmi_handle *qmi,
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-	ret = kernel_getsockname(sock, (struct sockaddr *)sq, &sl);
+	ret = kernel_getsockname(sock, (struct sockaddr *)sq);
 	if (ret < 0) {
 		sock_release(sock);
 		return ERR_PTR(ret);
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index d21a9d128d3e..5703dd176787 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1577,7 +1577,7 @@ out:
 
 
 static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	struct ipx_address *addr;
 	struct sockaddr_ipx sipx;
@@ -1585,8 +1585,6 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct ipx_sock *ipxs = ipx_sk(sk);
 	int rc;
 
-	*uaddr_len = sizeof(struct sockaddr_ipx);
-
 	lock_sock(sk);
 	if (peer) {
 		rc = -ENOTCONN;
@@ -1620,7 +1618,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
 	sipx.sipx_zero	 = 0;
 	memcpy(uaddr, &sipx, sizeof(sipx));
 
-	rc = 0;
+	rc = sizeof(struct sockaddr_ipx);
 out:
 	release_sock(sk);
 	return rc;
diff --git a/drivers/staging/irda/net/af_irda.c b/drivers/staging/irda/net/af_irda.c
index 2f1e9ab3d6d0..c13553a9ee11 100644
--- a/drivers/staging/irda/net/af_irda.c
+++ b/drivers/staging/irda/net/af_irda.c
@@ -697,7 +697,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
  *
  */
 static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	struct sockaddr_irda saddr;
 	struct sock *sk = sock->sk;
@@ -720,11 +720,9 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
 	pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel);
 	pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr);
 
-	/* uaddr_len come to us uninitialised */
-	*uaddr_len = sizeof (struct sockaddr_irda);
-	memcpy(uaddr, &saddr, *uaddr_len);
+	memcpy(uaddr, &saddr, sizeof (struct sockaddr_irda));
 
-	return 0;
+	return sizeof (struct sockaddr_irda);
 }
 
 /*
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index ce93806eefca..1bee667802b0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -448,14 +448,13 @@ int
 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
 {
 	struct sockaddr_in sin;
-	int len = sizeof(sin);
 	int rc;
 
 	if (remote)
-		rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+		rc = kernel_getpeername(sock, (struct sockaddr *)&sin);
 	else
-		rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
-	if (rc) {
+		rc = kernel_getsockname(sock, (struct sockaddr *)&sin);
+	if (rc < 0) {
 		CERROR("Error %d getting sock %s IP/port\n",
 		       rc, remote ? "peer" : "local");
 		return rc;
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 64c5a57b92e4..99501785cdc1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1020,7 +1020,7 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 	struct socket *new_sock, *sock = np->np_socket;
 	struct sockaddr_in sock_in;
 	struct sockaddr_in6 sock_in6;
-	int rc, err;
+	int rc;
 
 	rc = kernel_accept(sock, &new_sock, 0);
 	if (rc < 0)
@@ -1033,8 +1033,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 		memset(&sock_in6, 0, sizeof(struct sockaddr_in6));
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in6, &err, 1);
-		if (!rc) {
+				(struct sockaddr *)&sock_in6, 1);
+		if (rc >= 0) {
 			if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
 				memcpy(&conn->login_sockaddr, &sock_in6, sizeof(sock_in6));
 			} else {
@@ -1047,8 +1047,8 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 		}
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in6, &err, 0);
-		if (!rc) {
+				(struct sockaddr *)&sock_in6, 0);
+		if (rc >= 0) {
 			if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr)) {
 				memcpy(&conn->local_sockaddr, &sock_in6, sizeof(sock_in6));
 			} else {
@@ -1063,13 +1063,13 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 		memset(&sock_in, 0, sizeof(struct sockaddr_in));
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in, &err, 1);
-		if (!rc)
+				(struct sockaddr *)&sock_in, 1);
+		if (rc >= 0)
 			memcpy(&conn->login_sockaddr, &sock_in, sizeof(sock_in));
 
 		rc = conn->sock->ops->getname(conn->sock,
-				(struct sockaddr *)&sock_in, &err, 0);
-		if (!rc)
+				(struct sockaddr *)&sock_in, 0);
+		if (rc >= 0)
 			memcpy(&conn->local_sockaddr, &sock_in, sizeof(sock_in));
 	}
 
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 610cba276d47..b5fb56b822fd 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1038,7 +1038,7 @@ static struct socket *get_raw_socket(int fd)
 		struct sockaddr_ll sa;
 		char  buf[MAX_ADDR_LEN];
 	} uaddr;
-	int uaddr_len = sizeof uaddr, r;
+	int r;
 	struct socket *sock = sockfd_lookup(fd, &r);
 
 	if (!sock)
@@ -1050,9 +1050,8 @@ static struct socket *get_raw_socket(int fd)
 		goto err;
 	}
 
-	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
-			       &uaddr_len, 0);
-	if (r)
+	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
+	if (r < 0)
 		goto err;
 
 	if (uaddr.sa.sll_family != AF_PACKET) {
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cff79ea0c01d..5243989a60cc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -482,7 +482,6 @@ static void lowcomms_error_report(struct sock *sk)
 {
 	struct connection *con;
 	struct sockaddr_storage saddr;
-	int buflen;
 	void (*orig_report)(struct sock *) = NULL;
 
 	read_lock_bh(&sk->sk_callback_lock);
@@ -492,7 +491,7 @@ static void lowcomms_error_report(struct sock *sk)
 
 	orig_report = listen_sock.sk_error_report;
 	if (con->sock == NULL ||
-	    kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
+	    kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) {
 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
 				   "sending to node %d, port %d, "
 				   "sk_err=%d/%d\n", dlm_our_nodeid(),
@@ -757,8 +756,8 @@ static int tcp_accept_from_sock(struct connection *con)
 
 	/* Get the connected socket's peer */
 	memset(&peeraddr, 0, sizeof(peeraddr));
-	if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
-				  &len, 2)) {
+	len = newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, 2);
+	if (len < 0) {
 		result = -ECONNABORTED;
 		goto accept_err;
 	}
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eac5140aac47..e5076185cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1819,7 +1819,7 @@ int o2net_register_hb_callbacks(void)
 
 static int o2net_accept_one(struct socket *sock, int *more)
 {
-	int ret, slen;
+	int ret;
 	struct sockaddr_in sin;
 	struct socket *new_sock = NULL;
 	struct o2nm_node *node = NULL;
@@ -1864,9 +1864,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
 		goto out;
 	}
 
-	slen = sizeof(sin);
-	ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
-				       &slen, 1);
+	ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
 	if (ret < 0)
 		goto out;
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 91216b16feb7..000d1aada74f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
 				      struct socket *newsock, int flags, bool kern);
 	int		(*getname)   (struct socket *sock,
 				      struct sockaddr *addr,
-				      int *sockaddr_len, int peer);
+				      int peer);
 	__poll_t	(*poll)	     (struct file *file, struct socket *sock,
 				      struct poll_table_struct *wait);
 	int		(*ioctl)     (struct socket *sock, unsigned int cmd,
@@ -294,10 +294,8 @@ int kernel_listen(struct socket *sock, int backlog);
 int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags);
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-		       int *addrlen);
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-		       int *addrlen);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
 int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
 		      int *optlen);
 int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 5a54c9570977..500f81375200 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,7 +32,7 @@ int inet_shutdown(struct socket *sock, int how);
 int inet_listen(struct socket *sock, int backlog);
 void inet_sock_destruct(struct sock *sk);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		 int peer);
 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int inet_ctl_sock_create(struct sock **sk, unsigned short family,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8606c9113d3f..7a98cd583c73 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1056,7 +1056,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
 
 int inet6_release(struct socket *sock);
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
-int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len,
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		  int peer);
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 169c92afcafa..3aa7b7d6e6c7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1584,7 +1584,7 @@ int sock_no_bind(struct socket *, struct sockaddr *, int);
 int sock_no_connect(struct socket *, struct sockaddr *, int, int);
 int sock_no_socketpair(struct socket *, struct socket *);
 int sock_no_accept(struct socket *, struct socket *, int, bool);
-int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
+int sock_no_getname(struct socket *, struct sockaddr *, int);
 __poll_t sock_no_poll(struct file *, struct socket *,
 			  struct poll_table_struct *);
 int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 03a9fc0771c0..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1238,7 +1238,7 @@ out:
  * fields into the sockaddr.
  */
 static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
-			 int *uaddr_len, int peer)
+			 int peer)
 {
 	struct sockaddr_at sat;
 	struct sock *sk = sock->sk;
@@ -1251,7 +1251,6 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
 		if (atalk_autobind(sk) < 0)
 			goto out;
 
-	*uaddr_len = sizeof(struct sockaddr_at);
 	memset(&sat, 0, sizeof(sat));
 
 	if (peer) {
@@ -1268,9 +1267,9 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
 		sat.sat_port	    = at->src_port;
 	}
 
-	err = 0;
 	sat.sat_family = AF_APPLETALK;
 	memcpy(uaddr, &sat, sizeof(sat));
+	err = sizeof(struct sockaddr_at);
 
 out:
 	release_sock(sk);
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index e1140b3bdcaa..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -87,21 +87,20 @@ static int pvc_getsockopt(struct socket *sock, int level, int optname,
 }
 
 static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
-		       int *sockaddr_len, int peer)
+		       int peer)
 {
 	struct sockaddr_atmpvc *addr;
 	struct atm_vcc *vcc = ATM_SD(sock);
 
 	if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
 		return -ENOTCONN;
-	*sockaddr_len = sizeof(struct sockaddr_atmpvc);
 	addr = (struct sockaddr_atmpvc *)sockaddr;
 	memset(addr, 0, sizeof(*addr));
 	addr->sap_family = AF_ATMPVC;
 	addr->sap_addr.itf = vcc->dev->number;
 	addr->sap_addr.vpi = vcc->vpi;
 	addr->sap_addr.vci = vcc->vci;
-	return 0;
+	return sizeof(struct sockaddr_atmpvc);
 }
 
 static const struct proto_ops pvc_proto_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index c458adcbc177..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -419,15 +419,14 @@ out:
 }
 
 static int svc_getname(struct socket *sock, struct sockaddr *sockaddr,
-		       int *sockaddr_len, int peer)
+		       int peer)
 {
 	struct sockaddr_atmsvc *addr;
 
-	*sockaddr_len = sizeof(struct sockaddr_atmsvc);
 	addr = (struct sockaddr_atmsvc *) sockaddr;
 	memcpy(addr, peer ? &ATM_SD(sock)->remote : &ATM_SD(sock)->local,
 	       sizeof(struct sockaddr_atmsvc));
-	return 0;
+	return sizeof(struct sockaddr_atmsvc);
 }
 
 int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fdd399626b..c8319ed48485 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1388,7 +1388,7 @@ out:
 }
 
 static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
-	int *uaddr_len, int peer)
+	int peer)
 {
 	struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -1427,7 +1427,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 			fsa->fsa_digipeater[0] = null_ax25_address;
 		}
 	}
-	*uaddr_len = sizeof (struct full_sockaddr_ax25);
+	err = sizeof (struct full_sockaddr_ax25);
 
 out:
 	release_sock(sk);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 923e9a271872..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1340,7 +1340,7 @@ done:
 }
 
 static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
-			    int *addr_len, int peer)
+			    int peer)
 {
 	struct sockaddr_hci *haddr = (struct sockaddr_hci *)addr;
 	struct sock *sk = sock->sk;
@@ -1360,10 +1360,10 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
 		goto done;
 	}
 
-	*addr_len = sizeof(*haddr);
 	haddr->hci_family = AF_BLUETOOTH;
 	haddr->hci_dev    = hdev->id;
 	haddr->hci_channel= hci_pi(sk)->channel;
+	err = sizeof(*haddr);
 
 done:
 	release_sock(sk);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 67a8642f57ea..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -358,7 +358,7 @@ done:
 }
 
 static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
-			      int *len, int peer)
+			      int peer)
 {
 	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
 	struct sock *sk = sock->sk;
@@ -373,7 +373,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
 
 	memset(la, 0, sizeof(struct sockaddr_l2));
 	addr->sa_family = AF_BLUETOOTH;
-	*len = sizeof(struct sockaddr_l2);
 
 	la->l2_psm = chan->psm;
 
@@ -387,7 +386,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
 		la->l2_bdaddr_type = chan->src_type;
 	}
 
-	return 0;
+	return sizeof(struct sockaddr_l2);
 }
 
 static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1aaccf637479..93a3b219db09 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -533,7 +533,7 @@ done:
 	return err;
 }
 
-static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer)
+static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer)
 {
 	struct sockaddr_rc *sa = (struct sockaddr_rc *) addr;
 	struct sock *sk = sock->sk;
@@ -552,8 +552,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
 	else
 		bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src);
 
-	*len = sizeof(struct sockaddr_rc);
-	return 0;
+	return sizeof(struct sockaddr_rc);
 }
 
 static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 08df57665e1f..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -680,7 +680,7 @@ done:
 }
 
 static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
-			    int *len, int peer)
+			    int peer)
 {
 	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
 	struct sock *sk = sock->sk;
@@ -688,14 +688,13 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr,
 	BT_DBG("sock %p, sk %p", sock, sk);
 
 	addr->sa_family = AF_BLUETOOTH;
-	*len = sizeof(struct sockaddr_sco);
 
 	if (peer)
 		bacpy(&sa->sco_bdaddr, &sco_pi(sk)->dst);
 	else
 		bacpy(&sa->sco_bdaddr, &sco_pi(sk)->src);
 
-	return 0;
+	return sizeof(struct sockaddr_sco);
 }
 
 static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/can/raw.c b/net/can/raw.c
index f2ecc43376a1..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -470,7 +470,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 }
 
 static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
-		       int *len, int peer)
+		       int peer)
 {
 	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
 	struct sock *sk = sock->sk;
@@ -483,9 +483,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr,
 	addr->can_family  = AF_CAN;
 	addr->can_ifindex = ro->ifindex;
 
-	*len = sizeof(*addr);
-
-	return 0;
+	return sizeof(*addr);
 }
 
 static int raw_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/core/sock.c b/net/core/sock.c
index c501499a04fe..04e5e27c9b81 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1274,7 +1274,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	{
 		char address[128];
 
-		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+		lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
+		if (lv < 0)
 			return -ENOTCONN;
 		if (lv < len)
 			return -EINVAL;
@@ -2497,7 +2498,7 @@ int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
 EXPORT_SYMBOL(sock_no_accept);
 
 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
-		    int *len, int peer)
+		    int peer)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 91dd09f79808..45cb5bea884b 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1180,14 +1180,12 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
 }
 
 
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len,int peer)
+static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
 {
 	struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
 
-	*uaddr_len = sizeof(struct sockaddr_dn);
-
 	lock_sock(sk);
 
 	if (peer) {
@@ -1205,7 +1203,7 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int *uaddr_len
 
 	release_sock(sk);
 
-	return 0;
+	return sizeof(struct sockaddr_dn);
 }
 
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e4329e161943..f98e2f0db841 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -723,7 +723,7 @@ EXPORT_SYMBOL(inet_accept);
  *	This does both peername and sockname.
  */
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	struct sock *sk		= sock->sk;
 	struct inet_sock *inet	= inet_sk(sk);
@@ -745,8 +745,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sin->sin_addr.s_addr = addr;
 	}
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-	*uaddr_len = sizeof(*sin);
-	return 0;
+	return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet_getname);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..c1e292db04db 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -470,7 +470,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
  */
 
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
-		 int *uaddr_len, int peer)
+		 int peer)
 {
 	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -500,8 +500,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	}
 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
 						 sk->sk_bound_dev_if);
-	*uaddr_len = sizeof(*sin);
-	return 0;
+	return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet6_getname);
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e8cc7bcbca3..81ce15ffb878 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -989,14 +989,13 @@ done:
 }
 
 static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
-			     int *len, int peer)
+			     int peer)
 {
 	struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
 	struct iucv_sock *iucv = iucv_sk(sk);
 
 	addr->sa_family = AF_IUCV;
-	*len = sizeof(struct sockaddr_iucv);
 
 	if (peer) {
 		memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
@@ -1009,7 +1008,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
 	memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
 	memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
 
-	return 0;
+	return sizeof(struct sockaddr_iucv);
 }
 
 /**
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index ff61124fdf59..4614585e1720 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -349,7 +349,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
 }
 
 static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
-			   int *uaddr_len, int peer)
+			   int peer)
 {
 	struct sock *sk		= sock->sk;
 	struct inet_sock *inet	= inet_sk(sk);
@@ -370,8 +370,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
 		lsa->l2tp_conn_id = lsk->conn_id;
 		lsa->l2tp_addr.s_addr = addr;
 	}
-	*uaddr_len = sizeof(*lsa);
-	return 0;
+	return sizeof(*lsa);
 }
 
 static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 192344688c06..efea58b66295 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -421,7 +421,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
 }
 
 static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *uaddr_len, int peer)
+			    int peer)
 {
 	struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -449,8 +449,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
 	}
 	if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
 		lsa->l2tp_scope_id = sk->sk_bound_dev_if;
-	*uaddr_len = sizeof(*lsa);
-	return 0;
+	return sizeof(*lsa);
 }
 
 static int l2tp_ip6_backlog_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 59f246d7b290..99a03c72db4f 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -870,7 +870,7 @@ err:
 /* getname() support.
  */
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *usockaddr_len, int peer)
+			    int peer)
 {
 	int len = 0;
 	int error = 0;
@@ -969,8 +969,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		memcpy(uaddr, &sp, len);
 	}
 
-	*usockaddr_len = len;
-	error = 0;
+	error = len;
 
 	sock_put(sk);
 end:
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index c38d16f22d2a..01dcc0823d1f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -971,7 +971,7 @@ release:
  *	Return the address information of a socket.
  */
 static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
-			  int *uaddrlen, int peer)
+			  int peer)
 {
 	struct sockaddr_llc sllc;
 	struct sock *sk = sock->sk;
@@ -982,7 +982,6 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 	lock_sock(sk);
 	if (sock_flag(sk, SOCK_ZAPPED))
 		goto out;
-	*uaddrlen = sizeof(sllc);
 	if (peer) {
 		rc = -ENOTCONN;
 		if (sk->sk_state != TCP_ESTABLISHED)
@@ -1003,9 +1002,9 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
 			       IFHWADDRLEN);
 		}
 	}
-	rc = 0;
 	sllc.sllc_family = AF_LLC;
 	memcpy(uaddr, &sllc, sizeof(sllc));
+	rc = sizeof(sllc);
 out:
 	release_sock(sk);
 	return rc;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2ad445c1d27c..3c8af14330b5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1105,7 +1105,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 }
 
 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
-			   int *addr_len, int peer)
+			   int peer)
 {
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
@@ -1113,7 +1113,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 
 	nladdr->nl_family = AF_NETLINK;
 	nladdr->nl_pad = 0;
-	*addr_len = sizeof(*nladdr);
 
 	if (peer) {
 		nladdr->nl_pid = nlk->dst_portid;
@@ -1124,7 +1123,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
 		netlink_unlock_table();
 	}
-	return 0;
+	return sizeof(*nladdr);
 }
 
 static int netlink_ioctl(struct socket *sock, unsigned int cmd,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 9ba30c63be3d..35bb6807927f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -829,11 +829,12 @@ out_release:
 }
 
 static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
-	int *uaddr_len, int peer)
+	int peer)
 {
 	struct full_sockaddr_ax25 *sax = (struct full_sockaddr_ax25 *)uaddr;
 	struct sock *sk = sock->sk;
 	struct nr_sock *nr = nr_sk(sk);
+	int uaddr_len;
 
 	memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
 
@@ -848,16 +849,16 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
 		sax->fsa_ax25.sax25_call   = nr->user_addr;
 		memset(sax->fsa_digipeater, 0, sizeof(sax->fsa_digipeater));
 		sax->fsa_digipeater[0]     = nr->dest_addr;
-		*uaddr_len = sizeof(struct full_sockaddr_ax25);
+		uaddr_len = sizeof(struct full_sockaddr_ax25);
 	} else {
 		sax->fsa_ax25.sax25_family = AF_NETROM;
 		sax->fsa_ax25.sax25_ndigis = 0;
 		sax->fsa_ax25.sax25_call   = nr->source_addr;
-		*uaddr_len = sizeof(struct sockaddr_ax25);
+		uaddr_len = sizeof(struct sockaddr_ax25);
 	}
 	release_sock(sk);
 
-	return 0;
+	return uaddr_len;
 }
 
 int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 376040092142..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -497,7 +497,7 @@ error:
 }
 
 static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
-			     int *len, int peer)
+			     int peer)
 {
 	struct sock *sk = sock->sk;
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -510,7 +510,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
 		 llcp_sock->dsap, llcp_sock->ssap);
 
 	memset(llcp_addr, 0, sizeof(*llcp_addr));
-	*len = sizeof(struct sockaddr_nfc_llcp);
 
 	lock_sock(sk);
 	if (!llcp_sock->dev) {
@@ -528,7 +527,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
 	       llcp_addr->service_name_len);
 	release_sock(sk);
 
-	return 0;
+	return sizeof(struct sockaddr_nfc_llcp);
 }
 
 static inline __poll_t llcp_accept_poll(struct sock *parent)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e0f3f4aeeb4f..616cb9c18f88 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3409,7 +3409,7 @@ out:
 }
 
 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
-			       int *uaddr_len, int peer)
+			       int peer)
 {
 	struct net_device *dev;
 	struct sock *sk	= sock->sk;
@@ -3424,13 +3424,12 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
 	if (dev)
 		strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
 	rcu_read_unlock();
-	*uaddr_len = sizeof(*uaddr);
 
-	return 0;
+	return sizeof(*uaddr);
 }
 
 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
-			  int *uaddr_len, int peer)
+			  int peer)
 {
 	struct net_device *dev;
 	struct sock *sk = sock->sk;
@@ -3455,9 +3454,8 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sll->sll_halen = 0;
 	}
 	rcu_read_unlock();
-	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 
-	return 0;
+	return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 }
 
 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index fffcd69f63ff..f9b40e6a18a5 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -326,7 +326,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
 }
 
 static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
-				int *sockaddr_len, int peer)
+				int peer)
 {
 	struct sock *sk = sock->sk;
 	struct pn_sock *pn = pn_sk(sk);
@@ -337,8 +337,7 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
 		pn_sockaddr_set_object((struct sockaddr_pn *)addr,
 					pn->sobject);
 
-	*sockaddr_len = sizeof(struct sockaddr_pn);
-	return 0;
+	return sizeof(struct sockaddr_pn);
 }
 
 static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5fb3929e3d7d..b33e5aeb4c06 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -893,7 +893,7 @@ static int qrtr_connect(struct socket *sock, struct sockaddr *saddr,
 }
 
 static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
-			int *len, int peer)
+			int peer)
 {
 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
 	struct sockaddr_qrtr qaddr;
@@ -912,12 +912,11 @@ static int qrtr_getname(struct socket *sock, struct sockaddr *saddr,
 	}
 	release_sock(sk);
 
-	*len = sizeof(qaddr);
 	qaddr.sq_family = AF_QIPCRTR;
 
 	memcpy(saddr, &qaddr, sizeof(qaddr));
 
-	return 0;
+	return sizeof(qaddr);
 }
 
 static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 744c637c86b0..0a8eefd256b3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -110,7 +110,7 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
 }
 
 static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
-		       int *uaddr_len, int peer)
+		       int peer)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
@@ -131,8 +131,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	sin->sin_family = AF_INET;
 
-	*uaddr_len = sizeof(*sin);
-	return 0;
+	return sizeof(*sin);
 }
 
 /*
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 44c4652721af..08230a145042 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -227,7 +227,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 	struct rds_tcp_connection *tc;
 	unsigned long flags;
 	struct sockaddr_in sin;
-	int sinlen;
 	struct socket *sock;
 
 	spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
@@ -239,12 +238,10 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 
 		sock = tc->t_sock;
 		if (sock) {
-			sock->ops->getname(sock, (struct sockaddr *)&sin,
-					   &sinlen, 0);
+			sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
 			tsinfo.local_addr = sin.sin_addr.s_addr;
 			tsinfo.local_port = sin.sin_port;
-			sock->ops->getname(sock, (struct sockaddr *)&sin,
-					   &sinlen, 1);
+			sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
 			tsinfo.peer_addr = sin.sin_addr.s_addr;
 			tsinfo.peer_port = sin.sin_port;
 		}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 083bd251406f..5170373b797c 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -938,7 +938,7 @@ out_release:
 }
 
 static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
-	int *uaddr_len, int peer)
+	int peer)
 {
 	struct full_sockaddr_rose *srose = (struct full_sockaddr_rose *)uaddr;
 	struct sock *sk = sock->sk;
@@ -964,8 +964,7 @@ static int rose_getname(struct socket *sock, struct sockaddr *uaddr,
 			srose->srose_digis[n] = rose->source_digis[n];
 	}
 
-	*uaddr_len = sizeof(struct full_sockaddr_rose);
-	return 0;
+	return sizeof(struct full_sockaddr_rose);
 }
 
 int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct rose_neigh *neigh, unsigned int lci)
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e35d4f73d2df..0d873c58e516 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -952,16 +952,16 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
 
 /* Handle SCTP_I_WANT_MAPPED_V4_ADDR for getpeername() and getsockname() */
 static int sctp_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	int rc;
 
-	rc = inet6_getname(sock, uaddr, uaddr_len, peer);
+	rc = inet6_getname(sock, uaddr, peer);
 
-	if (rc != 0)
+	if (rc < 0)
 		return rc;
 
-	*uaddr_len = sctp_v6_addr_to_user(sctp_sk(sock->sk),
+	rc = sctp_v6_addr_to_user(sctp_sk(sock->sk),
 					  (union sctp_addr *)uaddr);
 
 	return rc;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index da1a5cdefd13..38ae22b65e77 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -281,7 +281,6 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	struct in_device *in_dev;
 	struct sockaddr_in addr;
 	int rc = -ENOENT;
-	int len;
 
 	if (!dst) {
 		rc = -ENOTCONN;
@@ -293,7 +292,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
 	}
 
 	/* get address to which the internal TCP socket is bound */
-	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
+	kernel_getsockname(clcsock, (struct sockaddr *)&addr);
 	/* analyze IPv4 specific data of net_device belonging to TCP socket */
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dst->dev);
@@ -771,7 +770,7 @@ static void smc_listen_work(struct work_struct *work)
 	u8 buf[SMC_CLC_MAX_LEN];
 	struct smc_link *link;
 	int reason_code = 0;
-	int rc = 0, len;
+	int rc = 0;
 	__be32 subnet;
 	u8 prefix_len;
 	u8 ibport;
@@ -824,7 +823,7 @@ static void smc_listen_work(struct work_struct *work)
 	}
 
 	/* get address of the peer connected to the internal TCP socket */
-	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
+	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
 
 	/* allocate connection / link group */
 	mutex_lock(&smc_create_lgr_pending);
@@ -1075,7 +1074,7 @@ out:
 }
 
 static int smc_getname(struct socket *sock, struct sockaddr *addr,
-		       int *len, int peer)
+		       int peer)
 {
 	struct smc_sock *smc;
 
@@ -1085,7 +1084,7 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
 
 	smc = smc_sk(sock->sk);
 
-	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
+	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
 }
 
 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
diff --git a/net/socket.c b/net/socket.c
index a93c99b518ca..fac8246a8ae8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1573,8 +1573,9 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 		goto out_fd;
 
 	if (upeer_sockaddr) {
-		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
-					  &len, 2) < 0) {
+		len = newsock->ops->getname(newsock,
+					(struct sockaddr *)&address, 2);
+		if (len < 0) {
 			err = -ECONNABORTED;
 			goto out_fd;
 		}
@@ -1654,7 +1655,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 {
 	struct socket *sock;
 	struct sockaddr_storage address;
-	int len, err, fput_needed;
+	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (!sock)
@@ -1664,10 +1665,11 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
 	if (err)
 		goto out_put;
 
-	err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
-	if (err)
+	err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
+	if (err < 0)
 		goto out_put;
-	err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
+        /* "err" is actually length in this case */
+	err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
 
 out_put:
 	fput_light(sock->file, fput_needed);
@@ -1685,7 +1687,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 {
 	struct socket *sock;
 	struct sockaddr_storage address;
-	int len, err, fput_needed;
+	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
 	if (sock != NULL) {
@@ -1695,11 +1697,10 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 			return err;
 		}
 
-		err =
-		    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
-				       1);
-		if (!err)
-			err = move_addr_to_user(&address, len, usockaddr,
+		err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
+		if (err >= 0)
+			/* "err" is actually length in this case */
+			err = move_addr_to_user(&address, err, usockaddr,
 						usockaddr_len);
 		fput_light(sock->file, fput_needed);
 	}
@@ -3166,17 +3167,15 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 }
 EXPORT_SYMBOL(kernel_connect);
 
-int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-			 int *addrlen)
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
 {
-	return sock->ops->getname(sock, addr, addrlen, 0);
+	return sock->ops->getname(sock, addr, 0);
 }
 EXPORT_SYMBOL(kernel_getsockname);
 
-int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-			 int *addrlen)
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 {
-	return sock->ops->getname(sock, addr, addrlen, 1);
+	return sock->ops->getname(sock, addr, 1);
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6e432ecd7f99..806395687bb6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
  * negative errno is returned.
  */
 static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
-			struct sockaddr *buf, int buflen)
+			struct sockaddr *buf)
 {
 	struct socket *sock;
 	int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
 		goto out_release;
 	}
 
-	err = kernel_getsockname(sock, buf, &buflen);
+	err = kernel_getsockname(sock, buf);
 	if (err < 0) {
 		dprintk("RPC:       getsockname failed (%d)\n", err);
 		goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
 	rcu_read_unlock();
 
 	rpc_set_port(sap, 0);
-	err = rpc_sockname(net, sap, salen, buf, buflen);
+	err = rpc_sockname(net, sap, salen, buf);
 	put_net(net);
 	if (err != 0)
 		/* Couldn't discover local address, return ANYADDR */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 943f2a745cd5..08cd951aaeea 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -832,12 +832,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	}
 	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 
-	err = kernel_getpeername(newsock, sin, &slen);
+	err = kernel_getpeername(newsock, sin);
 	if (err < 0) {
 		net_warn_ratelimited("%s: peername failed (err %d)!\n",
 				     serv->sv_name, -err);
 		goto failed;		/* aborted connection or whatever */
 	}
+	slen = err;
 
 	/* Ideally, we would want to reject connections from unauthorized
 	 * hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +867,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	if (IS_ERR(newsvsk))
 		goto failed;
 	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
-	err = kernel_getsockname(newsock, sin, &slen);
+	err = kernel_getsockname(newsock, sin);
+	slen = err;
 	if (unlikely(err < 0)) {
 		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
 		slen = offsetof(struct sockaddr, sa_data);
@@ -1465,7 +1467,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
 		err = PTR_ERR(svsk);
 		goto out;
 	}
-	if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+	salen = kernel_getsockname(svsk->sk_sock, sin);
+	if (salen >= 0)
 		svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
 	svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
 	return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1542,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
 	if (error < 0)
 		goto bummer;
 
-	newlen = len;
-	error = kernel_getsockname(sock, newsin, &newlen);
+	error = kernel_getsockname(sock, newsin);
 	if (error < 0)
 		goto bummer;
+	newlen = error;
 
 	if (protocol == IPPROTO_TCP) {
 		if ((error = kernel_listen(sock, 64)) < 0)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a6b8c1f8f92a..956e29c1438d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1794,10 +1794,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
 static unsigned short xs_sock_getport(struct socket *sock)
 {
 	struct sockaddr_storage buf;
-	int buflen;
 	unsigned short port = 0;
 
-	if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+	if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
 		goto out;
 	switch (buf.ss_family) {
 	case AF_INET6:
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b0323ec7971e..f93477187a90 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -665,7 +665,7 @@ exit:
  *       a completely predictable manner).
  */
 static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
-			int *uaddr_len, int peer)
+			int peer)
 {
 	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 	struct sock *sk = sock->sk;
@@ -684,13 +684,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
 		addr->addr.id.node = tn->own_addr;
 	}
 
-	*uaddr_len = sizeof(*addr);
 	addr->addrtype = TIPC_ADDR_ID;
 	addr->family = AF_TIPC;
 	addr->scope = 0;
 	addr->addr.name.domain = 0;
 
-	return 0;
+	return sizeof(*addr);
 }
 
 /**
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d545e1d0dea2..723698416242 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -637,7 +637,7 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
 			       int addr_len, int flags);
 static int unix_socketpair(struct socket *, struct socket *);
 static int unix_accept(struct socket *, struct socket *, int, bool);
-static int unix_getname(struct socket *, struct sockaddr *, int *, int);
+static int unix_getname(struct socket *, struct sockaddr *, int);
 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 				    poll_table *);
@@ -1453,7 +1453,7 @@ out:
 }
 
 
-static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
+static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 {
 	struct sock *sk = sock->sk;
 	struct unix_sock *u;
@@ -1476,12 +1476,12 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
 	if (!u->addr) {
 		sunaddr->sun_family = AF_UNIX;
 		sunaddr->sun_path[0] = 0;
-		*uaddr_len = sizeof(short);
+		err = sizeof(short);
 	} else {
 		struct unix_address *addr = u->addr;
 
-		*uaddr_len = addr->len;
-		memcpy(sunaddr, addr->name, *uaddr_len);
+		err = addr->len;
+		memcpy(sunaddr, addr->name, addr->len);
 	}
 	unix_state_unlock(sk);
 	sock_put(sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index e0fc84daed94..aac9b8f6552e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -759,7 +759,7 @@ vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 }
 
 static int vsock_getname(struct socket *sock,
-			 struct sockaddr *addr, int *addr_len, int peer)
+			 struct sockaddr *addr, int peer)
 {
 	int err;
 	struct sock *sk;
@@ -794,7 +794,7 @@ static int vsock_getname(struct socket *sock,
 	 */
 	BUILD_BUG_ON(sizeof(*vm_addr) > 128);
 	memcpy(addr, vm_addr, sizeof(*vm_addr));
-	*addr_len = sizeof(*vm_addr);
+	err = sizeof(*vm_addr);
 
 out:
 	release_sock(sk);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 562cc11131f6..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -896,7 +896,7 @@ out:
 }
 
 static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
-		       int *uaddr_len, int peer)
+		       int peer)
 {
 	struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -913,7 +913,7 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
 		sx25->sx25_addr = x25->source_addr;
 
 	sx25->sx25_family = AF_X25;
-	*uaddr_len = sizeof(*sx25);
+	rc = sizeof(*sx25);
 
 out:
 	return rc;
diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c
index cd6932e5225c..9094f4b3b367 100644
--- a/security/tomoyo/network.c
+++ b/security/tomoyo/network.c
@@ -655,10 +655,11 @@ int tomoyo_socket_listen_permission(struct socket *sock)
 		return 0;
 	{
 		const int error = sock->ops->getname(sock, (struct sockaddr *)
-						     &addr, &addr_len, 0);
+						     &addr, 0);
 
-		if (error)
+		if (error < 0)
 			return error;
+		addr_len = error;
 	}
 	address.protocol = type;
 	address.operation = TOMOYO_NETWORK_LISTEN;

From 92418fb14750c2baeebddf5903e3105cd11da90c Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:51:08 -0500
Subject: [PATCH 0031/1678] i40e/i40evf: Use usec value instead of reg value
 for ITR defines

Instead of using the register value for the defines when setting up the
ring ITR we can just use the actual values and avoid the use of shifts and
macros to translate between the values we have and the values we want.

This helps to make the code more readable as we can quickly translate from
one value to the other.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 12 ++---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 11 ++++-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   | 45 ++++++++++---------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 11 ++++-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 44 +++++++++---------
 .../ethernet/intel/i40evf/i40evf_ethtool.c    | 12 ++---
 6 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 8cc9198ac32f..3647af8fe32e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2315,8 +2315,8 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 
 	intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
 
-	rx_ring->itr_setting = ec->rx_coalesce_usecs;
-	tx_ring->itr_setting = ec->tx_coalesce_usecs;
+	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
 
 	if (ec->use_adaptive_rx_coalesce)
 		rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
@@ -2396,7 +2396,7 @@ static int __i40e_set_coalesce(struct net_device *netdev,
 		return -EINVAL;
 	}
 
-	if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+	if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
 		netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}
@@ -2407,16 +2407,16 @@ static int __i40e_set_coalesce(struct net_device *netdev,
 		return -EINVAL;
 	}
 
-	if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+	if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
 		netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}
 
 	if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
-		ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+		ec->rx_coalesce_usecs = I40E_MIN_ITR;
 
 	if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
-		ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+		ec->tx_coalesce_usecs = I40E_MIN_ITR;
 
 	intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
 	vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ac1fa9e3c04f..ade3e17fba6c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2277,7 +2277,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	return failure ? budget : (int)total_rx_packets;
 }
 
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 {
 	u32 val;
 
@@ -2290,10 +2290,17 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
 	 * an event in the PBA anyway so we need to rely on the automask
 	 * to hold pending events for us until the interrupt is re-enabled
+	 *
+	 * The itr value is reported in microseconds, and the register
+	 * value is recorded in 2 microsecond units. For this reason we
+	 * only need to shift by the interval shift - 1 instead of the
+	 * full value.
 	 */
+	itr &= I40E_ITR_MASK;
+
 	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
 	      (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-	      (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+	      (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
 
 	return val;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 1758dd3bf91b..0f8751c2e595 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -30,32 +30,37 @@
 #include <net/xdp.h>
 
 /* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR               0x0001  /* reg uses 2 usec resolution */
-#define I40E_ITR_100K              0x0005
-#define I40E_ITR_50K               0x000A
-#define I40E_ITR_20K               0x0019
-#define I40E_ITR_18K               0x001B
-#define I40E_ITR_8K                0x003E
-#define I40E_ITR_4K                0x007A
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-				    I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-				    I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
-#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
 #define I40E_DEFAULT_IRQ_WORK      256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define I40E_MIN_ITR		     2	/* reg uses 2 usec resolution */
+#define I40E_ITR_100K		    10	/* all values below must be even */
+#define I40E_ITR_50K		    20
+#define I40E_ITR_20K		    50
+#define I40E_ITR_18K		    60
+#define I40E_ITR_8K		   122
+#define I40E_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
 /* 0x40 is the enable bit for interrupt rate limiting, and must be set if
  * the value of the rate limit is non-zero
  */
 #define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+
 /**
  * i40e_intrl_usec_to_reg - convert interrupt rate limit to register
  * @intrl: interrupt rate limit to convert
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 34d898f0adaa..c5f8e941f53e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1460,7 +1460,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	return failure ? budget : (int)total_rx_packets;
 }
 
-static u32 i40e_buildreg_itr(const int type, const u16 itr)
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 {
 	u32 val;
 
@@ -1473,10 +1473,17 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
 	 * an event in the PBA anyway so we need to rely on the automask
 	 * to hold pending events for us until the interrupt is re-enabled
+	 *
+	 * The itr value is reported in microseconds, and the register
+	 * value is recorded in 2 microsecond units. For this reason we
+	 * only need to shift by the interval shift - 1 instead of the
+	 * full value.
 	 */
+	itr &= I40E_ITR_MASK;
+
 	val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
 	      (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
-	      (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
+	      (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
 
 	return val;
 }
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 038ed0e2acb7..a4f29722ceff 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -28,31 +28,35 @@
 #define _I40E_TXRX_H_
 
 /* Interrupt Throttling and Rate Limiting Goodies */
-
-#define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
-#define I40E_MIN_ITR               0x0001  /* reg uses 2 usec resolution */
-#define I40E_ITR_100K              0x0005
-#define I40E_ITR_50K               0x000A
-#define I40E_ITR_20K               0x0019
-#define I40E_ITR_18K               0x001B
-#define I40E_ITR_8K                0x003E
-#define I40E_ITR_4K                0x007A
-#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-				    I40E_ITR_DYNAMIC)
-#define I40E_ITR_TX_DEF            (ITR_REG_TO_USEC(I40E_ITR_20K) | \
-				    I40E_ITR_DYNAMIC)
-#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
-#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
-#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
 #define I40E_DEFAULT_IRQ_WORK      256
-#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
-#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
-#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define I40E_MIN_ITR		     2	/* reg uses 2 usec resolution */
+#define I40E_ITR_100K		    10	/* all values below must be even */
+#define I40E_ITR_50K		    20
+#define I40E_ITR_20K		    50
+#define I40E_ITR_18K		    60
+#define I40E_ITR_8K		   122
+#define I40E_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
 /* 0x40 is the enable bit for interrupt rate limiting, and must be set if
  * the value of the rate limit is non-zero
  */
 #define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
 #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
 #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
 #define I40E_INTRL_8K              125     /* 8000 ints/sec */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index ed5b8ec4d2a2..f5d372576d71 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -517,8 +517,8 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_q_vector *q_vector;
 
-	rx_ring->itr_setting = ec->rx_coalesce_usecs;
-	tx_ring->itr_setting = ec->tx_coalesce_usecs;
+	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
 
 	rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
 	if (!ec->use_adaptive_rx_coalesce)
@@ -563,8 +563,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
 	if (ec->rx_coalesce_usecs == 0) {
 		if (ec->use_adaptive_rx_coalesce)
 			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
-	} else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		   (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+	} else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) ||
+		   (ec->rx_coalesce_usecs > I40E_MAX_ITR)) {
 		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}
@@ -573,8 +573,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev,
 	if (ec->tx_coalesce_usecs == 0) {
 		if (ec->use_adaptive_tx_coalesce)
 			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
-	} else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
-		   (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+	} else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) ||
+		   (ec->tx_coalesce_usecs > I40E_MAX_ITR)) {
 		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
 		return -EINVAL;
 	}

From d4942d581afb3d7d8dd1baa881a58d03a36ee25d Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:51:20 -0500
Subject: [PATCH 0032/1678] i40evf: Correctly populate rxitr_idx and txitr_idx

While testing code for the recent ITR changes I found that updating the Tx
ITR appeared to have no effect with everything defaulting to the Rx ITR. A
bit of digging narrowed it down the fact that we were asking the PF to
associate all causes with ITR 0 as we weren't populating the itr_idx values
for either Rx or Tx.

To correct it I have added the configuration for these values to this
patch. In addition I did some minor clean-up to just add a local pointer
for the vector map instead of dereferencing it based off of the index
repeatedly. In my opinion this makes the resultant code a bit more readable
and saves us a few characters.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 50ce0d6c09ef..d57a67285505 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -344,6 +344,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter)
 void i40evf_map_queues(struct i40evf_adapter *adapter)
 {
 	struct virtchnl_irq_map_info *vimi;
+	struct virtchnl_vector_map *vecmap;
 	int v_idx, q_vectors, len;
 	struct i40e_q_vector *q_vector;
 
@@ -367,17 +368,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
 	vimi->num_vectors = adapter->num_msix_vectors;
 	/* Queue vectors first */
 	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
-		q_vector = adapter->q_vectors + v_idx;
-		vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
-		vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS;
-		vimi->vecmap[v_idx].txq_map = q_vector->ring_mask;
-		vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask;
+		q_vector = &adapter->q_vectors[v_idx];
+		vecmap = &vimi->vecmap[v_idx];
+
+		vecmap->vsi_id = adapter->vsi_res->vsi_id;
+		vecmap->vector_id = v_idx + NONQ_VECS;
+		vecmap->txq_map = q_vector->ring_mask;
+		vecmap->rxq_map = q_vector->ring_mask;
+		vecmap->rxitr_idx = I40E_RX_ITR;
+		vecmap->txitr_idx = I40E_TX_ITR;
 	}
 	/* Misc vector last - this is only for AdminQ messages */
-	vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id;
-	vimi->vecmap[v_idx].vector_id = 0;
-	vimi->vecmap[v_idx].txq_map = 0;
-	vimi->vecmap[v_idx].rxq_map = 0;
+	vecmap = &vimi->vecmap[v_idx];
+	vecmap->vsi_id = adapter->vsi_res->vsi_id;
+	vecmap->vector_id = 0;
+	vecmap->txq_map = 0;
+	vecmap->rxq_map = 0;
 
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS;
 	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,

From 556fdfd6e6ffcab9d03c942df06a5591c84ca637 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:51:25 -0500
Subject: [PATCH 0033/1678] i40e/i40evf: Split container ITR into current_itr
 and target_itr

This patch is mostly prep-work for replacing the current approach to
programming the dynamic aka adaptive ITR. Specifically here what we are
doing is splitting the Tx and Rx ITR each into two separate values.

The first value current_itr represents the current value of the register.

The second value target_itr represents the desired value of the register.

The general plan by doing this is to allow for deferring the update of the
ITR value under certain circumstances. For now we will work with what we
have, but in the future I hope to change the behavior so that we always
only update one ITR at a time using some simple logic to determine which
ITR requires an update.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 13 ++--
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 22 +++---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 68 +++++++++++--------
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  3 +-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 68 +++++++++++--------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.h |  3 +-
 .../ethernet/intel/i40evf/i40evf_ethtool.c    | 14 ++--
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 10 +--
 8 files changed, 115 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 3647af8fe32e..29a7412b2fa6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2329,14 +2329,15 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 		tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
 
 	q_vector = rx_ring->q_vector;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
-	wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, q_vector->reg_idx),
-	     q_vector->rx.itr);
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 
 	q_vector = tx_ring->q_vector;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
-	wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, q_vector->reg_idx),
-	     q_vector->tx.itr);
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+
+	/* The interrupt handler itself will take care of programming
+	 * the Tx and Rx ITR values based on the values we have entered
+	 * into the q_vector, no need to write the values now.
+	 */
 
 	wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
 	i40e_flush(hw);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index a88fdb8bf5f0..39552b3875ee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3450,14 +3450,18 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 		struct i40e_q_vector *q_vector = vsi->q_vectors[i];
 
 		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-		q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
+		q_vector->rx.target_itr =
+			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
 		q_vector->rx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
-		     q_vector->rx.itr);
-		q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
+		     q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->tx.target_itr =
+			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
 		q_vector->tx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
-		     q_vector->tx.itr);
+		     q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
 		wr32(hw, I40E_PFINT_RATEN(vector - 1),
 		     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
 
@@ -3559,12 +3563,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 
 	/* set the ITR configuration */
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
 	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
-	q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
+	q_vector->rx.current_itr = q_vector->rx.target_itr;
+	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
+	q_vector->tx.current_itr = q_vector->tx.target_itr;
 
 	i40e_enable_misc_int_causes(pf);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ade3e17fba6c..f4257b9e6dae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1012,17 +1012,16 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 {
 	enum i40e_latency_range new_latency_range = rc->latency_range;
-	u32 new_itr = rc->itr;
 	int bytes_per_usec;
 	unsigned int usecs, estimated_usecs;
 
 	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
 		return false;
 
-	if (rc->total_packets == 0 || !rc->itr)
+	if (!rc->total_packets || !rc->current_itr)
 		return false;
 
-	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+	usecs = (rc->current_itr << 1) * ITR_COUNTDOWN_START;
 	bytes_per_usec = rc->total_bytes / usecs;
 
 	/* The calculations in this algorithm depend on interrupts actually
@@ -1070,13 +1069,13 @@ reset_latency:
 
 	switch (new_latency_range) {
 	case I40E_LOWEST_LATENCY:
-		new_itr = I40E_ITR_50K;
+		rc->target_itr = I40E_ITR_50K;
 		break;
 	case I40E_LOW_LATENCY:
-		new_itr = I40E_ITR_20K;
+		rc->target_itr = I40E_ITR_20K;
 		break;
 	case I40E_BULK_LATENCY:
-		new_itr = I40E_ITR_18K;
+		rc->target_itr = I40E_ITR_18K;
 		break;
 	default:
 		break;
@@ -1086,11 +1085,7 @@ reset_latency:
 	rc->total_packets = 0;
 	rc->last_itr_update = jiffies;
 
-	if (new_itr != rc->itr) {
-		rc->itr = new_itr;
-		return true;
-	}
-	return false;
+	return rc->target_itr != rc->current_itr;
 }
 
 /**
@@ -2319,7 +2314,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 {
 	struct i40e_hw *hw = &vsi->back->hw;
 	bool rx = false, tx = false;
-	u32 txval;
+	u32 intval;
 
 	/* If we don't have MSIX, then we only need to re-enable icr0 */
 	if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
@@ -2327,8 +2322,6 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		return;
 	}
 
-	txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
-
 	/* avoid dynamic calculation if in countdown mode */
 	if (q_vector->itr_countdown > 0)
 		goto enable_int;
@@ -2342,26 +2335,43 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		 * use the same value for both ITR registers
 		 * when in adaptive mode (Rx and/or Tx)
 		 */
-		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-		u32 rxval;
+		u16 itr = max(q_vector->tx.target_itr,
+			      q_vector->rx.target_itr);
 
-		q_vector->tx.itr = q_vector->rx.itr = itr;
-
-		/* set the INTENA_MSK_MASK so that this first write
-		 * won't actually enable the interrupt, instead just
-		 * updating the ITR (it's bit 31 PF and VF)
-		 */
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr) | BIT(31);
-
-		/* don't check _DOWN because interrupt isn't being enabled */
-		wr32(hw, INTREG(q_vector->reg_idx), rxval);
-
-		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
+		q_vector->tx.target_itr = itr;
+		q_vector->rx.target_itr = itr;
 	}
 
 enable_int:
+	if (q_vector->rx.target_itr != q_vector->rx.current_itr) {
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+		if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
+			/* set the INTENA_MSK_MASK so that this first write
+			 * won't actually enable the interrupt, instead just
+			 * updating the ITR (it's bit 31 PF and VF)
+			 *
+			 * don't check _DOWN because interrupt isn't being
+			 * enabled
+			 */
+			wr32(hw, INTREG(q_vector->reg_idx),
+			     intval | BIT(31));
+			/* now that Rx is done process Tx update */
+			goto update_tx;
+		}
+	} else if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
+update_tx:
+		intval = i40e_buildreg_itr(I40E_TX_ITR,
+					   q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+	} else {
+		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+	}
+
 	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-		wr32(hw, INTREG(q_vector->reg_idx), txval);
+		wr32(hw, INTREG(q_vector->reg_idx), intval);
 
 	if (q_vector->itr_countdown)
 		q_vector->itr_countdown--;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 0f8751c2e595..8ad8ffc63579 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -477,7 +477,8 @@ struct i40e_ring_container {
 	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
 	enum i40e_latency_range latency_range;
-	u16 itr;
+	u16 target_itr;			/* target ITR setting for ring(s) */
+	u16 current_itr;		/* current ITR setting for ring(s) */
 };
 
 /* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index c5f8e941f53e..1f130e931077 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -409,17 +409,16 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 {
 	enum i40e_latency_range new_latency_range = rc->latency_range;
-	u32 new_itr = rc->itr;
 	int bytes_per_usec;
 	unsigned int usecs, estimated_usecs;
 
 	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
 		return false;
 
-	if (rc->total_packets == 0 || !rc->itr)
+	if (!rc->total_packets || !rc->current_itr)
 		return false;
 
-	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+	usecs = (rc->current_itr << 1) * ITR_COUNTDOWN_START;
 	bytes_per_usec = rc->total_bytes / usecs;
 
 	/* The calculations in this algorithm depend on interrupts actually
@@ -467,13 +466,13 @@ reset_latency:
 
 	switch (new_latency_range) {
 	case I40E_LOWEST_LATENCY:
-		new_itr = I40E_ITR_50K;
+		rc->target_itr = I40E_ITR_50K;
 		break;
 	case I40E_LOW_LATENCY:
-		new_itr = I40E_ITR_20K;
+		rc->target_itr = I40E_ITR_20K;
 		break;
 	case I40E_BULK_LATENCY:
-		new_itr = I40E_ITR_18K;
+		rc->target_itr = I40E_ITR_18K;
 		break;
 	default:
 		break;
@@ -483,11 +482,7 @@ reset_latency:
 	rc->total_packets = 0;
 	rc->last_itr_update = jiffies;
 
-	if (new_itr != rc->itr) {
-		rc->itr = new_itr;
-		return true;
-	}
-	return false;
+	return rc->target_itr != rc->current_itr;
 }
 
 /**
@@ -1502,9 +1497,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 {
 	struct i40e_hw *hw = &vsi->back->hw;
 	bool rx = false, tx = false;
-	u32 txval;
-
-	txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+	u32 intval;
 
 	/* avoid dynamic calculation if in countdown mode */
 	if (q_vector->itr_countdown > 0)
@@ -1519,26 +1512,43 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		 * use the same value for both ITR registers
 		 * when in adaptive mode (Rx and/or Tx)
 		 */
-		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
-		u32 rxval;
+		u16 itr = max(q_vector->tx.target_itr,
+			      q_vector->rx.target_itr);
 
-		q_vector->tx.itr = q_vector->rx.itr = itr;
-
-		/* set the INTENA_MSK_MASK so that this first write
-		 * won't actually enable the interrupt, instead just
-		 * updating the ITR (it's bit 31 PF and VF)
-		 */
-		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr) | BIT(31);
-
-		/* don't check _DOWN because interrupt isn't being enabled */
-		wr32(hw, INTREG(q_vector->reg_idx), rxval);
-
-		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
+		q_vector->tx.target_itr = itr;
+		q_vector->rx.target_itr = itr;
 	}
 
 enable_int:
+	if (q_vector->rx.target_itr != q_vector->rx.current_itr) {
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+		if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
+			/* set the INTENA_MSK_MASK so that this first write
+			 * won't actually enable the interrupt, instead just
+			 * updating the ITR (it's bit 31 PF and VF)
+			 *
+			 * don't check _DOWN because interrupt isn't being
+			 * enabled
+			 */
+			wr32(hw, INTREG(q_vector->reg_idx),
+			     intval | BIT(31));
+			/* now that Rx is done process Tx update */
+			goto update_tx;
+		}
+	} else if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
+update_tx:
+		intval = i40e_buildreg_itr(I40E_TX_ITR,
+					   q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+	} else {
+		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+	}
+
 	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-		wr32(hw, INTREG(q_vector->reg_idx), txval);
+		wr32(hw, INTREG(q_vector->reg_idx), intval);
 
 	if (q_vector->itr_countdown)
 		q_vector->itr_countdown--;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index a4f29722ceff..4069259aef34 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -442,7 +442,8 @@ struct i40e_ring_container {
 	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
 	enum i40e_latency_range latency_range;
-	u16 itr;
+	u16 target_itr;			/* target ITR setting for ring(s) */
+	u16 current_itr;		/* current ITR setting for ring(s) */
 };
 
 /* iterator for handling rings in ring container */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index f5d372576d71..aded3ad7763e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -514,7 +514,6 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 {
 	struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
 	struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
-	struct i40e_hw *hw = &adapter->hw;
 	struct i40e_q_vector *q_vector;
 
 	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
@@ -529,16 +528,15 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
 		tx_ring->itr_setting ^= I40E_ITR_DYNAMIC;
 
 	q_vector = rx_ring->q_vector;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
-	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
-	     q_vector->rx.itr);
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 
 	q_vector = tx_ring->q_vector;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
-	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
-	     q_vector->tx.itr);
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
 
-	i40e_flush(hw);
+	/* The interrupt handler itself will take care of programming
+	 * the Tx and Rx ITR values based on the values we have entered
+	 * into the q_vector, no need to write the values now.
+	 */
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index f648e5e97529..3bf6a126be72 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -354,11 +354,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 	q_vector->rx.ring = rx_ring;
 	q_vector->rx.count++;
 	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	q_vector->rx.itr = ITR_TO_REG(rx_ring->itr_setting);
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 	q_vector->ring_mask |= BIT(r_idx);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
-	     q_vector->rx.itr);
+	     q_vector->rx.current_itr);
+	q_vector->rx.current_itr = q_vector->rx.target_itr;
 }
 
 /**
@@ -380,11 +381,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-	q_vector->tx.itr = ITR_TO_REG(tx_ring->itr_setting);
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	q_vector->num_ringpairs++;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
-	     q_vector->tx.itr);
+	     q_vector->tx.target_itr);
+	q_vector->tx.current_itr = q_vector->tx.target_itr;
 }
 
 /**

From a0073a4b8b5906b2a7eab5e9d4a91759b56bc96f Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 29 Dec 2017 08:52:19 -0500
Subject: [PATCH 0034/1678] i40e/i40evf: Add support for new mechanism of
 updating adaptive ITR

This patch replaces the existing mechanism for determining the correct
value to program for adaptive ITR with yet another new and more
complicated approach.

The basic idea from a 30K foot view is that this new approach will push the
Rx interrupt moderation up so that by default it starts in low latency and
is gradually pushed up into a higher latency setup as long as doing so
increases the number of packets processed, if the number of packets drops
to 4 to 1 per packet we will reset and just base our ITR on the size of the
packets being received. For Tx we leave it floating at a high interrupt
delay and do not pull it down unless we start processing more than 112
packets per interrupt. If we start exceeding that we will cut our interrupt
rates in half until we are back below 112.

The side effect of these patches are that we will be processing more
packets per interrupt. This is both a good and a bad thing as it means we
will not be blocking processing in the case of things like pktgen and XDP,
but we will also be consuming a bit more CPU in the cases of things such as
network throughput tests using netperf.

One delta from this versus the ixgbe version of the changes is that I have
made the interrupt moderation a bit more aggressive when we are in bulk
mode by moving our "goldilocks zone" up from 48 to 96 to 56 to 112. The
main motivation behind moving this is to address the fact that we need to
update less frequently, and have more fine grained control due to the
separate Tx and Rx ITR times.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h        |   3 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c   |  15 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 366 ++++++++++++------
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  17 +-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 366 ++++++++++++------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.h |  17 +-
 drivers/net/ethernet/intel/i40evf/i40evf.h    |   3 +-
 .../net/ethernet/intel/i40evf/i40evf_main.c   |   6 +-
 8 files changed, 532 insertions(+), 261 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 46e9f4e0a02c..ebe795a7f5f9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -824,6 +824,7 @@ struct i40e_q_vector {
 	struct i40e_ring_container rx;
 	struct i40e_ring_container tx;
 
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
 
 	cpumask_t affinity_mask;
@@ -832,8 +833,6 @@ struct i40e_q_vector {
 	struct rcu_head rcu;	/* to avoid race with update stats on free */
 	char name[I40E_INT_NAME_STR_LEN];
 	bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
-	u8 itr_countdown;	/* when 0 should adjust ITR */
 } ____cacheline_internodealigned_in_smp;
 
 /* lan device */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 39552b3875ee..70ecd9c3a163 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3449,19 +3449,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
 		struct i40e_q_vector *q_vector = vsi->q_vectors[i];
 
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+		q_vector->rx.next_update = jiffies + 1;
 		q_vector->rx.target_itr =
 			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
-		q_vector->rx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
 		     q_vector->rx.target_itr);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+		q_vector->tx.next_update = jiffies + 1;
 		q_vector->tx.target_itr =
 			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
-		q_vector->tx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
 		     q_vector->tx.target_itr);
 		q_vector->tx.current_itr = q_vector->tx.target_itr;
+
 		wr32(hw, I40E_PFINT_RATEN(vector - 1),
 		     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
 
@@ -3562,13 +3563,12 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 	u32 val;
 
 	/* set the ITR configuration */
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	q_vector->rx.next_update = jiffies + 1;
 	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
 	q_vector->rx.current_itr = q_vector->rx.target_itr;
+	q_vector->tx.next_update = jiffies + 1;
 	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
 	q_vector->tx.current_itr = q_vector->tx.target_itr;
 
@@ -10345,9 +10345,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
 		netif_napi_add(vsi->netdev, &q_vector->napi,
 			       i40e_napi_poll, NAPI_POLL_WEIGHT);
 
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
 	/* tie q_vector and vsi together */
 	vsi->q_vectors[v_idx] = q_vector;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f4257b9e6dae..1ec9b1d8023d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -995,97 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
 	}
 }
 
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+					struct i40e_ring_container *rc)
+{
+	return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+	unsigned int divisor;
+
+	switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+		break;
+	case I40E_LINK_SPEED_25GB:
+	case I40E_LINK_SPEED_20GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+		break;
+	default:
+	case I40E_LINK_SPEED_10GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+		break;
+	case I40E_LINK_SPEED_1GB:
+	case I40E_LINK_SPEED_100MB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+		break;
+	}
+
+	return divisor;
+}
+
 /**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
  * @rc: structure containing ring performance data
  *
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt.  The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern.  Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+			    struct i40e_ring_container *rc)
 {
-	enum i40e_latency_range new_latency_range = rc->latency_range;
-	int bytes_per_usec;
-	unsigned int usecs, estimated_usecs;
+	unsigned int avg_wire_size, packets, bytes, itr;
+	unsigned long next_update = jiffies;
 
+	/* If we don't have any rings just leave ourselves set for maximum
+	 * possible latency so we take ourselves out of the equation.
+	 */
 	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
-		return false;
+		return;
 
-	if (!rc->total_packets || !rc->current_itr)
-		return false;
-
-	usecs = (rc->current_itr << 1) * ITR_COUNTDOWN_START;
-	bytes_per_usec = rc->total_bytes / usecs;
-
-	/* The calculations in this algorithm depend on interrupts actually
-	 * firing at the ITR rate. This may not happen if the packet rate is
-	 * really low, or if we've been napi polling. Check to make sure
-	 * that's not the case before we continue.
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
 	 */
-	estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
-	if (estimated_usecs > usecs) {
-		new_latency_range = I40E_LOW_LATENCY;
-		goto reset_latency;
+	itr = i40e_container_is_rx(q_vector, rc) ?
+	      I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+	      I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
 	}
 
-	/* simple throttlerate management
-	 *   0-10MB/s   lowest (50000 ints/s)
-	 *  10-20MB/s   low    (20000 ints/s)
-	 *  20-1249MB/s bulk   (18000 ints/s)
+	packets = rc->total_packets;
+	bytes = rc->total_bytes;
+
+	if (i40e_container_is_rx(q_vector, rc)) {
+		/* If Rx there are 1 to 4 packets and bytes are less than
+		 * 9000 assume insufficient data to use bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+			itr = I40E_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+		     I40E_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
 	 *
-	 * The math works out because the divisor is in 10^(-6) which
-	 * turns the bytes/us input value into MB/s values, but
-	 * make sure to use usecs, as the register values written
-	 * are in 2 usec increments in the ITR registers, and make sure
-	 * to use the smoothed values that the countdown timer gives us.
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
 	 */
-	switch (new_latency_range) {
-	case I40E_LOWEST_LATENCY:
-		if (bytes_per_usec > 10)
-			new_latency_range = I40E_LOW_LATENCY;
-		break;
-	case I40E_LOW_LATENCY:
-		if (bytes_per_usec > 20)
-			new_latency_range = I40E_BULK_LATENCY;
-		else if (bytes_per_usec <= 10)
-			new_latency_range = I40E_LOWEST_LATENCY;
-		break;
-	case I40E_BULK_LATENCY:
-	default:
-		if (bytes_per_usec <= 20)
-			new_latency_range = I40E_LOW_LATENCY;
-		break;
+	if (packets < 56) {
+		itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= I40E_ITR_ADAPTIVE_LATENCY;
+			itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
 	}
 
-reset_latency:
-	rc->latency_range = new_latency_range;
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= I40E_ITR_MASK;
 
-	switch (new_latency_range) {
-	case I40E_LOWEST_LATENCY:
-		rc->target_itr = I40E_ITR_50K;
-		break;
-	case I40E_LOW_LATENCY:
-		rc->target_itr = I40E_ITR_20K;
-		break;
-	case I40E_BULK_LATENCY:
-		rc->target_itr = I40E_ITR_18K;
-		break;
-	default:
-		break;
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr /= 2;
+		itr &= I40E_ITR_MASK;
+		if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+			itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
 	}
 
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * give the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 250k ints/sec */
+		avg_wire_size = 4096;
+	} else if (avg_wire_size <= 380) {
+		/* 250K ints/sec to 60K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 1696;
+	} else if (avg_wire_size <= 1084) {
+		/* 60K ints/sec to 36K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 36K ints/sec to 30K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 30K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode halve our delay which doubles the
+	 * rate to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size /= 2;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+	       I40E_ITR_ADAPTIVE_MIN_INC;
+
+	if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= I40E_ITR_ADAPTIVE_LATENCY;
+		itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
 	rc->total_bytes = 0;
 	rc->total_packets = 0;
-	rc->last_itr_update = jiffies;
-
-	return rc->target_itr != rc->current_itr;
 }
 
 /**
@@ -2303,6 +2447,15 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_PFINT_DYN_CTLN
 
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
  * @vsi: the VSI we care about
@@ -2313,7 +2466,6 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 					  struct i40e_q_vector *q_vector)
 {
 	struct i40e_hw *hw = &vsi->back->hw;
-	bool rx = false, tx = false;
 	u32 intval;
 
 	/* If we don't have MSIX, then we only need to re-enable icr0 */
@@ -2322,61 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		return;
 	}
 
-	/* avoid dynamic calculation if in countdown mode */
-	if (q_vector->itr_countdown > 0)
-		goto enable_int;
+	/* These will do nothing if dynamic updates are not enabled */
+	i40e_update_itr(q_vector, &q_vector->tx);
+	i40e_update_itr(q_vector, &q_vector->rx);
 
-	/* these will return false if dynamic mode is disabled */
-	rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-	tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-
-	if (rx || tx) {
-		/* get the higher of the two ITR adjustments and
-		 * use the same value for both ITR registers
-		 * when in adaptive mode (Rx and/or Tx)
-		 */
-		u16 itr = max(q_vector->tx.target_itr,
-			      q_vector->rx.target_itr);
-
-		q_vector->tx.target_itr = itr;
-		q_vector->rx.target_itr = itr;
-	}
-
-enable_int:
-	if (q_vector->rx.target_itr != q_vector->rx.current_itr) {
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
+	 */
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
 		intval = i40e_buildreg_itr(I40E_RX_ITR,
 					   q_vector->rx.target_itr);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
-
-		if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
-			/* set the INTENA_MSK_MASK so that this first write
-			 * won't actually enable the interrupt, instead just
-			 * updating the ITR (it's bit 31 PF and VF)
-			 *
-			 * don't check _DOWN because interrupt isn't being
-			 * enabled
-			 */
-			wr32(hw, INTREG(q_vector->reg_idx),
-			     intval | BIT(31));
-			/* now that Rx is done process Tx update */
-			goto update_tx;
-		}
-	} else if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
-update_tx:
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
 		intval = i40e_buildreg_itr(I40E_TX_ITR,
 					   q_vector->tx.target_itr);
 		q_vector->tx.current_itr = q_vector->tx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	} else {
+		/* No ITR update, lowest priority */
 		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
 	}
 
 	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
 		wr32(hw, INTREG(q_vector->reg_idx), intval);
-
-	if (q_vector->itr_countdown)
-		q_vector->itr_countdown--;
-	else
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 8ad8ffc63579..f75a8fe68fcf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -463,20 +463,19 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
 	ring->flags |= I40E_TXR_FLAGS_XDP;
 }
 
-enum i40e_latency_range {
-	I40E_LOWEST_LATENCY = 0,
-	I40E_LOW_LATENCY = 1,
-	I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
+#define I40E_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
 
 struct i40e_ring_container {
-	/* array of pointers to rings */
-	struct i40e_ring *ring;
+	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
-	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
-	enum i40e_latency_range latency_range;
 	u16 target_itr;			/* target ITR setting for ring(s) */
 	u16 current_itr;		/* current ITR setting for ring(s) */
 };
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 1f130e931077..eb8f3e327f6b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -392,97 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
 	     val);
 }
 
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+					struct i40e_ring_container *rc)
+{
+	return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+	unsigned int divisor;
+
+	switch (q_vector->adapter->link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+		break;
+	case I40E_LINK_SPEED_25GB:
+	case I40E_LINK_SPEED_20GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+		break;
+	default:
+	case I40E_LINK_SPEED_10GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+		break;
+	case I40E_LINK_SPEED_1GB:
+	case I40E_LINK_SPEED_100MB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+		break;
+	}
+
+	return divisor;
+}
+
 /**
- * i40e_set_new_dynamic_itr - Find new ITR level
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
  * @rc: structure containing ring performance data
  *
- * Returns true if ITR changed, false if not
- *
- * Stores a new ITR value based on packets and byte counts during
- * the last interrupt.  The advantage of per interrupt computation
- * is faster updates and more accurate ITR for the current traffic
- * pattern.  Constants in this function were computed based on
- * theoretical maximum wire speed and thresholds were set based on
- * testing data as well as attempting to minimize response time
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+			    struct i40e_ring_container *rc)
 {
-	enum i40e_latency_range new_latency_range = rc->latency_range;
-	int bytes_per_usec;
-	unsigned int usecs, estimated_usecs;
+	unsigned int avg_wire_size, packets, bytes, itr;
+	unsigned long next_update = jiffies;
 
+	/* If we don't have any rings just leave ourselves set for maximum
+	 * possible latency so we take ourselves out of the equation.
+	 */
 	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
-		return false;
+		return;
 
-	if (!rc->total_packets || !rc->current_itr)
-		return false;
-
-	usecs = (rc->current_itr << 1) * ITR_COUNTDOWN_START;
-	bytes_per_usec = rc->total_bytes / usecs;
-
-	/* The calculations in this algorithm depend on interrupts actually
-	 * firing at the ITR rate. This may not happen if the packet rate is
-	 * really low, or if we've been napi polling. Check to make sure
-	 * that's not the case before we continue.
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
 	 */
-	estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
-	if (estimated_usecs > usecs) {
-		new_latency_range = I40E_LOW_LATENCY;
-		goto reset_latency;
+	itr = i40e_container_is_rx(q_vector, rc) ?
+	      I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+	      I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
 	}
 
-	/* simple throttlerate management
-	 *   0-10MB/s   lowest (50000 ints/s)
-	 *  10-20MB/s   low    (20000 ints/s)
-	 *  20-1249MB/s bulk   (18000 ints/s)
+	packets = rc->total_packets;
+	bytes = rc->total_bytes;
+
+	if (i40e_container_is_rx(q_vector, rc)) {
+		/* If Rx there are 1 to 4 packets and bytes are less than
+		 * 9000 assume insufficient data to use bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+			itr = I40E_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+		     I40E_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
 	 *
-	 * The math works out because the divisor is in 10^(-6) which
-	 * turns the bytes/us input value into MB/s values, but
-	 * make sure to use usecs, as the register values written
-	 * are in 2 usec increments in the ITR registers, and make sure
-	 * to use the smoothed values that the countdown timer gives us.
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
 	 */
-	switch (new_latency_range) {
-	case I40E_LOWEST_LATENCY:
-		if (bytes_per_usec > 10)
-			new_latency_range = I40E_LOW_LATENCY;
-		break;
-	case I40E_LOW_LATENCY:
-		if (bytes_per_usec > 20)
-			new_latency_range = I40E_BULK_LATENCY;
-		else if (bytes_per_usec <= 10)
-			new_latency_range = I40E_LOWEST_LATENCY;
-		break;
-	case I40E_BULK_LATENCY:
-	default:
-		if (bytes_per_usec <= 20)
-			new_latency_range = I40E_LOW_LATENCY;
-		break;
+	if (packets < 56) {
+		itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= I40E_ITR_ADAPTIVE_LATENCY;
+			itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
 	}
 
-reset_latency:
-	rc->latency_range = new_latency_range;
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= I40E_ITR_MASK;
 
-	switch (new_latency_range) {
-	case I40E_LOWEST_LATENCY:
-		rc->target_itr = I40E_ITR_50K;
-		break;
-	case I40E_LOW_LATENCY:
-		rc->target_itr = I40E_ITR_20K;
-		break;
-	case I40E_BULK_LATENCY:
-		rc->target_itr = I40E_ITR_18K;
-		break;
-	default:
-		break;
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr /= 2;
+		itr &= I40E_ITR_MASK;
+		if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+			itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
 	}
 
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * give the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 250k ints/sec */
+		avg_wire_size = 4096;
+	} else if (avg_wire_size <= 380) {
+		/* 250K ints/sec to 60K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 1696;
+	} else if (avg_wire_size <= 1084) {
+		/* 60K ints/sec to 36K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 36K ints/sec to 30K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 30K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode halve our delay which doubles the
+	 * rate to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size /= 2;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+	       I40E_ITR_ADAPTIVE_MIN_INC;
+
+	if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= I40E_ITR_ADAPTIVE_LATENCY;
+		itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
 	rc->total_bytes = 0;
 	rc->total_packets = 0;
-	rc->last_itr_update = jiffies;
-
-	return rc->target_itr != rc->current_itr;
 }
 
 /**
@@ -1486,6 +1630,15 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_VFINT_DYN_CTLN1
 
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
  * @vsi: the VSI we care about
@@ -1496,64 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 					  struct i40e_q_vector *q_vector)
 {
 	struct i40e_hw *hw = &vsi->back->hw;
-	bool rx = false, tx = false;
 	u32 intval;
 
-	/* avoid dynamic calculation if in countdown mode */
-	if (q_vector->itr_countdown > 0)
-		goto enable_int;
+	/* These will do nothing if dynamic updates are not enabled */
+	i40e_update_itr(q_vector, &q_vector->tx);
+	i40e_update_itr(q_vector, &q_vector->rx);
 
-	/* these will return false if dynamic mode is disabled */
-	rx = i40e_set_new_dynamic_itr(&q_vector->rx);
-	tx = i40e_set_new_dynamic_itr(&q_vector->tx);
-
-	if (rx || tx) {
-		/* get the higher of the two ITR adjustments and
-		 * use the same value for both ITR registers
-		 * when in adaptive mode (Rx and/or Tx)
-		 */
-		u16 itr = max(q_vector->tx.target_itr,
-			      q_vector->rx.target_itr);
-
-		q_vector->tx.target_itr = itr;
-		q_vector->rx.target_itr = itr;
-	}
-
-enable_int:
-	if (q_vector->rx.target_itr != q_vector->rx.current_itr) {
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
+	 */
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
 		intval = i40e_buildreg_itr(I40E_RX_ITR,
 					   q_vector->rx.target_itr);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
-
-		if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
-			/* set the INTENA_MSK_MASK so that this first write
-			 * won't actually enable the interrupt, instead just
-			 * updating the ITR (it's bit 31 PF and VF)
-			 *
-			 * don't check _DOWN because interrupt isn't being
-			 * enabled
-			 */
-			wr32(hw, INTREG(q_vector->reg_idx),
-			     intval | BIT(31));
-			/* now that Rx is done process Tx update */
-			goto update_tx;
-		}
-	} else if (q_vector->tx.target_itr != q_vector->tx.current_itr) {
-update_tx:
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
 		intval = i40e_buildreg_itr(I40E_TX_ITR,
 					   q_vector->tx.target_itr);
 		q_vector->tx.current_itr = q_vector->tx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	} else {
+		/* No ITR update, lowest priority */
 		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
 	}
 
 	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
 		wr32(hw, INTREG(q_vector->reg_idx), intval);
-
-	if (q_vector->itr_countdown)
-		q_vector->itr_countdown--;
-	else
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 4069259aef34..9129447d079b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -428,20 +428,19 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
 	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
 }
 
-enum i40e_latency_range {
-	I40E_LOWEST_LATENCY = 0,
-	I40E_LOW_LATENCY = 1,
-	I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
+#define I40E_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY))
 
 struct i40e_ring_container {
-	/* array of pointers to rings */
-	struct i40e_ring *ring;
+	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
-	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
-	enum i40e_latency_range latency_range;
 	u16 target_itr;			/* target ITR setting for ring(s) */
 	u16 current_itr;		/* current ITR setting for ring(s) */
 };
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 9690c1ea019e..b6991e8014d8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -117,9 +117,8 @@ struct i40e_q_vector {
 	struct i40e_ring_container rx;
 	struct i40e_ring_container tx;
 	u32 ring_mask;
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
-	u8 itr_countdown;	/* when 0 or 1 update ITR */
 	u16 v_idx;		/* index in the vsi->q_vector array. */
 	u16 reg_idx;		/* register index of the interrupt */
 	char name[IFNAMSIZ + 15];
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 3bf6a126be72..6fd09926181a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -353,10 +353,9 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 	rx_ring->vsi = &adapter->vsi;
 	q_vector->rx.ring = rx_ring;
 	q_vector->rx.count++;
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
+	q_vector->rx.next_update = jiffies + 1;
 	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 	q_vector->ring_mask |= BIT(r_idx);
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
 	     q_vector->rx.current_itr);
 	q_vector->rx.current_itr = q_vector->rx.target_itr;
@@ -380,9 +379,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 	tx_ring->vsi = &adapter->vsi;
 	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
+	q_vector->tx.next_update = jiffies + 1;
 	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	q_vector->num_ringpairs++;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
 	     q_vector->tx.target_itr);

From 60aa80460da115216070082b4ab686b9e37c86ef Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 30 Jan 2018 14:53:48 +0000
Subject: [PATCH 0035/1678] esp4: remove redundant initialization of pointer
 esph

Pointer esph is being assigned a value that is never read, esph is
re-assigned and only read inside an if statement, hence the
initialization is redundant and can be removed.

Cleans up clang warning:
net/ipv4/esp4.c:657:21: warning: Value stored to 'esph' during
its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 296d0b956bfe..97689012b357 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -654,7 +654,7 @@ static void esp_input_restore_header(struct sk_buff *skb)
 static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
-	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+	struct ip_esp_hdr *esph;
 
 	/* For ESN we move the header forward by 4 bytes to
 	 * accomodate the high bits.  We will move it back after

From 98f6c533a3e98f21305575f0cf87cdb6c2210c43 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:26:02 +0300
Subject: [PATCH 0036/1678] net: Assign net to net_namespace_list in
 setup_net()

This patch merges two repeating pieces of code in one,
and they will live in setup_net() now.

The only change is that assignment:

	init_net_initialized = true;

becomes reordered with:

	list_add_tail_rcu(&net->list, &net_namespace_list);

The order does not have visible effect, and it is a simple
cleanup because of:

init_net_initialized is used in !CONFIG_NET_NS case
to order proc_net_ns_ops registration occuring at boot time:

	start_kernel()->proc_root_init()->proc_net_init(),
with
	net_ns_init()->setup_net(&init_net, &init_user_ns)

also occuring in boot time from the same init_task.

When there are no another tasks to race with them,
for the single task it does not matter, which order
two sequential independent loads should be made.
So we make them reordered.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3cad5f51afd3..1180c217895a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -303,6 +303,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 		if (error < 0)
 			goto out_undo;
 	}
+	rtnl_lock();
+	list_add_tail_rcu(&net->list, &net_namespace_list);
+	rtnl_unlock();
 out:
 	return error;
 
@@ -424,11 +427,6 @@ struct net *copy_net_ns(unsigned long flags,
 
 	net->ucounts = ucounts;
 	rv = setup_net(net, user_ns);
-	if (rv == 0) {
-		rtnl_lock();
-		list_add_tail_rcu(&net->list, &net_namespace_list);
-		rtnl_unlock();
-	}
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
 		dec_net_namespaces(ucounts);
@@ -880,11 +878,6 @@ static int __init net_ns_init(void)
 		panic("Could not setup the initial network namespace");
 
 	init_net_initialized = true;
-
-	rtnl_lock();
-	list_add_tail_rcu(&init_net.list, &net_namespace_list);
-	rtnl_unlock();
-
 	mutex_unlock(&net_mutex);
 
 	register_pernet_subsys(&net_ns_ops);

From 5ba049a5cc8e24a1643df75bbf65b4efa070fa74 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:26:13 +0300
Subject: [PATCH 0037/1678] net: Cleanup in copy_net_ns()

Line up destructors actions in the revers order
to constructors. Next patches will add more actions,
and this will be comfortable, if there is the such
order.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1180c217895a..81384386f91b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -411,27 +411,25 @@ struct net *copy_net_ns(unsigned long flags,
 
 	net = net_alloc();
 	if (!net) {
-		dec_net_namespaces(ucounts);
-		return ERR_PTR(-ENOMEM);
+		rv = -ENOMEM;
+		goto dec_ucounts;
 	}
-
+	refcount_set(&net->passive, 1);
+	net->ucounts = ucounts;
 	get_user_ns(user_ns);
 
 	rv = mutex_lock_killable(&net_mutex);
-	if (rv < 0) {
-		net_free(net);
-		dec_net_namespaces(ucounts);
-		put_user_ns(user_ns);
-		return ERR_PTR(rv);
-	}
+	if (rv < 0)
+		goto put_userns;
 
-	net->ucounts = ucounts;
 	rv = setup_net(net, user_ns);
 	mutex_unlock(&net_mutex);
 	if (rv < 0) {
-		dec_net_namespaces(ucounts);
+put_userns:
 		put_user_ns(user_ns);
 		net_drop_ns(net);
+dec_ucounts:
+		dec_net_namespaces(ucounts);
 		return ERR_PTR(rv);
 	}
 	return net;

From 1a57feb847c56d6193f67d0e892c24e71f9e3ab1 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:26:23 +0300
Subject: [PATCH 0038/1678] net: Introduce net_sem for protection of
 pernet_list

Currently, the mutex is mostly used to protect pernet operations
list. It orders setup_net() and cleanup_net() with parallel
{un,}register_pernet_operations() calls, so ->exit{,batch} methods
of the same pernet operations are executed for a dying net, as
were used to call ->init methods, even after the net namespace
is unlinked from net_namespace_list in cleanup_net().

But there are several problems with scalability. The first one
is that more than one net can't be created or destroyed
at the same moment on the node. For big machines with many cpus
running many containers it's very sensitive.

The second one is that it's need to synchronize_rcu() after net
is removed from net_namespace_list():

Destroy net_ns:
cleanup_net()
  mutex_lock(&net_mutex)
  list_del_rcu(&net->list)
  synchronize_rcu()                                  <--- Sleep there for ages
  list_for_each_entry_reverse(ops, &pernet_list, list)
    ops_exit_list(ops, &net_exit_list)
  list_for_each_entry_reverse(ops, &pernet_list, list)
    ops_free_list(ops, &net_exit_list)
  mutex_unlock(&net_mutex)

This primitive is not fast, especially on the systems with many processors
and/or when preemptible RCU is enabled in config. So, all the time, while
cleanup_net() is waiting for RCU grace period, creation of new net namespaces
is not possible, the tasks, who makes it, are sleeping on the same mutex:

Create net_ns:
copy_net_ns()
  mutex_lock_killable(&net_mutex)                    <--- Sleep there for ages

I observed 20-30 seconds hangs of "unshare -n" on ordinary 8-cpu laptop
with preemptible RCU enabled after CRIU tests round is finished.

The solution is to convert net_mutex to the rw_semaphore and add fine grain
locks to really small number of pernet_operations, what really need them.

Then, pernet_operations::init/::exit methods, modifying the net-related data,
will require down_read() locking only, while down_write() will be used
for changing pernet_list (i.e., when modules are being loaded and unloaded).

This gives signify performance increase, after all patch set is applied,
like you may see here:

%for i in {1..10000}; do unshare -n bash -c exit; done

*before*
real 1m40,377s
user 0m9,672s
sys 0m19,928s

*after*
real 0m17,007s
user 0m5,311s
sys 0m11,779

(5.8 times faster)

This patch starts replacing net_mutex to net_sem. It adds rw_semaphore,
describes the variables it protects, and makes to use, where appropriate.
net_mutex is still present, and next patches will kick it out step-by-step.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  1 +
 net/core/net_namespace.c  | 39 ++++++++++++++++++++++++++-------------
 net/core/rtnetlink.c      |  4 ++--
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fdcde96eb65..e9ee9ad0a681 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -36,6 +36,7 @@ extern int rtnl_is_locked(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct mutex net_mutex;
+extern struct rw_semaphore net_sem;
 
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 81384386f91b..e89b2b7abd36 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -41,6 +41,11 @@ struct net init_net = {
 EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
+/*
+ * net_sem: protects: pernet_list, net_generic_ids,
+ * init_net_initialized and first_device pointer.
+ */
+DECLARE_RWSEM(net_sem);
 
 #define MIN_PERNET_OPS_ID	\
 	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -286,7 +291,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
  */
 static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
-	/* Must be called with net_mutex held */
+	/* Must be called with net_sem held */
 	const struct pernet_operations *ops, *saved_ops;
 	int error = 0;
 	LIST_HEAD(net_exit_list);
@@ -418,12 +423,16 @@ struct net *copy_net_ns(unsigned long flags,
 	net->ucounts = ucounts;
 	get_user_ns(user_ns);
 
-	rv = mutex_lock_killable(&net_mutex);
+	rv = down_read_killable(&net_sem);
 	if (rv < 0)
 		goto put_userns;
-
+	rv = mutex_lock_killable(&net_mutex);
+	if (rv < 0)
+		goto up_read;
 	rv = setup_net(net, user_ns);
 	mutex_unlock(&net_mutex);
+up_read:
+	up_read(&net_sem);
 	if (rv < 0) {
 put_userns:
 		put_user_ns(user_ns);
@@ -477,6 +486,7 @@ static void cleanup_net(struct work_struct *work)
 	list_replace_init(&cleanup_list, &net_kill_list);
 	spin_unlock_irq(&cleanup_list_lock);
 
+	down_read(&net_sem);
 	mutex_lock(&net_mutex);
 
 	/* Don't let anyone else find us. */
@@ -517,6 +527,7 @@ static void cleanup_net(struct work_struct *work)
 		ops_free_list(ops, &net_exit_list);
 
 	mutex_unlock(&net_mutex);
+	up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -543,8 +554,10 @@ static void cleanup_net(struct work_struct *work)
  */
 void net_ns_barrier(void)
 {
+	down_write(&net_sem);
 	mutex_lock(&net_mutex);
 	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
 
@@ -871,12 +884,12 @@ static int __init net_ns_init(void)
 
 	rcu_assign_pointer(init_net.gen, ng);
 
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
 	init_net_initialized = true;
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 
 	register_pernet_subsys(&net_ns_ops);
 
@@ -1016,9 +1029,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
 int register_pernet_subsys(struct pernet_operations *ops)
 {
 	int error;
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	error =  register_pernet_operations(first_device, ops);
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 	return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1034,9 +1047,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
  */
 void unregister_pernet_subsys(struct pernet_operations *ops)
 {
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	unregister_pernet_operations(ops);
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 
@@ -1062,11 +1075,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 int register_pernet_device(struct pernet_operations *ops)
 {
 	int error;
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	error = register_pernet_operations(&pernet_list, ops);
 	if (!error && (first_device == &pernet_list))
 		first_device = &ops->list;
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 	return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1082,11 +1095,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
  */
 void unregister_pernet_device(struct pernet_operations *ops)
 {
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	if (&ops->list == first_device)
 		first_device = first_device->next;
 	unregister_pernet_operations(ops);
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc290413a49d..257e7bbaffba 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void)
 void rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	/* Close the race with cleanup_net() */
-	mutex_lock(&net_mutex);
+	down_write(&net_sem);
 	rtnl_lock_unregistering_all();
 	__rtnl_link_unregister(ops);
 	rtnl_unlock();
-	mutex_unlock(&net_mutex);
+	up_write(&net_sem);
 }
 EXPORT_SYMBOL_GPL(rtnl_link_unregister);
 

From bcab1ddd9b2b105390712a9c1605bdb20a7f9a03 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:26:33 +0300
Subject: [PATCH 0039/1678] net: Move mutex_unlock() in cleanup_net() up

net_sem protects from pernet_list changing, while
ops_free_list() makes simple kfree(), and it can't
race with other pernet_operations callbacks.

So we may release net_mutex earlier then it was.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e89b2b7abd36..f8453c438798 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -522,11 +522,12 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
+	mutex_unlock(&net_mutex);
+
 	/* Free the net generic variables */
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	mutex_unlock(&net_mutex);
 	up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this

From 447cd7a0d7d1e5b4486e99cce289654fec9951e3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:26:44 +0300
Subject: [PATCH 0040/1678] net: Allow pernet_operations to be executed in
 parallel

This adds new pernet_operations::async flag to indicate operations,
which ->init(), ->exit() and ->exit_batch() methods are allowed
to be executed in parallel with the methods of any other pernet_operations.

When there are only asynchronous pernet_operations in the system,
net_mutex won't be taken for a net construction and destruction.

Also, remove BUG_ON(mutex_is_locked()) from net_assign_generic()
without replacing with the equivalent net_sem check, as there is
one more lockdep assert below.

v3: Add comment near net_mutex.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  6 ++++++
 net/core/net_namespace.c    | 30 ++++++++++++++++++++----------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f306b2aa15a4..9158ec1ad06f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -313,6 +313,12 @@ struct pernet_operations {
 	void (*exit_batch)(struct list_head *net_exit_list);
 	unsigned int *id;
 	size_t size;
+	/*
+	 * Indicates above methods are allowed to be executed in parallel
+	 * with methods of any other pernet_operations, i.e. they are not
+	 * need synchronization via net_mutex.
+	 */
+	bool async;
 };
 
 /*
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f8453c438798..2a01ff32d9c7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,6 +29,7 @@
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
+/* Used only if there are !async pernet_operations registered */
 DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
@@ -41,8 +42,9 @@ struct net init_net = {
 EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
+static unsigned nr_sync_pernet_ops;
 /*
- * net_sem: protects: pernet_list, net_generic_ids,
+ * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops,
  * init_net_initialized and first_device pointer.
  */
 DECLARE_RWSEM(net_sem);
@@ -70,11 +72,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
 {
 	struct net_generic *ng, *old_ng;
 
-	BUG_ON(!mutex_is_locked(&net_mutex));
 	BUG_ON(id < MIN_PERNET_OPS_ID);
 
 	old_ng = rcu_dereference_protected(net->gen,
-					   lockdep_is_held(&net_mutex));
+					   lockdep_is_held(&net_sem));
 	if (old_ng->s.len > id) {
 		old_ng->ptr[id] = data;
 		return 0;
@@ -426,11 +427,14 @@ struct net *copy_net_ns(unsigned long flags,
 	rv = down_read_killable(&net_sem);
 	if (rv < 0)
 		goto put_userns;
-	rv = mutex_lock_killable(&net_mutex);
-	if (rv < 0)
-		goto up_read;
+	if (nr_sync_pernet_ops) {
+		rv = mutex_lock_killable(&net_mutex);
+		if (rv < 0)
+			goto up_read;
+	}
 	rv = setup_net(net, user_ns);
-	mutex_unlock(&net_mutex);
+	if (nr_sync_pernet_ops)
+		mutex_unlock(&net_mutex);
 up_read:
 	up_read(&net_sem);
 	if (rv < 0) {
@@ -487,7 +491,8 @@ static void cleanup_net(struct work_struct *work)
 	spin_unlock_irq(&cleanup_list_lock);
 
 	down_read(&net_sem);
-	mutex_lock(&net_mutex);
+	if (nr_sync_pernet_ops)
+		mutex_lock(&net_mutex);
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
@@ -522,7 +527,8 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
-	mutex_unlock(&net_mutex);
+	if (nr_sync_pernet_ops)
+		mutex_unlock(&net_mutex);
 
 	/* Free the net generic variables */
 	list_for_each_entry_reverse(ops, &pernet_list, list)
@@ -994,6 +1000,9 @@ again:
 		rcu_barrier();
 		if (ops->id)
 			ida_remove(&net_generic_ids, *ops->id);
+	} else if (!ops->async) {
+		pr_info_once("Pernet operations %ps are sync.\n", ops);
+		nr_sync_pernet_ops++;
 	}
 
 	return error;
@@ -1001,7 +1010,8 @@ again:
 
 static void unregister_pernet_operations(struct pernet_operations *ops)
 {
-	
+	if (!ops->async)
+		BUG_ON(nr_sync_pernet_ops-- == 0);
 	__unregister_pernet_operations(ops);
 	rcu_barrier();
 	if (ops->id)

From f039e184bc45227553e645fb1c7021d69a5262b4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:26:52 +0300
Subject: [PATCH 0041/1678] net: Convert proc_net_ns_ops

This patch starts to convert pernet_subsys, registered
before initcalls.

proc_net_ns_ops::proc_net_ns_init()/proc_net_ns_exit()
{un,}register pernet net->proc_net and ->proc_net_stat.

Constructors and destructors of another pernet_operations
are not interested in foreign net's proc_net and proc_net_stat.
Proc filesystem privitives are synchronized on proc_subdir_lock.

So, proc_net_ns_ops methods are able to be executed
in parallel with methods of any other pernet operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 68c06ae7888c..da6f8733c9c5 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net)
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
 	.init = proc_net_ns_init,
 	.exit = proc_net_ns_exit,
+	.async = true,
 };
 
 int __init proc_net_init(void)

From 3fc3b827f0c4397c74d4b8a8a06d71b903a4982f Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:27:03 +0300
Subject: [PATCH 0042/1678] net: Convert net_ns_ops methods

This patch starts to convert pernet_subsys, registered
from pure initcalls.

net_ns_ops::net_ns_net_init/net_ns_net_init, methods use only
ida_simple_* functions, which are not need a synchronization.
They are synchronized by idr subsystem.

So, net_ns_ops methods are able to be executed
in parallel with methods of other pernet operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2a01ff32d9c7..e21c564c8c00 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -649,6 +649,7 @@ static __net_exit void net_ns_net_exit(struct net *net)
 static struct pernet_operations __net_initdata net_ns_ops = {
 	.init = net_ns_net_init,
 	.exit = net_ns_net_exit,
+	.async = true,
 };
 
 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {

From 93d230fe0762bf129fb4debc944af3e1c1e8f40e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:27:13 +0300
Subject: [PATCH 0043/1678] net: Convert sysctl_pernet_ops

This patch starts to convert pernet_subsys, registered
from core initcalls.

Methods sysctl_net_init() and sysctl_net_exit() initialize
net::sysctls table of a namespace.

pernet_operations::init()/exit() methods from the rest
of the list do not touch net::sysctls of strangers,
so it's safe to execute sysctl_pernet_ops's methods
in parallel with any other pernet_operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sysctl_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9aed6fe1bf1a..f424539829b7 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -89,6 +89,7 @@ static void __net_exit sysctl_net_exit(struct net *net)
 static struct pernet_operations sysctl_pernet_ops = {
 	.init = sysctl_net_init,
 	.exit = sysctl_net_exit,
+	.async = true,
 };
 
 static struct ctl_table_header *net_header;

From 9549929923738d14dc450f0427a4b98ac3e8aff2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:27:23 +0300
Subject: [PATCH 0044/1678] net: Convert netfilter_net_ops

Methods netfilter_net_init() and netfilter_net_exit()
initialize net::nf::hooks and change net-related proc
directory of net. Another pernet_operations are not
interested in forein net::nf::hooks or proc entries,
so it's safe to make them executed in parallel with
methods of other pernet operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0f6b8172fb9a..d72cc786c7b7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -629,6 +629,7 @@ static void __net_exit netfilter_net_exit(struct net *net)
 static struct pernet_operations netfilter_net_ops = {
 	.init = netfilter_net_init,
 	.exit = netfilter_net_exit,
+	.async = true,
 };
 
 int __init netfilter_init(void)

From c9d8fb91351fe1514731b7f6d24d4decc0028978 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:27:31 +0300
Subject: [PATCH 0045/1678] net: Convert nf_log_net_ops

The pernet_operations would have had a problem in parallel
execution with others, if init_net had been able to released.
But it's not, and the rest is safe for that.
There is memory allocation, which nobody else interested in,
and sysctl registration. So, we make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index c2c1b16b7538..1ba3da51050d 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -577,6 +577,7 @@ static void __net_exit nf_log_net_exit(struct net *net)
 static struct pernet_operations nf_log_net_ops = {
 	.init = nf_log_net_init,
 	.exit = nf_log_net_exit,
+	.async = true,
 };
 
 int __init netfilter_log_init(void)

From 604da74e4fc1c2afc50dc7a1850acfd9aff2b58d Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:27:41 +0300
Subject: [PATCH 0046/1678] net: Convert net_inuse_ops

net_inuse_ops methods expose statistics in /proc.
No one from the rest of pernet_subsys or pernet_device
lists touch net::core::inuse.

So, it's safe to make net_inuse_ops async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/sock.c b/net/core/sock.c
index 04e5e27c9b81..f2bf69b86c58 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3112,6 +3112,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
 static struct pernet_operations net_inuse_ops = {
 	.init = sock_inuse_init_net,
 	.exit = sock_inuse_exit_net,
+	.async = true,
 };
 
 static __init int net_inuse_init(void)

From ff291d005a988aaa7d73daf44c3d04585e9f0637 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:27:51 +0300
Subject: [PATCH 0047/1678] net: Convert net_defaults_ops

net_defaults_ops introduce only net_defaults_init_net method,
and it acts on net::core::sysctl_somaxconn, which
is not interesting for the rest of pernet_subsys and
pernet_device lists. Then, make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e21c564c8c00..bcab9a938d6f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -340,6 +340,7 @@ static int __net_init net_defaults_init_net(struct net *net)
 
 static struct pernet_operations net_defaults_ops = {
 	.init = net_defaults_init_net,
+	.async = true,
 };
 
 static __init int net_defaults_init(void)

From 194b95d2166661f890dca5ef3b952c73622eb4e5 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:28:00 +0300
Subject: [PATCH 0048/1678] net: Convert netlink_net_ops

The methods of netlink_net_ops create and destroy "netlink"
file, which are not interesting for foreigh pernet_operations.
So, netlink_net_ops may safely be made async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3c8af14330b5..b3065908e146 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2723,6 +2723,7 @@ static void __init netlink_add_usersock_entry(void)
 static struct pernet_operations __net_initdata netlink_net_ops = {
 	.init = netlink_net_init,
 	.exit = netlink_net_exit,
+	.async = true,
 };
 
 static inline u32 netlink_hash(const void *data, u32 len, u32 seed)

From 46456675ec1b7f93dadd2b1b4b58d763c3ae9266 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:28:15 +0300
Subject: [PATCH 0049/1678] net: Convert rtnetlink_net_ops

rtnetlink_net_init() and rtnetlink_net_exit()
create and destroy netlink socket net::rtnl.

The socket is used to send rtnl notification via
rtnl_net_notifyid(). There is no a problem
to create and destroy it in parallel with other
pernet operations, as we link net in setup_net()
after the socket is created, and destroy
in cleanup_net() after net is unhashed from all
the lists and there is no RCU references on it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 257e7bbaffba..67f375cfb982 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
 static struct pernet_operations rtnetlink_net_ops = {
 	.init = rtnetlink_net_init,
 	.exit = rtnetlink_net_exit,
+	.async = true,
 };
 
 void __init rtnetlink_init(void)

From 906f63ec1d1b4941d36eee18121e68749c9e3279 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:28:24 +0300
Subject: [PATCH 0050/1678] net: Convert audit_net_ops

This patch starts to convert pernet_subsys, registered
from postcore initcalls.

audit_net_init() creates netlink socket, while audit_net_exit()
destroys it. The rest of the pernet_list are not interested
in the socket, so we make audit_net_ops async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/audit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/audit.c b/kernel/audit.c
index 227db99b0f19..5e49b614d0e6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = {
 	.exit = audit_net_exit,
 	.id = &audit_net_id,
 	.size = sizeof(struct audit_net),
+	.async = true,
 };
 
 /* Initialize audit support at boot time. */

From 15898a011b3d0390869f31167c4403835bc04954 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:28:33 +0300
Subject: [PATCH 0051/1678] net: Convert uevent_net_ops

uevent_net_init() and uevent_net_exit() create and
destroy netlink socket, and these actions serialized
in netlink code.

Parallel execution with other pernet_operations
makes the socket disappear earlier from uevent_sock_list
on ->exit. As userspace can't be interested in broadcast
messages of dying net, and, as I see, no one in kernel
listen them, we may safely make uevent_net_ops async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/kobject_uevent.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..9539d7ab3ea8 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -650,6 +650,7 @@ found:
 static struct pernet_operations uevent_net_ops = {
 	.init	= uevent_net_init,
 	.exit	= uevent_net_exit,
+	.async  = true,
 };
 
 static int __init kobject_uevent_init(void)

From 36b0068e6c9892aa4757d4fa08fd14fbba72b3b3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:28:44 +0300
Subject: [PATCH 0052/1678] net: Convert proto_net_ops

This patch starts to convert pernet_subsys, registered
from subsys initcalls.

It seems safe to be executed in parallel with others,
as it's only creates/destoyes proc entry,
which nobody else is not interested in.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/sock.c b/net/core/sock.c
index f2bf69b86c58..e90d461748f0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3386,6 +3386,7 @@ static __net_exit void proto_exit_net(struct net *net)
 static __net_initdata struct pernet_operations proto_net_ops = {
 	.init = proto_init_net,
 	.exit = proto_exit_net,
+	.async = true,
 };
 
 static int __init proto_init(void)

From 88b8ffebdb4d0f4da4e9a8383c8478c32372b42b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:28:54 +0300
Subject: [PATCH 0053/1678] net: Convert pernet_subsys ops, registered via
 net_dev_init()

There are:
1)dev_proc_ops and dev_mc_net_ops, which create and destroy
pernet proc file and not interesting for another net namespaces;
2)netdev_net_ops, which creates pernet hashes, which are not
touched by another pernet_operations.

So, make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c        | 1 +
 net/core/net-procfs.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index dda9d7b9a840..dc7506f00a66 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8833,6 +8833,7 @@ static void __net_exit netdev_exit(struct net *net)
 static struct pernet_operations __net_initdata netdev_net_ops = {
 	.init = netdev_init,
 	.exit = netdev_exit,
+	.async = true,
 };
 
 static void __net_exit default_device_exit(struct net *net)
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index e010bb800d7b..65b51e778782 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_proc_ops = {
 	.init = dev_proc_net_init,
 	.exit = dev_proc_net_exit,
+	.async = true,
 };
 
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
@@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_mc_net_ops = {
 	.init = dev_mc_net_init,
 	.exit = dev_mc_net_exit,
+	.async = true,
 };
 
 int __init dev_proc_init(void)

From 86b63418fd382b095fdac4408fd565aa5da4b036 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:29:03 +0300
Subject: [PATCH 0054/1678] net: Convert fib_* pernet_operations, registered
 via subsys_initcall

Both of them create and initialize lists, which are not touched
by another foreing pernet_operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_notifier.c | 1 +
 net/core/fib_rules.c    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..5ace0705a3f9 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
 static struct pernet_operations fib_notifier_net_ops = {
 	.init = fib_notifier_net_init,
 	.exit = fib_notifier_net_exit,
+	.async = true,
 };
 
 static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98e1066c3d55..cb071b8e8d17 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1030,6 +1030,7 @@ static void __net_exit fib_rules_net_exit(struct net *net)
 static struct pernet_operations fib_rules_net_ops = {
 	.init = fib_rules_net_init,
 	.exit = fib_rules_net_exit,
+	.async = true,
 };
 
 static int __init fib_rules_init(void)

From 13da199c38ee7f33a1c42db62647118f9f9f527c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:29:13 +0300
Subject: [PATCH 0055/1678] net: Convert subsys_initcall() registered
 pernet_operations from net/sched

psched_net_ops only creates and destroyes /proc entry,
and safe to be executed in parallel with any foreigh
pernet_operations.

tcf_action_net_ops initializes and destructs tcf_action_net::egdev_ht,
which is not touched by foreign pernet_operations.

So, make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 1 +
 net/sched/sch_api.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eba6682727dd..4886ea4a7d6e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1454,6 +1454,7 @@ static struct pernet_operations tcf_action_net_ops = {
 	.exit = tcf_action_net_exit,
 	.id = &tcf_action_net_id,
 	.size = sizeof(struct tcf_action_net),
+	.async = true,
 };
 
 static int __init tc_action_init(void)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d512f49ee83c..27e672c12492 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2128,6 +2128,7 @@ static void __net_exit psched_net_exit(struct net *net)
 static struct pernet_operations psched_net_ops = {
 	.init = psched_net_init,
 	.exit = psched_net_exit,
+	.async = true,
 };
 
 static int __init pktsched_init(void)

From 83caf62c867bed2637d4f3713839b0c414d6d966 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:29:23 +0300
Subject: [PATCH 0056/1678] net: Convert genl_pernet_ops

This pernet_operations create and destroy net::genl_sock.
Foreign pernet_operations don't touch it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6f02499ef007..a6f63a5faee7 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1035,6 +1035,7 @@ static void __net_exit genl_pernet_exit(struct net *net)
 static struct pernet_operations genl_pernet_ops = {
 	.init = genl_pernet_init,
 	.exit = genl_pernet_exit,
+	.async = true,
 };
 
 static int __init genl_init(void)

From 6c0075d0f6ccf5be4527408830852106808ab2bb Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:29:33 +0300
Subject: [PATCH 0057/1678] net: Convert wext_pernet_ops

These pernet_operations initialize and purge net::wext_nlevents
queue, and are not touched by foreign pernet_operations.

Mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 9efbfc753347..bc7064486b15 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -390,6 +390,7 @@ static void __net_exit wext_pernet_exit(struct net *net)
 static struct pernet_operations wext_pernet_ops = {
 	.init = wext_pernet_init,
 	.exit = wext_pernet_exit,
+	.async = true,
 };
 
 static int __init wireless_nlevent_init(void)

From 232cf06c611f57ccb8e321de3fd850f21215ede8 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:29:42 +0300
Subject: [PATCH 0058/1678] net: Convert sysctl_core_ops

These pernet_operations register and destroy sysctl
directory, and it's not interesting for foreign
pernet_operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f2d0462611c3..d714f65782b7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -572,6 +572,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_core_ops = {
 	.init = sysctl_core_net_init,
 	.exit = sysctl_core_net_exit,
+	.async = true,
 };
 
 static __init int sysctl_core_init(void)

From f84c6821aa540342360067604ad156e3d53a67ed Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:29:52 +0300
Subject: [PATCH 0059/1678] net: Convert pernet_subsys, registered from
 inet_init()

arp_net_ops just addr/removes /proc entry.

devinet_ops allocates and frees duplicate of init_net tables
and (un)registers sysctl entries.

fib_net_ops allocates and frees pernet tables, creates/destroys
netlink socket and (un)initializes /proc entries. Foreign
pernet_operations do not touch them.

ip_rt_proc_ops only modifies pernet /proc entries.

xfrm_net_ops creates/destroys /proc entries, allocates/frees
pernet statistics, hashes and tables, and (un)initializes
sysctl files. These are not touched by foreigh pernet_operations

xfrm4_net_ops allocates/frees private pernet memory, and
configures sysctls.

sysctl_route_ops creates/destroys sysctls.

rt_genid_ops only initializes fields of just allocated net.

ipv4_inetpeer_ops allocated/frees net private memory.

igmp_net_ops just creates/destroys /proc files and socket,
noone else interested in.

tcp_sk_ops seems to be safe, because tcp_sk_init() does not
depend on any other pernet_operations modifications. Iteration
over hash table in inet_twsk_purge() is made under RCU lock,
and it's safe to iterate the table this way. Removing from
the table happen from inet_twsk_deschedule_put(), but this
function is safe without any extern locks, as it's synchronized
inside itself. There are many examples, it's used in different
context. So, it's safe to leave tcp_sk_exit_batch() unlocked.

tcp_net_metrics_ops is synchronized on tcp_metrics_lock and safe.

udplite4_net_ops only creates/destroys pernet /proc file.

icmp_sk_ops creates percpu sockets, not touched by foreign
pernet_operations.

ipmr_net_ops creates/destroys pernet fib tables, (un)registers
fib rules and /proc files. This seem to be safe to execute
in parallel with foreign pernet_operations.

af_inet_ops just sets up default parameters of newly created net.

ipv4_mib_ops creates and destroys pernet percpu statistics.

raw_net_ops, tcp4_net_ops, udp4_net_ops, ping_v4_net_ops
and ip_proc_ops only create/destroy pernet /proc files.

ip4_frags_ops creates and destroys sysctl file.

So, it's safe to make the pernet_operations async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c      | 2 ++
 net/ipv4/arp.c          | 1 +
 net/ipv4/devinet.c      | 1 +
 net/ipv4/fib_frontend.c | 1 +
 net/ipv4/icmp.c         | 1 +
 net/ipv4/igmp.c         | 1 +
 net/ipv4/ip_fragment.c  | 1 +
 net/ipv4/ipmr.c         | 1 +
 net/ipv4/ping.c         | 1 +
 net/ipv4/proc.c         | 1 +
 net/ipv4/raw.c          | 1 +
 net/ipv4/route.c        | 4 ++++
 net/ipv4/tcp_ipv4.c     | 2 ++
 net/ipv4/tcp_metrics.c  | 1 +
 net/ipv4/udp.c          | 1 +
 net/ipv4/udplite.c      | 1 +
 net/ipv4/xfrm4_policy.c | 1 +
 net/xfrm/xfrm_policy.c  | 1 +
 18 files changed, 23 insertions(+)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f98e2f0db841..e8c7fad8c329 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1735,6 +1735,7 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
 	.init = ipv4_mib_init_net,
 	.exit = ipv4_mib_exit_net,
+	.async = true,
 };
 
 static int __init init_ipv4_mibs(void)
@@ -1788,6 +1789,7 @@ static __net_exit void inet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations af_inet_ops = {
 	.init = inet_init_net,
 	.exit = inet_exit_net,
+	.async = true,
 };
 
 static int __init init_inet_pernet_ops(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f28f06c91ead..7dc9de8444a9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1447,6 +1447,7 @@ static void __net_exit arp_net_exit(struct net *net)
 static struct pernet_operations arp_net_ops = {
 	.init = arp_net_init,
 	.exit = arp_net_exit,
+	.async = true,
 };
 
 static int __init arp_proc_init(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..5ae0d1f097ca 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2469,6 +2469,7 @@ static __net_exit void devinet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations devinet_ops = {
 	.init = devinet_init_net,
 	.exit = devinet_exit_net,
+	.async = true,
 };
 
 static struct rtnl_af_ops inet_af_ops __read_mostly = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f05afaf3235c..ac71c3d496c0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1362,6 +1362,7 @@ static void __net_exit fib_net_exit(struct net *net)
 static struct pernet_operations fib_net_ops = {
 	.init = fib_net_init,
 	.exit = fib_net_exit,
+	.async = true,
 };
 
 void __init ip_fib_init(void)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1617604c9284..cc56efa64d5c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1257,6 +1257,7 @@ fail:
 static struct pernet_operations __net_initdata icmp_sk_ops = {
        .init = icmp_sk_init,
        .exit = icmp_sk_exit,
+       .async = true,
 };
 
 int __init icmp_init(void)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f2402581fef1..c2743763777e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -3028,6 +3028,7 @@ static void __net_exit igmp_net_exit(struct net *net)
 static struct pernet_operations igmp_net_ops = {
 	.init = igmp_net_init,
 	.exit = igmp_net_exit,
+	.async = true,
 };
 #endif
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..5e843ae5e468 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -885,6 +885,7 @@ static void __net_exit ipv4_frags_exit_net(struct net *net)
 static struct pernet_operations ip4_frags_ops = {
 	.init = ipv4_frags_init_net,
 	.exit = ipv4_frags_exit_net,
+	.async = true,
 };
 
 void __init ipfrag_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b05689bbba31..7c7ac9d32e77 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3327,6 +3327,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 static struct pernet_operations ipmr_net_ops = {
 	.init = ipmr_net_init,
 	.exit = ipmr_net_exit,
+	.async = true,
 };
 
 int __init ip_mr_init(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b8f0db54b197..0164def9c808 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1204,6 +1204,7 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v4_net_ops = {
 	.init = ping_v4_proc_init_net,
 	.exit = ping_v4_proc_exit_net,
+	.async = true,
 };
 
 int __init ping_proc_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index dc5edc8f7564..fdabc70283b6 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -549,6 +549,7 @@ static __net_exit void ip_proc_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ip_proc_ops = {
 	.init = ip_proc_init_net,
 	.exit = ip_proc_exit_net,
+	.async = true,
 };
 
 int __init ip_misc_proc_init(void)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9b367fc48d7d..54648d20bf0f 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1156,6 +1156,7 @@ static __net_exit void raw_exit_net(struct net *net)
 static __net_initdata struct pernet_operations raw_net_ops = {
 	.init = raw_init_net,
 	.exit = raw_exit_net,
+	.async = true,
 };
 
 int __init raw_proc_init(void)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49cc1c1df1ba..9376ed69ffeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -417,6 +417,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
 	.init = ip_rt_do_proc_init,
 	.exit = ip_rt_do_proc_exit,
+	.async = true,
 };
 
 static int __init ip_rt_proc_init(void)
@@ -2994,6 +2995,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_route_ops = {
 	.init = sysctl_route_net_init,
 	.exit = sysctl_route_net_exit,
+	.async = true,
 };
 #endif
 
@@ -3007,6 +3009,7 @@ static __net_init int rt_genid_init(struct net *net)
 
 static __net_initdata struct pernet_operations rt_genid_ops = {
 	.init = rt_genid_init,
+	.async = true,
 };
 
 static int __net_init ipv4_inetpeer_init(struct net *net)
@@ -3032,6 +3035,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
 static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
 	.init	=	ipv4_inetpeer_init,
 	.exit	=	ipv4_inetpeer_exit,
+	.async	=	true,
 };
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f8ad397e285e..ac16795486ea 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2387,6 +2387,7 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
 static struct pernet_operations tcp4_net_ops = {
 	.init = tcp4_proc_init_net,
 	.exit = tcp4_proc_exit_net,
+	.async = true,
 };
 
 int __init tcp4_proc_init(void)
@@ -2573,6 +2574,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
        .init	   = tcp_sk_init,
        .exit	   = tcp_sk_exit,
        .exit_batch = tcp_sk_exit_batch,
+       .async	   = true,
 };
 
 void __init tcp_v4_init(void)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03b51cdcc731..aa6fea9f3328 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1024,6 +1024,7 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
 	.init		=	tcp_net_metrics_init,
 	.exit_batch	=	tcp_net_metrics_exit_batch,
+	.async		=	true,
 };
 
 void __init tcp_metrics_init(void)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bfaefe560b5c..ac5fac0e59b1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2757,6 +2757,7 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
 static struct pernet_operations udp4_net_ops = {
 	.init = udp4_proc_init_net,
 	.exit = udp4_proc_exit_net,
+	.async = true,
 };
 
 int __init udp4_proc_init(void)
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f96614e9b9a5..72f2c3806408 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -104,6 +104,7 @@ static void __net_exit udplite4_proc_exit_net(struct net *net)
 static struct pernet_operations udplite4_net_ops = {
 	.init = udplite4_proc_init_net,
 	.exit = udplite4_proc_exit_net,
+	.async = true,
 };
 
 static __init int udplite4_proc_init(void)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05017e2c849c..753f526cf9db 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -365,6 +365,7 @@ static void __net_exit xfrm4_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm4_net_ops = {
 	.init	= xfrm4_net_init,
 	.exit	= xfrm4_net_exit,
+	.async	= true,
 };
 
 static void __init xfrm4_policy_init(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7a23078132cf..77d9d1ab05ce 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2982,6 +2982,7 @@ static void __net_exit xfrm_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm_net_ops = {
 	.init = xfrm_net_init,
 	.exit = xfrm_net_exit,
+	.async = true,
 };
 
 void __init xfrm_init(void)

From 167f7ac723e5b4ea22c44a0bd8e357bb76a68cd2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:00 +0300
Subject: [PATCH 0060/1678] net: Convert unix_net_ops

These pernet_operations are just create and destroy
/proc and sysctl entries, and are not touched by
foreign pernet_operations.

So, we are able to make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 723698416242..e3eb8806b3e4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2913,6 +2913,7 @@ static void __net_exit unix_net_exit(struct net *net)
 static struct pernet_operations unix_net_ops = {
 	.init = unix_net_init,
 	.exit = unix_net_exit,
+	.async = true,
 };
 
 static int __init af_unix_init(void)

From cb5e3400e78598e1eb872954516a02ba85926d84 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:08 +0300
Subject: [PATCH 0061/1678] net: Convert packet_net_ops

These pernet_operations just create and destroy /proc entry,
and another operations do not touch it.

Also, nobody else are interested in foreign net::packet::sklist.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 616cb9c18f88..2c5a6fe5d749 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4557,6 +4557,7 @@ static void __net_exit packet_net_exit(struct net *net)
 static struct pernet_operations packet_net_ops = {
 	.init = packet_net_init,
 	.exit = packet_net_exit,
+	.async = true,
 };
 
 

From 22769a2a6e93a17d08e1cd7a2de2749088887b87 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:18 +0300
Subject: [PATCH 0062/1678] net: Convert ipv4_sysctl_ops

These pernet_operations create and destroy sysctl,
which are not touched by anybody else.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 93e172118a94..89683d868b37 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1219,6 +1219,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
 	.init = ipv4_sysctl_init_net,
 	.exit = ipv4_sysctl_exit_net,
+	.async = true,
 };
 
 static __init int sysctl_ipv4_init(void)

From 0bc9be67185ec90feedc971af6e7b84ab932703a Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:27 +0300
Subject: [PATCH 0063/1678] net: Convert addrconf_ops

These pernet_operations (un)register sysctl, which
are not touched by anybody else.

So, it's safe to make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..8c17f8d8d5d9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6550,6 +6550,7 @@ static void __net_exit addrconf_exit_net(struct net *net)
 static struct pernet_operations addrconf_ops = {
 	.init = addrconf_init_net,
 	.exit = addrconf_exit_net,
+	.async = true,
 };
 
 static struct rtnl_af_ops inet6_ops __read_mostly = {

From 9a4d105de78488526d8b0d6cdc5f2c22f122ca4a Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:35 +0300
Subject: [PATCH 0064/1678] net: Convert loopback_net_ops

These pernet_operations have only init() method. It allocates
memory for net_device, calls register_netdev() and assigns
net::loopback_dev.

register_netdev() is allowed be used without additional locks,
as it's synchronized on rtnl_lock(). There are many examples
of using this functon directly from ioctl().

The only difference, compared to ioctl(), is that net is not
completely alive at this moment. But it looks like, there is
no way for parallel pernet_operations to dereference
the net_device, as the most of struct net_device lists,
where it's linked, are related to net, and the net is not liked.

The exceptions are net_device::unreg_list, close_list, todo_list,
used for unregistration, and ::link_watch_list, where net_device
may be linked to global lists.

Unregistration of loopback_dev obviously can't happen, when
loopback_net_init() is executing, as the net as alive. It occurs
in default_device_ops, which currently requires net_mutex,
and it behaves as a barrier at the moment. It will be considered
in next patch.

Speaking about link_watch_list, it seems, there is no way
for loopback_dev at time of registration to be linked in lweventlist
and be available for another pernet_operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 30612497643c..b97a907ea5aa 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -230,4 +230,5 @@ out:
 /* Registered in net/core/dev.c */
 struct pernet_operations __net_initdata loopback_net_ops = {
 	.init = loopback_net_init,
+	.async = true,
 };

From 2608e6b7adc8b07194b855e2102d6f1a277e3f03 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:42 +0300
Subject: [PATCH 0065/1678] net: Convert default_device_ops

These pernet operations consist of exit() and exit_batch() methods.

default_device_exit() moves not-local and virtual devices to init_net.
There is nothing exciting, because this may happen in any time
on a working system, and rtnl_lock() and synchronize_net() protect
us from all cases of external dereference.

The same for default_device_exit_batch(). Similar unregisteration
may happen in any time on a system. Here several lists (like todo_list),
which are accessed under rtnl_lock(). After rtnl_unlock() and
netdev_run_todo() all the devices are flushed.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index dc7506f00a66..df5241c8eda1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8934,6 +8934,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 static struct pernet_operations __net_initdata default_device_ops = {
 	.exit = default_device_exit,
 	.exit_batch = default_device_exit_batch,
+	.async = true,
 };
 
 /*

From 59a513587ac09359875d6074778f21a3c01754e0 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:30:52 +0300
Subject: [PATCH 0066/1678] net: Convert diag_net_ops

These pernet operations just create and destroy netlink
socket. The socket is pernet and else operations don't
touch it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock_diag.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 146b50e30659..aee5642affd9 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net)
 static struct pernet_operations diag_net_ops = {
 	.init = diag_net_init,
 	.exit = diag_net_exit,
+	.async = true,
 };
 
 static int __init sock_diag_init(void)

From b86b47a39598cdf17e0e826ebe1be21c798112cf Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Feb 2018 12:31:01 +0300
Subject: [PATCH 0067/1678] net: Convert netlink_tap_net_ops

These pernet_operations init just allocated net memory,
and they obviously can be executed in parallel in any
others.

v3: New

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b3065908e146..63cb55d3c2fd 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,6 +253,7 @@ static struct pernet_operations netlink_tap_net_ops = {
 	.exit = netlink_tap_exit_net,
 	.id   = &netlink_tap_net_id,
 	.size = sizeof(struct netlink_tap_net),
+	.async = true,
 };
 
 static bool netlink_filter_tap(const struct sk_buff *skb)

From d75de7b6e737148863705f3523f9575d10429815 Mon Sep 17 00:00:00 2001
From: Jake Moroni <mail@jakemoroni.com>
Date: Mon, 12 Feb 2018 19:23:43 -0500
Subject: [PATCH 0068/1678] dpaa_eth: fix incorrect comment

The comment stated that a thread was started, but
that is not the case.

Signed-off-by: Jake Moroni <mail@jakemoroni.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 7caa8da48421..a998c36c5e61 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2766,7 +2766,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
 
 	priv->channel = (u16)channel;
 
-	/* Start a thread that will walk the CPUs with affine portals
+	/* Walk the CPUs with affine portals
 	 * and add this pool channel to each's dequeue mask.
 	 */
 	dpaa_eth_add_channel(priv->channel);

From fe735a3d2c41f9ed2b706ce328c2b5d2b988d60b Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 13 Feb 2018 11:26:08 +0100
Subject: [PATCH 0069/1678] mlxsw: spectrum_ipip: Add a forgotten include

struct ip_tunnel_parm, which is used in spectrum_ipip.h, is defined in
if_tunnel.h. However, the former neglects to include the latter.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 04b08d9d76e9..e6c21974a97c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -37,6 +37,7 @@
 
 #include "spectrum_router.h"
 #include <net/ip_fib.h>
+#include <linux/if_tunnel.h>
 
 struct ip_tunnel_parm
 mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);

From e437f3b62d0844c4073c9e13263413748fa69093 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 13 Feb 2018 11:26:09 +0100
Subject: [PATCH 0070/1678] mlxsw: spectrum: Distinguish between IPv4/6 tunnels

struct ip_tunnel_parm, where GRE and several other tunnel types hold
information, is IPv4-specific. The current router / ipip code in mlxsw
however uses it as if it were generic.

Make it clear that it's not. Rename many functions from _params_ to
_params4_. mlxsw_sp_ipip_parms_saddr() and _daddr() take a proto
argument to dispatch on it. Move the dispatch logic to
mlxsw_sp_ipip_netdev_saddr() and _daddr(), and replace with
single-protocol functions.

In struct mlxsw_sp_ipip_entry, move the "parms" field to a (for the time
being, singleton) union. Update users throughout.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_ipip.c   | 153 +++++++-----------
 .../ethernet/mellanox/mlxsw/spectrum_ipip.h   |   6 +-
 .../ethernet/mellanox/mlxsw/spectrum_router.c |  16 +-
 3 files changed, 78 insertions(+), 97 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 7502e53447bd..a1c4b1e63f8d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -37,122 +37,89 @@
 #include "spectrum_ipip.h"
 
 struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev)
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
 {
 	struct ip_tunnel *tun = netdev_priv(ol_dev);
 
 	return tun->parms;
 }
 
-static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms)
+static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
 {
 	return !!(parms.i_flags & TUNNEL_KEY);
 }
 
-static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms)
+static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms)
 {
 	return !!(parms.o_flags & TUNNEL_KEY);
 }
 
-static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms)
 {
-	return mlxsw_sp_ipip_parms_has_ikey(parms) ?
+	return mlxsw_sp_ipip_parms4_has_ikey(parms) ?
 		be32_to_cpu(parms.i_key) : 0;
 }
 
-static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms)
+static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms)
 {
-	return mlxsw_sp_ipip_parms_has_okey(parms) ?
+	return mlxsw_sp_ipip_parms4_has_okey(parms) ?
 		be32_to_cpu(parms.o_key) : 0;
 }
 
-static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms)
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
 {
-	return parms.iph.saddr;
+	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
 }
 
 static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto,
-			  struct ip_tunnel_parm parms)
+mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
 {
-	switch (proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		return (union mlxsw_sp_l3addr) {
-			.addr4 = mlxsw_sp_ipip_parms_saddr4(parms),
-		};
-	case MLXSW_SP_L3_PROTO_IPV6:
-		break;
-	}
-
-	WARN_ON(1);
-	return (union mlxsw_sp_l3addr) {
-		.addr4 = 0,
-	};
-}
-
-static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms)
-{
-	return parms.iph.daddr;
-}
-
-static union mlxsw_sp_l3addr
-mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto,
-			  struct ip_tunnel_parm parms)
-{
-	switch (proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		return (union mlxsw_sp_l3addr) {
-			.addr4 = mlxsw_sp_ipip_parms_daddr4(parms),
-		};
-	case MLXSW_SP_L3_PROTO_IPV6:
-		break;
-	}
-
-	WARN_ON(1);
-	return (union mlxsw_sp_l3addr) {
-		.addr4 = 0,
-	};
-}
-
-static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
-{
-	return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
-
-static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
-{
-	return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
-
-static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
-{
-	return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev));
-}
-
-static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
-{
-	return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev));
+	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
 }
 
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
 			   const struct net_device *ol_dev)
 {
-	return mlxsw_sp_ipip_parms_saddr(proto,
-					 mlxsw_sp_ipip_netdev_parms(ol_dev));
+	struct ip_tunnel_parm parms4;
+
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+		return mlxsw_sp_ipip_parms4_saddr(parms4);
+	case MLXSW_SP_L3_PROTO_IPV6:
+		break;
+	}
+
+	WARN_ON(1);
+	return (union mlxsw_sp_l3addr) {0};
 }
 
 static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
 {
-	return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev));
+
+	struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+
+	return mlxsw_sp_ipip_parms4_daddr(parms4).addr4;
 }
 
 static union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
 			   const struct net_device *ol_dev)
 {
-	return mlxsw_sp_ipip_parms_daddr(proto,
-					 mlxsw_sp_ipip_netdev_parms(ol_dev));
+	struct ip_tunnel_parm parms4;
+
+	switch (proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+		return mlxsw_sp_ipip_parms4_daddr(parms4);
+	case MLXSW_SP_L3_PROTO_IPV6:
+		break;
+	}
+
+	WARN_ON(1);
+	return (union mlxsw_sp_l3addr) {0};
 }
 
 static int
@@ -176,12 +143,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
 				     u32 tunnel_index,
 				     struct mlxsw_sp_ipip_entry *ipip_entry)
 {
-	bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
-	u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
+	struct ip_tunnel_parm parms;
 	unsigned int type_check;
+	bool has_ikey;
 	u32 daddr4;
+	u32 ikey;
+
+	parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
+	has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms);
+	ikey = mlxsw_sp_ipip_parms4_ikey(parms);
 
 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
 
@@ -273,14 +245,15 @@ static struct mlxsw_sp_rif_ipip_lb_config
 mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
 				      const struct net_device *ol_dev)
 {
+	struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev);
 	enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
 
-	lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+	lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ?
 		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
 		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
 	return (struct mlxsw_sp_rif_ipip_lb_config){
 		.lb_ipipt = lb_ipipt,
-		.okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
+		.okey = mlxsw_sp_ipip_parms4_okey(parms),
 		.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
 		.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
 						    ol_dev),
@@ -300,16 +273,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
 	bool update_nhs = false;
 	int err = 0;
 
-	new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev);
+	new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev);
 
-	new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
-					      new_parms);
-	old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4,
-					      ipip_entry->parms);
-	new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
-					      new_parms);
-	old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4,
-					      ipip_entry->parms);
+	new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms);
+	old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4);
+	new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms);
+	old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4);
 
 	if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) {
 		u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
@@ -326,14 +295,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
 		}
 
 		update_tunnel = true;
-	} else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) !=
-		    mlxsw_sp_ipip_parms_okey(new_parms)) ||
-		   ipip_entry->parms.link != new_parms.link) {
+	} else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) !=
+		    mlxsw_sp_ipip_parms4_okey(new_parms)) ||
+		   ipip_entry->parms4.link != new_parms.link) {
 		update_tunnel = true;
 	} else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) {
 		update_nhs = true;
-	} else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) !=
-		   mlxsw_sp_ipip_parms_ikey(new_parms)) {
+	} else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) !=
+		   mlxsw_sp_ipip_parms4_ikey(new_parms)) {
 		update_decap = true;
 	}
 
@@ -350,7 +319,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp,
 							  false, false, false,
 							  extack);
 
-	ipip_entry->parms = new_parms;
+	ipip_entry->parms4 = new_parms;
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index e6c21974a97c..a4ff5737eccc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -40,7 +40,7 @@
 #include <linux/if_tunnel.h>
 
 struct ip_tunnel_parm
-mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev);
+mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
 
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
@@ -57,7 +57,9 @@ struct mlxsw_sp_ipip_entry {
 	struct mlxsw_sp_rif_ipip_lb *ol_lb;
 	struct mlxsw_sp_fib_entry *decap_fib_entry;
 	struct list_head ipip_list_node;
-	struct ip_tunnel_parm parms;
+	union {
+		struct ip_tunnel_parm parms4;
+	};
 };
 
 struct mlxsw_sp_ipip_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index f0b25baba09a..d8aaf8fd9cc2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,10 +1,10 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -1020,9 +1020,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
 			  enum mlxsw_sp_ipip_type ipipt,
 			  struct net_device *ol_dev)
 {
+	const struct mlxsw_sp_ipip_ops *ipip_ops;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct mlxsw_sp_ipip_entry *ret = NULL;
 
+	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
 	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
 	if (!ipip_entry)
 		return ERR_PTR(-ENOMEM);
@@ -1036,7 +1038,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
 
 	ipip_entry->ipipt = ipipt;
 	ipip_entry->ol_dev = ol_dev;
-	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);
+
+	switch (ipip_ops->ul_proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		WARN_ON(1);
+		break;
+	}
 
 	return ipip_entry;
 

From 306a934e5b39aac34910296764f96293fff34a96 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 13 Feb 2018 11:27:46 +0100
Subject: [PATCH 0071/1678] mlxsw: spectrum: Fix a coding style nit

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3dcc58d61506..da3d981a09f9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -582,8 +582,8 @@ mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	return NULL;
 }
 
-static struct mlxsw_sp_span_entry
-*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
 {
 	struct mlxsw_sp_span_entry *span_entry;
 

From ce470b44e256a5c25ae1f45a3086e6838fe52ecd Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 13 Feb 2018 11:27:47 +0100
Subject: [PATCH 0072/1678] mlxsw: spectrum: Drop struct span_entry.used

The member ref_count already determines whether a given SPAN entry is
used, and is as easy to use as a dedicated boolean.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ++----
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index da3d981a09f9..0a30438832c9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -534,7 +534,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 	/* find a free entry to use */
 	index = -1;
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-		if (!mlxsw_sp->span.entries[i].used) {
+		if (!mlxsw_sp->span.entries[i].ref_count) {
 			index = i;
 			span_entry = &mlxsw_sp->span.entries[i];
 			break;
@@ -549,7 +549,6 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 	if (err)
 		return NULL;
 
-	span_entry->used = true;
 	span_entry->id = index;
 	span_entry->ref_count = 1;
 	span_entry->local_port = local_port;
@@ -565,7 +564,6 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 
 	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-	span_entry->used = false;
 }
 
 struct mlxsw_sp_span_entry *
@@ -576,7 +574,7 @@ mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
 
-		if (curr->used && curr->local_port == local_port)
+		if (curr->ref_count && curr->local_port == local_port)
 			return curr;
 	}
 	return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index bdd8f94a452c..44dc916f4f5c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -124,7 +124,6 @@ struct mlxsw_sp_span_inspected_port {
 
 struct mlxsw_sp_span_entry {
 	u8 local_port;
-	bool used;
 	struct list_head bound_ports_list;
 	int ref_count;
 	int id;

From a629ef210d890bc279280d5f53ef72e10ad8a523 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 13 Feb 2018 11:27:48 +0100
Subject: [PATCH 0073/1678] mlxsw: spectrum: Move SPAN code to separate module

For the upcoming work on SPAN, it makes sense to move the current code
to a module of its own. It already has a well-defined API boundary to
the mirror management (which is used from matchall and ACL code). A
couple more functions need to be exported for the functions that
spectrum.c needs to use for MTU handling and subsystem init/fini.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile  |   2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 320 +---------------
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  29 +-
 .../mlxsw/spectrum_acl_flex_actions.c         |   1 +
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 356 ++++++++++++++++++
 .../ethernet/mellanox/mlxsw/spectrum_span.h   |  73 ++++
 6 files changed, 433 insertions(+), 348 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 9463c3fa254f..0cadcabfe86f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -20,7 +20,7 @@ mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_cnt.o spectrum_fid.o \
 				   spectrum_ipip.o spectrum_acl_flex_actions.o \
 				   spectrum_mr.o spectrum_mr_tcam.o \
-				   spectrum_qdisc.o
+				   spectrum_qdisc.o spectrum_span.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
 obj-$(CONFIG_MLXSW_MINIMAL)	+= mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 0a30438832c9..b3a423f175ce 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -71,6 +71,7 @@
 #include "spectrum_cnt.h"
 #include "spectrum_dpipe.h"
 #include "spectrum_acl_flex_actions.h"
+#include "spectrum_span.h"
 #include "../mlxfw/mlxfw.h"
 
 #define MLXSW_FWREV_MAJOR 13
@@ -487,325 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 }
 
-static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
-{
-	int i;
-
-	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
-		return -EIO;
-
-	mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
-							  MAX_SPAN);
-	mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
-					 sizeof(struct mlxsw_sp_span_entry),
-					 GFP_KERNEL);
-	if (!mlxsw_sp->span.entries)
-		return -ENOMEM;
-
-	for (i = 0; i < mlxsw_sp->span.entries_count; i++)
-		INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
-
-	return 0;
-}
-
-static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
-{
-	int i;
-
-	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
-		WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
-	}
-	kfree(mlxsw_sp->span.entries);
-}
-
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
-{
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	struct mlxsw_sp_span_entry *span_entry;
-	char mpat_pl[MLXSW_REG_MPAT_LEN];
-	u8 local_port = port->local_port;
-	int index;
-	int i;
-	int err;
-
-	/* find a free entry to use */
-	index = -1;
-	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-		if (!mlxsw_sp->span.entries[i].ref_count) {
-			index = i;
-			span_entry = &mlxsw_sp->span.entries[i];
-			break;
-		}
-	}
-	if (index < 0)
-		return NULL;
-
-	/* create a new port analayzer entry for local_port */
-	mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-	if (err)
-		return NULL;
-
-	span_entry->id = index;
-	span_entry->ref_count = 1;
-	span_entry->local_port = local_port;
-	return span_entry;
-}
-
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_span_entry *span_entry)
-{
-	u8 local_port = span_entry->local_port;
-	char mpat_pl[MLXSW_REG_MPAT_LEN];
-	int pa_id = span_entry->id;
-
-	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-}
-
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
-{
-	int i;
-
-	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
-		if (curr->ref_count && curr->local_port == local_port)
-			return curr;
-	}
-	return NULL;
-}
-
-static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
-{
-	struct mlxsw_sp_span_entry *span_entry;
-
-	span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
-					      port->local_port);
-	if (span_entry) {
-		/* Already exists, just take a reference */
-		span_entry->ref_count++;
-		return span_entry;
-	}
-
-	return mlxsw_sp_span_entry_create(port);
-}
-
-static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
-				   struct mlxsw_sp_span_entry *span_entry)
-{
-	WARN_ON(!span_entry->ref_count);
-	if (--span_entry->ref_count == 0)
-		mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
-	return 0;
-}
-
-static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
-{
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	struct mlxsw_sp_span_inspected_port *p;
-	int i;
-
-	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
-		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-
-		list_for_each_entry(p, &curr->bound_ports_list, list)
-			if (p->local_port == port->local_port &&
-			    p->type == MLXSW_SP_SPAN_EGRESS)
-				return true;
-	}
-
-	return false;
-}
-
-static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
-					 int mtu)
-{
-	return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
-}
-
-static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
-{
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char sbib_pl[MLXSW_REG_SBIB_LEN];
-	int err;
-
-	/* If port is egress mirrored, the shared buffer size should be
-	 * updated according to the mtu value
-	 */
-	if (mlxsw_sp_span_is_egress_mirror(port)) {
-		u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
-
-		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
-		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-		if (err) {
-			netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
-				    struct mlxsw_sp_span_entry *span_entry)
-{
-	struct mlxsw_sp_span_inspected_port *p;
-
-	list_for_each_entry(p, &span_entry->bound_ports_list, list)
-		if (port->local_port == p->local_port)
-			return p;
-	return NULL;
-}
-
-static int
-mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
-				  struct mlxsw_sp_span_entry *span_entry,
-				  enum mlxsw_sp_span_type type,
-				  bool bind)
-{
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char mpar_pl[MLXSW_REG_MPAR_LEN];
-	int pa_id = span_entry->id;
-
-	/* bind the port to the SPAN entry */
-	mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
-			    (enum mlxsw_reg_mpar_i_e) type, bind, pa_id);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
-}
-
-static int
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
-				 struct mlxsw_sp_span_entry *span_entry,
-				 enum mlxsw_sp_span_type type,
-				 bool bind)
-{
-	struct mlxsw_sp_span_inspected_port *inspected_port;
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char sbib_pl[MLXSW_REG_SBIB_LEN];
-	int err;
-
-	/* if it is an egress SPAN, bind a shared buffer to it */
-	if (type == MLXSW_SP_SPAN_EGRESS) {
-		u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
-							     port->dev->mtu);
-
-		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
-		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-		if (err) {
-			netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
-			return err;
-		}
-	}
-
-	if (bind) {
-		err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-							true);
-		if (err)
-			goto err_port_bind;
-	}
-
-	inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
-	if (!inspected_port) {
-		err = -ENOMEM;
-		goto err_inspected_port_alloc;
-	}
-	inspected_port->local_port = port->local_port;
-	inspected_port->type = type;
-	list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
-
-	return 0;
-
-err_inspected_port_alloc:
-	if (bind)
-		mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-						  false);
-err_port_bind:
-	if (type == MLXSW_SP_SPAN_EGRESS) {
-		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-	}
-	return err;
-}
-
-static void
-mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
-				 struct mlxsw_sp_span_entry *span_entry,
-				 enum mlxsw_sp_span_type type,
-				 bool bind)
-{
-	struct mlxsw_sp_span_inspected_port *inspected_port;
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char sbib_pl[MLXSW_REG_SBIB_LEN];
-
-	inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
-	if (!inspected_port)
-		return;
-
-	if (bind)
-		mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-						  false);
-	/* remove the SBIB buffer if it was egress SPAN */
-	if (type == MLXSW_SP_SPAN_EGRESS) {
-		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-	}
-
-	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-
-	list_del(&inspected_port->list);
-	kfree(inspected_port);
-}
-
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-			     struct mlxsw_sp_port *to,
-			     enum mlxsw_sp_span_type type, bool bind)
-{
-	struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
-	struct mlxsw_sp_span_entry *span_entry;
-	int err;
-
-	span_entry = mlxsw_sp_span_entry_get(to);
-	if (!span_entry)
-		return -ENOENT;
-
-	netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
-		   span_entry->id);
-
-	err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
-	if (err)
-		goto err_port_bind;
-
-	return 0;
-
-err_port_bind:
-	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-	return err;
-}
-
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
-			      enum mlxsw_sp_span_type type, bool bind)
-{
-	struct mlxsw_sp_span_entry *span_entry;
-
-	span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
-					      destination_port);
-	if (!span_entry) {
-		netdev_err(from->dev, "no span entry found\n");
-		return;
-	}
-
-	netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
-		   span_entry->id);
-	mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
-}
-
 static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				    bool enable, u32 rate)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 44dc916f4f5c..6718a1f0482c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -80,6 +80,7 @@ enum mlxsw_sp_resource_id {
 
 struct mlxsw_sp_port;
 struct mlxsw_sp_rif;
+struct mlxsw_sp_span_entry;
 
 struct mlxsw_sp_upper {
 	struct net_device *dev;
@@ -111,24 +112,6 @@ struct mlxsw_sp_mid {
 	unsigned long *ports_in_mid; /* bits array */
 };
 
-enum mlxsw_sp_span_type {
-	MLXSW_SP_SPAN_EGRESS,
-	MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
-	struct list_head list;
-	enum mlxsw_sp_span_type type;
-	u8 local_port;
-};
-
-struct mlxsw_sp_span_entry {
-	u8 local_port;
-	struct list_head bound_ports_list;
-	int ref_count;
-	int id;
-};
-
 enum mlxsw_sp_port_mall_action_type {
 	MLXSW_SP_PORT_MALL_MIRROR,
 	MLXSW_SP_PORT_MALL_SAMPLE,
@@ -395,16 +378,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
 void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
 struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-			     struct mlxsw_sp_port *to,
-			     enum mlxsw_sp_span_type type,
-			     bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from,
-			      u8 destination_port,
-			      enum mlxsw_sp_span_type type,
-			      bool bind);
-struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
 
 /* spectrum_dcb.c */
 #ifdef CONFIG_MLXSW_SPECTRUM_DCB
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 6ca6894125f0..f7e61cecc42b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -35,6 +35,7 @@
 
 #include "spectrum_acl_flex_actions.h"
 #include "core_acl_flex_actions.h"
+#include "spectrum_span.h"
 
 #define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
new file mode 100644
index 000000000000..c3bec37d71ed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -0,0 +1,356 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+
+#include "spectrum.h"
+#include "spectrum_span.h"
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
+		return -EIO;
+
+	mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+							  MAX_SPAN);
+	mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
+					 sizeof(struct mlxsw_sp_span_entry),
+					 GFP_KERNEL);
+	if (!mlxsw_sp->span.entries)
+		return -ENOMEM;
+
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++)
+		INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
+
+	return 0;
+}
+
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+		WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
+	}
+	kfree(mlxsw_sp->span.entries);
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+{
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	struct mlxsw_sp_span_entry *span_entry;
+	char mpat_pl[MLXSW_REG_MPAT_LEN];
+	u8 local_port = port->local_port;
+	int index;
+	int i;
+	int err;
+
+	/* find a free entry to use */
+	index = -1;
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		if (!mlxsw_sp->span.entries[i].ref_count) {
+			index = i;
+			span_entry = &mlxsw_sp->span.entries[i];
+			break;
+		}
+	}
+	if (index < 0)
+		return NULL;
+
+	/* create a new port analayzer entry for local_port */
+	mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+	if (err)
+		return NULL;
+
+	span_entry->id = index;
+	span_entry->ref_count = 1;
+	span_entry->local_port = local_port;
+	return span_entry;
+}
+
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp_span_entry *span_entry)
+{
+	u8 local_port = span_entry->local_port;
+	char mpat_pl[MLXSW_REG_MPAT_LEN];
+	int pa_id = span_entry->id;
+
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{
+	int i;
+
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+		if (curr->ref_count && curr->local_port == local_port)
+			return curr;
+	}
+	return NULL;
+}
+
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+{
+	struct mlxsw_sp_span_entry *span_entry;
+
+	span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
+					      port->local_port);
+	if (span_entry) {
+		/* Already exists, just take a reference */
+		span_entry->ref_count++;
+		return span_entry;
+	}
+
+	return mlxsw_sp_span_entry_create(port);
+}
+
+static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_span_entry *span_entry)
+{
+	WARN_ON(!span_entry->ref_count);
+	if (--span_entry->ref_count == 0)
+		mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+	return 0;
+}
+
+static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
+{
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	struct mlxsw_sp_span_inspected_port *p;
+	int i;
+
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+		list_for_each_entry(p, &curr->bound_ports_list, list)
+			if (p->local_port == port->local_port &&
+			    p->type == MLXSW_SP_SPAN_EGRESS)
+				return true;
+	}
+
+	return false;
+}
+
+static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp,
+					 int mtu)
+{
+	return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1;
+}
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
+{
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	char sbib_pl[MLXSW_REG_SBIB_LEN];
+	int err;
+
+	/* If port is egress mirrored, the shared buffer size should be
+	 * updated according to the mtu value
+	 */
+	if (mlxsw_sp_span_is_egress_mirror(port)) {
+		u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu);
+
+		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+		if (err) {
+			netdev_err(port->dev, "Could not update shared buffer for mirroring\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static struct mlxsw_sp_span_inspected_port *
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
+				    struct mlxsw_sp_span_entry *span_entry)
+{
+	struct mlxsw_sp_span_inspected_port *p;
+
+	list_for_each_entry(p, &span_entry->bound_ports_list, list)
+		if (port->local_port == p->local_port)
+			return p;
+	return NULL;
+}
+
+static int
+mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
+				  struct mlxsw_sp_span_entry *span_entry,
+				  enum mlxsw_sp_span_type type,
+				  bool bind)
+{
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	char mpar_pl[MLXSW_REG_MPAR_LEN];
+	int pa_id = span_entry->id;
+
+	/* bind the port to the SPAN entry */
+	mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+			    (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
+				 struct mlxsw_sp_span_entry *span_entry,
+				 enum mlxsw_sp_span_type type,
+				 bool bind)
+{
+	struct mlxsw_sp_span_inspected_port *inspected_port;
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	char sbib_pl[MLXSW_REG_SBIB_LEN];
+	int err;
+
+	/* if it is an egress SPAN, bind a shared buffer to it */
+	if (type == MLXSW_SP_SPAN_EGRESS) {
+		u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
+							     port->dev->mtu);
+
+		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize);
+		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+		if (err) {
+			netdev_err(port->dev, "Could not create shared buffer for mirroring\n");
+			return err;
+		}
+	}
+
+	if (bind) {
+		err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+							true);
+		if (err)
+			goto err_port_bind;
+	}
+
+	inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
+	if (!inspected_port) {
+		err = -ENOMEM;
+		goto err_inspected_port_alloc;
+	}
+	inspected_port->local_port = port->local_port;
+	inspected_port->type = type;
+	list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
+
+	return 0;
+
+err_inspected_port_alloc:
+	if (bind)
+		mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+						  false);
+err_port_bind:
+	if (type == MLXSW_SP_SPAN_EGRESS) {
+		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+	}
+	return err;
+}
+
+static void
+mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
+				 struct mlxsw_sp_span_entry *span_entry,
+				 enum mlxsw_sp_span_type type,
+				 bool bind)
+{
+	struct mlxsw_sp_span_inspected_port *inspected_port;
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+	inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+	if (!inspected_port)
+		return;
+
+	if (bind)
+		mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
+						  false);
+	/* remove the SBIB buffer if it was egress SPAN */
+	if (type == MLXSW_SP_SPAN_EGRESS) {
+		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
+		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+	}
+
+	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+
+	list_del(&inspected_port->list);
+	kfree(inspected_port);
+}
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+			     struct mlxsw_sp_port *to,
+			     enum mlxsw_sp_span_type type, bool bind)
+{
+	struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+	struct mlxsw_sp_span_entry *span_entry;
+	int err;
+
+	span_entry = mlxsw_sp_span_entry_get(to);
+	if (!span_entry)
+		return -ENOENT;
+
+	netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
+		   span_entry->id);
+
+	err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
+	if (err)
+		goto err_port_bind;
+
+	return 0;
+
+err_port_bind:
+	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+	return err;
+}
+
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+			      enum mlxsw_sp_span_type type, bool bind)
+{
+	struct mlxsw_sp_span_entry *span_entry;
+
+	span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
+					      destination_port);
+	if (!span_entry) {
+		netdev_err(from->dev, "no span entry found\n");
+		return;
+	}
+
+	netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
+		   span_entry->id);
+	mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
new file mode 100644
index 000000000000..069050e385ff
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -0,0 +1,73 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.h
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_SPAN_H
+#define _MLXSW_SPECTRUM_SPAN_H
+
+#include <linux/types.h>
+
+struct mlxsw_sp;
+struct mlxsw_sp_port;
+
+enum mlxsw_sp_span_type {
+	MLXSW_SP_SPAN_EGRESS,
+	MLXSW_SP_SPAN_INGRESS
+};
+
+struct mlxsw_sp_span_inspected_port {
+	struct list_head list;
+	enum mlxsw_sp_span_type type;
+	u8 local_port;
+};
+
+struct mlxsw_sp_span_entry {
+	u8 local_port;
+	struct list_head bound_ports_list;
+	int ref_count;
+	int id;
+};
+
+int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+
+int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
+			     struct mlxsw_sp_port *to,
+			     enum mlxsw_sp_span_type type, bool bind);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+			      enum mlxsw_sp_span_type type, bool bind);
+struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+
+int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
+
+#endif

From 6c677750f22db3bb466a95f08b91a1cca8323146 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Tue, 13 Feb 2018 11:29:05 +0100
Subject: [PATCH 0074/1678] mlxsw: spectrum: Use NL_SET_ERR_MSG_MOD

Use NL_SET_ERR_MSG_MOD helper which adds the module name instead
of specifying the prefix each time.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 28 +++++++------------
 .../ethernet/mellanox/mlxsw/spectrum_router.c |  6 ++--
 .../mellanox/mlxsw/spectrum_switchdev.c       |  6 ++--
 3 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b3a423f175ce..5e8ea712caa2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4236,13 +4236,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
 	u16 lag_id;
 
 	if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
-		NL_SET_ERR_MSG(extack,
-			       "spectrum: Exceeded number of supported LAG devices");
+		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices");
 		return false;
 	}
 	if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
-		NL_SET_ERR_MSG(extack,
-			       "spectrum: LAG device using unsupported Tx type");
+		NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
 		return false;
 	}
 	return true;
@@ -4484,8 +4482,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 		    !netif_is_lag_master(upper_dev) &&
 		    !netif_is_bridge_master(upper_dev) &&
 		    !netif_is_ovs_master(upper_dev)) {
-			NL_SET_ERR_MSG(extack,
-				       "spectrum: Unknown upper device type");
+			NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type");
 			return -EINVAL;
 		}
 		if (!info->linking)
@@ -4494,8 +4491,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 		    (!netif_is_bridge_master(upper_dev) ||
 		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
 							  upper_dev))) {
-			NL_SET_ERR_MSG(extack,
-				       "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+			NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
 		}
 		if (netif_is_lag_master(upper_dev) &&
@@ -4503,24 +4499,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
 					       info->upper_info, extack))
 			return -EINVAL;
 		if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
-			NL_SET_ERR_MSG(extack,
-				       "spectrum: Master device is a LAG master and this device has a VLAN");
+			NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN");
 			return -EINVAL;
 		}
 		if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
 		    !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
-			NL_SET_ERR_MSG(extack,
-				       "spectrum: Can not put a VLAN on a LAG port");
+			NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port");
 			return -EINVAL;
 		}
 		if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
-			NL_SET_ERR_MSG(extack,
-				       "spectrum: Master device is an OVS master and this device has a VLAN");
+			NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN");
 			return -EINVAL;
 		}
 		if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
-			NL_SET_ERR_MSG(extack,
-				       "spectrum: Can not put a VLAN on an OVS port");
+			NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port");
 			return -EINVAL;
 		}
 		break;
@@ -4633,7 +4625,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 	case NETDEV_PRECHANGEUPPER:
 		upper_dev = info->upper_dev;
 		if (!netif_is_bridge_master(upper_dev)) {
-			NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
+			NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers");
 			return -EINVAL;
 		}
 		if (!info->linking)
@@ -4642,7 +4634,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
 		    (!netif_is_bridge_master(upper_dev) ||
 		     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
 							  upper_dev))) {
-			NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
+			NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported");
 			return -EINVAL;
 		}
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d8aaf8fd9cc2..267a0120c7a4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -793,7 +793,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 
 	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
 	if (!vr) {
-		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
+		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
 		return ERR_PTR(-EBUSY);
 	}
 	vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
@@ -5796,7 +5796,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
 	}
 
 	if (err < 0)
-		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");
+		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload");
 
 	return err;
 }
@@ -6035,7 +6035,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 
 	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
 	if (err) {
-		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
+		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
 		goto err_rif_index_alloc;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 593ad31be749..f9f53af04fe1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1819,7 +1819,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
 
 	if (is_vlan_dev(bridge_port->dev)) {
-		NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
+		NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge");
 		return -EINVAL;
 	}
 
@@ -1885,7 +1885,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 	u16 vid;
 
 	if (!is_vlan_dev(bridge_port->dev)) {
-		NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
+		NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge");
 		return -EINVAL;
 	}
 	vid = vlan_dev_vlan_id(bridge_port->dev);
@@ -1895,7 +1895,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 		return -EINVAL;
 
 	if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
-		NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
+		NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port");
 		return -EINVAL;
 	}
 

From ff22b5bf78fe2a9f2a26950cc2ea180180bef47f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 13 Feb 2018 19:33:20 +0800
Subject: [PATCH 0075/1678] sctp: rename sctp_diag.c as diag.c

Remove 'sctp_' prefix for diag file, to keep consistent with other
files' names.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/Makefile                | 2 ++
 net/sctp/{sctp_diag.c => diag.c} | 0
 2 files changed, 2 insertions(+)
 rename net/sctp/{sctp_diag.c => diag.c} (100%)

diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6776582ec449..e845e4588535 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -15,6 +15,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  offload.o stream_sched.o stream_sched_prio.o \
 	  stream_sched_rr.o stream_interleave.o
 
+sctp_diag-y := diag.o
+
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
 sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/sctp_diag.c b/net/sctp/diag.c
similarity index 100%
rename from net/sctp/sctp_diag.c
rename to net/sctp/diag.c

From 6f68dc993afce0b580b9b1a9edadcd4b178cbb06 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 13 Feb 2018 19:33:21 +0800
Subject: [PATCH 0076/1678] sctp: add file comments in diag.c

This patch is to add the missing file comments for sctp diag file.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/diag.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index a72a7d925d46..078f01a8d582 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -1,3 +1,34 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions implement sctp diag support.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Xin Long <lucien.xin@gmail.com>
+ */
+
 #include <linux/module.h>
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>

From 0d876f2c6de5b1b00c2fd38bb4f4692e720207b1 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:11:34 -0800
Subject: [PATCH 0077/1678] net/ipv4: Simplify fib_select_path

If flow oif is set and it is not an l3mdev, then fib_select_path
can jump to the source address check.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c586597da20d..0b8c0a719f3e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1760,13 +1760,11 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
 		     struct flowi4 *fl4, const struct sk_buff *skb)
 {
-	bool oif_check;
-
-	oif_check = (fl4->flowi4_oif == 0 ||
-		     fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF);
+	if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
+		goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (res->fi->fib_nhs > 1 && oif_check) {
+	if (res->fi->fib_nhs > 1) {
 		int h = fib_multipath_hash(res->fi, fl4, skb);
 
 		fib_select_multipath(res, h);
@@ -1775,9 +1773,10 @@ void fib_select_path(struct net *net, struct fib_result *res,
 #endif
 	if (!res->prefixlen &&
 	    res->table->tb_num_default > 1 &&
-	    res->type == RTN_UNICAST && oif_check)
+	    res->type == RTN_UNICAST)
 		fib_select_default(fl4, res);
 
+check_saddr:
 	if (!fl4->saddr)
 		fl4->saddr = FIB_RES_PREFSRC(net, *res);
 }

From 8c2ceabe99e04005cadba739856eed6953a8a3af Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:12:12 -0800
Subject: [PATCH 0078/1678] net/ipv4: Unexport fib_multipath_hash and
 fib_select_path

Do not export fib_multipath_hash or fib_select_path; both are only used
by core ipv4 code.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 1 -
 net/ipv4/route.c         | 1 -
 2 files changed, 2 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0b8c0a719f3e..2b883b58354f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1780,4 +1780,3 @@ check_saddr:
 	if (!fl4->saddr)
 		fl4->saddr = FIB_RES_PREFSRC(net, *res);
 }
-EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 9376ed69ffeb..b0ef4cc3e875 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1847,7 +1847,6 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 
 	return mhash >> 1;
 }
-EXPORT_SYMBOL_GPL(fib_multipath_hash);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
 static int ip_mkroute_input(struct sk_buff *skb,

From 07f4e62f1c704f583740e6dd59c223819feb03ec Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:37:33 -0800
Subject: [PATCH 0079/1678] selftests: fib_tests: simplify ip commands in a
 namespace

'ip netns exec testns ip' is more efficiently handled using 'ip -netns';
runs the ip command after switching the namespace and avoids an exec.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 210 +++++++++++------------
 1 file changed, 105 insertions(+), 105 deletions(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a9154eefb2e2..672f263f8f53 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -34,23 +34,23 @@ fib_unreg_unicast_test()
 
 	netns_create "testns"
 
-	ip netns exec testns ip link add dummy0 type dummy
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
 
-	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip link del dev dummy0
+	ip -netns testns link del dev dummy0
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	check_fail $?
 
 	ip netns del testns
@@ -68,40 +68,40 @@ fib_unreg_multipath_test()
 
 	netns_create "testns"
 
-	ip netns exec testns ip link add dummy0 type dummy
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
 
-	ip netns exec testns ip link add dummy1 type dummy
-	ip netns exec testns ip link set dev dummy1 up
+	ip -netns testns link add dummy1 type dummy
+	ip -netns testns link set dev dummy1 up
 
-	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 
-	ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
-	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
+	ip -netns testns address add 192.0.2.1/24 dev dummy1
+	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
 
-	ip netns exec testns ip route add 203.0.113.0/24 \
+	ip -netns testns route add 203.0.113.0/24 \
 		nexthop via 198.51.100.2 dev dummy0 \
 		nexthop via 192.0.2.2 dev dummy1
-	ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+	ip -netns testns -6 route add 2001:db8:3::/64 \
 		nexthop via 2001:db8:1::2 dev dummy0 \
 		nexthop via 2001:db8:2::2 dev dummy1
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip link del dev dummy0
+	ip -netns testns link del dev dummy0
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	# In IPv6 we do not flush the entire multipath route.
 	check_err $?
 
-	ip netns exec testns ip link del dev dummy1
+	ip -netns testns link del dev dummy1
 
 	ip netns del testns
 
@@ -126,26 +126,26 @@ fib_down_unicast_test()
 
 	netns_create "testns"
 
-	ip netns exec testns ip link add dummy0 type dummy
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
 
-	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip link set dev dummy0 down
+	ip -netns testns link set dev dummy0 down
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	check_fail $?
 
-	ip netns exec testns ip link del dev dummy0
+	ip -netns testns link del dev dummy0
 
 	ip netns del testns
 
@@ -161,31 +161,31 @@ fib_down_multipath_test_do()
 	local down_dev=$1
 	local up_dev=$2
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 \
+	ip -netns testns route get fibmatch 203.0.113.1 \
 		oif $down_dev &> /dev/null
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
 		oif $down_dev &> /dev/null
 	check_fail $?
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 \
+	ip -netns testns route get fibmatch 203.0.113.1 \
 		oif $up_dev &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 \
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
 		oif $up_dev &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+	ip -netns testns route get fibmatch 203.0.113.1 | \
 		grep $down_dev | grep -q "dead linkdown"
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
 		grep $down_dev | grep -q "dead linkdown"
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 | \
+	ip -netns testns route get fibmatch 203.0.113.1 | \
 		grep $up_dev | grep -q "dead linkdown"
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
 		grep $up_dev | grep -q "dead linkdown"
 	check_fail $?
 }
@@ -196,52 +196,52 @@ fib_down_multipath_test()
 
 	netns_create "testns"
 
-	ip netns exec testns ip link add dummy0 type dummy
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
 
-	ip netns exec testns ip link add dummy1 type dummy
-	ip netns exec testns ip link set dev dummy1 up
+	ip -netns testns link add dummy1 type dummy
+	ip -netns testns link set dev dummy1 up
 
-	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 
-	ip netns exec testns ip address add 192.0.2.1/24 dev dummy1
-	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy1
+	ip -netns testns address add 192.0.2.1/24 dev dummy1
+	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
 
-	ip netns exec testns ip route add 203.0.113.0/24 \
+	ip -netns testns route add 203.0.113.0/24 \
 		nexthop via 198.51.100.2 dev dummy0 \
 		nexthop via 192.0.2.2 dev dummy1
-	ip netns exec testns ip -6 route add 2001:db8:3::/64 \
+	ip -netns testns -6 route add 2001:db8:3::/64 \
 		nexthop via 2001:db8:1::2 dev dummy0 \
 		nexthop via 2001:db8:2::2 dev dummy1
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip link set dev dummy0 down
+	ip -netns testns link set dev dummy0 down
 	check_err $?
 
 	fib_down_multipath_test_do "dummy0" "dummy1"
 
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link set dev dummy0 up
 	check_err $?
-	ip netns exec testns ip link set dev dummy1 down
+	ip -netns testns link set dev dummy1 down
 	check_err $?
 
 	fib_down_multipath_test_do "dummy1" "dummy0"
 
-	ip netns exec testns ip link set dev dummy0 down
+	ip -netns testns link set dev dummy0 down
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 203.0.113.1 &> /dev/null
+	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	check_fail $?
 
-	ip netns exec testns ip link del dev dummy1
-	ip netns exec testns ip link del dev dummy0
+	ip -netns testns link del dev dummy1
+	ip -netns testns link del dev dummy0
 
 	ip netns del testns
 
@@ -267,56 +267,56 @@ fib_carrier_local_test()
 	# Local routes should not be affected when carrier changes.
 	netns_create "testns"
 
-	ip netns exec testns ip link add dummy0 type dummy
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
 
-	ip netns exec testns ip link set dev dummy0 carrier on
+	ip -netns testns link set dev dummy0 carrier on
 
-	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 
-	ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+	ip -netns testns route get fibmatch 198.51.100.1 | \
 		grep -q "linkdown"
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
 		grep -q "linkdown"
 	check_fail $?
 
-	ip netns exec testns ip link set dev dummy0 carrier off
+	ip -netns testns link set dev dummy0 carrier off
 
-	ip netns exec testns ip route get fibmatch 198.51.100.1 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 198.51.100.1 | \
+	ip -netns testns route get fibmatch 198.51.100.1 | \
 		grep -q "linkdown"
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::1 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
 		grep -q "linkdown"
 	check_fail $?
 
-	ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+	ip -netns testns address add 192.0.2.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
 
-	ip netns exec testns ip route get fibmatch 192.0.2.1 &> /dev/null
+	ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 192.0.2.1 | \
+	ip -netns testns route get fibmatch 192.0.2.1 | \
 		grep -q "linkdown"
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::1 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
 		grep -q "linkdown"
 	check_fail $?
 
-	ip netns exec testns ip link del dev dummy0
+	ip -netns testns link del dev dummy0
 
 	ip netns del testns
 
@@ -333,56 +333,56 @@ fib_carrier_unicast_test()
 
 	netns_create "testns"
 
-	ip netns exec testns ip link add dummy0 type dummy
-	ip netns exec testns ip link set dev dummy0 up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
 
-	ip netns exec testns ip link set dev dummy0 carrier on
+	ip -netns testns link set dev dummy0 carrier on
 
-	ip netns exec testns ip address add 198.51.100.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:1::1/64 dev dummy0
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+	ip -netns testns route get fibmatch 198.51.100.2 | \
 		grep -q "linkdown"
 	check_fail $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
 		grep -q "linkdown"
 	check_fail $?
 
-	ip netns exec testns ip link set dev dummy0 carrier off
+	ip -netns testns link set dev dummy0 carrier off
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 &> /dev/null
+	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 198.51.100.2 | \
+	ip -netns testns route get fibmatch 198.51.100.2 | \
 		grep -q "linkdown"
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:1::2 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
 		grep -q "linkdown"
 	check_err $?
 
-	ip netns exec testns ip address add 192.0.2.1/24 dev dummy0
-	ip netns exec testns ip -6 address add 2001:db8:2::1/64 dev dummy0
+	ip -netns testns address add 192.0.2.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
 
-	ip netns exec testns ip route get fibmatch 192.0.2.2 &> /dev/null
+	ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+	ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
 	check_err $?
 
-	ip netns exec testns ip route get fibmatch 192.0.2.2 | \
+	ip -netns testns route get fibmatch 192.0.2.2 | \
 		grep -q "linkdown"
 	check_err $?
-	ip netns exec testns ip -6 route get fibmatch 2001:db8:2::2 | \
+	ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
 		grep -q "linkdown"
 	check_err $?
 
-	ip netns exec testns ip link del dev dummy0
+	ip -netns testns link del dev dummy0
 
 	ip netns del testns
 

From 1056691b26809e838da67bc2a61761017d20cfda Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:37:34 -0800
Subject: [PATCH 0080/1678] selftests: fib_tests: Make test results more
 verbose

fib_tests.sh is failing in a VM:
    $ fib_tests.sh
    Running netdev unregister tests
    PASS: unicast route test
    PASS: multipath route test
    Running netdev down tests
    PASS: unicast route test
    PASS: multipath route test
    Running netdev carrier change tests
    PASS: local route carrier test
    FAIL: unicast route carrier test

The last test corresponds to fib_carrier_unicast_test which 12 places
that could be failing. Be more verbose in the output so a failure is
easier to track down and separate test setup failures with set -e and
set +e pairs.

With the verbose logging it is easier to see which checks are failing:
    $fib_tests.sh
    Single path route carrier test
        ....
        Carrier down
            IPv4 fibmatch                                         [ OK ]
            IPv6 fibmatch                                         [ OK ]
            IPv4 linkdown flag set                                [FAIL]
            IPv6 linkdown flag set                                [FAIL]
        Second address added with carrier down
            IPv4 fibmatch                                         [ OK ]
            IPv6 fibmatch                                         [ OK ]
            IPv4 linkdown flag set                                [FAIL]
            IPv6 linkdown flag set                                [ OK ]

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 248 ++++++++++++-----------
 1 file changed, 135 insertions(+), 113 deletions(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 672f263f8f53..d4e0b5cb4355 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,17 +6,25 @@
 
 ret=0
 
-check_err()
-{
-	if [ $ret -eq 0 ]; then
-		ret=$1
-	fi
-}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 
-check_fail()
+log_test()
 {
-	if [ $1 -eq 0 ]; then
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "        %-60s  [ OK ]\n" "${msg}"
+	else
 		ret=1
+		printf "        %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
 	fi
 }
 
@@ -30,42 +38,44 @@ netns_create()
 
 fib_unreg_unicast_test()
 {
-	ret=0
+	echo
+	echo "Single path route test"
 
+	set -e
 	netns_create "testns"
 
 	ip -netns testns link add dummy0 type dummy
 	ip -netns testns link set dev dummy0 up
-
 	ip -netns testns address add 198.51.100.1/24 dev dummy0
 	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
 
+	echo "    Start point"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
+	set -e
 	ip -netns testns link del dev dummy0
-	check_err $?
+	set +e
 
+	echo "    Nexthop device deleted"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv4 fibmatch - no route"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv6 fibmatch - no route"
 
 	ip netns del testns
-
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: unicast route test"
-		return 1
-	fi
-	echo "PASS: unicast route test"
 }
 
 fib_unreg_multipath_test()
 {
-	ret=0
 
+	echo
+	echo "Multipath route test"
+
+	set -e
 	netns_create "testns"
 
 	ip -netns testns link add dummy0 type dummy
@@ -86,44 +96,49 @@ fib_unreg_multipath_test()
 	ip -netns testns -6 route add 2001:db8:3::/64 \
 		nexthop via 2001:db8:1::2 dev dummy0 \
 		nexthop via 2001:db8:2::2 dev dummy1
+	set +e
 
+	echo "    Start point"
 	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
+	set -e
 	ip -netns testns link del dev dummy0
-	check_err $?
+	set +e
 
+	echo "    One nexthop device deleted"
 	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv4 - multipath route removed on delete"
+
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	# In IPv6 we do not flush the entire multipath route.
-	check_err $?
+	log_test $? 0 "IPv6 - multipath down to single path"
 
+	set -e
 	ip -netns testns link del dev dummy1
+	set +e
+
+	echo "    Second nexthop device deleted"
+	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	log_test $? 2 "IPv6 - no route"
 
 	ip netns del testns
-
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: multipath route test"
-		return 1
-	fi
-	echo "PASS: multipath route test"
 }
 
 fib_unreg_test()
 {
-	echo "Running netdev unregister tests"
-
 	fib_unreg_unicast_test
 	fib_unreg_multipath_test
 }
 
 fib_down_unicast_test()
 {
-	ret=0
+	echo
+	echo "Single path, admin down"
 
+	set -e
 	netns_create "testns"
 
 	ip -netns testns link add dummy0 type dummy
@@ -131,29 +146,27 @@ fib_down_unicast_test()
 
 	ip -netns testns address add 198.51.100.1/24 dev dummy0
 	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
 
+	echo "    Start point"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
+	set -e
 	ip -netns testns link set dev dummy0 down
-	check_err $?
+	set +e
 
+	echo "    Route deleted on down"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv6 fibmatch"
 
 	ip -netns testns link del dev dummy0
 
 	ip netns del testns
-
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: unicast route test"
-		return 1
-	fi
-	echo "PASS: unicast route test"
 }
 
 fib_down_multipath_test_do()
@@ -163,37 +176,39 @@ fib_down_multipath_test_do()
 
 	ip -netns testns route get fibmatch 203.0.113.1 \
 		oif $down_dev &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv4 fibmatch on down device"
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
 		oif $down_dev &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv6 fibmatch on down device"
 
 	ip -netns testns route get fibmatch 203.0.113.1 \
 		oif $up_dev &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch on up device"
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
 		oif $up_dev &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch on up device"
 
 	ip -netns testns route get fibmatch 203.0.113.1 | \
 		grep $down_dev | grep -q "dead linkdown"
-	check_err $?
+	log_test $? 0 "IPv4 flags on down device"
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
 		grep $down_dev | grep -q "dead linkdown"
-	check_err $?
+	log_test $? 0 "IPv6 flags on down device"
 
 	ip -netns testns route get fibmatch 203.0.113.1 | \
 		grep $up_dev | grep -q "dead linkdown"
-	check_fail $?
+	log_test $? 1 "IPv4 flags on up device"
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
 		grep $up_dev | grep -q "dead linkdown"
-	check_fail $?
+	log_test $? 1 "IPv6 flags on up device"
 }
 
 fib_down_multipath_test()
 {
-	ret=0
+	echo
+	echo "Admin down multipath"
 
+	set -e
 	netns_create "testns"
 
 	ip -netns testns link add dummy0 type dummy
@@ -214,57 +229,58 @@ fib_down_multipath_test()
 	ip -netns testns -6 route add 2001:db8:3::/64 \
 		nexthop via 2001:db8:1::2 dev dummy0 \
 		nexthop via 2001:db8:2::2 dev dummy1
+	set +e
 
+	echo "    Verify start point"
 	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
+
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
+	set -e
 	ip -netns testns link set dev dummy0 down
-	check_err $?
+	set +e
 
+	echo "    One device down, one up"
 	fib_down_multipath_test_do "dummy0" "dummy1"
 
+	set -e
 	ip -netns testns link set dev dummy0 up
-	check_err $?
 	ip -netns testns link set dev dummy1 down
-	check_err $?
+	set +e
 
+	echo "    Other device down and up"
 	fib_down_multipath_test_do "dummy1" "dummy0"
 
+	set -e
 	ip -netns testns link set dev dummy0 down
-	check_err $?
+	set +e
 
+	echo "    Both devices down"
 	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
-	check_fail $?
+	log_test $? 2 "IPv6 fibmatch"
 
 	ip -netns testns link del dev dummy1
 	ip -netns testns link del dev dummy0
-
 	ip netns del testns
-
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: multipath route test"
-		return 1
-	fi
-	echo "PASS: multipath route test"
 }
 
 fib_down_test()
 {
-	echo "Running netdev down tests"
-
 	fib_down_unicast_test
 	fib_down_multipath_test
 }
 
+# Local routes should not be affected when carrier changes.
 fib_carrier_local_test()
 {
-	ret=0
+	echo
+	echo "Local carrier tests - single path"
 
-	# Local routes should not be affected when carrier changes.
+	set -e
 	netns_create "testns"
 
 	ip -netns testns link add dummy0 type dummy
@@ -274,65 +290,71 @@ fib_carrier_local_test()
 
 	ip -netns testns address add 198.51.100.1/24 dev dummy0
 	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
 
+	echo "    Start point"
 	ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
 	ip -netns testns route get fibmatch 198.51.100.1 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv4 - no linkdown flag"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv6 - no linkdown flag"
 
+	set -e
 	ip -netns testns link set dev dummy0 carrier off
+	set +e
 
+	echo "    Carrier off on nexthop"
 	ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
 	ip -netns testns route get fibmatch 198.51.100.1 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv4 - linkdown flag set"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv6 - linkdown flag set"
 
+	set -e
 	ip -netns testns address add 192.0.2.1/24 dev dummy0
 	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+	set +e
 
+	echo "    Route to local address with carrier down"
 	ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
 	ip -netns testns route get fibmatch 192.0.2.1 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv4 linkdown flag set"
 	ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv6 linkdown flag set"
 
 	ip -netns testns link del dev dummy0
 
 	ip netns del testns
-
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: local route carrier test"
-		return 1
-	fi
-	echo "PASS: local route carrier test"
 }
 
 fib_carrier_unicast_test()
 {
 	ret=0
 
+	echo
+	echo "Single path route carrier test"
+
 	netns_create "testns"
 
+	set -e
 	ip -netns testns link add dummy0 type dummy
 	ip -netns testns link set dev dummy0 up
 
@@ -340,63 +362,63 @@ fib_carrier_unicast_test()
 
 	ip -netns testns address add 198.51.100.1/24 dev dummy0
 	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
 
+	echo "    Start point"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
 	ip -netns testns route get fibmatch 198.51.100.2 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv4 no linkdown flag"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
 		grep -q "linkdown"
-	check_fail $?
+	log_test $? 1 "IPv6 no linkdown flag"
 
+	set -e
 	ip -netns testns link set dev dummy0 carrier off
+	set +e
 
+	echo "    Carrier down"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
 	ip -netns testns route get fibmatch 198.51.100.2 | \
 		grep -q "linkdown"
-	check_err $?
+	log_test $? 0 "IPv4 linkdown flag set"
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
 		grep -q "linkdown"
-	check_err $?
+	log_test $? 0 "IPv6 linkdown flag set"
 
+	set -e
 	ip -netns testns address add 192.0.2.1/24 dev dummy0
 	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+	set +e
 
+	echo "    Second address added with carrier down"
 	ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv4 fibmatch"
 	ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
-	check_err $?
+	log_test $? 0 "IPv6 fibmatch"
 
 	ip -netns testns route get fibmatch 192.0.2.2 | \
 		grep -q "linkdown"
-	check_err $?
+	log_test $? 0 "IPv4 linkdown flag set"
 	ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
 		grep -q "linkdown"
-	check_err $?
+	log_test $? 0 "IPv6 linkdown flag set"
 
 	ip -netns testns link del dev dummy0
 
 	ip netns del testns
-
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: unicast route carrier test"
-		return 1
-	fi
-	echo "PASS: unicast route carrier test"
 }
 
 fib_carrier_test()
 {
-	echo "Running netdev carrier change tests"
-
 	fib_carrier_local_test
 	fib_carrier_unicast_test
 }

From ee395a5e722c8c7e85641c3017de8f64209ff04f Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:37:35 -0800
Subject: [PATCH 0081/1678] selftests: fib_tests: Move admin of dummy0 to
 helpers

Move setup and teardown of testns and dummy0 to helpers.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 100 ++++++++---------------
 1 file changed, 34 insertions(+), 66 deletions(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index d4e0b5cb4355..e113cfd659fc 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -28,12 +28,24 @@ log_test()
 	fi
 }
 
-netns_create()
+setup()
 {
-	local testns=$1
+	set -e
+	ip netns add testns
+	ip -netns testns link set dev lo up
 
-	ip netns add $testns
-	ip netns exec $testns ip link set dev lo up
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
+
+}
+
+cleanup()
+{
+	ip -netns testns link del dev dummy0 &> /dev/null
+	ip netns del testns
 }
 
 fib_unreg_unicast_test()
@@ -41,14 +53,7 @@ fib_unreg_unicast_test()
 	echo
 	echo "Single path route test"
 
-	set -e
-	netns_create "testns"
-
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
-	set +e
+	setup
 
 	echo "    Start point"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
@@ -66,7 +71,7 @@ fib_unreg_unicast_test()
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 2 "IPv6 fibmatch - no route"
 
-	ip netns del testns
+	cleanup
 }
 
 fib_unreg_multipath_test()
@@ -75,18 +80,11 @@ fib_unreg_multipath_test()
 	echo
 	echo "Multipath route test"
 
+	setup
+
 	set -e
-	netns_create "testns"
-
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-
 	ip -netns testns link add dummy1 type dummy
 	ip -netns testns link set dev dummy1 up
-
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
-
 	ip -netns testns address add 192.0.2.1/24 dev dummy1
 	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
 
@@ -124,7 +122,7 @@ fib_unreg_multipath_test()
 	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	log_test $? 2 "IPv6 - no route"
 
-	ip netns del testns
+	cleanup
 }
 
 fib_unreg_test()
@@ -138,15 +136,7 @@ fib_down_unicast_test()
 	echo
 	echo "Single path, admin down"
 
-	set -e
-	netns_create "testns"
-
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
-	set +e
+	setup
 
 	echo "    Start point"
 	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
@@ -164,9 +154,7 @@ fib_down_unicast_test()
 	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 2 "IPv6 fibmatch"
 
-	ip -netns testns link del dev dummy0
-
-	ip netns del testns
+	cleanup
 }
 
 fib_down_multipath_test_do()
@@ -208,18 +196,12 @@ fib_down_multipath_test()
 	echo
 	echo "Admin down multipath"
 
+	setup
+
 	set -e
-	netns_create "testns"
-
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-
 	ip -netns testns link add dummy1 type dummy
 	ip -netns testns link set dev dummy1 up
 
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
-
 	ip -netns testns address add 192.0.2.1/24 dev dummy1
 	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
 
@@ -264,8 +246,7 @@ fib_down_multipath_test()
 	log_test $? 2 "IPv6 fibmatch"
 
 	ip -netns testns link del dev dummy1
-	ip -netns testns link del dev dummy0
-	ip netns del testns
+	cleanup
 }
 
 fib_down_test()
@@ -280,16 +261,10 @@ fib_carrier_local_test()
 	echo
 	echo "Local carrier tests - single path"
 
+	setup
+
 	set -e
-	netns_create "testns"
-
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-
 	ip -netns testns link set dev dummy0 carrier on
-
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 	set +e
 
 	echo "    Start point"
@@ -340,9 +315,7 @@ fib_carrier_local_test()
 		grep -q "linkdown"
 	log_test $? 1 "IPv6 linkdown flag set"
 
-	ip -netns testns link del dev dummy0
-
-	ip netns del testns
+	cleanup
 }
 
 fib_carrier_unicast_test()
@@ -352,16 +325,10 @@ fib_carrier_unicast_test()
 	echo
 	echo "Single path route carrier test"
 
-	netns_create "testns"
+	setup
 
 	set -e
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-
 	ip -netns testns link set dev dummy0 carrier on
-
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
 	set +e
 
 	echo "    Start point"
@@ -412,9 +379,7 @@ fib_carrier_unicast_test()
 		grep -q "linkdown"
 	log_test $? 0 "IPv6 linkdown flag set"
 
-	ip -netns testns link del dev dummy0
-
-	ip netns del testns
+	cleanup
 }
 
 fib_carrier_test()
@@ -446,6 +411,9 @@ if [ $? -ne 0 ]; then
 	exit 0
 fi
 
+# start clean
+cleanup &> /dev/null
+
 fib_test
 
 exit $ret

From e2ba732a1681d907a6dffbaf9802aebcac233099 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:37:36 -0800
Subject: [PATCH 0082/1678] selftests: fib_tests: sleep after changing carrier

sleep for a second after setting carrier down to allow linkwatch
to propagate the change to the routing stack via netdev_state_change.
As it stands there is a race setting carrier down on the dummy
device and then checking the linkdown flag in the routes.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index e113cfd659fc..b617985ecdc1 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -282,6 +282,7 @@ fib_carrier_local_test()
 
 	set -e
 	ip -netns testns link set dev dummy0 carrier off
+	sleep 1
 	set +e
 
 	echo "    Carrier off on nexthop"

From 3758d2c74d67be946b623f3736118e5f59c0d504 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 10 Jan 2018 07:13:51 +0000
Subject: [PATCH 0083/1678] i40e: Make local function i40e_get_link_speed
 static

Fixes the following sparse warning:

drivers/net/ethernet/intel/i40e/i40e_main.c:5440:5: warning:
 symbol 'i40e_get_link_speed' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 70ecd9c3a163..eba020d3eb51 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -5381,7 +5381,7 @@ out:
  * @vsi: VSI to be configured
  *
  **/
-int i40e_get_link_speed(struct i40e_vsi *vsi)
+static int i40e_get_link_speed(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
 

From 03f431b33a6486680ff4e43b059176720bb41972 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 12 Jan 2018 02:27:13 +0000
Subject: [PATCH 0084/1678] i40evf: use GFP_ATOMIC under spin lock

A spin lock is taken here so we should use GFP_ATOMIC.

Fixes: 504398f0a78e ("i40evf: use spinlock to protect (mac|vlan)_filter_list")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index d57a67285505..82e6b115be87 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -465,7 +465,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter)
 		more = true;
 	}
 
-	veal = kzalloc(len, GFP_KERNEL);
+	veal = kzalloc(len, GFP_ATOMIC);
 	if (!veal) {
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
@@ -538,7 +538,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter)
 		      (count * sizeof(struct virtchnl_ether_addr));
 		more = true;
 	}
-	veal = kzalloc(len, GFP_KERNEL);
+	veal = kzalloc(len, GFP_ATOMIC);
 	if (!veal) {
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
@@ -612,7 +612,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter)
 		      (count * sizeof(u16));
 		more = true;
 	}
-	vvfl = kzalloc(len, GFP_KERNEL);
+	vvfl = kzalloc(len, GFP_ATOMIC);
 	if (!vvfl) {
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;
@@ -684,7 +684,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter)
 		      (count * sizeof(u16));
 		more = true;
 	}
-	vvfl = kzalloc(len, GFP_KERNEL);
+	vvfl = kzalloc(len, GFP_ATOMIC);
 	if (!vvfl) {
 		spin_unlock_bh(&adapter->mac_vlan_list_lock);
 		return;

From 7be78aa444794d4d242f8192c627df57e2cc2e98 Mon Sep 17 00:00:00 2001
From: Mitch Williams <mitch.a.williams@intel.com>
Date: Mon, 22 Jan 2018 12:00:32 -0500
Subject: [PATCH 0085/1678] i40e: don't leak memory addresses

Could a Bad Person do Bad Things to a server if they found these
addresses printed in the log? Who knows? But let's not take that risk.

Remove pointers from a bunch of printks. In some cases, I was able to
adjust the message to indicate whether or not the value was null. In
others, I just removed the entire message as there was really no hope of
saving it.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_debugfs.c    | 40 +++----------------
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 13 +++---
 2 files changed, 12 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index e9fc51bd6c95..b829fd365693 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 		dev_info(&pf->pdev->dev, "        vlan_features = 0x%08lx\n",
 			 (unsigned long int)nd->vlan_features);
 	}
-	dev_info(&pf->pdev->dev,
-		 "    vlgrp: & = %p\n", vsi->active_vlans);
+	dev_info(&pf->pdev->dev, "    active_vlans is %s\n",
+		 vsi->active_vlans ? "<valid>" : "<null>");
 	dev_info(&pf->pdev->dev,
 		 "    flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
 		 vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
@@ -269,14 +269,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 		if (!rx_ring)
 			continue;
 
-		dev_info(&pf->pdev->dev,
-			 "    rx_rings[%i]: desc = %p\n",
-			 i, rx_ring->desc);
-		dev_info(&pf->pdev->dev,
-			 "    rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
-			 i, rx_ring->dev,
-			 rx_ring->netdev,
-			 rx_ring->rx_bi);
 		dev_info(&pf->pdev->dev,
 			 "    rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
 			 i, *rx_ring->state,
@@ -307,13 +299,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 rx_ring->rx_stats.realloc_count,
 			 rx_ring->rx_stats.page_reuse_count);
 		dev_info(&pf->pdev->dev,
-			 "    rx_rings[%i]: size = %i, dma = 0x%08lx\n",
-			 i, rx_ring->size,
-			 (unsigned long int)rx_ring->dma);
-		dev_info(&pf->pdev->dev,
-			 "    rx_rings[%i]: vsi = %p, q_vector = %p\n",
-			 i, rx_ring->vsi,
-			 rx_ring->q_vector);
+			 "    rx_rings[%i]: size = %i\n",
+			 i, rx_ring->size);
 		dev_info(&pf->pdev->dev,
 			 "    rx_rings[%i]: itr_setting = %d (%s)\n",
 			 i, rx_ring->itr_setting,
@@ -325,14 +312,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 		if (!tx_ring)
 			continue;
 
-		dev_info(&pf->pdev->dev,
-			 "    tx_rings[%i]: desc = %p\n",
-			 i, tx_ring->desc);
-		dev_info(&pf->pdev->dev,
-			 "    tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
-			 i, tx_ring->dev,
-			 tx_ring->netdev,
-			 tx_ring->tx_bi);
 		dev_info(&pf->pdev->dev,
 			 "    tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
 			 i, *tx_ring->state,
@@ -355,13 +334,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 tx_ring->tx_stats.tx_busy,
 			 tx_ring->tx_stats.tx_done_old);
 		dev_info(&pf->pdev->dev,
-			 "    tx_rings[%i]: size = %i, dma = 0x%08lx\n",
-			 i, tx_ring->size,
-			 (unsigned long int)tx_ring->dma);
-		dev_info(&pf->pdev->dev,
-			 "    tx_rings[%i]: vsi = %p, q_vector = %p\n",
-			 i, tx_ring->vsi,
-			 tx_ring->q_vector);
+			 "    tx_rings[%i]: size = %i\n",
+			 i, tx_ring->size);
 		dev_info(&pf->pdev->dev,
 			 "    tx_rings[%i]: DCB tc = %d\n",
 			 i, tx_ring->dcb_tc);
@@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 		 vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
 		 vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
 		 vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
-	if (vsi->back)
-		dev_info(&pf->pdev->dev, "    PF = %p\n", vsi->back);
 	dev_info(&pf->pdev->dev, "    idx = %d\n", vsi->idx);
 	dev_info(&pf->pdev->dev,
 		 "    tc_config: numtc = %d, enabled_tc = 0x%x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index eba020d3eb51..0f256ee87c2a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -215,8 +215,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
 
 	if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
 		dev_info(&pf->pdev->dev,
-			 "param err: pile=%p needed=%d id=0x%04x\n",
-			 pile, needed, id);
+			 "param err: pile=%s needed=%d id=0x%04x\n",
+			 pile ? "<valid>" : "<null>", needed, id);
 		return -EINVAL;
 	}
 
@@ -9954,18 +9954,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi)
 
 	mutex_lock(&pf->switch_mutex);
 	if (!pf->vsi[vsi->idx]) {
-		dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
-			vsi->idx, vsi->idx, vsi, vsi->type);
+		dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
+			vsi->idx, vsi->idx, vsi->type);
 		goto unlock_vsi;
 	}
 
 	if (pf->vsi[vsi->idx] != vsi) {
 		dev_err(&pf->pdev->dev,
-			"pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n",
+			"pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
 			pf->vsi[vsi->idx]->idx,
-			pf->vsi[vsi->idx],
 			pf->vsi[vsi->idx]->type,
-			vsi->idx, vsi, vsi->type);
+			vsi->idx, vsi->type);
 		goto unlock_vsi;
 	}
 

From a48350c29be1bde064d1135e05c64c84098a030d Mon Sep 17 00:00:00 2001
From: Alan Brady <alan.brady@intel.com>
Date: Mon, 22 Jan 2018 12:00:33 -0500
Subject: [PATCH 0086/1678] i40e: broadcast filters can trigger overflow
 promiscuous

When adding a bunch of VLANs to all the ports on a device, it's possible
to run out of space for broadcast filters.  The driver should trigger
overflow promiscuous in this circumstance to prevent traffic from being
unexpectedly dropped.

Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0f256ee87c2a..d96ecfc37bbc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2177,11 +2177,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
 							    NULL);
 	}
 
-	if (aq_ret)
+	if (aq_ret) {
+		set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
 		dev_warn(&vsi->back->pdev->dev,
-			 "Error %s setting broadcast promiscuous mode on %s\n",
+			 "Error %s, forcing overflow promiscuous on %s\n",
 			 i40e_aq_str(hw, hw->aq.asq_last_status),
 			 vsi_name);
+	}
 
 	return aq_ret;
 }

From fbd5eb54c28ce314e0cca1505d2261a8a84ebc44 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Mon, 22 Jan 2018 12:00:34 -0500
Subject: [PATCH 0087/1678] i40evf: Use an iterator of the same type as the
 list

When iterating through the linked list of VLAN filters, make the
iterator the same type as that of the linked list.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 6fd09926181a..4f07469e9769 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1027,6 +1027,7 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter)
 void i40evf_down(struct i40evf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
+	struct i40evf_vlan_filter *vlf;
 	struct i40evf_mac_filter *f;
 
 	if (adapter->state <= __I40EVF_DOWN_PENDING)
@@ -1045,7 +1046,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
 		f->remove = true;
 	}
 	/* remove all VLAN filters */
-	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
 		f->remove = true;
 	}
 
@@ -3067,6 +3068,7 @@ static void i40evf_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	struct i40evf_vlan_filter *vlf, *vlftmp;
 	struct i40evf_mac_filter *f, *ftmp;
 	struct i40e_hw *hw = &adapter->hw;
 	int err;
@@ -3129,9 +3131,10 @@ static void i40evf_remove(struct pci_dev *pdev)
 		list_del(&f->list);
 		kfree(f);
 	}
-	list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
-		list_del(&f->list);
-		kfree(f);
+	list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+				 list) {
+		list_del(&vlf->list);
+		kfree(vlf);
 	}
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);

From cc6a96a41991de1961cfd712a83df21456607d30 Mon Sep 17 00:00:00 2001
From: Alan Brady <alan.brady@intel.com>
Date: Mon, 22 Jan 2018 12:00:35 -0500
Subject: [PATCH 0088/1678] i40e: refactor promisc_changed in
 i40e_sync_vsi_filters

This code here is quite complex and easy to screw up.  Let's see if we
can't improve the readability and maintainability a bit.  This refactors
out promisc_changed into two variables 'old_overflow' and 'new_overflow'
which makes it a bit clearer when we're concerned about when and how
overflow promiscuous is changed.  This also makes so that we no longer
need to pass a boolean pointer to i40e_aqc_add_filters.  Instead we can
simply check if we changed the overflow promiscuous flag since the
function start.

Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 42 ++++++++++-----------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d96ecfc37bbc..1073972be948 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2116,17 +2116,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
  * @list: the list of filters to send to firmware
  * @add_head: Position in the add hlist
  * @num_add: the number of filters to add
- * @promisc_change: set to true on exit if promiscuous mode was forced on
  *
  * Send a request to firmware via AdminQ to add a chunk of filters. Will set
- * promisc_changed to true if the firmware has run out of space for more
- * filters.
+ * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
+ * space for more filters.
  */
 static
 void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
 			  struct i40e_aqc_add_macvlan_element_data *list,
 			  struct i40e_new_mac_filter *add_head,
-			  int num_add, bool *promisc_changed)
+			  int num_add)
 {
 	struct i40e_hw *hw = &vsi->back->hw;
 	int aq_err, fcnt;
@@ -2136,7 +2135,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
 	fcnt = i40e_update_filter_state(num_add, list, add_head);
 
 	if (fcnt != num_add) {
-		*promisc_changed = true;
 		set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
 		dev_warn(&vsi->back->pdev->dev,
 			 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
@@ -2269,9 +2267,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 	struct i40e_mac_filter *f;
 	struct i40e_new_mac_filter *new, *add_head = NULL;
 	struct i40e_hw *hw = &vsi->back->hw;
+	bool old_overflow, new_overflow;
 	unsigned int failed_filters = 0;
 	unsigned int vlan_filters = 0;
-	bool promisc_changed = false;
 	char vsi_name[16] = "PF";
 	int filter_list_len = 0;
 	i40e_status aq_ret = 0;
@@ -2293,6 +2291,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		usleep_range(1000, 2000);
 	pf = vsi->back;
 
+	old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
 	if (vsi->netdev) {
 		changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
 		vsi->current_netdev_flags = vsi->netdev->flags;
@@ -2466,15 +2466,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 			/* flush a full buffer */
 			if (num_add == filter_list_len) {
 				i40e_aqc_add_filters(vsi, vsi_name, add_list,
-						     add_head, num_add,
-						     &promisc_changed);
+						     add_head, num_add);
 				memset(add_list, 0, list_size);
 				num_add = 0;
 			}
 		}
 		if (num_add) {
 			i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
-					     num_add, &promisc_changed);
+					     num_add);
 		}
 		/* Now move all of the filters from the temp add list back to
 		 * the VSI's list.
@@ -2503,24 +2502,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 	}
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
-	/* If promiscuous mode has changed, we need to calculate a new
-	 * threshold for when we are safe to exit
-	 */
-	if (promisc_changed)
-		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
-
 	/* Check if we are able to exit overflow promiscuous mode. We can
 	 * safely exit if we didn't just enter, we no longer have any failed
 	 * filters, and we have reduced filters below the threshold value.
 	 */
-	if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) &&
-	    !promisc_changed && !failed_filters &&
-	    (vsi->active_filters < vsi->promisc_threshold)) {
+	if (old_overflow && !failed_filters &&
+	    vsi->active_filters < vsi->promisc_threshold) {
 		dev_info(&pf->pdev->dev,
 			 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
 			 vsi_name);
 		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
-		promisc_changed = true;
 		vsi->promisc_threshold = 0;
 	}
 
@@ -2530,6 +2521,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		goto out;
 	}
 
+	new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+	/* If we are entering overflow promiscuous, we need to calculate a new
+	 * threshold for when we are safe to exit
+	 */
+	if (!old_overflow && new_overflow)
+		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
 	/* check for changes in promiscuous modes */
 	if (changed_flags & IFF_ALLMULTI) {
 		bool cur_multipromisc;
@@ -2550,12 +2549,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		}
 	}
 
-	if ((changed_flags & IFF_PROMISC) || promisc_changed) {
+	if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
 		bool cur_promisc;
 
 		cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
-			       test_bit(__I40E_VSI_OVERFLOW_PROMISC,
-					vsi->state));
+			       new_overflow);
 		aq_ret = i40e_set_promiscuous(pf, cur_promisc);
 		if (aq_ret) {
 			retval = i40e_aq_rc_to_posix(aq_ret,

From 7363115efb0452d2ec5b69ae18378a173ac2cf7f Mon Sep 17 00:00:00 2001
From: Alan Brady <alan.brady@intel.com>
Date: Mon, 22 Jan 2018 12:00:36 -0500
Subject: [PATCH 0089/1678] i40e: do not force filter failure in overflow
 promiscuous

Broadcast filters can now cause overflow promiscuous to trigger when
adding "too many" VLANs to all the ports of a device and the driver
needs a way to exit overflow promiscuous once triggered.

Currently the driver looks to see if there are "too many" filters and/or
we have any failed filters to determine when it is safe to exit overflow
promiscuous.  If we trigger overflow promiscuous with broadcast filters,
any new filters added will be "auto-failed" until we exit overflow
promiscuous.  Since the user can't manually remove the failed broadcast
filters for VLANs (nor should we expect the user to do such), there is
no way to exit overflow promiscuous without reloading the driver.

The easiest way to do this is to remove the shortcut to "auto-fail"
filters in overflow promiscuous.  If the user removes the VLANs, the
failed filters will be removed and since we're no longer "auto-failing"
new filters, we'll eventually get a good set of filters and exit
overflow promiscuous.

This has the side benefit of making filter state more explicit in that
if a filter says it's failed we know for a fact it failed and not just
assuming it will if we're in overflow promiscuous.  This is nice because
if the user removes some filters and then adds some, even if we're in
overflow promiscuous, the filter might succeed; we were just assuming it
won't because the user hasn't rectified other existing failed filters.

Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1073972be948..8a6ebe7b4254 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1380,14 +1380,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 
 		ether_addr_copy(f->macaddr, macaddr);
 		f->vlan = vlan;
-		/* If we're in overflow promisc mode, set the state directly
-		 * to failed, so we don't bother to try sending the filter
-		 * to the hardware.
-		 */
-		if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))
-			f->state = I40E_FILTER_FAILED;
-		else
-			f->state = I40E_FILTER_NEW;
+		f->state = I40E_FILTER_NEW;
 		INIT_HLIST_NODE(&f->hlist);
 
 		key = i40e_addr_to_hkey(macaddr);
@@ -2425,12 +2418,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
 		num_add = 0;
 		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
-			if (test_bit(__I40E_VSI_OVERFLOW_PROMISC,
-				     vsi->state)) {
-				new->state = I40E_FILTER_FAILED;
-				continue;
-			}
-
 			/* handle broadcast filters by updating the broadcast
 			 * promiscuous flag instead of adding a MAC filter.
 			 */

From 7b63435a5035621baa4de8d15ca1b4440c3b4d12 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Mon, 22 Jan 2018 12:00:37 -0500
Subject: [PATCH 0090/1678] i40e: i40e: Change ethtool check from MAC to HW
 flag

The MAC, FW Version and NPAR check used to determine
if shutting off the FW LLDP engine is supported is not
using the usual feature check mechanism.

This patch fixes the problem by moving the feature check
to i40e_sw_init in order to set a flag in pf->hw_features
that ethtool will use for priv_flags disable operation.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h         |  1 +
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 ++----------
 drivers/net/ethernet/intel/i40e/i40e_main.c    | 10 ++++++++++
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ebe795a7f5f9..4cf99292fc2f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -507,6 +507,7 @@ struct i40e_pf {
 #define I40E_HW_STOP_FW_LLDP			BIT(16)
 #define I40E_HW_PORT_ID_VALID			BIT(17)
 #define I40E_HW_RESTART_AUTONEG			BIT(18)
+#define I40E_HW_STOPPABLE_FW_LLDP		BIT(19)
 
 	u64 flags;
 #define I40E_FLAG_RX_CSUM_ENABLED		BIT_ULL(0)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 29a7412b2fa6..0dcbbda164c4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4426,17 +4426,9 @@ flags_complete:
 	 * unsupported FW versions.
 	 */
 	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
-		if (pf->hw.func_caps.npar_enable) {
+		if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) {
 			dev_warn(&pf->pdev->dev,
-				 "Unable to change FW LLDP if NPAR active\n");
-			return -EOPNOTSUPP;
-		}
-
-		if (pf->hw.aq.api_maj_ver < 1 ||
-		    (pf->hw.aq.api_maj_ver == 1 &&
-		     pf->hw.aq.api_min_ver < 7)) {
-			dev_warn(&pf->pdev->dev,
-				 "FW ver does not support changing FW LLDP\n");
+				 "Device does not support changing FW LLDP\n");
 			return -EOPNOTSUPP;
 		}
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 8a6ebe7b4254..101702af099f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11089,6 +11089,16 @@ static int i40e_sw_init(struct i40e_pf *pf)
 		/* IWARP needs one extra vector for CQP just like MISC.*/
 		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
 	}
+	/* Stopping the FW LLDP engine is only supported on the
+	 * XL710 with a FW ver >= 1.7.  Also, stopping FW LLDP
+	 * engine is not supported if NPAR is functioning on this
+	 * part
+	 */
+	if (pf->hw.mac.type == I40E_MAC_XL710 &&
+	    !pf->hw.func_caps.npar_enable &&
+	    (pf->hw.aq.api_maj_ver > 1 ||
+	     (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6)))
+		pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP;
 
 #ifdef CONFIG_PCI_IOV
 	if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {

From 8946b56354b71b1f75b9551aeb33f19c8d792065 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 22 Jan 2018 12:00:38 -0500
Subject: [PATCH 0091/1678] i40evf: use __dev_[um]c_sync routines in
 .set_rx_mode

Similar to changes done to the PF driver in commit 6622f5cdbaf3 ("i40e:
make use of __dev_uc_sync and __dev_mc_sync"), replace our
home-rolled method for updating the internal status of MAC filters with
__dev_uc_sync and __dev_mc_sync.

These new functions use internal state within the netdev struct in order
to efficiently break the question of "which filters in this list need to
be added or removed" into singular "add this filter" and "delete this
filter" requests.

This vastly improves our handling of .set_rx_mode especially with large
number of MAC filters being added to the device, and even results in
a simpler .set_rx_mode handler.

Under some circumstances, such as when attached to a bridge, we may
receive a request to delete our own permanent address. Prevent deletion
of this address during i40evf_addr_unsync so that we don't accidentally
stop receiving traffic.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 97 +++++++++++--------
 1 file changed, 57 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 4f07469e9769..34fd6c553879 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -785,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev,
  **/
 static struct
 i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
-				      u8 *macaddr)
+				      const u8 *macaddr)
 {
 	struct i40evf_mac_filter *f;
 
@@ -808,7 +808,7 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter,
  **/
 static struct
 i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
-				     u8 *macaddr)
+				     const u8 *macaddr)
 {
 	struct i40evf_mac_filter *f;
 
@@ -879,6 +879,53 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
 	return (f == NULL) ? -ENOMEM : 0;
 }
 
+/**
+ * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+	if (i40evf_add_filter(adapter, addr))
+		return 0;
+	else
+		return -ENOMEM;
+}
+
+/**
+ * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	struct i40evf_mac_filter *f;
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	f = i40evf_find_filter(adapter, addr);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+	return 0;
+}
+
 /**
  * i40evf_set_rx_mode - NDO callback to set the netdev filters
  * @netdev: network interface device structure
@@ -886,44 +933,11 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
 static void i40evf_set_rx_mode(struct net_device *netdev)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40evf_mac_filter *f, *ftmp;
-	struct netdev_hw_addr *uca;
-	struct netdev_hw_addr *mca;
-	struct netdev_hw_addr *ha;
-
-	/* add addr if not already in the filter list */
-	netdev_for_each_uc_addr(uca, netdev) {
-		i40evf_add_filter(adapter, uca->addr);
-	}
-	netdev_for_each_mc_addr(mca, netdev) {
-		i40evf_add_filter(adapter, mca->addr);
-	}
 
 	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
-	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
-		netdev_for_each_mc_addr(mca, netdev)
-			if (ether_addr_equal(mca->addr, f->macaddr))
-				goto bottom_of_search_loop;
-
-		netdev_for_each_uc_addr(uca, netdev)
-			if (ether_addr_equal(uca->addr, f->macaddr))
-				goto bottom_of_search_loop;
-
-		for_each_dev_addr(netdev, ha)
-			if (ether_addr_equal(ha->addr, f->macaddr))
-				goto bottom_of_search_loop;
-
-		if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr))
-			goto bottom_of_search_loop;
-
-		/* f->macaddr wasn't found in uc, mc, or ha list so delete it */
-		f->remove = true;
-		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-
-bottom_of_search_loop:
-		continue;
-	}
+	__dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+	__dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync);
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
 	if (netdev->flags & IFF_PROMISC &&
 	    !(adapter->flags & I40EVF_FLAG_PROMISC_ON))
@@ -938,8 +952,6 @@ bottom_of_search_loop:
 	else if (!(netdev->flags & IFF_ALLMULTI) &&
 		 adapter->flags & I40EVF_FLAG_ALLMULTI_ON)
 		adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI;
-
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 }
 
 /**
@@ -1041,10 +1053,15 @@ void i40evf_down(struct i40evf_adapter *adapter)
 
 	spin_lock_bh(&adapter->mac_vlan_list_lock);
 
+	/* clear the sync flag on all filters */
+	__dev_uc_unsync(adapter->netdev, NULL);
+	__dev_mc_unsync(adapter->netdev, NULL);
+
 	/* remove all MAC filters */
 	list_for_each_entry(f, &adapter->mac_filter_list, list) {
 		f->remove = true;
 	}
+
 	/* remove all VLAN filters */
 	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
 		f->remove = true;

From 693acdd0f18b4b2a52439804dd756db8397044da Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Mon, 22 Jan 2018 12:00:39 -0500
Subject: [PATCH 0092/1678] i40evf: Make VF reset warning message more clear

When the PF resets the VF, the VF puts out a warning message
indicating that the VF received a reset message from the PF.
Make this message more clear so that we do not mistakenly
think that the PF is undergoing a reset.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 82e6b115be87..0700f0afe2d3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1037,7 +1037,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 			i40evf_print_link_message(adapter);
 			break;
 		case VIRTCHNL_EVENT_RESET_IMPENDING:
-			dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+			dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n");
 			if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
 				adapter->flags |= I40EVF_FLAG_RESET_PENDING;
 				dev_info(&adapter->pdev->dev, "Scheduling reset task\n");

From 153e1b84f477f716bc3f81e6cfae1a3d941fc7ec Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:44:06 -0800
Subject: [PATCH 0093/1678] selftests: Add FIB onlink tests

Add test cases verifying FIB onlink commands work as expected in
various conditions - IPv4, IPv6, main table, and VRF.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/fib-onlink-tests.sh | 375 ++++++++++++++++++
 1 file changed, 375 insertions(+)
 create mode 100755 tools/testing/selftests/net/fib-onlink-tests.sh

diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000000..06b1d7cc12cc
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,375 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+	else
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "######################################################################"
+	echo "TEST SECTION: $*"
+	echo "######################################################################"
+}
+
+log_subsection()
+{
+	echo
+	echo "#########################################"
+	echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+	echo
+	echo "COMMAND: $*"
+	eval $*
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local pfx
+	local addr
+
+	addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+	echo
+	echo "########################################"
+	echo "Configuring interfaces"
+
+	set -e
+
+	# create namespace
+	ip netns add ${PEER_NS}
+	ip -netns ${PEER_NS} li set lo up
+
+	# add vrf table
+	ip li add ${VRF} type vrf table ${VRF_TABLE}
+	ip li set ${VRF} up
+	ip ro add table ${VRF_TABLE} unreachable default
+	ip -6 ro add table ${VRF_TABLE} unreachable default
+
+	# create test interfaces
+	ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+	ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+	ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+	ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+	# enslave vrf interfaces
+	for n in 5 7; do
+		ip li set ${NETIFS[p${n}]} vrf ${VRF}
+	done
+
+	# add addresses
+	for n in 1 3 5 7; do
+		ip li set ${NETIFS[p${n}]} up
+		ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+	done
+
+	# move peer interfaces to namespace and add addresses
+	for n in 2 4 6 8; do
+		ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+		ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+	done
+
+	set +e
+
+	# let DAD complete - assume default of 1 probe
+	sleep 1
+}
+
+cleanup()
+{
+	# make sure we start from a clean slate
+	ip netns del ${PEER_NS} 2>/dev/null
+	for n in 1 3 5 7; do
+		ip link del ${NETIFS[p${n}]} 2>/dev/null
+	done
+	ip link del ${VRF} 2>/dev/null
+	ip ro flush table ${VRF_TABLE}
+	ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+	local table="$1"
+	local prefix="$2"
+	local gw="$3"
+	local dev="$4"
+	local exp_rc="$5"
+	local desc="$6"
+
+	# dev arg may be empty
+	[ -n "${dev}" ] && dev="dev ${dev}"
+
+	run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+	log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+	# - unicast connected, unicast recursive
+	#
+	log_subsection "default VRF - main table"
+
+	run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+	run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+	log_subsection "VRF ${VRF}"
+
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+	log_subsection "VRF device, PBR table"
+
+	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+}
+
+invalid_onlink_ipv4()
+{
+	run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+		"Invalid gw - local unicast address"
+
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+		"Invalid gw - local unicast address, VRF"
+
+	run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+	run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+		"Gateway resolves to wrong nexthop device"
+
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+		"Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+	local table="$1"
+	local prefix="$2"
+	local gw="$3"
+	local dev="$4"
+	local exp_rc="$5"
+	local desc="$6"
+
+	# dev arg may be empty
+	[ -n "${dev}" ] && dev="dev ${dev}"
+
+	run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+	log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+	# - unicast connected, unicast recursive, v4-mapped
+	#
+	log_subsection "default VRF - main table"
+
+	run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+	run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+	run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+	log_subsection "VRF ${VRF}"
+
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+	log_subsection "VRF device, PBR table"
+
+	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+}
+
+invalid_onlink_ipv6()
+{
+	local lladdr
+
+	lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+	run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+		"Invalid gw - local unicast address"
+	run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+		"Invalid gw - local linklocal address"
+	run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+		"Invalid gw - multicast address"
+
+	lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+		"Invalid gw - local unicast address, VRF"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+		"Invalid gw - local linklocal address, VRF"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+		"Invalid gw - multicast address, VRF"
+
+	run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+		"No nexthop device given"
+
+	# default VRF validation is done against LOCAL table
+	# run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+	#	"Gateway resolves to wrong nexthop device"
+
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+		"Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+	log_section "IPv4 onlink"
+	log_subsection "Valid onlink commands"
+	valid_onlink_ipv4
+	log_subsection "Invalid onlink commands"
+	invalid_onlink_ipv4
+
+	log_section "IPv6 onlink"
+	log_subsection "Valid onlink commands"
+	valid_onlink_ipv6
+	invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi

From 8ac2441e0b7b7d2e76c47e0997a6634abd80cb26 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@oracle.com>
Date: Fri, 9 Feb 2018 11:56:12 -0800
Subject: [PATCH 0094/1678] samples/bpf: adjust rlimit RLIMIT_MEMLOCK for
 xdp_redirect

Default rlimit RLIMIT_MEMLOCK is 64KB, causes bpf map failure.
e.g.
[root@labbpf]# ./xdp_redirect $(</sys/class/net/eth2/ifindex) \
> $(</sys/class/net/eth3/ifindex)
failed to create a map: 1 Operation not permitted

The failure is seen when executing xdp_redirect while xdp_monitor
is already runnig.

Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/bpf/xdp_redirect_user.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index d54e91eb6cbf..b701b5c21342 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -20,6 +20,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
+#include <sys/resource.h>
 
 #include "bpf_load.h"
 #include "bpf_util.h"
@@ -75,6 +76,7 @@ static void usage(const char *prog)
 
 int main(int argc, char **argv)
 {
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	const char *optstr = "SN";
 	char filename[256];
 	int ret, opt, key = 0;
@@ -98,6 +100,11 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
 	ifindex_in = strtoul(argv[optind], NULL, 0);
 	ifindex_out = strtoul(argv[optind + 1], NULL, 0);
 	printf("input: %d output: %d\n", ifindex_in, ifindex_out);

From 9492686c53f3a98e7027d1079db1471ab20e17de Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Tue, 13 Feb 2018 13:42:49 +0900
Subject: [PATCH 0095/1678] bpf: samples/sockmap fix Makefile for build error

While building samples/sockmap, undefined reference error is thrown
for `nla_dump_errormsg'.
Linking tools/lib/bpf/nlattr.o as a fix

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/sockmap/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 73f1da4d116c..9bf2881bd11b 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -2,7 +2,7 @@
 hostprogs-y := sockmap
 
 # Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o
+LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/

From 444890c3ce7d74bdc20f2bf930b8476b98d3e972 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Tue, 13 Feb 2018 13:44:22 +0900
Subject: [PATCH 0096/1678] bpf: samples/sockmap detach sock ops program

samples/sockops program keeps the sock_ops program attached to cgroup.
Fixed this by detaching program before exit.

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/sockmap/sockmap_user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7c25c0c112bc..95a54a89a532 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -566,6 +566,7 @@ run:
 	else
 		fprintf(stderr, "unknown test\n");
 out:
+	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
 	close(s1);
 	close(s2);
 	close(p1);

From 297dd12cb104151797fd649433a2157b585f1718 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 13 Feb 2018 14:15:36 +0100
Subject: [PATCH 0097/1678] net: avoid including xdp.h in filter.h

If is sufficient with a forward declaration of struct xdp_rxq_info in
linux/filter.h, which avoids including net/xdp.h.  This was originally
suggested by John Fastabend during the review phase, but wasn't
included in the final patchset revision.  Thus, this followup.

Suggested-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 276932d75975..fdb691b520c0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,7 +20,6 @@
 #include <linux/set_memory.h>
 #include <linux/kallsyms.h>
 
-#include <net/xdp.h>
 #include <net/sch_generic.h>
 
 #include <uapi/linux/filter.h>
@@ -30,6 +29,7 @@ struct sk_buff;
 struct sock;
 struct seccomp_data;
 struct bpf_prog_aux;
+struct xdp_rxq_info;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function

From 41757dcb0c3d8446c549e55163c9fd9561fcf599 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 13 Feb 2018 14:19:15 +0100
Subject: [PATCH 0098/1678] selftests/bpf: fix Makefile for cgroup_helpers.c

The current selftests Makefile construct result in cgroup_helpers.c
gets compiled together with all the TEST_GEN_PROGS. And it also result
in invoking the libbpf Makefile two times (tools/lib/bpf).

These issues were introduced in commit 9d1f15941967 ("bpf: move
cgroup_helpers from samples/bpf/ to tools/testing/selftesting/bpf/").

The only test program that requires the cgroup helpers is 'test_dev_cgroup'.

Thus, create a make target $(OUTPUT)/test_dev_cgroup that extend[1]
the 'prerequisite' for the 'stem' %-style pattern in ../lib.mk,
for this particular test program.

Reviewers notice the make-rules in tools/testing/selftests/lib.mk
differ from the normal kernel kbuild rules, and it is practical
to use 'make -p' to follow how these 'Implicit/static pattern stem'
gets expanded.

[1] https://www.gnu.org/software/make/manual/html_node/Static-Usage.html

Fixes: 9d1f15941967 ("bpf: move cgroup_helpers from samples/bpf/ to tools/testing/selftesting/bpf/")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5c43c187f27c..8567a858b789 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,12 +35,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open
 
 include ../lib.mk
 
-BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c
+BPFOBJ := $(OUTPUT)/libbpf.a
 
 $(TEST_GEN_PROGS): $(BPFOBJ)
 
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+
 .PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated

From 615a9474985799c8b48645b8e95a9b9f0691f56a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Feb 2018 10:35:05 -0800
Subject: [PATCH 0099/1678] tools/bpf: adjust rlimit RLIMIT_MEMLOCK for
 test_tcpbpf_user

The default rlimit RLIMIT_MEMLOCK is 64KB. In certain cases,
e.g. in a test machine mimicking our production system, this test may
fail due to unable to charge the required memory for map creation:
   # ./test_tcpbpf_user
   libbpf: failed to create map (name: 'global_map'): Operation not permitted
   libbpf: failed to load object 'test_tcpbpf_kern.o'
   FAILED: load_bpf_file failed for: test_tcpbpf_kern.o

Changing the default rlimit RLIMIT_MEMLOCK to unlimited makes
the test always pass.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_tcpbpf_user.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 95a370f3d378..5d73db416460 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -11,6 +11,8 @@
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -42,6 +44,7 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 
 int main(int argc, char **argv)
 {
+	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	const char *file = "test_tcpbpf_kern.o";
 	struct tcpbpf_globals g = {0};
 	int cg_fd, prog_fd, map_fd;
@@ -54,6 +57,9 @@ int main(int argc, char **argv)
 	int pid;
 	int rv;
 
+	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
+		perror("Unable to lift memlock rlimit");
+
 	if (argc > 1 && strcmp(argv[1], "-d") == 0)
 		debug_flag = true;
 

From ee6f4a5ad806a13b6b26b6699bb5562c08148280 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Sun, 11 Feb 2018 22:50:46 -0500
Subject: [PATCH 0100/1678] ieee802154: 6lowpan: set IFF_NO_QUEUE

This patch sets the IFF_NO_QUEUE for IEEE 802.15.4 6lowpan interfaces. As
commit 24dcbf662205 ("6lowpan: Don't set IFF_NO_QUEUE") removes it for
"reasons" from the bluetooth 6lowpan subsystem. In IEEE 802.15.4 the lower
interface deals with one qdisc for the real hardware, 6LoWPAN does the
protocol adaption only and no second queuing on top.

Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
---
 net/ieee802154/6lowpan/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 974765b7d92a..e4f305320519 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev)
 	/* We need an ipv6hdr as minimum len when calling xmit */
 	ldev->hard_header_len	= sizeof(struct ipv6hdr);
 	ldev->flags		= IFF_BROADCAST | IFF_MULTICAST;
+	ldev->priv_flags	|= IFF_NO_QUEUE;
 
 	ldev->netdev_ops	= &lowpan_netdev_ops;
 	ldev->header_ops	= &lowpan_header_ops;

From 330c7272c40e965b8ab510d1022acd6e6a32e9c8 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:52:00 -0800
Subject: [PATCH 0101/1678] net: Make dn_ptr depend on CONFIG_DECNET

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5eef6c8e2741..d2ef35e00626 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1800,7 +1800,9 @@ struct net_device {
 #endif
 	void 			*atalk_ptr;
 	struct in_device __rcu	*ip_ptr;
+#if IS_ENABLED(CONFIG_DECNET)
 	struct dn_dev __rcu     *dn_ptr;
+#endif
 	struct inet6_dev __rcu	*ip6_ptr;
 	void			*ax25_ptr;
 	struct wireless_dev	*ieee80211_ptr;
diff --git a/net/core/dev.c b/net/core/dev.c
index df5241c8eda1..4bd4ad7ffda4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8134,8 +8134,9 @@ void netdev_run_todo(void)
 		BUG_ON(!list_empty(&dev->ptype_specific));
 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
+#if IS_ENABLED(CONFIG_DECNET)
 		WARN_ON(dev->dn_ptr);
-
+#endif
 		if (dev->priv_destructor)
 			dev->priv_destructor(dev);
 		if (dev->needs_free_netdev)

From 19ff13f2a411d99af67d8e51867d54b86e1bf017 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:52:01 -0800
Subject: [PATCH 0102/1678] net: Make ax25_ptr depend on CONFIG_AX25

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 include/net/ax25.h        | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d2ef35e00626..936dc2c9dca1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1804,7 +1804,9 @@ struct net_device {
 	struct dn_dev __rcu     *dn_ptr;
 #endif
 	struct inet6_dev __rcu	*ip6_ptr;
+#if IS_ENABLED(CONFIG_AX25)
 	void			*ax25_ptr;
+#endif
 	struct wireless_dev	*ieee80211_ptr;
 	struct wpan_dev		*ieee802154_ptr;
 #if IS_ENABLED(CONFIG_MPLS_ROUTING)
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 76fb39c272a7..c91bc87931c7 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -318,10 +318,12 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
 extern ax25_dev *ax25_dev_list;
 extern spinlock_t ax25_dev_lock;
 
+#if IS_ENABLED(CONFIG_AX25)
 static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
 {
 	return dev->ax25_ptr;
 }
+#endif
 
 ax25_dev *ax25_addr_ax25dev(ax25_address *);
 void ax25_dev_device_up(struct net_device *);

From 89e58148fbf2e4cbd84006e263061c19a2b47adf Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 13 Feb 2018 08:52:02 -0800
Subject: [PATCH 0103/1678] net: Make atalk_ptr depend on ATALK or IRDA

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h     | 2 ++
 include/linux/netdevice.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 4d356e168692..40373920ea58 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -113,10 +113,12 @@ extern void aarp_proto_init(void);
 /* Inter module exports */
 
 /* Give a device find its atif control structure */
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
 static inline struct atalk_iface *atalk_find_dev(struct net_device *dev)
 {
 	return dev->atalk_ptr;
 }
+#endif
 
 extern struct atalk_addr *atalk_find_dev_addr(struct net_device *dev);
 extern struct net_device *atrtr_get_dev(struct atalk_addr *sa);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 936dc2c9dca1..dbe6344b727a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1798,7 +1798,9 @@ struct net_device {
 #if IS_ENABLED(CONFIG_TIPC)
 	struct tipc_bearer __rcu *tipc_ptr;
 #endif
+#if IS_ENABLED(CONFIG_IRDA) || IS_ENABLED(CONFIG_ATALK)
 	void 			*atalk_ptr;
+#endif
 	struct in_device __rcu	*ip_ptr;
 #if IS_ENABLED(CONFIG_DECNET)
 	struct dn_dev __rcu     *dn_ptr;

From e92bad5034922776bd5e64e3db739d7607eb7e29 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 08:52:03 -0800
Subject: [PATCH 0104/1678] net: Remove atalk header from socket.c

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index fac8246a8ae8..d83e804d5e65 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,7 +104,6 @@
 #include <linux/ipv6_route.h>
 #include <linux/route.h>
 #include <linux/sockios.h>
-#include <linux/atalk.h>
 #include <net/busy_poll.h>
 #include <linux/errqueue.h>
 

From 836ce5ed729df825ae472778978a09e5d290f543 Mon Sep 17 00:00:00 2001
From: Avinash Dayanand <avinash.dayanand@intel.com>
Date: Tue, 23 Jan 2018 08:50:55 -0800
Subject: [PATCH 0105/1678] i40evf: Fix link up issue when queues are disabled

One of the previous patch fixes the link up issue by ignoring it if
i40evf is not in __I40EVF_RUNNING state. However this doesn't fix the
race condition when queues are disabled esp for ADq on VF. Hence check
if all queues are enabled before starting all queues.

Signed-off-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf.h    |  1 +
 .../net/ethernet/intel/i40evf/i40evf_main.c   |  1 +
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   | 28 +++++++++++++------
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index b6991e8014d8..89ce2f9a0fbe 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -240,6 +240,7 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_ALLMULTI_ON			BIT(14)
 #define I40EVF_FLAG_LEGACY_RX			BIT(15)
 #define I40EVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
+#define I40EVF_FLAG_QUEUES_DISABLED		BIT(17)
 /* duplicates for common code */
 #define I40E_FLAG_DCB_ENABLED			0
 #define I40E_FLAG_RX_CSUM_ENABLED		I40EVF_FLAG_RX_CSUM_ENABLED
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 34fd6c553879..0776b07477a2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1913,6 +1913,7 @@ continue_reset:
 	i40evf_free_all_rx_resources(adapter);
 	i40evf_free_all_tx_resources(adapter);
 
+	adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED;
 	/* kill and reinit the admin queue */
 	i40evf_shutdown_adminq(hw);
 	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 0700f0afe2d3..e8dcc31fcbf2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1017,14 +1017,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 			if (adapter->link_up == link_up)
 				break;
 
-			/* If we get link up message and start queues before
-			 * our queues are configured it will trigger a TX hang.
-			 * In that case, just ignore the link status message,
-			 * we'll get another one after we enable queues and
-			 * actually prepared to send traffic.
-			 */
-			if (link_up && adapter->state != __I40EVF_RUNNING)
-				break;
+			if (link_up) {
+				/* If we get link up message and start queues
+				 * before our queues are configured it will
+				 * trigger a TX hang. In that case, just ignore
+				 * the link status message,we'll get another one
+				 * after we enable queues and actually prepared
+				 * to send traffic.
+				 */
+				if (adapter->state != __I40EVF_RUNNING)
+					break;
+
+				/* For ADq enabled VF, we reconfigure VSIs and
+				 * re-allocate queues. Hence wait till all
+				 * queues are enabled.
+				 */
+				if (adapter->flags &
+				    I40EVF_FLAG_QUEUES_DISABLED)
+					break;
+			}
 
 			adapter->link_up = link_up;
 			if (link_up) {
@@ -1108,6 +1119,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 	case VIRTCHNL_OP_ENABLE_QUEUES:
 		/* enable transmits */
 		i40evf_irq_enable(adapter, true);
+		adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED;
 		break;
 	case VIRTCHNL_OP_DISABLE_QUEUES:
 		i40evf_free_all_tx_resources(adapter);

From eb09f1feb8e5999390a6f149307cb88354232680 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Tue, 23 Jan 2018 08:50:56 -0800
Subject: [PATCH 0106/1678] virtchnl: Add virtchl structures to support queue
 channels

This patch defines new structs in support of the virtchannel message
that the VF sends to the PF to create a queue channel specified by the
user via tc tool.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 40 ++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 3ce61342fa31..1f652ceecf35 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,6 +136,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
 	VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
 	VIRTCHNL_OP_REQUEST_QUEUES = 29,
+	VIRTCHNL_OP_ENABLE_CHANNELS = 30,
+	VIRTCHNL_OP_DISABLE_CHANNELS = 31,
 };
 
 /* This macro is used to generate a compilation error if a structure
@@ -244,6 +246,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
+#define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
 
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
 			       VIRTCHNL_VF_OFFLOAD_VLAN | \
@@ -496,6 +499,30 @@ struct virtchnl_rss_hena {
 
 VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_rss_hena);
 
+/* VIRTCHNL_OP_ENABLE_CHANNELS
+ * VIRTCHNL_OP_DISABLE_CHANNELS
+ * VF sends these messages to enable or disable channels based on
+ * the user specified queue count and queue offset for each traffic class.
+ * This struct encompasses all the information that the PF needs from
+ * VF to create a channel.
+ */
+struct virtchnl_channel_info {
+	u16 count; /* number of queues in a channel */
+	u16 offset; /* queues in a channel start from 'offset' */
+	u32 pad;
+	u64 max_tx_rate;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_channel_info);
+
+struct virtchnl_tc_info {
+	u32	num_tc;
+	u32	pad;
+	struct	virtchnl_channel_info list[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
+
 /* VIRTCHNL_OP_EVENT
  * PF sends this message to inform the VF driver of events that may affect it.
  * No direct response is expected from the VF, though it may generate other
@@ -711,6 +738,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_REQUEST_QUEUES:
 		valid_len = sizeof(struct virtchnl_vf_res_request);
 		break;
+	case VIRTCHNL_OP_ENABLE_CHANNELS:
+		valid_len = sizeof(struct virtchnl_tc_info);
+		if (msglen >= valid_len) {
+			struct virtchnl_tc_info *vti =
+				(struct virtchnl_tc_info *)msg;
+			valid_len += vti->num_tc *
+				sizeof(struct virtchnl_channel_info);
+			if (vti->num_tc == 0)
+				err_msg_format = true;
+		}
+		break;
+	case VIRTCHNL_OP_DISABLE_CHANNELS:
+		break;
 	/* These are always errors coming from the VF. */
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:

From d5b33d02449615de813e60a7b928da5838c98f54 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Tue, 23 Jan 2018 08:50:57 -0800
Subject: [PATCH 0107/1678] i40evf: add ndo_setup_tc callback to i40evf

This patch introduces the callback to the ndo_setup_tc function
in the VF driver. We add a wrapper function to make room for the
upcoming cloud filter patches which add calls to different functions
from setup_tc.

First, we add support for capability exchange for ADQ between the
PF and VF. Next, we add support to take in the mqprio configuration
and configure queues as per the traffic classes, rate limit and the
priorities specified by the user. This is done by passing the channel
config to the PF driver through a virtchannel message.

The flags and bits added, track if ADq is enabled, set max number of
traffic classes to 4 and provide ability to negotiate capability with
the PF.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf.h    |  22 +++
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 154 ++++++++++++++++++
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   |  82 +++++++++-
 3 files changed, 257 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 89ce2f9a0fbe..c8d68c8d2c33 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -52,6 +52,7 @@
 #include <linux/socket.h>
 #include <linux/jiffies.h>
 #include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
 #include <net/udp.h>
 
 #include "i40e_type.h"
@@ -168,6 +169,20 @@ struct i40evf_vlan_filter {
 	bool add;		/* filter needs to be added */
 };
 
+#define I40EVF_MAX_TRAFFIC_CLASS	4
+/* State of traffic class creation */
+enum i40evf_tc_state_t {
+	__I40EVF_TC_INVALID, /* no traffic class, default state */
+	__I40EVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct i40evf_channel_config {
+	struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS];
+	enum i40evf_tc_state_t state;
+	u8 total_qps;
+};
+
 /* Driver state. The order of these is important! */
 enum i40evf_state_t {
 	__I40EVF_STARTUP,		/* driver loaded, probe complete */
@@ -269,6 +284,8 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_AQ_RELEASE_ALLMULTI		BIT(18)
 #define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING	BIT(19)
 #define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING	BIT(20)
+#define I40EVF_FLAG_AQ_ENABLE_CHANNELS		BIT(21)
+#define I40EVF_FLAG_AQ_DISABLE_CHANNELS		BIT(22)
 
 	/* OS defined structs */
 	struct net_device *netdev;
@@ -314,6 +331,9 @@ struct i40evf_adapter {
 	u16 rss_lut_size;
 	u8 *rss_key;
 	u8 *rss_lut;
+	/* ADQ related members */
+	struct i40evf_channel_config ch_config;
+	u8 num_tc;
 };
 
 
@@ -380,4 +400,6 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len);
 void i40evf_notify_client_l2_params(struct i40e_vsi *vsi);
 void i40evf_notify_client_open(struct i40e_vsi *vsi);
 void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
+void i40evf_enable_channels(struct i40evf_adapter *adapter);
+void i40evf_disable_channels(struct i40evf_adapter *adapter);
 #endif /* _I40EVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 0776b07477a2..099d4f59e445 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1732,6 +1732,17 @@ static void i40evf_watchdog_task(struct work_struct *work)
 		i40evf_set_promiscuous(adapter, 0);
 		goto watchdog_done;
 	}
+
+	if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) {
+		i40evf_enable_channels(adapter);
+		goto watchdog_done;
+	}
+
+	if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) {
+		i40evf_disable_channels(adapter);
+		goto watchdog_done;
+	}
+
 	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
 
 	if (adapter->state == __I40EVF_RUNNING)
@@ -2211,6 +2222,148 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
 			i40evf_free_rx_resources(&adapter->rx_rings[i]);
 }
 
+/**
+ * i40evf_validate_channel_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates if the config provided by the user to
+ * configure queue channels is valid or not. Returns 0 on a valid
+ * config.
+ **/
+static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
+				     struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	int i, num_qps = 0;
+
+	if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
+	    mqprio_qopt->qopt.num_tc < 1)
+		return -EINVAL;
+
+	for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
+		if (!mqprio_qopt->qopt.count[i] ||
+		    mqprio_qopt->min_rate[i] ||
+		    mqprio_qopt->max_rate[i] ||
+		    mqprio_qopt->qopt.offset[i] != num_qps)
+			return -EINVAL;
+		num_qps += mqprio_qopt->qopt.count[i];
+	}
+	if (num_qps > MAX_QUEUES)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * __i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_date: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	u8 num_tc = 0, total_qps = 0;
+	int ret = 0, netdev_tc = 0;
+	u16 mode;
+	int i;
+
+	num_tc = mqprio_qopt->qopt.num_tc;
+	mode = mqprio_qopt->mode;
+
+	/* delete queue_channel */
+	if (!mqprio_qopt->qopt.hw) {
+		if (adapter->ch_config.state == __I40EVF_TC_RUNNING) {
+			/* reset the tc configuration */
+			netdev_reset_tc(netdev);
+			adapter->num_tc = 0;
+			netif_tx_stop_all_queues(netdev);
+			netif_tx_disable(netdev);
+			adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+			goto exit;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	/* add queue channel */
+	if (mode == TC_MQPRIO_MODE_CHANNEL) {
+		if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+			dev_err(&adapter->pdev->dev, "ADq not supported\n");
+			return -EOPNOTSUPP;
+		}
+		if (adapter->ch_config.state != __I40EVF_TC_INVALID) {
+			dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+			return -EINVAL;
+		}
+
+		ret = i40evf_validate_ch_config(adapter, mqprio_qopt);
+		if (ret)
+			return ret;
+		/* Return if same TC config is requested */
+		if (adapter->num_tc == num_tc)
+			return 0;
+		adapter->num_tc = num_tc;
+
+		for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+			if (i < num_tc) {
+				adapter->ch_config.ch_info[i].count =
+					mqprio_qopt->qopt.count[i];
+				adapter->ch_config.ch_info[i].offset =
+					mqprio_qopt->qopt.offset[i];
+				total_qps += mqprio_qopt->qopt.count[i];
+			} else {
+				adapter->ch_config.ch_info[i].count = 1;
+				adapter->ch_config.ch_info[i].offset = 0;
+			}
+		}
+		adapter->ch_config.total_qps = total_qps;
+		netif_tx_stop_all_queues(netdev);
+		netif_tx_disable(netdev);
+		adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+		netdev_reset_tc(netdev);
+		/* Report the tc mapping up the stack */
+		netdev_set_num_tc(adapter->netdev, num_tc);
+		for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) {
+			u16 qcount = mqprio_qopt->qopt.count[i];
+			u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+			if (i < num_tc)
+				netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+						    qoffset);
+		}
+	}
+exit:
+	return ret;
+}
+
+/**
+ * i40evf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_date: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			   void *type_data)
+{
+	if (type != TC_SETUP_QDISC_MQPRIO)
+		return -EOPNOTSUPP;
+
+	return __i40evf_setup_tc(netdev, type_data);
+}
+
 /**
  * i40evf_open - Called when a network interface is made active
  * @netdev: network interface device structure
@@ -2478,6 +2631,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= i40evf_netpoll,
 #endif
+	.ndo_setup_tc		= i40evf_setup_tc,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index e8dcc31fcbf2..f6c56141b4fe 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
 	       VIRTCHNL_VF_OFFLOAD_ENCAP |
 	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
-	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+	       VIRTCHNL_VF_OFFLOAD_ADQ;
 
 	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
 	adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -972,6 +973,70 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
 	netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
 }
 
+/**
+ * i40evf_enable_channel
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via tc tool.
+ **/
+void i40evf_enable_channels(struct i40evf_adapter *adapter)
+{
+	struct virtchnl_tc_info *vti = NULL;
+	u16 len;
+	int i;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) +
+	       sizeof(struct virtchnl_tc_info);
+
+	vti = kzalloc(len, GFP_KERNEL);
+	if (!vti)
+		return;
+	vti->num_tc = adapter->num_tc;
+	for (i = 0; i < vti->num_tc; i++) {
+		vti->list[i].count = adapter->ch_config.ch_info[i].count;
+		vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+	}
+
+	adapter->ch_config.state = __I40EVF_TC_RUNNING;
+	adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+	adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+	adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS;
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS,
+			   (u8 *)vti, len);
+	kfree(vti);
+}
+
+/**
+ * i40evf_disable_channel
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable channels that are configured
+ **/
+void i40evf_disable_channels(struct i40evf_adapter *adapter)
+{
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	adapter->ch_config.state = __I40EVF_TC_INVALID;
+	adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+	adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+	adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS;
+	i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS,
+			   NULL, 0);
+}
+
 /**
  * i40evf_request_reset
  * @adapter: adapter structure
@@ -1080,6 +1145,21 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 			dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
 				i40evf_stat_str(&adapter->hw, v_retval));
 			break;
+		case VIRTCHNL_OP_ENABLE_CHANNELS:
+			dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+				i40evf_stat_str(&adapter->hw, v_retval));
+			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->ch_config.state = __I40EVF_TC_INVALID;
+			netdev_reset_tc(netdev);
+			netif_tx_start_all_queues(netdev);
+			break;
+		case VIRTCHNL_OP_DISABLE_CHANNELS:
+			dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+				i40evf_stat_str(&adapter->hw, v_retval));
+			adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->ch_config.state = __I40EVF_TC_RUNNING;
+			netif_tx_start_all_queues(netdev);
+			break;
 		default:
 			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
 				v_retval,

From c27eac48160de72dee33d42b5a33cc7b8a2eb1f5 Mon Sep 17 00:00:00 2001
From: Avinash Dayanand <avinash.dayanand@intel.com>
Date: Tue, 23 Jan 2018 08:50:58 -0800
Subject: [PATCH 0108/1678] i40e: Enable ADq and create queue channel/s on VF

This patch enables ADq and creates queue channels on a VF. An ADq
enabled VF can have up to 4 VSIs and each one of them represents
a traffic class and this is termed as a queue channel. Each of these
VSIs can have up to 4 queues. This patch services the request for
enabling ADq and adds queue channel based on the TC mqprio info
provided by the user in the VF.

Initially a check is made to see if spoof check is OFF, if not ADq
will not be enabled. PF notifies VF for a reset in order to complete
the creation of ADq resources i.e. creation of additional VSIs and
allocation of queues as per TC information, all in the reset path.

Steps:
======
1. Turn off the spoof check
2. Enable ADq using tc mqprio command with or without rate limit.
3. Pass traffic.

Example:
========
% ip link set dev eth0 vf 0 spoofchk off
% tc qdisc add dev $iface root mqprio num_tc 4 map\
	0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 queues\
	4@0 4@4 4@8 4@8 hw 1 mode channel

Expected results:
=================
1. Total number of queues for the VF should be sum of queues of all TCs.
2. Traffic flow should be normal without errors.

Signed-off-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_type.h   |   2 +-
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 429 +++++++++++++++---
 .../ethernet/intel/i40e/i40e_virtchnl_pf.h    |  17 +
 3 files changed, 379 insertions(+), 69 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index cd294e6a8587..b0eed8c0b2f2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -39,7 +39,7 @@
 #define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
 
 #define I40E_MAX_VSI_QP			16
-#define I40E_MAX_VF_VSI			3
+#define I40E_MAX_VF_VSI			4
 #define I40E_MAX_CHAINED_RX_BUFFERS	5
 #define I40E_MAX_PF_UDP_OFFLOAD_PORTS	16
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index e9309fb9084b..7452d4387818 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -257,6 +257,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
 	return pf_queue_id;
 }
 
+/**
+ * i40e_get_real_pf_qid
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queue_id: queue number
+ *
+ * wrapper function to get pf_queue_id handling ADq code as well
+ **/
+static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
+{
+	int i;
+
+	if (vf->adq_enabled) {
+		/* Although VF considers all the queues(can be 1 to 16) as its
+		 * own but they may actually belong to different VSIs(up to 4).
+		 * We need to find which queues belongs to which VSI.
+		 */
+		for (i = 0; i < vf->num_tc; i++) {
+			if (queue_id < vf->ch[i].num_qps) {
+				vsi_id = vf->ch[i].vsi_id;
+				break;
+			}
+			/* find right queue id which is relative to a
+			 * given VSI.
+			 */
+			queue_id -= vf->ch[i].num_qps;
+			}
+		}
+
+	return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
+}
+
 /**
  * i40e_config_irq_link_list
  * @vf: pointer to the VF info
@@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 
 	vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
 	qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
-	pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+	pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
 	reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
 
 	wr32(hw, reg_idx, reg);
@@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
 		if (next_q < size) {
 			vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
 			qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
-			pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
-							      vsi_queue_id);
+			pf_queue_id = i40e_get_real_pf_qid(vf,
+							   vsi_id,
+							   vsi_queue_id);
 		} else {
 			pf_queue_id = I40E_QUEUE_END_OF_LIST;
 			qtype = 0;
@@ -669,18 +702,19 @@ error_param:
 /**
  * i40e_alloc_vsi_res
  * @vf: pointer to the VF info
- * @type: type of VSI to allocate
+ * @idx: VSI index, applies only for ADq mode, zero otherwise
  *
  * alloc VF vsi context & resources
  **/
-static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
 {
 	struct i40e_mac_filter *f = NULL;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi;
 	int ret = 0;
 
-	vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id);
+	vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
+			     vf->vf_id);
 
 	if (!vsi) {
 		dev_err(&pf->pdev->dev,
@@ -689,7 +723,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 		ret = -ENOENT;
 		goto error_alloc_vsi_res;
 	}
-	if (type == I40E_VSI_SRIOV) {
+
+	if (!idx) {
 		u64 hena = i40e_pf_get_default_rss_hena(pf);
 		u8 broadcast[ETH_ALEN];
 
@@ -721,12 +756,17 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 		wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
 		wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
+		/* program mac filter only for VF VSI */
+		ret = i40e_sync_vsi_filters(vsi);
+		if (ret)
+			dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
 	}
 
-	/* program mac filter */
-	ret = i40e_sync_vsi_filters(vsi);
-	if (ret)
-		dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+	/* storing VSI index and id for ADq and don't apply the mac filter */
+	if (vf->adq_enabled) {
+		vf->ch[idx].vsi_idx = vsi->idx;
+		vf->ch[idx].vsi_id = vsi->id;
+	}
 
 	/* Set VF bandwidth if specified */
 	if (vf->tx_rate) {
@@ -741,6 +781,92 @@ error_alloc_vsi_res:
 	return ret;
 }
 
+/**
+ * i40e_map_pf_queues_to_vsi
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI.
+ **/
+static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg, num_tc = 1; /* VF has at least one traffic class */
+	u16 vsi_id, qps;
+	int i, j;
+
+	if (vf->adq_enabled)
+		num_tc = vf->num_tc;
+
+	for (i = 0; i < num_tc; i++) {
+		if (vf->adq_enabled) {
+			qps = vf->ch[i].num_qps;
+			vsi_id =  vf->ch[i].vsi_id;
+		} else {
+			qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+			vsi_id = vf->lan_vsi_id;
+		}
+
+		for (j = 0; j < 7; j++) {
+			if (j * 2 >= qps) {
+				/* end of list */
+				reg = 0x07FF07FF;
+			} else {
+				u16 qid = i40e_vc_get_pf_queue_id(vf,
+								  vsi_id,
+								  j * 2);
+				reg = qid;
+				qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
+							      (j * 2) + 1);
+				reg |= qid << 16;
+			}
+			i40e_write_rx_ctl(hw,
+					  I40E_VSILAN_QTABLE(j, vsi_id),
+					  reg);
+		}
+	}
+}
+
+/**
+ * i40e_map_pf_to_vf_queues
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the second part VPLAN_QTABLE & completes VF mappings.
+ **/
+static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg, total_qps = 0;
+	u32 qps, num_tc = 1; /* VF has at least one traffic class */
+	u16 vsi_id, qid;
+	int i, j;
+
+	if (vf->adq_enabled)
+		num_tc = vf->num_tc;
+
+	for (i = 0; i < num_tc; i++) {
+		if (vf->adq_enabled) {
+			qps = vf->ch[i].num_qps;
+			vsi_id =  vf->ch[i].vsi_id;
+		} else {
+			qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+			vsi_id = vf->lan_vsi_id;
+		}
+
+		for (j = 0; j < qps; j++) {
+			qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
+
+			reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+			wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
+			     reg);
+			total_qps++;
+		}
+	}
+}
+
 /**
  * i40e_enable_vf_mappings
  * @vf: pointer to the VF info
@@ -751,8 +877,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
 {
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
-	u32 reg, total_queue_pairs = 0;
-	int j;
+	u32 reg;
 
 	/* Tell the hardware we're using noncontiguous mapping. HW requires
 	 * that VF queues be mapped using this method, even when they are
@@ -765,30 +890,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf)
 	reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
 	wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
 
-	/* map PF queues to VF queues */
-	for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) {
-		u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j);
-
-		reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
-		wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
-		total_queue_pairs++;
-	}
-
-	/* map PF queues to VSI */
-	for (j = 0; j < 7; j++) {
-		if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) {
-			reg = 0x07FF07FF;	/* unused */
-		} else {
-			u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
-							  j * 2);
-			reg = qid;
-			qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id,
-						      (j * 2) + 1);
-			reg |= qid << 16;
-		}
-		i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id),
-				  reg);
-	}
+	i40e_map_pf_to_vf_queues(vf);
+	i40e_map_pf_queues_to_vsi(vf);
 
 	i40e_flush(hw);
 }
@@ -824,7 +927,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
 	u32 reg_idx, reg;
-	int i, msix_vf;
+	int i, j, msix_vf;
 
 	/* Start by disabling VF's configuration API to prevent the OS from
 	 * accessing the VF's VSI after it's freed / invalidated.
@@ -846,6 +949,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
 		vf->lan_vsi_id = 0;
 		vf->num_mac = 0;
 	}
+
+	/* do the accounting and remove additional ADq VSI's */
+	if (vf->adq_enabled && vf->ch[0].vsi_idx) {
+		for (j = 0; j < vf->num_tc; j++) {
+			/* At this point VSI0 is already released so don't
+			 * release it again and only clear their values in
+			 * structure variables
+			 */
+			if (j)
+				i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
+			vf->ch[j].vsi_idx = 0;
+			vf->ch[j].vsi_id = 0;
+		}
+	}
 	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
 
 	/* disable interrupts so the VF starts in a known state */
@@ -891,7 +1008,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
 {
 	struct i40e_pf *pf = vf->pf;
 	int total_queue_pairs = 0;
-	int ret;
+	int ret, idx;
 
 	if (vf->num_req_queues &&
 	    vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
@@ -900,11 +1017,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
 		pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
 
 	/* allocate hw vsi context & associated resources */
-	ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
+	ret = i40e_alloc_vsi_res(vf, 0);
 	if (ret)
 		goto error_alloc;
 	total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
 
+	/* allocate additional VSIs based on tc information for ADq */
+	if (vf->adq_enabled) {
+		if (pf->queues_left >=
+		    (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
+			/* TC 0 always belongs to VF VSI */
+			for (idx = 1; idx < vf->num_tc; idx++) {
+				ret = i40e_alloc_vsi_res(vf, idx);
+				if (ret)
+					goto error_alloc;
+			}
+			/* send correct number of queues */
+			total_queue_pairs = I40E_MAX_VF_QUEUES;
+		} else {
+			dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
+				 vf->vf_id);
+			vf->adq_enabled = false;
+		}
+	}
+
 	/* We account for each VF to get a default number of queue pairs.  If
 	 * the VF has now requested more, we need to account for that to make
 	 * certain we never request more queues than we actually have left in
@@ -1631,6 +1767,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
+
 	vfres->num_vsis = num_vsis;
 	vfres->num_queue_pairs = vf->num_queue_pairs;
 	vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1855,27 +1994,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	    (struct virtchnl_vsi_queue_config_info *)msg;
 	struct virtchnl_queue_pair_info *qpi;
 	struct i40e_pf *pf = vf->pf;
-	u16 vsi_id, vsi_queue_id;
+	u16 vsi_id, vsi_queue_id = 0;
 	i40e_status aq_ret = 0;
-	int i;
+	int i, j = 0, idx = 0;
+
+	vsi_id = qci->vsi_id;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
 
-	vsi_id = qci->vsi_id;
 	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
+
 	for (i = 0; i < qci->num_queue_pairs; i++) {
 		qpi = &qci->qpair[i];
-		vsi_queue_id = qpi->txq.queue_id;
-		if ((qpi->txq.vsi_id != vsi_id) ||
-		    (qpi->rxq.vsi_id != vsi_id) ||
-		    (qpi->rxq.queue_id != vsi_queue_id) ||
-		    !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
+
+		if (!vf->adq_enabled) {
+			vsi_queue_id = qpi->txq.queue_id;
+
+			if (qpi->txq.vsi_id != qci->vsi_id ||
+			    qpi->rxq.vsi_id != qci->vsi_id ||
+			    qpi->rxq.queue_id != vsi_queue_id) {
+				aq_ret = I40E_ERR_PARAM;
+				goto error_param;
+			}
+		}
+
+		if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
 			aq_ret = I40E_ERR_PARAM;
 			goto error_param;
 		}
@@ -1887,9 +2036,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 			aq_ret = I40E_ERR_PARAM;
 			goto error_param;
 		}
+
+		/* For ADq there can be up to 4 VSIs with max 4 queues each.
+		 * VF does not know about these additional VSIs and all
+		 * it cares is about its own queues. PF configures these queues
+		 * to its appropriate VSIs based on TC mapping
+		 **/
+		if (vf->adq_enabled) {
+			if (j == (vf->ch[idx].num_qps - 1)) {
+				idx++;
+				j = 0; /* resetting the queue count */
+				vsi_queue_id = 0;
+			} else {
+				j++;
+				vsi_queue_id++;
+			}
+			vsi_id = vf->ch[idx].vsi_id;
+		}
 	}
 	/* set vsi num_queue_pairs in use to num configured by VF */
-	pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs;
+	if (!vf->adq_enabled) {
+		pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+			qci->num_queue_pairs;
+	} else {
+		for (i = 0; i < vf->num_tc; i++)
+			pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
+			       vf->ch[i].num_qps;
+	}
 
 error_param:
 	/* send the response to the VF */
@@ -1897,6 +2070,33 @@ error_param:
 				       aq_ret);
 }
 
+/**
+ * i40e_validate_queue_map
+ * @vsi_id: vsi id
+ * @queuemap: Tx or Rx queue map
+ *
+ * check if Tx or Rx queue map is valid
+ **/
+static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
+				   unsigned long queuemap)
+{
+	u16 vsi_queue_id, queue_id;
+
+	for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
+		if (vf->adq_enabled) {
+			vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+			queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
+		} else {
+			queue_id = vsi_queue_id;
+		}
+
+		if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * i40e_vc_config_irq_map_msg
  * @vf: pointer to the VF info
@@ -1911,9 +2111,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	struct virtchnl_irq_map_info *irqmap_info =
 	    (struct virtchnl_irq_map_info *)msg;
 	struct virtchnl_vector_map *map;
-	u16 vsi_id, vsi_queue_id, vector_id;
+	u16 vsi_id, vector_id;
 	i40e_status aq_ret = 0;
-	unsigned long tempmap;
 	int i;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
@@ -1923,7 +2122,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	for (i = 0; i < irqmap_info->num_vectors; i++) {
 		map = &irqmap_info->vecmap[i];
-
 		vector_id = map->vector_id;
 		vsi_id = map->vsi_id;
 		/* validate msg params */
@@ -1933,23 +2131,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 			goto error_param;
 		}
 
-		/* lookout for the invalid queue index */
-		tempmap = map->rxq_map;
-		for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
-			if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
-						      vsi_queue_id)) {
-				aq_ret = I40E_ERR_PARAM;
-				goto error_param;
-			}
+		if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
+			aq_ret = I40E_ERR_PARAM;
+			goto error_param;
 		}
 
-		tempmap = map->txq_map;
-		for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
-			if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
-						      vsi_queue_id)) {
-				aq_ret = I40E_ERR_PARAM;
-				goto error_param;
-			}
+		if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
+			aq_ret = I40E_ERR_PARAM;
+			goto error_param;
 		}
 
 		i40e_config_irq_link_list(vf, vsi_id, map);
@@ -1975,6 +2164,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	struct i40e_pf *pf = vf->pf;
 	u16 vsi_id = vqs->vsi_id;
 	i40e_status aq_ret = 0;
+	int i;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
 		aq_ret = I40E_ERR_PARAM;
@@ -1993,6 +2183,16 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 
 	if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
 		aq_ret = I40E_ERR_TIMEOUT;
+
+	/* need to start the rings for additional ADq VSI's as well */
+	if (vf->adq_enabled) {
+		/* zero belongs to LAN VSI */
+		for (i = 1; i < vf->num_tc; i++) {
+			if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
+				aq_ret = I40E_ERR_TIMEOUT;
+		}
+	}
+
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
@@ -2687,6 +2887,97 @@ err:
 				       aq_ret);
 }
 
+/**
+ * i40e_vc_add_qch_msg: Add queue channel and enable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_tc_info *tci =
+		(struct virtchnl_tc_info *)msg;
+	struct i40e_pf *pf = vf->pf;
+	int i, adq_request_qps = 0;
+	i40e_status aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	/* ADq cannot be applied if spoof check is ON */
+	if (vf->spoofchk) {
+		dev_err(&pf->pdev->dev,
+			"Spoof check is ON, turn it OFF to enable ADq\n");
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+		dev_err(&pf->pdev->dev,
+			"VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
+			vf->vf_id);
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	/* max number of traffic classes for VF currently capped at 4 */
+	if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
+		dev_err(&pf->pdev->dev,
+			"VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n",
+			vf->vf_id, tci->num_tc);
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	/* validate queues for each TC */
+	for (i = 0; i < tci->num_tc; i++)
+		if (!tci->list[i].count ||
+		    tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
+			dev_err(&pf->pdev->dev,
+				"VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n",
+				vf->vf_id, i, tci->list[i].count);
+			aq_ret = I40E_ERR_PARAM;
+			goto err;
+		}
+
+	/* need Max VF queues but already have default number of queues */
+	adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
+
+	if (pf->queues_left < adq_request_qps) {
+		dev_err(&pf->pdev->dev,
+			"No queues left to allocate to VF %d\n",
+			vf->vf_id);
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	} else {
+		/* we need to allocate max VF queues to enable ADq so as to
+		 * make sure ADq enabled VF always gets back queues when it
+		 * goes through a reset.
+		 */
+		vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
+	}
+
+	/* parse data from the queue channel info */
+	vf->num_tc = tci->num_tc;
+	for (i = 0; i < vf->num_tc; i++)
+		vf->ch[i].num_qps = tci->list[i].count;
+
+	/* set this flag only after making sure all inputs are sane */
+	vf->adq_enabled = true;
+
+	/* reset the VF in order to allocate resources */
+	i40e_vc_notify_vf_reset(vf);
+	i40e_reset_vf(vf, false);
+
+	return I40E_SUCCESS;
+
+	/* send the response to the VF */
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
+				       aq_ret);
+}
+
 /**
  * i40e_vc_process_vf_msg
  * @pf: pointer to the PF structure
@@ -2816,7 +3107,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 	case VIRTCHNL_OP_REQUEST_QUEUES:
 		ret = i40e_vc_request_queues_msg(vf, msg, msglen);
 		break;
-
+	case VIRTCHNL_OP_ENABLE_CHANNELS:
+		ret = i40e_vc_add_qch_msg(vf, msg);
+		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 5efc4f92bb37..67543dbefcd8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -69,6 +69,18 @@ enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
+/* In ADq, max 4 VSI's can be allocated per VF including primary VF VSI.
+ * These variables are used to store indices, id's and number of queues
+ * for each VSI including that of primary VF VSI. Each Traffic class is
+ * termed as channel and each channel can in-turn have 4 queues which
+ * means max 16 queues overall per VF.
+ */
+struct i40evf_channel {
+	u16 vsi_idx; /* index in PF struct for all channel VSIs */
+	u16 vsi_id; /* VSI ID used by firmware */
+	u16 num_qps; /* number of queue pairs requested by user */
+};
+
 /* VF information structure */
 struct i40e_vf {
 	struct i40e_pf *pf;
@@ -111,6 +123,11 @@ struct i40e_vf {
 	u16 num_mac;
 	u16 num_vlan;
 
+	/* ADq related variables */
+	bool adq_enabled; /* flag to enable adq */
+	u8 num_tc;
+	struct i40evf_channel ch[I40E_MAX_VF_VSI];
+
 	/* RDMA Client */
 	struct virtchnl_iwarp_qvlist_info *qvlist_info;
 };

From 5e97ce634802f12981a98ac5a178d225032ac890 Mon Sep 17 00:00:00 2001
From: Avinash Dayanand <avinash.dayanand@intel.com>
Date: Tue, 23 Jan 2018 08:50:59 -0800
Subject: [PATCH 0109/1678] i40evf: Alloc queues for ADq on VF

This patch allocates number of queues requested by the user as a part
of TC command when ADq is enabled on a VF.

In order to be consistent in design with PF implementation of ADq,
don't allow to set channels via ethtool from VF when ADq is already
enabled. This means the users will not be able to change the number of
queues/channels via ethtool for a VF when ADq is ON. In order to be
able to use set channels, users will be required to disable ADq first
and then try setting the channels again.

When ADq is enabled on VF, it goes through a reset during which VSIs
and queues are re-configured. Meanwhile if we receive link status
message from PF even before the queues are re-configured, just ignore
this link up message.

Signed-off-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c |  6 ++++++
 drivers/net/ethernet/intel/i40evf/i40evf_main.c    | 14 ++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index aded3ad7763e..e6793255de0b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -695,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev,
 		return -EINVAL;
 	}
 
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+		return -EINVAL;
+	}
+
 	/* All of these should have already been checked by ethtool before this
 	 * even gets to us, but just to be sure.
 	 */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 099d4f59e445..8a26393459ef 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1164,6 +1164,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
 	 */
 	if (adapter->num_req_queues)
 		num_active_queues = adapter->num_req_queues;
+	else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+		 adapter->num_tc)
+		num_active_queues = adapter->ch_config.total_qps;
 	else
 		num_active_queues = min_t(int,
 					  adapter->vsi_res->num_queue_pairs,
@@ -1491,6 +1494,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
 		goto err_alloc_q_vectors;
 	}
 
+	/* If we've made it so far while ADq flag being ON, then we haven't
+	 * bailed out anywhere in middle. And ADq isn't just enabled but actual
+	 * resources have been allocated in the reset path.
+	 * Now we can truly claim that ADq is enabled.
+	 */
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc)
+		dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+			 adapter->num_tc);
+
 	dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
 		 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
 		 adapter->num_active_queues);
@@ -3263,6 +3276,7 @@ static void i40evf_remove(struct pci_dev *pdev)
 	/* Shut down all the garbage mashers on the detention level */
 	adapter->state = __I40EVF_REMOVE;
 	adapter->aq_required = 0;
+	adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
 	i40evf_request_reset(adapter);
 	msleep(50);
 	/* If the FW isn't responding, kick it once, but only once. */

From c4998aa30211e6080ce15b7f89ee140bb023df5a Mon Sep 17 00:00:00 2001
From: Avinash Dayanand <avinash.dayanand@intel.com>
Date: Tue, 23 Jan 2018 08:51:00 -0800
Subject: [PATCH 0110/1678] i40e: Delete queue channel for ADq on VF

This patch takes care of freeing up all the VSIs, queues and
other ADq related software and hardware resources, when a user
requests for deletion of ADq on VF.

Example command:
tc qdisc del dev eth0 root

Signed-off-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 7452d4387818..a5565d07f569 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1672,6 +1672,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
 				      sizeof(struct virtchnl_version_info));
 }
 
+/**
+ * i40e_del_qch - delete all the additional VSIs created as a part of ADq
+ * @vf: pointer to VF structure
+ **/
+static void i40e_del_qch(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	int i;
+
+	/* first element in the array belongs to primary VF VSI and we shouldn't
+	 * delete it. We should however delete the rest of the VSIs created
+	 */
+	for (i = 1; i < vf->num_tc; i++) {
+		if (vf->ch[i].vsi_idx) {
+			i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
+			vf->ch[i].vsi_idx = 0;
+			vf->ch[i].vsi_id = 0;
+		}
+	}
+}
+
 /**
  * i40e_vc_get_vf_resources_msg
  * @vf: pointer to the VF info
@@ -2978,6 +2999,45 @@ err:
 				       aq_ret);
 }
 
+/**
+ * i40e_vc_del_qch_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct i40e_pf *pf = vf->pf;
+	i40e_status aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	if (vf->adq_enabled) {
+		i40e_del_qch(vf);
+		vf->adq_enabled = false;
+		vf->num_tc = 0;
+		dev_info(&pf->pdev->dev,
+			 "Deleting Queue Channels for ADq on VF %d\n",
+			 vf->vf_id);
+	} else {
+		dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
+			 vf->vf_id);
+		aq_ret = I40E_ERR_PARAM;
+	}
+
+	/* reset the VF in order to allocate resources */
+	i40e_vc_notify_vf_reset(vf);
+	i40e_reset_vf(vf, false);
+
+	return I40E_SUCCESS;
+
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
+				       aq_ret);
+}
+
 /**
  * i40e_vc_process_vf_msg
  * @pf: pointer to the PF structure
@@ -3110,6 +3170,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 	case VIRTCHNL_OP_ENABLE_CHANNELS:
 		ret = i40e_vc_add_qch_msg(vf, msg);
 		break;
+	case VIRTCHNL_OP_DISABLE_CHANNELS:
+		ret = i40e_vc_del_qch_msg(vf, msg);
+		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",

From 591532d614f4ad24376ae901946cf5bfee9237df Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Tue, 23 Jan 2018 08:51:01 -0800
Subject: [PATCH 0111/1678] i40evf: Add support to configure bw via tc tool

This patch adds support to configure bandwidth for the traffic
classes via tc tool. The required information is passed to the PF
which is used in the process of setting up the traffic classes.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf.h    |  1 +
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 66 ++++++++++++++++++-
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   |  3 +
 3 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index c8d68c8d2c33..150d676ab87c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -107,6 +107,7 @@ struct i40e_vsi {
 
 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
 #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40EVF_MBPS_DIVISOR	125000 /* divisor to convert to Mbps */
 
 /* MAX_MSIX_Q_VECTORS of these are allocated,
  * but we only use one per queue-specific vector.
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 8a26393459ef..2f677a2a4b5c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2235,6 +2235,48 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
 			i40evf_free_rx_resources(&adapter->rx_rings[i]);
 }
 
+/**
+ * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter,
+					u64 max_tx_rate)
+{
+	int speed = 0, ret = 0;
+
+	switch (adapter->link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case I40E_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case I40E_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case I40E_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case I40E_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case I40E_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	default:
+		break;
+	}
+
+	if (max_tx_rate > speed) {
+		dev_err(&adapter->pdev->dev,
+			"Invalid tx rate specified\n");
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
 /**
  * i40evf_validate_channel_config - validate queue mapping info
  * @adapter: board private structure
@@ -2247,7 +2289,10 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter)
 static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
 				     struct tc_mqprio_qopt_offload *mqprio_qopt)
 {
+	u64 total_max_rate = 0;
 	int i, num_qps = 0;
+	u64 tx_rate = 0;
+	int ret = 0;
 
 	if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS ||
 	    mqprio_qopt->qopt.num_tc < 1)
@@ -2255,16 +2300,24 @@ static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
 
 	for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
 		if (!mqprio_qopt->qopt.count[i] ||
-		    mqprio_qopt->min_rate[i] ||
-		    mqprio_qopt->max_rate[i] ||
 		    mqprio_qopt->qopt.offset[i] != num_qps)
 			return -EINVAL;
+		if (mqprio_qopt->min_rate[i]) {
+			dev_err(&adapter->pdev->dev,
+				"Invalid min tx rate (greater than 0) specified\n");
+			return -EINVAL;
+		}
+		/*convert to Mbps */
+		tx_rate = div_u64(mqprio_qopt->max_rate[i],
+				  I40EVF_MBPS_DIVISOR);
+		total_max_rate += tx_rate;
 		num_qps += mqprio_qopt->qopt.count[i];
 	}
 	if (num_qps > MAX_QUEUES)
 		return -EINVAL;
 
-	return 0;
+	ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate);
+	return ret;
 }
 
 /**
@@ -2285,6 +2338,7 @@ static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
 	struct virtchnl_vf_resource *vfres = adapter->vf_res;
 	u8 num_tc = 0, total_qps = 0;
 	int ret = 0, netdev_tc = 0;
+	u64 max_tx_rate;
 	u16 mode;
 	int i;
 
@@ -2332,6 +2386,12 @@ static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
 				adapter->ch_config.ch_info[i].offset =
 					mqprio_qopt->qopt.offset[i];
 				total_qps += mqprio_qopt->qopt.count[i];
+				max_tx_rate = mqprio_qopt->max_rate[i];
+				/* convert to Mbps */
+				max_tx_rate = div_u64(max_tx_rate,
+						      I40EVF_MBPS_DIVISOR);
+				adapter->ch_config.ch_info[i].max_tx_rate =
+					max_tx_rate;
 			} else {
 				adapter->ch_config.ch_info[i].count = 1;
 				adapter->ch_config.ch_info[i].offset = 0;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index f6c56141b4fe..647ea104d435 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1003,6 +1003,9 @@ void i40evf_enable_channels(struct i40evf_adapter *adapter)
 	for (i = 0; i < vti->num_tc; i++) {
 		vti->list[i].count = adapter->ch_config.ch_info[i].count;
 		vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+		vti->list[i].pad = 0;
+		vti->list[i].max_tx_rate =
+				adapter->ch_config.ch_info[i].max_tx_rate;
 	}
 
 	adapter->ch_config.state = __I40EVF_TC_RUNNING;

From 0c483bd4b82619397e7f655eea1ba3ba9d9a15a2 Mon Sep 17 00:00:00 2001
From: Avinash Dayanand <avinash.dayanand@intel.com>
Date: Tue, 23 Jan 2018 08:51:02 -0800
Subject: [PATCH 0112/1678] i40e: Service request to configure bandwidth for
 ADq on a VF

This patch handles the request from ADq enabled VF to allocate
bandwidth to each traffic class which means for each VSI.

Signed-off-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 56 ++++++++++++++++++-
 .../ethernet/intel/i40e/i40e_virtchnl_pf.h    |  1 +
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index a5565d07f569..b4bad70c82cd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -711,6 +711,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
 	struct i40e_mac_filter *f = NULL;
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_vsi *vsi;
+	u64 max_tx_rate = 0;
 	int ret = 0;
 
 	vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
@@ -770,8 +771,15 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
 
 	/* Set VF bandwidth if specified */
 	if (vf->tx_rate) {
+		max_tx_rate = vf->tx_rate;
+	} else if (vf->ch[idx].max_tx_rate) {
+		max_tx_rate = vf->ch[idx].max_tx_rate;
+	}
+
+	if (max_tx_rate) {
+		max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
 		ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
-						  vf->tx_rate / 50, 0, NULL);
+						  max_tx_rate, 0, NULL);
 		if (ret)
 			dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
 				vf->vf_id, ret);
@@ -2918,7 +2926,8 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
 	struct virtchnl_tc_info *tci =
 		(struct virtchnl_tc_info *)msg;
 	struct i40e_pf *pf = vf->pf;
-	int i, adq_request_qps = 0;
+	struct i40e_link_status *ls = &pf->hw.phy.link_info;
+	int i, adq_request_qps = 0, speed = 0;
 	i40e_status aq_ret = 0;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
@@ -2979,10 +2988,51 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
 		vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
 	}
 
+	/* get link speed in MB to validate rate limit */
+	switch (ls->link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = SPEED_100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = SPEED_1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = SPEED_10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = SPEED_20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = SPEED_25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = SPEED_40000;
+		break;
+	default:
+		dev_err(&pf->pdev->dev,
+			"Cannot detect link speed\n");
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
 	/* parse data from the queue channel info */
 	vf->num_tc = tci->num_tc;
-	for (i = 0; i < vf->num_tc; i++)
+	for (i = 0; i < vf->num_tc; i++) {
+		if (tci->list[i].max_tx_rate) {
+			if (tci->list[i].max_tx_rate > speed) {
+				dev_err(&pf->pdev->dev,
+					"Invalid max tx rate %llu specified for VF %d.",
+					tci->list[i].max_tx_rate,
+					vf->vf_id);
+				aq_ret = I40E_ERR_PARAM;
+				goto err;
+			} else {
+				vf->ch[i].max_tx_rate =
+					tci->list[i].max_tx_rate;
+			}
+		}
 		vf->ch[i].num_qps = tci->list[i].count;
+	}
 
 	/* set this flag only after making sure all inputs are sane */
 	vf->adq_enabled = true;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 67543dbefcd8..6e794c8a1ef3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -79,6 +79,7 @@ struct i40evf_channel {
 	u16 vsi_idx; /* index in PF struct for all channel VSIs */
 	u16 vsi_id; /* VSI ID used by firmware */
 	u16 num_qps; /* number of queue pairs requested by user */
+	u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
 };
 
 /* VF information structure */

From 0718e560a330599d15fddc37651d693c7a09e49e Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Tue, 23 Jan 2018 08:51:03 -0800
Subject: [PATCH 0113/1678] virtchnl: Add a macro to check the size of a union

This patch adds a macro to check if the size of a union is correct.
It throws a divide by zero error if the union is not of the correct
size.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 1f652ceecf35..6fe630ebbf23 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -140,13 +140,15 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_CHANNELS = 31,
 };
 
-/* This macro is used to generate a compilation error if a structure
+/* These macros are used to generate compilation errors if a structure/union
  * is not exactly the correct length. It gives a divide by zero error if the
- * structure is not of the correct size, otherwise it creates an enum that is
- * never used.
+ * structure/union is not of the correct size, otherwise it creates an enum
+ * that is never used.
  */
 #define VIRTCHNL_CHECK_STRUCT_LEN(n, X) enum virtchnl_static_assert_enum_##X \
 	{ virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+#define VIRTCHNL_CHECK_UNION_LEN(n, X) enum virtchnl_static_asset_enum_##X \
+	{ virtchnl_static_assert_##X = (n)/((sizeof(union X) == (n)) ? 1 : 0) }
 
 /* Virtual channel message descriptor. This overlays the admin queue
  * descriptor. All other data is passed in external buffers.

From 3872c8d44c2e489bcce0c743e808a4135e8da228 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Tue, 23 Jan 2018 08:51:04 -0800
Subject: [PATCH 0114/1678] virtchnl: Add filter data structures

This patch adds infrastructure to send virtchnl messages to the
PF to configure filters on the VF. The patch adds a struct
called virtchnl_filter which contains information about the fields
in the user-specified tc filter.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 59 ++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 6fe630ebbf23..b0a7f315bfbe 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -138,6 +138,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_REQUEST_QUEUES = 29,
 	VIRTCHNL_OP_ENABLE_CHANNELS = 30,
 	VIRTCHNL_OP_DISABLE_CHANNELS = 31,
+	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
+	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 };
 
 /* These macros are used to generate compilation errors if a structure/union
@@ -525,6 +527,57 @@ struct virtchnl_tc_info {
 
 VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_tc_info);
 
+/* VIRTCHNL_ADD_CLOUD_FILTER
+ * VIRTCHNL_DEL_CLOUD_FILTER
+ * VF sends these messages to add or delete a cloud filter based on the
+ * user specified match and action filters. These structures encompass
+ * all the information that the PF needs from the VF to add/delete a
+ * cloud filter.
+ */
+
+struct virtchnl_l4_spec {
+	u8	src_mac[ETH_ALEN];
+	u8	dst_mac[ETH_ALEN];
+	__be16	vlan_id;
+	__be16	pad; /* reserved for future use */
+	__be32	src_ip[4];
+	__be32	dst_ip[4];
+	__be16	src_port;
+	__be16	dst_port;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(52, virtchnl_l4_spec);
+
+union virtchnl_flow_spec {
+	struct	virtchnl_l4_spec tcp_spec;
+	u8	buffer[128]; /* reserved for future use */
+};
+
+VIRTCHNL_CHECK_UNION_LEN(128, virtchnl_flow_spec);
+
+enum virtchnl_action {
+	/* action types */
+	VIRTCHNL_ACTION_DROP = 0,
+	VIRTCHNL_ACTION_TC_REDIRECT,
+};
+
+enum virtchnl_flow_type {
+	/* flow types */
+	VIRTCHNL_TCP_V4_FLOW = 0,
+	VIRTCHNL_TCP_V6_FLOW,
+};
+
+struct virtchnl_filter {
+	union	virtchnl_flow_spec data;
+	union	virtchnl_flow_spec mask;
+	enum	virtchnl_flow_type flow_type;
+	enum	virtchnl_action action;
+	u32	action_meta;
+	__u8	field_flags;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
+
 /* VIRTCHNL_OP_EVENT
  * PF sends this message to inform the VF driver of events that may affect it.
  * No direct response is expected from the VF, though it may generate other
@@ -753,6 +806,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 		break;
 	case VIRTCHNL_OP_DISABLE_CHANNELS:
 		break;
+	case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+		valid_len = sizeof(struct virtchnl_filter);
+		break;
+	case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+		valid_len = sizeof(struct virtchnl_filter);
+		break;
 	/* These are always errors coming from the VF. */
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:

From 0075fa0fadd0ac6daf5c26a153e7bfa56fc9b4f3 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Tue, 23 Jan 2018 08:51:05 -0800
Subject: [PATCH 0115/1678] i40evf: Add support to apply cloud filters

This patch enables a tc filter to be applied as a cloud
filter for the VF. This patch adds functions which parse the
tc filter, extract the necessary fields needed to configure the
filter and package them in a virtchnl message to be sent to the
PF to apply them.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf.h    |  48 ++
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 580 +++++++++++++++++-
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   | 187 ++++++
 3 files changed, 810 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 150d676ab87c..e46555ad7122 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -54,6 +54,8 @@
 #include <net/ip6_checksum.h>
 #include <net/pkt_cls.h>
 #include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
 
 #include "i40e_type.h"
 #include <linux/avf/virtchnl.h>
@@ -184,6 +186,14 @@ struct i40evf_channel_config {
 	u8 total_qps;
 };
 
+/* State of cloud filter */
+enum i40evf_cloud_filter_state_t {
+	__I40EVF_CF_INVALID,	 /* cloud filter not added */
+	__I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+	__I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+	__I40EVF_CF_ACTIVE,	 /* cloud filter is active */
+};
+
 /* Driver state. The order of these is important! */
 enum i40evf_state_t {
 	__I40EVF_STARTUP,		/* driver loaded, probe complete */
@@ -205,6 +215,36 @@ enum i40evf_critical_section_t {
 	__I40EVF_IN_REMOVE_TASK,	/* device being removed */
 };
 
+#define I40EVF_CLOUD_FIELD_OMAC		0x01
+#define I40EVF_CLOUD_FIELD_IMAC		0x02
+#define I40EVF_CLOUD_FIELD_IVLAN	0x04
+#define I40EVF_CLOUD_FIELD_TEN_ID	0x08
+#define I40EVF_CLOUD_FIELD_IIP		0x10
+
+#define I40EVF_CF_FLAGS_OMAC	I40EVF_CLOUD_FIELD_OMAC
+#define I40EVF_CF_FLAGS_IMAC	I40EVF_CLOUD_FIELD_IMAC
+#define I40EVF_CF_FLAGS_IMAC_IVLAN	(I40EVF_CLOUD_FIELD_IMAC |\
+					 I40EVF_CLOUD_FIELD_IVLAN)
+#define I40EVF_CF_FLAGS_IMAC_TEN_ID	(I40EVF_CLOUD_FIELD_IMAC |\
+					 I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC	(I40EVF_CLOUD_FIELD_OMAC |\
+						 I40EVF_CLOUD_FIELD_IMAC |\
+						 I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID	(I40EVF_CLOUD_FIELD_IMAC |\
+						 I40EVF_CLOUD_FIELD_IVLAN |\
+						 I40EVF_CLOUD_FIELD_TEN_ID)
+#define I40EVF_CF_FLAGS_IIP	I40E_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct i40evf_cloud_filter {
+	enum i40evf_cloud_filter_state_t state;
+	struct list_head list;
+	struct virtchnl_filter f;
+	unsigned long cookie;
+	bool del;		/* filter needs to be deleted */
+	bool add;		/* filter needs to be added */
+};
+
 /* board specific private data structure */
 struct i40evf_adapter {
 	struct timer_list watchdog_timer;
@@ -287,6 +327,8 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING	BIT(20)
 #define I40EVF_FLAG_AQ_ENABLE_CHANNELS		BIT(21)
 #define I40EVF_FLAG_AQ_DISABLE_CHANNELS		BIT(22)
+#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER		BIT(23)
+#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT(24)
 
 	/* OS defined structs */
 	struct net_device *netdev;
@@ -335,6 +377,10 @@ struct i40evf_adapter {
 	/* ADQ related members */
 	struct i40evf_channel_config ch_config;
 	u8 num_tc;
+	struct list_head cloud_filter_list;
+	/* lock to protest access to the cloud filter list */
+	spinlock_t cloud_filter_list_lock;
+	u16 num_cloud_filters;
 };
 
 
@@ -403,4 +449,6 @@ void i40evf_notify_client_open(struct i40e_vsi *vsi);
 void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset);
 void i40evf_enable_channels(struct i40evf_adapter *adapter);
 void i40evf_disable_channels(struct i40evf_adapter *adapter);
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter);
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter);
 #endif /* _I40EVF_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 2f677a2a4b5c..4955ce3ab6a2 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1041,6 +1041,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 	struct i40evf_vlan_filter *vlf;
 	struct i40evf_mac_filter *f;
+	struct i40evf_cloud_filter *cf;
 
 	if (adapter->state <= __I40EVF_DOWN_PENDING)
 		return;
@@ -1064,11 +1065,18 @@ void i40evf_down(struct i40evf_adapter *adapter)
 
 	/* remove all VLAN filters */
 	list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
-		f->remove = true;
+		vlf->remove = true;
 	}
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+	/* remove all cloud filters */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		cf->del = true;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
 	if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
 	    adapter->state != __I40EVF_RESETTING) {
 		/* cancel any current operation */
@@ -1079,6 +1087,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
 		 */
 		adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER;
 		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER;
+		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
 		adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
 	}
 
@@ -1756,6 +1765,16 @@ static void i40evf_watchdog_task(struct work_struct *work)
 		goto watchdog_done;
 	}
 
+	if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+		i40evf_add_cloud_filter(adapter);
+		goto watchdog_done;
+	}
+
+	if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+		i40evf_del_cloud_filter(adapter);
+		goto watchdog_done;
+	}
+
 	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
 
 	if (adapter->state == __I40EVF_RUNNING)
@@ -1779,6 +1798,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
 {
 	struct i40evf_mac_filter *f, *ftmp;
 	struct i40evf_vlan_filter *fv, *fvtmp;
+	struct i40evf_cloud_filter *cf, *cftmp;
 
 	adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
 
@@ -1800,7 +1820,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
 
 	spin_lock_bh(&adapter->mac_vlan_list_lock);
 
-	/* Delete all of the filters, both MAC and VLAN. */
+	/* Delete all of the filters */
 	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
 		list_del(&f->list);
 		kfree(f);
@@ -1813,6 +1833,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
 	i40evf_free_misc_irq(adapter);
 	i40evf_reset_interrupt_capability(adapter);
 	i40evf_free_queues(adapter);
@@ -1842,9 +1870,11 @@ static void i40evf_reset_task(struct work_struct *work)
 	struct i40evf_adapter *adapter = container_of(work,
 						      struct i40evf_adapter,
 						      reset_task);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
 	struct net_device *netdev = adapter->netdev;
 	struct i40e_hw *hw = &adapter->hw;
 	struct i40evf_vlan_filter *vlf;
+	struct i40evf_cloud_filter *cf;
 	struct i40evf_mac_filter *f;
 	u32 reg_val;
 	int i = 0, err;
@@ -1969,8 +1999,19 @@ continue_reset:
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+	/* check if TCs are running and re-add all cloud filters */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+			cf->add = true;
+		}
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
 	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
 	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
 	i40evf_misc_irq_enable(adapter);
 
 	mod_timer(&adapter->watchdog_timer, jiffies + 2);
@@ -2320,6 +2361,24 @@ static int i40evf_validate_ch_config(struct i40evf_adapter *adapter,
 	return ret;
 }
 
+/**
+ * i40evf_del_all_cloud_filters - delete all cloud filters
+ * on the traffic classes
+ **/
+static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter)
+{
+	struct i40evf_cloud_filter *cf, *cftmp;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+				 list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
 /**
  * __i40evf_setup_tc - configure multiple traffic classes
  * @netdev: network interface device structure
@@ -2353,6 +2412,7 @@ static int __i40evf_setup_tc(struct net_device *netdev, void *type_data)
 			adapter->num_tc = 0;
 			netif_tx_stop_all_queues(netdev);
 			netif_tx_disable(netdev);
+			i40evf_del_all_cloud_filters(adapter);
 			adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS;
 			goto exit;
 		} else {
@@ -2417,6 +2477,499 @@ exit:
 	return ret;
 }
 
+/**
+ * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @cls_flower: pointer to struct tc_cls_flower_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
+				   struct tc_cls_flower_offload *f,
+				   struct i40evf_cloud_filter *filter)
+{
+	u16 n_proto_mask = 0;
+	u16 n_proto_key = 0;
+	u8 field_flags = 0;
+	u16 addr_type = 0;
+	u16 n_proto = 0;
+	int i = 0;
+
+	if (f->dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+		dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
+			f->dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_dissector_key_keyid *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  f->mask);
+
+		if (mask->keyid != 0)
+			field_flags |= I40EVF_CLOUD_FIELD_TEN_ID;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  f->key);
+
+		struct flow_dissector_key_basic *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_BASIC,
+						  f->mask);
+		n_proto_key = ntohs(key->n_proto);
+		n_proto_mask = ntohs(mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+		n_proto = n_proto_key & n_proto_mask;
+		if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+			return -EINVAL;
+		if (n_proto == ETH_P_IPV6) {
+			/* specify flow type as TCP IPv6 */
+			filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
+		}
+
+		if (key->ip_proto != IPPROTO_TCP) {
+			dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+			return -EINVAL;
+		}
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_dissector_key_eth_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  f->key);
+
+		struct flow_dissector_key_eth_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  f->mask);
+		/* use is_broadcast and is_zero to check for all 0xf or 0 */
+		if (!is_zero_ether_addr(mask->dst)) {
+			if (is_broadcast_ether_addr(mask->dst)) {
+				field_flags |= I40EVF_CLOUD_FIELD_OMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+					mask->dst);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(mask->src)) {
+			if (is_broadcast_ether_addr(mask->src)) {
+				field_flags |= I40EVF_CLOUD_FIELD_IMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+					mask->src);
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (!is_zero_ether_addr(key->dst))
+			if (is_valid_ether_addr(key->dst) ||
+			    is_multicast_ether_addr(key->dst)) {
+				/* set the mask if a valid dst_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					filter->f.mask.tcp_spec.dst_mac[i] |=
+									0xff;
+				ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
+						key->dst);
+			}
+
+		if (!is_zero_ether_addr(key->src))
+			if (is_valid_ether_addr(key->src) ||
+			    is_multicast_ether_addr(key->src)) {
+				/* set the mask if a valid dst_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					filter->f.mask.tcp_spec.src_mac[i] |=
+									0xff;
+				ether_addr_copy(filter->f.data.tcp_spec.src_mac,
+						key->src);
+		}
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->key);
+		struct flow_dissector_key_vlan *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->mask);
+
+		if (mask->vlan_id) {
+			if (mask->vlan_id == VLAN_VID_MASK) {
+				field_flags |= I40EVF_CLOUD_FIELD_IVLAN;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+					mask->vlan_id);
+				return I40E_ERR_CONFIG;
+			}
+		}
+		filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+		filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_CONTROL,
+						  f->key);
+
+		addr_type = key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv4_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						  f->mask);
+
+		if (mask->dst) {
+			if (mask->dst == cpu_to_be32(0xffffffff)) {
+				field_flags |= I40EVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+					be32_to_cpu(mask->dst));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (mask->src) {
+			if (mask->src == cpu_to_be32(0xffffffff)) {
+				field_flags |= I40EVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+					be32_to_cpu(mask->dst));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) {
+			dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+			return I40E_ERR_CONFIG;
+		}
+		if (key->dst) {
+			filter->f.mask.tcp_spec.dst_ip[0] |=
+							cpu_to_be32(0xffffffff);
+			filter->f.data.tcp_spec.dst_ip[0] = key->dst;
+		}
+		if (key->src) {
+			filter->f.mask.tcp_spec.src_ip[0] |=
+							cpu_to_be32(0xffffffff);
+			filter->f.data.tcp_spec.src_ip[0] = key->src;
+		}
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_dissector_key_ipv6_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv6_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						  f->mask);
+
+		/* validate mask, make sure it is not IPV6_ADDR_ANY */
+		if (ipv6_addr_any(&mask->dst)) {
+			dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+				IPV6_ADDR_ANY);
+			return I40E_ERR_CONFIG;
+		}
+
+		/* src and dest IPv6 address should not be LOOPBACK
+		 * (0:0:0:0:0:0:0:1) which can be represented as ::1
+		 */
+		if (ipv6_addr_loopback(&key->dst) ||
+		    ipv6_addr_loopback(&key->src)) {
+			dev_err(&adapter->pdev->dev,
+				"ipv6 addr should not be loopback\n");
+			return I40E_ERR_CONFIG;
+		}
+		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+			field_flags |= I40EVF_CLOUD_FIELD_IIP;
+
+		if (key->dst.s6_addr) {
+			for (i = 0; i < 4; i++)
+				filter->f.mask.tcp_spec.dst_ip[i] |=
+							cpu_to_be32(0xffffffff);
+			memcpy(&filter->f.data.tcp_spec.dst_ip,
+			       &key->dst.s6_addr32,
+			       sizeof(filter->f.data.tcp_spec.dst_ip));
+		}
+		if (key->src.s6_addr) {
+			for (i = 0; i < 4; i++)
+				filter->f.mask.tcp_spec.src_ip[i] |=
+							cpu_to_be32(0xffffffff);
+			memcpy(&filter->f.data.tcp_spec.src_ip,
+			       &key->src.s6_addr32,
+			       sizeof(filter->f.data.tcp_spec.src_ip));
+		}
+	}
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_dissector_key_ports *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_PORTS,
+						  f->key);
+		struct flow_dissector_key_ports *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_PORTS,
+						  f->mask);
+
+		if (mask->src) {
+			if (mask->src == cpu_to_be16(0xffff)) {
+				field_flags |= I40EVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+					be16_to_cpu(mask->src));
+				return I40E_ERR_CONFIG;
+			}
+		}
+
+		if (mask->dst) {
+			if (mask->dst == cpu_to_be16(0xffff)) {
+				field_flags |= I40EVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+					be16_to_cpu(mask->dst));
+				return I40E_ERR_CONFIG;
+			}
+		}
+		if (key->dst) {
+			filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+			filter->f.data.tcp_spec.dst_port = key->dst;
+		}
+
+		if (key->src) {
+			filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+			filter->f.data.tcp_spec.src_port = key->dst;
+		}
+	}
+	filter->f.field_flags = field_flags;
+
+	return 0;
+}
+
+/**
+ * i40evf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc,
+				struct i40evf_cloud_filter *filter)
+{
+	if (tc == 0)
+		return 0;
+	if (tc < adapter->num_tc) {
+		if (!filter->f.data.tcp_spec.dst_port) {
+			dev_err(&adapter->pdev->dev,
+				"Specify destination port to redirect to traffic class other than TC0\n");
+			return -EINVAL;
+		}
+	}
+	/* redirect to a traffic class on the same device */
+	filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+	filter->f.action_meta = tc;
+	return 0;
+}
+
+/**
+ * i40evf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
+				      struct tc_cls_flower_offload *cls_flower)
+{
+	int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+	struct i40evf_cloud_filter *filter = NULL;
+	int err = 0, count = 50;
+
+	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+				&adapter->crit_section)) {
+		udelay(1);
+		if (--count == 0)
+			return -EINVAL;
+	}
+
+	if (tc < 0) {
+		dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter) {
+		err = -ENOMEM;
+		goto clearout;
+	}
+	filter->cookie = cls_flower->cookie;
+
+	/* set the mask to all zeroes to begin with */
+	memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+	/* start out with flow type and eth type IPv4 to begin with */
+	filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+	err = i40evf_parse_cls_flower(adapter, cls_flower, filter);
+	if (err < 0)
+		goto err;
+
+	err = i40evf_handle_tclass(adapter, tc, filter);
+	if (err < 0)
+		goto err;
+
+	/* add filter to the list */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_add_tail(&filter->list, &adapter->cloud_filter_list);
+	adapter->num_cloud_filters++;
+	filter->add = true;
+	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+	if (err)
+		kfree(filter);
+clearout:
+	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	return err;
+}
+
+/* i40evf_find_cf - Find the cloud filter in the list
+ * @adapter: Board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter,
+						  unsigned long *cookie)
+{
+	struct i40evf_cloud_filter *filter = NULL;
+
+	if (!cookie)
+		return NULL;
+
+	list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	}
+	return NULL;
+}
+
+/**
+ * i40evf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct tc_cls_flower_offload
+ */
+static int i40evf_delete_clsflower(struct i40evf_adapter *adapter,
+				   struct tc_cls_flower_offload *cls_flower)
+{
+	struct i40evf_cloud_filter *filter = NULL;
+	int err = 0;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	filter = i40evf_find_cf(adapter, &cls_flower->cookie);
+	if (filter) {
+		filter->del = true;
+		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+	} else {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	return err;
+}
+
+/**
+ * i40evf_setup_tc_cls_flower - flower classifier offloads
+ * @netdev: net device to configure
+ * @type_data: offload data
+ */
+static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter,
+				      struct tc_cls_flower_offload *cls_flower)
+{
+	if (cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls_flower->command) {
+	case TC_CLSFLOWER_REPLACE:
+		return i40evf_configure_clsflower(adapter, cls_flower);
+	case TC_CLSFLOWER_DESTROY:
+		return i40evf_delete_clsflower(adapter, cls_flower);
+	case TC_CLSFLOWER_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * i40evf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv:
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				    void *cb_priv)
+{
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return i40evf_setup_tc_cls_flower(cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * i40evf_setup_tc_block - register callbacks for tc
+ * @netdev: network interface device structure
+ * @f: tc offload data
+ *
+ * This function registers block callbacks for tc
+ * offloads
+ **/
+static int i40evf_setup_tc_block(struct net_device *dev,
+				 struct tc_block_offload *f)
+{
+	struct i40evf_adapter *adapter = netdev_priv(dev);
+
+	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case TC_BLOCK_BIND:
+		return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
+					     adapter, adapter);
+	case TC_BLOCK_UNBIND:
+		tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
+					adapter);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 /**
  * i40evf_setup_tc - configure multiple traffic classes
  * @netdev: network interface device structure
@@ -2431,10 +2984,14 @@ exit:
 static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 			   void *type_data)
 {
-	if (type != TC_SETUP_QDISC_MQPRIO)
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return __i40evf_setup_tc(netdev, type_data);
+	case TC_SETUP_BLOCK:
+		return i40evf_setup_tc_block(netdev, type_data);
+	default:
 		return -EOPNOTSUPP;
-
-	return __i40evf_setup_tc(netdev, type_data);
+	}
 }
 
 /**
@@ -2819,6 +3376,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
 	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
 		hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX);
+	/* Enable cloud filter if ADQ is supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+		hw_features |= NETIF_F_HW_TC;
 
 	netdev->hw_features |= hw_features;
 
@@ -3186,9 +3746,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	mutex_init(&hw->aq.arq_mutex);
 
 	spin_lock_init(&adapter->mac_vlan_list_lock);
+	spin_lock_init(&adapter->cloud_filter_list_lock);
 
 	INIT_LIST_HEAD(&adapter->mac_filter_list);
 	INIT_LIST_HEAD(&adapter->vlan_filter_list);
+	INIT_LIST_HEAD(&adapter->cloud_filter_list);
 
 	INIT_WORK(&adapter->reset_task, i40evf_reset_task);
 	INIT_WORK(&adapter->adminq_task, i40evf_adminq_task);
@@ -3315,6 +3877,7 @@ static void i40evf_remove(struct pci_dev *pdev)
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	struct i40evf_vlan_filter *vlf, *vlftmp;
 	struct i40evf_mac_filter *f, *ftmp;
+	struct i40evf_cloud_filter *cf, *cftmp;
 	struct i40e_hw *hw = &adapter->hw;
 	int err;
 	/* Indicate we are in remove and not to run reset_task */
@@ -3385,6 +3948,13 @@ static void i40evf_remove(struct pci_dev *pdev)
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 647ea104d435..6134b61e0938 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1040,6 +1040,134 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter)
 			   NULL, 0);
 }
 
+/**
+ * i40evf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
+				      struct virtchnl_filter f)
+{
+	switch (f.flow_type) {
+	case VIRTCHNL_TCP_V4_FLOW:
+		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+			 &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
+			 ntohs(f.data.tcp_spec.vlan_id),
+			 &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
+			 ntohs(f.data.tcp_spec.dst_port),
+			 ntohs(f.data.tcp_spec.src_port));
+		break;
+	case VIRTCHNL_TCP_V6_FLOW:
+		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+			 &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
+			 ntohs(f.data.tcp_spec.vlan_id),
+			 &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
+			 ntohs(f.data.tcp_spec.dst_port),
+			 ntohs(f.data.tcp_spec.src_port));
+		break;
+	}
+}
+
+/**
+ * i40evf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via tc tool.
+ **/
+void i40evf_add_cloud_filter(struct i40evf_adapter *adapter)
+{
+	struct i40evf_cloud_filter *cf;
+	struct virtchnl_filter *f;
+	int len = 0, count = 0;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		if (cf->add) {
+			count++;
+			break;
+		}
+	}
+	if (!count) {
+		adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER;
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
+
+	len = sizeof(struct virtchnl_filter);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		if (cf->add) {
+			memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+			cf->add = false;
+			cf->state = __I40EVF_CF_ADD_PENDING;
+			i40evf_send_pf_msg(adapter,
+					   VIRTCHNL_OP_ADD_CLOUD_FILTER,
+					   (u8 *)f, len);
+		}
+	}
+	kfree(f);
+}
+
+/**
+ * i40evf_del_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF delete cloud filters as specified
+ * by the user via tc tool.
+ **/
+void i40evf_del_cloud_filter(struct i40evf_adapter *adapter)
+{
+	struct i40evf_cloud_filter *cf, *cftmp;
+	struct virtchnl_filter *f;
+	int len = 0, count = 0;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		if (cf->del) {
+			count++;
+			break;
+		}
+	}
+	if (!count) {
+		adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
+
+	len = sizeof(struct virtchnl_filter);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		if (cf->del) {
+			memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+			cf->del = false;
+			cf->state = __I40EVF_CF_DEL_PENDING;
+			i40evf_send_pf_msg(adapter,
+					   VIRTCHNL_OP_DEL_CLOUD_FILTER,
+					   (u8 *)f, len);
+		}
+	}
+	kfree(f);
+}
+
 /**
  * i40evf_request_reset
  * @adapter: adapter structure
@@ -1163,6 +1291,42 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 			adapter->ch_config.state = __I40EVF_TC_RUNNING;
 			netif_tx_start_all_queues(netdev);
 			break;
+		case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+			struct i40evf_cloud_filter *cf, *cftmp;
+
+			list_for_each_entry_safe(cf, cftmp,
+						 &adapter->cloud_filter_list,
+						 list) {
+				if (cf->state == __I40EVF_CF_ADD_PENDING) {
+					cf->state = __I40EVF_CF_INVALID;
+					dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+						 i40evf_stat_str(&adapter->hw,
+								 v_retval));
+					i40evf_print_cloud_filter(adapter,
+								  cf->f);
+					list_del(&cf->list);
+					kfree(cf);
+					adapter->num_cloud_filters--;
+				}
+			}
+			}
+			break;
+		case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+			struct i40evf_cloud_filter *cf;
+
+			list_for_each_entry(cf, &adapter->cloud_filter_list,
+					    list) {
+				if (cf->state == __I40EVF_CF_DEL_PENDING) {
+					cf->state = __I40EVF_CF_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
+						 i40evf_stat_str(&adapter->hw,
+								 v_retval));
+					i40evf_print_cloud_filter(adapter,
+								  cf->f);
+				}
+			}
+			}
+			break;
 		default:
 			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
 				v_retval,
@@ -1257,6 +1421,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 		}
 		}
 		break;
+	case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+		struct i40evf_cloud_filter *cf;
+
+		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+			if (cf->state == __I40EVF_CF_ADD_PENDING)
+				cf->state = __I40EVF_CF_ACTIVE;
+		}
+		}
+		break;
+	case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+		struct i40evf_cloud_filter *cf, *cftmp;
+
+		list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+					 list) {
+			if (cf->state == __I40EVF_CF_DEL_PENDING) {
+				cf->state = __I40EVF_CF_INVALID;
+				list_del(&cf->list);
+				kfree(cf);
+				adapter->num_cloud_filters--;
+			}
+		}
+		}
+		break;
 	default:
 		if (adapter->current_op && (v_opcode != adapter->current_op))
 			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",

From e284fc280473bed23f2e1ed324e102a48f7d17e1 Mon Sep 17 00:00:00 2001
From: Avinash Dayanand <avinash.dayanand@intel.com>
Date: Tue, 23 Jan 2018 08:51:06 -0800
Subject: [PATCH 0116/1678] i40e: Add and delete cloud filter

This patch provides support to add or delete cloud filter for queue
channels created for ADq on VF.
We are using the HW's cloud filter feature and programming it to act
as a TC filter applied to a group of queues.

There are two possible modes for a VF when applying a cloud filter
1. Basic Mode:	Intended to apply filters that don't need a VF to be
		Trusted. This would include the following
		  Dest MAC + L4 port
		  Dest MAC + VLAN + L4 port
2. Advanced Mode: This mode is only for filters with combination that
		  requires VF to be Trusted.
		  Dest IP + L4 port

When cloud filters are applied on a trusted VF and for some reason
the same VF is later made as untrusted then all cloud filters
will be deleted. All cloud filters has to be re-applied in
such a case.
Cloud filters are also deleted when queue channel is deleted.

Testing-Hints:
=============
1. Adding Basic Mode filter should be possible on a VF in
   Non-Trusted mode.
2. In Advanced mode all filters should be able to be created.

Steps:
======
1. Enable ADq and create TCs using TC mqprio command
2. Apply cloud filter.
3. Turn-off the spoof check.
4. Pass traffic.

Example:
========
1. tc qdisc add dev enp4s2 root mqprio num_tc 4 map 0 0 0 0 1 2 2 3\
	queues 2@0 2@2 1@4 1@5 hw 1 mode channel
2. tc qdisc add dev enp4s2 ingress
3. ethtool -K enp4s2 hw-tc-offload on
4. ip link set ens261f0 vf 0 spoofchk off
5. tc filter add dev enp4s2 protocol ip parent ffff: prio 1 flower\
	dst_ip 192.168.3.5/32 ip_proto udp dst_port 25 skip_sw hw_tc 2

Signed-off-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h        |   6 +
 drivers/net/ethernet/intel/i40e/i40e_main.c   |  16 +-
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 458 +++++++++++++++++-
 .../ethernet/intel/i40e/i40e_virtchnl_pf.h    |   2 +
 4 files changed, 470 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 4cf99292fc2f..36d9401a6258 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1109,4 +1109,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
 
 int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
 int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+			      struct i40e_cloud_filter *filter,
+			      bool add);
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+				      struct i40e_cloud_filter *filter,
+				      bool add);
 #endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 101702af099f..f6d37456f3b7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf);
 static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
-				     struct i40e_cloud_filter *filter,
-				     bool add);
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
-					     struct i40e_cloud_filter *filter,
-					     bool add);
 static int i40e_get_capabilities(struct i40e_pf *pf,
 				 enum i40e_admin_queue_opc list_type);
 
@@ -6841,8 +6835,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter,
  * Add or delete a cloud filter for a specific flow spec.
  * Returns 0 if the filter were successfully added.
  **/
-static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
-				     struct i40e_cloud_filter *filter, bool add)
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+			      struct i40e_cloud_filter *filter, bool add)
 {
 	struct i40e_aqc_cloud_filters_element_data cld_filter;
 	struct i40e_pf *pf = vsi->back;
@@ -6908,9 +6902,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
  * Add or delete a cloud filter for a specific flow spec using big buffer.
  * Returns 0 if the filter were successfully added.
  **/
-static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
-					     struct i40e_cloud_filter *filter,
-					     bool add)
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+				      struct i40e_cloud_filter *filter,
+				      bool add)
 {
 	struct i40e_aqc_cloud_filters_element_bb cld_filter;
 	struct i40e_pf *pf = vsi->back;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index b4bad70c82cd..5cca083da93c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2916,6 +2916,440 @@ err:
 				       aq_ret);
 }
 
+/**
+ * i40e_validate_cloud_filter
+ * @mask: mask for TC filter
+ * @data: data for TC filter
+ *
+ * This function validates cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+				      struct virtchnl_filter *tc_filter)
+{
+	struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
+	struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	bool found = false;
+	int bkt;
+
+	if (!tc_filter->action) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: Currently ADq doesn't support Drop Action\n",
+			 vf->vf_id);
+		goto err;
+	}
+
+	/* action_meta is TC number here to which the filter is applied */
+	if (!tc_filter->action_meta ||
+	    tc_filter->action_meta > I40E_MAX_VF_VSI) {
+		dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+			 vf->vf_id, tc_filter->action_meta);
+		goto err;
+	}
+
+	/* Check filter if it's programmed for advanced mode or basic mode.
+	 * There are two ADq modes (for VF only),
+	 * 1. Basic mode: intended to allow as many filter options as possible
+	 *		  to be added to a VF in Non-trusted mode. Main goal is
+	 *		  to add filters to its own MAC and VLAN id.
+	 * 2. Advanced mode: is for allowing filters to be applied other than
+	 *		  its own MAC or VLAN. This mode requires the VF to be
+	 *		  Trusted.
+	 */
+	if (mask.dst_mac[0] && !mask.dst_ip[0]) {
+		vsi = pf->vsi[vf->lan_vsi_idx];
+		f = i40e_find_mac(vsi, data.dst_mac);
+
+		if (!f) {
+			dev_info(&pf->pdev->dev,
+				 "Destination MAC %pM doesn't belong to VF %d\n",
+				 data.dst_mac, vf->vf_id);
+			goto err;
+		}
+
+		if (mask.vlan_id) {
+			hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+					   hlist) {
+				if (f->vlan == ntohs(data.vlan_id)) {
+					found = true;
+					break;
+				}
+			}
+			if (!found) {
+				dev_info(&pf->pdev->dev,
+					 "VF %d doesn't have any VLAN id %u\n",
+					 vf->vf_id, ntohs(data.vlan_id));
+				goto err;
+			}
+		}
+	} else {
+		/* Check if VF is trusted */
+		if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+			dev_err(&pf->pdev->dev,
+				"VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
+				vf->vf_id);
+			return I40E_ERR_CONFIG;
+		}
+	}
+
+	if (mask.dst_mac[0] & data.dst_mac[0]) {
+		if (is_broadcast_ether_addr(data.dst_mac) ||
+		    is_zero_ether_addr(data.dst_mac)) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
+				 vf->vf_id, data.dst_mac);
+			goto err;
+		}
+	}
+
+	if (mask.src_mac[0] & data.src_mac[0]) {
+		if (is_broadcast_ether_addr(data.src_mac) ||
+		    is_zero_ether_addr(data.src_mac)) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
+				 vf->vf_id, data.src_mac);
+			goto err;
+		}
+	}
+
+	if (mask.dst_port & data.dst_port) {
+		if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
+				 vf->vf_id);
+			goto err;
+		}
+	}
+
+	if (mask.src_port & data.src_port) {
+		if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
+				 vf->vf_id);
+			goto err;
+		}
+	}
+
+	if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
+	    tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
+		dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
+			 vf->vf_id);
+		goto err;
+	}
+
+	if (mask.vlan_id & data.vlan_id) {
+		if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
+			dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
+				 vf->vf_id);
+			goto err;
+		}
+	}
+
+	return I40E_SUCCESS;
+err:
+	return I40E_ERR_CONFIG;
+}
+
+/**
+ * i40e_find_vsi_from_seid - searches for the vsi with the given seid
+ * @vf: pointer to the VF info
+ * @seid - seid of the vsi it is searching for
+ **/
+static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int i;
+
+	for (i = 0; i < vf->num_tc ; i++) {
+		vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
+		if (vsi->seid == seid)
+			return vsi;
+	}
+	return NULL;
+}
+
+/**
+ * i40e_del_all_cloud_filters
+ * @vf: pointer to the VF info
+ *
+ * This function deletes all cloud filters
+ **/
+static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+{
+	struct i40e_cloud_filter *cfilter = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	struct hlist_node *node;
+	int ret;
+
+	hlist_for_each_entry_safe(cfilter, node,
+				  &vf->cloud_filter_list, cloud_node) {
+		vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
+
+		if (!vsi) {
+			dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n",
+				vf->vf_id, cfilter->seid);
+			continue;
+		}
+
+		if (cfilter->dst_port)
+			ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+								false);
+		else
+			ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+		if (ret)
+			dev_err(&pf->pdev->dev,
+				"VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+				vf->vf_id, i40e_stat_str(&pf->hw, ret),
+				i40e_aq_str(&pf->hw,
+					    pf->hw.aq.asq_last_status));
+
+		hlist_del(&cfilter->cloud_node);
+		kfree(cfilter);
+		vf->num_cloud_filters--;
+	}
+}
+
+/**
+ * i40e_vc_del_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function deletes a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+	struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+	struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+	struct i40e_cloud_filter cfilter, *cf = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	struct hlist_node *node;
+	i40e_status aq_ret = 0;
+	int i, ret;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	if (!vf->adq_enabled) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: ADq not enabled, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	if (i40e_validate_cloud_filter(vf, vcf)) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: Invalid input, can't apply cloud filter\n",
+			 vf->vf_id);
+			aq_ret = I40E_ERR_PARAM;
+			goto err;
+	}
+
+	memset(&cfilter, 0, sizeof(cfilter));
+	/* parse destination mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+	/* parse source mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+	cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
+	cfilter.dst_port = mask.dst_port & tcf.dst_port;
+	cfilter.src_port = mask.src_port & tcf.src_port;
+
+	switch (vcf->flow_type) {
+	case VIRTCHNL_TCP_V4_FLOW:
+		cfilter.n_proto = ETH_P_IP;
+		if (mask.dst_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		else if (mask.src_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		break;
+	case VIRTCHNL_TCP_V6_FLOW:
+		cfilter.n_proto = ETH_P_IPV6;
+		if (mask.dst_ip[3] & tcf.dst_ip[3])
+			memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
+			       sizeof(cfilter.ip.v6.dst_ip6));
+		if (mask.src_ip[3] & tcf.src_ip[3])
+			memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
+			       sizeof(cfilter.ip.v6.src_ip6));
+		break;
+	default:
+		/* TC filter can be configured based on different combinations
+		 * and in this case IP is not a part of filter config
+		 */
+		dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+			 vf->vf_id);
+	}
+
+	/* get the vsi to which the tc belongs to */
+	vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+	cfilter.seid = vsi->seid;
+	cfilter.flags = vcf->field_flags;
+
+	/* Deleting TC filter */
+	if (tcf.dst_port)
+		ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
+	else
+		ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"VF %d: Failed to delete cloud filter, err %s aq_err %s\n",
+			vf->vf_id, i40e_stat_str(&pf->hw, ret),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto err;
+	}
+
+	hlist_for_each_entry_safe(cf, node,
+				  &vf->cloud_filter_list, cloud_node) {
+		if (cf->seid != cfilter.seid)
+			continue;
+		if (mask.dst_port)
+			if (cfilter.dst_port != cf->dst_port)
+				continue;
+		if (mask.dst_mac[0])
+			if (!ether_addr_equal(cf->src_mac, cfilter.src_mac))
+				continue;
+		/* for ipv4 data to be valid, only first byte of mask is set */
+		if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
+			if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
+				   ARRAY_SIZE(tcf.dst_ip)))
+				continue;
+		/* for ipv6, mask is set for all sixteen bytes (4 words) */
+		if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
+			if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
+				   sizeof(cfilter.ip.v6.src_ip6)))
+				continue;
+		if (mask.vlan_id)
+			if (cfilter.vlan_id != cf->vlan_id)
+				continue;
+
+		hlist_del(&cf->cloud_node);
+		kfree(cf);
+		vf->num_cloud_filters--;
+	}
+
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_add_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function adds a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+	struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+	struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+	struct i40e_cloud_filter *cfilter = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	i40e_status aq_ret = 0;
+	int i, ret;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	if (!vf->adq_enabled) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: ADq is not enabled, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
+	}
+
+	if (i40e_validate_cloud_filter(vf, vcf)) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: Invalid input/s, can't apply cloud filter\n",
+			 vf->vf_id);
+			aq_ret = I40E_ERR_PARAM;
+			goto err;
+	}
+
+	cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
+	if (!cfilter)
+		return -ENOMEM;
+
+	/* parse destination mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+	/* parse source mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+	cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
+	cfilter->dst_port = mask.dst_port & tcf.dst_port;
+	cfilter->src_port = mask.src_port & tcf.src_port;
+
+	switch (vcf->flow_type) {
+	case VIRTCHNL_TCP_V4_FLOW:
+		cfilter->n_proto = ETH_P_IP;
+		if (mask.dst_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		else if (mask.src_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		break;
+	case VIRTCHNL_TCP_V6_FLOW:
+		cfilter->n_proto = ETH_P_IPV6;
+		if (mask.dst_ip[3] & tcf.dst_ip[3])
+			memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
+			       sizeof(cfilter->ip.v6.dst_ip6));
+		if (mask.src_ip[3] & tcf.src_ip[3])
+			memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
+			       sizeof(cfilter->ip.v6.src_ip6));
+		break;
+	default:
+		/* TC filter can be configured based on different combinations
+		 * and in this case IP is not a part of filter config
+		 */
+		dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+			 vf->vf_id);
+	}
+
+	/* get the VSI to which the TC belongs to */
+	vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+	cfilter->seid = vsi->seid;
+	cfilter->flags = vcf->field_flags;
+
+	/* Adding cloud filter programmed as TC filter */
+	if (tcf.dst_port)
+		ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+	else
+		ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"VF %d: Failed to add cloud filter, err %s aq_err %s\n",
+			vf->vf_id, i40e_stat_str(&pf->hw, ret),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto err;
+	}
+
+	INIT_HLIST_NODE(&cfilter->cloud_node);
+	hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+	vf->num_cloud_filters++;
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+				       aq_ret);
+}
+
 /**
  * i40e_vc_add_qch_msg: Add queue channel and enable ADq
  * @vf: pointer to the VF info
@@ -3036,6 +3470,11 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
 
 	/* set this flag only after making sure all inputs are sane */
 	vf->adq_enabled = true;
+	/* num_req_queues is set when user changes number of queues via ethtool
+	 * and this causes issue for default VSI(which depends on this variable)
+	 * when ADq is enabled, hence reset it.
+	 */
+	vf->num_req_queues = 0;
 
 	/* reset the VF in order to allocate resources */
 	i40e_vc_notify_vf_reset(vf);
@@ -3065,11 +3504,12 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
 	}
 
 	if (vf->adq_enabled) {
+		i40e_del_all_cloud_filters(vf);
 		i40e_del_qch(vf);
 		vf->adq_enabled = false;
 		vf->num_tc = 0;
 		dev_info(&pf->pdev->dev,
-			 "Deleting Queue Channels for ADq on VF %d\n",
+			 "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
 			 vf->vf_id);
 	} else {
 		dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
@@ -3223,6 +3663,12 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 	case VIRTCHNL_OP_DISABLE_CHANNELS:
 		ret = i40e_vc_del_qch_msg(vf, msg);
 		break;
+	case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+		ret = i40e_vc_add_cloud_filter(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+		ret = i40e_vc_del_cloud_filter(vf, msg);
+		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
@@ -3788,6 +4234,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
 	i40e_vc_disable_vf(vf);
 	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
 		 vf_id, setting ? "" : "un");
+
+	if (vf->adq_enabled) {
+		if (!vf->trusted) {
+			dev_info(&pf->pdev->dev,
+				 "VF %u no longer Trusted, deleting all cloud filters\n",
+				 vf_id);
+			i40e_del_all_cloud_filters(vf);
+		}
+	}
+
 out:
 	return ret;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 6e794c8a1ef3..6852599b2379 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -128,6 +128,8 @@ struct i40e_vf {
 	bool adq_enabled; /* flag to enable adq */
 	u8 num_tc;
 	struct i40evf_channel ch[I40E_MAX_VF_VSI];
+	struct hlist_head cloud_filter_list;
+	u16 num_cloud_filters;
 
 	/* RDMA Client */
 	struct virtchnl_iwarp_qvlist_info *qvlist_info;

From e0f9759f530bf789e984961dce79f525b151ecf3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 13 Feb 2018 06:14:12 -0800
Subject: [PATCH 0117/1678] tcp: try to keep packet if SYN_RCV race is lost
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

배석진 reported that in some situations, packets for a given 5-tuple
end up being processed by different CPUS.

This involves RPS, and fragmentation.

배석진 is seeing packet drops when a SYN_RECV request socket is
moved into ESTABLISH state. Other states are protected by socket lock.

This is caused by a CPU losing the race, and simply not caring enough.

Since this seems to occur frequently, we can do better and perform
a second lookup.

Note that all needed memory barriers are already in the existing code,
thanks to the spin_lock()/spin_unlock() pair in inet_ehash_insert()
and reqsk_put(). The second lookup must find the new socket,
unless it has already been accepted and closed by another cpu.

Note that the fragmentation could be avoided in the first place by
use of a correct TCP MSS option in the SYN{ACK} packet, but this
does not mean we can not be more robust.

Many thanks to 배석진 for a very detailed analysis.

Reported-by: 배석진 <soukjin.bae@samsung.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        |  3 ++-
 net/ipv4/tcp_input.c     |  4 +++-
 net/ipv4/tcp_ipv4.c      | 13 ++++++++++++-
 net/ipv4/tcp_minisocks.c |  3 ++-
 net/ipv6/tcp_ipv6.c      | 13 ++++++++++++-
 5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e3fc667f9ac2..92b06c6e7732 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -374,7 +374,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
 					      struct sk_buff *skb,
 					      const struct tcphdr *th);
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
-			   struct request_sock *req, bool fastopen);
+			   struct request_sock *req, bool fastopen,
+			   bool *lost_race);
 int tcp_child_process(struct sock *parent, struct sock *child,
 		      struct sk_buff *skb);
 void tcp_enter_loss(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575d3c1fb6e8..a6b48f6253e3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5870,10 +5870,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	tp->rx_opt.saw_tstamp = 0;
 	req = tp->fastopen_rsk;
 	if (req) {
+		bool req_stolen;
+
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 		    sk->sk_state != TCP_FIN_WAIT1);
 
-		if (!tcp_check_req(sk, skb, req, true))
+		if (!tcp_check_req(sk, skb, req, true, &req_stolen))
 			goto discard;
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac16795486ea..f3e52bc98980 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1672,6 +1672,7 @@ process:
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
+		bool req_stolen = false;
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
@@ -1694,10 +1695,20 @@ process:
 			th = (const struct tcphdr *)skb->data;
 			iph = ip_hdr(skb);
 			tcp_v4_fill_cb(skb, iph, th);
-			nsk = tcp_check_req(sk, skb, req, false);
+			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
 		}
 		if (!nsk) {
 			reqsk_put(req);
+			if (req_stolen) {
+				/* Another cpu got exclusive access to req
+				 * and created a full blown socket.
+				 * Try to feed this packet to this socket
+				 * instead of discarding it.
+				 */
+				tcp_v4_restore_cb(skb);
+				sock_put(sk);
+				goto lookup;
+			}
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..e7e36433cdb5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -578,7 +578,7 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
-			   bool fastopen)
+			   bool fastopen, bool *req_stolen)
 {
 	struct tcp_options_received tmp_opt;
 	struct sock *child;
@@ -785,6 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	sock_rps_save_rxhash(child, skb);
 	tcp_synack_rtt_meas(child, req);
+	*req_stolen = !own_req;
 	return inet_csk_complete_hashdance(sk, child, req, own_req);
 
 listen_overflow:
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 412139f4eccd..883df0ad5bfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1451,6 +1451,7 @@ process:
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV) {
 		struct request_sock *req = inet_reqsk(sk);
+		bool req_stolen = false;
 		struct sock *nsk;
 
 		sk = req->rsk_listener;
@@ -1470,10 +1471,20 @@ process:
 			th = (const struct tcphdr *)skb->data;
 			hdr = ipv6_hdr(skb);
 			tcp_v6_fill_cb(skb, hdr, th);
-			nsk = tcp_check_req(sk, skb, req, false);
+			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
 		}
 		if (!nsk) {
 			reqsk_put(req);
+			if (req_stolen) {
+				/* Another cpu got exclusive access to req
+				 * and created a full blown socket.
+				 * Try to feed this packet to this socket
+				 * instead of discarding it.
+				 */
+				tcp_v6_restore_cb(skb);
+				sock_put(sk);
+				goto lookup;
+			}
 			goto discard_and_relse;
 		}
 		if (nsk == sk) {

From 052fddfb3c4e5f3d413d3f6b8dffe1b7192026af Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 14 Feb 2018 01:07:42 +0100
Subject: [PATCH 0118/1678] net: ptp: Add stub for ptp_classify_raw()

When NET_PTP_CLASSIFY is disabled, a stub function is required in
order that the drivers compile.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptp_classify.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index a079656b614c..059242030631 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -75,5 +75,9 @@ void __init ptp_classifier_init(void);
 static inline void ptp_classifier_init(void)
 {
 }
+static inline unsigned int ptp_classify_raw(struct sk_buff *skb)
+{
+	return PTP_CLASS_NONE;
+}
 #endif
 #endif /* _PTP_CLASSIFY_H_ */

From b000be95e5b903c686349dff5d1bf8e2dcf76aef Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:43 +0100
Subject: [PATCH 0119/1678] net: dsa: mv88e6xxx: export g2 register accessors

Let the mv88e6xxx_g2_* register accessor functions be accessible
outside of global2.c.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global2.c |  8 ++++----
 drivers/net/dsa/mv88e6xxx/global2.h | 25 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index af0727877825..3e7102e9fc3a 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -20,22 +20,22 @@
 #include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */
 #include "global2.h"
 
-static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
 {
 	return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
 }
 
-static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 {
 	return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
 }
 
-static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
 {
 	return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
 }
 
-static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 {
 	return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
 }
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 669f59017b12..bfa25223abee 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -230,6 +230,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
 	return 0;
 }
 
+int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
+int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
+int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update);
+int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+
 int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port);
 
@@ -279,6 +284,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
 	return 0;
 }
 
+static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip,
 					    int port)
 {

From 0d632c3d6fe3b909fa481ddd51b5cb76ad18f921 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:44 +0100
Subject: [PATCH 0120/1678] net: dsa: mv88e6xxx: add accessors for PTP/TAI
 registers

This patch implements support for accessing the Precision Time Protocol
and Time Application Interface registers via the AVB register interface
in the Global 2 register.

The register interface differs slightly between different models; older
models use a 3-bit operations field, while newer models use a 2-bit
field. The operations values and the special "global port" values are
different between the two. This is a similar split to the differences
in the "Ingress Rate" register between models, so, like in that case,
we call the two variants "6352" and "6390" and create an ops structure
to abstract between the two.

checkpatch fixups by Andrew Lunn

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/Makefile      |   1 +
 drivers/net/dsa/mv88e6xxx/chip.c        |   9 ++
 drivers/net/dsa/mv88e6xxx/chip.h        |  22 +++
 drivers/net/dsa/mv88e6xxx/global2.h     |  27 +++-
 drivers/net/dsa/mv88e6xxx/global2_avb.c | 193 ++++++++++++++++++++++++
 5 files changed, 251 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/dsa/mv88e6xxx/global2_avb.c

diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 58a4a0014e59..bdbbbf7b872e 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -5,6 +5,7 @@ mv88e6xxx-objs += global1.o
 mv88e6xxx-objs += global1_atu.o
 mv88e6xxx-objs += global1_vtu.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
 mv88e6xxx-objs += phy.o
 mv88e6xxx-objs += port.o
 mv88e6xxx-objs += serdes.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index eb328bade225..413d50e606f2 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2843,6 +2843,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6352_serdes_power,
+	.avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6290_ops = {
@@ -2879,6 +2880,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6320_ops = {
@@ -2913,6 +2915,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6185_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+	.avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
@@ -2945,6 +2948,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6185_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+	.avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6341_ops = {
@@ -2981,6 +2985,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+	.avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6350_ops = {
@@ -3049,6 +3054,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+	.avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
@@ -3086,6 +3092,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6352_serdes_power,
+	.avb_ops = &mv88e6352_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6390_ops = {
@@ -3124,6 +3131,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6390x_ops = {
@@ -3162,6 +3170,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.avb_ops = &mv88e6390_avb_ops,
 };
 
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 3dba6e90adcf..5467c86f38a1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -146,6 +146,7 @@ struct mv88e6xxx_vtu_entry {
 
 struct mv88e6xxx_bus_ops;
 struct mv88e6xxx_irq_ops;
+struct mv88e6xxx_avb_ops;
 
 struct mv88e6xxx_irq {
 	u16 masked;
@@ -344,6 +345,9 @@ struct mv88e6xxx_ops {
 			   struct mv88e6xxx_vtu_entry *entry);
 	int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
 			     struct mv88e6xxx_vtu_entry *entry);
+
+	/* Interface to the AVB/PTP registers */
+	const struct mv88e6xxx_avb_ops *avb_ops;
 };
 
 struct mv88e6xxx_irq_ops {
@@ -355,6 +359,24 @@ struct mv88e6xxx_irq_ops {
 	void (*irq_free)(struct mv88e6xxx_chip *chip);
 };
 
+struct mv88e6xxx_avb_ops {
+	/* Access port-scoped Precision Time Protocol registers */
+	int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
+			     u16 *data, int len);
+	int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr,
+			      u16 data);
+
+	/* Access global Precision Time Protocol registers */
+	int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+			int len);
+	int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+
+	/* Access global Time Application Interface registers */
+	int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data,
+			int len);
+	int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data);
+};
+
 #define STATS_TYPE_PORT		BIT(0)
 #define STATS_TYPE_BANK0	BIT(1)
 #define STATS_TYPE_BANK1	BIT(2)
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index bfa25223abee..e1d4850974dc 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -149,7 +149,26 @@
 #define MV88E6390_G2_EEPROM_ADDR_MASK	0xffff
 
 /* Offset 0x16: AVB Command Register */
-#define MV88E6352_G2_AVB_CMD		0x16
+#define MV88E6352_G2_AVB_CMD			0x16
+#define MV88E6352_G2_AVB_CMD_BUSY		0x8000
+#define MV88E6352_G2_AVB_CMD_OP_READ		0x4000
+#define MV88E6352_G2_AVB_CMD_OP_READ_INCR	0x6000
+#define MV88E6352_G2_AVB_CMD_OP_WRITE		0x3000
+#define MV88E6390_G2_AVB_CMD_OP_READ		0x0000
+#define MV88E6390_G2_AVB_CMD_OP_READ_INCR	0x4000
+#define MV88E6390_G2_AVB_CMD_OP_WRITE		0x6000
+#define MV88E6352_G2_AVB_CMD_PORT_MASK		0x0f00
+#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL	0xe
+#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL	0xf
+#define MV88E6390_G2_AVB_CMD_PORT_MASK		0x1f00
+#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL	0x1e
+#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL	0x1f
+#define MV88E6352_G2_AVB_CMD_BLOCK_PTP		0
+#define MV88E6352_G2_AVB_CMD_BLOCK_AVB		1
+#define MV88E6352_G2_AVB_CMD_BLOCK_QAV		2
+#define MV88E6352_G2_AVB_CMD_BLOCK_QVB		3
+#define MV88E6352_G2_AVB_CMD_BLOCK_MASK		0x00e0
+#define MV88E6352_G2_AVB_CMD_ADDR_MASK		0x001f
 
 /* Offset 0x17: AVB Data Register */
 #define MV88E6352_G2_AVB_DATA		0x17
@@ -272,6 +291,9 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 
+extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
+extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
+
 #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -407,6 +429,9 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
 static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {};
 static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
 
+static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
+static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 #endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c
new file mode 100644
index 000000000000..2e398ccb88ca
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c
@@ -0,0 +1,193 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016-2017 Savoir-faire Linux Inc.
+ *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * Copyright (c) 2017 National Instruments
+ *	Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "global2.h"
+
+/* Offset 0x16: AVB Command Register
+ * Offset 0x17: AVB Data Register
+ *
+ * There are two different versions of this register interface:
+ *    "6352": 3-bit "op" field, 4-bit "port" field.
+ *    "6390": 2-bit "op" field, 5-bit "port" field.
+ *
+ * The "op" codes are different between the two, as well as the special
+ * port fields for global PTP and TAI configuration.
+ */
+
+/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words.
+ * The hardware supports snapshotting up to four contiguous registers.
+ */
+static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop,
+				 u16 *data, int len)
+{
+	int err;
+	int i;
+
+	/* Hardware can only snapshot four words. */
+	if (len > 4)
+		return -E2BIG;
+
+	err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop);
+	if (err)
+		return err;
+
+	for (i = 0; i < len; ++i) {
+		err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA,
+					&data[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */
+static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop,
+				  u16 data)
+{
+	int err;
+
+	err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop);
+}
+
+static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+					  int port, int addr, u16 *data,
+					  int len)
+{
+	u16 readop = (len == 1 ? MV88E6352_G2_AVB_CMD_OP_READ :
+				 MV88E6352_G2_AVB_CMD_OP_READ_INCR) |
+		     (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+		     addr;
+
+	return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+					   int port, int addr, u16 data)
+{
+	u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) |
+		      (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+	return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+				     u16 *data, int len)
+{
+	return mv88e6352_g2_avb_port_ptp_read(chip,
+					MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+					addr, data, len);
+}
+
+static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+				      u16 data)
+{
+	return mv88e6352_g2_avb_port_ptp_write(chip,
+					MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL,
+					addr, data);
+}
+
+static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+				     u16 *data, int len)
+{
+	return mv88e6352_g2_avb_port_ptp_read(chip,
+					MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+					addr, data, len);
+}
+
+static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+				      u16 data)
+{
+	return mv88e6352_g2_avb_port_ptp_write(chip,
+					MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL,
+					addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {
+	.port_ptp_read		= mv88e6352_g2_avb_port_ptp_read,
+	.port_ptp_write		= mv88e6352_g2_avb_port_ptp_write,
+	.ptp_read		= mv88e6352_g2_avb_ptp_read,
+	.ptp_write		= mv88e6352_g2_avb_ptp_write,
+	.tai_read		= mv88e6352_g2_avb_tai_read,
+	.tai_write		= mv88e6352_g2_avb_tai_write,
+};
+
+static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip,
+					  int port, int addr, u16 *data,
+					  int len)
+{
+	u16 readop = (len == 1 ? MV88E6390_G2_AVB_CMD_OP_READ :
+				 MV88E6390_G2_AVB_CMD_OP_READ_INCR) |
+		     (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) |
+		     addr;
+
+	return mv88e6xxx_g2_avb_read(chip, readop, data, len);
+}
+
+static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip,
+					   int port, int addr, u16 data)
+{
+	u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) |
+		      (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr;
+
+	return mv88e6xxx_g2_avb_write(chip, writeop, data);
+}
+
+static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr,
+				     u16 *data, int len)
+{
+	return mv88e6390_g2_avb_port_ptp_read(chip,
+					MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+					addr, data, len);
+}
+
+static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+				      u16 data)
+{
+	return mv88e6390_g2_avb_port_ptp_write(chip,
+					MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL,
+					addr, data);
+}
+
+static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr,
+				     u16 *data, int len)
+{
+	return mv88e6390_g2_avb_port_ptp_read(chip,
+					MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+					addr, data, len);
+}
+
+static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr,
+				      u16 data)
+{
+	return mv88e6390_g2_avb_port_ptp_write(chip,
+					MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL,
+					addr, data);
+}
+
+const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {
+	.port_ptp_read		= mv88e6390_g2_avb_port_ptp_read,
+	.port_ptp_write		= mv88e6390_g2_avb_port_ptp_write,
+	.ptp_read		= mv88e6390_g2_avb_ptp_read,
+	.ptp_write		= mv88e6390_g2_avb_ptp_write,
+	.tai_read		= mv88e6390_g2_avb_tai_read,
+	.tai_write		= mv88e6390_g2_avb_tai_write,
+};

From 2fa8d3af4badc40a39092140a01101119988faf6 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:45 +0100
Subject: [PATCH 0121/1678] net: dsa: mv88e6xxx: expose switch time as a PTP
 hardware clock

This patch adds basic support for exposing the 32-bit timestamp counter
inside the mv88e6xxx switch as a ptp_clock.

Adjfine implemented by Richard Cochran.
Andrew Lunn: fix return value of PTP stub function.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/Kconfig  |  10 ++
 drivers/net/dsa/mv88e6xxx/Makefile |   1 +
 drivers/net/dsa/mv88e6xxx/chip.c   |  20 +++
 drivers/net/dsa/mv88e6xxx/chip.h   |  15 +++
 drivers/net/dsa/mv88e6xxx/ptp.c    | 197 +++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/ptp.h    |  83 ++++++++++++
 6 files changed, 326 insertions(+)
 create mode 100644 drivers/net/dsa/mv88e6xxx/ptp.c
 create mode 100644 drivers/net/dsa/mv88e6xxx/ptp.h

diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 1aaa7a95ebc4..ae9e7f7cb31c 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2
 
 	  It is required on most chips. If the chip you compile the support for
 	  doesn't have such registers set, say N here. In doubt, say Y.
+
+config NET_DSA_MV88E6XXX_PTP
+	bool "PTP support for Marvell 88E6xxx"
+	default n
+	depends on NET_DSA_MV88E6XXX_GLOBAL2
+	imply NETWORK_PHY_TIMESTAMPING
+	imply PTP_1588_CLOCK
+	help
+	  Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
+	  chips that support it.
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index bdbbbf7b872e..40a423c9a982 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -8,4 +8,5 @@ mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
 mv88e6xxx-objs += phy.o
 mv88e6xxx-objs += port.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
 mv88e6xxx-objs += serdes.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 413d50e606f2..a795efd41511 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -38,6 +38,7 @@
 #include "global2.h"
 #include "phy.h"
 #include "port.h"
+#include "ptp.h"
 #include "serdes.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
@@ -2092,6 +2093,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 	if (err)
 		goto unlock;
 
+	/* Setup PTP Hardware Clock */
+	if (chip->info->ptp_support) {
+		err = mv88e6xxx_ptp_setup(chip);
+		if (err)
+			goto unlock;
+	}
+
 unlock:
 	mutex_unlock(&chip->reg_lock);
 
@@ -3484,6 +3492,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
 		.ops = &mv88e6191_ops,
 	},
 
@@ -3504,6 +3513,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.ptp_support = true,
 		.ops = &mv88e6240_ops,
 	},
 
@@ -3524,6 +3534,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
 		.ops = &mv88e6290_ops,
 	},
 
@@ -3543,6 +3554,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.ptp_support = true,
 		.ops = &mv88e6320_ops,
 	},
 
@@ -3561,6 +3573,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.ptp_support = true,
 		.ops = &mv88e6321_ops,
 	},
 
@@ -3580,6 +3593,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.ptp_support = true,
 		.ops = &mv88e6341_ops,
 	},
 
@@ -3640,6 +3654,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.ptp_support = true,
 		.ops = &mv88e6352_ops,
 	},
 	[MV88E6390] = {
@@ -3659,6 +3674,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
 		.ops = &mv88e6390_ops,
 	},
 	[MV88E6390X] = {
@@ -3678,6 +3694,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.pvt = true,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
 		.ops = &mv88e6390x_ops,
 	},
 };
@@ -4031,6 +4048,9 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
 	struct mv88e6xxx_chip *chip = ds->priv;
 
+	if (chip->info->ptp_support)
+		mv88e6xxx_ptp_free(chip);
+
 	mv88e6xxx_phy_destroy(chip);
 	mv88e6xxx_unregister_switch(chip);
 	mv88e6xxx_mdios_unregister(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 5467c86f38a1..92e5fd6e2318 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -16,6 +16,8 @@
 #include <linux/irq.h>
 #include <linux/gpio/consumer.h>
 #include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
 #include <net/dsa.h>
 
 #ifndef UINT64_MAX
@@ -126,6 +128,9 @@ struct mv88e6xxx_info {
 	 */
 	u8 atu_move_port_mask;
 	const struct mv88e6xxx_ops *ops;
+
+	/* Supports PTP */
+	bool ptp_support;
 };
 
 struct mv88e6xxx_atu_entry {
@@ -210,6 +215,16 @@ struct mv88e6xxx_chip {
 	int watchdog_irq;
 	int atu_prob_irq;
 	int vtu_prob_irq;
+
+	/* This cyclecounter abstracts the switch PTP time.
+	 * reg_lock must be held for any operation that read()s.
+	 */
+	struct cyclecounter	tstamp_cc;
+	struct timecounter	tstamp_tc;
+	struct delayed_work	overflow_work;
+
+	struct ptp_clock	*ptp_clock;
+	struct ptp_clock_info	ptp_clock_info;
 };
 
 struct mv88e6xxx_bus_ops {
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
new file mode 100644
index 000000000000..6bc643cea03d
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -0,0 +1,197 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "ptp.h"
+
+/* Raw timestamps are in units of 8-ns clock periods. */
+#define CC_SHIFT	28
+#define CC_MULT		(8 << CC_SHIFT)
+#define CC_MULT_NUM	(1 << 9)
+#define CC_MULT_DEM	15625ULL
+
+#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
+
+#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
+				      ptp_clock_info)
+#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+					     overflow_work)
+
+static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
+			      u16 *data, int len)
+{
+	if (!chip->info->ops->avb_ops->tai_read)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
+}
+
+static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
+{
+	struct mv88e6xxx_chip *chip = cc_to_chip(cc);
+	u16 phc_time[2];
+	int err;
+
+	err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time,
+				 ARRAY_SIZE(phc_time));
+	if (err)
+		return 0;
+	else
+		return ((u32)phc_time[1] << 16) | phc_time[0];
+}
+
+static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+	int neg_adj = 0;
+	u32 diff, mult;
+	u64 adj;
+
+	if (scaled_ppm < 0) {
+		neg_adj = 1;
+		scaled_ppm = -scaled_ppm;
+	}
+	mult = CC_MULT;
+	adj = CC_MULT_NUM;
+	adj *= scaled_ppm;
+	diff = div_u64(adj, CC_MULT_DEM);
+
+	mutex_lock(&chip->reg_lock);
+
+	timecounter_read(&chip->tstamp_tc);
+	chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff;
+
+	mutex_unlock(&chip->reg_lock);
+
+	return 0;
+}
+
+static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+	mutex_lock(&chip->reg_lock);
+	timecounter_adjtime(&chip->tstamp_tc, delta);
+	mutex_unlock(&chip->reg_lock);
+
+	return 0;
+}
+
+static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp,
+				 struct timespec64 *ts)
+{
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+	u64 ns;
+
+	mutex_lock(&chip->reg_lock);
+	ns = timecounter_read(&chip->tstamp_tc);
+	mutex_unlock(&chip->reg_lock);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
+				 const struct timespec64 *ts)
+{
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+	u64 ns;
+
+	ns = timespec64_to_ns(ts);
+
+	mutex_lock(&chip->reg_lock);
+	timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns);
+	mutex_unlock(&chip->reg_lock);
+
+	return 0;
+}
+
+static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
+				struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+				enum ptp_pin_function func, unsigned int chan)
+{
+	return -EOPNOTSUPP;
+}
+
+/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3
+ * seconds; this task forces periodic reads so that we don't miss any.
+ */
+#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
+static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
+	struct timespec64 ts;
+
+	mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
+
+	schedule_delayed_work(&chip->overflow_work,
+			      MV88E6XXX_TAI_OVERFLOW_PERIOD);
+}
+
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+	/* Set up the cycle counter */
+	memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
+	chip->tstamp_cc.read	= mv88e6xxx_ptp_clock_read;
+	chip->tstamp_cc.mask	= CYCLECOUNTER_MASK(32);
+	chip->tstamp_cc.mult	= CC_MULT;
+	chip->tstamp_cc.shift	= CC_SHIFT;
+
+	timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
+			 ktime_to_ns(ktime_get_real()));
+
+	INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+
+	chip->ptp_clock_info.owner = THIS_MODULE;
+	snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
+		 dev_name(chip->dev));
+	chip->ptp_clock_info.max_adj	= 1000000;
+
+	chip->ptp_clock_info.adjfine	= mv88e6xxx_ptp_adjfine;
+	chip->ptp_clock_info.adjtime	= mv88e6xxx_ptp_adjtime;
+	chip->ptp_clock_info.gettime64	= mv88e6xxx_ptp_gettime;
+	chip->ptp_clock_info.settime64	= mv88e6xxx_ptp_settime;
+	chip->ptp_clock_info.enable	= mv88e6xxx_ptp_enable;
+	chip->ptp_clock_info.verify	= mv88e6xxx_ptp_verify;
+
+	chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
+	if (IS_ERR(chip->ptp_clock))
+		return PTR_ERR(chip->ptp_clock);
+
+	schedule_delayed_work(&chip->overflow_work,
+			      MV88E6XXX_TAI_OVERFLOW_PERIOD);
+
+	return 0;
+}
+
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+	if (chip->ptp_clock) {
+		cancel_delayed_work_sync(&chip->overflow_work);
+
+		ptp_clock_unregister(chip->ptp_clock);
+		chip->ptp_clock = NULL;
+	}
+}
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
new file mode 100644
index 000000000000..5713d00800fc
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -0,0 +1,83 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PTP_H
+#define _MV88E6XXX_PTP_H
+
+#include "chip.h"
+
+/* Offset 0x00: TAI Global Config */
+#define MV88E6XXX_TAI_CFG			0x00
+
+/* Offset 0x01: Timestamp Clock Period (ps) */
+#define MV88E6XXX_TAI_CLOCK_PERIOD		0x01
+
+/* Offset 0x02/0x03: Trigger Generation Amount */
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO	0x02
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI	0x03
+
+/* Offset 0x04: Clock Compensation */
+#define MV88E6XXX_TAI_TRIG_CLOCK_COMP		0x04
+
+/* Offset 0x05: Trigger Configuration */
+#define MV88E6XXX_TAI_TRIG_CFG			0x05
+
+/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
+#define MV88E6XXX_TAI_IRL_AMOUNT		0x06
+
+/* Offset 0x07: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP			0x07
+
+/* Offset 0x08: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP_PS		0x08
+
+/* Offset 0x09: Event Status */
+#define MV88E6XXX_TAI_EVENT_STATUS		0x09
+
+/* Offset 0x0A/0x0B: Event Time */
+#define MV88E6XXX_TAI_EVENT_TIME_LO		0x0a
+#define MV88E6XXX_TAI_EVENT_TYPE_HI		0x0b
+
+/* Offset 0x0E/0x0F: PTP Global Time */
+#define MV88E6XXX_TAI_TIME_LO			0x0e
+#define MV88E6XXX_TAI_TIME_HI			0x0f
+
+/* Offset 0x10/0x11: Trig Generation Time */
+#define MV88E6XXX_TAI_TRIG_TIME_LO		0x10
+#define MV88E6XXX_TAI_TRIG_TIME_HI		0x11
+
+/* Offset 0x12: Lock Status */
+#define MV88E6XXX_TAI_LOCK_STATUS		0x12
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+	return 0;
+}
+
+static void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_PTP_H */

From a73ccd610690505df3a5b282c32f2f4c4e729d49 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:46 +0100
Subject: [PATCH 0122/1678] net: dsa: mv88e6xxx: add support for GPIO
 configuration

MV88E6352 and later switches support GPIO control through the "Scratch
& Misc" global2 register. (Older switches do too, though with a slightly
different register interface. Only the 6352-style is implemented here.)

Add a new file, global2_scratch.c, for operations in the Scratch & Misc
space. Additionally, add a GPIO operations structure to present an
abstract view over GPIO manipulation.

Reverse Christmas tree and unsigned has been replaced with unsigned
int by Andrew Lunn.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/Makefile          |   1 +
 drivers/net/dsa/mv88e6xxx/chip.c            |  26 +++
 drivers/net/dsa/mv88e6xxx/chip.h            |  33 +++
 drivers/net/dsa/mv88e6xxx/global2.c         |   1 +
 drivers/net/dsa/mv88e6xxx/global2.h         |  33 +++
 drivers/net/dsa/mv88e6xxx/global2_scratch.c | 240 ++++++++++++++++++++
 6 files changed, 334 insertions(+)
 create mode 100644 drivers/net/dsa/mv88e6xxx/global2_scratch.c

diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 40a423c9a982..0cd0e2b1e4a2 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -6,6 +6,7 @@ mv88e6xxx-objs += global1_atu.o
 mv88e6xxx-objs += global1_vtu.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
 mv88e6xxx-objs += phy.o
 mv88e6xxx-objs += port.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index a795efd41511..41e28e28e65b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2480,6 +2480,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+	.gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6161_ops = {
@@ -2610,6 +2611,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6352_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6175_ops = {
@@ -2681,6 +2683,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6352_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6185_ops = {
@@ -2744,6 +2747,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6190x_ops = {
@@ -2779,6 +2783,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 };
 
 static const struct mv88e6xxx_ops mv88e6191_ops = {
@@ -2851,6 +2856,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6352_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6352_avb_ops,
 };
 
@@ -2888,6 +2894,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6390_avb_ops,
 };
 
@@ -2923,6 +2930,7 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6185_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6352_avb_ops,
 };
 
@@ -2956,6 +2964,7 @@ static const struct mv88e6xxx_ops mv88e6321_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6185_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6352_avb_ops,
 };
 
@@ -2993,6 +3002,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
 	.reset = mv88e6352_g1_reset,
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6390_avb_ops,
 };
 
@@ -3100,6 +3110,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
 	.vtu_getnext = mv88e6352_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
 	.serdes_power = mv88e6352_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6352_avb_ops,
 };
 
@@ -3139,6 +3150,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6390_avb_ops,
 };
 
@@ -3178,6 +3190,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.vtu_getnext = mv88e6390_g1_vtu_getnext,
 	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
 	.serdes_power = mv88e6390_serdes_power,
+	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6390_avb_ops,
 };
 
@@ -3284,6 +3297,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6341",
 		.num_databases = 4096,
 		.num_ports = 6,
+		.num_gpio = 11,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3363,6 +3377,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6172",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3403,6 +3418,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6176",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3441,6 +3457,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6190",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
@@ -3461,6 +3478,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6190X",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
@@ -3502,6 +3520,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6240",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3523,6 +3542,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6290",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
@@ -3544,6 +3564,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6320",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3564,6 +3585,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6321",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3583,6 +3605,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6341",
 		.num_databases = 4096,
 		.num_ports = 6,
+		.num_gpio = 11,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3643,6 +3666,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6352",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3663,6 +3687,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6390",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
@@ -3683,6 +3708,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6390X",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 92e5fd6e2318..5bfa1aeb3269 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -41,6 +41,8 @@
 #define MV88E6XXX_MAX_PVT_SWITCHES	32
 #define MV88E6XXX_MAX_PVT_PORTS		16
 
+#define MV88E6XXX_MAX_GPIO	16
+
 enum mv88e6xxx_egress_mode {
 	MV88E6XXX_EGRESS_MODE_UNMODIFIED,
 	MV88E6XXX_EGRESS_MODE_UNTAGGED,
@@ -107,6 +109,7 @@ struct mv88e6xxx_info {
 	const char *name;
 	unsigned int num_databases;
 	unsigned int num_ports;
+	unsigned int num_gpio;
 	unsigned int max_vid;
 	unsigned int port_base_addr;
 	unsigned int global1_addr;
@@ -151,6 +154,7 @@ struct mv88e6xxx_vtu_entry {
 
 struct mv88e6xxx_bus_ops;
 struct mv88e6xxx_irq_ops;
+struct mv88e6xxx_gpio_ops;
 struct mv88e6xxx_avb_ops;
 
 struct mv88e6xxx_irq {
@@ -216,6 +220,9 @@ struct mv88e6xxx_chip {
 	int atu_prob_irq;
 	int vtu_prob_irq;
 
+	/* GPIO resources */
+	u8 gpio_data[2];
+
 	/* This cyclecounter abstracts the switch PTP time.
 	 * reg_lock must be held for any operation that read()s.
 	 */
@@ -361,6 +368,9 @@ struct mv88e6xxx_ops {
 	int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip,
 			     struct mv88e6xxx_vtu_entry *entry);
 
+	/* GPIO operations */
+	const struct mv88e6xxx_gpio_ops *gpio_ops;
+
 	/* Interface to the AVB/PTP registers */
 	const struct mv88e6xxx_avb_ops *avb_ops;
 };
@@ -374,6 +384,24 @@ struct mv88e6xxx_irq_ops {
 	void (*irq_free)(struct mv88e6xxx_chip *chip);
 };
 
+struct mv88e6xxx_gpio_ops {
+	/* Get/set data on GPIO pin */
+	int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin);
+	int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin,
+			int value);
+
+	/* get/set GPIO direction */
+	int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin);
+	int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin,
+		       bool input);
+
+	/* get/set GPIO pin control */
+	int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+			int *func);
+	int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin,
+			int func);
+};
+
 struct mv88e6xxx_avb_ops {
 	/* Access port-scoped Precision Time Protocol registers */
 	int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr,
@@ -423,6 +451,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip)
 	return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
 }
 
+static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip)
+{
+	return chip->info->num_gpio;
+}
+
 int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
 int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 3e7102e9fc3a..5f370f1fc7c4 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus,
 						   val);
 }
 
+/* Offset 0x1B: Watchdog Control */
 static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
 {
 	u16 reg;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index e1d4850974dc..25f92b3d7157 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -242,6 +242,35 @@
 #define MV88E6352_G2_NOEGR_POLICY	0x2000
 #define MV88E6390_G2_LAG_ID_4		0x2000
 
+/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */
+/* Offset 0x02: Misc Configuration */
+#define MV88E6352_G2_SCRATCH_MISC_CFG		0x02
+#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI	0x80
+/* Offset 0x60-0x61: GPIO Configuration */
+#define MV88E6352_G2_SCRATCH_GPIO_CFG0		0x60
+#define MV88E6352_G2_SCRATCH_GPIO_CFG1		0x61
+/* Offset 0x62-0x63: GPIO Direction */
+#define MV88E6352_G2_SCRATCH_GPIO_DIR0		0x62
+#define MV88E6352_G2_SCRATCH_GPIO_DIR1		0x63
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT	0
+#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN	1
+/* Offset 0x64-0x65: GPIO Data */
+#define MV88E6352_G2_SCRATCH_GPIO_DATA0		0x64
+#define MV88E6352_G2_SCRATCH_GPIO_DATA1		0x65
+/* Offset 0x68-0x6F: GPIO Pin Control */
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL0		0x68
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL1		0x69
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL2		0x6A
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL3		0x6B
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL4		0x6C
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL5		0x6D
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL6		0x6E
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL7		0x6F
+
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO	0
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG	1
+#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ	2
+
 #ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -294,6 +323,8 @@ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
 extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;
 extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
 
+extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
+
 #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -432,6 +463,8 @@ static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {};
 static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {};
 static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
 
+static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 #endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
new file mode 100644
index 000000000000..0ff12bff9f0e
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -0,0 +1,240 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+
+/* Offset 0x1A: Scratch and Misc. Register */
+static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg,
+				     u8 *data)
+{
+	u16 value;
+	int err;
+
+	err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC,
+				 reg << 8);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value);
+	if (err)
+		return err;
+
+	*data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK);
+
+	return 0;
+}
+
+static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
+				      u8 data)
+{
+	u16 value = (reg << 8) | data;
+
+	return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value);
+}
+
+/**
+ * mv88e6xxx_g2_scratch_gpio_get_bit - get a bit
+ * @chip: chip private data
+ * @nr: bit index
+ * @set: is bit set?
+ */
+static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
+					int base_reg, unsigned int offset,
+					int *set)
+{
+	int reg = base_reg + (offset / 8);
+	u8 mask = (1 << (offset & 0x7));
+	u8 val;
+	int err;
+
+	err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+	if (err)
+		return err;
+
+	*set = !!(mask & val);
+
+	return 0;
+}
+
+/**
+ * mv88e6xxx_g2_scratch_gpio_set_bit - set (or clear) a bit
+ * @chip: chip private data
+ * @nr: bit index
+ * @set: set if true, clear if false
+ *
+ * Helper function for dealing with the direction and data registers.
+ */
+static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
+					int base_reg, unsigned int offset,
+					int set)
+{
+	int reg = base_reg + (offset / 8);
+	u8 mask = (1 << (offset & 0x7));
+	u8 val;
+	int err;
+
+	err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+	if (err)
+		return err;
+
+	if (set)
+		val |= mask;
+	else
+		val &= ~mask;
+
+	return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for low, 1 for high, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
+					      unsigned int pin)
+{
+	int val = 0;
+	int err;
+
+	err = mv88e6xxx_g2_scratch_get_bit(chip,
+					   MV88E6352_G2_SCRATCH_GPIO_DATA0,
+					   pin, &val);
+	if (err)
+		return err;
+
+	return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @value: value to set
+ */
+static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
+					      unsigned int pin, int value)
+{
+	u8 mask = (1 << (pin & 0x7));
+	int offset = (pin / 8);
+	int reg;
+
+	reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
+
+	if (value)
+		chip->gpio_data[offset] |= mask;
+	else
+		chip->gpio_data[offset] &= ~mask;
+
+	return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
+ */
+static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip,
+					     unsigned int pin)
+{
+	int val = 0;
+	int err;
+
+	err = mv88e6xxx_g2_scratch_get_bit(chip,
+					   MV88E6352_G2_SCRATCH_GPIO_DIR0,
+					   pin, &val);
+	if (err)
+		return err;
+
+	return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ */
+static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip,
+					     unsigned int pin, bool input)
+{
+	int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN :
+			     MV88E6352_G2_SCRATCH_GPIO_DIR_OUT);
+
+	return mv88e6xxx_g2_scratch_set_bit(chip,
+					    MV88E6352_G2_SCRATCH_GPIO_DIR0,
+					    pin, value);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ *
+ * Note that the function numbers themselves may vary by chipset.
+ */
+static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip,
+					      unsigned int pin, int *func)
+{
+	int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+	int offset = (pin & 0x1) ? 4 : 0;
+	u8 mask = (0x7 << offset);
+	int err;
+	u8 val;
+
+	err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+	if (err)
+		return err;
+
+	*func = (val & mask) >> offset;
+
+	return 0;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting
+ * @chip: chip private data
+ * @pin: gpio index
+ * @func: function number
+ */
+static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip,
+					      unsigned int pin, int func)
+{
+	int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2);
+	int offset = (pin & 0x1) ? 4 : 0;
+	u8 mask = (0x7 << offset);
+	int err;
+	u8 val;
+
+	err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+	if (err)
+		return err;
+
+	val = (val & ~mask) | ((func & mask) << offset);
+
+	return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
+	.get_data = mv88e6352_g2_scratch_gpio_get_data,
+	.set_data = mv88e6352_g2_scratch_gpio_set_data,
+	.get_dir = mv88e6352_g2_scratch_gpio_get_dir,
+	.set_dir = mv88e6352_g2_scratch_gpio_set_dir,
+	.get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
+	.set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
+};

From 4eb3be2937670aecf8bbd52a95b6e9238a9bd7aa Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:47 +0100
Subject: [PATCH 0123/1678] net: dsa: mv88e6xxx: add support for event capture

This patch adds support for configuring mv88e6xxx GPIO lines as PTP
pins, so that they may be used for time stamping external events or for
periodic output.

Checkpatch and reverse Christmas tree fixes by Andrew Lunn

Periodic output removed by Richard Cochran, until a better abstraction
of a VCO is added to Linux in general.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.h |   4 +
 drivers/net/dsa/mv88e6xxx/ptp.c  | 189 ++++++++++++++++++++++++++++++-
 drivers/net/dsa/mv88e6xxx/ptp.h  |  16 +++
 3 files changed, 207 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 5bfa1aeb3269..eaf254a0e674 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -232,6 +232,10 @@ struct mv88e6xxx_chip {
 
 	struct ptp_clock	*ptp_clock;
 	struct ptp_clock_info	ptp_clock_info;
+	struct delayed_work	tai_event_work;
+	struct ptp_pin_desc	pin_config[MV88E6XXX_MAX_GPIO];
+	u16 trig_config;
+	u16 evcap_config;
 };
 
 struct mv88e6xxx_bus_ops {
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index 6bc643cea03d..92885e715e85 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -31,6 +31,8 @@
 				      ptp_clock_info)
 #define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
 					     overflow_work)
+#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
+					      tai_event_work)
 
 static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
 			      u16 *data, int len)
@@ -41,6 +43,30 @@ static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr,
 	return chip->info->ops->avb_ops->tai_read(chip, addr, data, len);
 }
 
+static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data)
+{
+	if (!chip->info->ops->avb_ops->tai_write)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->tai_write(chip, addr, data);
+}
+
+/* TODO: places where this are called should be using pinctrl */
+static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin,
+				   int func, int input)
+{
+	int err;
+
+	if (!chip->info->ops->gpio_ops)
+		return -EOPNOTSUPP;
+
+	err = chip->info->ops->gpio_ops->set_dir(chip, pin, input);
+	if (err)
+		return err;
+
+	return chip->info->ops->gpio_ops->set_pctl(chip, pin, func);
+}
+
 static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
 {
 	struct mv88e6xxx_chip *chip = cc_to_chip(cc);
@@ -55,6 +81,92 @@ static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc)
 		return ((u32)phc_time[1] << 16) | phc_time[0];
 }
 
+/* mv88e6xxx_config_eventcap - configure TAI event capture
+ * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external)
+ * @rising: zero for falling-edge trigger, else rising-edge trigger
+ *
+ * This will also reset the capture sequence counter.
+ */
+static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event,
+				     int rising)
+{
+	u16 global_config;
+	u16 cap_config;
+	int err;
+
+	chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE |
+			     MV88E6XXX_TAI_CFG_CAP_CTR_START;
+	if (!rising)
+		chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING;
+
+	global_config = (chip->evcap_config | chip->trig_config);
+	err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config);
+	if (err)
+		return err;
+
+	if (event == PTP_CLOCK_PPS) {
+		cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG;
+	} else if (event == PTP_CLOCK_EXTTS) {
+		/* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */
+		cap_config = 0;
+	} else {
+		return -EINVAL;
+	}
+
+	/* Write the capture config; this also clears the capture counter */
+	err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS,
+				  cap_config);
+
+	return err;
+}
+
+static void mv88e6xxx_tai_event_work(struct work_struct *ugly)
+{
+	struct delayed_work *dw = to_delayed_work(ugly);
+	struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw);
+	struct ptp_clock_event ev;
+	u16 status[4];
+	u32 raw_ts;
+	int err;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS,
+				 status, ARRAY_SIZE(status));
+	mutex_unlock(&chip->reg_lock);
+
+	if (err) {
+		dev_err(chip->dev, "failed to read TAI status register\n");
+		return;
+	}
+	if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) {
+		dev_warn(chip->dev, "missed event capture\n");
+		return;
+	}
+	if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID))
+		goto out;
+
+	raw_ts = ((u32)status[2] << 16) | status[1];
+
+	/* Clear the valid bit so the next timestamp can come in */
+	status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID;
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]);
+	mutex_unlock(&chip->reg_lock);
+
+	/* This is an external timestamp */
+	ev.type = PTP_CLOCK_EXTTS;
+
+	/* We only have one timestamping channel. */
+	ev.index = 0;
+	mutex_lock(&chip->reg_lock);
+	ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts);
+	mutex_unlock(&chip->reg_lock);
+
+	ptp_clock_event(chip->ptp_clock, &ev);
+out:
+	schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL);
+}
+
 static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
 	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
@@ -122,16 +234,71 @@ static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp,
 	return 0;
 }
 
+static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip,
+				      struct ptp_clock_request *rq, int on)
+{
+	int rising = (rq->extts.flags & PTP_RISING_EDGE);
+	int func;
+	int pin;
+	int err;
+
+	pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
+
+	if (pin < 0)
+		return -EBUSY;
+
+	mutex_lock(&chip->reg_lock);
+
+	if (on) {
+		func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ;
+
+		err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+		if (err)
+			goto out;
+
+		schedule_delayed_work(&chip->tai_event_work,
+				      TAI_EVENT_WORK_INTERVAL);
+
+		err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising);
+	} else {
+		func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO;
+
+		err = mv88e6xxx_set_gpio_func(chip, pin, func, true);
+
+		cancel_delayed_work_sync(&chip->tai_event_work);
+	}
+
+out:
+	mutex_unlock(&chip->reg_lock);
+
+	return err;
+}
+
 static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp,
 				struct ptp_clock_request *rq, int on)
 {
-	return -EOPNOTSUPP;
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		return mv88e6xxx_ptp_enable_extts(chip, rq, on);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
 				enum ptp_pin_function func, unsigned int chan)
 {
-	return -EOPNOTSUPP;
+	switch (func) {
+	case PTP_PF_NONE:
+	case PTP_PF_EXTTS:
+		break;
+	case PTP_PF_PEROUT:
+	case PTP_PF_PHYSYNC:
+		return -EOPNOTSUPP;
+	}
+	return 0;
 }
 
 /* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3
@@ -152,6 +319,8 @@ static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
 
 int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
 {
+	int i;
+
 	/* Set up the cycle counter */
 	memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
 	chip->tstamp_cc.read	= mv88e6xxx_ptp_clock_read;
@@ -163,12 +332,27 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
 			 ktime_to_ns(ktime_get_real()));
 
 	INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+	INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
 
 	chip->ptp_clock_info.owner = THIS_MODULE;
 	snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
 		 dev_name(chip->dev));
 	chip->ptp_clock_info.max_adj	= 1000000;
 
+	chip->ptp_clock_info.n_ext_ts	= 1;
+	chip->ptp_clock_info.n_per_out	= 0;
+	chip->ptp_clock_info.n_pins	= mv88e6xxx_num_gpio(chip);
+	chip->ptp_clock_info.pps	= 0;
+
+	for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
+		struct ptp_pin_desc *ppd = &chip->pin_config[i];
+
+		snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
+		ppd->index = i;
+		ppd->func = PTP_PF_NONE;
+	}
+	chip->ptp_clock_info.pin_config = chip->pin_config;
+
 	chip->ptp_clock_info.adjfine	= mv88e6xxx_ptp_adjfine;
 	chip->ptp_clock_info.adjtime	= mv88e6xxx_ptp_adjtime;
 	chip->ptp_clock_info.gettime64	= mv88e6xxx_ptp_gettime;
@@ -190,6 +374,7 @@ void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
 {
 	if (chip->ptp_clock) {
 		cancel_delayed_work_sync(&chip->overflow_work);
+		cancel_delayed_work_sync(&chip->tai_event_work);
 
 		ptp_clock_unregister(chip->ptp_clock);
 		chip->ptp_clock = NULL;
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
index 5713d00800fc..21c93d278b09 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.h
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -21,6 +21,18 @@
 
 /* Offset 0x00: TAI Global Config */
 #define MV88E6XXX_TAI_CFG			0x00
+#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE		0x8000
+#define MV88E6XXX_TAI_CFG_CAP_CTR_START		0x4000
+#define MV88E6XXX_TAI_CFG_EVREQ_FALLING		0x2000
+#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO	0x1000
+#define MV88E6XXX_TAI_CFG_IRL_ENABLE		0x0400
+#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN		0x0200
+#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN		0x0100
+#define MV88E6XXX_TAI_CFG_TRIG_LOCK		0x0080
+#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE		0x0008
+#define MV88E6XXX_TAI_CFG_MULTI_PTP		0x0004
+#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT	0x0002
+#define MV88E6XXX_TAI_CFG_TRIG_ENABLE		0x0001
 
 /* Offset 0x01: Timestamp Clock Period (ps) */
 #define MV88E6XXX_TAI_CLOCK_PERIOD		0x01
@@ -46,6 +58,10 @@
 
 /* Offset 0x09: Event Status */
 #define MV88E6XXX_TAI_EVENT_STATUS		0x09
+#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG	0x4000
+#define MV88E6XXX_TAI_EVENT_STATUS_ERROR	0x0200
+#define MV88E6XXX_TAI_EVENT_STATUS_VALID	0x0100
+#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK	0x00ff
 
 /* Offset 0x0A/0x0B: Event Time */
 #define MV88E6XXX_TAI_EVENT_TIME_LO		0x0a

From 0336369d3a4d65c9332476b618ff3bb9b41045e1 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:48 +0100
Subject: [PATCH 0124/1678] net: dsa: forward hardware timestamping ioctls to
 switch driver

This patch adds support to the dsa slave network device so that
switch drivers can implement the SIOC[GS]HWTSTAMP ioctls and the
ethtool timestamp-info interface.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 15 +++++++++++++++
 net/dsa/slave.c   | 29 +++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6cb602dd970c..4c0df83dddaf 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/of.h>
 #include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
 #include <net/devlink.h>
 #include <net/switchdev.h>
 
@@ -367,6 +368,12 @@ struct dsa_switch_ops {
 	int	(*set_wol)(struct dsa_switch *ds, int port,
 			   struct ethtool_wolinfo *w);
 
+	/*
+	 * ethtool timestamp info
+	 */
+	int	(*get_ts_info)(struct dsa_switch *ds, int port,
+			       struct ethtool_ts_info *ts);
+
 	/*
 	 * Suspend and resume
 	 */
@@ -469,6 +476,14 @@ struct dsa_switch_ops {
 					 int port, struct net_device *br);
 	void	(*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
 					  int port, struct net_device *br);
+
+	/*
+	 * PTP functionality
+	 */
+	int	(*port_hwtstamp_get)(struct dsa_switch *ds, int port,
+				     struct ifreq *ifr);
+	int	(*port_hwtstamp_set)(struct dsa_switch *ds, int port,
+				     struct ifreq *ifr);
 };
 
 struct dsa_switch_driver {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f52307296de4..935d93f0d36c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -255,6 +255,22 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->dp->ds;
+	int port = p->dp->index;
+
+	/* Pass through to switch driver if it supports timestamping */
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		if (ds->ops->port_hwtstamp_get)
+			return ds->ops->port_hwtstamp_get(ds, port, ifr);
+		break;
+	case SIOCSHWTSTAMP:
+		if (ds->ops->port_hwtstamp_set)
+			return ds->ops->port_hwtstamp_set(ds, port, ifr);
+		break;
+	}
+
 	if (!dev->phydev)
 		return -ENODEV;
 
@@ -918,6 +934,18 @@ static int dsa_slave_set_rxnfc(struct net_device *dev,
 	return ds->ops->set_rxnfc(ds, dp->index, nfc);
 }
 
+static int dsa_slave_get_ts_info(struct net_device *dev,
+				 struct ethtool_ts_info *ts)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->dp->ds;
+
+	if (!ds->ops->get_ts_info)
+		return -EOPNOTSUPP;
+
+	return ds->ops->get_ts_info(ds, p->dp->index, ts);
+}
+
 static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_drvinfo		= dsa_slave_get_drvinfo,
 	.get_regs_len		= dsa_slave_get_regs_len,
@@ -938,6 +966,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 	.get_rxnfc		= dsa_slave_get_rxnfc,
 	.set_rxnfc		= dsa_slave_set_rxnfc,
+	.get_ts_info		= dsa_slave_get_ts_info,
 };
 
 /* legacy way, bypassing the bridge *****************************************/

From 90af1059c52c0031f3bfd8279c9ede153ca83275 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:49 +0100
Subject: [PATCH 0125/1678] net: dsa: forward timestamping callbacks to switch
 drivers

Forward the rx/tx timestamp machinery from the dsa infrastructure to the
switch driver.

On the rx side, defer delivery of skbs until we have an rx timestamp.
This mimicks the behavior of skb_defer_rx_timestamp.

On the tx side, identify PTP packets, clone them, and pass them to the
underlying switch driver before we transmit. This mimicks the behavior
of skb_tx_timestamp.

Adjusted txstamp API to keep the allocation and freeing of the clone
in the same central function by Richard Cochran

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  5 +++++
 net/dsa/dsa.c     | 36 ++++++++++++++++++++++++++++++++++++
 net/dsa/slave.c   | 30 ++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 4c0df83dddaf..0ad17b63684d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -102,6 +102,7 @@ struct dsa_platform_data {
 };
 
 struct packet_type;
+struct dsa_switch;
 
 struct dsa_device_ops {
 	struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -484,6 +485,10 @@ struct dsa_switch_ops {
 				     struct ifreq *ifr);
 	int	(*port_hwtstamp_set)(struct dsa_switch *ds, int port,
 				     struct ifreq *ifr);
+	bool	(*port_txtstamp)(struct dsa_switch *ds, int port,
+				 struct sk_buff *clone, unsigned int type);
+	bool	(*port_rxtstamp)(struct dsa_switch *ds, int port,
+				 struct sk_buff *skb, unsigned int type);
 };
 
 struct dsa_switch_driver {
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6a9d0f50fbee..e63c554e0623 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -23,6 +23,7 @@
 #include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
+#include <linux/ptp_classify.h>
 #include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
 
@@ -122,6 +123,38 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
 
+/* Determine if we should defer delivery of skb until we have a rx timestamp.
+ *
+ * Called from dsa_switch_rcv. For now, this will only work if tagging is
+ * enabled on the switch. Normally the MAC driver would retrieve the hardware
+ * timestamp when it reads the packet out of the hardware. However in a DSA
+ * switch, the DSA driver owning the interface to which the packet is
+ * delivered is never notified unless we do so here.
+ */
+static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p,
+				       struct sk_buff *skb)
+{
+	struct dsa_switch *ds = p->dp->ds;
+	unsigned int type;
+
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+
+	__skb_push(skb, ETH_HLEN);
+
+	type = ptp_classify_raw(skb);
+
+	__skb_pull(skb, ETH_HLEN);
+
+	if (type == PTP_CLASS_NONE)
+		return false;
+
+	if (likely(ds->ops->port_rxtstamp))
+		return ds->ops->port_rxtstamp(ds, p->dp->index, skb, type);
+
+	return false;
+}
+
 static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 			  struct packet_type *pt, struct net_device *unused)
 {
@@ -157,6 +190,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	s->rx_bytes += skb->len;
 	u64_stats_update_end(&s->syncp);
 
+	if (dsa_skb_defer_rx_timestamp(p, skb))
+		return 0;
+
 	netif_receive_skb(skb);
 
 	return 0;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 935d93f0d36c..3376dad6dcfd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -21,6 +21,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <linux/if_bridge.h>
 #include <linux/netpoll.h>
+#include <linux/ptp_classify.h>
 
 #include "dsa_priv.h"
 
@@ -401,6 +402,30 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
 	return NETDEV_TX_OK;
 }
 
+static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
+				 struct sk_buff *skb)
+{
+	struct dsa_switch *ds = p->dp->ds;
+	struct sk_buff *clone;
+	unsigned int type;
+
+	type = ptp_classify_raw(skb);
+	if (type == PTP_CLASS_NONE)
+		return;
+
+	if (!ds->ops->port_txtstamp)
+		return;
+
+	clone = skb_clone_sk(skb);
+	if (!clone)
+		return;
+
+	if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type))
+		return;
+
+	kfree_skb(clone);
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -413,6 +438,11 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 	s->tx_bytes += skb->len;
 	u64_stats_update_end(&s->syncp);
 
+	/* Identify PTP protocol packets, clone them, and pass them to the
+	 * switch driver
+	 */
+	dsa_skb_tx_timestamp(p, skb);
+
 	/* Transmit function may have to reallocate the original SKB,
 	 * in which case it must have freed it. Only free it here on error.
 	 */

From c6fe0ad2c3499cca7c7b3be83958e6f7e961f567 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:50 +0100
Subject: [PATCH 0126/1678] net: dsa: mv88e6xxx: add rx/tx timestamping support

This patch implements RX/TX timestamping support.

The Marvell PTP hardware supports RX timestamping individual message
types, but for simplicity we only support the EVENT receive filter since
few if any clients bother with the more specific filter types.

checkpatch and reverse Christmas tree changes by Andrew Lunn.

Re-factor duplicated code paths and avoid IfOk anti-pattern, use the
common ptp worker thread from the class layer and time stamp UDP/IPv4
frames as well as Layer-2 frame by Richard Cochran.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/Makefile   |   1 +
 drivers/net/dsa/mv88e6xxx/chip.c     |  16 +-
 drivers/net/dsa/mv88e6xxx/chip.h     |  29 ++
 drivers/net/dsa/mv88e6xxx/hwtstamp.c | 571 +++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/hwtstamp.h | 163 ++++++++
 drivers/net/dsa/mv88e6xxx/ptp.c      |   3 +-
 drivers/net/dsa/mv88e6xxx/ptp.h      |   9 +
 7 files changed, 788 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/dsa/mv88e6xxx/hwtstamp.c
 create mode 100644 drivers/net/dsa/mv88e6xxx/hwtstamp.h

diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 0cd0e2b1e4a2..50de304abe2f 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -7,6 +7,7 @@ mv88e6xxx-objs += global1_vtu.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o
 mv88e6xxx-objs += phy.o
 mv88e6xxx-objs += port.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 41e28e28e65b..af63710e93c1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -36,6 +36,7 @@
 #include "chip.h"
 #include "global1.h"
 #include "global2.h"
+#include "hwtstamp.h"
 #include "phy.h"
 #include "port.h"
 #include "ptp.h"
@@ -2093,11 +2094,15 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 	if (err)
 		goto unlock;
 
-	/* Setup PTP Hardware Clock */
+	/* Setup PTP Hardware Clock and timestamping */
 	if (chip->info->ptp_support) {
 		err = mv88e6xxx_ptp_setup(chip);
 		if (err)
 			goto unlock;
+
+		err = mv88e6xxx_hwtstamp_setup(chip);
+		if (err)
+			goto unlock;
 	}
 
 unlock:
@@ -3932,6 +3937,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.port_mdb_del           = mv88e6xxx_port_mdb_del,
 	.crosschip_bridge_join	= mv88e6xxx_crosschip_bridge_join,
 	.crosschip_bridge_leave	= mv88e6xxx_crosschip_bridge_leave,
+	.port_hwtstamp_set	= mv88e6xxx_port_hwtstamp_set,
+	.port_hwtstamp_get	= mv88e6xxx_port_hwtstamp_get,
+	.port_txtstamp		= mv88e6xxx_port_txtstamp,
+	.port_rxtstamp		= mv88e6xxx_port_rxtstamp,
+	.get_ts_info		= mv88e6xxx_get_ts_info,
 };
 
 static struct dsa_switch_driver mv88e6xxx_switch_drv = {
@@ -4074,8 +4084,10 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
 	struct mv88e6xxx_chip *chip = ds->priv;
 
-	if (chip->info->ptp_support)
+	if (chip->info->ptp_support) {
+		mv88e6xxx_hwtstamp_free(chip);
 		mv88e6xxx_ptp_free(chip);
+	}
 
 	mv88e6xxx_phy_destroy(chip);
 	mv88e6xxx_unregister_switch(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index eaf254a0e674..97d7915f32c7 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -164,6 +164,32 @@ struct mv88e6xxx_irq {
 	unsigned int nirqs;
 };
 
+/* state flags for mv88e6xxx_port_hwtstamp::state */
+enum {
+	MV88E6XXX_HWTSTAMP_ENABLED,
+	MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+};
+
+struct mv88e6xxx_port_hwtstamp {
+	/* Port index */
+	int port_id;
+
+	/* Timestamping state */
+	unsigned long state;
+
+	/* Resources for receive timestamping */
+	struct sk_buff_head rx_queue;
+	struct sk_buff_head rx_queue2;
+
+	/* Resources for transmit timestamping */
+	unsigned long tx_tstamp_start;
+	struct sk_buff *tx_skb;
+	u16 tx_seq_id;
+
+	/* Current timestamp configuration */
+	struct hwtstamp_config tstamp_config;
+};
+
 struct mv88e6xxx_chip {
 	const struct mv88e6xxx_info *info;
 
@@ -236,6 +262,9 @@ struct mv88e6xxx_chip {
 	struct ptp_pin_desc	pin_config[MV88E6XXX_MAX_GPIO];
 	u16 trig_config;
 	u16 evcap_config;
+
+	/* Per-port timestamping resources. */
+	struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
 };
 
 struct mv88e6xxx_bus_ops {
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
new file mode 100644
index 000000000000..1b8f79b2c939
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -0,0 +1,571 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "hwtstamp.h"
+#include "ptp.h"
+#include <linux/ptp_classify.h>
+
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+
+static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
+				   int addr, u16 *data, int len)
+{
+	if (!chip->info->ops->avb_ops->port_ptp_read)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
+						       data, len);
+}
+
+static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
+				    int addr, u16 data)
+{
+	if (!chip->info->ops->avb_ops->port_ptp_write)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
+							data);
+}
+
+static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+			       u16 data)
+{
+	if (!chip->info->ops->avb_ops->ptp_write)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
+}
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may enounter delays due to MDIO contention, so
+ * the timeout is set accordingly.
+ */
+#define TX_TSTAMP_TIMEOUT	msecs_to_jiffies(20)
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+			  struct ethtool_ts_info *info)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+
+	if (!chip->info->ptp_support)
+		return -EOPNOTSUPP;
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->phc_index = ptp_clock_index(chip->ptp_clock);
+	info->tx_types =
+		(1 << HWTSTAMP_TX_OFF) |
+		(1 << HWTSTAMP_TX_ON);
+	info->rx_filters =
+		(1 << HWTSTAMP_FILTER_NONE) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+
+	return 0;
+}
+
+static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
+					 struct hwtstamp_config *config)
+{
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	bool tstamp_enable = false;
+	u16 port_config0;
+	int err;
+
+	/* Prevent the TX/RX paths from trying to interact with the
+	 * timestamp hardware while we reconfigure it.
+	 */
+	clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+	/* reserved for future extensions */
+	if (config->flags)
+		return -EINVAL;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tstamp_enable = false;
+		break;
+	case HWTSTAMP_TX_ON:
+		tstamp_enable = true;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* The switch supports timestamping both L2 and L4; one cannot be
+	 * disabled independently of the other.
+	 */
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tstamp_enable = false;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	default:
+		config->rx_filter = HWTSTAMP_FILTER_NONE;
+		return -ERANGE;
+	}
+
+	if (tstamp_enable) {
+		/* Disable transportSpecific value matching, so that packets
+		 * with either 1588 (0) and 802.1AS (1) will be timestamped.
+		 */
+		port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+	} else {
+		/* Disable PTP. This disables both RX and TX timestamping. */
+		port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+	}
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+				       port_config0);
+	mutex_unlock(&chip->reg_lock);
+
+	if (err < 0)
+		return err;
+
+	/* Once hardware has been configured, enable timestamp checks
+	 * in the RX/TX paths.
+	 */
+	if (tstamp_enable)
+		set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+	return 0;
+}
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+				struct ifreq *ifr)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	struct hwtstamp_config config;
+	int err;
+
+	if (!chip->info->ptp_support)
+		return -EOPNOTSUPP;
+
+	if (port < 0 || port >= mv88e6xxx_num_ports(chip))
+		return -EINVAL;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
+	if (err)
+		return err;
+
+	/* Save the chosen configuration to be returned later. */
+	memcpy(&ps->tstamp_config, &config, sizeof(config));
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+				struct ifreq *ifr)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	struct hwtstamp_config *config = &ps->tstamp_config;
+
+	if (!chip->info->ptp_support)
+		return -EOPNOTSUPP;
+
+	if (port < 0 || port >= mv88e6xxx_num_ports(chip))
+		return -EINVAL;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/* Get the start of the PTP header in this skb */
+static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
+{
+	u8 *data = skb_mac_header(skb);
+	unsigned int offset = 0;
+
+	if (type & PTP_CLASS_VLAN)
+		offset += VLAN_HLEN;
+
+	switch (type & PTP_CLASS_PMASK) {
+	case PTP_CLASS_IPV4:
+		offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+		break;
+	case PTP_CLASS_IPV6:
+		offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+		break;
+	case PTP_CLASS_L2:
+		offset += ETH_HLEN;
+		break;
+	default:
+		return NULL;
+	}
+
+	/* Ensure that the entire header is present in this packet. */
+	if (skb->len + ETH_HLEN < offset + 34)
+		return NULL;
+
+	return data + offset;
+}
+
+/* Returns a pointer to the PTP header if the caller should time stamp,
+ * or NULL if the caller should not.
+ */
+static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
+				   struct sk_buff *skb, unsigned int type)
+{
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	u8 *hdr;
+
+	if (!chip->info->ptp_support)
+		return NULL;
+
+	if (port < 0 || port >= mv88e6xxx_num_ports(chip))
+		return NULL;
+
+	hdr = parse_ptp_header(skb, type);
+	if (!hdr)
+		return NULL;
+
+	if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
+		return NULL;
+
+	return hdr;
+}
+
+static int mv88e6xxx_ts_valid(u16 status)
+{
+	if (!(status & MV88E6XXX_PTP_TS_VALID))
+		return 0;
+	if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
+		return 0;
+	return 1;
+}
+
+static int seq_match(struct sk_buff *skb, u16 ts_seqid)
+{
+	unsigned int type = SKB_PTP_TYPE(skb);
+	u8 *hdr = parse_ptp_header(skb, type);
+	__be16 *seqid;
+
+	seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+	return ts_seqid == ntohs(*seqid);
+}
+
+static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
+			       struct mv88e6xxx_port_hwtstamp *ps,
+			       struct sk_buff *skb, u16 reg,
+			       struct sk_buff_head *rxq)
+{
+	u16 buf[4] = { 0 }, status, timelo, timehi, seq_id;
+	struct skb_shared_hwtstamps *shwt;
+	int err;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+				      reg, buf, ARRAY_SIZE(buf));
+	mutex_unlock(&chip->reg_lock);
+	if (err)
+		pr_err("failed to get the receive time stamp\n");
+
+	status = buf[0];
+	timelo = buf[1];
+	timehi = buf[2];
+	seq_id = buf[3];
+
+	if (status & MV88E6XXX_PTP_TS_VALID) {
+		mutex_lock(&chip->reg_lock);
+		err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
+		mutex_unlock(&chip->reg_lock);
+		if (err)
+			pr_err("failed to clear the receive status\n");
+	}
+	/* Since the device can only handle one time stamp at a time,
+	 * we purge any extra frames from the queue.
+	 */
+	for ( ; skb; skb = skb_dequeue(rxq)) {
+		if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
+			u64 ns = timehi << 16 | timelo;
+
+			mutex_lock(&chip->reg_lock);
+			ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
+			mutex_unlock(&chip->reg_lock);
+			shwt = skb_hwtstamps(skb);
+			memset(shwt, 0, sizeof(*shwt));
+			shwt->hwtstamp = ns_to_ktime(ns);
+			status &= ~MV88E6XXX_PTP_TS_VALID;
+		}
+		netif_rx_ni(skb);
+	}
+}
+
+static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
+				    struct mv88e6xxx_port_hwtstamp *ps)
+{
+	struct sk_buff *skb;
+
+	skb = skb_dequeue(&ps->rx_queue);
+
+	if (skb)
+		mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+				   &ps->rx_queue);
+
+	skb = skb_dequeue(&ps->rx_queue2);
+	if (skb)
+		mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+				   &ps->rx_queue2);
+}
+
+static int is_pdelay_resp(u8 *msgtype)
+{
+	return (*msgtype & 0xf) == 3;
+}
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb, unsigned int type)
+{
+	struct mv88e6xxx_port_hwtstamp *ps;
+	struct mv88e6xxx_chip *chip;
+	u8 *hdr;
+
+	chip = ds->priv;
+	ps = &chip->port_hwtstamp[port];
+
+	if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+		return false;
+
+	hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
+	if (!hdr)
+		return false;
+
+	SKB_PTP_TYPE(skb) = type;
+
+	if (is_pdelay_resp(hdr))
+		skb_queue_tail(&ps->rx_queue2, skb);
+	else
+		skb_queue_tail(&ps->rx_queue, skb);
+
+	ptp_schedule_worker(chip->ptp_clock, 0);
+
+	return true;
+}
+
+static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
+				   struct mv88e6xxx_port_hwtstamp *ps)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	u16 departure_block[4], status;
+	struct sk_buff *tmp_skb;
+	u32 time_raw;
+	int err;
+	u64 ns;
+
+	if (!ps->tx_skb)
+		return 0;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+				      MV88E6XXX_PORT_PTP_DEP_STS,
+				      departure_block,
+				      ARRAY_SIZE(departure_block));
+	mutex_unlock(&chip->reg_lock);
+
+	if (err)
+		goto free_and_clear_skb;
+
+	if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
+		if (time_is_before_jiffies(ps->tx_tstamp_start +
+					   TX_TSTAMP_TIMEOUT)) {
+			dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
+				 ps->port_id);
+			goto free_and_clear_skb;
+		}
+		/* The timestamp should be available quickly, while getting it
+		 * is high priority and time bounded to only 10ms. A poll is
+		 * warranted so restart the work.
+		 */
+		return 1;
+	}
+
+	/* We have the timestamp; go ahead and clear valid now */
+	mutex_lock(&chip->reg_lock);
+	mv88e6xxx_port_ptp_write(chip, ps->port_id,
+				 MV88E6XXX_PORT_PTP_DEP_STS, 0);
+	mutex_unlock(&chip->reg_lock);
+
+	status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
+	if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
+		dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
+		goto free_and_clear_skb;
+	}
+
+	if (departure_block[3] != ps->tx_seq_id) {
+		dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
+		goto free_and_clear_skb;
+	}
+
+	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+	time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
+	mutex_lock(&chip->reg_lock);
+	ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
+	mutex_unlock(&chip->reg_lock);
+	shhwtstamps.hwtstamp = ns_to_ktime(ns);
+
+	dev_dbg(chip->dev,
+		"p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
+		ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
+		departure_block[0], ps->tx_seq_id, departure_block[3]);
+
+	/* skb_complete_tx_timestamp() will free up the client to make
+	 * another timestamp-able transmit. We have to be ready for it
+	 * -- by clearing the ps->tx_skb "flag" -- beforehand.
+	 */
+
+	tmp_skb = ps->tx_skb;
+	ps->tx_skb = NULL;
+	clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+	skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+	return 0;
+
+free_and_clear_skb:
+	dev_kfree_skb_any(ps->tx_skb);
+	ps->tx_skb = NULL;
+	clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+	return 0;
+}
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+	struct dsa_switch *ds = chip->ds;
+	struct mv88e6xxx_port_hwtstamp *ps;
+	int i, restart = 0;
+
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_user_port(ds, i))
+			continue;
+
+		ps = &chip->port_hwtstamp[i];
+		if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+			restart |= mv88e6xxx_txtstamp_work(chip, ps);
+
+		mv88e6xxx_rxtstamp_work(chip, ps);
+	}
+
+	return restart ? 1 : -1;
+}
+
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	__be16 *seq_ptr;
+	u8 *hdr;
+
+	if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+		return false;
+
+	hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+	if (!hdr)
+		return false;
+
+	seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+	if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+				  &ps->state))
+		return false;
+
+	ps->tx_skb = clone;
+	ps->tx_tstamp_start = jiffies;
+	ps->tx_seq_id = be16_to_cpup(seq_ptr);
+
+	ptp_schedule_worker(chip->ptp_clock, 0);
+	return true;
+}
+
+static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
+{
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+
+	ps->port_id = port;
+
+	skb_queue_head_init(&ps->rx_queue);
+	skb_queue_head_init(&ps->rx_queue2);
+
+	return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+					MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+	int err;
+	int i;
+
+	/* Disable timestamping on all ports. */
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+		err = mv88e6xxx_hwtstamp_port_setup(chip, i);
+		if (err)
+			return err;
+	}
+
+	/* MV88E6XXX_PTP_MSG_TYPE is a mask of PTP message types to
+	 * timestamp. This affects all ports that have timestamping enabled,
+	 * but the timestamp config is per-port; thus we configure all events
+	 * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
+	 */
+	err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
+				  MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
+	if (err)
+		return err;
+
+	/* Use ARRIVAL1 for peer delay response messages. */
+	err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
+				  MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
new file mode 100644
index 000000000000..18ff39cdab4c
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -0,0 +1,163 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_HWTSTAMP_H
+#define _MV88E6XXX_HWTSTAMP_H
+
+#include "chip.h"
+
+/* Global PTP registers */
+/* Offset 0x00: PTP EtherType */
+#define MV88E6XXX_PTP_ETHERTYPE	0x00
+
+/* Offset 0x01: Message Type Timestamp Enables */
+#define MV88E6XXX_PTP_MSGTYPE			0x01
+#define MV88E6XXX_PTP_MSGTYPE_SYNC		0x0001
+#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ		0x0002
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ		0x0004
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES		0x0008
+#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT		0x000f
+
+/* Offset 0x02: Timestamp Arrival Capture Pointers */
+#define MV88E6XXX_PTP_TS_ARRIVAL_PTR	0x02
+
+/* Offset 0x08: PTP Interrupt Status */
+#define MV88E6XXX_PTP_IRQ_STATUS	0x08
+
+/* Per-Port PTP Registers */
+/* Offset 0x00: PTP Configuration 0 */
+#define MV88E6XXX_PORT_PTP_CFG0				0x00
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT		12
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK		0xf000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588		0x0000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS		0x1000
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH	0x0800
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE	0x0002
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP		0x0001
+
+/* Offset 0x01: PTP Configuration 1 */
+#define MV88E6XXX_PORT_PTP_CFG1	0x01
+
+/* Offset 0x02: PTP Configuration 2 */
+#define MV88E6XXX_PORT_PTP_CFG2				0x02
+#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL		0x1000
+#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN		0x0002
+#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN		0x0001
+
+/* Offset 0x03: PTP LED Configuration */
+#define MV88E6XXX_PORT_PTP_LED_CFG	0x03
+
+/* Offset 0x08: PTP Arrival 0 Status */
+#define MV88E6XXX_PORT_PTP_ARR0_STS	0x08
+
+/* Offset 0x09/0x0A: PTP Arrival 0 Time */
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO	0x09
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI	0x0a
+
+/* Offset 0x0B: PTP Arrival 0 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR0_SEQID	0x0b
+
+/* Offset 0x0C: PTP Arrival 1 Status */
+#define MV88E6XXX_PORT_PTP_ARR1_STS	0x0c
+
+/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO	0x0d
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI	0x0e
+
+/* Offset 0x0F: PTP Arrival 1 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR1_SEQID	0x0f
+
+/* Offset 0x10: PTP Departure Status */
+#define MV88E6XXX_PORT_PTP_DEP_STS	0x10
+
+/* Offset 0x11/0x12: PTP Deperture Time */
+#define MV88E6XXX_PORT_PTP_DEP_TIME_LO	0x11
+#define MV88E6XXX_PORT_PTP_DEP_TIME_HI	0x12
+
+/* Offset 0x13: PTP Departure Sequence ID */
+#define MV88E6XXX_PORT_PTP_DEP_SEQID	0x13
+
+/* Status fields for arrival and depature timestamp status registers */
+#define MV88E6XXX_PTP_TS_STATUS_MASK		0x0006
+#define MV88E6XXX_PTP_TS_STATUS_NORMAL		0x0000
+#define MV88E6XXX_PTP_TS_STATUS_OVERWITTEN	0x0002
+#define MV88E6XXX_PTP_TS_STATUS_DISCARDED	0x0004
+#define MV88E6XXX_PTP_TS_VALID			0x0001
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+				struct ifreq *ifr);
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+				struct ifreq *ifr);
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type);
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type);
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+			  struct ethtool_ts_info *info);
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
+					      int port, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
+					      int port, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+					   struct sk_buff *clone,
+					   unsigned int type)
+{
+	return false;
+}
+
+static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+					   struct sk_buff *clone,
+					   unsigned int type)
+{
+	return false;
+}
+
+static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+					struct ethtool_ts_info *info)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+	return 0;
+}
+
+static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_HWTSTAMP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index 92885e715e85..bd85e2c390e1 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -27,8 +27,6 @@
 #define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100)
 
 #define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc)
-#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip, \
-				      ptp_clock_info)
 #define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
 					     overflow_work)
 #define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \
@@ -359,6 +357,7 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
 	chip->ptp_clock_info.settime64	= mv88e6xxx_ptp_settime;
 	chip->ptp_clock_info.enable	= mv88e6xxx_ptp_enable;
 	chip->ptp_clock_info.verify	= mv88e6xxx_ptp_verify;
+	chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
 
 	chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
 	if (IS_ERR(chip->ptp_clock))
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
index 21c93d278b09..992818ade746 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.h
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -80,11 +80,20 @@
 
 #ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
 
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
 int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
 
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip,	\
+				      ptp_clock_info)
+
 #else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
 
+static long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+	return -1;
+}
+
 static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
 {
 	return 0;

From a2e47134e577713add97124afd812bc3f93627fb Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Wed, 14 Feb 2018 01:07:51 +0100
Subject: [PATCH 0127/1678] net: dsa: mv88e6xxx: add workaround for 6341
 timestamping

88E6341 devices default to timestamping at the PHY, but due to a
hardware issue, timestamps via this component are unreliable. For
this family, configure the PTP hardware to force the timestamping
to occur at the MAC.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/hwtstamp.c | 13 +++++++++++++
 drivers/net/dsa/mv88e6xxx/hwtstamp.h |  9 +++++++++
 2 files changed, 22 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index 1b8f79b2c939..b251d534b70d 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -563,6 +563,19 @@ int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
 	if (err)
 		return err;
 
+	/* 88E6341 devices default to timestamping at the PHY, but this has
+	 * a hardware issue that results in unreliable timestamps. Force
+	 * these devices to timestamp at the MAC.
+	 */
+	if (chip->info->family == MV88E6XXX_FAMILY_6341) {
+		u16 val = MV88E6341_PTP_CFG_UPDATE |
+			  MV88E6341_PTP_CFG_MODE_IDX |
+			  MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
+		err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
index 18ff39cdab4c..bc71c9212a08 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.h
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -34,6 +34,15 @@
 /* Offset 0x02: Timestamp Arrival Capture Pointers */
 #define MV88E6XXX_PTP_TS_ARRIVAL_PTR	0x02
 
+/* Offset 0x07: PTP Global Configuration */
+#define MV88E6341_PTP_CFG			0x07
+#define MV88E6341_PTP_CFG_UPDATE		0x8000
+#define MV88E6341_PTP_CFG_IDX_MASK		0x7f00
+#define MV88E6341_PTP_CFG_DATA_MASK		0x00ff
+#define MV88E6341_PTP_CFG_MODE_IDX		0x0
+#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY	0x00
+#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC	0x80
+
 /* Offset 0x08: PTP Interrupt Status */
 #define MV88E6XXX_PTP_IRQ_STATUS	0x08
 

From 9942895b5ee4b0db53f32fbcb4a51360607aac1b Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Feb 2018 20:32:04 -0800
Subject: [PATCH 0128/1678] net: Move ipv4 set_lwt_redirect helper to lwtunnel

IPv4 uses set_lwt_redirect to set the lwtunnel redirect functions as
needed. Move it to lwtunnel.h as lwtunnel_set_redirect and change
IPv6 to also use it.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lwtunnel.h | 15 +++++++++++++++
 net/ipv4/route.c       | 17 ++---------------
 net/ipv6/route.c       |  9 +--------
 3 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index d747ef975cd8..33fd9ba7e0e5 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -127,6 +127,17 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
 int lwtunnel_xmit(struct sk_buff *skb);
 
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+	if (lwtunnel_output_redirect(dst->lwtstate)) {
+		dst->lwtstate->orig_output = dst->output;
+		dst->output = lwtunnel_output;
+	}
+	if (lwtunnel_input_redirect(dst->lwtstate)) {
+		dst->lwtstate->orig_input = dst->input;
+		dst->input = lwtunnel_input;
+	}
+}
 #else
 
 static inline void lwtstate_free(struct lwtunnel_state *lws)
@@ -158,6 +169,10 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
 	return false;
 }
 
+static inline void lwtunnel_set_redirect(struct dst_entry *dst)
+{
+}
+
 static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
 					     unsigned int mtu)
 {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b0ef4cc3e875..6ce623e3e2ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1645,19 +1645,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
 	spin_unlock_bh(&fnhe_lock);
 }
 
-static void set_lwt_redirect(struct rtable *rth)
-{
-	if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
-		rth->dst.lwtstate->orig_output = rth->dst.output;
-		rth->dst.output = lwtunnel_output;
-	}
-
-	if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
-		rth->dst.lwtstate->orig_input = rth->dst.input;
-		rth->dst.input = lwtunnel_input;
-	}
-}
-
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   const struct fib_result *res,
@@ -1748,7 +1735,7 @@ rt_cache:
 
 	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
 		       do_cache);
-	set_lwt_redirect(rth);
+	lwtunnel_set_redirect(&rth->dst);
 	skb_dst_set(skb, &rth->dst);
 out:
 	err = 0;
@@ -2267,7 +2254,7 @@ add:
 	}
 
 	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
-	set_lwt_redirect(rth);
+	lwtunnel_set_redirect(&rth->dst);
 
 	return rth;
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9dcfadddd800..f0baae26db8f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2671,14 +2671,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 		rt->dst.lwtstate = lwtstate_get(lwtstate);
-		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
-			rt->dst.lwtstate->orig_output = rt->dst.output;
-			rt->dst.output = lwtunnel_output;
-		}
-		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
-			rt->dst.lwtstate->orig_input = rt->dst.input;
-			rt->dst.input = lwtunnel_input;
-		}
+		lwtunnel_set_redirect(&rt->dst);
 	}
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);

From 1a4330cdbf2270abcc0703837ef73148d0a75ccc Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Wed, 14 Feb 2018 12:56:27 +0530
Subject: [PATCH 0129/1678] cxgb4: rework on-chip memory read

Rework logic to read EDC and MC. Do 32-bit reads at a time.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cudbg_lib.c    |  67 +++++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h    |   5 +
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c    | 193 +++++++++++-------
 3 files changed, 192 insertions(+), 73 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 557fd8bfd54e..f21ed53aeb44 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -878,6 +878,67 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
 				      &payload->start, &payload->end);
 }
 
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+			     int mtype, u32 addr, u32 len, void *hbuf)
+{
+	u32 win_pf, memoffset, mem_aperture, mem_base;
+	struct adapter *adap = pdbg_init->adap;
+	u32 pos, offset, resid;
+	u32 *buf;
+	int ret;
+
+	/* Argument sanity checks ...
+	 */
+	if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+		return -EINVAL;
+
+	buf = (u32 *)hbuf;
+
+	/* Try to do 32-bit reads.  Residual will be handled later. */
+	resid = len & 0x3;
+	len -= resid;
+
+	ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+				&mem_aperture);
+	if (ret)
+		return ret;
+
+	addr = addr + memoffset;
+	win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
+
+	pos = addr & ~(mem_aperture - 1);
+	offset = addr - pos;
+
+	/* Set up initial PCI-E Memory Window to cover the start of our
+	 * transfer.
+	 */
+	t4_memory_update_win(adap, win, pos | win_pf);
+
+	/* Transfer data from the adapter */
+	while (len > 0) {
+		*buf++ = le32_to_cpu((__force __le32)
+				     t4_read_reg(adap, mem_base + offset));
+		offset += sizeof(u32);
+		len -= sizeof(u32);
+
+		/* If we've reached the end of our current window aperture,
+		 * move the PCI-E Memory Window on to the next.
+		 */
+		if (offset == mem_aperture) {
+			pos += mem_aperture;
+			offset = 0;
+			t4_memory_update_win(adap, win, pos | win_pf);
+		}
+	}
+
+	/* Transfer residual */
+	if (resid)
+		t4_memory_rw_residual(adap, resid, mem_base + offset,
+				      (u8 *)buf, T4_MEMORY_READ);
+
+	return 0;
+}
+
 #define CUDBG_YIELD_ITERATION 256
 
 static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +998,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
 				goto skip_read;
 
 		spin_lock(&padap->win0_lock);
-		rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
-				  bytes_read, bytes,
-				  (__be32 *)temp_buff.data,
-				  1);
+		rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+				       bytes_read, bytes, temp_buff.data);
 		spin_unlock(&padap->win0_lock);
 		if (rc) {
 			cudbg_err->sys_err = rc;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13ce4b7..d3fa53db61ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
 u32 t4_get_util_window(struct adapter *adap);
 void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
 
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+		      u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+			   int dir);
 #define T4_MEMORY_WRITE	0
 #define T4_MEMORY_READ	1
 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 047609ef0515..ba647462be6c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -483,6 +483,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
 	return 0;
 }
 
+/**
+ * t4_memory_rw_init - Get memory window relative offset, base, and size.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ * @mem_off: memory relative offset with respect to @mtype.
+ * @mem_base: configured memory base address.
+ * @mem_aperture: configured memory window aperture.
+ *
+ * Get the configured memory window's relative offset, base, and size.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+		      u32 *mem_base, u32 *mem_aperture)
+{
+	u32 edc_size, mc_size, mem_reg;
+
+	/* Offset into the region of memory which is being accessed
+	 * MEM_EDC0 = 0
+	 * MEM_EDC1 = 1
+	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
+	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
+	 * MEM_HMA  = 4
+	 */
+	edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+	if (mtype == MEM_HMA) {
+		*mem_off = 2 * (edc_size * 1024 * 1024);
+	} else if (mtype != MEM_MC1) {
+		*mem_off = (mtype * (edc_size * 1024 * 1024));
+	} else {
+		mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+						      MA_EXT_MEMORY0_BAR_A));
+		*mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
+	}
+
+	/* Each PCI-E Memory Window is programmed with a window size -- or
+	 * "aperture" -- which controls the granularity of its mapping onto
+	 * adapter memory.  We need to grab that aperture in order to know
+	 * how to use the specified window.  The window is also programmed
+	 * with the base address of the Memory Window in BAR0's address
+	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
+	 * the address is relative to BAR0.
+	 */
+	mem_reg = t4_read_reg(adap,
+			      PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+						  win));
+	/* a dead adapter will return 0xffffffff for PIO reads */
+	if (mem_reg == 0xffffffff)
+		return -ENXIO;
+
+	*mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
+	*mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
+	if (is_t4(adap->params.chip))
+		*mem_base -= adap->t4_bar0;
+
+	return 0;
+}
+
+/**
+ * t4_memory_update_win - Move memory window to specified address.
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @addr: location to move.
+ *
+ * Move memory window to specified address.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+	t4_write_reg(adap,
+		     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
+		     addr);
+	/* Read it back to ensure that changes propagate before we
+	 * attempt to use the new value.
+	 */
+	t4_read_reg(adap,
+		    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+}
+
+/**
+ * t4_memory_rw_residual - Read/Write residual data.
+ * @adap: the adapter
+ * @off: relative offset within residual to start read/write.
+ * @addr: address within indicated memory type.
+ * @buf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Read/Write residual data less than 32-bits.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+			   int dir)
+{
+	union {
+		u32 word;
+		char byte[4];
+	} last;
+	unsigned char *bp;
+	int i;
+
+	if (dir == T4_MEMORY_READ) {
+		last.word = le32_to_cpu((__force __le32)
+					t4_read_reg(adap, addr));
+		for (bp = (unsigned char *)buf, i = off; i < 4; i++)
+			bp[i] = last.byte[i];
+	} else {
+		last.word = *buf;
+		for (i = off; i < 4; i++)
+			last.byte[i] = 0;
+		t4_write_reg(adap, addr,
+			     (__force u32)cpu_to_le32(last.word));
+	}
+}
+
 /**
  *	t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
  *	@adap: the adapter
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
 		 u32 len, void *hbuf, int dir)
 {
 	u32 pos, offset, resid, memoffset;
-	u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+	u32 win_pf, mem_aperture, mem_base;
 	u32 *buf;
+	int ret;
 
 	/* Argument sanity checks ...
 	 */
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
 	resid = len & 0x3;
 	len -= resid;
 
-	/* Offset into the region of memory which is being accessed
-	 * MEM_EDC0 = 0
-	 * MEM_EDC1 = 1
-	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
-	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
-	 * MEM_HMA  = 4
-	 */
-	edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
-	if (mtype == MEM_HMA) {
-		memoffset = 2 * (edc_size * 1024 * 1024);
-	} else if (mtype != MEM_MC1) {
-		memoffset = (mtype * (edc_size * 1024 * 1024));
-	} else {
-		mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
-						      MA_EXT_MEMORY0_BAR_A));
-		memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
-	}
+	ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+				&mem_aperture);
+	if (ret)
+		return ret;
 
 	/* Determine the PCIE_MEM_ACCESS_OFFSET */
 	addr = addr + memoffset;
 
-	/* Each PCI-E Memory Window is programmed with a window size -- or
-	 * "aperture" -- which controls the granularity of its mapping onto
-	 * adapter memory.  We need to grab that aperture in order to know
-	 * how to use the specified window.  The window is also programmed
-	 * with the base address of the Memory Window in BAR0's address
-	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
-	 * the address is relative to BAR0.
-	 */
-	mem_reg = t4_read_reg(adap,
-			      PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
-						  win));
-	mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
-	mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
-	if (is_t4(adap->params.chip))
-		mem_base -= adap->t4_bar0;
 	win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);
 
 	/* Calculate our initial PCI-E Memory Window Position and Offset into
 	 * that Window.
 	 */
-	pos = addr & ~(mem_aperture-1);
+	pos = addr & ~(mem_aperture - 1);
 	offset = addr - pos;
 
 	/* Set up initial PCI-E Memory Window to cover the start of our
-	 * transfer.  (Read it back to ensure that changes propagate before we
-	 * attempt to use the new value.)
+	 * transfer.
 	 */
-	t4_write_reg(adap,
-		     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
-		     pos | win_pf);
-	t4_read_reg(adap,
-		    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+	t4_memory_update_win(adap, win, pos | win_pf);
 
 	/* Transfer data to/from the adapter as long as there's an integral
 	 * number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
 		if (offset == mem_aperture) {
 			pos += mem_aperture;
 			offset = 0;
-			t4_write_reg(adap,
-				PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
-						    win), pos | win_pf);
-			t4_read_reg(adap,
-				PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
-						    win));
+			t4_memory_update_win(adap, win, pos | win_pf);
 		}
 	}
 
@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
 	 * residual amount.  The PCI-E Memory Window has already been moved
 	 * above (if necessary) to cover this final transfer.
 	 */
-	if (resid) {
-		union {
-			u32 word;
-			char byte[4];
-		} last;
-		unsigned char *bp;
-		int i;
-
-		if (dir == T4_MEMORY_READ) {
-			last.word = le32_to_cpu(
-					(__force __le32)t4_read_reg(adap,
-						mem_base + offset));
-			for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
-				bp[i] = last.byte[i];
-		} else {
-			last.word = *buf;
-			for (i = resid; i < 4; i++)
-				last.byte[i] = 0;
-			t4_write_reg(adap, mem_base + offset,
-				     (__force u32)cpu_to_le32(last.word));
-		}
-	}
+	if (resid)
+		t4_memory_rw_residual(adap, resid, mem_base + offset,
+				      (u8 *)buf, dir);
 
 	return 0;
 }

From 7494f980ca0503e3eec6f4ba508186d269b37e7f Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Wed, 14 Feb 2018 12:56:28 +0530
Subject: [PATCH 0130/1678] cxgb4: speed up on-chip memory read

Use readq() (via t4_read_reg64()) to read 64-bits at a time.
Read residual in 32-bit multiples.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cudbg_lib.c    | 39 ++++++++++++++-----
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index f21ed53aeb44..6322b8df0ed0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -884,7 +884,8 @@ static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
 	u32 win_pf, memoffset, mem_aperture, mem_base;
 	struct adapter *adap = pdbg_init->adap;
 	u32 pos, offset, resid;
-	u32 *buf;
+	u32 *res_buf;
+	u64 *buf;
 	int ret;
 
 	/* Argument sanity checks ...
@@ -892,10 +893,10 @@ static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
 	if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
 		return -EINVAL;
 
-	buf = (u32 *)hbuf;
+	buf = (u64 *)hbuf;
 
-	/* Try to do 32-bit reads.  Residual will be handled later. */
-	resid = len & 0x3;
+	/* Try to do 64-bit reads.  Residual will be handled later. */
+	resid = len & 0x7;
 	len -= resid;
 
 	ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
@@ -916,10 +917,10 @@ static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
 
 	/* Transfer data from the adapter */
 	while (len > 0) {
-		*buf++ = le32_to_cpu((__force __le32)
-				     t4_read_reg(adap, mem_base + offset));
-		offset += sizeof(u32);
-		len -= sizeof(u32);
+		*buf++ = le64_to_cpu((__force __le64)
+				     t4_read_reg64(adap, mem_base + offset));
+		offset += sizeof(u64);
+		len -= sizeof(u64);
 
 		/* If we've reached the end of our current window aperture,
 		 * move the PCI-E Memory Window on to the next.
@@ -931,10 +932,28 @@ static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
 		}
 	}
 
-	/* Transfer residual */
+	res_buf = (u32 *)buf;
+	/* Read residual in 32-bit multiples */
+	while (resid > sizeof(u32)) {
+		*res_buf++ = le32_to_cpu((__force __le32)
+					 t4_read_reg(adap, mem_base + offset));
+		offset += sizeof(u32);
+		resid -= sizeof(u32);
+
+		/* If we've reached the end of our current window aperture,
+		 * move the PCI-E Memory Window on to the next.
+		 */
+		if (offset == mem_aperture) {
+			pos += mem_aperture;
+			offset = 0;
+			t4_memory_update_win(adap, win, pos | win_pf);
+		}
+	}
+
+	/* Transfer residual < 32-bits */
 	if (resid)
 		t4_memory_rw_residual(adap, resid, mem_base + offset,
-				      (u8 *)buf, T4_MEMORY_READ);
+				      (u8 *)res_buf, T4_MEMORY_READ);
 
 	return 0;
 }

From 37c64cf63ba1f9c071b37a2129ae9860fd423d6c Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Wed, 14 Feb 2018 13:34:39 +0100
Subject: [PATCH 0131/1678] tipc: apply bearer link tolerance on running links

Currently, the default link tolerance set in struct tipc_bearer only
has effect on links going up after that moment. I.e., a user has to
reset all the node's links across that bearer to have the new value
applied. This is too limiting and disturbing on a running cluster to
be useful.

We now change this so that also already existing links are updated
dynamically, without any need for a reset, when the bearer value is
changed. We leverage the already existing per-link functionality
for this to achieve the wanted effect.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c |  8 +++++---
 net/tipc/link.c   |  3 ++-
 net/tipc/node.c   | 24 ++++++++++++++++++++++++
 net/tipc/node.h   |  1 +
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index c8001471da6c..83d284feab1a 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -946,11 +946,11 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
 
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 {
-	int err;
-	char *name;
 	struct tipc_bearer *b;
 	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
 	struct net *net = sock_net(skb->sk);
+	char *name;
+	int err;
 
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
@@ -982,8 +982,10 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 			return err;
 		}
 
-		if (props[TIPC_NLA_PROP_TOL])
+		if (props[TIPC_NLA_PROP_TOL]) {
 			b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
+			tipc_node_apply_tolerance(net, b);
+		}
 		if (props[TIPC_NLA_PROP_PRIO])
 			b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
 		if (props[TIPC_NLA_PROP_WIN])
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 2d6b2aed30e0..3c230466804d 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2126,7 +2126,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
 			     struct sk_buff_head *xmitq)
 {
 	l->tolerance = tol;
-	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
+	if (link_is_up(l))
+		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
 }
 
 void tipc_link_set_prio(struct tipc_link *l, u32 prio,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9036d8756e73..389193d7cf67 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1618,6 +1618,30 @@ discard:
 	kfree_skb(skb);
 }
 
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+{
+	struct tipc_net *tn = tipc_net(net);
+	int bearer_id = b->identity;
+	struct sk_buff_head xmitq;
+	struct tipc_link_entry *e;
+	struct tipc_node *n;
+
+	__skb_queue_head_init(&xmitq);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(n, &tn->node_list, list) {
+		tipc_node_write_lock(n);
+		e = &n->links[bearer_id];
+		if (e->link)
+			tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+		tipc_node_write_unlock(n);
+		tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
+	}
+
+	rcu_read_unlock();
+}
+
 int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = sock_net(skb->sk);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index acd58d23a70e..4ce5e3a185c0 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -65,6 +65,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
 			  struct tipc_media_addr *maddr,
 			  bool *respond, bool *dupl_addr);
 void tipc_node_delete_links(struct net *net, int bearer_id);
+void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
 int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
 			   char *linkname, size_t len);
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,

From 9708fb630d19ee51ae3aeb3a533e3010da0e8570 Mon Sep 17 00:00:00 2001
From: Wadim Egorov <w.egorov@phytec.de>
Date: Wed, 14 Feb 2018 17:07:11 +0100
Subject: [PATCH 0132/1678] net: phy: dp83867: Add binding for the CLK_OUT pin
 muxing option

The DP83867 has a muxing option for the CLK_OUT pin. It is possible
to set CLK_OUT for different channels.
Create a binding to select a specific clock for CLK_OUT pin.

Signed-off-by: Wadim Egorov <w.egorov@phytec.de>
Signed-off-by: Daniel Schultz <d.schultz@phytec.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83867.c            | 19 +++++++++++++++++++
 include/dt-bindings/net/ti-dp83867.h | 14 ++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index ab58224f897f..b3935778b19f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -75,6 +75,8 @@
 
 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX	0x0
 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN	0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK	(0x1f << 8)
+#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT	8
 
 /* CFG4 bits */
 #define DP83867_CFG4_PORT_MIRROR_EN              BIT(0)
@@ -92,6 +94,7 @@ struct dp83867_private {
 	int io_impedance;
 	int port_mirroring;
 	bool rxctrl_strap_quirk;
+	int clk_output_sel;
 };
 
 static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev)
 	dp83867->io_impedance = -EINVAL;
 
 	/* Optional configuration */
+	ret = of_property_read_u32(of_node, "ti,clk-output-sel",
+				   &dp83867->clk_output_sel);
+	if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
+		/* Keep the default value if ti,clk-output-sel is not set
+		 * or too high
+		 */
+		dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
+
 	if (of_property_read_bool(of_node, "ti,max-output-impedance"))
 		dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
 	else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
@@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev)
 	if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP)
 		dp83867_config_port_mirroring(phydev);
 
+	/* Clock output selection if muxing property is set */
+	if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) {
+		val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG);
+		val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+		val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+		phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val);
+	}
+
 	return 0;
 }
 
diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 172744a72eb7..7b1656427cbe 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -42,4 +42,18 @@
 #define	DP83867_RGMIIDCTL_3_75_NS	0xe
 #define	DP83867_RGMIIDCTL_4_00_NS	0xf
 
+/* IO_MUX_CFG - Clock output selection */
+#define DP83867_CLK_O_SEL_CHN_A_RCLK		0x0
+#define DP83867_CLK_O_SEL_CHN_B_RCLK		0x1
+#define DP83867_CLK_O_SEL_CHN_C_RCLK		0x2
+#define DP83867_CLK_O_SEL_CHN_D_RCLK		0x3
+#define DP83867_CLK_O_SEL_CHN_A_RCLK_DIV5	0x4
+#define DP83867_CLK_O_SEL_CHN_B_RCLK_DIV5	0x5
+#define DP83867_CLK_O_SEL_CHN_C_RCLK_DIV5	0x6
+#define DP83867_CLK_O_SEL_CHN_D_RCLK_DIV5	0x7
+#define DP83867_CLK_O_SEL_CHN_A_TCLK		0x8
+#define DP83867_CLK_O_SEL_CHN_B_TCLK		0x9
+#define DP83867_CLK_O_SEL_CHN_C_TCLK		0xA
+#define DP83867_CLK_O_SEL_CHN_D_TCLK		0xB
+#define DP83867_CLK_O_SEL_REF_CLK		0xC
 #endif

From 0e47079b6cd7d26e2dab95398927b1184a9a733c Mon Sep 17 00:00:00 2001
From: Wadim Egorov <w.egorov@phytec.de>
Date: Wed, 14 Feb 2018 17:07:12 +0100
Subject: [PATCH 0133/1678] net: phy: dp83867: Add documentation for CLK_OUT
 pin muxing

Add documentation of ti,clk-output-sel which can be used to select
a specific clock for CLK_OUT.

Signed-off-by: Wadim Egorov <w.egorov@phytec.de>
Signed-off-by: Daniel Schultz <d.schultz@phytec.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,dp83867.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 02c4353b5cf2..9ef9338aaee1 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -25,6 +25,8 @@ Optional property:
 				    software needs to take when this pin is
 				    strapped in these modes. See data manual
 				    for details.
+	- ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
+				    for applicable values.
 
 Note: ti,min-output-impedance and ti,max-output-impedance are mutually
       exclusive. When both properties are present ti,max-output-impedance

From dff8baa261174de689a44572d0ea182d7aa70598 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Wed, 14 Feb 2018 09:22:42 -0800
Subject: [PATCH 0134/1678] kcm: Call strp_stop before strp_done in kcm_attach

In kcm_attach strp_done is called when sk_user_data is already
set to fail the attach. strp_done needs the strp to be stopped and
warns if it isn't. Call strp_stop in this case to eliminate the
warning message.

Reported-by: syzbot+88dfb55e4c8b770d86e3@syzkaller.appspotmail.com
Fixes: e5571240236c5652f ("kcm: Check if sk_user_data already set in kcm_attach"
Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index f297d53a11aa..435594648dac 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1417,6 +1417,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	 */
 	if (csk->sk_user_data) {
 		write_unlock_bh(&csk->sk_callback_lock);
+		strp_stop(&psock->strp);
 		strp_done(&psock->strp);
 		kmem_cache_free(kcm_psockp, psock);
 		return -EALREADY;

From 02d92f7903647119e125b24f5470f96cee0d4b4b Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 19 Jan 2018 16:13:01 -0800
Subject: [PATCH 0135/1678] net/mlx5: CQ Database per EQ

Before this patch the driver had one CQ database protected via one
spinlock, this spinlock is meant to synchronize between CQ
adding/removing and CQ IRQ interrupt handling.

On a system with large number of CPUs and on a work load that requires
lots of interrupts, this global spinlock becomes a very nasty hotspot
and introduces a contention between the active cores, which will
significantly hurt performance and becomes a bottleneck that prevents
seamless cpu scaling.

To solve this we simply move the CQ database and its spinlock to be per
EQ (IRQ), thus per core.

Tested with:
system: 2 sockets, 14 cores per socket, hyperthreading, 2x14x2=56 cores
netperf command: ./super_netperf 200 -P 0 -t TCP_RR  -H <server> -l 30 -- -r 300,300 -o -s 1M,1M -S 1M,1M

WITHOUT THIS PATCH:
Average:     CPU    %usr   %nice    %sys %iowait    %irq   %soft %steal  %guest  %gnice   %idle
Average:     all    4.32    0.00   36.15    0.09    0.00   34.02   0.00    0.00    0.00   25.41

Samples: 2M of event 'cycles:pp', Event count (approx.): 1554616897271
Overhead  Command          Shared Object                 Symbol
+   14.28%  swapper          [kernel.vmlinux]              [k] intel_idle
+   12.25%  swapper          [kernel.vmlinux]              [k] queued_spin_lock_slowpath
+   10.29%  netserver        [kernel.vmlinux]              [k] queued_spin_lock_slowpath
+    1.32%  netserver        [kernel.vmlinux]              [k] mlx5e_xmit

WITH THIS PATCH:
Average:     CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest  %gnice   %idle
Average:     all    4.27    0.00   34.31    0.01    0.00   18.71    0.00    0.00    0.00   42.69

Samples: 2M of event 'cycles:pp', Event count (approx.): 1498132937483
Overhead  Command          Shared Object             Symbol
+   23.33%  swapper          [kernel.vmlinux]          [k] intel_idle
+    1.69%  netserver        [kernel.vmlinux]          [k] mlx5e_xmit

Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c  | 69 +++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 10 ++-
 .../net/ethernet/mellanox/mlx5/core/main.c    |  8 +--
 include/linux/mlx5/cq.h                       |  3 +-
 include/linux/mlx5/driver.h                   | 22 +++---
 5 files changed, 62 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 1016e05c7ec7..dfbeeaa43276 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -86,10 +86,10 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
 	spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
 }
 
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
+void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn)
 {
+	struct mlx5_cq_table *table = &eq->cq_table;
 	struct mlx5_core_cq *cq;
-	struct mlx5_cq_table *table = &dev->priv.cq_table;
 
 	spin_lock(&table->lock);
 	cq = radix_tree_lookup(&table->tree, cqn);
@@ -98,7 +98,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
 	spin_unlock(&table->lock);
 
 	if (!cq) {
-		mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
+		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
 		return;
 	}
 
@@ -110,9 +110,9 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
 		complete(&cq->free);
 }
 
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
+void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
 {
-	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	struct mlx5_cq_table *table = &eq->cq_table;
 	struct mlx5_core_cq *cq;
 
 	spin_lock(&table->lock);
@@ -124,7 +124,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
 	spin_unlock(&table->lock);
 
 	if (!cq) {
-		mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
+		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
 		return;
 	}
 
@@ -137,19 +137,22 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen)
 {
-	struct mlx5_cq_table *table = &dev->priv.cq_table;
 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 	u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
 	u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
 	int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
 			   c_eqn);
-	struct mlx5_eq *eq;
+	struct mlx5_eq *eq, *async_eq;
+	struct mlx5_cq_table *table;
 	int err;
 
+	async_eq = &dev->priv.eq_table.async_eq;
 	eq = mlx5_eqn2eq(dev, eqn);
 	if (IS_ERR(eq))
 		return PTR_ERR(eq);
 
+	table = &eq->cq_table;
+
 	memset(out, 0, sizeof(out));
 	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
@@ -159,6 +162,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->cqn = MLX5_GET(create_cq_out, out, cqn);
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
+	cq->eq         = eq;
 	refcount_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 	if (!cq->comp)
@@ -167,12 +171,20 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->tasklet_ctx.priv = &eq->tasklet_ctx;
 	INIT_LIST_HEAD(&cq->tasklet_ctx.list);
 
+	/* Add to comp EQ CQ tree to recv comp events */
 	spin_lock_irq(&table->lock);
 	err = radix_tree_insert(&table->tree, cq->cqn, cq);
 	spin_unlock_irq(&table->lock);
 	if (err)
 		goto err_cmd;
 
+	/* Add to async EQ CQ tree to recv Async events */
+	spin_lock_irq(&async_eq->cq_table.lock);
+	err = radix_tree_insert(&async_eq->cq_table.tree, cq->cqn, cq);
+	spin_unlock_irq(&async_eq->cq_table.lock);
+	if (err)
+		goto err_cq_table;
+
 	cq->pid = current->pid;
 	err = mlx5_debug_cq_add(dev, cq);
 	if (err)
@@ -183,6 +195,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 
 	return 0;
 
+err_cq_table:
+	spin_lock_irq(&table->lock);
+	radix_tree_delete(&table->tree, cq->cqn);
+	spin_unlock_irq(&table->lock);
 err_cmd:
 	memset(din, 0, sizeof(din));
 	memset(dout, 0, sizeof(dout));
@@ -195,21 +211,34 @@ EXPORT_SYMBOL(mlx5_core_create_cq);
 
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 {
-	struct mlx5_cq_table *table = &dev->priv.cq_table;
+	struct mlx5_cq_table *asyn_eq_cq_table = &dev->priv.eq_table.async_eq.cq_table;
+	struct mlx5_cq_table *table = &cq->eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
 	struct mlx5_core_cq *tmp;
 	int err;
 
+	spin_lock_irq(&asyn_eq_cq_table->lock);
+	tmp = radix_tree_delete(&asyn_eq_cq_table->tree, cq->cqn);
+	spin_unlock_irq(&asyn_eq_cq_table->lock);
+	if (!tmp) {
+		mlx5_core_warn(dev, "cq 0x%x not found in async eq cq tree\n", cq->cqn);
+		return -EINVAL;
+	}
+	if (tmp != cq) {
+		mlx5_core_warn(dev, "corruption on cqn 0x%x in async eq cq tree\n", cq->cqn);
+		return -EINVAL;
+	}
+
 	spin_lock_irq(&table->lock);
 	tmp = radix_tree_delete(&table->tree, cq->cqn);
 	spin_unlock_irq(&table->lock);
 	if (!tmp) {
-		mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
+		mlx5_core_warn(dev, "cq 0x%x not found in comp eq cq tree\n", cq->cqn);
 		return -EINVAL;
 	}
 	if (tmp != cq) {
-		mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
+		mlx5_core_warn(dev, "corruption on cqn 0x%x in comp eq cq tree\n", cq->cqn);
 		return -EINVAL;
 	}
 
@@ -270,21 +299,3 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
 	return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
 }
 EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
-
-int mlx5_init_cq_table(struct mlx5_core_dev *dev)
-{
-	struct mlx5_cq_table *table = &dev->priv.cq_table;
-	int err;
-
-	memset(table, 0, sizeof(*table));
-	spin_lock_init(&table->lock);
-	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-	err = mlx5_cq_debugfs_init(dev);
-
-	return err;
-}
-
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
-{
-	mlx5_cq_debugfs_cleanup(dev);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 25106e996a96..328403ebf2f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -415,7 +415,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 		switch (eqe->type) {
 		case MLX5_EVENT_TYPE_COMP:
 			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-			mlx5_cq_completion(dev, cqn);
+			mlx5_cq_completion(eq, cqn);
 			break;
 		case MLX5_EVENT_TYPE_DCT_DRAINED:
 			rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
@@ -472,7 +472,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
 			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
 				       cqn, eqe->data.cq_err.syndrome);
-			mlx5_cq_event(dev, cqn, eqe->type);
+			mlx5_cq_event(eq, cqn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_PAGE_REQUEST:
@@ -567,6 +567,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
 		       enum mlx5_eq_type type)
 {
+	struct mlx5_cq_table *cq_table = &eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	struct mlx5_priv *priv = &dev->priv;
 	irq_handler_t handler;
@@ -576,6 +577,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	u32 *in;
 	int err;
 
+	/* Init CQ table */
+	memset(cq_table, 0, sizeof(*cq_table));
+	spin_lock_init(&cq_table->lock);
+	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+
 	eq->type = type;
 	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
 	eq->cons_index = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2ef641c91c26..8cc22bf80c87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -942,9 +942,9 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 		goto out;
 	}
 
-	err = mlx5_init_cq_table(dev);
+	err = mlx5_cq_debugfs_init(dev);
 	if (err) {
-		dev_err(&pdev->dev, "failed to initialize cq table\n");
+		dev_err(&pdev->dev, "failed to initialize cq debugfs\n");
 		goto err_eq_cleanup;
 	}
 
@@ -1002,7 +1002,7 @@ err_tables_cleanup:
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
-	mlx5_cleanup_cq_table(dev);
+	mlx5_cq_debugfs_cleanup(dev);
 
 err_eq_cleanup:
 	mlx5_eq_cleanup(dev);
@@ -1023,7 +1023,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
-	mlx5_cleanup_cq_table(dev);
+	mlx5_cq_debugfs_cleanup(dev);
 	mlx5_eq_cleanup(dev);
 }
 
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 48c181a2acc9..06ba425a6ad7 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -60,6 +60,7 @@ struct mlx5_core_cq {
 	} tasklet_ctx;
 	int			reset_notify_added;
 	struct list_head	reset_notify;
+	struct mlx5_eq		*eq;
 };
 
 
@@ -171,8 +172,6 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
 	mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
 }
 
-int mlx5_init_cq_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6ed79a8a8318..96e003db2bcd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -375,8 +375,15 @@ struct mlx5_eq_pagefault {
 	mempool_t		*pool;
 };
 
+struct mlx5_cq_table {
+	/* protect radix tree */
+	spinlock_t		lock;
+	struct radix_tree_root	tree;
+};
+
 struct mlx5_eq {
 	struct mlx5_core_dev   *dev;
+	struct mlx5_cq_table	cq_table;
 	__be32 __iomem	       *doorbell;
 	u32			cons_index;
 	struct mlx5_buf		buf;
@@ -526,13 +533,6 @@ struct mlx5_core_health {
 	struct delayed_work		recover_work;
 };
 
-struct mlx5_cq_table {
-	/* protect radix tree
-	 */
-	spinlock_t		lock;
-	struct radix_tree_root	tree;
-};
-
 struct mlx5_qp_table {
 	/* protect radix tree
 	 */
@@ -654,10 +654,6 @@ struct mlx5_priv {
 	struct dentry	       *cmdif_debugfs;
 	/* end: qp staff */
 
-	/* start: cq staff */
-	struct mlx5_cq_table	cq_table;
-	/* end: cq staff */
-
 	/* start: mkey staff */
 	struct mlx5_mkey_table	mkey_table;
 	/* end: mkey staff */
@@ -1053,12 +1049,12 @@ int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
+void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
+void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
 		       enum mlx5_eq_type type);

From d2ff4fa575000058def5f5c602784e233211d4e7 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 7 Feb 2018 20:48:43 -0800
Subject: [PATCH 0136/1678] net/mlx5: Add missing likely/unlikely hints to cq
 events

If a hardware event is targeting a CQ, that CQ should exist.
Add unlikely to error handling flows.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index dfbeeaa43276..9feeb555e937 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -97,7 +97,7 @@ void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn)
 		refcount_inc(&cq->refcount);
 	spin_unlock(&table->lock);
 
-	if (!cq) {
+	if (unlikely(!cq)) {
 		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
 		return;
 	}
@@ -118,12 +118,12 @@ void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
 	spin_lock(&table->lock);
 
 	cq = radix_tree_lookup(&table->tree, cqn);
-	if (cq)
+	if (likely(cq))
 		refcount_inc(&cq->refcount);
 
 	spin_unlock(&table->lock);
 
-	if (!cq) {
+	if (unlikely(!cq)) {
 		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
 		return;
 	}

From d5c07157dd4f5ab9123eaab7db572ca360c19a55 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 1 Feb 2018 04:28:17 -0800
Subject: [PATCH 0137/1678] net/mlx5: EQ add/del CQ API

Add API to add/del CQ to/from EQs CQ table to be used in cq.c upon CQ
creation/destruction, as CQ table is now private to eq.c.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c  | 60 +++++--------------
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 34 +++++++++++
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  4 ++
 3 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 9feeb555e937..f6e478d05ecc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -137,22 +137,17 @@ void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen)
 {
+	int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+	u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 	u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
-	u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
-	int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
-			   c_eqn);
-	struct mlx5_eq *eq, *async_eq;
-	struct mlx5_cq_table *table;
+	struct mlx5_eq *eq;
 	int err;
 
-	async_eq = &dev->priv.eq_table.async_eq;
 	eq = mlx5_eqn2eq(dev, eqn);
 	if (IS_ERR(eq))
 		return PTR_ERR(eq);
 
-	table = &eq->cq_table;
-
 	memset(out, 0, sizeof(out));
 	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
@@ -172,18 +167,14 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	INIT_LIST_HEAD(&cq->tasklet_ctx.list);
 
 	/* Add to comp EQ CQ tree to recv comp events */
-	spin_lock_irq(&table->lock);
-	err = radix_tree_insert(&table->tree, cq->cqn, cq);
-	spin_unlock_irq(&table->lock);
+	err = mlx5_eq_add_cq(eq, cq);
 	if (err)
 		goto err_cmd;
 
-	/* Add to async EQ CQ tree to recv Async events */
-	spin_lock_irq(&async_eq->cq_table.lock);
-	err = radix_tree_insert(&async_eq->cq_table.tree, cq->cqn, cq);
-	spin_unlock_irq(&async_eq->cq_table.lock);
+	/* Add to async EQ CQ tree to recv async events */
+	err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq);
 	if (err)
-		goto err_cq_table;
+		goto err_cq_add;
 
 	cq->pid = current->pid;
 	err = mlx5_debug_cq_add(dev, cq);
@@ -195,10 +186,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 
 	return 0;
 
-err_cq_table:
-	spin_lock_irq(&table->lock);
-	radix_tree_delete(&table->tree, cq->cqn);
-	spin_unlock_irq(&table->lock);
+err_cq_add:
+	mlx5_eq_del_cq(eq, cq);
 err_cmd:
 	memset(din, 0, sizeof(din));
 	memset(dout, 0, sizeof(dout));
@@ -211,36 +200,17 @@ EXPORT_SYMBOL(mlx5_core_create_cq);
 
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 {
-	struct mlx5_cq_table *asyn_eq_cq_table = &dev->priv.eq_table.async_eq.cq_table;
-	struct mlx5_cq_table *table = &cq->eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
-	struct mlx5_core_cq *tmp;
 	int err;
 
-	spin_lock_irq(&asyn_eq_cq_table->lock);
-	tmp = radix_tree_delete(&asyn_eq_cq_table->tree, cq->cqn);
-	spin_unlock_irq(&asyn_eq_cq_table->lock);
-	if (!tmp) {
-		mlx5_core_warn(dev, "cq 0x%x not found in async eq cq tree\n", cq->cqn);
-		return -EINVAL;
-	}
-	if (tmp != cq) {
-		mlx5_core_warn(dev, "corruption on cqn 0x%x in async eq cq tree\n", cq->cqn);
-		return -EINVAL;
-	}
+	err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq);
+	if (err)
+		return err;
 
-	spin_lock_irq(&table->lock);
-	tmp = radix_tree_delete(&table->tree, cq->cqn);
-	spin_unlock_irq(&table->lock);
-	if (!tmp) {
-		mlx5_core_warn(dev, "cq 0x%x not found in comp eq cq tree\n", cq->cqn);
-		return -EINVAL;
-	}
-	if (tmp != cq) {
-		mlx5_core_warn(dev, "corruption on cqn 0x%x in comp eq cq tree\n", cq->cqn);
-		return -EINVAL;
-	}
+	err = mlx5_eq_del_cq(cq->eq, cq);
+	if (err)
+		return err;
 
 	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 328403ebf2f5..c1f0468e95bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -704,6 +704,40 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 }
 EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
 
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+	struct mlx5_cq_table *table = &eq->cq_table;
+	int err;
+
+	spin_lock_irq(&table->lock);
+	err = radix_tree_insert(&table->tree, cq->cqn, cq);
+	spin_unlock_irq(&table->lock);
+
+	return err;
+}
+
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
+{
+	struct mlx5_cq_table *table = &eq->cq_table;
+	struct mlx5_core_cq *tmp;
+
+	spin_lock_irq(&table->lock);
+	tmp = radix_tree_delete(&table->tree, cq->cqn);
+	spin_unlock_irq(&table->lock);
+
+	if (!tmp) {
+		mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn);
+		return -ENOENT;
+	}
+
+	if (tmp != cq) {
+		mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int mlx5_eq_init(struct mlx5_core_dev *dev)
 {
 	int err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 394552f36fcf..54a1cbfb1b5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,6 +38,7 @@
 #include <linux/sched.h>
 #include <linux/if_link.h>
 #include <linux/firmware.h>
+#include <linux/mlx5/cq.h>
 
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "5.0-0"
@@ -115,6 +116,9 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 					u32 element_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
+
+int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
 void mlx5_cq_tasklet_cb(unsigned long data);

From f105b45bf77ced96e516e1cd771c41bb7e8c830b Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 1 Feb 2018 03:32:00 -0800
Subject: [PATCH 0138/1678] net/mlx5: CQ hold/put API

Now as the CQ table is per EQ, add an API to hold/put CQ to be used from
eq.c in downstream patch.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c | 42 +++++++++-----------
 include/linux/mlx5/cq.h                      | 11 +++++
 2 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index f6e478d05ecc..06dc7bd302ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,8 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
 				 tasklet_ctx.list) {
 		list_del_init(&mcq->tasklet_ctx.list);
 		mcq->tasklet_ctx.comp(mcq);
-		if (refcount_dec_and_test(&mcq->refcount))
-			complete(&mcq->free);
+		mlx5_cq_put(mcq);
 		if (time_after(jiffies, end))
 			break;
 	}
@@ -80,23 +79,31 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
 	 * still arrive.
 	 */
 	if (list_empty_careful(&cq->tasklet_ctx.list)) {
-		refcount_inc(&cq->refcount);
+		mlx5_cq_hold(cq);
 		list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
 	}
 	spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
 }
 
-void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn)
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 {
 	struct mlx5_cq_table *table = &eq->cq_table;
-	struct mlx5_core_cq *cq;
+	struct mlx5_core_cq *cq = NULL;
 
 	spin_lock(&table->lock);
 	cq = radix_tree_lookup(&table->tree, cqn);
 	if (likely(cq))
-		refcount_inc(&cq->refcount);
+		mlx5_cq_hold(cq);
 	spin_unlock(&table->lock);
 
+	return cq;
+}
+
+void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
 	if (unlikely(!cq)) {
 		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
 		return;
@@ -106,22 +113,12 @@ void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn)
 
 	cq->comp(cq);
 
-	if (refcount_dec_and_test(&cq->refcount))
-		complete(&cq->free);
+	mlx5_cq_put(cq);
 }
 
 void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
 {
-	struct mlx5_cq_table *table = &eq->cq_table;
-	struct mlx5_core_cq *cq;
-
-	spin_lock(&table->lock);
-
-	cq = radix_tree_lookup(&table->tree, cqn);
-	if (likely(cq))
-		refcount_inc(&cq->refcount);
-
-	spin_unlock(&table->lock);
+	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
 
 	if (unlikely(!cq)) {
 		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
@@ -130,8 +127,7 @@ void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
 
 	cq->event(cq, event_type);
 
-	if (refcount_dec_and_test(&cq->refcount))
-		complete(&cq->free);
+	mlx5_cq_put(cq);
 }
 
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
@@ -158,7 +154,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
 	cq->eq         = eq;
-	refcount_set(&cq->refcount, 1);
+	refcount_set(&cq->refcount, 0);
+	mlx5_cq_hold(cq);
 	init_completion(&cq->free);
 	if (!cq->comp)
 		cq->comp = mlx5_add_cq_to_tasklet;
@@ -221,8 +218,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 	synchronize_irq(cq->irqn);
 
 	mlx5_debug_cq_remove(dev, cq);
-	if (refcount_dec_and_test(&cq->refcount))
-		complete(&cq->free);
+	mlx5_cq_put(cq);
 	wait_for_completion(&cq->free);
 
 	return 0;
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 06ba425a6ad7..445ad194e0fe 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -172,6 +172,17 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
 	mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
 }
 
+static inline void mlx5_cq_hold(struct mlx5_core_cq *cq)
+{
+	refcount_inc(&cq->refcount);
+}
+
+static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
+{
+	if (refcount_dec_and_test(&cq->refcount))
+		complete(&cq->free);
+}
+
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);

From 3ac7afdbcf243d6c79c1569d9e29aef0096e4743 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 1 Feb 2018 04:37:07 -0800
Subject: [PATCH 0139/1678] net/mlx5: Move CQ completion and event forwarding
 logic to eq.c

Since CQ tree is now per EQ, CQ completion and event forwarding became
specific implementation of EQ logic, this patch moves that logic to eq.c
and makes those functions static.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c | 45 ------------------
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 49 +++++++++++++++++++-
 include/linux/mlx5/driver.h                  |  2 -
 3 files changed, 47 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 06dc7bd302ed..669ed16938b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -85,51 +85,6 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
 	spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
 }
 
-/* caller must eventually call mlx5_cq_put on the returned cq */
-static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
-{
-	struct mlx5_cq_table *table = &eq->cq_table;
-	struct mlx5_core_cq *cq = NULL;
-
-	spin_lock(&table->lock);
-	cq = radix_tree_lookup(&table->tree, cqn);
-	if (likely(cq))
-		mlx5_cq_hold(cq);
-	spin_unlock(&table->lock);
-
-	return cq;
-}
-
-void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn)
-{
-	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-	if (unlikely(!cq)) {
-		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	++cq->arm_sn;
-
-	cq->comp(cq);
-
-	mlx5_cq_put(cq);
-}
-
-void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
-{
-	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
-
-	if (unlikely(!cq)) {
-		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
-		return;
-	}
-
-	cq->event(cq, event_type);
-
-	mlx5_cq_put(cq);
-}
-
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c1f0468e95bd..7e442b38a8ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -393,6 +393,51 @@ static void general_event_handler(struct mlx5_core_dev *dev,
 	}
 }
 
+/* caller must eventually call mlx5_cq_put on the returned cq */
+static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
+{
+	struct mlx5_cq_table *table = &eq->cq_table;
+	struct mlx5_core_cq *cq = NULL;
+
+	spin_lock(&table->lock);
+	cq = radix_tree_lookup(&table->tree, cqn);
+	if (likely(cq))
+		mlx5_cq_hold(cq);
+	spin_unlock(&table->lock);
+
+	return cq;
+}
+
+static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
+{
+	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+	if (unlikely(!cq)) {
+		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+		return;
+	}
+
+	++cq->arm_sn;
+
+	cq->comp(cq);
+
+	mlx5_cq_put(cq);
+}
+
+static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
+{
+	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);
+
+	if (unlikely(!cq)) {
+		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
+		return;
+	}
+
+	cq->event(cq, event_type);
+
+	mlx5_cq_put(cq);
+}
+
 static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 {
 	struct mlx5_eq *eq = eq_ptr;
@@ -415,7 +460,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 		switch (eqe->type) {
 		case MLX5_EVENT_TYPE_COMP:
 			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
-			mlx5_cq_completion(eq, cqn);
+			mlx5_eq_cq_completion(eq, cqn);
 			break;
 		case MLX5_EVENT_TYPE_DCT_DRAINED:
 			rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
@@ -472,7 +517,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
 			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
 				       cqn, eqe->data.cq_err.syndrome);
-			mlx5_cq_event(eq, cqn, eqe->type);
+			mlx5_eq_cq_event(eq, cqn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_PAGE_REQUEST:
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 96e003db2bcd..09e2f3e8753c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1049,12 +1049,10 @@ int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-void mlx5_cq_completion(struct mlx5_eq *eq, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-void mlx5_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name,
 		       enum mlx5_eq_type type);

From 3ec5693b17314b58977ba3c8d720d1f9cfef39f8 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 1 Feb 2018 05:42:06 -0800
Subject: [PATCH 0140/1678] net/mlx5: Remove redundant EQ API exports

EQ structure and API is private to mlx5_core driver only, external
drivers should not have access or the means to manipulate EQ objects.

Remove redundant exports and move API functions out of the linux/mlx5
include directory into the driver's mlx5_core.h private include file.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Gal Pressman <galp@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c    |  3 ---
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 17 +++++++++++++++++
 include/linux/mlx5/driver.h                     | 17 -----------------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 7e442b38a8ca..c1c94974e16b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -720,7 +720,6 @@ err_buf:
 	mlx5_buf_free(dev, &eq->buf);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
 
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
@@ -747,7 +746,6 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
 
 int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
 {
@@ -925,4 +923,3 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
-EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 54a1cbfb1b5a..23e17ac0cba5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -117,11 +117,28 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 
+int mlx5_eq_init(struct mlx5_core_dev *dev);
+void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
+int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+		       int nent, u64 mask, const char *name,
+		       enum mlx5_eq_type type);
+int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
 int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq);
+int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+		       u32 *out, int outlen);
+int mlx5_start_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
 void mlx5_cq_tasklet_cb(unsigned long data);
+void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
+int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
 			u8 access_reg_group);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 09e2f3e8753c..2860a253275b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1045,20 +1045,11 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
 void mlx5_unregister_debugfs(void);
-int mlx5_eq_init(struct mlx5_core_dev *dev);
-void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
-void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced);
-int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, const char *name,
-		       enum mlx5_eq_type type);
-int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_start_eqs(struct mlx5_core_dev *dev);
-void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
 		    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1070,14 +1061,6 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 			 int size_in, void *data_out, int size_out,
 			 u16 reg_num, int arg, int write);
 
-int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-		       u32 *out, int outlen);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
-void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
 int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
 		       int node);

From 388ca8be00370db132464e27f745b8a0add19fcb Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Tue, 2 Jan 2018 16:08:06 +0200
Subject: [PATCH 0141/1678] IB/mlx5: Implement fragmented completion queue (CQ)

The current implementation of create CQ requires contiguous
memory, such requirement is problematic once the memory is
fragmented or the system is low in memory, it causes for
failures in dma_zalloc_coherent().

This patch implements new scheme of fragmented CQ to overcome
this issue by introducing new type: 'struct mlx5_frag_buf_ctrl'
to allocate fragmented buffers, rather than contiguous ones.

Base the Completion Queues (CQs) on this new fragmented buffer.

It fixes following crashes:
kworker/29:0: page allocation failure: order:6, mode:0x80d0
CPU: 29 PID: 8374 Comm: kworker/29:0 Tainted: G OE 3.10.0
Workqueue: ib_cm cm_work_handler [ib_cm]
Call Trace:
[<>] dump_stack+0x19/0x1b
[<>] warn_alloc_failed+0x110/0x180
[<>] __alloc_pages_slowpath+0x6b7/0x725
[<>] __alloc_pages_nodemask+0x405/0x420
[<>] dma_generic_alloc_coherent+0x8f/0x140
[<>] x86_swiotlb_alloc_coherent+0x21/0x50
[<>] mlx5_dma_zalloc_coherent_node+0xad/0x110 [mlx5_core]
[<>] ? mlx5_db_alloc_node+0x69/0x1b0 [mlx5_core]
[<>] mlx5_buf_alloc_node+0x3e/0xa0 [mlx5_core]
[<>] mlx5_buf_alloc+0x14/0x20 [mlx5_core]
[<>] create_cq_kernel+0x90/0x1f0 [mlx5_ib]
[<>] mlx5_ib_create_cq+0x3b0/0x4e0 [mlx5_ib]

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cq.c               | 64 +++++++++++--------
 drivers/infiniband/hw/mlx5/mlx5_ib.h          |  6 +-
 .../net/ethernet/mellanox/mlx5/core/alloc.c   | 37 +++++++----
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 11 ++--
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  | 18 ++----
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  | 22 ++-----
 include/linux/mlx5/driver.h                   | 51 +++++++++++----
 7 files changed, 124 insertions(+), 85 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 5b974fb97611..c4c7b82f4ac1 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -64,14 +64,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
 	}
 }
 
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
-	return mlx5_buf_offset(&buf->buf, n * size);
-}
-
 static void *get_cqe(struct mlx5_ib_cq *cq, int n)
 {
-	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
 }
 
 static u8 sw_ownership_bit(int n, int nent)
@@ -403,7 +398,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
 
 static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
 {
-	mlx5_buf_free(dev->mdev, &buf->buf);
+	mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
 }
 
 static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -724,12 +719,25 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	return ret;
 }
 
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
-			int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_cq_buf *buf,
+			     int nent,
+			     int cqe_size)
 {
+	struct mlx5_frag_buf_ctrl *c = &buf->fbc;
+	struct mlx5_frag_buf *frag_buf = &c->frag_buf;
+	u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
 	int err;
 
-	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+	MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
+	MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
+
+	mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
+
+	err = mlx5_frag_buf_alloc_node(dev->mdev,
+				       nent * cqe_size,
+				       frag_buf,
+				       dev->mdev->priv.numa_node);
 	if (err)
 		return err;
 
@@ -862,14 +870,15 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
 	ib_umem_release(cq->buf.umem);
 }
 
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
+			     struct mlx5_ib_cq_buf *buf)
 {
 	int i;
 	void *cqe;
 	struct mlx5_cqe64 *cqe64;
 
 	for (i = 0; i < buf->nent; i++) {
-		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+		cqe = get_cqe(cq, i);
 		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
 		cqe64->op_own = MLX5_CQE_INVALID << 4;
 	}
@@ -891,14 +900,15 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	cq->mcq.arm_db     = cq->db.db + 1;
 	cq->mcq.cqe_sz = cqe_size;
 
-	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+	err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
 	if (err)
 		goto err_db;
 
-	init_cq_buf(cq, &cq->buf);
+	init_cq_frag_buf(cq, &cq->buf);
 
 	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
+		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+		 cq->buf.fbc.frag_buf.npages;
 	*cqb = kvzalloc(*inlen, GFP_KERNEL);
 	if (!*cqb) {
 		err = -ENOMEM;
@@ -906,11 +916,12 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 
 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
-	mlx5_fill_page_array(&cq->buf.buf, pas);
+	mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
 
 	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
 	MLX5_SET(cqc, cqc, log_page_size,
-		 cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+		 cq->buf.fbc.frag_buf.page_shift -
+		 MLX5_ADAPTER_PAGE_SHIFT);
 
 	*index = dev->mdev->priv.uar->index;
 
@@ -1207,11 +1218,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	if (!cq->resize_buf)
 		return -ENOMEM;
 
-	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+	err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
 	if (err)
 		goto ex;
 
-	init_cq_buf(cq, cq->resize_buf);
+	init_cq_frag_buf(cq, cq->resize_buf);
 
 	return 0;
 
@@ -1256,9 +1267,8 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
 	}
 
 	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
-		dcqe = get_cqe_from_buf(cq->resize_buf,
-					(i + 1) & (cq->resize_buf->nent),
-					dsize);
+		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+					     (i + 1) & cq->resize_buf->nent);
 		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
 		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
 		memcpy(dcqe, scqe, dsize);
@@ -1324,8 +1334,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		cqe_size = 64;
 		err = resize_kernel(dev, cq, entries, cqe_size);
 		if (!err) {
-			npas = cq->resize_buf->buf.npages;
-			page_shift = cq->resize_buf->buf.page_shift;
+			struct mlx5_frag_buf_ctrl *c;
+
+			c = &cq->resize_buf->fbc;
+			npas = c->frag_buf.npages;
+			page_shift = c->frag_buf.page_shift;
 		}
 	}
 
@@ -1346,7 +1359,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
 				     pas, 0);
 	else
-		mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+		mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
+					  pas);
 
 	MLX5_SET(modify_cq_in, in,
 		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 139385129973..eafb9751daf6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -371,7 +371,7 @@ struct mlx5_ib_qp {
 		struct mlx5_ib_rss_qp rss_qp;
 		struct mlx5_ib_dct dct;
 	};
-	struct mlx5_buf		buf;
+	struct mlx5_frag_buf	buf;
 
 	struct mlx5_db		db;
 	struct mlx5_ib_wq	rq;
@@ -413,7 +413,7 @@ struct mlx5_ib_qp {
 };
 
 struct mlx5_ib_cq_buf {
-	struct mlx5_buf		buf;
+	struct mlx5_frag_buf_ctrl fbc;
 	struct ib_umem		*umem;
 	int			cqe_size;
 	int			nent;
@@ -495,7 +495,7 @@ struct mlx5_ib_wc {
 struct mlx5_ib_srq {
 	struct ib_srq		ibsrq;
 	struct mlx5_core_srq	msrq;
-	struct mlx5_buf		buf;
+	struct mlx5_frag_buf	buf;
 	struct mlx5_db		db;
 	u64		       *wrid;
 	/* protect SRQ hanlding
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 47239bf7bf43..323ffe8bf7e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -71,19 +71,24 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
 }
 
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
-			struct mlx5_buf *buf, int node)
+			struct mlx5_frag_buf *buf, int node)
 {
 	dma_addr_t t;
 
 	buf->size = size;
 	buf->npages       = 1;
 	buf->page_shift   = (u8)get_order(size) + PAGE_SHIFT;
-	buf->direct.buf   = mlx5_dma_zalloc_coherent_node(dev, size,
-							  &t, node);
-	if (!buf->direct.buf)
+
+	buf->frags = kzalloc(sizeof(*buf->frags), GFP_KERNEL);
+	if (!buf->frags)
 		return -ENOMEM;
 
-	buf->direct.map = t;
+	buf->frags->buf   = mlx5_dma_zalloc_coherent_node(dev, size,
+							  &t, node);
+	if (!buf->frags->buf)
+		goto err_out;
+
+	buf->frags->map = t;
 
 	while (t & ((1 << buf->page_shift) - 1)) {
 		--buf->page_shift;
@@ -91,18 +96,24 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 	}
 
 	return 0;
+err_out:
+	kfree(buf->frags);
+	return -ENOMEM;
 }
 
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+		   int size, struct mlx5_frag_buf *buf)
 {
 	return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
 }
-EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+EXPORT_SYMBOL(mlx5_buf_alloc);
 
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 {
-	dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
-			  buf->direct.map);
+	dma_free_coherent(&dev->pdev->dev, buf->size, buf->frags->buf,
+			  buf->frags->map);
+
+	kfree(buf->frags);
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
@@ -147,6 +158,7 @@ err_free_buf:
 err_out:
 	return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_alloc_node);
 
 void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 {
@@ -162,6 +174,7 @@ void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
 	}
 	kfree(buf->frags);
 }
+EXPORT_SYMBOL_GPL(mlx5_frag_buf_free);
 
 static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
 						 int node)
@@ -275,13 +288,13 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
 }
 EXPORT_SYMBOL_GPL(mlx5_db_free);
 
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
 {
 	u64 addr;
 	int i;
 
 	for (i = 0; i < buf->npages; i++) {
-		addr = buf->direct.map + (i << buf->page_shift);
+		addr = buf->frags->map + (i << buf->page_shift);
 
 		pas[i] = cpu_to_be64(addr);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 0d4bb0688faa..80b84f6af2a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -52,7 +52,7 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
 static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
 				       void *data)
 {
-	u32 ci = cqcc & cq->wq.sz_m1;
+	u32 ci = cqcc & cq->wq.fbc.sz_m1;
 
 	memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
 }
@@ -74,9 +74,10 @@ static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
 
 static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
 {
-	u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
-	u32 wq_sz = 1 << cq->wq.log_sz;
-	u32 ci = cqcc & cq->wq.sz_m1;
+	struct mlx5_frag_buf_ctrl *fbc = &cq->wq.fbc;
+	u8 op_own = (cqcc >> fbc->log_sz) & 1;
+	u32 wq_sz = 1 << fbc->log_sz;
+	u32 ci = cqcc & fbc->sz_m1;
 	u32 ci_top = min_t(u32, wq_sz, ci + n);
 
 	for (; ci < ci_top; ci++, n--) {
@@ -101,7 +102,7 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
 	cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
 	cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
 	cq->title.op_own      &= 0xf0;
-	cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.log_sz);
+	cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.fbc.log_sz);
 	cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);
 
 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 6bcfc25350f5..ea66448ba365 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -41,7 +41,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 {
-	return wq->sz_m1 + 1;
+	return wq->fbc.sz_m1 + 1;
 }
 
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
@@ -62,7 +62,7 @@ static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
 
 static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
 {
-	return mlx5_cqwq_get_size(wq) << wq->log_stride;
+	return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
 }
 
 static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
@@ -92,7 +92,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	wq->buf = wq_ctrl->buf.direct.buf;
+	wq->buf = wq_ctrl->buf.frags->buf;
 	wq->db  = wq_ctrl->db.db;
 
 	wq_ctrl->mdev = mdev;
@@ -130,7 +130,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	wq->rq.buf = wq_ctrl->buf.direct.buf;
+	wq->rq.buf = wq_ctrl->buf.frags->buf;
 	wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(&wq->rq);
 	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
 	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -151,11 +151,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 {
 	int err;
 
-	wq->log_stride	= 6 + MLX5_GET(cqc, cqc, cqe_sz);
-	wq->log_sz	= MLX5_GET(cqc, cqc, log_cq_size);
-	wq->sz_m1	= (1 << wq->log_sz) - 1;
-	wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
-	wq->frag_sz_m1	= (1 << wq->log_frag_strides) - 1;
+	mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -172,7 +168,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	wq->frag_buf = wq_ctrl->frag_buf;
+	wq->fbc.frag_buf = wq_ctrl->frag_buf;
 	wq->db  = wq_ctrl->db.db;
 
 	wq_ctrl->mdev = mdev;
@@ -209,7 +205,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	wq->buf = wq_ctrl->buf.direct.buf;
+	wq->buf = wq_ctrl->buf.frags->buf;
 	wq->db  = wq_ctrl->db.db;
 
 	for (i = 0; i < wq->sz_m1; i++) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 718589d0cec2..fca90b94596d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -45,7 +45,7 @@ struct mlx5_wq_param {
 
 struct mlx5_wq_ctrl {
 	struct mlx5_core_dev	*mdev;
-	struct mlx5_buf		buf;
+	struct mlx5_frag_buf	buf;
 	struct mlx5_db		db;
 };
 
@@ -68,14 +68,9 @@ struct mlx5_wq_qp {
 };
 
 struct mlx5_cqwq {
-	struct mlx5_frag_buf	frag_buf;
-	__be32			*db;
-	u32			sz_m1;
-	u32			frag_sz_m1;
-	u32			cc; /* consumer counter */
-	u8			log_sz;
-	u8			log_stride;
-	u8			log_frag_strides;
+	struct mlx5_frag_buf_ctrl fbc;
+	__be32			  *db;
+	u32			  cc; /* consumer counter */
 };
 
 struct mlx5_wq_ll {
@@ -131,20 +126,17 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 
 static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 {
-	return wq->cc & wq->sz_m1;
+	return wq->cc & wq->fbc.sz_m1;
 }
 
 static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-	unsigned int frag = (ix >> wq->log_frag_strides);
-
-	return wq->frag_buf.frags[frag].buf +
-		((wq->frag_sz_m1 & ix) << wq->log_stride);
+	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
 static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
 {
-	return wq->cc >> wq->log_sz;
+	return wq->cc >> wq->fbc.log_sz;
 }
 
 static inline void mlx5_cqwq_pop(struct mlx5_cqwq *wq)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2860a253275b..bfea26af6de5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -345,13 +345,6 @@ struct mlx5_buf_list {
 	dma_addr_t		map;
 };
 
-struct mlx5_buf {
-	struct mlx5_buf_list	direct;
-	int			npages;
-	int			size;
-	u8			page_shift;
-};
-
 struct mlx5_frag_buf {
 	struct mlx5_buf_list	*frags;
 	int			npages;
@@ -359,6 +352,15 @@ struct mlx5_frag_buf {
 	u8			page_shift;
 };
 
+struct mlx5_frag_buf_ctrl {
+	struct mlx5_frag_buf	frag_buf;
+	u32			sz_m1;
+	u32			frag_sz_m1;
+	u8			log_sz;
+	u8			log_stride;
+	u8			log_frag_strides;
+};
+
 struct mlx5_eq_tasklet {
 	struct list_head list;
 	struct list_head process_list;
@@ -386,7 +388,7 @@ struct mlx5_eq {
 	struct mlx5_cq_table	cq_table;
 	__be32 __iomem	       *doorbell;
 	u32			cons_index;
-	struct mlx5_buf		buf;
+	struct mlx5_frag_buf	buf;
 	int			size;
 	unsigned int		irqn;
 	u8			eqn;
@@ -932,9 +934,9 @@ struct mlx5_hca_vport_context {
 	bool			grh_required;
 };
 
-static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+static inline void *mlx5_buf_offset(struct mlx5_frag_buf *buf, int offset)
 {
-		return buf->direct.buf + offset;
+		return buf->frags->buf + offset;
 }
 
 #define STRUCT_FIELD(header, field) \
@@ -973,6 +975,25 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
+static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
+					      void *cqc)
+{
+	fbc->log_stride	= 6 + MLX5_GET(cqc, cqc, cqe_sz);
+	fbc->log_sz	= MLX5_GET(cqc, cqc, log_cq_size);
+	fbc->sz_m1	= (1 << fbc->log_sz) - 1;
+	fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
+	fbc->frag_sz_m1	= (1 << fbc->log_frag_strides) - 1;
+}
+
+static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
+					  u32 ix)
+{
+	unsigned int frag = (ix >> fbc->log_frag_strides);
+
+	return fbc->frag_buf.frags[frag].buf +
+		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
@@ -998,9 +1019,10 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
-			struct mlx5_buf *buf, int node);
-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
-void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+			struct mlx5_frag_buf *buf, int node);
+int mlx5_buf_alloc(struct mlx5_core_dev *dev,
+		   int size, struct mlx5_frag_buf *buf);
+void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
 int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			     struct mlx5_frag_buf *buf, int node);
 void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
@@ -1045,7 +1067,8 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
 void mlx5_unregister_debugfs(void);
-void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+
+void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);

From 95f87a9706d0a55ff02a652bc8f1b3f7d51bf5eb Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Wed, 14 Feb 2018 13:50:34 -0800
Subject: [PATCH 0142/1678] selftests/bpf: Print unexpected output on fail

This makes it easier to debug off-hand when the error message isn't
exactly as expected.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c0f16e93f9bd..6cf9bd6f08b7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11291,7 +11291,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 			goto fail_log;
 		}
 		if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
-			printf("FAIL\nUnexpected error message!\n");
+			printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+			      expected_err, bpf_vlog);
 			goto fail_log;
 		}
 	}

From d0a0e4956f6c20754ef67db6dfb9746e85ecdcb5 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Wed, 14 Feb 2018 13:50:35 -0800
Subject: [PATCH 0143/1678] selftests/bpf: Count tests skipped by unpriv

When priviliged tests are skipped due to user rights, count the number of
skipped tests so it's more obvious that the test did not check everything.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6cf9bd6f08b7..7ab02526c403 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11378,7 +11378,7 @@ out:
 
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
-	int i, passes = 0, errors = 0;
+	int i, passes = 0, errors = 0, skips = 0;
 
 	for (i = from; i < to; i++) {
 		struct bpf_test *test = &tests[i];
@@ -11395,13 +11395,17 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 				set_admin(true);
 		}
 
-		if (!unpriv) {
+		if (unpriv) {
+			printf("#%d/p %s SKIP\n", i, test->descr);
+			skips++;
+		} else {
 			printf("#%d/p %s ", i, test->descr);
 			do_test_single(test, false, &passes, &errors);
 		}
 	}
 
-	printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
+	printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+	       skips, errors);
 	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
 }
 

From 0a67487403683852e05fea97208ad7b0ed820115 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Wed, 14 Feb 2018 13:50:36 -0800
Subject: [PATCH 0144/1678] selftests/bpf: Only run tests if !bpf_disabled

The "kernel.unprivileged_bpf_disabled" sysctl, if enabled, causes all
unprivileged tests to fail because it permanently disables unprivileged
BPF access for the currently running kernel. Skip the relevant tests if
the user attempts to run the testsuite with this sysctl enabled.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 26 ++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 7ab02526c403..2971ba2829ac 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -57,6 +57,9 @@
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
 
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
@@ -11376,6 +11379,17 @@ out:
 	return ret;
 }
 
+static void get_unpriv_disabled()
+{
+	char buf[2];
+	FILE *fd;
+
+	fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+	if (fgets(buf, 2, fd) == buf && atoi(buf))
+		unpriv_disabled = true;
+	fclose(fd);
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
 	int i, passes = 0, errors = 0, skips = 0;
@@ -11386,7 +11400,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 		/* Program types that are not supported by non-root we
 		 * skip right away.
 		 */
-		if (!test->prog_type) {
+		if (!test->prog_type && unpriv_disabled) {
+			printf("#%d/u %s SKIP\n", i, test->descr);
+			skips++;
+		} else if (!test->prog_type) {
 			if (!unpriv)
 				set_admin(false);
 			printf("#%d/u %s ", i, test->descr);
@@ -11433,6 +11450,13 @@ int main(int argc, char **argv)
 		}
 	}
 
+	get_unpriv_disabled();
+	if (unpriv && unpriv_disabled) {
+		printf("Cannot run as unprivileged user with sysctl %s.\n",
+		       UNPRIV_SYSCTL);
+		return EXIT_FAILURE;
+	}
+
 	setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
 	return do_test(unpriv, from, to);
 }

From 544bdebc6fcb68c2e8075bc2d3b68e39789d4165 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Wed, 14 Feb 2018 13:50:37 -0800
Subject: [PATCH 0145/1678] bpf: Remove unused callee_saved array

This array appears to be completely unused, remove it.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5fb69a85d967..3c74b163eaeb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -508,10 +508,6 @@ err:
 static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
-#define CALLEE_SAVED_REGS 5
-static const int callee_saved[CALLEE_SAVED_REGS] = {
-	BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
-};
 
 static void __mark_reg_not_init(struct bpf_reg_state *reg);
 

From d8d211a2a0c37755a8660dc69f97b7c70bf210b1 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 14 Feb 2018 16:39:56 +0300
Subject: [PATCH 0146/1678] net: Make extern and export get_net_ns()

This function will be used to obtain net of tun device.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 2 ++
 net/socket.c           | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9286a5a8c60c..1ce1f768a58c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -353,4 +353,6 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen
 			  unsigned int flags, struct timespec *timeout);
 extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
 			  unsigned int vlen, unsigned int flags);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
diff --git a/net/socket.c b/net/socket.c
index d83e804d5e65..ab58e57c09ca 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -990,10 +990,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *	what to do with it - that's up to the protocol still.
  */
 
-static struct ns_common *get_net_ns(struct ns_common *ns)
+struct ns_common *get_net_ns(struct ns_common *ns)
 {
 	return &get_net(container_of(ns, struct net, ns))->ns;
 }
+EXPORT_SYMBOL_GPL(get_net_ns);
 
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {

From 24dce0800baaa508b1a8ccf01ae0a9c8e600a5aa Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 14 Feb 2018 16:40:05 +0300
Subject: [PATCH 0147/1678] net: Export open_related_ns()

This function will be used to obtain net of tun device.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nsfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 36b0772701a0..60702d677bd4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -184,6 +184,7 @@ int open_related_ns(struct ns_common *ns,
 
 	return fd;
 }
+EXPORT_SYMBOL_GPL(open_related_ns);
 
 static long ns_ioctl(struct file *filp, unsigned int ioctl,
 			unsigned long arg)

From f2780d6d74756bc1d7ba32ff3dd0de4afd7c7e1e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 14 Feb 2018 16:40:14 +0300
Subject: [PATCH 0148/1678] tun: Add ioctl() SIOCGSKNS cmd to allow obtaining
 net ns of tun device

This patch adds possibility to get tun device's net namespace fd
in the same way we allow to do that for sockets.

Socket ioctl numbers do not intersect with tun-specific, and there
is already SIOCSIFHWADDR used in tun code. So, SIOCGSKNS number
is choosen instead of custom-made for this functionality.

Note, that open_related_ns() uses plain get_net_ns() and it's safe
(net can't be already dead at this moment):

  tun socket is allocated via sk_alloc() with zero last arg (kern = 0).
  So, each alive socket increments net::count, and the socket is definitely
  alive during ioctl syscall.

Also, common variable net is introduced, so small cleanup in TUNSETIFF
is made.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 81e6cc951e7f..8e9a0ac644d2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -78,6 +78,7 @@
 #include <linux/mutex.h>
 
 #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
 
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
@@ -2793,6 +2794,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
+	struct net *net;
 	kuid_t owner;
 	kgid_t group;
 	int sndbuf;
@@ -2801,7 +2803,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	int le;
 	int ret;
 
-	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
+	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
+	    (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
 		if (copy_from_user(&ifr, argp, ifreq_len))
 			return -EFAULT;
 	} else {
@@ -2821,6 +2824,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	rtnl_lock();
 
 	tun = tun_get(tfile);
+	net = sock_net(&tfile->sk);
 	if (cmd == TUNSETIFF) {
 		ret = -EEXIST;
 		if (tun)
@@ -2828,7 +2832,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 
 		ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
-		ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
+		ret = tun_set_iff(net, file, &ifr);
 
 		if (ret)
 			goto unlock;
@@ -2850,6 +2854,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		tfile->ifindex = ifindex;
 		goto unlock;
 	}
+	if (cmd == SIOCGSKNS) {
+		ret = -EPERM;
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			goto unlock;
+
+		ret = open_related_ns(&net->ns, get_net_ns);
+		goto unlock;
+	}
 
 	ret = -EBADFD;
 	if (!tun)

From f87c7f646c6f704b29504d63c4ab965584c4aacb Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:19 -0500
Subject: [PATCH 0149/1678] tools: tc-testing: Command line parms

Separate the functionality of the command line parameters into "selection"
parameters, "action" parameters and other parameters.

"Selection" parameters are for choosing which tests on which to act.
"Action" parameters are for choosing what to do with the selected tests.
"Other" parameters are for global effect (like "help" or "verbose").

With this commit, we add the ability to name a directory as another
selection mechanism.  We can accumulate a number of tests by directory,
file, category, or even by test id, instead of being constrained to
run all tests in one collection or just one test.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../creating-testcases/AddingTestCases.txt    |  35 ++-
 tools/testing/selftests/tc-testing/tdc.py     | 209 +++++++++++-------
 .../selftests/tc-testing/tdc_helper.py        |  15 +-
 3 files changed, 164 insertions(+), 95 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
index 00438331ba47..17b267dedbd9 100644
--- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -12,14 +12,18 @@ template.json for the required JSON format for test cases.
 Include the 'id' field, but do not assign a value. Running tdc with the -i
 option will generate a unique ID for that test case.
 
-tdc will recursively search the 'tc' subdirectory for .json files.  Any
-test case files you create in these directories will automatically be included.
-If you wish to store your custom test cases elsewhere, be sure to run tdc
-with the -f argument and the path to your file.
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files.  Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
 
-Be aware of required escape characters in the JSON data - particularly when
-defining the match pattern. Refer to the tctests.json file for examples when
-in doubt.
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt.  The match pattern is written in json, and
+will be used by python.  So the match pattern will be a python regular
+expression, but should be written using json syntax.
 
 
 TEST CASE STRUCTURE
@@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS
 If an error is detected during the setup/teardown process, execution of the
 tests will immediately stop with an error message and the namespace in which
 the tests are run will be destroyed. This is to prevent inaccurate results
-in the test cases.
+in the test cases.  tdc will output a series of TAP results for the skipped
+tests.
 
 Repeated failures of the setup/teardown may indicate a problem with the test
 case, or possibly even a bug in one of the commands that are not being tested.
@@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the
 individual command into a list, with the command being first, followed by all
 acceptable exit codes for the command.
 
+Example:
+
+A pair of setup commands.  The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action reclassify index 65536"
+        ],
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index fc373fdf2bdc..ef3a8881e458 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -209,20 +209,41 @@ def set_args(parser):
     """
     Set the command line arguments for tdc.
     """
-    parser.add_argument('-p', '--path', type=str,
-                        help='The full path to the tc executable to use')
-    parser.add_argument('-c', '--category', type=str, nargs='?', const='+c',
-                        help='Run tests only from the specified category, or if no category is specified, list known categories.')
-    parser.add_argument('-f', '--file', type=str,
-                        help='Run tests from the specified file')
-    parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY',
-                        help='List all test cases, or those only within the specified category')
-    parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID',
-                        help='Display the test case with specified id')
-    parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID',
-                        help='Execute the single test case with specified ID')
-    parser.add_argument('-i', '--id', action='store_true', dest='gen_id',
-                        help='Generate ID numbers for new test cases')
+    parser.add_argument(
+        '-p', '--path', type=str,
+        help='The full path to the tc executable to use')
+    sg = parser.add_argument_group(
+        'selection', 'select which test cases: ' +
+        'files plus directories; filtered by categories plus testids')
+    ag = parser.add_argument_group(
+        'action', 'select action to perform on selected test cases')
+
+    sg.add_argument(
+        '-D', '--directory', nargs='+', metavar='DIR',
+        help='Collect tests from the specified directory(ies) ' +
+        '(default [tc-tests])')
+    sg.add_argument(
+        '-f', '--file', nargs='+', metavar='FILE',
+        help='Run tests from the specified file(s)')
+    sg.add_argument(
+        '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+        help='Run tests only from the specified category/ies, ' +
+        'or if no category/ies is/are specified, list known categories.')
+    sg.add_argument(
+        '-e', '--execute', nargs='+', metavar='ID',
+        help='Execute the specified test cases with specified IDs')
+    ag.add_argument(
+        '-l', '--list', action='store_true',
+        help='List all test cases, or those only within the specified category')
+    ag.add_argument(
+        '-s', '--show', action='store_true', dest='showID',
+        help='Display the selected test cases')
+    ag.add_argument(
+        '-i', '--id', action='store_true', dest='gen_id',
+        help='Generate ID numbers for new test cases')
+    parser.add_argument(
+        '-v', '--verbose', action='count', default=0,
+        help='Show the commands that are being run')
     parser.add_argument('-d', '--device',
                         help='Execute the test case in flower category')
     return parser
@@ -257,7 +278,16 @@ def check_case_id(alltests):
     Check for duplicate test case IDs.
     """
     idl = get_id_list(alltests)
+    # print('check_case_id:  idl is {}'.format(idl))
+    # answer = list()
+    # for x in idl:
+    #     print('Looking at {}'.format(x))
+    #     print('what the heck is idl.count(x)???   {}'.format(idl.count(x)))
+    #     if idl.count(x) > 1:
+    #         answer.append(x)
+    #         print(' ... append it {}'.format(x))
     return [x for x in idl if idl.count(x) > 1]
+    return answer
 
 
 def does_id_exist(alltests, newid):
@@ -300,28 +330,96 @@ def generate_case_ids(alltests):
         json.dump(testlist, outfile, indent=4)
         outfile.close()
 
+def filter_tests_by_id(args, testlist):
+    '''
+    Remove tests from testlist that are not in the named id list.
+    If id list is empty, return empty list.
+    '''
+    newlist = list()
+    if testlist and args.execute:
+        target_ids = args.execute
+
+        if isinstance(target_ids, list) and (len(target_ids) > 0):
+            newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+    return newlist
+
+def filter_tests_by_category(args, testlist):
+    '''
+    Remove tests from testlist that are not in a named category.
+    '''
+    answer = list()
+    if args.category and testlist:
+        test_ids = list()
+        for catg in set(args.category):
+            if catg == '+c':
+                continue
+            print('considering category {}'.format(catg))
+            for tc in testlist:
+                if catg in tc['category'] and tc['id'] not in test_ids:
+                    answer.append(tc)
+                    test_ids.append(tc['id'])
+
+    return answer
 
 def get_test_cases(args):
     """
     If a test case file is specified, retrieve tests from that file.
     Otherwise, glob for all json files in subdirectories and load from
     each one.
+    Also, if requested, filter by category, and add tests matching
+    certain ids.
     """
     import fnmatch
-    if args.file != None:
-        if not os.path.isfile(args.file):
-            print("The specified test case file " + args.file + " does not exist.")
-            exit(1)
-        flist = [args.file]
-    else:
-        flist = []
-        for root, dirnames, filenames in os.walk('tc-tests'):
+
+    flist = []
+    testdirs = ['tc-tests']
+
+    if args.file:
+        # at least one file was specified - remove the default directory
+        testdirs = []
+
+        for ff in args.file:
+            if not os.path.isfile(ff):
+                print("IGNORING file " + ff + " \n\tBECAUSE does not exist.")
+            else:
+                flist.append(os.path.abspath(ff))
+
+    if args.directory:
+        testdirs = args.directory
+
+    for testdir in testdirs:
+        for root, dirnames, filenames in os.walk(testdir):
             for filename in fnmatch.filter(filenames, '*.json'):
-                flist.append(os.path.join(root, filename))
-    alltests = list()
+                candidate = os.path.abspath(os.path.join(root, filename))
+                if candidate not in testdirs:
+                    flist.append(candidate)
+
+    alltestcases = list()
     for casefile in flist:
-        alltests = alltests + (load_from_file(casefile))
-    return alltests
+        alltestcases = alltestcases + (load_from_file(casefile))
+
+    allcatlist = get_test_categories(alltestcases)
+    allidlist = get_id_list(alltestcases)
+
+    testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+    idtestcases = filter_tests_by_id(args, alltestcases)
+    cattestcases = filter_tests_by_category(args, alltestcases)
+
+    cat_ids = [x['id'] for x in cattestcases]
+    if args.execute:
+        if args.category:
+            alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+        else:
+            alltestcases = idtestcases
+    else:
+        if cat_ids:
+            alltestcases = cattestcases
+        else:
+            # just accept the existing value of alltestcases,
+            # which has been filtered by file/directory
+            pass
+
+    return allcatlist, allidlist, testcases_by_cats, alltestcases
 
 
 def set_operation_mode(args):
@@ -330,10 +428,9 @@ def set_operation_mode(args):
     what the script should do for this run, and call the appropriate
     function.
     """
-    alltests = get_test_cases(args)
+    ucat, idlist, testcases, alltests = get_test_cases(args)
 
     if args.gen_id:
-        idlist = get_id_list(alltests)
         if (has_blank_ids(idlist)):
             alltests = generate_case_ids(alltests)
         else:
@@ -347,42 +444,20 @@ def set_operation_mode(args):
         print("Please correct them before continuing.")
         exit(1)
 
-    ucat = get_test_categories(alltests)
-
     if args.showID:
-        show_test_case_by_id(alltests, args.showID[0])
+        for atest in alltests:
+            print_test_case(atest)
         exit(0)
 
-    if args.execute:
-        target_id = args.execute[0]
-    else:
-        target_id = ""
-
-    if args.category:
-        if (args.category == '+c'):
-            print("Available categories:")
-            print_sll(ucat)
-            exit(0)
-        else:
-            target_category = args.category
-    else:
-        target_category = ""
-
-
-    testcases = get_categorized_testlist(alltests, ucat)
+    if isinstance(args.category, list) and (len(args.category) == 0):
+        print("Available categories:")
+        print_sll(ucat)
+        exit(0)
 
     if args.list:
-        if (args.list == "++"):
+        if args.list:
             list_test_cases(alltests)
             exit(0)
-        elif(len(args.list) > 0):
-            if (args.list not in ucat):
-                print("Unknown category " + args.list)
-                print("Available categories:")
-                print_sll(ucat)
-                exit(1)
-            list_test_cases(testcases[args.list])
-            exit(0)
 
     if (os.geteuid() != 0):
         print("This script must be run with root privileges.\n")
@@ -390,24 +465,8 @@ def set_operation_mode(args):
 
     ns_create()
 
-    if (len(target_category) == 0):
-        if (len(target_id) > 0):
-            alltests = list(filter(lambda x: target_id in x['id'], alltests))
-            if (len(alltests) == 0):
-                print("Cannot find a test case with ID matching " + target_id)
-                exit(1)
-        catresults = test_runner(alltests, args)
-        print("All test results: " + "\n\n" + catresults)
-    elif (len(target_category) > 0):
-        if (target_category == "flower") and args.device == None:
-            print("Please specify a NIC device (-d) to run category flower")
-            exit(1)
-        if (target_category not in ucat):
-            print("Specified category is not present in this file.")
-            exit(1)
-        else:
-            catresults = test_runner(testcases[target_category], args)
-            print("Category " + target_category + "\n\n" + catresults)
+    catresults = test_runner(alltests, args)
+    print('All test results: \n\n{}'.format(catresults))
 
     ns_destroy()
 
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index db381120a566..9f35c96c88a0 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -57,20 +57,11 @@ def print_sll(items):
 
 def print_test_case(tcase):
     """ Pretty-printing of a given test case. """
+    print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
     for k in tcase.keys():
         if (isinstance(tcase[k], list)):
             print(k + ":")
             print_list(tcase[k])
         else:
-            print(k + ": " + tcase[k])
-
-
-def show_test_case_by_id(testlist, caseID):
-    """ Find the specified test case to pretty-print. """
-    if not any(d.get('id', None) == caseID for d in testlist):
-        print("That ID does not exist.")
-        exit(1)
-    else:
-        print_test_case(next((d for d in testlist if d['id'] == caseID)))
-
-
+            if not ((k == 'id') or (k == 'name')):
+                print(k + ": " + str(tcase[k]))

From 6fac733d9d07c4fcc349a44add75c6435cc3f18c Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:20 -0500
Subject: [PATCH 0150/1678] tools: tc-testing: Refactor test-runner

Split the test_runner function into the loop part (test_runner)
and the contents (run_one_test) for maintainability.
It makes it a little easier to catch exceptions
in an individual test, and keep going (and flush a bunch
of tap results for the skipped tests).

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 83 +++++++++++++++--------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index ef3a8881e458..a2624eda34db 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -85,9 +85,42 @@ def prepare_env(cmdlist):
             print("\nError message:")
             print(foutput)
             print("\nAborting test run.")
-            ns_destroy()
-            exit(1)
+            # ns_destroy()
+            raise Exception('prepare_env did not complete successfully')
 
+def run_one_test(index, tidx):
+    result = True
+    tresult = ""
+    tap = ""
+    print("Test " + tidx["id"] + ": " + tidx["name"])
+    prepare_env(tidx["setup"])
+    (p, procout) = exec_cmd(tidx["cmdUnderTest"])
+    exit_code = p.returncode
+
+    if (exit_code != int(tidx["expExitCode"])):
+        result = False
+        print("exit:", exit_code, int(tidx["expExitCode"]))
+        print(procout)
+    else:
+        match_pattern = re.compile(str(tidx["matchPattern"]),
+                                   re.DOTALL | re.MULTILINE)
+        (p, procout) = exec_cmd(tidx["verifyCmd"])
+        match_index = re.findall(match_pattern, procout)
+        if len(match_index) != int(tidx["matchCount"]):
+            result = False
+
+    if not result:
+        tresult += "not "
+    tresult += "ok {} - {} # {}\n".format(str(index), tidx['id'], tidx["name"])
+    tap += tresult
+
+    if result == False:
+        tap += procout
+
+    prepare_env(tidx["teardown"])
+    index += 1
+
+    return tap
 
 def test_runner(filtered_tests, args):
     """
@@ -104,38 +137,28 @@ def test_runner(filtered_tests, args):
     tap = str(index) + ".." + str(tcount) + "\n"
 
     for tidx in testlist:
-        result = True
-        tresult = ""
         if "flower" in tidx["category"] and args.device == None:
             continue
-        print("Test " + tidx["id"] + ": " + tidx["name"])
-        prepare_env(tidx["setup"])
-        (p, procout) = exec_cmd(tidx["cmdUnderTest"])
-        exit_code = p.returncode
-
-        if (exit_code != int(tidx["expExitCode"])):
-            result = False
-            print("exit:", exit_code, int(tidx["expExitCode"]))
-            print(procout)
-        else:
-            match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL)
-            (p, procout) = exec_cmd(tidx["verifyCmd"])
-            match_index = re.findall(match_pattern, procout)
-            if len(match_index) != int(tidx["matchCount"]):
-                result = False
-
-        if result == True:
-            tresult += "ok "
-        else:
-            tresult += "not ok "
-        tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n"
-
-        if result == False:
-            tap += procout
-
-        prepare_env(tidx["teardown"])
+        try:
+            badtest = tidx  # in case it goes bad
+            tap += run_one_test(index, tidx)
+        except Exception as ee:
+            print('Exception {} (caught in test_runner, running test {} {} {})'.
+                  format(ee, index, tidx['id'], tidx['name']))
+            break
         index += 1
 
+    count = index
+    tap += 'about to flush the tap output if tests need to be skipped\n'
+    if tcount + 1 != index:
+        for tidx in testlist[index - 1:]:
+            msg = 'skipped - previous setup or teardown failed'
+            tap += 'ok {} - {} # {} {} {} \n'.format(
+                count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+            count += 1
+
+    tap += 'done flushing skipped test tap output\n'
+
     return tap
 
 

From 93707cbabcc8baf2b2b5f4a99c1f08ee83eb7abd Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:21 -0500
Subject: [PATCH 0151/1678] tools: tc-testing: Introduce plugin architecture

This should be a general test architecture, and yet allow specific
tests to be done.  Introduce a plugin architecture.

An individual test has 4 stages, setup/execute/verify/teardown.  Each
plugin gets a chance to run a function at each stage, plus one call
before all the tests are called ("pre" suite) and one after all the
tests are called ("post" suite).  In addition, just before each
command is executed, the plugin gets a chance to modify the command
using the "adjust_command" hook.  This makes the test suite quite
flexible.

Future patches will take some functionality out of the tdc.py script and
place it in plugins.

To use the plugins, place the implementation in the plugins directory
and run tdc.py.  It will notice the plugins and use them.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/tc-testing/TdcPlugin.py |  74 ++++++
 .../creating-plugins/AddingPlugins.txt        | 104 +++++++++
 .../tc-testing/plugin-lib/README-PLUGINS      |  27 +++
 .../selftests/tc-testing/plugins/__init__.py  |   0
 tools/testing/selftests/tc-testing/tdc.py     | 221 +++++++++++++-----
 5 files changed, 368 insertions(+), 58 deletions(-)
 create mode 100644 tools/testing/selftests/tc-testing/TdcPlugin.py
 create mode 100644 tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
 create mode 100644 tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
 create mode 100644 tools/testing/selftests/tc-testing/plugins/__init__.py

diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000000..3ee9a6dacb52
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+    def __init__(self):
+        super().__init__()
+        print(' -- {}.__init__'.format(self.sub_class))
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        self.testcount = testcount
+        self.testidlist = testidlist
+        if self.args.verbose > 1:
+            print(' -- {}.pre_suite'.format(self.sub_class))
+
+    def post_suite(self, index):
+        '''run commands after test_runner completes the test loop
+        index is the last ordinal number of test that was attempted'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_suite'.format(self.sub_class))
+
+    def pre_case(self, test_ordinal, testid):
+        '''run commands before test_runner does one test'''
+        if self.args.verbose > 1:
+            print(' -- {}.pre_case'.format(self.sub_class))
+        self.args.testid = testid
+        self.args.test_ordinal = test_ordinal
+
+    def post_case(self):
+        '''run commands after test_runner does one test'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_case'.format(self.sub_class))
+
+    def pre_execute(self):
+        '''run command before test-runner does the execute step'''
+        if self.args.verbose > 1:
+            print(' -- {}.pre_execute'.format(self.sub_class))
+
+    def post_execute(self):
+        '''run command after test-runner does the execute step'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_execute'.format(self.sub_class))
+
+    def adjust_command(self, stage, command):
+        '''adjust the command'''
+        if self.args.verbose > 1:
+            print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+        # if stage == 'pre':
+        #     pass
+        # elif stage == 'setup':
+        #     pass
+        # elif stage == 'execute':
+        #     pass
+        # elif stage == 'verify':
+        #     pass
+        # elif stage == 'teardown':
+        #     pass
+        # elif stage == 'post':
+        #     pass
+        # else:
+        #     pass
+
+        return command
+
+    def add_args(self, parser):
+        '''Get the plugin args from the command line'''
+        self.argparser = parser
+        return self.argparser
+
+    def check_args(self, args, remaining):
+        '''Check that the args are set correctly'''
+        self.args = args
+        if self.args.verbose > 1:
+            print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000000..c18f88d09360
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+    pre        (the pre-suite stage)
+    prepare
+    execute
+    verify
+    teardown
+    post       (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+    def __init__(self)
+    def pre_suite(self, testcount, testidlist)     # see "PRE_SUITE" below
+    def post_suite(self, ordinal)                  # see "SKIPPING" below
+    def pre_case(self, test_ordinal, testid)       # see "PRE_CASE" below
+    def post_case(self)
+    def pre_execute(self)
+    def post_execute(self)
+    def adjust_command(self, stage, command)       # see "ADJUST" below
+    def add_args(self, parser)                     # see "ADD_ARGS" below
+    def check_args(self, args, remaining)          # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run).  This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped.  The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output.  The tdc.py script will do that for the test suite as
+defined in the test case, but if the plugin is being used to run extra
+tests on each test (eg, check for memory leaks on associated
+co-processes) then that other tap output can be generated in the
+post-suite method using this info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted.  It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id.  Useful for outputing the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed.  The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+    'pre'
+    'setup'
+    'command'
+    'verify'
+    'teardown'
+    'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it.  Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method is so that the plugin can do validation on
+the args, if needed.  If there is a problem, and Exception should
+be raised, with a string that explains the problem.
+
+eg:  raise Exception('plugin xxx, arg -y is wrong, fix it')
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000000..aa8a2669702b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory plugins off the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+    __init__.py
+    10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+    20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+    30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+    Check if the uid is root.  If not, bail out.
+
+valgrindPlugin
+    Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+    This plugin will write files to the cwd, called vgnd-xxx.log.  These will contain
+    the valgrind output for test xxx.  Any file matching the glob 'vgnd-*.log' will be
+    deleted at the end of the run.
+
+nsPlugin
+    Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a2624eda34db..3e6f9f2e1691 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -11,17 +11,91 @@ import re
 import os
 import sys
 import argparse
+import importlib
 import json
 import subprocess
+import time
 from collections import OrderedDict
 from string import Template
 
 from tdc_config import *
 from tdc_helper import *
 
+import TdcPlugin
 
 USE_NS = True
 
+class PluginMgr:
+    def __init__(self, argparser):
+        super().__init__()
+        self.plugins = {}
+        self.plugin_instances = []
+        self.args = []
+        self.argparser = argparser
+
+        # TODO, put plugins in order
+        plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
+        for dirpath, dirnames, filenames in os.walk(plugindir):
+            for fn in filenames:
+                if (fn.endswith('.py') and
+                    not fn == '__init__.py' and
+                    not fn.startswith('#') and
+                    not fn.startswith('.#')):
+                    mn = fn[0:-3]
+                    foo = importlib.import_module('plugins.' + mn)
+                    self.plugins[mn] = foo
+                    self.plugin_instances.append(foo.SubPlugin())
+
+    def call_pre_suite(self, testcount, testidlist):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.pre_suite(testcount, testidlist)
+
+    def call_post_suite(self, index):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_suite(index)
+
+    def call_pre_case(self, test_ordinal, testid):
+        for pgn_inst in self.plugin_instances:
+            try:
+                pgn_inst.pre_case(test_ordinal, testid)
+            except Exception as ee:
+                print('exception {} in call to pre_case for {} plugin'.
+                      format(ee, pgn_inst.__class__))
+                print('test_ordinal is {}'.format(test_ordinal))
+                print('testid is {}'.format(testid))
+                raise
+
+    def call_post_case(self):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_case()
+
+    def call_pre_execute(self):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.pre_execute()
+
+    def call_post_execute(self):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_execute()
+
+    def call_add_args(self, parser):
+        for pgn_inst in self.plugin_instances:
+            parser = pgn_inst.add_args(parser)
+        return parser
+
+    def call_check_args(self, args, remaining):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.check_args(args, remaining)
+
+    def call_adjust_command(self, stage, command):
+        for pgn_inst in self.plugin_instances:
+            command = pgn_inst.adjust_command(stage, command)
+        return command
+
+    @staticmethod
+    def _make_argparser(args):
+        self.argparser = argparse.ArgumentParser(
+            description='Linux TC unit tests')
+
 
 def replace_keywords(cmd):
     """
@@ -33,21 +107,27 @@ def replace_keywords(cmd):
     return subcmd
 
 
-def exec_cmd(command, nsonly=True):
+def exec_cmd(args, pm, stage, command, nsonly=True):
     """
     Perform any required modifications on an executable command, then run
     it in a subprocess and return the results.
     """
+    if len(command.strip()) == 0:
+        return None, None
     if (USE_NS and nsonly):
         command = 'ip netns exec $NS ' + command
 
     if '$' in command:
         command = replace_keywords(command)
 
+    command = pm.call_adjust_command(stage, command)
+    if args.verbose > 0:
+        print('command "{}"'.format(command))
     proc = subprocess.Popen(command,
         shell=True,
         stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
+        stderr=subprocess.PIPE,
+        env=ENVIR)
     (rawout, serr) = proc.communicate()
 
     if proc.returncode != 0 and len(serr) > 0:
@@ -60,69 +140,85 @@ def exec_cmd(command, nsonly=True):
     return proc, foutput
 
 
-def prepare_env(cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist):
     """
-    Execute the setup/teardown commands for a test case. Optionally
-    terminate test execution if the command fails.
+    Execute the setup/teardown commands for a test case.
+    Optionally terminate test execution if the command fails.
     """
+    if args.verbose > 0:
+        print('{}'.format(prefix))
     for cmdinfo in cmdlist:
-        if (type(cmdinfo) == list):
+        if isinstance(cmdinfo, list):
             exit_codes = cmdinfo[1:]
             cmd = cmdinfo[0]
         else:
             exit_codes = [0]
             cmd = cmdinfo
 
-        if (len(cmd) == 0):
+        if not cmd:
             continue
 
-        (proc, foutput) = exec_cmd(cmd)
+        (proc, foutput) = exec_cmd(args, pm, stage, cmd)
 
-        if proc.returncode not in exit_codes:
-            print
-            print("Could not execute:")
-            print(cmd)
-            print("\nError message:")
-            print(foutput)
-            print("\nAborting test run.")
-            # ns_destroy()
-            raise Exception('prepare_env did not complete successfully')
+        if proc and (proc.returncode not in exit_codes):
+            print('', file=sys.stderr)
+            print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+                  file=sys.stderr)
+            print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+                  file=sys.stderr)
+            print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+            print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+            print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+            raise Exception('"{}" did not complete successfully'.format(prefix))
 
-def run_one_test(index, tidx):
+def run_one_test(pm, args, index, tidx):
     result = True
     tresult = ""
     tap = ""
+    if args.verbose > 0:
+        print("\t====================\n=====> ", end="")
     print("Test " + tidx["id"] + ": " + tidx["name"])
-    prepare_env(tidx["setup"])
-    (p, procout) = exec_cmd(tidx["cmdUnderTest"])
+
+    pm.call_pre_case(index, tidx['id'])
+    prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+    if (args.verbose > 0):
+        print('-----> execute stage')
+    pm.call_pre_execute()
+    (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
     exit_code = p.returncode
+    pm.call_post_execute()
 
     if (exit_code != int(tidx["expExitCode"])):
         result = False
         print("exit:", exit_code, int(tidx["expExitCode"]))
         print(procout)
     else:
-        match_pattern = re.compile(str(tidx["matchPattern"]),
-                                   re.DOTALL | re.MULTILINE)
-        (p, procout) = exec_cmd(tidx["verifyCmd"])
+        if args.verbose > 0:
+            print('-----> verify stage')
+        match_pattern = re.compile(
+            str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+        (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
         match_index = re.findall(match_pattern, procout)
         if len(match_index) != int(tidx["matchCount"]):
             result = False
 
     if not result:
-        tresult += "not "
-    tresult += "ok {} - {} # {}\n".format(str(index), tidx['id'], tidx["name"])
+        tresult += 'not '
+    tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
     tap += tresult
 
     if result == False:
         tap += procout
 
-    prepare_env(tidx["teardown"])
+    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'])
+    pm.call_post_case()
+
     index += 1
 
     return tap
 
-def test_runner(filtered_tests, args):
+def test_runner(pm, args, filtered_tests):
     """
     Driver function for the unit tests.
 
@@ -135,63 +231,71 @@ def test_runner(filtered_tests, args):
     tcount = len(testlist)
     index = 1
     tap = str(index) + ".." + str(tcount) + "\n"
+    badtest = None
 
+    pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+
+    if args.verbose > 1:
+        print('Run tests here')
     for tidx in testlist:
         if "flower" in tidx["category"] and args.device == None:
             continue
         try:
             badtest = tidx  # in case it goes bad
-            tap += run_one_test(index, tidx)
+            tap += run_one_test(pm, args, index, tidx)
         except Exception as ee:
             print('Exception {} (caught in test_runner, running test {} {} {})'.
                   format(ee, index, tidx['id'], tidx['name']))
             break
         index += 1
 
+    # if we failed in setup or teardown,
+    # fill in the remaining tests with not ok
     count = index
     tap += 'about to flush the tap output if tests need to be skipped\n'
     if tcount + 1 != index:
         for tidx in testlist[index - 1:]:
             msg = 'skipped - previous setup or teardown failed'
-            tap += 'ok {} - {} # {} {} {} \n'.format(
+            tap += 'ok {} - {} # {} {} {}\n'.format(
                 count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
             count += 1
 
     tap += 'done flushing skipped test tap output\n'
+    pm.call_post_suite(index)
 
     return tap
 
 
-def ns_create():
+def ns_create(args, pm):
     """
     Create the network namespace in which the tests will be run and set up
     the required network devices for it.
     """
     if (USE_NS):
         cmd = 'ip netns add $NS'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
         cmd = 'ip link add $DEV0 type veth peer name $DEV1'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
         cmd = 'ip link set $DEV1 netns $NS'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
         cmd = 'ip link set $DEV0 up'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
         cmd = 'ip -n $NS link set $DEV1 up'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
         cmd = 'ip link set $DEV2 netns $NS'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
         cmd = 'ip -n $NS link set $DEV2 up'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'pre', cmd, False)
 
 
-def ns_destroy():
+def ns_destroy(args, pm):
     """
     Destroy the network namespace for testing (and any associated network
     devices as well)
     """
     if (USE_NS):
         cmd = 'ip netns delete $NS'
-        exec_cmd(cmd, False)
+        exec_cmd(args, pm, 'post', cmd, False)
 
 
 def has_blank_ids(idlist):
@@ -272,10 +376,10 @@ def set_args(parser):
     return parser
 
 
-def check_default_settings(args):
+def check_default_settings(args, remaining, pm):
     """
-    Process any arguments overriding the default settings, and ensure the
-    settings are correct.
+    Process any arguments overriding the default settings,
+    and ensure the settings are correct.
     """
     # Allow for overriding specific settings
     global NAMES
@@ -288,6 +392,8 @@ def check_default_settings(args):
         print("The specified tc path " + NAMES['TC'] + " does not exist.")
         exit(1)
 
+    pm.call_check_args(args, remaining)
+
 
 def get_id_list(alltests):
     """
@@ -301,16 +407,7 @@ def check_case_id(alltests):
     Check for duplicate test case IDs.
     """
     idl = get_id_list(alltests)
-    # print('check_case_id:  idl is {}'.format(idl))
-    # answer = list()
-    # for x in idl:
-    #     print('Looking at {}'.format(x))
-    #     print('what the heck is idl.count(x)???   {}'.format(idl.count(x)))
-    #     if idl.count(x) > 1:
-    #         answer.append(x)
-    #         print(' ... append it {}'.format(x))
     return [x for x in idl if idl.count(x) > 1]
-    return answer
 
 
 def does_id_exist(alltests, newid):
@@ -403,7 +500,7 @@ def get_test_cases(args):
 
         for ff in args.file:
             if not os.path.isfile(ff):
-                print("IGNORING file " + ff + " \n\tBECAUSE does not exist.")
+                print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
             else:
                 flist.append(os.path.abspath(ff))
 
@@ -445,7 +542,7 @@ def get_test_cases(args):
     return allcatlist, allidlist, testcases_by_cats, alltestcases
 
 
-def set_operation_mode(args):
+def set_operation_mode(pm, args):
     """
     Load the test case data and process remaining arguments to determine
     what the script should do for this run, and call the appropriate
@@ -486,12 +583,15 @@ def set_operation_mode(args):
         print("This script must be run with root privileges.\n")
         exit(1)
 
-    ns_create()
+    ns_create(args, pm)
 
-    catresults = test_runner(alltests, args)
+    if len(alltests):
+        catresults = test_runner(pm, args, alltests)
+    else:
+        catresults = 'No tests found\n'
     print('All test results: \n\n{}'.format(catresults))
 
-    ns_destroy()
+    ns_destroy(args, pm)
 
 
 def main():
@@ -501,10 +601,15 @@ def main():
     """
     parser = args_parse()
     parser = set_args(parser)
+    pm = PluginMgr(parser)
+    parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
-    check_default_settings(args)
+    args.NAMES = NAMES
+    check_default_settings(args, remaining, pm)
+    if args.verbose > 2:
+        print('args is {}'.format(args))
 
-    set_operation_mode(args)
+    set_operation_mode(pm, args)
 
     exit(0)
 

From f6926e85eee9be08d05170af3a2266b8d7f9cdef Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:22 -0500
Subject: [PATCH 0152/1678] tools: tc-testing: rootPlugin

Move the functionality that checks for root permissions into a plugin.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/plugin-lib/rootPlugin.py       | 19 +++++++++++++++++++
 tools/testing/selftests/tc-testing/tdc.py     |  4 ----
 2 files changed, 19 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py

diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000000..e36775bd4d12
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'root/SubPlugin'
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        # run commands before test_runner goes into a test loop
+        super().pre_suite(testcount, testidlist)
+
+        if os.geteuid():
+            print('This script must be run with root privileges', file=sys.stderr)
+            exit(1)
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 3e6f9f2e1691..a718d2b57739 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -579,10 +579,6 @@ def set_operation_mode(pm, args):
             list_test_cases(alltests)
             exit(0)
 
-    if (os.geteuid() != 0):
-        print("This script must be run with root privileges.\n")
-        exit(1)
-
     ns_create(args, pm)
 
     if len(alltests):

From a13fedbe56fef141aff7d584eba2a08daaf613cc Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:23 -0500
Subject: [PATCH 0153/1678] tools: tc-testing: nsPlugin

Move the functionality of creating a namespace before the test suite
and destroying it afterwards to a plugin.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/plugin-lib/nsPlugin.py         | 141 ++++++++++++++++++
 tools/testing/selftests/tc-testing/tdc.py     |  45 +-----
 2 files changed, 142 insertions(+), 44 deletions(-)
 create mode 100644 tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py

diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000000..a194b1af2b30
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'ns/SubPlugin'
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        super().pre_suite(testcount, testidlist)
+
+        if self.args.namespace:
+            self._ns_create()
+
+    def post_suite(self, index):
+        '''run commands after test_runner goes into a test loop'''
+        super().post_suite(index)
+        if self.args.verbose:
+            print('{}.post_suite'.format(self.sub_class))
+
+        if self.args.namespace:
+            self._ns_destroy()
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'netns',
+            'options for nsPlugin(run commands in net namespace)')
+        self.argparser_group.add_argument(
+            '-n', '--namespace', action='store_true',
+            help='Run commands in namespace')
+        return self.argparser
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        cmdform = 'list'
+        cmdlist = list()
+
+        if not self.args.namespace:
+            return command
+
+        if self.args.verbose:
+            print('{}.adjust_command'.format(self.sub_class))
+
+        if not isinstance(command, list):
+            cmdform = 'str'
+            cmdlist = command.split()
+        else:
+            cmdlist = command
+        if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+            if self.args.verbose:
+                print('adjust_command:  stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+            cmdlist.insert(0, self.args.NAMES['NS'])
+            cmdlist.insert(0, 'exec')
+            cmdlist.insert(0, 'netns')
+            cmdlist.insert(0, 'ip')
+        else:
+            pass
+
+        if cmdform == 'str':
+            command = ' '.join(cmdlist)
+        else:
+            command = cmdlist
+
+        if self.args.verbose:
+            print('adjust_command:  return command [{}]'.format(command))
+        return command
+
+    def _ns_create(self):
+        '''
+        Create the network namespace in which the tests will be run and set up
+        the required network devices for it.
+        '''
+        if self.args.namespace:
+            cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link set $DEV0 up'
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            if self.args.device:
+                cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+                self._exec_cmd('pre', cmd)
+                cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+                self._exec_cmd('pre', cmd)
+
+    def _ns_destroy(self):
+        '''
+        Destroy the network namespace for testing (and any associated network
+        devices as well)
+        '''
+        if self.args.namespace:
+            cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('post', cmd)
+
+    def _exec_cmd(self, stage, command):
+        '''
+        Perform any required modifications on an executable command, then run
+        it in a subprocess and return the results.
+        '''
+        if '$' in command:
+            command = self._replace_keywords(command)
+
+        self.adjust_command(stage, command)
+        if self.args.verbose:
+            print('_exec_cmd:  command "{}"'.format(command))
+        proc = subprocess.Popen(command,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=ENVIR)
+        (rawout, serr) = proc.communicate()
+
+        if proc.returncode != 0 and len(serr) > 0:
+            foutput = serr.decode("utf-8")
+        else:
+            foutput = rawout.decode("utf-8")
+
+        proc.stdout.close()
+        proc.stderr.close()
+        return proc, foutput
+
+    def _replace_keywords(self, cmd):
+        """
+        For a given executable command, substitute any known
+        variables contained within NAMES with the correct values
+        """
+        tcmd = Template(cmd)
+        subcmd = tcmd.safe_substitute(self.args.NAMES)
+        return subcmd
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a718d2b57739..b3754b9aa302 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -23,8 +23,6 @@ from tdc_helper import *
 
 import TdcPlugin
 
-USE_NS = True
-
 class PluginMgr:
     def __init__(self, argparser):
         super().__init__()
@@ -107,16 +105,13 @@ def replace_keywords(cmd):
     return subcmd
 
 
-def exec_cmd(args, pm, stage, command, nsonly=True):
+def exec_cmd(args, pm, stage, command):
     """
     Perform any required modifications on an executable command, then run
     it in a subprocess and return the results.
     """
     if len(command.strip()) == 0:
         return None, None
-    if (USE_NS and nsonly):
-        command = 'ip netns exec $NS ' + command
-
     if '$' in command:
         command = replace_keywords(command)
 
@@ -265,39 +260,6 @@ def test_runner(pm, args, filtered_tests):
 
     return tap
 
-
-def ns_create(args, pm):
-    """
-    Create the network namespace in which the tests will be run and set up
-    the required network devices for it.
-    """
-    if (USE_NS):
-        cmd = 'ip netns add $NS'
-        exec_cmd(args, pm, 'pre', cmd, False)
-        cmd = 'ip link add $DEV0 type veth peer name $DEV1'
-        exec_cmd(args, pm, 'pre', cmd, False)
-        cmd = 'ip link set $DEV1 netns $NS'
-        exec_cmd(args, pm, 'pre', cmd, False)
-        cmd = 'ip link set $DEV0 up'
-        exec_cmd(args, pm, 'pre', cmd, False)
-        cmd = 'ip -n $NS link set $DEV1 up'
-        exec_cmd(args, pm, 'pre', cmd, False)
-        cmd = 'ip link set $DEV2 netns $NS'
-        exec_cmd(args, pm, 'pre', cmd, False)
-        cmd = 'ip -n $NS link set $DEV2 up'
-        exec_cmd(args, pm, 'pre', cmd, False)
-
-
-def ns_destroy(args, pm):
-    """
-    Destroy the network namespace for testing (and any associated network
-    devices as well)
-    """
-    if (USE_NS):
-        cmd = 'ip netns delete $NS'
-        exec_cmd(args, pm, 'post', cmd, False)
-
-
 def has_blank_ids(idlist):
     """
     Search the list for empty ID fields and return true/false accordingly.
@@ -579,17 +541,12 @@ def set_operation_mode(pm, args):
             list_test_cases(alltests)
             exit(0)
 
-    ns_create(args, pm)
-
     if len(alltests):
         catresults = test_runner(pm, args, alltests)
     else:
         catresults = 'No tests found\n'
     print('All test results: \n\n{}'.format(catresults))
 
-    ns_destroy(args, pm)
-
-
 def main():
     """
     Start of execution; set up argument parser and get the arguments,

From c25e4736867f73901de4fdb62c122ae2c6d91159 Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:24 -0500
Subject: [PATCH 0154/1678] tools: tc-testing: valgrindPlugin

Run the command under test under valgrind.  Produce an extra set of
tap output for the memory check on each test.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/plugin-lib/valgrindPlugin.py   | 142 ++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py

diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000000..477a7bd7d7fb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+    return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'valgrind/SubPlugin'
+        self.tap = ''
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        super().pre_suite(testcount, testidlist)
+        if self.args.verbose > 1:
+            print('{}.pre_suite'.format(self.sub_class))
+        if self.args.valgrind:
+            self._add_to_tap('1..{}\n'.format(self.testcount))
+
+    def post_suite(self, index):
+        '''run commands after test_runner goes into a test loop'''
+        super().post_suite(index)
+        self._add_to_tap('\n|---\n')
+        if self.args.verbose > 1:
+            print('{}.post_suite'.format(self.sub_class))
+        print('{}'.format(self.tap))
+        if self.args.verbose < 4:
+            subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'valgrind',
+            'options for valgrindPlugin (run command under test under Valgrind)')
+
+        self.argparser_group.add_argument(
+            '-V', '--valgrind', action='store_true',
+            help='Run commands under valgrind')
+
+        return self.argparser
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        cmdform = 'list'
+        cmdlist = list()
+
+        if not self.args.valgrind:
+            return command
+
+        if self.args.verbose > 1:
+            print('{}.adjust_command'.format(self.sub_class))
+
+        if not isinstance(command, list):
+            cmdform = 'str'
+            cmdlist = command.split()
+        else:
+            cmdlist = command
+
+        if stage == 'execute':
+            if self.args.verbose > 1:
+                print('adjust_command:  stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+                      format(stage, command, cmdlist))
+            cmdlist.insert(0, '--track-origins=yes')
+            cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+            cmdlist.insert(0, '--leak-check=full')
+            cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+            cmdlist.insert(0, '-v')  # ask for summary of non-leak errors
+            cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+        else:
+            pass
+
+        if cmdform == 'str':
+            command = ' '.join(cmdlist)
+        else:
+            command = cmdlist
+
+        if self.args.verbose > 1:
+            print('adjust_command:  return command [{}]'.format(command))
+        return command
+
+    def post_execute(self):
+        if not self.args.valgrind:
+            return
+
+        self.definitely_lost_re = re.compile(
+            r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+        self.indirectly_lost_re = re.compile(
+            r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+        self.possibly_lost_re = re.compile(
+            r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+        self.non_leak_error_re = re.compile(
+            r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+        def_num = 0
+        ind_num = 0
+        pos_num = 0
+        nle_num = 0
+
+        # what about concurrent test runs?  Maybe force them to be in different directories?
+        with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+            content = vfd.read()
+            def_mo = self.definitely_lost_re.search(content)
+            ind_mo = self.indirectly_lost_re.search(content)
+            pos_mo = self.possibly_lost_re.search(content)
+            nle_mo = self.non_leak_error_re.search(content)
+
+            if def_mo:
+                def_num = int(def_mo.group(2))
+            if ind_mo:
+                ind_num = int(ind_mo.group(2))
+            if pos_mo:
+                pos_num = int(pos_mo.group(2))
+            if nle_mo:
+                nle_num = int(nle_mo.group(1))
+
+        mem_results = ''
+        if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+            mem_results += 'not '
+
+        mem_results += 'ok {} - {}-mem # {}\n'.format(
+            self.args.test_ordinal, self.args.testid, 'memory leak check')
+        self._add_to_tap(mem_results)
+        if mem_results.startswith('not '):
+            print('{}'.format(content))
+            self._add_to_tap(content)
+
+    def _add_to_tap(self, more_tap_output):
+        self.tap += more_tap_output

From 95ce14c3fff87ee43366c8cf84790f2e72167633 Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 14 Feb 2018 14:09:25 -0500
Subject: [PATCH 0155/1678] tools: tc-testing: Update README and TODO

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Acked-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/README   | 173 ++++++++++++++++++--
 tools/testing/selftests/tc-testing/TODO.txt |  25 ++-
 2 files changed, 179 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 970ff294fec8..3a0336782d2d 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -14,11 +14,11 @@ REQUIREMENTS
 
 *  The kernel must have network namespace support
 
-*   The kernel must have veth support available, as a veth pair is created
+*  The kernel must have veth support available, as a veth pair is created
    prior to running the tests.
 
-*  All tc-related features must be built in or available as modules.
-   To check what is required in current setup run:
+*  All tc-related features being tested must be built in or available as
+   modules.  To check what is required in current setup run:
    ./tdc.py -c
 
    Note:
@@ -44,10 +44,13 @@ using the -p option when running tdc:
 RUNNING TDC
 -----------
 
-To use tdc, root privileges are required. tdc will not run otherwise.
+To use tdc, root privileges are required.  This is because the
+commands being tested must be run as root.  The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
 
-All tests are executed inside a network namespace to prevent conflicts
-within the host.
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
 
 Running tdc without any arguments will run all tests. Refer to the section
 on command line arguments for more information, or run:
@@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following
 the failed test in the TAP output.
 
 
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future).  A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+  - setup
+  - execute
+  - verify
+  - teardown
+
+The setup and teardown stages can run zero or more commands.  The setup
+stage does some setup if the test needs it.  The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next.  These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command.  The execute stage
+tests the return code against one or more acceptable values.  The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
+
 USER-DEFINED CONSTANTS
 ----------------------
 
@@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require.
 Example:
 	$TC qdisc add dev $DEV1 ingress
 
+The NAMES values are used to substitute into the commands in the test cases.
+
 
 COMMAND LINE ARGUMENTS
 ----------------------
 
 Run tdc.py -h to see the full list of available arguments.
 
--p PATH           Specify the tc executable located at PATH to be used on this
-                  test run
--c                Show the available test case categories in this test file
--c CATEGORY       Run only tests that belong to CATEGORY
--f FILE           Read test cases from the JSON file named FILE
--l [CATEGORY]     List all test cases in the JSON file. If CATEGORY is
-                  specified, list test cases matching that category.
--s ID             Show the test case matching ID
--e ID             Execute the test case identified by ID
--i                Generate unique ID numbers for test cases with no existing
-                  ID number
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+              [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v]
+              [-d DEVICE] [-n NS] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -p PATH, --path PATH  The full path to the tc executable to use
+  -v, --verbose         Show the commands that are being run
+  -d DEVICE, --device DEVICE
+                        Execute the test case in flower category
+
+selection:
+  select which test cases: files plus directories; filtered by categories
+  plus testids
+
+  -D DIR [DIR ...], --directory DIR [DIR ...]
+                        Collect tests from the specified directory(ies)
+                        (default [tc-tests])
+  -f FILE [FILE ...], --file FILE [FILE ...]
+                        Run tests from the specified file(s)
+  -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+                        Run tests only from the specified category/ies, or if
+                        no category/ies is/are specified, list known
+                        categories.
+  -e ID [ID ...], --execute ID [ID ...]
+                        Execute the specified test cases with specified IDs
+
+action:
+  select action to perform on selected test cases
+
+  -l, --list            List all test cases, or those only within the
+                        specified category
+  -s, --show            Display the selected test cases
+  -i, --id              Generate ID numbers for new test cases
+
+netns:
+  options for nsPlugin(run commands in net namespace)
+
+  -n NS, --namespace NS
+                        Run commands in namespace NS
+
+valgrind:
+  options for valgrindPlugin (run command under test under Valgrind)
+
+  -V, --valgrind        Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib.  The are executed from
+directory plugins.  Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx  1 bjb  bjb    27 Oct  4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx  1 bjb  bjb    25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x  1 bjb  bjb     0 Sep 29 15:56 __init__.py
+
+The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and
+must be called "SubPlugin" so tdc can find them.  They are
+distinguished from each other in the python program by their module
+name.
+
+This base class supplies "hooks" to run extra functions.  These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed.  This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+  - pre  (pre-suite)
+  - setup
+  - command
+  - verify
+  - teardown
+  - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py.  To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins.  It will be detected at run time and invoked at the
+appropriate times.  There are a few examples in the plugin-lib
+directory:
+
+  - rootPlugin.py:
+      implements the enforcement of running as root
+  - nsPlugin.py:
+      sets up a network namespace and runs all commands in that namespace
+  - valgrindPlugin.py
+      runs each command in the execute stage under valgrind,
+      and checks for leaks.
+      This plugin will output an extra test for each test in the test file,
+      one is the existing output as to whether the test passed or failed,
+      and the other is a test whether the command leaked memory or not.
+      (This one is a preliminary version, it may not work quite right yet,
+      but the overall template is there and it should only need tweaks.)
 
 
 ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
index 6a266d811a78..c40698557e2f 100644
--- a/tools/testing/selftests/tc-testing/TODO.txt
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -5,6 +5,27 @@ tc Testing Suite To-Do list:
 
 - Add support for multiple versions of tc to run successively
 
-- Improve error messages when tdc aborts its run
+- Improve error messages when tdc aborts its run.  Partially done - still
+  need to better handle problems in pre- and post-suite.
 
-- Allow tdc to write its results to file
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+  Maybe use python logger module for this too.
+
+- A better implementation of the "hooks".  Currently, every plugin
+  will attempt to run a function at every hook point.  Could be
+  changed so that plugin __init__ methods will register functions to
+  be run in the various predefined times.  Then if a plugin does not
+  require action at a specific point, no penalty will be paid for
+  trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+  and a way to configure a test suite,
+  to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc

From 68e813aa43071377b698c662bc0214f2a833bcbb Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 14 Feb 2018 14:24:28 -0800
Subject: [PATCH 0156/1678] net/ipv4: Remove fib table id from rtable

Remove rt_table_id from rtable. It was added for getroute to return the
table id that was hit in the lookup. With the changes for fibmatch the
table id can be extracted from the fib_info returned in the fib_result
so it no longer needs to be in rtable directly.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c       | 1 -
 include/net/route.h     | 2 --
 net/ipv4/route.c        | 9 +--------
 net/ipv4/xfrm4_policy.c | 1 -
 4 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 139c61c8244a..239c78c53e58 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev)
 		return -ENOMEM;
 
 	rth->dst.output	= vrf_output;
-	rth->rt_table_id = vrf->tb_id;
 
 	rcu_assign_pointer(vrf->rth, rth);
 
diff --git a/include/net/route.h b/include/net/route.h
index 1eb9ce470e25..158833ea7988 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -65,8 +65,6 @@ struct rtable {
 	/* Miscellaneous cached information */
 	u32			rt_pmtu;
 
-	u32			rt_table_id;
-
 	struct list_head	rt_uncached;
 	struct uncached_list	*rt_uncached_list;
 };
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ce623e3e2ab..5ca7415cd48c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1509,7 +1509,6 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_pmtu = 0;
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
-		rt->rt_table_id = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
 
 		rt->dst.output = ip_output;
@@ -1727,8 +1726,6 @@ rt_cache:
 	}
 
 	rth->rt_is_input = 1;
-	if (res->table)
-		rth->rt_table_id = res->table->tb_id;
 	RT_CACHE_STAT_INC(in_slow_tot);
 
 	rth->dst.input = ip_forward;
@@ -2001,8 +1998,6 @@ local_input:
 	rth->dst.tclassid = itag;
 #endif
 	rth->rt_is_input = 1;
-	if (res->table)
-		rth->rt_table_id = res->table->tb_id;
 
 	RT_CACHE_STAT_INC(in_slow_tot);
 	if (res->type == RTN_UNREACHABLE) {
@@ -2231,8 +2226,6 @@ add:
 		return ERR_PTR(-ENOBUFS);
 
 	rth->rt_iif = orig_oif;
-	if (res->table)
-		rth->rt_table_id = res->table->tb_id;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
@@ -2762,7 +2755,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		rt->rt_flags |= RTCF_NOTIFY;
 
 	if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
-		table_id = rt->rt_table_id;
+		table_id = res.table ? res.table->tb_id : 0;
 
 	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
 		if (!res.fi) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 753f526cf9db..796ac4115485 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
-	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
 	return 0;

From ddd0010392d9cbcb95b53d11b7cafc67b373ab56 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Thu, 15 Feb 2018 09:19:26 +0900
Subject: [PATCH 0157/1678] selftests/net: fixes psock_fanout eBPF test case

eBPF test fails due to verifier failure because log_buf is too small.
Fixed by increasing log_buf size

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_fanout.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 989f917068d1..d4346b16b2c1 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -128,6 +128,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
 
 static void sock_fanout_set_ebpf(int fd)
 {
+	static char log_buf[65536];
+
 	const int len_off = __builtin_offsetof(struct __sk_buff, len);
 	struct bpf_insn prog[] = {
 		{ BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
@@ -140,7 +142,6 @@ static void sock_fanout_set_ebpf(int fd)
 		{ BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
 		{ BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
 	};
-	char log_buf[512];
 	union bpf_attr attr;
 	int pfd;
 

From 27469b7352b5197cffa0e3dadb5f1127f055da27 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:42 +0100
Subject: [PATCH 0158/1678] tipc: remove redundant code in topology server

The socket handling in the topology server is unnecessarily generic.
It is prepared to handle both SOCK_RDM, SOCK_DGRAM and SOCK_STREAM
type sockets, as well as the only socket type which is really used,
SOCK_SEQPACKET.

We now remove this redundant code to make the code more readable.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/server.c | 36 +++++++-----------------------------
 net/tipc/server.h |  4 +---
 net/tipc/subscr.c |  4 +---
 3 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/net/tipc/server.c b/net/tipc/server.c
index df0c563c90cd..04a6dd99dd65 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -82,7 +82,6 @@ struct tipc_conn {
 struct outqueue_entry {
 	struct list_head list;
 	struct kvec iov;
-	struct sockaddr_tipc dest;
 };
 
 static void tipc_recv_work(struct work_struct *work);
@@ -93,7 +92,6 @@ static void tipc_conn_kref_release(struct kref *kref)
 {
 	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
 	struct tipc_server *s = con->server;
-	struct sockaddr_tipc *saddr = s->saddr;
 	struct socket *sock = con->sock;
 	struct sock *sk;
 
@@ -103,8 +101,6 @@ static void tipc_conn_kref_release(struct kref *kref)
 			__module_get(sock->ops->owner);
 			__module_get(sk->sk_prot_creator->owner);
 		}
-		saddr->scope = -TIPC_NODE_SCOPE;
-		kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
 		sock_release(sock);
 		con->sock = NULL;
 	}
@@ -325,36 +321,24 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
 {
 	struct tipc_server *s = con->server;
 	struct socket *sock = NULL;
+	int imp = TIPC_CRITICAL_IMPORTANCE;
 	int ret;
 
 	ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
 	if (ret < 0)
 		return NULL;
 	ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
-				(char *)&s->imp, sizeof(s->imp));
+				(char *)&imp, sizeof(imp));
 	if (ret < 0)
 		goto create_err;
 	ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
 	if (ret < 0)
 		goto create_err;
 
-	switch (s->type) {
-	case SOCK_STREAM:
-	case SOCK_SEQPACKET:
-		con->rx_action = tipc_accept_from_sock;
-
-		ret = kernel_listen(sock, 0);
-		if (ret < 0)
-			goto create_err;
-		break;
-	case SOCK_DGRAM:
-	case SOCK_RDM:
-		con->rx_action = tipc_receive_from_sock;
-		break;
-	default:
-		pr_err("Unknown socket type %d\n", s->type);
+	con->rx_action = tipc_accept_from_sock;
+	ret = kernel_listen(sock, 0);
+	if (ret < 0)
 		goto create_err;
-	}
 
 	/* As server's listening socket owner and creator is the same module,
 	 * we have to decrease TIPC module reference count to guarantee that
@@ -444,7 +428,7 @@ static void tipc_clean_outqueues(struct tipc_conn *con)
 }
 
 int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-		      struct sockaddr_tipc *addr, void *data, size_t len)
+		      void *data, size_t len)
 {
 	struct outqueue_entry *e;
 	struct tipc_conn *con;
@@ -464,9 +448,6 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
 		return -ENOMEM;
 	}
 
-	if (addr)
-		memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc));
-
 	spin_lock_bh(&con->outqueue_lock);
 	list_add_tail(&e->list, &con->outqueue);
 	spin_unlock_bh(&con->outqueue_lock);
@@ -575,10 +556,6 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 		if (con->sock) {
 			memset(&msg, 0, sizeof(msg));
 			msg.msg_flags = MSG_DONTWAIT;
-			if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
-				msg.msg_name = &e->dest;
-				msg.msg_namelen = sizeof(struct sockaddr_tipc);
-			}
 			ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
 					     e->iov.iov_len);
 			if (ret == -EWOULDBLOCK || ret == 0) {
@@ -591,6 +568,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 			evt = e->iov.iov_base;
 			tipc_send_kern_top_evt(s->net, evt);
 		}
+
 		/* Don't starve users filling buffers */
 		if (++count >= MAX_SEND_MSG_COUNT) {
 			cond_resched();
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 64df7513cd70..434736d545c2 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -79,12 +79,10 @@ struct tipc_server {
 				 void *buf, size_t len);
 	struct sockaddr_tipc *saddr;
 	char name[TIPC_SERVER_NAME_LEN];
-	int imp;
-	int type;
 };
 
 int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-		      struct sockaddr_tipc *addr, void *data, size_t len);
+		      void *data, size_t len);
 
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 			     u32 upper, u32 filter, int *conid);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 68e26470c516..eaef826fc06d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -81,7 +81,7 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
 	sub->evt.found_upper = htohl(found_upper, sub->swap);
 	sub->evt.port.ref = htohl(port_ref, sub->swap);
 	sub->evt.port.node = htohl(node, sub->swap);
-	tipc_conn_sendmsg(tn->topsrv, subscriber->conid, NULL,
+	tipc_conn_sendmsg(tn->topsrv, subscriber->conid,
 			  msg_sect.iov_base, msg_sect.iov_len);
 }
 
@@ -375,8 +375,6 @@ int tipc_topsrv_start(struct net *net)
 	}
 	topsrv->net			= net;
 	topsrv->saddr			= saddr;
-	topsrv->imp			= TIPC_CRITICAL_IMPORTANCE;
-	topsrv->type			= SOCK_SEQPACKET;
 	topsrv->max_rcvbuf_size		= sizeof(struct tipc_subscr);
 	topsrv->tipc_conn_recvmsg	= tipc_subscrb_rcv_cb;
 	topsrv->tipc_conn_new		= tipc_subscrb_connect_cb;

From c901d26d4a8137f3ad0e5865d331f7c63c42d9f9 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:43 +0100
Subject: [PATCH 0159/1678] tipc: remove unnecessary function pointers

Interaction between the functionality in server.c and subscr.c is
done via function pointers installed in struct server. This makes
the code harder to follow, and doesn't serve any obvious purpose.

Here, we replace the function pointers with direct function calls.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/server.c | 21 ++++++++++-----------
 net/tipc/server.h |  5 -----
 net/tipc/subscr.c | 27 ++++++---------------------
 net/tipc/subscr.h |  4 ++++
 4 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/net/tipc/server.c b/net/tipc/server.c
index 04a6dd99dd65..8aa2a33b1e48 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -33,6 +33,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "subscr.h"
 #include "server.h"
 #include "core.h"
 #include "socket.h"
@@ -182,7 +183,6 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
 
 static void tipc_close_conn(struct tipc_conn *con)
 {
-	struct tipc_server *s = con->server;
 	struct sock *sk = con->sock->sk;
 	bool disconnect = false;
 
@@ -191,7 +191,7 @@ static void tipc_close_conn(struct tipc_conn *con)
 	if (disconnect) {
 		sk->sk_user_data = NULL;
 		if (con->conid)
-			s->tipc_conn_release(con->conid, con->usr_data);
+			tipc_subscrb_delete(con->usr_data);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -240,7 +240,6 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
 {
 	struct tipc_server *s = con->server;
 	struct sock *sk = con->sock->sk;
-	struct sockaddr_tipc addr;
 	struct msghdr msg = {};
 	struct kvec iov;
 	void *buf;
@@ -254,7 +253,7 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
 
 	iov.iov_base = buf;
 	iov.iov_len = s->max_rcvbuf_size;
-	msg.msg_name = &addr;
+	msg.msg_name = NULL;
 	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
 	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
 	if (ret <= 0) {
@@ -264,8 +263,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	if (test_bit(CF_CONNECTED, &con->flags))
-		ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid,
-					   &addr, con->usr_data, buf, ret);
+		ret = tipc_subscrb_rcv(sock_net(con->sock->sk), con->conid,
+				       con->usr_data, buf, ret);
 	read_unlock_bh(&sk->sk_callback_lock);
 	kmem_cache_free(s->rcvbuf_cache, buf);
 	if (ret < 0)
@@ -284,7 +283,6 @@ out_close:
 
 static int tipc_accept_from_sock(struct tipc_conn *con)
 {
-	struct tipc_server *s = con->server;
 	struct socket *sock = con->sock;
 	struct socket *newsock;
 	struct tipc_conn *newcon;
@@ -305,7 +303,8 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
 	tipc_register_callbacks(newsock, newcon);
 
 	/* Notify that new connection is incoming */
-	newcon->usr_data = s->tipc_conn_new(newcon->conid);
+	newcon->usr_data = tipc_subscrb_create(newcon->conid);
+
 	if (!newcon->usr_data) {
 		sock_release(newsock);
 		conn_put(newcon);
@@ -489,7 +488,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 
 	*conid = con->conid;
 	s = con->server;
-	scbr = s->tipc_conn_new(*conid);
+	scbr = tipc_subscrb_create(*conid);
 	if (!scbr) {
 		conn_put(con);
 		return false;
@@ -497,7 +496,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 
 	con->usr_data = scbr;
 	con->sock = NULL;
-	s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
+	tipc_subscrb_rcv(net, *conid, scbr, &sub, sizeof(sub));
 	return true;
 }
 
@@ -513,7 +512,7 @@ void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
 	test_and_clear_bit(CF_CONNECTED, &con->flags);
 	srv = con->server;
 	if (con->conid)
-		srv->tipc_conn_release(con->conid, con->usr_data);
+		tipc_subscrb_delete(con->usr_data);
 	conn_put(con);
 	conn_put(con);
 }
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 434736d545c2..b4b83bdc8b20 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -72,11 +72,6 @@ struct tipc_server {
 	struct workqueue_struct *rcv_wq;
 	struct workqueue_struct *send_wq;
 	int max_rcvbuf_size;
-	void *(*tipc_conn_new)(int conid);
-	void (*tipc_conn_release)(int conid, void *usr_data);
-	int (*tipc_conn_recvmsg)(struct net *net, int conid,
-				 struct sockaddr_tipc *addr, void *usr_data,
-				 void *buf, size_t len);
 	struct sockaddr_tipc *saddr;
 	char name[TIPC_SERVER_NAME_LEN];
 };
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index eaef826fc06d..b86fbbf7a0b9 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -220,7 +220,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
 	spin_unlock_bh(&subscriber->lock);
 }
 
-static struct tipc_subscriber *tipc_subscrb_create(int conid)
+struct tipc_subscriber *tipc_subscrb_create(int conid)
 {
 	struct tipc_subscriber *subscriber;
 
@@ -237,7 +237,7 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
 	return subscriber;
 }
 
-static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
+void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
 {
 	tipc_subscrb_subscrp_delete(subscriber, NULL);
 	tipc_subscrb_put(subscriber);
@@ -315,16 +315,10 @@ static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
 	return 0;
 }
 
-/* Handle one termination request for the subscriber */
-static void tipc_subscrb_release_cb(int conid, void *usr_data)
-{
-	tipc_subscrb_delete((struct tipc_subscriber *)usr_data);
-}
-
-/* Handle one request to create a new subscription for the subscriber */
-static int tipc_subscrb_rcv_cb(struct net *net, int conid,
-			       struct sockaddr_tipc *addr, void *usr_data,
-			       void *buf, size_t len)
+/* Handle one request to create a new subscription for the subscriber
+ */
+int tipc_subscrb_rcv(struct net *net, int conid, void *usr_data,
+		     void *buf, size_t len)
 {
 	struct tipc_subscriber *subscriber = usr_data;
 	struct tipc_subscr *s = (struct tipc_subscr *)buf;
@@ -345,12 +339,6 @@ static int tipc_subscrb_rcv_cb(struct net *net, int conid,
 	return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
 }
 
-/* Handle one request to establish a new subscriber */
-static void *tipc_subscrb_connect_cb(int conid)
-{
-	return (void *)tipc_subscrb_create(conid);
-}
-
 int tipc_topsrv_start(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
@@ -376,9 +364,6 @@ int tipc_topsrv_start(struct net *net)
 	topsrv->net			= net;
 	topsrv->saddr			= saddr;
 	topsrv->max_rcvbuf_size		= sizeof(struct tipc_subscr);
-	topsrv->tipc_conn_recvmsg	= tipc_subscrb_rcv_cb;
-	topsrv->tipc_conn_new		= tipc_subscrb_connect_cb;
-	topsrv->tipc_conn_release	= tipc_subscrb_release_cb;
 
 	strncpy(topsrv->name, name, strlen(name) + 1);
 	tn->topsrv = topsrv;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index f3edca775d9f..a736f29ba9ab 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -67,6 +67,10 @@ struct tipc_subscription {
 	struct tipc_event evt;
 };
 
+struct tipc_subscriber *tipc_subscrb_create(int conid);
+void tipc_subscrb_delete(struct tipc_subscriber *subscriber);
+int tipc_subscrb_rcv(struct net *net, int conid, void *usr_data,
+		     void *buf, size_t len);
 int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 			       u32 found_upper);
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub,

From df79d040dcd7d7e580c50edf40b82e677fe84801 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:44 +0100
Subject: [PATCH 0160/1678] tipc: eliminate struct tipc_subscriber

It is unnecessary to keep two structures, struct tipc_conn and struct
tipc_subscriber, with a one-to-one relationship and still with different
life cycles. The fact that the two often run in different contexts, and
still may access each other via direct pointers constitutes an additional
hazard, something we have experienced at several occasions, and still
see happening.

We have identified at least two remaining problems that are easier to
fix if we simplify the topology server data structure somewhat.

- When there is a race between a subscription up/down event and a
  timeout event, it is fully possible that the former might be delivered
  after the latter, leading to confusion for the receiver.

- The function tipc_subcrp_timeout() is executing in interrupt context,
  while the following call chain is at least theoretically possible:
  tipc_subscrp_timeout()
    tipc_subscrp_send_event()
      tipc_conn_sendmsg()
        conn_put()
          tipc_conn_kref_release()
            sock_release(sock)

I.e., we end up calling a function that might try to sleep in
interrupt context. To eliminate this, we need to ensure that the
tipc_conn structure and the socket, as well as the subscription
instances, only are deleted in work queue context, i.e., after the
timeout event really has been sent out.

We now remove this unnecessary complexity, by merging data and
functionality of the subscriber structure into struct tipc_conn
and the associated file server.c. We thereafter add a spinlock and
a new 'inactive' state to the subscription structure. Using those,
both problems described above can be easily solved.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/server.c | 161 +++++++++++++++++++++++++++---------------
 net/tipc/server.h |   2 +-
 net/tipc/subscr.c | 173 ++++++++--------------------------------------
 net/tipc/subscr.h |  17 +++--
 4 files changed, 146 insertions(+), 207 deletions(-)

diff --git a/net/tipc/server.c b/net/tipc/server.c
index 8aa2a33b1e48..b8268c0882a7 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -2,6 +2,7 @@
  * net/tipc/server.c: TIPC server infrastructure
  *
  * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,12 +58,13 @@
  * @sock: socket handler associated with connection
  * @flags: indicates connection state
  * @server: pointer to connected server
+ * @sub_list: lsit to all pertaing subscriptions
+ * @sub_lock: lock protecting the subscription list
+ * @outqueue_lock: control access to the outqueue
  * @rwork: receive work item
- * @usr_data: user-specified field
  * @rx_action: what to do when connection socket is active
  * @outqueue: pointer to first outbound message in queue
  * @outqueue_lock: control access to the outqueue
- * @outqueue: list of connection objects for its server
  * @swork: send work item
  */
 struct tipc_conn {
@@ -71,9 +73,10 @@ struct tipc_conn {
 	struct socket *sock;
 	unsigned long flags;
 	struct tipc_server *server;
+	struct list_head sub_list;
+	spinlock_t sub_lock; /* for subscription list */
 	struct work_struct rwork;
 	int (*rx_action) (struct tipc_conn *con);
-	void *usr_data;
 	struct list_head outqueue;
 	spinlock_t outqueue_lock;
 	struct work_struct swork;
@@ -81,6 +84,7 @@ struct tipc_conn {
 
 /* An entry waiting to be sent */
 struct outqueue_entry {
+	u32 evt;
 	struct list_head list;
 	struct kvec iov;
 };
@@ -89,18 +93,33 @@ static void tipc_recv_work(struct work_struct *work);
 static void tipc_send_work(struct work_struct *work);
 static void tipc_clean_outqueues(struct tipc_conn *con);
 
+static bool connected(struct tipc_conn *con)
+{
+	return con && test_bit(CF_CONNECTED, &con->flags);
+}
+
+/**
+ * htohl - convert value to endianness used by destination
+ * @in: value to convert
+ * @swap: non-zero if endianness must be reversed
+ *
+ * Returns converted value
+ */
+static u32 htohl(u32 in, int swap)
+{
+	return swap ? swab32(in) : in;
+}
+
 static void tipc_conn_kref_release(struct kref *kref)
 {
 	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
 	struct tipc_server *s = con->server;
 	struct socket *sock = con->sock;
-	struct sock *sk;
 
 	if (sock) {
-		sk = sock->sk;
 		if (test_bit(CF_SERVER, &con->flags)) {
 			__module_get(sock->ops->owner);
-			__module_get(sk->sk_prot_creator->owner);
+			__module_get(sock->sk->sk_prot_creator->owner);
 		}
 		sock_release(sock);
 		con->sock = NULL;
@@ -129,11 +148,8 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
 
 	spin_lock_bh(&s->idr_lock);
 	con = idr_find(&s->conn_idr, conid);
-	if (con) {
-		if (!test_bit(CF_CONNECTED, &con->flags) ||
-		    !kref_get_unless_zero(&con->kref))
-			con = NULL;
-	}
+	if (!connected(con) || !kref_get_unless_zero(&con->kref))
+		con = NULL;
 	spin_unlock_bh(&s->idr_lock);
 	return con;
 }
@@ -144,7 +160,7 @@ static void sock_data_ready(struct sock *sk)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	con = sock2con(sk);
-	if (con && test_bit(CF_CONNECTED, &con->flags)) {
+	if (connected(con)) {
 		conn_get(con);
 		if (!queue_work(con->server->rcv_wq, &con->rwork))
 			conn_put(con);
@@ -158,7 +174,7 @@ static void sock_write_space(struct sock *sk)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	con = sock2con(sk);
-	if (con && test_bit(CF_CONNECTED, &con->flags)) {
+	if (connected(con)) {
 		conn_get(con);
 		if (!queue_work(con->server->send_wq, &con->swork))
 			conn_put(con);
@@ -181,6 +197,24 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
+/* tipc_con_delete_sub - delete a specific or all subscriptions
+ *  for a given subscriber
+ */
+static void tipc_con_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
+{
+	struct list_head *sub_list = &con->sub_list;
+	struct tipc_subscription *sub, *tmp;
+
+	spin_lock_bh(&con->sub_lock);
+	list_for_each_entry_safe(sub, tmp, sub_list, subscrp_list) {
+		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s)))
+			tipc_sub_delete(sub);
+		else if (s)
+			break;
+	}
+	spin_unlock_bh(&con->sub_lock);
+}
+
 static void tipc_close_conn(struct tipc_conn *con)
 {
 	struct sock *sk = con->sock->sk;
@@ -188,10 +222,11 @@ static void tipc_close_conn(struct tipc_conn *con)
 
 	write_lock_bh(&sk->sk_callback_lock);
 	disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags);
+
 	if (disconnect) {
 		sk->sk_user_data = NULL;
 		if (con->conid)
-			tipc_subscrb_delete(con->usr_data);
+			tipc_con_delete_sub(con, NULL);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -215,7 +250,9 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
 
 	kref_init(&con->kref);
 	INIT_LIST_HEAD(&con->outqueue);
+	INIT_LIST_HEAD(&con->sub_list);
 	spin_lock_init(&con->outqueue_lock);
+	spin_lock_init(&con->sub_lock);
 	INIT_WORK(&con->swork, tipc_send_work);
 	INIT_WORK(&con->rwork, tipc_recv_work);
 
@@ -236,6 +273,35 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
 	return con;
 }
 
+int tipc_con_rcv_sub(struct net *net, int conid, struct tipc_conn *con,
+		     void *buf, size_t len)
+{
+	struct tipc_subscr *s = (struct tipc_subscr *)buf;
+	struct tipc_subscription *sub;
+	bool status;
+	int swap;
+
+	/* Determine subscriber's endianness */
+	swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
+			      TIPC_SUB_CANCEL));
+
+	/* Detect & process a subscription cancellation request */
+	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
+		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
+		tipc_con_delete_sub(con, s);
+		return 0;
+	}
+	status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
+	sub = tipc_subscrp_subscribe(net, s, conid, swap, status);
+	if (!sub)
+		return -1;
+
+	spin_lock_bh(&con->sub_lock);
+	list_add(&sub->subscrp_list, &con->sub_list);
+	spin_unlock_bh(&con->sub_lock);
+	return 0;
+}
+
 static int tipc_receive_from_sock(struct tipc_conn *con)
 {
 	struct tipc_server *s = con->server;
@@ -262,9 +328,7 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
 	}
 
 	read_lock_bh(&sk->sk_callback_lock);
-	if (test_bit(CF_CONNECTED, &con->flags))
-		ret = tipc_subscrb_rcv(sock_net(con->sock->sk), con->conid,
-				       con->usr_data, buf, ret);
+	ret = tipc_con_rcv_sub(s->net, con->conid, con, buf, ret);
 	read_unlock_bh(&sk->sk_callback_lock);
 	kmem_cache_free(s->rcvbuf_cache, buf);
 	if (ret < 0)
@@ -302,15 +366,6 @@ static int tipc_accept_from_sock(struct tipc_conn *con)
 	newcon->rx_action = tipc_receive_from_sock;
 	tipc_register_callbacks(newsock, newcon);
 
-	/* Notify that new connection is incoming */
-	newcon->usr_data = tipc_subscrb_create(newcon->conid);
-
-	if (!newcon->usr_data) {
-		sock_release(newsock);
-		conn_put(newcon);
-		return -ENOMEM;
-	}
-
 	/* Wake up receive process in case of 'SYN+' message */
 	newsock->sk->sk_data_ready(newsock->sk);
 	return ret;
@@ -427,7 +482,7 @@ static void tipc_clean_outqueues(struct tipc_conn *con)
 }
 
 int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-		      void *data, size_t len)
+		      u32 evt, void *data, size_t len)
 {
 	struct outqueue_entry *e;
 	struct tipc_conn *con;
@@ -436,7 +491,7 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
 	if (!con)
 		return -EINVAL;
 
-	if (!test_bit(CF_CONNECTED, &con->flags)) {
+	if (!connected(con)) {
 		conn_put(con);
 		return 0;
 	}
@@ -446,7 +501,7 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
 		conn_put(con);
 		return -ENOMEM;
 	}
-
+	e->evt = evt;
 	spin_lock_bh(&con->outqueue_lock);
 	list_add_tail(&e->list, &con->outqueue);
 	spin_unlock_bh(&con->outqueue_lock);
@@ -470,10 +525,9 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 			     u32 upper, u32 filter, int *conid)
 {
-	struct tipc_subscriber *scbr;
 	struct tipc_subscr sub;
-	struct tipc_server *s;
 	struct tipc_conn *con;
+	int rc;
 
 	sub.seq.type = type;
 	sub.seq.lower = lower;
@@ -487,32 +541,23 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 		return false;
 
 	*conid = con->conid;
-	s = con->server;
-	scbr = tipc_subscrb_create(*conid);
-	if (!scbr) {
-		conn_put(con);
-		return false;
-	}
-
-	con->usr_data = scbr;
 	con->sock = NULL;
-	tipc_subscrb_rcv(net, *conid, scbr, &sub, sizeof(sub));
-	return true;
+	rc = tipc_con_rcv_sub(net, *conid, con, &sub, sizeof(sub));
+	if (rc < 0)
+		tipc_close_conn(con);
+	return !rc;
 }
 
 void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
 {
 	struct tipc_conn *con;
-	struct tipc_server *srv;
 
 	con = tipc_conn_lookup(tipc_topsrv(net), conid);
 	if (!con)
 		return;
 
 	test_and_clear_bit(CF_CONNECTED, &con->flags);
-	srv = con->server;
-	if (con->conid)
-		tipc_subscrb_delete(con->usr_data);
+	tipc_con_delete_sub(con, NULL);
 	conn_put(con);
 	conn_put(con);
 }
@@ -537,7 +582,8 @@ static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
 
 static void tipc_send_to_sock(struct tipc_conn *con)
 {
-	struct tipc_server *s = con->server;
+	struct list_head *queue = &con->outqueue;
+	struct tipc_server *srv = con->server;
 	struct outqueue_entry *e;
 	struct tipc_event *evt;
 	struct msghdr msg;
@@ -545,16 +591,20 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 	int ret;
 
 	spin_lock_bh(&con->outqueue_lock);
-	while (test_bit(CF_CONNECTED, &con->flags)) {
-		e = list_entry(con->outqueue.next, struct outqueue_entry, list);
-		if ((struct list_head *) e == &con->outqueue)
-			break;
+
+	while (!list_empty(queue)) {
+		e = list_first_entry(queue, struct outqueue_entry, list);
 
 		spin_unlock_bh(&con->outqueue_lock);
 
+		if (e->evt == TIPC_SUBSCR_TIMEOUT) {
+			evt = (struct tipc_event *)e->iov.iov_base;
+			tipc_con_delete_sub(con, &evt->s);
+		}
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_flags = MSG_DONTWAIT;
+
 		if (con->sock) {
-			memset(&msg, 0, sizeof(msg));
-			msg.msg_flags = MSG_DONTWAIT;
 			ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
 					     e->iov.iov_len);
 			if (ret == -EWOULDBLOCK || ret == 0) {
@@ -565,7 +615,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 			}
 		} else {
 			evt = e->iov.iov_base;
-			tipc_send_kern_top_evt(s->net, evt);
+			tipc_send_kern_top_evt(srv->net, evt);
 		}
 
 		/* Don't starve users filling buffers */
@@ -573,7 +623,6 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 			cond_resched();
 			count = 0;
 		}
-
 		spin_lock_bh(&con->outqueue_lock);
 		list_del(&e->list);
 		tipc_free_entry(e);
@@ -591,7 +640,7 @@ static void tipc_recv_work(struct work_struct *work)
 	struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
 	int count = 0;
 
-	while (test_bit(CF_CONNECTED, &con->flags)) {
+	while (connected(con)) {
 		if (con->rx_action(con))
 			break;
 
@@ -608,7 +657,7 @@ static void tipc_send_work(struct work_struct *work)
 {
 	struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
 
-	if (test_bit(CF_CONNECTED, &con->flags))
+	if (connected(con))
 		tipc_send_to_sock(con);
 
 	conn_put(con);
diff --git a/net/tipc/server.h b/net/tipc/server.h
index b4b83bdc8b20..fcc72321362d 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -77,7 +77,7 @@ struct tipc_server {
 };
 
 int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-		      void *data, size_t len);
+		      u32 evt, void *data, size_t len);
 
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 			     u32 upper, u32 filter, int *conid);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index b86fbbf7a0b9..c6de1452db69 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/subscr.c: TIPC network topology service
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
  * All rights reserved.
  *
@@ -38,22 +38,6 @@
 #include "name_table.h"
 #include "subscr.h"
 
-/**
- * struct tipc_subscriber - TIPC network topology subscriber
- * @kref: reference counter to tipc_subscription object
- * @conid: connection identifier to server connecting to subscriber
- * @lock: control access to subscriber
- * @subscrp_list: list of subscription objects for this subscriber
- */
-struct tipc_subscriber {
-	struct kref kref;
-	int conid;
-	spinlock_t lock;
-	struct list_head subscrp_list;
-};
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-
 /**
  * htohl - convert value to endianness used by destination
  * @in: value to convert
@@ -71,9 +55,10 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
 				    u32 event, u32 port_ref, u32 node)
 {
 	struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
-	struct tipc_subscriber *subscriber = sub->subscriber;
 	struct kvec msg_sect;
 
+	if (sub->inactive)
+		return;
 	msg_sect.iov_base = (void *)&sub->evt;
 	msg_sect.iov_len = sizeof(struct tipc_event);
 	sub->evt.event = htohl(event, sub->swap);
@@ -81,7 +66,7 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
 	sub->evt.found_upper = htohl(found_upper, sub->swap);
 	sub->evt.port.ref = htohl(port_ref, sub->swap);
 	sub->evt.port.node = htohl(node, sub->swap);
-	tipc_conn_sendmsg(tn->topsrv, subscriber->conid,
+	tipc_conn_sendmsg(tn->topsrv, sub->conid, event,
 			  msg_sect.iov_base, msg_sect.iov_len);
 }
 
@@ -132,41 +117,22 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 		return;
 	if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
 		return;
-
-	tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
-				node);
+	spin_lock(&sub->lock);
+	tipc_subscrp_send_event(sub, found_lower, found_upper,
+				event, port_ref, node);
+	spin_unlock(&sub->lock);
 }
 
 static void tipc_subscrp_timeout(struct timer_list *t)
 {
 	struct tipc_subscription *sub = from_timer(sub, t, timer);
-	struct tipc_subscriber *subscriber = sub->subscriber;
+	struct tipc_subscr *s = &sub->evt.s;
 
-	spin_lock_bh(&subscriber->lock);
-	tipc_nametbl_unsubscribe(sub);
-	list_del(&sub->subscrp_list);
-	spin_unlock_bh(&subscriber->lock);
-
-	/* Notify subscriber of timeout */
-	tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
+	spin_lock(&sub->lock);
+	tipc_subscrp_send_event(sub, s->seq.lower, s->seq.upper,
 				TIPC_SUBSCR_TIMEOUT, 0, 0);
-
-	tipc_subscrp_put(sub);
-}
-
-static void tipc_subscrb_kref_release(struct kref *kref)
-{
-	kfree(container_of(kref,struct tipc_subscriber, kref));
-}
-
-static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
-{
-	kref_put(&subscriber->kref, tipc_subscrb_kref_release);
-}
-
-static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
-{
-	kref_get(&subscriber->kref);
+	sub->inactive = true;
+	spin_unlock(&sub->lock);
 }
 
 static void tipc_subscrp_kref_release(struct kref *kref)
@@ -175,11 +141,9 @@ static void tipc_subscrp_kref_release(struct kref *kref)
 						     struct tipc_subscription,
 						     kref);
 	struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
-	struct tipc_subscriber *subscriber = sub->subscriber;
 
 	atomic_dec(&tn->subscription_count);
 	kfree(sub);
-	tipc_subscrb_put(subscriber);
 }
 
 void tipc_subscrp_put(struct tipc_subscription *subscription)
@@ -192,68 +156,9 @@ void tipc_subscrp_get(struct tipc_subscription *subscription)
 	kref_get(&subscription->kref);
 }
 
-/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
- * subscriptions for a given subscriber.
- */
-static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
-					struct tipc_subscr *s)
-{
-	struct list_head *subscription_list = &subscriber->subscrp_list;
-	struct tipc_subscription *sub, *temp;
-	u32 timeout;
-
-	spin_lock_bh(&subscriber->lock);
-	list_for_each_entry_safe(sub, temp, subscription_list,  subscrp_list) {
-		if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
-			continue;
-
-		timeout = htohl(sub->evt.s.timeout, sub->swap);
-		if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
-			tipc_nametbl_unsubscribe(sub);
-			list_del(&sub->subscrp_list);
-			tipc_subscrp_put(sub);
-		}
-
-		if (s)
-			break;
-	}
-	spin_unlock_bh(&subscriber->lock);
-}
-
-struct tipc_subscriber *tipc_subscrb_create(int conid)
-{
-	struct tipc_subscriber *subscriber;
-
-	subscriber = kzalloc(sizeof(*subscriber), GFP_ATOMIC);
-	if (!subscriber) {
-		pr_warn("Subscriber rejected, no memory\n");
-		return NULL;
-	}
-	INIT_LIST_HEAD(&subscriber->subscrp_list);
-	kref_init(&subscriber->kref);
-	subscriber->conid = conid;
-	spin_lock_init(&subscriber->lock);
-
-	return subscriber;
-}
-
-void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
-{
-	tipc_subscrb_subscrp_delete(subscriber, NULL);
-	tipc_subscrb_put(subscriber);
-}
-
-static void tipc_subscrp_cancel(struct tipc_subscr *s,
-				struct tipc_subscriber *subscriber)
-{
-	tipc_subscrb_get(subscriber);
-	tipc_subscrb_subscrp_delete(subscriber, s);
-	tipc_subscrb_put(subscriber);
-}
-
 static struct tipc_subscription *tipc_subscrp_create(struct net *net,
 						     struct tipc_subscr *s,
-						     int swap)
+						     int conid, bool swap)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_subscription *sub;
@@ -275,6 +180,8 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
 
 	/* Initialize subscription object */
 	sub->net = net;
+	sub->conid = conid;
+	sub->inactive = false;
 	if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
 	    (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
 		pr_warn("Subscription rejected, illegal request\n");
@@ -284,59 +191,39 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
 
 	sub->swap = swap;
 	memcpy(&sub->evt.s, s, sizeof(*s));
+	spin_lock_init(&sub->lock);
 	atomic_inc(&tn->subscription_count);
 	kref_init(&sub->kref);
 	return sub;
 }
 
-static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
-				  struct tipc_subscriber *subscriber, int swap,
-				  bool status)
+struct tipc_subscription *tipc_subscrp_subscribe(struct net *net,
+						 struct tipc_subscr *s,
+						 int conid, bool swap,
+						 bool status)
 {
 	struct tipc_subscription *sub = NULL;
 	u32 timeout;
 
-	sub = tipc_subscrp_create(net, s, swap);
+	sub = tipc_subscrp_create(net, s, conid, swap);
 	if (!sub)
-		return -1;
+		return NULL;
 
-	spin_lock_bh(&subscriber->lock);
-	list_add(&sub->subscrp_list, &subscriber->subscrp_list);
-	sub->subscriber = subscriber;
 	tipc_nametbl_subscribe(sub, status);
-	tipc_subscrb_get(subscriber);
-	spin_unlock_bh(&subscriber->lock);
-
 	timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
 	timeout = htohl(sub->evt.s.timeout, swap);
-
 	if (timeout != TIPC_WAIT_FOREVER)
 		mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
-	return 0;
+	return sub;
 }
 
-/* Handle one request to create a new subscription for the subscriber
- */
-int tipc_subscrb_rcv(struct net *net, int conid, void *usr_data,
-		     void *buf, size_t len)
+void tipc_sub_delete(struct tipc_subscription *sub)
 {
-	struct tipc_subscriber *subscriber = usr_data;
-	struct tipc_subscr *s = (struct tipc_subscr *)buf;
-	bool status;
-	int swap;
-
-	/* Determine subscriber's endianness */
-	swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
-			      TIPC_SUB_CANCEL));
-
-	/* Detect & process a subscription cancellation request */
-	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
-		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
-		tipc_subscrp_cancel(s, subscriber);
-		return 0;
-	}
-	status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
-	return tipc_subscrp_subscribe(net, s, subscriber, swap, status);
+	tipc_nametbl_unsubscribe(sub);
+	if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
+		del_timer_sync(&sub->timer);
+	list_del(&sub->subscrp_list);
+	tipc_subscrp_put(sub);
 }
 
 int tipc_topsrv_start(struct net *net)
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index a736f29ba9ab..cfd0cb3a1da8 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -43,7 +43,7 @@
 #define TIPC_MAX_PUBLICATIONS	65535
 
 struct tipc_subscription;
-struct tipc_subscriber;
+struct tipc_conn;
 
 /**
  * struct tipc_subscription - TIPC network topology subscription object
@@ -58,19 +58,22 @@ struct tipc_subscriber;
  */
 struct tipc_subscription {
 	struct kref kref;
-	struct tipc_subscriber *subscriber;
 	struct net *net;
 	struct timer_list timer;
 	struct list_head nameseq_list;
 	struct list_head subscrp_list;
-	int swap;
 	struct tipc_event evt;
+	int conid;
+	bool swap;
+	bool inactive;
+	spinlock_t lock; /* serialize up/down and timer events */
 };
 
-struct tipc_subscriber *tipc_subscrb_create(int conid);
-void tipc_subscrb_delete(struct tipc_subscriber *subscriber);
-int tipc_subscrb_rcv(struct net *net, int conid, void *usr_data,
-		     void *buf, size_t len);
+struct tipc_subscription *tipc_subscrp_subscribe(struct net *net,
+						 struct tipc_subscr *s,
+						 int conid, bool swap,
+						 bool status);
+void tipc_sub_delete(struct tipc_subscription *sub);
 int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 			       u32 found_upper);
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub,

From 414574a0af36d329f560f542e650cc4a81cc1d69 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:45 +0100
Subject: [PATCH 0161/1678] tipc: simplify interaction between subscription and
 topology connection

The message transmission and reception in the topology server is more
generic than is currently necessary. By basing the funtionality on the
fact that we only send items of type struct tipc_event and always
receive items of struct tipc_subcr we can make several simplifications,
and also get rid of some unnecessary dynamic memory allocations.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c |  10 ++-
 net/tipc/server.c     | 170 ++++++++++++++----------------------------
 net/tipc/server.h     |  12 +--
 net/tipc/subscr.c     |  40 +++++-----
 net/tipc/subscr.h     |   5 +-
 5 files changed, 88 insertions(+), 149 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ed0457cc99d6..c0ca7be46f7a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -810,14 +810,15 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
  */
 void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
 {
-	struct tipc_net *tn = net_generic(s->net, tipc_net_id);
+	struct tipc_server *srv = s->server;
+	struct tipc_net *tn = tipc_net(srv->net);
 	u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
 	int index = hash(type);
 	struct name_seq *seq;
 	struct tipc_name_seq ns;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	seq = nametbl_find_seq(s->net, type);
+	seq = nametbl_find_seq(srv->net, type);
 	if (!seq)
 		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
 	if (seq) {
@@ -837,12 +838,13 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
  */
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
 {
-	struct tipc_net *tn = net_generic(s->net, tipc_net_id);
+	struct tipc_server *srv = s->server;
+	struct tipc_net *tn = tipc_net(srv->net);
 	struct name_seq *seq;
 	u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
 
 	spin_lock_bh(&tn->nametbl_lock);
-	seq = nametbl_find_seq(s->net, type);
+	seq = nametbl_find_seq(srv->net, type);
 	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		list_del_init(&s->nameseq_list);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index b8268c0882a7..7933fb92d852 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -84,9 +84,9 @@ struct tipc_conn {
 
 /* An entry waiting to be sent */
 struct outqueue_entry {
-	u32 evt;
+	bool inactive;
+	struct tipc_event evt;
 	struct list_head list;
-	struct kvec iov;
 };
 
 static void tipc_recv_work(struct work_struct *work);
@@ -154,6 +154,9 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
 	return con;
 }
 
+/* sock_data_ready - interrupt callback indicating the socket has data to read
+ * The queued job is launched in tipc_recv_from_sock()
+ */
 static void sock_data_ready(struct sock *sk)
 {
 	struct tipc_conn *con;
@@ -168,6 +171,10 @@ static void sock_data_ready(struct sock *sk)
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
+/* sock_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more is space in the send buffer
+ * The queued job is launched in tipc_send_to_sock()
+ */
 static void sock_write_space(struct sock *sk)
 {
 	struct tipc_conn *con;
@@ -273,10 +280,10 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
 	return con;
 }
 
-int tipc_con_rcv_sub(struct net *net, int conid, struct tipc_conn *con,
-		     void *buf, size_t len)
+static int tipc_con_rcv_sub(struct tipc_server *srv,
+			    struct tipc_conn *con,
+			    struct tipc_subscr *s)
 {
-	struct tipc_subscr *s = (struct tipc_subscr *)buf;
 	struct tipc_subscription *sub;
 	bool status;
 	int swap;
@@ -292,7 +299,7 @@ int tipc_con_rcv_sub(struct net *net, int conid, struct tipc_conn *con,
 		return 0;
 	}
 	status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
-	sub = tipc_subscrp_subscribe(net, s, conid, swap, status);
+	sub = tipc_subscrp_subscribe(srv, s, con->conid, swap, status);
 	if (!sub)
 		return -1;
 
@@ -304,43 +311,27 @@ int tipc_con_rcv_sub(struct net *net, int conid, struct tipc_conn *con,
 
 static int tipc_receive_from_sock(struct tipc_conn *con)
 {
-	struct tipc_server *s = con->server;
+	struct tipc_server *srv = con->server;
 	struct sock *sk = con->sock->sk;
 	struct msghdr msg = {};
+	struct tipc_subscr s;
 	struct kvec iov;
-	void *buf;
 	int ret;
 
-	buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC);
-	if (!buf) {
-		ret = -ENOMEM;
-		goto out_close;
-	}
-
-	iov.iov_base = buf;
-	iov.iov_len = s->max_rcvbuf_size;
+	iov.iov_base = &s;
+	iov.iov_len = sizeof(s);
 	msg.msg_name = NULL;
 	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
 	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
-	if (ret <= 0) {
-		kmem_cache_free(s->rcvbuf_cache, buf);
-		goto out_close;
+	if (ret == -EWOULDBLOCK)
+		return -EWOULDBLOCK;
+	if (ret > 0) {
+		read_lock_bh(&sk->sk_callback_lock);
+		ret = tipc_con_rcv_sub(srv, con, &s);
+		read_unlock_bh(&sk->sk_callback_lock);
 	}
-
-	read_lock_bh(&sk->sk_callback_lock);
-	ret = tipc_con_rcv_sub(s->net, con->conid, con, buf, ret);
-	read_unlock_bh(&sk->sk_callback_lock);
-	kmem_cache_free(s->rcvbuf_cache, buf);
 	if (ret < 0)
-		tipc_conn_terminate(s, con->conid);
-	return ret;
-
-out_close:
-	if (ret != -EWOULDBLOCK)
 		tipc_close_conn(con);
-	else if (ret == 0)
-		/* Don't return success if we really got EOF */
-		ret = -EAGAIN;
 
 	return ret;
 }
@@ -442,33 +433,6 @@ static int tipc_open_listening_sock(struct tipc_server *s)
 	return 0;
 }
 
-static struct outqueue_entry *tipc_alloc_entry(void *data, int len)
-{
-	struct outqueue_entry *entry;
-	void *buf;
-
-	entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC);
-	if (!entry)
-		return NULL;
-
-	buf = kmemdup(data, len, GFP_ATOMIC);
-	if (!buf) {
-		kfree(entry);
-		return NULL;
-	}
-
-	entry->iov.iov_base = buf;
-	entry->iov.iov_len = len;
-
-	return entry;
-}
-
-static void tipc_free_entry(struct outqueue_entry *e)
-{
-	kfree(e->iov.iov_base);
-	kfree(e);
-}
-
 static void tipc_clean_outqueues(struct tipc_conn *con)
 {
 	struct outqueue_entry *e, *safe;
@@ -476,50 +440,40 @@ static void tipc_clean_outqueues(struct tipc_conn *con)
 	spin_lock_bh(&con->outqueue_lock);
 	list_for_each_entry_safe(e, safe, &con->outqueue, list) {
 		list_del(&e->list);
-		tipc_free_entry(e);
+		kfree(e);
 	}
 	spin_unlock_bh(&con->outqueue_lock);
 }
 
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-		      u32 evt, void *data, size_t len)
+/* tipc_conn_queue_evt - interrupt level call from a subscription instance
+ * The queued job is launched in tipc_send_to_sock()
+ */
+void tipc_conn_queue_evt(struct tipc_server *s, int conid,
+			 u32 event, struct tipc_event *evt)
 {
 	struct outqueue_entry *e;
 	struct tipc_conn *con;
 
 	con = tipc_conn_lookup(s, conid);
 	if (!con)
-		return -EINVAL;
+		return;
 
-	if (!connected(con)) {
-		conn_put(con);
-		return 0;
-	}
+	if (!connected(con))
+		goto err;
 
-	e = tipc_alloc_entry(data, len);
-	if (!e) {
-		conn_put(con);
-		return -ENOMEM;
-	}
-	e->evt = evt;
+	e = kmalloc(sizeof(*e), GFP_ATOMIC);
+	if (!e)
+		goto err;
+	e->inactive = (event == TIPC_SUBSCR_TIMEOUT);
+	memcpy(&e->evt, evt, sizeof(*evt));
 	spin_lock_bh(&con->outqueue_lock);
 	list_add_tail(&e->list, &con->outqueue);
 	spin_unlock_bh(&con->outqueue_lock);
 
-	if (!queue_work(s->send_wq, &con->swork))
-		conn_put(con);
-	return 0;
-}
-
-void tipc_conn_terminate(struct tipc_server *s, int conid)
-{
-	struct tipc_conn *con;
-
-	con = tipc_conn_lookup(s, conid);
-	if (con) {
-		tipc_close_conn(con);
-		conn_put(con);
-	}
+	if (queue_work(s->send_wq, &con->swork))
+		return;
+err:
+	conn_put(con);
 }
 
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
@@ -542,7 +496,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 
 	*conid = con->conid;
 	con->sock = NULL;
-	rc = tipc_con_rcv_sub(net, *conid, con, &sub, sizeof(sub));
+	rc = tipc_con_rcv_sub(tipc_topsrv(net), con, &sub);
 	if (rc < 0)
 		tipc_close_conn(con);
 	return !rc;
@@ -587,6 +541,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 	struct outqueue_entry *e;
 	struct tipc_event *evt;
 	struct msghdr msg;
+	struct kvec iov;
 	int count = 0;
 	int ret;
 
@@ -594,27 +549,28 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 
 	while (!list_empty(queue)) {
 		e = list_first_entry(queue, struct outqueue_entry, list);
-
+		evt = &e->evt;
 		spin_unlock_bh(&con->outqueue_lock);
 
-		if (e->evt == TIPC_SUBSCR_TIMEOUT) {
-			evt = (struct tipc_event *)e->iov.iov_base;
+		if (e->inactive)
 			tipc_con_delete_sub(con, &evt->s);
-		}
+
 		memset(&msg, 0, sizeof(msg));
 		msg.msg_flags = MSG_DONTWAIT;
+		iov.iov_base = evt;
+		iov.iov_len = sizeof(*evt);
+		msg.msg_name = NULL;
 
 		if (con->sock) {
-			ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
-					     e->iov.iov_len);
+			ret = kernel_sendmsg(con->sock, &msg, &iov,
+					     1, sizeof(*evt));
 			if (ret == -EWOULDBLOCK || ret == 0) {
 				cond_resched();
 				goto out;
 			} else if (ret < 0) {
-				goto send_err;
+				goto err;
 			}
 		} else {
-			evt = e->iov.iov_base;
 			tipc_send_kern_top_evt(srv->net, evt);
 		}
 
@@ -625,13 +581,12 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 		}
 		spin_lock_bh(&con->outqueue_lock);
 		list_del(&e->list);
-		tipc_free_entry(e);
+		kfree(e);
 	}
 	spin_unlock_bh(&con->outqueue_lock);
 out:
 	return;
-
-send_err:
+err:
 	tipc_close_conn(con);
 }
 
@@ -695,22 +650,14 @@ int tipc_server_start(struct tipc_server *s)
 	idr_init(&s->conn_idr);
 	s->idr_in_use = 0;
 
-	s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size,
-					    0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!s->rcvbuf_cache)
-		return -ENOMEM;
-
 	ret = tipc_work_start(s);
-	if (ret < 0) {
-		kmem_cache_destroy(s->rcvbuf_cache);
+	if (ret < 0)
 		return ret;
-	}
+
 	ret = tipc_open_listening_sock(s);
-	if (ret < 0) {
+	if (ret < 0)
 		tipc_work_stop(s);
-		kmem_cache_destroy(s->rcvbuf_cache);
-		return ret;
-	}
+
 	return ret;
 }
 
@@ -731,6 +678,5 @@ void tipc_server_stop(struct tipc_server *s)
 	spin_unlock_bh(&s->idr_lock);
 
 	tipc_work_stop(s);
-	kmem_cache_destroy(s->rcvbuf_cache);
 	idr_destroy(&s->conn_idr);
 }
diff --git a/net/tipc/server.h b/net/tipc/server.h
index fcc72321362d..2de8709b467d 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -36,6 +36,7 @@
 #ifndef _TIPC_SERVER_H
 #define _TIPC_SERVER_H
 
+#include "core.h"
 #include <linux/idr.h>
 #include <linux/tipc.h>
 #include <net/net_namespace.h>
@@ -68,7 +69,6 @@ struct tipc_server {
 	spinlock_t idr_lock;
 	int idr_in_use;
 	struct net *net;
-	struct kmem_cache *rcvbuf_cache;
 	struct workqueue_struct *rcv_wq;
 	struct workqueue_struct *send_wq;
 	int max_rcvbuf_size;
@@ -76,19 +76,13 @@ struct tipc_server {
 	char name[TIPC_SERVER_NAME_LEN];
 };
 
-int tipc_conn_sendmsg(struct tipc_server *s, int conid,
-		      u32 evt, void *data, size_t len);
+void tipc_conn_queue_evt(struct tipc_server *s, int conid,
+			 u32 event, struct tipc_event *evt);
 
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 			     u32 upper, u32 filter, int *conid);
 void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
 
-/**
- * tipc_conn_terminate - terminate connection with server
- *
- * Note: Must call it in process context since it might sleep
- */
-void tipc_conn_terminate(struct tipc_server *s, int conid);
 int tipc_server_start(struct tipc_server *s);
 
 void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index c6de1452db69..c3e0b924e8c2 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -52,22 +52,19 @@ static u32 htohl(u32 in, int swap)
 
 static void tipc_subscrp_send_event(struct tipc_subscription *sub,
 				    u32 found_lower, u32 found_upper,
-				    u32 event, u32 port_ref, u32 node)
+				    u32 event, u32 port, u32 node)
 {
-	struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
-	struct kvec msg_sect;
+	struct tipc_event *evt = &sub->evt;
+	bool swap = sub->swap;
 
 	if (sub->inactive)
 		return;
-	msg_sect.iov_base = (void *)&sub->evt;
-	msg_sect.iov_len = sizeof(struct tipc_event);
-	sub->evt.event = htohl(event, sub->swap);
-	sub->evt.found_lower = htohl(found_lower, sub->swap);
-	sub->evt.found_upper = htohl(found_upper, sub->swap);
-	sub->evt.port.ref = htohl(port_ref, sub->swap);
-	sub->evt.port.node = htohl(node, sub->swap);
-	tipc_conn_sendmsg(tn->topsrv, sub->conid, event,
-			  msg_sect.iov_base, msg_sect.iov_len);
+	evt->event = htohl(event, swap);
+	evt->found_lower = htohl(found_lower, swap);
+	evt->found_upper = htohl(found_upper, swap);
+	evt->port.ref = htohl(port, swap);
+	evt->port.node = htohl(node, swap);
+	tipc_conn_queue_evt(sub->server, sub->conid, event, evt);
 }
 
 /**
@@ -137,10 +134,11 @@ static void tipc_subscrp_timeout(struct timer_list *t)
 
 static void tipc_subscrp_kref_release(struct kref *kref)
 {
-	struct tipc_subscription *sub = container_of(kref,
-						     struct tipc_subscription,
-						     kref);
-	struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+	struct tipc_subscription *sub;
+	struct tipc_net *tn;
+
+	sub = container_of(kref, struct tipc_subscription, kref);
+	tn = tipc_net(sub->server->net);
 
 	atomic_dec(&tn->subscription_count);
 	kfree(sub);
@@ -156,11 +154,11 @@ void tipc_subscrp_get(struct tipc_subscription *subscription)
 	kref_get(&subscription->kref);
 }
 
-static struct tipc_subscription *tipc_subscrp_create(struct net *net,
+static struct tipc_subscription *tipc_subscrp_create(struct tipc_server *srv,
 						     struct tipc_subscr *s,
 						     int conid, bool swap)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_net *tn = tipc_net(srv->net);
 	struct tipc_subscription *sub;
 	u32 filter = htohl(s->filter, swap);
 
@@ -179,7 +177,7 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
 	}
 
 	/* Initialize subscription object */
-	sub->net = net;
+	sub->server = srv;
 	sub->conid = conid;
 	sub->inactive = false;
 	if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
@@ -197,7 +195,7 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
 	return sub;
 }
 
-struct tipc_subscription *tipc_subscrp_subscribe(struct net *net,
+struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
 						 struct tipc_subscr *s,
 						 int conid, bool swap,
 						 bool status)
@@ -205,7 +203,7 @@ struct tipc_subscription *tipc_subscrp_subscribe(struct net *net,
 	struct tipc_subscription *sub = NULL;
 	u32 timeout;
 
-	sub = tipc_subscrp_create(net, s, conid, swap);
+	sub = tipc_subscrp_create(srv, s, conid, swap);
 	if (!sub)
 		return NULL;
 
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index cfd0cb3a1da8..64ffd32d15e6 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -49,7 +49,6 @@ struct tipc_conn;
  * struct tipc_subscription - TIPC network topology subscription object
  * @subscriber: pointer to its subscriber
  * @seq: name sequence associated with subscription
- * @net: point to network namespace
  * @timer: timer governing subscription duration (optional)
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
  * @subscrp_list: adjacent subscriptions in subscriber's subscription list
@@ -58,7 +57,7 @@ struct tipc_conn;
  */
 struct tipc_subscription {
 	struct kref kref;
-	struct net *net;
+	struct tipc_server *server;
 	struct timer_list timer;
 	struct list_head nameseq_list;
 	struct list_head subscrp_list;
@@ -69,7 +68,7 @@ struct tipc_subscription {
 	spinlock_t lock; /* serialize up/down and timer events */
 };
 
-struct tipc_subscription *tipc_subscrp_subscribe(struct net *net,
+struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
 						 struct tipc_subscr *s,
 						 int conid, bool swap,
 						 bool status);

From 8985ecc7c1e07c42acc1e44ac56fa224f8a5c62f Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:46 +0100
Subject: [PATCH 0162/1678] tipc: simplify endianness handling in topology
 subscriber

Because of the requirement for total distribution transparency, users
send subscriptions and receive topology events in their own host format.
It is up to the topology server to determine this format and do the
correct conversions to and from its own host format when needed.

Until now, this has been handled in a rather non-transparent way inside
the topology server and subscriber code, leading to unnecessary
complexity when creating subscriptions and issuing events.

We now improve this situation by adding two new macros, tipc_sub_read()
and tipc_evt_write(). Both those functions calculate the need for
conversion internally before performing their respective operations.
Hence, all handling of such conversions become transparent to the rest
of the code.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 42 +++++++++++++---------
 net/tipc/name_table.h |  2 +-
 net/tipc/server.c     | 25 ++-----------
 net/tipc/subscr.c     | 83 ++++++++++++++++---------------------------
 net/tipc/subscr.h     | 36 ++++++++++++++-----
 5 files changed, 86 insertions(+), 102 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index c0ca7be46f7a..2fbd0a20a958 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -412,18 +412,22 @@ found:
  * sequence overlapping with the requested sequence
  */
 static void tipc_nameseq_subscribe(struct name_seq *nseq,
-				   struct tipc_subscription *s,
-				   bool status)
+				   struct tipc_subscription *sub)
 {
 	struct sub_seq *sseq = nseq->sseqs;
 	struct tipc_name_seq ns;
+	struct tipc_subscr *s = &sub->evt.s;
+	bool no_status;
 
-	tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+	ns.type = tipc_sub_read(s, seq.type);
+	ns.lower = tipc_sub_read(s, seq.lower);
+	ns.upper = tipc_sub_read(s, seq.upper);
+	no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
 
-	tipc_subscrp_get(s);
-	list_add(&s->nameseq_list, &nseq->subscriptions);
+	tipc_subscrp_get(sub);
+	list_add(&sub->nameseq_list, &nseq->subscriptions);
 
-	if (!status || !sseq)
+	if (no_status || !sseq)
 		return;
 
 	while (sseq != &nseq->sseqs[nseq->first_free]) {
@@ -433,7 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 			int must_report = 1;
 
 			list_for_each_entry(crs, &info->zone_list, zone_list) {
-				tipc_subscrp_report_overlap(s, sseq->lower,
+				tipc_subscrp_report_overlap(sub, sseq->lower,
 							    sseq->upper,
 							    TIPC_PUBLISHED,
 							    crs->ref, crs->node,
@@ -808,11 +812,12 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
 /**
  * tipc_nametbl_subscribe - add a subscription object to the name table
  */
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
+void tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
-	struct tipc_server *srv = s->server;
+	struct tipc_server *srv = sub->server;
 	struct tipc_net *tn = tipc_net(srv->net);
-	u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+	struct tipc_subscr *s = &sub->evt.s;
+	u32 type = tipc_sub_read(s, seq.type);
 	int index = hash(type);
 	struct name_seq *seq;
 	struct tipc_name_seq ns;
@@ -823,10 +828,12 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
 		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
 	if (seq) {
 		spin_lock_bh(&seq->lock);
-		tipc_nameseq_subscribe(seq, s, status);
+		tipc_nameseq_subscribe(seq, sub);
 		spin_unlock_bh(&seq->lock);
 	} else {
-		tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+		ns.type = tipc_sub_read(s, seq.type);
+		ns.lower = tipc_sub_read(s, seq.lower);
+		ns.upper = tipc_sub_read(s, seq.upper);
 		pr_warn("Failed to create subscription for {%u,%u,%u}\n",
 			ns.type, ns.lower, ns.upper);
 	}
@@ -836,19 +843,20 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status)
 /**
  * tipc_nametbl_unsubscribe - remove a subscription object from name table
  */
-void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
+void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 {
-	struct tipc_server *srv = s->server;
+	struct tipc_server *srv = sub->server;
+	struct tipc_subscr *s = &sub->evt.s;
 	struct tipc_net *tn = tipc_net(srv->net);
 	struct name_seq *seq;
-	u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
+	u32 type = tipc_sub_read(s, seq.type);
 
 	spin_lock_bh(&tn->nametbl_lock);
 	seq = nametbl_find_seq(srv->net, type);
 	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
-		list_del_init(&s->nameseq_list);
-		tipc_subscrp_put(s);
+		list_del_init(&sub->nameseq_list);
+		tipc_subscrp_put(sub);
 		if (!seq->first_free && list_empty(&seq->subscriptions)) {
 			hlist_del_init_rcu(&seq->ns_list);
 			kfree(seq->sseqs);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f56e7cb3d436..17652602d5e2 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -120,7 +120,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 					     u32 lower, u32 node, u32 ref,
 					     u32 key);
-void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status);
+void tipc_nametbl_subscribe(struct tipc_subscription *s);
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
 void tipc_nametbl_stop(struct net *net);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 7933fb92d852..5d231fa8e4b5 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -98,18 +98,6 @@ static bool connected(struct tipc_conn *con)
 	return con && test_bit(CF_CONNECTED, &con->flags);
 }
 
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
-	return swap ? swab32(in) : in;
-}
-
 static void tipc_conn_kref_release(struct kref *kref)
 {
 	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
@@ -285,21 +273,12 @@ static int tipc_con_rcv_sub(struct tipc_server *srv,
 			    struct tipc_subscr *s)
 {
 	struct tipc_subscription *sub;
-	bool status;
-	int swap;
 
-	/* Determine subscriber's endianness */
-	swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
-			      TIPC_SUB_CANCEL));
-
-	/* Detect & process a subscription cancellation request */
-	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
-		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
+	if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
 		tipc_con_delete_sub(con, s);
 		return 0;
 	}
-	status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap));
-	sub = tipc_subscrp_subscribe(srv, s, con->conid, swap, status);
+	sub = tipc_subscrp_subscribe(srv, s, con->conid);
 	if (!sub)
 		return -1;
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index c3e0b924e8c2..406b09fc227b 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -38,32 +38,19 @@
 #include "name_table.h"
 #include "subscr.h"
 
-/**
- * htohl - convert value to endianness used by destination
- * @in: value to convert
- * @swap: non-zero if endianness must be reversed
- *
- * Returns converted value
- */
-static u32 htohl(u32 in, int swap)
-{
-	return swap ? swab32(in) : in;
-}
-
 static void tipc_subscrp_send_event(struct tipc_subscription *sub,
 				    u32 found_lower, u32 found_upper,
 				    u32 event, u32 port, u32 node)
 {
 	struct tipc_event *evt = &sub->evt;
-	bool swap = sub->swap;
 
 	if (sub->inactive)
 		return;
-	evt->event = htohl(event, swap);
-	evt->found_lower = htohl(found_lower, swap);
-	evt->found_upper = htohl(found_upper, swap);
-	evt->port.ref = htohl(port, swap);
-	evt->port.node = htohl(node, swap);
+	tipc_evt_write(evt, event, event);
+	tipc_evt_write(evt, found_lower, found_lower);
+	tipc_evt_write(evt, found_upper, found_upper);
+	tipc_evt_write(evt, port.ref, port);
+	tipc_evt_write(evt, port.node, node);
 	tipc_conn_queue_evt(sub->server, sub->conid, event, evt);
 }
 
@@ -85,29 +72,22 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 	return 1;
 }
 
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
+void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
+				 u32 found_lower, u32 found_upper,
+				 u32 event, u32 port, u32 node,
+				 u32 scope, int must)
 {
-	return htohl(type, swap);
-}
-
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
-			      struct tipc_name_seq *out)
-{
-	out->type = htohl(in->type, swap);
-	out->lower = htohl(in->lower, swap);
-	out->upper = htohl(in->upper, swap);
-}
-
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
-				 u32 found_upper, u32 event, u32 port_ref,
-				 u32 node, u32 scope, int must)
-{
-	u32 filter = htohl(sub->evt.s.filter, sub->swap);
 	struct tipc_name_seq seq;
+	struct tipc_subscr *s = &sub->evt.s;
+	u32 filter = tipc_sub_read(s, filter);
+
+	seq.type = tipc_sub_read(s, seq.type);
+	seq.lower = tipc_sub_read(s, seq.lower);
+	seq.upper = tipc_sub_read(s, seq.upper);
 
-	tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
 	if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
 		return;
+
 	if (!must && !(filter & TIPC_SUB_PORTS))
 		return;
 	if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
@@ -116,7 +96,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
 		return;
 	spin_lock(&sub->lock);
 	tipc_subscrp_send_event(sub, found_lower, found_upper,
-				event, port_ref, node);
+				event, port, node);
 	spin_unlock(&sub->lock);
 }
 
@@ -156,11 +136,11 @@ void tipc_subscrp_get(struct tipc_subscription *subscription)
 
 static struct tipc_subscription *tipc_subscrp_create(struct tipc_server *srv,
 						     struct tipc_subscr *s,
-						     int conid, bool swap)
+						     int conid)
 {
 	struct tipc_net *tn = tipc_net(srv->net);
 	struct tipc_subscription *sub;
-	u32 filter = htohl(s->filter, swap);
+	u32 filter = tipc_sub_read(s, filter);
 
 	/* Refuse subscription if global limit exceeded */
 	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
@@ -177,39 +157,38 @@ static struct tipc_subscription *tipc_subscrp_create(struct tipc_server *srv,
 	}
 
 	/* Initialize subscription object */
+	if (filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE)
+		goto err;
+	if (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))
+		goto err;
 	sub->server = srv;
 	sub->conid = conid;
 	sub->inactive = false;
-	if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
-	    (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
-		pr_warn("Subscription rejected, illegal request\n");
-		kfree(sub);
-		return NULL;
-	}
-
-	sub->swap = swap;
 	memcpy(&sub->evt.s, s, sizeof(*s));
 	spin_lock_init(&sub->lock);
 	atomic_inc(&tn->subscription_count);
 	kref_init(&sub->kref);
 	return sub;
+err:
+	pr_warn("Subscription rejected, illegal request\n");
+	kfree(sub);
+	return NULL;
 }
 
 struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
 						 struct tipc_subscr *s,
-						 int conid, bool swap,
-						 bool status)
+						 int conid)
 {
 	struct tipc_subscription *sub = NULL;
 	u32 timeout;
 
-	sub = tipc_subscrp_create(srv, s, conid, swap);
+	sub = tipc_subscrp_create(srv, s, conid);
 	if (!sub)
 		return NULL;
 
-	tipc_nametbl_subscribe(sub, status);
+	tipc_nametbl_subscribe(sub);
 	timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
-	timeout = htohl(sub->evt.s.timeout, swap);
+	timeout = tipc_sub_read(&sub->evt.s, timeout);
 	if (timeout != TIPC_WAIT_FOREVER)
 		mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
 	return sub;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 64ffd32d15e6..db80e41bba08 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -52,7 +52,6 @@ struct tipc_conn;
  * @timer: timer governing subscription duration (optional)
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
  * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @swap: indicates if subscriber uses opposite endianness in its messages
  * @evt: template for events generated by subscription
  */
 struct tipc_subscription {
@@ -63,28 +62,47 @@ struct tipc_subscription {
 	struct list_head subscrp_list;
 	struct tipc_event evt;
 	int conid;
-	bool swap;
 	bool inactive;
 	spinlock_t lock; /* serialize up/down and timer events */
 };
 
 struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
 						 struct tipc_subscr *s,
-						 int conid, bool swap,
-						 bool status);
+						 int conid);
 void tipc_sub_delete(struct tipc_subscription *sub);
+
 int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 			       u32 found_upper);
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
-				 u32 found_lower, u32 found_upper, u32 event,
-				 u32 port_ref, u32 node, u32 scope, int must);
-void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
-			      struct tipc_name_seq *out);
-u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
+				 u32 found_lower, u32 found_upper,
+				 u32 event, u32 port, u32 node,
+				 u32 scope, int must);
 int tipc_topsrv_start(struct net *net);
 void tipc_topsrv_stop(struct net *net);
 
 void tipc_subscrp_put(struct tipc_subscription *subscription);
 void tipc_subscrp_get(struct tipc_subscription *subscription);
 
+#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
+
+/* tipc_sub_read - return field_ of struct sub_ in host endian format
+ */
+#define tipc_sub_read(sub_, field_)					\
+	({								\
+		struct tipc_subscr *sub__ = sub_;			\
+		u32 val__ = (sub__)->field_;				\
+		int swap_ = !((sub__)->filter & TIPC_FILTER_MASK);	\
+		(swap_ ? swab32(val__) : val__);			\
+	})
+
+/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format
+ */
+#define tipc_evt_write(evt_, field_, val_)				\
+	({								\
+		struct tipc_event *evt__ = evt_;			\
+		u32 val__ = val_;					\
+		int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK));	\
+		(evt__)->field_ = swap_ ? swab32(val__) : val__;	\
+	})
+
 #endif

From 242e82cc95f6b4e83e1771f9915edcb2a63708e1 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:47 +0100
Subject: [PATCH 0163/1678] tipc: collapse subscription creation functions

After the previous changes it becomes logical to collapse the two-level
creation of subscription instances into one. We do that here.

We also rename the creation and deletion functions for more consistency.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/server.c |  4 ++--
 net/tipc/server.h |  1 +
 net/tipc/subscr.c | 46 ++++++++++++----------------------------------
 net/tipc/subscr.h | 14 +++++++-------
 4 files changed, 22 insertions(+), 43 deletions(-)

diff --git a/net/tipc/server.c b/net/tipc/server.c
index 5d231fa8e4b5..6a18b10ac271 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -203,7 +203,7 @@ static void tipc_con_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
 	spin_lock_bh(&con->sub_lock);
 	list_for_each_entry_safe(sub, tmp, sub_list, subscrp_list) {
 		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s)))
-			tipc_sub_delete(sub);
+			tipc_sub_unsubscribe(sub);
 		else if (s)
 			break;
 	}
@@ -278,7 +278,7 @@ static int tipc_con_rcv_sub(struct tipc_server *srv,
 		tipc_con_delete_sub(con, s);
 		return 0;
 	}
-	sub = tipc_subscrp_subscribe(srv, s, con->conid);
+	sub = tipc_sub_subscribe(srv, s, con->conid);
 	if (!sub)
 		return -1;
 
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 2de8709b467d..995b79591ffe 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -2,6 +2,7 @@
  * net/tipc/server.h: Include file for TIPC server code
  *
  * Copyright (c) 2012-2013, Wind River Systems
+ * Copyright (c) 2017, Ericsson AB
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 406b09fc227b..8d37b61e3163 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -134,33 +134,29 @@ void tipc_subscrp_get(struct tipc_subscription *subscription)
 	kref_get(&subscription->kref);
 }
 
-static struct tipc_subscription *tipc_subscrp_create(struct tipc_server *srv,
-						     struct tipc_subscr *s,
-						     int conid)
+struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
+					     struct tipc_subscr *s,
+					     int conid)
 {
 	struct tipc_net *tn = tipc_net(srv->net);
 	struct tipc_subscription *sub;
 	u32 filter = tipc_sub_read(s, filter);
+	u32 timeout;
 
-	/* Refuse subscription if global limit exceeded */
-	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
-		pr_warn("Subscription rejected, limit reached (%u)\n",
-			TIPC_MAX_SUBSCRIPTIONS);
+	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+		pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+		return NULL;
+	}
+	if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
+	    (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+		pr_warn("Subscription rejected, illegal request\n");
 		return NULL;
 	}
-
-	/* Allocate subscription object */
 	sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
 	if (!sub) {
 		pr_warn("Subscription rejected, no memory\n");
 		return NULL;
 	}
-
-	/* Initialize subscription object */
-	if (filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE)
-		goto err;
-	if (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))
-		goto err;
 	sub->server = srv;
 	sub->conid = conid;
 	sub->inactive = false;
@@ -168,24 +164,6 @@ static struct tipc_subscription *tipc_subscrp_create(struct tipc_server *srv,
 	spin_lock_init(&sub->lock);
 	atomic_inc(&tn->subscription_count);
 	kref_init(&sub->kref);
-	return sub;
-err:
-	pr_warn("Subscription rejected, illegal request\n");
-	kfree(sub);
-	return NULL;
-}
-
-struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
-						 struct tipc_subscr *s,
-						 int conid)
-{
-	struct tipc_subscription *sub = NULL;
-	u32 timeout;
-
-	sub = tipc_subscrp_create(srv, s, conid);
-	if (!sub)
-		return NULL;
-
 	tipc_nametbl_subscribe(sub);
 	timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
 	timeout = tipc_sub_read(&sub->evt.s, timeout);
@@ -194,7 +172,7 @@ struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
 	return sub;
 }
 
-void tipc_sub_delete(struct tipc_subscription *sub)
+void tipc_sub_unsubscribe(struct tipc_subscription *sub)
 {
 	tipc_nametbl_unsubscribe(sub);
 	if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index db80e41bba08..2d35f1046754 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/subscr.h: Include file for TIPC network topology service
  *
- * Copyright (c) 2003-2006, Ericsson AB
+ * Copyright (c) 2003-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2012-2013, Wind River Systems
  * All rights reserved.
  *
@@ -39,8 +39,8 @@
 
 #include "server.h"
 
-#define TIPC_MAX_SUBSCRIPTIONS	65535
-#define TIPC_MAX_PUBLICATIONS	65535
+#define TIPC_MAX_SUBSCR         65535
+#define TIPC_MAX_PUBLICATIONS   65535
 
 struct tipc_subscription;
 struct tipc_conn;
@@ -66,10 +66,10 @@ struct tipc_subscription {
 	spinlock_t lock; /* serialize up/down and timer events */
 };
 
-struct tipc_subscription *tipc_subscrp_subscribe(struct tipc_server *srv,
-						 struct tipc_subscr *s,
-						 int conid);
-void tipc_sub_delete(struct tipc_subscription *sub);
+struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
+					     struct tipc_subscr *s,
+					     int conid);
+void tipc_sub_unsubscribe(struct tipc_subscription *sub);
 
 int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 			       u32 found_upper);

From da0a75e86ae230f92743c073843d3ea35bd061af Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:48 +0100
Subject: [PATCH 0164/1678] tipc: some prefix changes

Since we now have removed struct tipc_subscriber from the code, and
only struct tipc_subscription remains, there is no longer need for long
and awkward prefixes to distinguish between their pertaining functions.

We now change all tipc_subscrp_* prefixes to tipc_sub_*. This is
a purely cosmetic change.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 33 +++++++++++++--------------
 net/tipc/server.c     |  5 ++---
 net/tipc/subscr.c     | 52 +++++++++++++++++++++----------------------
 net/tipc/subscr.h     | 20 ++++++++---------
 4 files changed, 54 insertions(+), 56 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 2fbd0a20a958..b234b7ee0b84 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -326,10 +326,10 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-		tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
-					    TIPC_PUBLISHED, publ->ref,
-					    publ->node, publ->scope,
-					    created_subseq);
+		tipc_sub_report_overlap(s, publ->lower, publ->upper,
+					TIPC_PUBLISHED, publ->ref,
+					publ->node, publ->scope,
+					created_subseq);
 	}
 	return publ;
 }
@@ -397,10 +397,9 @@ found:
 
 	/* Notify any waiting subscriptions */
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-		tipc_subscrp_report_overlap(s, publ->lower, publ->upper,
-					    TIPC_WITHDRAWN, publ->ref,
-					    publ->node, publ->scope,
-					    removed_subseq);
+		tipc_sub_report_overlap(s, publ->lower, publ->upper,
+					TIPC_WITHDRAWN, publ->ref, publ->node,
+					publ->scope, removed_subseq);
 	}
 
 	return publ;
@@ -424,25 +423,25 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 	ns.upper = tipc_sub_read(s, seq.upper);
 	no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
 
-	tipc_subscrp_get(sub);
+	tipc_sub_get(sub);
 	list_add(&sub->nameseq_list, &nseq->subscriptions);
 
 	if (no_status || !sseq)
 		return;
 
 	while (sseq != &nseq->sseqs[nseq->first_free]) {
-		if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
+		if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
 			struct publication *crs;
 			struct name_info *info = sseq->info;
 			int must_report = 1;
 
 			list_for_each_entry(crs, &info->zone_list, zone_list) {
-				tipc_subscrp_report_overlap(sub, sseq->lower,
-							    sseq->upper,
-							    TIPC_PUBLISHED,
-							    crs->ref, crs->node,
-							    crs->scope,
-							    must_report);
+				tipc_sub_report_overlap(sub, sseq->lower,
+							sseq->upper,
+							TIPC_PUBLISHED,
+							crs->ref, crs->node,
+							crs->scope,
+							must_report);
 				must_report = 0;
 			}
 		}
@@ -856,7 +855,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		list_del_init(&sub->nameseq_list);
-		tipc_subscrp_put(sub);
+		tipc_sub_put(sub);
 		if (!seq->first_free && list_empty(&seq->subscriptions)) {
 			hlist_del_init_rcu(&seq->ns_list);
 			kfree(seq->sseqs);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 6a18b10ac271..a5c112ed3bbe 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -201,7 +201,7 @@ static void tipc_con_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
 	struct tipc_subscription *sub, *tmp;
 
 	spin_lock_bh(&con->sub_lock);
-	list_for_each_entry_safe(sub, tmp, sub_list, subscrp_list) {
+	list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
 		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s)))
 			tipc_sub_unsubscribe(sub);
 		else if (s)
@@ -281,9 +281,8 @@ static int tipc_con_rcv_sub(struct tipc_server *srv,
 	sub = tipc_sub_subscribe(srv, s, con->conid);
 	if (!sub)
 		return -1;
-
 	spin_lock_bh(&con->sub_lock);
-	list_add(&sub->subscrp_list, &con->sub_list);
+	list_add(&sub->sub_list, &con->sub_list);
 	spin_unlock_bh(&con->sub_lock);
 	return 0;
 }
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 8d37b61e3163..3be1e4b6cbfa 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -38,9 +38,9 @@
 #include "name_table.h"
 #include "subscr.h"
 
-static void tipc_subscrp_send_event(struct tipc_subscription *sub,
-				    u32 found_lower, u32 found_upper,
-				    u32 event, u32 port, u32 node)
+static void tipc_sub_send_event(struct tipc_subscription *sub,
+				u32 found_lower, u32 found_upper,
+				u32 event, u32 port, u32 node)
 {
 	struct tipc_event *evt = &sub->evt;
 
@@ -55,13 +55,13 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
 }
 
 /**
- * tipc_subscrp_check_overlap - test for subscription overlap with the
+ * tipc_sub_check_overlap - test for subscription overlap with the
  * given values
  *
  * Returns 1 if there is overlap, otherwise 0.
  */
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
-			       u32 found_upper)
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+			   u32 found_upper)
 {
 	if (found_lower < seq->lower)
 		found_lower = seq->lower;
@@ -72,20 +72,20 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
 	return 1;
 }
 
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
-				 u32 found_lower, u32 found_upper,
-				 u32 event, u32 port, u32 node,
-				 u32 scope, int must)
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+			     u32 found_lower, u32 found_upper,
+			     u32 event, u32 port, u32 node,
+			     u32 scope, int must)
 {
-	struct tipc_name_seq seq;
 	struct tipc_subscr *s = &sub->evt.s;
 	u32 filter = tipc_sub_read(s, filter);
+	struct tipc_name_seq seq;
 
 	seq.type = tipc_sub_read(s, seq.type);
 	seq.lower = tipc_sub_read(s, seq.lower);
 	seq.upper = tipc_sub_read(s, seq.upper);
 
-	if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
+	if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
 		return;
 
 	if (!must && !(filter & TIPC_SUB_PORTS))
@@ -95,24 +95,24 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
 	if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
 		return;
 	spin_lock(&sub->lock);
-	tipc_subscrp_send_event(sub, found_lower, found_upper,
-				event, port, node);
+	tipc_sub_send_event(sub, found_lower, found_upper,
+			    event, port, node);
 	spin_unlock(&sub->lock);
 }
 
-static void tipc_subscrp_timeout(struct timer_list *t)
+static void tipc_sub_timeout(struct timer_list *t)
 {
 	struct tipc_subscription *sub = from_timer(sub, t, timer);
 	struct tipc_subscr *s = &sub->evt.s;
 
 	spin_lock(&sub->lock);
-	tipc_subscrp_send_event(sub, s->seq.lower, s->seq.upper,
-				TIPC_SUBSCR_TIMEOUT, 0, 0);
+	tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
+			    TIPC_SUBSCR_TIMEOUT, 0, 0);
 	sub->inactive = true;
 	spin_unlock(&sub->lock);
 }
 
-static void tipc_subscrp_kref_release(struct kref *kref)
+static void tipc_sub_kref_release(struct kref *kref)
 {
 	struct tipc_subscription *sub;
 	struct tipc_net *tn;
@@ -124,12 +124,12 @@ static void tipc_subscrp_kref_release(struct kref *kref)
 	kfree(sub);
 }
 
-void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_sub_put(struct tipc_subscription *subscription)
 {
-	kref_put(&subscription->kref, tipc_subscrp_kref_release);
+	kref_put(&subscription->kref, tipc_sub_kref_release);
 }
 
-void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_sub_get(struct tipc_subscription *subscription)
 {
 	kref_get(&subscription->kref);
 }
@@ -139,8 +139,8 @@ struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
 					     int conid)
 {
 	struct tipc_net *tn = tipc_net(srv->net);
-	struct tipc_subscription *sub;
 	u32 filter = tipc_sub_read(s, filter);
+	struct tipc_subscription *sub;
 	u32 timeout;
 
 	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
@@ -165,7 +165,7 @@ struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
 	atomic_inc(&tn->subscription_count);
 	kref_init(&sub->kref);
 	tipc_nametbl_subscribe(sub);
-	timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
+	timer_setup(&sub->timer, tipc_sub_timeout, 0);
 	timeout = tipc_sub_read(&sub->evt.s, timeout);
 	if (timeout != TIPC_WAIT_FOREVER)
 		mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
@@ -177,16 +177,16 @@ void tipc_sub_unsubscribe(struct tipc_subscription *sub)
 	tipc_nametbl_unsubscribe(sub);
 	if (sub->evt.s.timeout != TIPC_WAIT_FOREVER)
 		del_timer_sync(&sub->timer);
-	list_del(&sub->subscrp_list);
-	tipc_subscrp_put(sub);
+	list_del(&sub->sub_list);
+	tipc_sub_put(sub);
 }
 
 int tipc_topsrv_start(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	const char name[] = "topology_server";
-	struct tipc_server *topsrv;
 	struct sockaddr_tipc *saddr;
+	struct tipc_server *topsrv;
 
 	saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
 	if (!saddr)
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 2d35f1046754..720932896ba6 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -51,7 +51,7 @@ struct tipc_conn;
  * @seq: name sequence associated with subscription
  * @timer: timer governing subscription duration (optional)
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
- * @subscrp_list: adjacent subscriptions in subscriber's subscription list
+ * @sub_list: adjacent subscriptions in subscriber's subscription list
  * @evt: template for events generated by subscription
  */
 struct tipc_subscription {
@@ -59,7 +59,7 @@ struct tipc_subscription {
 	struct tipc_server *server;
 	struct timer_list timer;
 	struct list_head nameseq_list;
-	struct list_head subscrp_list;
+	struct list_head sub_list;
 	struct tipc_event evt;
 	int conid;
 	bool inactive;
@@ -71,17 +71,17 @@ struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
 					     int conid);
 void tipc_sub_unsubscribe(struct tipc_subscription *sub);
 
-int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
-			       u32 found_upper);
-void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
-				 u32 found_lower, u32 found_upper,
-				 u32 event, u32 port, u32 node,
-				 u32 scope, int must);
+int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
+			   u32 found_upper);
+void tipc_sub_report_overlap(struct tipc_subscription *sub,
+			     u32 found_lower, u32 found_upper,
+			     u32 event, u32 port, u32 node,
+			     u32 scope, int must);
 int tipc_topsrv_start(struct net *net);
 void tipc_topsrv_stop(struct net *net);
 
-void tipc_subscrp_put(struct tipc_subscription *subscription);
-void tipc_subscrp_get(struct tipc_subscription *subscription);
+void tipc_sub_put(struct tipc_subscription *subscription);
+void tipc_sub_get(struct tipc_subscription *subscription);
 
 #define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL)
 

From 5c45ab24ac77ea32eae7d3576cf37c3ddb259f80 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:49 +0100
Subject: [PATCH 0165/1678] tipc: make struct tipc_server private for server.c

In order to narrow the interface and dependencies between the topology
server and the subscription/binding table functionality we move struct
tipc_server inside the file server.c. This requires some code
adaptations in other files, but those are mostly minor.

The most important change is that we have to move the start/stop
functions for the topology server to server.c, where they logically
belong anyway.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c |  10 ++--
 net/tipc/server.c     | 124 +++++++++++++++++++++++++++++++++---------
 net/tipc/server.h     |  36 +-----------
 net/tipc/subscr.c     |  64 ++--------------------
 net/tipc/subscr.h     |   4 +-
 5 files changed, 110 insertions(+), 128 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index b234b7ee0b84..e01c9c691ba2 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -813,8 +813,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
  */
 void tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
-	struct tipc_server *srv = sub->server;
-	struct tipc_net *tn = tipc_net(srv->net);
+	struct tipc_net *tn = tipc_net(sub->net);
 	struct tipc_subscr *s = &sub->evt.s;
 	u32 type = tipc_sub_read(s, seq.type);
 	int index = hash(type);
@@ -822,7 +821,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *sub)
 	struct tipc_name_seq ns;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	seq = nametbl_find_seq(srv->net, type);
+	seq = nametbl_find_seq(sub->net, type);
 	if (!seq)
 		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
 	if (seq) {
@@ -844,14 +843,13 @@ void tipc_nametbl_subscribe(struct tipc_subscription *sub)
  */
 void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 {
-	struct tipc_server *srv = sub->server;
 	struct tipc_subscr *s = &sub->evt.s;
-	struct tipc_net *tn = tipc_net(srv->net);
+	struct tipc_net *tn = tipc_net(sub->net);
 	struct name_seq *seq;
 	u32 type = tipc_sub_read(s, seq.type);
 
 	spin_lock_bh(&tn->nametbl_lock);
-	seq = nametbl_find_seq(srv->net, type);
+	seq = nametbl_find_seq(sub->net, type);
 	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		list_del_init(&sub->nameseq_list);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index a5c112ed3bbe..0abbdd698662 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -49,7 +49,37 @@
 #define CF_CONNECTED		1
 #define CF_SERVER		2
 
-#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data)
+#define TIPC_SERVER_NAME_LEN	32
+
+/**
+ * struct tipc_server - TIPC server structure
+ * @conn_idr: identifier set of connection
+ * @idr_lock: protect the connection identifier set
+ * @idr_in_use: amount of allocated identifier entry
+ * @net: network namspace instance
+ * @rcvbuf_cache: memory cache of server receive buffer
+ * @rcv_wq: receive workqueue
+ * @send_wq: send workqueue
+ * @max_rcvbuf_size: maximum permitted receive message length
+ * @tipc_conn_new: callback will be called when new connection is incoming
+ * @tipc_conn_release: callback will be called before releasing the connection
+ * @tipc_conn_recvmsg: callback will be called when message arrives
+ * @saddr: TIPC server address
+ * @name: server name
+ * @imp: message importance
+ * @type: socket type
+ */
+struct tipc_server {
+	struct idr conn_idr;
+	spinlock_t idr_lock; /* for idr list */
+	int idr_in_use;
+	struct net *net;
+	struct workqueue_struct *rcv_wq;
+	struct workqueue_struct *send_wq;
+	int max_rcvbuf_size;
+	struct sockaddr_tipc *saddr;
+	char name[TIPC_SERVER_NAME_LEN];
+};
 
 /**
  * struct tipc_conn - TIPC connection structure
@@ -93,6 +123,11 @@ static void tipc_recv_work(struct work_struct *work);
 static void tipc_send_work(struct work_struct *work);
 static void tipc_clean_outqueues(struct tipc_conn *con);
 
+static struct tipc_conn *sock2con(struct sock *sk)
+{
+	return sk->sk_user_data;
+}
+
 static bool connected(struct tipc_conn *con)
 {
 	return con && test_bit(CF_CONNECTED, &con->flags);
@@ -198,14 +233,17 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
 static void tipc_con_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
 {
 	struct list_head *sub_list = &con->sub_list;
+	struct tipc_net *tn = tipc_net(con->server->net);
 	struct tipc_subscription *sub, *tmp;
 
 	spin_lock_bh(&con->sub_lock);
 	list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
-		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s)))
+		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
 			tipc_sub_unsubscribe(sub);
-		else if (s)
+			atomic_dec(&tn->subscription_count);
+		} else if (s) {
 			break;
+		}
 	}
 	spin_unlock_bh(&con->sub_lock);
 }
@@ -220,8 +258,7 @@ static void tipc_close_conn(struct tipc_conn *con)
 
 	if (disconnect) {
 		sk->sk_user_data = NULL;
-		if (con->conid)
-			tipc_con_delete_sub(con, NULL);
+		tipc_con_delete_sub(con, NULL);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -272,15 +309,21 @@ static int tipc_con_rcv_sub(struct tipc_server *srv,
 			    struct tipc_conn *con,
 			    struct tipc_subscr *s)
 {
+	struct tipc_net *tn = tipc_net(srv->net);
 	struct tipc_subscription *sub;
 
 	if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
 		tipc_con_delete_sub(con, s);
 		return 0;
 	}
-	sub = tipc_sub_subscribe(srv, s, con->conid);
+	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+		pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+		return -1;
+	}
+	sub = tipc_sub_subscribe(srv->net, s, con->conid);
 	if (!sub)
 		return -1;
+	atomic_inc(&tn->subscription_count);
 	spin_lock_bh(&con->sub_lock);
 	list_add(&sub->sub_list, &con->sub_list);
 	spin_unlock_bh(&con->sub_lock);
@@ -426,13 +469,14 @@ static void tipc_clean_outqueues(struct tipc_conn *con)
 /* tipc_conn_queue_evt - interrupt level call from a subscription instance
  * The queued job is launched in tipc_send_to_sock()
  */
-void tipc_conn_queue_evt(struct tipc_server *s, int conid,
+void tipc_conn_queue_evt(struct net *net, int conid,
 			 u32 event, struct tipc_event *evt)
 {
+	struct tipc_server *srv = tipc_topsrv(net);
 	struct outqueue_entry *e;
 	struct tipc_conn *con;
 
-	con = tipc_conn_lookup(s, conid);
+	con = tipc_conn_lookup(srv, conid);
 	if (!con)
 		return;
 
@@ -448,7 +492,7 @@ void tipc_conn_queue_evt(struct tipc_server *s, int conid,
 	list_add_tail(&e->list, &con->outqueue);
 	spin_unlock_bh(&con->outqueue_lock);
 
-	if (queue_work(s->send_wq, &con->swork))
+	if (queue_work(srv->send_wq, &con->swork))
 		return;
 err:
 	conn_put(con);
@@ -620,41 +664,71 @@ static int tipc_work_start(struct tipc_server *s)
 	return 0;
 }
 
-int tipc_server_start(struct tipc_server *s)
+int tipc_topsrv_start(struct net *net)
 {
+	struct tipc_net *tn = tipc_net(net);
+	const char name[] = "topology_server";
+	struct sockaddr_tipc *saddr;
+	struct tipc_server *srv;
 	int ret;
 
-	spin_lock_init(&s->idr_lock);
-	idr_init(&s->conn_idr);
-	s->idr_in_use = 0;
+	saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
+	if (!saddr)
+		return -ENOMEM;
+	saddr->family			= AF_TIPC;
+	saddr->addrtype			= TIPC_ADDR_NAMESEQ;
+	saddr->addr.nameseq.type	= TIPC_TOP_SRV;
+	saddr->addr.nameseq.lower	= TIPC_TOP_SRV;
+	saddr->addr.nameseq.upper	= TIPC_TOP_SRV;
+	saddr->scope			= TIPC_NODE_SCOPE;
 
-	ret = tipc_work_start(s);
+	srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
+	if (!srv) {
+		kfree(saddr);
+		return -ENOMEM;
+	}
+	srv->net			= net;
+	srv->saddr			= saddr;
+	srv->max_rcvbuf_size		= sizeof(struct tipc_subscr);
+
+	strncpy(srv->name, name, strlen(name) + 1);
+	tn->topsrv = srv;
+	atomic_set(&tn->subscription_count, 0);
+
+	spin_lock_init(&srv->idr_lock);
+	idr_init(&srv->conn_idr);
+	srv->idr_in_use = 0;
+
+	ret = tipc_work_start(srv);
 	if (ret < 0)
 		return ret;
 
-	ret = tipc_open_listening_sock(s);
+	ret = tipc_open_listening_sock(srv);
 	if (ret < 0)
-		tipc_work_stop(s);
+		tipc_work_stop(srv);
 
 	return ret;
 }
 
-void tipc_server_stop(struct tipc_server *s)
+void tipc_topsrv_stop(struct net *net)
 {
+	struct tipc_server *srv = tipc_topsrv(net);
 	struct tipc_conn *con;
 	int id;
 
-	spin_lock_bh(&s->idr_lock);
-	for (id = 0; s->idr_in_use; id++) {
-		con = idr_find(&s->conn_idr, id);
+	spin_lock_bh(&srv->idr_lock);
+	for (id = 0; srv->idr_in_use; id++) {
+		con = idr_find(&srv->conn_idr, id);
 		if (con) {
-			spin_unlock_bh(&s->idr_lock);
+			spin_unlock_bh(&srv->idr_lock);
 			tipc_close_conn(con);
-			spin_lock_bh(&s->idr_lock);
+			spin_lock_bh(&srv->idr_lock);
 		}
 	}
-	spin_unlock_bh(&s->idr_lock);
+	spin_unlock_bh(&srv->idr_lock);
 
-	tipc_work_stop(s);
-	idr_destroy(&s->conn_idr);
+	tipc_work_stop(srv);
+	idr_destroy(&srv->conn_idr);
+	kfree(srv->saddr);
+	kfree(srv);
 }
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 995b79591ffe..ce93b6d68e41 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -47,45 +47,11 @@
 #define TIPC_SUB_NODE_SCOPE     0x40
 #define TIPC_SUB_NO_STATUS      0x80
 
-/**
- * struct tipc_server - TIPC server structure
- * @conn_idr: identifier set of connection
- * @idr_lock: protect the connection identifier set
- * @idr_in_use: amount of allocated identifier entry
- * @net: network namspace instance
- * @rcvbuf_cache: memory cache of server receive buffer
- * @rcv_wq: receive workqueue
- * @send_wq: send workqueue
- * @max_rcvbuf_size: maximum permitted receive message length
- * @tipc_conn_new: callback will be called when new connection is incoming
- * @tipc_conn_release: callback will be called before releasing the connection
- * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
- * @name: server name
- * @imp: message importance
- * @type: socket type
- */
-struct tipc_server {
-	struct idr conn_idr;
-	spinlock_t idr_lock;
-	int idr_in_use;
-	struct net *net;
-	struct workqueue_struct *rcv_wq;
-	struct workqueue_struct *send_wq;
-	int max_rcvbuf_size;
-	struct sockaddr_tipc *saddr;
-	char name[TIPC_SERVER_NAME_LEN];
-};
-
-void tipc_conn_queue_evt(struct tipc_server *s, int conid,
+void tipc_conn_queue_evt(struct net *net, int conid,
 			 u32 event, struct tipc_event *evt);
 
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 			     u32 upper, u32 filter, int *conid);
 void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
 
-int tipc_server_start(struct tipc_server *s);
-
-void tipc_server_stop(struct tipc_server *s);
-
 #endif
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 3be1e4b6cbfa..c8146568d04e 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -51,7 +51,7 @@ static void tipc_sub_send_event(struct tipc_subscription *sub,
 	tipc_evt_write(evt, found_upper, found_upper);
 	tipc_evt_write(evt, port.ref, port);
 	tipc_evt_write(evt, port.node, node);
-	tipc_conn_queue_evt(sub->server, sub->conid, event, evt);
+	tipc_conn_queue_evt(sub->net, sub->conid, event, evt);
 }
 
 /**
@@ -114,14 +114,7 @@ static void tipc_sub_timeout(struct timer_list *t)
 
 static void tipc_sub_kref_release(struct kref *kref)
 {
-	struct tipc_subscription *sub;
-	struct tipc_net *tn;
-
-	sub = container_of(kref, struct tipc_subscription, kref);
-	tn = tipc_net(sub->server->net);
-
-	atomic_dec(&tn->subscription_count);
-	kfree(sub);
+	kfree(container_of(kref, struct tipc_subscription, kref));
 }
 
 void tipc_sub_put(struct tipc_subscription *subscription)
@@ -134,19 +127,14 @@ void tipc_sub_get(struct tipc_subscription *subscription)
 	kref_get(&subscription->kref);
 }
 
-struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 					     struct tipc_subscr *s,
 					     int conid)
 {
-	struct tipc_net *tn = tipc_net(srv->net);
 	u32 filter = tipc_sub_read(s, filter);
 	struct tipc_subscription *sub;
 	u32 timeout;
 
-	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
-		pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
-		return NULL;
-	}
 	if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
 	    (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
 		pr_warn("Subscription rejected, illegal request\n");
@@ -157,12 +145,11 @@ struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
 		pr_warn("Subscription rejected, no memory\n");
 		return NULL;
 	}
-	sub->server = srv;
+	sub->net = net;
 	sub->conid = conid;
 	sub->inactive = false;
 	memcpy(&sub->evt.s, s, sizeof(*s));
 	spin_lock_init(&sub->lock);
-	atomic_inc(&tn->subscription_count);
 	kref_init(&sub->kref);
 	tipc_nametbl_subscribe(sub);
 	timer_setup(&sub->timer, tipc_sub_timeout, 0);
@@ -180,46 +167,3 @@ void tipc_sub_unsubscribe(struct tipc_subscription *sub)
 	list_del(&sub->sub_list);
 	tipc_sub_put(sub);
 }
-
-int tipc_topsrv_start(struct net *net)
-{
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	const char name[] = "topology_server";
-	struct sockaddr_tipc *saddr;
-	struct tipc_server *topsrv;
-
-	saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
-	if (!saddr)
-		return -ENOMEM;
-	saddr->family			= AF_TIPC;
-	saddr->addrtype			= TIPC_ADDR_NAMESEQ;
-	saddr->addr.nameseq.type	= TIPC_TOP_SRV;
-	saddr->addr.nameseq.lower	= TIPC_TOP_SRV;
-	saddr->addr.nameseq.upper	= TIPC_TOP_SRV;
-	saddr->scope			= TIPC_NODE_SCOPE;
-
-	topsrv = kzalloc(sizeof(*topsrv), GFP_ATOMIC);
-	if (!topsrv) {
-		kfree(saddr);
-		return -ENOMEM;
-	}
-	topsrv->net			= net;
-	topsrv->saddr			= saddr;
-	topsrv->max_rcvbuf_size		= sizeof(struct tipc_subscr);
-
-	strncpy(topsrv->name, name, strlen(name) + 1);
-	tn->topsrv = topsrv;
-	atomic_set(&tn->subscription_count, 0);
-
-	return tipc_server_start(topsrv);
-}
-
-void tipc_topsrv_stop(struct net *net)
-{
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct tipc_server *topsrv = tn->topsrv;
-
-	tipc_server_stop(topsrv);
-	kfree(topsrv->saddr);
-	kfree(topsrv);
-}
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 720932896ba6..82ba61afe638 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -56,7 +56,7 @@ struct tipc_conn;
  */
 struct tipc_subscription {
 	struct kref kref;
-	struct tipc_server *server;
+	struct net *net;
 	struct timer_list timer;
 	struct list_head nameseq_list;
 	struct list_head sub_list;
@@ -66,7 +66,7 @@ struct tipc_subscription {
 	spinlock_t lock; /* serialize up/down and timer events */
 };
 
-struct tipc_subscription *tipc_sub_subscribe(struct tipc_server *srv,
+struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 					     struct tipc_subscr *s,
 					     int conid);
 void tipc_sub_unsubscribe(struct tipc_subscription *sub);

From 0ef897be12b8b4cf297b6016e79ec97ec90f2cf6 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:50 +0100
Subject: [PATCH 0166/1678] tipc: separate topology server listener socket from
 subcsriber sockets

We move the listener socket to struct tipc_server and give it its own
work item. This makes it easier to follow the code, and entails some
simplifications in the reception code in subscriber sockets.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/server.c | 328 +++++++++++++++++++++-------------------------
 1 file changed, 147 insertions(+), 181 deletions(-)

diff --git a/net/tipc/server.c b/net/tipc/server.c
index 0abbdd698662..0e351e81690e 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -64,7 +64,6 @@
  * @tipc_conn_new: callback will be called when new connection is incoming
  * @tipc_conn_release: callback will be called before releasing the connection
  * @tipc_conn_recvmsg: callback will be called when message arrives
- * @saddr: TIPC server address
  * @name: server name
  * @imp: message importance
  * @type: socket type
@@ -74,10 +73,11 @@ struct tipc_server {
 	spinlock_t idr_lock; /* for idr list */
 	int idr_in_use;
 	struct net *net;
+	struct work_struct awork;
 	struct workqueue_struct *rcv_wq;
 	struct workqueue_struct *send_wq;
 	int max_rcvbuf_size;
-	struct sockaddr_tipc *saddr;
+	struct socket *listener;
 	char name[TIPC_SERVER_NAME_LEN];
 };
 
@@ -106,7 +106,6 @@ struct tipc_conn {
 	struct list_head sub_list;
 	spinlock_t sub_lock; /* for subscription list */
 	struct work_struct rwork;
-	int (*rx_action) (struct tipc_conn *con);
 	struct list_head outqueue;
 	spinlock_t outqueue_lock;
 	struct work_struct swork;
@@ -121,12 +120,6 @@ struct outqueue_entry {
 
 static void tipc_recv_work(struct work_struct *work);
 static void tipc_send_work(struct work_struct *work);
-static void tipc_clean_outqueues(struct tipc_conn *con);
-
-static struct tipc_conn *sock2con(struct sock *sk)
-{
-	return sk->sk_user_data;
-}
 
 static bool connected(struct tipc_conn *con)
 {
@@ -137,21 +130,21 @@ static void tipc_conn_kref_release(struct kref *kref)
 {
 	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
 	struct tipc_server *s = con->server;
-	struct socket *sock = con->sock;
+	struct outqueue_entry *e, *safe;
 
-	if (sock) {
-		if (test_bit(CF_SERVER, &con->flags)) {
-			__module_get(sock->ops->owner);
-			__module_get(sock->sk->sk_prot_creator->owner);
-		}
-		sock_release(sock);
-		con->sock = NULL;
-	}
 	spin_lock_bh(&s->idr_lock);
 	idr_remove(&s->conn_idr, con->conid);
 	s->idr_in_use--;
 	spin_unlock_bh(&s->idr_lock);
-	tipc_clean_outqueues(con);
+	if (con->sock)
+		sock_release(con->sock);
+
+	spin_lock_bh(&con->outqueue_lock);
+	list_for_each_entry_safe(e, safe, &con->outqueue, list) {
+		list_del(&e->list);
+		kfree(e);
+	}
+	spin_unlock_bh(&con->outqueue_lock);
 	kfree(con);
 }
 
@@ -178,14 +171,14 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
 }
 
 /* sock_data_ready - interrupt callback indicating the socket has data to read
- * The queued job is launched in tipc_recv_from_sock()
+ * The queued work is launched into tipc_recv_work()->tipc_recv_from_sock()
  */
 static void sock_data_ready(struct sock *sk)
 {
 	struct tipc_conn *con;
 
 	read_lock_bh(&sk->sk_callback_lock);
-	con = sock2con(sk);
+	con = sk->sk_user_data;
 	if (connected(con)) {
 		conn_get(con);
 		if (!queue_work(con->server->rcv_wq, &con->rwork))
@@ -195,15 +188,15 @@ static void sock_data_ready(struct sock *sk)
 }
 
 /* sock_write_space - interrupt callback after a sendmsg EAGAIN
- * Indicates that there now is more is space in the send buffer
- * The queued job is launched in tipc_send_to_sock()
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
  */
 static void sock_write_space(struct sock *sk)
 {
 	struct tipc_conn *con;
 
 	read_lock_bh(&sk->sk_callback_lock);
-	con = sock2con(sk);
+	con = sk->sk_user_data;
 	if (connected(con)) {
 		conn_get(con);
 		if (!queue_work(con->server->send_wq, &con->swork))
@@ -212,23 +205,8 @@ static void sock_write_space(struct sock *sk)
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con)
-{
-	struct sock *sk = sock->sk;
-
-	write_lock_bh(&sk->sk_callback_lock);
-
-	sk->sk_data_ready = sock_data_ready;
-	sk->sk_write_space = sock_write_space;
-	sk->sk_user_data = con;
-
-	con->sock = sock;
-
-	write_unlock_bh(&sk->sk_callback_lock);
-}
-
 /* tipc_con_delete_sub - delete a specific or all subscriptions
- *  for a given subscriber
+ * for a given subscriber
  */
 static void tipc_con_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
 {
@@ -268,6 +246,7 @@ static void tipc_close_conn(struct tipc_conn *con)
 
 	/* Don't flush pending works, -just let them expire */
 	kernel_sock_shutdown(con->sock, SHUT_RDWR);
+
 	conn_put(con);
 }
 
@@ -357,117 +336,8 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
 	return ret;
 }
 
-static int tipc_accept_from_sock(struct tipc_conn *con)
-{
-	struct socket *sock = con->sock;
-	struct socket *newsock;
-	struct tipc_conn *newcon;
-	int ret;
-
-	ret = kernel_accept(sock, &newsock, O_NONBLOCK);
-	if (ret < 0)
-		return ret;
-
-	newcon = tipc_alloc_conn(con->server);
-	if (IS_ERR(newcon)) {
-		ret = PTR_ERR(newcon);
-		sock_release(newsock);
-		return ret;
-	}
-
-	newcon->rx_action = tipc_receive_from_sock;
-	tipc_register_callbacks(newsock, newcon);
-
-	/* Wake up receive process in case of 'SYN+' message */
-	newsock->sk->sk_data_ready(newsock->sk);
-	return ret;
-}
-
-static struct socket *tipc_create_listen_sock(struct tipc_conn *con)
-{
-	struct tipc_server *s = con->server;
-	struct socket *sock = NULL;
-	int imp = TIPC_CRITICAL_IMPORTANCE;
-	int ret;
-
-	ret = sock_create_kern(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock);
-	if (ret < 0)
-		return NULL;
-	ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE,
-				(char *)&imp, sizeof(imp));
-	if (ret < 0)
-		goto create_err;
-	ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr));
-	if (ret < 0)
-		goto create_err;
-
-	con->rx_action = tipc_accept_from_sock;
-	ret = kernel_listen(sock, 0);
-	if (ret < 0)
-		goto create_err;
-
-	/* As server's listening socket owner and creator is the same module,
-	 * we have to decrease TIPC module reference count to guarantee that
-	 * it remains zero after the server socket is created, otherwise,
-	 * executing "rmmod" command is unable to make TIPC module deleted
-	 * after TIPC module is inserted successfully.
-	 *
-	 * However, the reference count is ever increased twice in
-	 * sock_create_kern(): one is to increase the reference count of owner
-	 * of TIPC socket's proto_ops struct; another is to increment the
-	 * reference count of owner of TIPC proto struct. Therefore, we must
-	 * decrement the module reference count twice to ensure that it keeps
-	 * zero after server's listening socket is created. Of course, we
-	 * must bump the module reference count twice as well before the socket
-	 * is closed.
-	 */
-	module_put(sock->ops->owner);
-	module_put(sock->sk->sk_prot_creator->owner);
-	set_bit(CF_SERVER, &con->flags);
-
-	return sock;
-
-create_err:
-	kernel_sock_shutdown(sock, SHUT_RDWR);
-	sock_release(sock);
-	return NULL;
-}
-
-static int tipc_open_listening_sock(struct tipc_server *s)
-{
-	struct socket *sock;
-	struct tipc_conn *con;
-
-	con = tipc_alloc_conn(s);
-	if (IS_ERR(con))
-		return PTR_ERR(con);
-
-	sock = tipc_create_listen_sock(con);
-	if (!sock) {
-		idr_remove(&s->conn_idr, con->conid);
-		s->idr_in_use--;
-		kfree(con);
-		return -EINVAL;
-	}
-
-	tipc_register_callbacks(sock, con);
-	return 0;
-}
-
-static void tipc_clean_outqueues(struct tipc_conn *con)
-{
-	struct outqueue_entry *e, *safe;
-
-	spin_lock_bh(&con->outqueue_lock);
-	list_for_each_entry_safe(e, safe, &con->outqueue, list) {
-		list_del(&e->list);
-		kfree(e);
-	}
-	spin_unlock_bh(&con->outqueue_lock);
-}
-
-/* tipc_conn_queue_evt - interrupt level call from a subscription instance
- * The queued job is launched in tipc_send_to_sock()
+/* tipc_conn_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
  */
 void tipc_conn_queue_evt(struct net *net, int conid,
 			 u32 event, struct tipc_event *evt)
@@ -588,9 +458,9 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 					     1, sizeof(*evt));
 			if (ret == -EWOULDBLOCK || ret == 0) {
 				cond_resched();
-				goto out;
+				return;
 			} else if (ret < 0) {
-				goto err;
+				return tipc_close_conn(con);
 			}
 		} else {
 			tipc_send_kern_top_evt(srv->net, evt);
@@ -606,10 +476,6 @@ static void tipc_send_to_sock(struct tipc_conn *con)
 		kfree(e);
 	}
 	spin_unlock_bh(&con->outqueue_lock);
-out:
-	return;
-err:
-	tipc_close_conn(con);
 }
 
 static void tipc_recv_work(struct work_struct *work)
@@ -618,7 +484,7 @@ static void tipc_recv_work(struct work_struct *work)
 	int count = 0;
 
 	while (connected(con)) {
-		if (con->rx_action(con))
+		if (tipc_receive_from_sock(con))
 			break;
 
 		/* Don't flood Rx machine */
@@ -640,10 +506,113 @@ static void tipc_send_work(struct work_struct *work)
 	conn_put(con);
 }
 
-static void tipc_work_stop(struct tipc_server *s)
+static void tipc_accept_from_sock(struct work_struct *work)
 {
-	destroy_workqueue(s->rcv_wq);
-	destroy_workqueue(s->send_wq);
+	struct tipc_server *srv = container_of(work, struct tipc_server, awork);
+	struct socket *lsock = srv->listener;
+	struct socket *newsock;
+	struct tipc_conn *con;
+	struct sock *newsk;
+	int ret;
+
+	while (1) {
+		ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
+		if (ret < 0)
+			return;
+		con = tipc_alloc_conn(srv);
+		if (IS_ERR(con)) {
+			ret = PTR_ERR(con);
+			sock_release(newsock);
+			return;
+		}
+		/* Register callbacks */
+		newsk = newsock->sk;
+		write_lock_bh(&newsk->sk_callback_lock);
+		newsk->sk_data_ready = sock_data_ready;
+		newsk->sk_write_space = sock_write_space;
+		newsk->sk_user_data = con;
+		con->sock = newsock;
+		write_unlock_bh(&newsk->sk_callback_lock);
+
+		/* Wake up receive process in case of 'SYN+' message */
+		newsk->sk_data_ready(newsk);
+	}
+}
+
+/* listener_sock_data_ready - interrupt callback indicating new connection
+ * The queued job is launched into tipc_accept_from_sock()
+ */
+static void listener_sock_data_ready(struct sock *sk)
+{
+	struct tipc_server *srv;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	srv = sk->sk_user_data;
+	if (srv->listener)
+		queue_work(srv->rcv_wq, &srv->awork);
+	read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int tipc_create_listener_sock(struct tipc_server *srv)
+{
+	int imp = TIPC_CRITICAL_IMPORTANCE;
+	struct socket *lsock = NULL;
+	struct sockaddr_tipc saddr;
+	struct sock *sk;
+	int rc;
+
+	rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock);
+	if (rc < 0)
+		return rc;
+
+	srv->listener = lsock;
+	sk = lsock->sk;
+	write_lock_bh(&sk->sk_callback_lock);
+	sk->sk_data_ready = listener_sock_data_ready;
+	sk->sk_user_data = srv;
+	write_unlock_bh(&sk->sk_callback_lock);
+
+	rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
+			       (char *)&imp, sizeof(imp));
+	if (rc < 0)
+		goto err;
+
+	saddr.family	                = AF_TIPC;
+	saddr.addrtype		        = TIPC_ADDR_NAMESEQ;
+	saddr.addr.nameseq.type	        = TIPC_TOP_SRV;
+	saddr.addr.nameseq.lower	= TIPC_TOP_SRV;
+	saddr.addr.nameseq.upper	= TIPC_TOP_SRV;
+	saddr.scope			= TIPC_NODE_SCOPE;
+
+	rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (rc < 0)
+		goto err;
+	rc = kernel_listen(lsock, 0);
+	if (rc < 0)
+		goto err;
+
+	/* As server's listening socket owner and creator is the same module,
+	 * we have to decrease TIPC module reference count to guarantee that
+	 * it remains zero after the server socket is created, otherwise,
+	 * executing "rmmod" command is unable to make TIPC module deleted
+	 * after TIPC module is inserted successfully.
+	 *
+	 * However, the reference count is ever increased twice in
+	 * sock_create_kern(): one is to increase the reference count of owner
+	 * of TIPC socket's proto_ops struct; another is to increment the
+	 * reference count of owner of TIPC proto struct. Therefore, we must
+	 * decrement the module reference count twice to ensure that it keeps
+	 * zero after server's listening socket is created. Of course, we
+	 * must bump the module reference count twice as well before the socket
+	 * is closed.
+	 */
+	module_put(lsock->ops->owner);
+	module_put(sk->sk_prot_creator->owner);
+
+	return 0;
+err:
+	sock_release(lsock);
+	return -EINVAL;
 }
 
 static int tipc_work_start(struct tipc_server *s)
@@ -664,32 +633,26 @@ static int tipc_work_start(struct tipc_server *s)
 	return 0;
 }
 
+static void tipc_work_stop(struct tipc_server *s)
+{
+	destroy_workqueue(s->rcv_wq);
+	destroy_workqueue(s->send_wq);
+}
+
 int tipc_topsrv_start(struct net *net)
 {
 	struct tipc_net *tn = tipc_net(net);
 	const char name[] = "topology_server";
-	struct sockaddr_tipc *saddr;
 	struct tipc_server *srv;
 	int ret;
 
-	saddr = kzalloc(sizeof(*saddr), GFP_ATOMIC);
-	if (!saddr)
-		return -ENOMEM;
-	saddr->family			= AF_TIPC;
-	saddr->addrtype			= TIPC_ADDR_NAMESEQ;
-	saddr->addr.nameseq.type	= TIPC_TOP_SRV;
-	saddr->addr.nameseq.lower	= TIPC_TOP_SRV;
-	saddr->addr.nameseq.upper	= TIPC_TOP_SRV;
-	saddr->scope			= TIPC_NODE_SCOPE;
-
 	srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
-	if (!srv) {
-		kfree(saddr);
+	if (!srv)
 		return -ENOMEM;
-	}
-	srv->net			= net;
-	srv->saddr			= saddr;
-	srv->max_rcvbuf_size		= sizeof(struct tipc_subscr);
+
+	srv->net = net;
+	srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
+	INIT_WORK(&srv->awork, tipc_accept_from_sock);
 
 	strncpy(srv->name, name, strlen(name) + 1);
 	tn->topsrv = srv;
@@ -703,7 +666,7 @@ int tipc_topsrv_start(struct net *net)
 	if (ret < 0)
 		return ret;
 
-	ret = tipc_open_listening_sock(srv);
+	ret = tipc_create_listener_sock(srv);
 	if (ret < 0)
 		tipc_work_stop(srv);
 
@@ -713,6 +676,7 @@ int tipc_topsrv_start(struct net *net)
 void tipc_topsrv_stop(struct net *net)
 {
 	struct tipc_server *srv = tipc_topsrv(net);
+	struct socket *lsock = srv->listener;
 	struct tipc_conn *con;
 	int id;
 
@@ -725,10 +689,12 @@ void tipc_topsrv_stop(struct net *net)
 			spin_lock_bh(&srv->idr_lock);
 		}
 	}
+	__module_get(lsock->ops->owner);
+	__module_get(lsock->sk->sk_prot_creator->owner);
+	sock_release(lsock);
+	srv->listener = NULL;
 	spin_unlock_bh(&srv->idr_lock);
-
 	tipc_work_stop(srv);
 	idr_destroy(&srv->conn_idr);
-	kfree(srv->saddr);
 	kfree(srv);
 }

From 026321c6d056a54b4145522492245d2b5913ee1d Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Feb 2018 10:40:51 +0100
Subject: [PATCH 0167/1678] tipc: rename tipc_server to tipc_topsrv

We rename struct tipc_server to struct tipc_topsrv. This reflect its now
specialized role as topology server. Accoringly, we change or add function
prefixes to make it clearer which functionality those belong to.

There are no functional changes in this commit.

Acked-by: Ying.Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/Makefile               |   2 +-
 net/tipc/core.h                 |   6 +-
 net/tipc/group.c                |   2 +-
 net/tipc/subscr.c               |   2 +-
 net/tipc/subscr.h               |   2 +-
 net/tipc/{server.c => topsrv.c} | 502 ++++++++++++++++----------------
 net/tipc/{server.h => topsrv.h} |   7 +-
 7 files changed, 261 insertions(+), 262 deletions(-)
 rename net/tipc/{server.c => topsrv.c} (81%)
 rename net/tipc/{server.h => topsrv.h} (92%)

diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 37bb0bfbd936..1edb7192aa2f 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,7 +9,7 @@ tipc-y	+= addr.o bcast.o bearer.o \
 	   core.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o monitor.o name_table.o net.o  \
 	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
-	   server.o socket.o group.o
+	   topsrv.o socket.o group.o
 
 tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 20b21af2ff14..ff8b071654f5 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -64,7 +64,7 @@ struct tipc_bearer;
 struct tipc_bc_base;
 struct tipc_link;
 struct tipc_name_table;
-struct tipc_server;
+struct tipc_topsrv;
 struct tipc_monitor;
 
 #define TIPC_MOD_VER "2.0.0"
@@ -112,7 +112,7 @@ struct tipc_net {
 	struct list_head dist_queue;
 
 	/* Topology subscription server */
-	struct tipc_server *topsrv;
+	struct tipc_topsrv *topsrv;
 	atomic_t subscription_count;
 };
 
@@ -131,7 +131,7 @@ static inline struct list_head *tipc_nodes(struct net *net)
 	return &tipc_net(net)->node_list;
 }
 
-static inline struct tipc_server *tipc_topsrv(struct net *net)
+static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
 {
 	return tipc_net(net)->topsrv;
 }
diff --git a/net/tipc/group.c b/net/tipc/group.c
index 122162a31816..03086ccb7746 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -37,7 +37,7 @@
 #include "addr.h"
 #include "group.h"
 #include "bcast.h"
-#include "server.h"
+#include "topsrv.h"
 #include "msg.h"
 #include "socket.h"
 #include "node.h"
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index c8146568d04e..6925a989569b 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -51,7 +51,7 @@ static void tipc_sub_send_event(struct tipc_subscription *sub,
 	tipc_evt_write(evt, found_upper, found_upper);
 	tipc_evt_write(evt, port.ref, port);
 	tipc_evt_write(evt, port.node, node);
-	tipc_conn_queue_evt(sub->net, sub->conid, event, evt);
+	tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
 }
 
 /**
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 82ba61afe638..8b2d22b18f22 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -37,7 +37,7 @@
 #ifndef _TIPC_SUBSCR_H
 #define _TIPC_SUBSCR_H
 
-#include "server.h"
+#include "topsrv.h"
 
 #define TIPC_MAX_SUBSCR         65535
 #define TIPC_MAX_PUBLICATIONS   65535
diff --git a/net/tipc/server.c b/net/tipc/topsrv.c
similarity index 81%
rename from net/tipc/server.c
rename to net/tipc/topsrv.c
index 0e351e81690e..02013e00f287 100644
--- a/net/tipc/server.c
+++ b/net/tipc/topsrv.c
@@ -2,7 +2,7 @@
  * net/tipc/server.c: TIPC server infrastructure
  *
  * Copyright (c) 2012-2013, Wind River Systems
- * Copyright (c) 2017, Ericsson AB
+ * Copyright (c) 2017-2018, Ericsson AB
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -35,7 +35,7 @@
  */
 
 #include "subscr.h"
-#include "server.h"
+#include "topsrv.h"
 #include "core.h"
 #include "socket.h"
 #include "addr.h"
@@ -52,7 +52,7 @@
 #define TIPC_SERVER_NAME_LEN	32
 
 /**
- * struct tipc_server - TIPC server structure
+ * struct tipc_topsrv - TIPC server structure
  * @conn_idr: identifier set of connection
  * @idr_lock: protect the connection identifier set
  * @idr_in_use: amount of allocated identifier entry
@@ -68,7 +68,7 @@
  * @imp: message importance
  * @type: socket type
  */
-struct tipc_server {
+struct tipc_topsrv {
 	struct idr conn_idr;
 	spinlock_t idr_lock; /* for idr list */
 	int idr_in_use;
@@ -102,12 +102,12 @@ struct tipc_conn {
 	int conid;
 	struct socket *sock;
 	unsigned long flags;
-	struct tipc_server *server;
+	struct tipc_topsrv *server;
 	struct list_head sub_list;
 	spinlock_t sub_lock; /* for subscription list */
 	struct work_struct rwork;
 	struct list_head outqueue;
-	spinlock_t outqueue_lock;
+	spinlock_t outqueue_lock; /* for outqueue */
 	struct work_struct swork;
 };
 
@@ -118,8 +118,10 @@ struct outqueue_entry {
 	struct list_head list;
 };
 
-static void tipc_recv_work(struct work_struct *work);
-static void tipc_send_work(struct work_struct *work);
+static void tipc_conn_recv_work(struct work_struct *work);
+static void tipc_conn_send_work(struct work_struct *work);
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt);
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s);
 
 static bool connected(struct tipc_conn *con)
 {
@@ -129,7 +131,7 @@ static bool connected(struct tipc_conn *con)
 static void tipc_conn_kref_release(struct kref *kref)
 {
 	struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
-	struct tipc_server *s = con->server;
+	struct tipc_topsrv *s = con->server;
 	struct outqueue_entry *e, *safe;
 
 	spin_lock_bh(&s->idr_lock);
@@ -158,75 +160,7 @@ static void conn_get(struct tipc_conn *con)
 	kref_get(&con->kref);
 }
 
-static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
-{
-	struct tipc_conn *con;
-
-	spin_lock_bh(&s->idr_lock);
-	con = idr_find(&s->conn_idr, conid);
-	if (!connected(con) || !kref_get_unless_zero(&con->kref))
-		con = NULL;
-	spin_unlock_bh(&s->idr_lock);
-	return con;
-}
-
-/* sock_data_ready - interrupt callback indicating the socket has data to read
- * The queued work is launched into tipc_recv_work()->tipc_recv_from_sock()
- */
-static void sock_data_ready(struct sock *sk)
-{
-	struct tipc_conn *con;
-
-	read_lock_bh(&sk->sk_callback_lock);
-	con = sk->sk_user_data;
-	if (connected(con)) {
-		conn_get(con);
-		if (!queue_work(con->server->rcv_wq, &con->rwork))
-			conn_put(con);
-	}
-	read_unlock_bh(&sk->sk_callback_lock);
-}
-
-/* sock_write_space - interrupt callback after a sendmsg EAGAIN
- * Indicates that there now is more space in the send buffer
- * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
- */
-static void sock_write_space(struct sock *sk)
-{
-	struct tipc_conn *con;
-
-	read_lock_bh(&sk->sk_callback_lock);
-	con = sk->sk_user_data;
-	if (connected(con)) {
-		conn_get(con);
-		if (!queue_work(con->server->send_wq, &con->swork))
-			conn_put(con);
-	}
-	read_unlock_bh(&sk->sk_callback_lock);
-}
-
-/* tipc_con_delete_sub - delete a specific or all subscriptions
- * for a given subscriber
- */
-static void tipc_con_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
-{
-	struct list_head *sub_list = &con->sub_list;
-	struct tipc_net *tn = tipc_net(con->server->net);
-	struct tipc_subscription *sub, *tmp;
-
-	spin_lock_bh(&con->sub_lock);
-	list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
-		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
-			tipc_sub_unsubscribe(sub);
-			atomic_dec(&tn->subscription_count);
-		} else if (s) {
-			break;
-		}
-	}
-	spin_unlock_bh(&con->sub_lock);
-}
-
-static void tipc_close_conn(struct tipc_conn *con)
+static void tipc_conn_close(struct tipc_conn *con)
 {
 	struct sock *sk = con->sock->sk;
 	bool disconnect = false;
@@ -236,7 +170,7 @@ static void tipc_close_conn(struct tipc_conn *con)
 
 	if (disconnect) {
 		sk->sk_user_data = NULL;
-		tipc_con_delete_sub(con, NULL);
+		tipc_conn_delete_sub(con, NULL);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -250,12 +184,12 @@ static void tipc_close_conn(struct tipc_conn *con)
 	conn_put(con);
 }
 
-static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
 {
 	struct tipc_conn *con;
 	int ret;
 
-	con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC);
+	con = kzalloc(sizeof(*con), GFP_ATOMIC);
 	if (!con)
 		return ERR_PTR(-ENOMEM);
 
@@ -264,8 +198,8 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
 	INIT_LIST_HEAD(&con->sub_list);
 	spin_lock_init(&con->outqueue_lock);
 	spin_lock_init(&con->sub_lock);
-	INIT_WORK(&con->swork, tipc_send_work);
-	INIT_WORK(&con->rwork, tipc_recv_work);
+	INIT_WORK(&con->swork, tipc_conn_send_work);
+	INIT_WORK(&con->rwork, tipc_conn_recv_work);
 
 	spin_lock_bh(&s->idr_lock);
 	ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC);
@@ -284,65 +218,108 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s)
 	return con;
 }
 
-static int tipc_con_rcv_sub(struct tipc_server *srv,
-			    struct tipc_conn *con,
-			    struct tipc_subscr *s)
+static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid)
 {
-	struct tipc_net *tn = tipc_net(srv->net);
-	struct tipc_subscription *sub;
+	struct tipc_conn *con;
 
-	if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
-		tipc_con_delete_sub(con, s);
-		return 0;
-	}
-	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
-		pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
-		return -1;
-	}
-	sub = tipc_sub_subscribe(srv->net, s, con->conid);
-	if (!sub)
-		return -1;
-	atomic_inc(&tn->subscription_count);
-	spin_lock_bh(&con->sub_lock);
-	list_add(&sub->sub_list, &con->sub_list);
-	spin_unlock_bh(&con->sub_lock);
-	return 0;
+	spin_lock_bh(&s->idr_lock);
+	con = idr_find(&s->conn_idr, conid);
+	if (!connected(con) || !kref_get_unless_zero(&con->kref))
+		con = NULL;
+	spin_unlock_bh(&s->idr_lock);
+	return con;
 }
 
-static int tipc_receive_from_sock(struct tipc_conn *con)
+/* tipc_conn_delete_sub - delete a specific or all subscriptions
+ * for a given subscriber
+ */
+static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s)
 {
-	struct tipc_server *srv = con->server;
-	struct sock *sk = con->sock->sk;
-	struct msghdr msg = {};
-	struct tipc_subscr s;
+	struct tipc_net *tn = tipc_net(con->server->net);
+	struct list_head *sub_list = &con->sub_list;
+	struct tipc_subscription *sub, *tmp;
+
+	spin_lock_bh(&con->sub_lock);
+	list_for_each_entry_safe(sub, tmp, sub_list, sub_list) {
+		if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) {
+			tipc_sub_unsubscribe(sub);
+			atomic_dec(&tn->subscription_count);
+		} else if (s) {
+			break;
+		}
+	}
+	spin_unlock_bh(&con->sub_lock);
+}
+
+static void tipc_conn_send_to_sock(struct tipc_conn *con)
+{
+	struct list_head *queue = &con->outqueue;
+	struct tipc_topsrv *srv = con->server;
+	struct outqueue_entry *e;
+	struct tipc_event *evt;
+	struct msghdr msg;
 	struct kvec iov;
+	int count = 0;
 	int ret;
 
-	iov.iov_base = &s;
-	iov.iov_len = sizeof(s);
-	msg.msg_name = NULL;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
-	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
-	if (ret == -EWOULDBLOCK)
-		return -EWOULDBLOCK;
-	if (ret > 0) {
-		read_lock_bh(&sk->sk_callback_lock);
-		ret = tipc_con_rcv_sub(srv, con, &s);
-		read_unlock_bh(&sk->sk_callback_lock);
-	}
-	if (ret < 0)
-		tipc_close_conn(con);
+	spin_lock_bh(&con->outqueue_lock);
 
-	return ret;
+	while (!list_empty(queue)) {
+		e = list_first_entry(queue, struct outqueue_entry, list);
+		evt = &e->evt;
+		spin_unlock_bh(&con->outqueue_lock);
+
+		if (e->inactive)
+			tipc_conn_delete_sub(con, &evt->s);
+
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_flags = MSG_DONTWAIT;
+		iov.iov_base = evt;
+		iov.iov_len = sizeof(*evt);
+		msg.msg_name = NULL;
+
+		if (con->sock) {
+			ret = kernel_sendmsg(con->sock, &msg, &iov,
+					     1, sizeof(*evt));
+			if (ret == -EWOULDBLOCK || ret == 0) {
+				cond_resched();
+				return;
+			} else if (ret < 0) {
+				return tipc_conn_close(con);
+			}
+		} else {
+			tipc_topsrv_kern_evt(srv->net, evt);
+		}
+
+		/* Don't starve users filling buffers */
+		if (++count >= MAX_SEND_MSG_COUNT) {
+			cond_resched();
+			count = 0;
+		}
+		spin_lock_bh(&con->outqueue_lock);
+		list_del(&e->list);
+		kfree(e);
+	}
+	spin_unlock_bh(&con->outqueue_lock);
+}
+
+static void tipc_conn_send_work(struct work_struct *work)
+{
+	struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+
+	if (connected(con))
+		tipc_conn_send_to_sock(con);
+
+	conn_put(con);
 }
 
 /* tipc_conn_queue_evt() - interrupt level call from a subscription instance
  * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
  */
-void tipc_conn_queue_evt(struct net *net, int conid,
-			 u32 event, struct tipc_event *evt)
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+			   u32 event, struct tipc_event *evt)
 {
-	struct tipc_server *srv = tipc_topsrv(net);
+	struct tipc_topsrv *srv = tipc_topsrv(net);
 	struct outqueue_entry *e;
 	struct tipc_conn *con;
 
@@ -368,123 +345,83 @@ err:
 	conn_put(con);
 }
 
-bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
-			     u32 upper, u32 filter, int *conid)
-{
-	struct tipc_subscr sub;
-	struct tipc_conn *con;
-	int rc;
-
-	sub.seq.type = type;
-	sub.seq.lower = lower;
-	sub.seq.upper = upper;
-	sub.timeout = TIPC_WAIT_FOREVER;
-	sub.filter = filter;
-	*(u32 *)&sub.usr_handle = port;
-
-	con = tipc_alloc_conn(tipc_topsrv(net));
-	if (IS_ERR(con))
-		return false;
-
-	*conid = con->conid;
-	con->sock = NULL;
-	rc = tipc_con_rcv_sub(tipc_topsrv(net), con, &sub);
-	if (rc < 0)
-		tipc_close_conn(con);
-	return !rc;
-}
-
-void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN
+ * Indicates that there now is more space in the send buffer
+ * The queued work is launched into tipc_send_work()->tipc_conn_send_to_sock()
+ */
+static void tipc_conn_write_space(struct sock *sk)
 {
 	struct tipc_conn *con;
 
-	con = tipc_conn_lookup(tipc_topsrv(net), conid);
-	if (!con)
-		return;
-
-	test_and_clear_bit(CF_CONNECTED, &con->flags);
-	tipc_con_delete_sub(con, NULL);
-	conn_put(con);
-	conn_put(con);
+	read_lock_bh(&sk->sk_callback_lock);
+	con = sk->sk_user_data;
+	if (connected(con)) {
+		conn_get(con);
+		if (!queue_work(con->server->send_wq, &con->swork))
+			conn_put(con);
+	}
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
+static int tipc_conn_rcv_sub(struct tipc_topsrv *srv,
+			     struct tipc_conn *con,
+			     struct tipc_subscr *s)
 {
-	u32 port = *(u32 *)&evt->s.usr_handle;
-	u32 self = tipc_own_addr(net);
-	struct sk_buff_head evtq;
-	struct sk_buff *skb;
+	struct tipc_net *tn = tipc_net(srv->net);
+	struct tipc_subscription *sub;
 
-	skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
-			      self, self, port, port, 0);
-	if (!skb)
-		return;
-	msg_set_dest_droppable(buf_msg(skb), true);
-	memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
-	skb_queue_head_init(&evtq);
-	__skb_queue_tail(&evtq, skb);
-	tipc_sk_rcv(net, &evtq);
+	if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) {
+		tipc_conn_delete_sub(con, s);
+		return 0;
+	}
+	if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) {
+		pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR);
+		return -1;
+	}
+	sub = tipc_sub_subscribe(srv->net, s, con->conid);
+	if (!sub)
+		return -1;
+	atomic_inc(&tn->subscription_count);
+	spin_lock_bh(&con->sub_lock);
+	list_add(&sub->sub_list, &con->sub_list);
+	spin_unlock_bh(&con->sub_lock);
+	return 0;
 }
 
-static void tipc_send_to_sock(struct tipc_conn *con)
+static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
 {
-	struct list_head *queue = &con->outqueue;
-	struct tipc_server *srv = con->server;
-	struct outqueue_entry *e;
-	struct tipc_event *evt;
-	struct msghdr msg;
+	struct tipc_topsrv *srv = con->server;
+	struct sock *sk = con->sock->sk;
+	struct msghdr msg = {};
+	struct tipc_subscr s;
 	struct kvec iov;
-	int count = 0;
 	int ret;
 
-	spin_lock_bh(&con->outqueue_lock);
-
-	while (!list_empty(queue)) {
-		e = list_first_entry(queue, struct outqueue_entry, list);
-		evt = &e->evt;
-		spin_unlock_bh(&con->outqueue_lock);
-
-		if (e->inactive)
-			tipc_con_delete_sub(con, &evt->s);
-
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_flags = MSG_DONTWAIT;
-		iov.iov_base = evt;
-		iov.iov_len = sizeof(*evt);
-		msg.msg_name = NULL;
-
-		if (con->sock) {
-			ret = kernel_sendmsg(con->sock, &msg, &iov,
-					     1, sizeof(*evt));
-			if (ret == -EWOULDBLOCK || ret == 0) {
-				cond_resched();
-				return;
-			} else if (ret < 0) {
-				return tipc_close_conn(con);
-			}
-		} else {
-			tipc_send_kern_top_evt(srv->net, evt);
-		}
-
-		/* Don't starve users filling buffers */
-		if (++count >= MAX_SEND_MSG_COUNT) {
-			cond_resched();
-			count = 0;
-		}
-		spin_lock_bh(&con->outqueue_lock);
-		list_del(&e->list);
-		kfree(e);
+	iov.iov_base = &s;
+	iov.iov_len = sizeof(s);
+	msg.msg_name = NULL;
+	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
+	if (ret == -EWOULDBLOCK)
+		return -EWOULDBLOCK;
+	if (ret > 0) {
+		read_lock_bh(&sk->sk_callback_lock);
+		ret = tipc_conn_rcv_sub(srv, con, &s);
+		read_unlock_bh(&sk->sk_callback_lock);
 	}
-	spin_unlock_bh(&con->outqueue_lock);
+	if (ret < 0)
+		tipc_conn_close(con);
+
+	return ret;
 }
 
-static void tipc_recv_work(struct work_struct *work)
+static void tipc_conn_recv_work(struct work_struct *work)
 {
 	struct tipc_conn *con = container_of(work, struct tipc_conn, rwork);
 	int count = 0;
 
 	while (connected(con)) {
-		if (tipc_receive_from_sock(con))
+		if (tipc_conn_rcv_from_sock(con))
 			break;
 
 		/* Don't flood Rx machine */
@@ -496,19 +433,26 @@ static void tipc_recv_work(struct work_struct *work)
 	conn_put(con);
 }
 
-static void tipc_send_work(struct work_struct *work)
+/* tipc_conn_data_ready - interrupt callback indicating the socket has data
+ * The queued work is launched into tipc_recv_work()->tipc_conn_rcv_from_sock()
+ */
+static void tipc_conn_data_ready(struct sock *sk)
 {
-	struct tipc_conn *con = container_of(work, struct tipc_conn, swork);
+	struct tipc_conn *con;
 
-	if (connected(con))
-		tipc_send_to_sock(con);
-
-	conn_put(con);
+	read_lock_bh(&sk->sk_callback_lock);
+	con = sk->sk_user_data;
+	if (connected(con)) {
+		conn_get(con);
+		if (!queue_work(con->server->rcv_wq, &con->rwork))
+			conn_put(con);
+	}
+	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void tipc_accept_from_sock(struct work_struct *work)
+static void tipc_topsrv_accept(struct work_struct *work)
 {
-	struct tipc_server *srv = container_of(work, struct tipc_server, awork);
+	struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
 	struct socket *lsock = srv->listener;
 	struct socket *newsock;
 	struct tipc_conn *con;
@@ -519,7 +463,7 @@ static void tipc_accept_from_sock(struct work_struct *work)
 		ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
 		if (ret < 0)
 			return;
-		con = tipc_alloc_conn(srv);
+		con = tipc_conn_alloc(srv);
 		if (IS_ERR(con)) {
 			ret = PTR_ERR(con);
 			sock_release(newsock);
@@ -528,8 +472,8 @@ static void tipc_accept_from_sock(struct work_struct *work)
 		/* Register callbacks */
 		newsk = newsock->sk;
 		write_lock_bh(&newsk->sk_callback_lock);
-		newsk->sk_data_ready = sock_data_ready;
-		newsk->sk_write_space = sock_write_space;
+		newsk->sk_data_ready = tipc_conn_data_ready;
+		newsk->sk_write_space = tipc_conn_write_space;
 		newsk->sk_user_data = con;
 		con->sock = newsock;
 		write_unlock_bh(&newsk->sk_callback_lock);
@@ -539,12 +483,12 @@ static void tipc_accept_from_sock(struct work_struct *work)
 	}
 }
 
-/* listener_sock_data_ready - interrupt callback indicating new connection
- * The queued job is launched into tipc_accept_from_sock()
+/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+ * The queued job is launched into tipc_topsrv_accept()
  */
-static void listener_sock_data_ready(struct sock *sk)
+static void tipc_topsrv_listener_data_ready(struct sock *sk)
 {
-	struct tipc_server *srv;
+	struct tipc_topsrv *srv;
 
 	read_lock_bh(&sk->sk_callback_lock);
 	srv = sk->sk_user_data;
@@ -553,7 +497,7 @@ static void listener_sock_data_ready(struct sock *sk)
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static int tipc_create_listener_sock(struct tipc_server *srv)
+static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
 {
 	int imp = TIPC_CRITICAL_IMPORTANCE;
 	struct socket *lsock = NULL;
@@ -568,7 +512,7 @@ static int tipc_create_listener_sock(struct tipc_server *srv)
 	srv->listener = lsock;
 	sk = lsock->sk;
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_data_ready = listener_sock_data_ready;
+	sk->sk_data_ready = tipc_topsrv_listener_data_ready;
 	sk->sk_user_data = srv;
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -615,7 +559,65 @@ err:
 	return -EINVAL;
 }
 
-static int tipc_work_start(struct tipc_server *s)
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+			     u32 upper, u32 filter, int *conid)
+{
+	struct tipc_subscr sub;
+	struct tipc_conn *con;
+	int rc;
+
+	sub.seq.type = type;
+	sub.seq.lower = lower;
+	sub.seq.upper = upper;
+	sub.timeout = TIPC_WAIT_FOREVER;
+	sub.filter = filter;
+	*(u32 *)&sub.usr_handle = port;
+
+	con = tipc_conn_alloc(tipc_topsrv(net));
+	if (IS_ERR(con))
+		return false;
+
+	*conid = con->conid;
+	con->sock = NULL;
+	rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
+	if (rc < 0)
+		tipc_conn_close(con);
+	return !rc;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+	struct tipc_conn *con;
+
+	con = tipc_conn_lookup(tipc_topsrv(net), conid);
+	if (!con)
+		return;
+
+	test_and_clear_bit(CF_CONNECTED, &con->flags);
+	tipc_conn_delete_sub(con, NULL);
+	conn_put(con);
+	conn_put(con);
+}
+
+static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt)
+{
+	u32 port = *(u32 *)&evt->s.usr_handle;
+	u32 self = tipc_own_addr(net);
+	struct sk_buff_head evtq;
+	struct sk_buff *skb;
+
+	skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+			      self, self, port, port, 0);
+	if (!skb)
+		return;
+	msg_set_dest_droppable(buf_msg(skb), true);
+	memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+	skb_queue_head_init(&evtq);
+	__skb_queue_tail(&evtq, skb);
+	tipc_sk_rcv(net, &evtq);
+}
+
+static int tipc_topsrv_work_start(struct tipc_topsrv *s)
 {
 	s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
 	if (!s->rcv_wq) {
@@ -633,7 +635,7 @@ static int tipc_work_start(struct tipc_server *s)
 	return 0;
 }
 
-static void tipc_work_stop(struct tipc_server *s)
+static void tipc_topsrv_work_stop(struct tipc_topsrv *s)
 {
 	destroy_workqueue(s->rcv_wq);
 	destroy_workqueue(s->send_wq);
@@ -643,7 +645,7 @@ int tipc_topsrv_start(struct net *net)
 {
 	struct tipc_net *tn = tipc_net(net);
 	const char name[] = "topology_server";
-	struct tipc_server *srv;
+	struct tipc_topsrv *srv;
 	int ret;
 
 	srv = kzalloc(sizeof(*srv), GFP_ATOMIC);
@@ -652,7 +654,7 @@ int tipc_topsrv_start(struct net *net)
 
 	srv->net = net;
 	srv->max_rcvbuf_size = sizeof(struct tipc_subscr);
-	INIT_WORK(&srv->awork, tipc_accept_from_sock);
+	INIT_WORK(&srv->awork, tipc_topsrv_accept);
 
 	strncpy(srv->name, name, strlen(name) + 1);
 	tn->topsrv = srv;
@@ -662,20 +664,20 @@ int tipc_topsrv_start(struct net *net)
 	idr_init(&srv->conn_idr);
 	srv->idr_in_use = 0;
 
-	ret = tipc_work_start(srv);
+	ret = tipc_topsrv_work_start(srv);
 	if (ret < 0)
 		return ret;
 
-	ret = tipc_create_listener_sock(srv);
+	ret = tipc_topsrv_create_listener(srv);
 	if (ret < 0)
-		tipc_work_stop(srv);
+		tipc_topsrv_work_stop(srv);
 
 	return ret;
 }
 
 void tipc_topsrv_stop(struct net *net)
 {
-	struct tipc_server *srv = tipc_topsrv(net);
+	struct tipc_topsrv *srv = tipc_topsrv(net);
 	struct socket *lsock = srv->listener;
 	struct tipc_conn *con;
 	int id;
@@ -685,7 +687,7 @@ void tipc_topsrv_stop(struct net *net)
 		con = idr_find(&srv->conn_idr, id);
 		if (con) {
 			spin_unlock_bh(&srv->idr_lock);
-			tipc_close_conn(con);
+			tipc_conn_close(con);
 			spin_lock_bh(&srv->idr_lock);
 		}
 	}
@@ -694,7 +696,7 @@ void tipc_topsrv_stop(struct net *net)
 	sock_release(lsock);
 	srv->listener = NULL;
 	spin_unlock_bh(&srv->idr_lock);
-	tipc_work_stop(srv);
+	tipc_topsrv_work_stop(srv);
 	idr_destroy(&srv->conn_idr);
 	kfree(srv);
 }
diff --git a/net/tipc/server.h b/net/tipc/topsrv.h
similarity index 92%
rename from net/tipc/server.h
rename to net/tipc/topsrv.h
index ce93b6d68e41..c7ea71293748 100644
--- a/net/tipc/server.h
+++ b/net/tipc/topsrv.h
@@ -38,17 +38,14 @@
 #define _TIPC_SERVER_H
 
 #include "core.h"
-#include <linux/idr.h>
-#include <linux/tipc.h>
-#include <net/net_namespace.h>
 
 #define TIPC_SERVER_NAME_LEN	32
 #define TIPC_SUB_CLUSTER_SCOPE  0x20
 #define TIPC_SUB_NODE_SCOPE     0x40
 #define TIPC_SUB_NO_STATUS      0x80
 
-void tipc_conn_queue_evt(struct net *net, int conid,
-			 u32 event, struct tipc_event *evt);
+void tipc_topsrv_queue_evt(struct net *net, int conid,
+			   u32 event, struct tipc_event *evt);
 
 bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 			     u32 upper, u32 filter, int *conid);

From 377cda13a25513f714ea5b5a765408292ca4627a Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 15 Feb 2018 14:38:34 +0100
Subject: [PATCH 0168/1678] net: dsa: mv88e6xxx: Release mutex between each
 statistics read

The PTP code needs low latency access to the PTP hardware timestamps.
Reading all the statistics in one go adds a lot of latency to the PTP
code. So take and release the reg_lock mutex for each individual
statistics, allowing the PTP thread jump in between.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index af63710e93c1..bd5cb8a0330e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -714,9 +714,12 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 	for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
 		stat = &mv88e6xxx_hw_stats[i];
 		if (stat->type & types) {
+			mutex_lock(&chip->reg_lock);
 			data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
 							      bank1_select,
 							      histogram);
+			mutex_unlock(&chip->reg_lock);
+
 			j++;
 		}
 	}
@@ -764,14 +767,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
 	mutex_lock(&chip->reg_lock);
 
 	ret = mv88e6xxx_stats_snapshot(chip, port);
-	if (ret < 0) {
-		mutex_unlock(&chip->reg_lock);
+	mutex_unlock(&chip->reg_lock);
+
+	if (ret < 0)
 		return;
-	}
 
 	mv88e6xxx_get_stats(chip, port, data);
 
-	mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)

From a61e54063423e9e8d604ea54e6664433946fe6af Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 15 Feb 2018 14:38:35 +0100
Subject: [PATCH 0169/1678] net: dsa: mv88e6xxx: Release mutex between each ATU
 read

The PTP code needs low latency access to the PTP hardware timestamps.
Reading all the ATU entries in one go adds a lot of latency to the PTP
code. So take and release the reg_lock mutex for each individual MAC
address in the ATU, allowing the PTP thread jump in between.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index bd5cb8a0330e..39c7ad7e490f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1437,7 +1437,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
 	eth_broadcast_addr(addr.mac);
 
 	do {
+		mutex_lock(&chip->reg_lock);
 		err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr);
+		mutex_unlock(&chip->reg_lock);
 		if (err)
 			return err;
 
@@ -1470,7 +1472,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 	int err;
 
 	/* Dump port's default Filtering Information Database (VLAN ID 0) */
+	mutex_lock(&chip->reg_lock);
 	err = mv88e6xxx_port_get_fid(chip, port, &fid);
+	mutex_unlock(&chip->reg_lock);
+
 	if (err)
 		return err;
 
@@ -1480,7 +1485,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 
 	/* Dump VLANs' Filtering Information Databases */
 	do {
+		mutex_lock(&chip->reg_lock);
 		err = mv88e6xxx_vtu_getnext(chip, &vlan);
+		mutex_unlock(&chip->reg_lock);
 		if (err)
 			return err;
 
@@ -1500,13 +1507,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
 				   dsa_fdb_dump_cb_t *cb, void *data)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
-	int err;
 
-	mutex_lock(&chip->reg_lock);
-	err = mv88e6xxx_port_db_dump(chip, port, cb, data);
-	mutex_unlock(&chip->reg_lock);
-
-	return err;
+	return mv88e6xxx_port_db_dump(chip, port, cb, data);
 }
 
 static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip,

From 66dede2d6b2340235ca212532275446d7bb010fe Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 15 Feb 2018 15:50:57 +0100
Subject: [PATCH 0170/1678] net: sched: fix unbalance in the error path of
 tca_action_flush()

When tca_action_flush() calls the action walk() and gets an error,
a successful call to nla_nest_start() is not followed by a call to
nla_nest_cancel(). It's harmless, as the skb is freed in the error
path - but it's worth to fix this unbalance.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4886ea4a7d6e..624995564e5a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -938,8 +938,10 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 		goto out_module_put;
 
 	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
-	if (err <= 0)
+	if (err <= 0) {
+		nla_nest_cancel(skb, nest);
 		goto out_module_put;
+	}
 
 	nla_nest_end(skb, nest);
 

From b7b347fa3cd496ad5b4cbcc8ea2931847c4d0d78 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:53 -0500
Subject: [PATCH 0171/1678] net: sched: act: fix code style

This patch is used by subsequent patches. It fixes code style issues
caught by checkpatch.

Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h  |  5 +++--
 net/sched/act_api.c    | 12 ++++++------
 net/sched/act_mirred.c |  6 +++---
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 6ed9692f20bd..32ef544f4ddc 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,12 +87,13 @@ struct tc_action_ops {
 		       struct tcf_result *);
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
-	int     (*lookup)(struct net *, struct tc_action **, u32);
+	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
 			int bind);
 	int     (*walk)(struct net *, struct sk_buff *,
-			struct netlink_callback *, int, const struct tc_action_ops *);
+			struct netlink_callback *, int,
+			const struct tc_action_ops *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 624995564e5a..a32e6c2edbf6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -621,7 +621,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 			goto err_out;
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
-		if (kind == NULL)
+		if (!kind)
 			goto err_out;
 		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
@@ -822,7 +822,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (nest == NULL)
+	if (!nest)
 		goto out_nlmsg_trim;
 
 	if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -934,7 +934,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (nest == NULL)
+	if (!nest)
 		goto out_module_put;
 
 	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
@@ -1007,10 +1007,10 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
-		if (tb[1] != NULL)
+		if (tb[1])
 			return tca_action_flush(net, tb[1], n, portid);
-		else
-			return -EINVAL;
+
+		return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e6ff88f72900..abcd5f12b913 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -80,12 +80,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	int ret;
 
-	if (nla == NULL)
+	if (!nla)
 		return -EINVAL;
 	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
 	if (ret < 0)
 		return ret;
-	if (tb[TCA_MIRRED_PARMS] == NULL)
+	if (!tb[TCA_MIRRED_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
@@ -117,7 +117,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		if (dev == NULL)
+		if (!dev)
 			return -EINVAL;
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);

From 10defbd29e6218c1cab5c217a9d808fc05e3938a Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:54 -0500
Subject: [PATCH 0172/1678] net: sched: act: add extack to init

This patch adds extack to tcf_action_init and tcf_action_init_1
functions. These are necessary to make individual extack handling in
each act implementation.

Based on work by David Ahern <dsahern@gmail.com>

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  5 +++--
 net/sched/act_api.c   | 17 +++++++++++------
 net/sched/cls_api.c   |  4 ++--
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 32ef544f4ddc..41d95930ffbc 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -163,10 +163,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions);
+		    struct list_head *actions, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind);
+				    char *name, int ovr, int bind,
+				    struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a32e6c2edbf6..662574646256 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -605,7 +605,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind)
+				    char *name, int ovr, int bind,
+				    struct netlink_ext_ack *extack)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
@@ -726,7 +727,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions)
+		    struct list_head *actions, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -738,7 +739,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+					extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -1062,12 +1064,14 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-			  struct nlmsghdr *n, u32 portid, int ovr)
+			  struct nlmsghdr *n, u32 portid, int ovr,
+			  struct netlink_ext_ack *extack)
 {
 	int ret = 0;
 	LIST_HEAD(actions);
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+			      extack);
 	if (ret)
 		return ret;
 
@@ -1115,7 +1119,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+				     extack);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2bc1bc23d42e..f21610c5da1a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1434,7 +1434,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND);
+						TCA_ACT_BIND, extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -1447,7 +1447,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions);
+					      &actions, extack);
 			if (err)
 				return err;
 			list_for_each_entry(act, &actions, list)

From ee99b2d8bf4ad6d03046a8c2f25bad7cfd9de64a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 16 Feb 2018 16:03:39 -0500
Subject: [PATCH 0173/1678] net: Revert sched action extack support series.

It was mis-applied and the changes had rejects.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h  | 10 ++++------
 net/sched/act_api.c    | 29 ++++++++++++-----------------
 net/sched/act_mirred.c |  6 +++---
 net/sched/cls_api.c    |  4 ++--
 4 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 41d95930ffbc..6ed9692f20bd 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,13 +87,12 @@ struct tc_action_ops {
 		       struct tcf_result *);
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
-	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
+	int     (*lookup)(struct net *, struct tc_action **, u32);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
 			int bind);
 	int     (*walk)(struct net *, struct sk_buff *,
-			struct netlink_callback *, int,
-			const struct tc_action_ops *);
+			struct netlink_callback *, int, const struct tc_action_ops *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 };
@@ -163,11 +162,10 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, struct netlink_ext_ack *extack);
+		    struct list_head *actions);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind,
-				    struct netlink_ext_ack *extack);
+				    char *name, int ovr, int bind);
 int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 662574646256..624995564e5a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -605,8 +605,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind,
-				    struct netlink_ext_ack *extack)
+				    char *name, int ovr, int bind)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
@@ -622,7 +621,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 			goto err_out;
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
-		if (!kind)
+		if (kind == NULL)
 			goto err_out;
 		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
@@ -727,7 +726,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, struct netlink_ext_ack *extack)
+		    struct list_head *actions)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -739,8 +738,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-					extack);
+		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -824,7 +822,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (!nest)
+	if (nest == NULL)
 		goto out_nlmsg_trim;
 
 	if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -936,7 +934,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (!nest)
+	if (nest == NULL)
 		goto out_module_put;
 
 	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
@@ -1009,10 +1007,10 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
-		if (tb[1])
+		if (tb[1] != NULL)
 			return tca_action_flush(net, tb[1], n, portid);
-
-		return -EINVAL;
+		else
+			return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
@@ -1064,14 +1062,12 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-			  struct nlmsghdr *n, u32 portid, int ovr,
-			  struct netlink_ext_ack *extack)
+			  struct nlmsghdr *n, u32 portid, int ovr)
 {
 	int ret = 0;
 	LIST_HEAD(actions);
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-			      extack);
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
 	if (ret)
 		return ret;
 
@@ -1119,8 +1115,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
-				     extack);
+		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index abcd5f12b913..e6ff88f72900 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -80,12 +80,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	int ret;
 
-	if (!nla)
+	if (nla == NULL)
 		return -EINVAL;
 	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
 	if (ret < 0)
 		return ret;
-	if (!tb[TCA_MIRRED_PARMS])
+	if (tb[TCA_MIRRED_PARMS] == NULL)
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
@@ -117,7 +117,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		if (!dev)
+		if (dev == NULL)
 			return -EINVAL;
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f21610c5da1a..2bc1bc23d42e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1434,7 +1434,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, extack);
+						TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -1447,7 +1447,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions, extack);
+					      &actions);
 			if (err)
 				return err;
 			list_for_each_entry(act, &actions, list)

From 6f89dbce8e1134458de8a8e376acaaca4eee602e Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:32 -0800
Subject: [PATCH 0174/1678] skbuff: export mm_[un]account_pinned_pages for
 other modules

RDS would like to use the helper functions for managing pinned pages
added by Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 +++
 net/core/skbuff.c      | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5ebc0f869720..b1cc38af53e1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -466,6 +466,9 @@ struct ubuf_info {
 
 #define skb_uarg(SKB)	((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
 
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
+void mm_unaccount_pinned_pages(struct mmpin *mmp);
+
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
 struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
 					struct ubuf_info *uarg);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 09bd89c90a71..1a7485a2cdfa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -890,7 +890,7 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 }
 EXPORT_SYMBOL_GPL(skb_morph);
 
-static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 {
 	unsigned long max_pg, num_pg, new_pg, old_pg;
 	struct user_struct *user;
@@ -919,14 +919,16 @@ static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mm_account_pinned_pages);
 
-static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+void mm_unaccount_pinned_pages(struct mmpin *mmp)
 {
 	if (mmp->user) {
 		atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
 		free_uid(mmp->user);
 	}
 }
+EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
 
 struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 {

From ea8994cb0118993da179af836cc72a583d75dc4b Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:33 -0800
Subject: [PATCH 0175/1678] rds: hold a sock ref from rds_message to the
 rds_sock

The existing model holds a reference from the rds_sock to the
rds_message, but the rds_message does not itself hold a sock_put()
on the rds_sock. Instead the m_rs field in the rds_message is
assigned when the message is queued on the sock, and nulled when
the message is dequeued from the sock.

We want to be able to notify userspace when the rds_message
is actually freed (from rds_message_purge(), after the refcounts
to the rds_message go to 0). At the time that rds_message_purge()
is called, the message is no longer on the rds_sock retransmit
queue. Thus the explicit reference for the m_rs is needed to
send a notification that will signal to userspace that
it is now safe to free/reuse any pages that may have
been pinned down for zerocopy.

This patch manages the m_rs assignment in the rds_message with
the necessary refcount book-keeping.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 8 +++++++-
 net/rds/send.c    | 7 +------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/rds/message.c b/net/rds/message.c
index 4318cc9b78f7..ef3daafa3d79 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(rds_message_addref);
  */
 static void rds_message_purge(struct rds_message *rm)
 {
-	unsigned long i;
+	unsigned long i, flags;
 
 	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
 		return;
@@ -69,6 +69,12 @@ static void rds_message_purge(struct rds_message *rm)
 		__free_page(sg_page(&rm->data.op_sg[i]));
 	}
 	rm->data.op_nents = 0;
+	spin_lock_irqsave(&rm->m_rs_lock, flags);
+	if (rm->m_rs) {
+		sock_put(rds_rs_to_sk(rm->m_rs));
+		rm->m_rs = NULL;
+	}
+	spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
 	if (rm->rdma.op_active)
 		rds_rdma_free_op(&rm->rdma);
diff --git a/net/rds/send.c b/net/rds/send.c
index b1b0022b8370..e8f3ff471b15 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -649,7 +649,6 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status)
 				rm->rdma.op_notifier = NULL;
 			}
 			was_on_sock = 1;
-			rm->m_rs = NULL;
 		}
 		spin_unlock(&rs->rs_lock);
 
@@ -756,9 +755,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		 */
 		if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
 			spin_unlock_irqrestore(&cp->cp_lock, flags);
-			spin_lock_irqsave(&rm->m_rs_lock, flags);
-			rm->m_rs = NULL;
-			spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 			continue;
 		}
 		list_del_init(&rm->m_conn_item);
@@ -774,7 +770,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
 		spin_unlock(&rs->rs_lock);
 
-		rm->m_rs = NULL;
 		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
 		rds_message_put(rm);
@@ -798,7 +793,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		__rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
 		spin_unlock(&rs->rs_lock);
 
-		rm->m_rs = NULL;
 		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
 		rds_message_put(rm);
@@ -849,6 +843,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
 		list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
 		set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
 		rds_message_addref(rm);
+		sock_hold(rds_rs_to_sk(rs));
 		rm->m_rs = rs;
 
 		/* The code ordering is a little weird, but we're

From 28190752c709272de3c2b6b092029da3f1614c5a Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:34 -0800
Subject: [PATCH 0176/1678] sock: permit SO_ZEROCOPY on PF_RDS socket

allow the application to set SO_ZEROCOPY on the underlying sk
of a PF_RDS socket

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index e90d461748f0..a1fa4a548f1b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1049,18 +1049,21 @@ set_rcvbuf:
 		break;
 
 	case SO_ZEROCOPY:
-		if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
+			if (sk->sk_protocol != IPPROTO_TCP)
+				ret = -ENOTSUPP;
+			else if (sk->sk_state != TCP_CLOSE)
+				ret = -EBUSY;
+		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
-		else if (sk->sk_protocol != IPPROTO_TCP)
-			ret = -ENOTSUPP;
-		else if (sk->sk_state != TCP_CLOSE)
-			ret = -EBUSY;
-		else if (val < 0 || val > 1)
-			ret = -EINVAL;
-		else
-			sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
-		break;
-
+		}
+		if (!ret) {
+			if (val < 0 || val > 1)
+				ret = -EINVAL;
+			else
+				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+			break;
+		}
 	default:
 		ret = -ENOPROTOOPT;
 		break;

From 01883eda72bd3f0a6c81447e4f223de14033fd9d Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:35 -0800
Subject: [PATCH 0177/1678] rds: support for zcopy completion notification

RDS removes a datagram (rds_message) from the retransmit queue when
an ACK is received. The ACK indicates that the receiver has queued
the RDS datagram, so that the sender can safely forget the datagram.
When all references to the rds_message are quiesced, rds_message_purge
is called to release resources used by the rds_message

If the datagram to be removed had pinned pages set up, add
an entry to the rs->rs_znotify_queue so that the notifcation
will be sent up via rds_rm_zerocopy_callback() when the
rds_message is eventually freed by rds_message_purge.

rds_rm_zerocopy_callback() attempts to batch the number of cookies
sent with each notification  to a max of SO_EE_ORIGIN_MAX_ZCOOKIES.
This is achieved by checking the tail skb in the sk_error_queue:
if this has room for one more cookie, the cookie from the
current notification is added; else a new skb is added to the
sk_error_queue. Every invocation of rds_rm_zerocopy_callback() will
trigger a ->sk_error_report to notify the application.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/errqueue.h |  2 +
 net/rds/af_rds.c              |  2 +
 net/rds/message.c             | 83 ++++++++++++++++++++++++++++++++---
 net/rds/rds.h                 | 14 ++++++
 net/rds/recv.c                |  2 +
 5 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index dc64cfaf13da..28812eda4209 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,11 +20,13 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
 #define SO_EE_ORIGIN_ZEROCOPY	5
+#define SO_EE_ORIGIN_ZCOOKIE	6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED	1
+#define	SO_EE_ORIGIN_MAX_ZCOOKIES	8
 
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 0a8eefd256b3..a937f18896ae 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -182,6 +182,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 		mask |= (EPOLLIN | EPOLLRDNORM);
 	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
 		mask |= (EPOLLOUT | EPOLLWRNORM);
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
 	read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
 	/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/message.c b/net/rds/message.c
index ef3daafa3d79..bf1a656b198a 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -33,6 +33,9 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/errqueue.h>
 
 #include "rds.h"
 
@@ -53,29 +56,95 @@ void rds_message_addref(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_addref);
 
+static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+{
+	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+	int ncookies;
+	u32 *ptr;
+
+	if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+		return false;
+	ncookies = serr->ee.ee_data;
+	if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
+		return false;
+	ptr = skb_put(skb, sizeof(u32));
+	*ptr = cookie;
+	serr->ee.ee_data = ++ncookies;
+	return true;
+}
+
+static void rds_rm_zerocopy_callback(struct rds_sock *rs,
+				     struct rds_znotifier *znotif)
+{
+	struct sock *sk = rds_rs_to_sk(rs);
+	struct sk_buff *skb, *tail;
+	struct sock_exterr_skb *serr;
+	unsigned long flags;
+	struct sk_buff_head *q;
+	u32 cookie = znotif->z_cookie;
+
+	q = &sk->sk_error_queue;
+	spin_lock_irqsave(&q->lock, flags);
+	tail = skb_peek_tail(q);
+
+	if (tail && skb_zcookie_add(tail, cookie)) {
+		spin_unlock_irqrestore(&q->lock, flags);
+		mm_unaccount_pinned_pages(&znotif->z_mmp);
+		consume_skb(rds_skb_from_znotifier(znotif));
+		sk->sk_error_report(sk);
+		return;
+	}
+
+	skb = rds_skb_from_znotifier(znotif);
+	serr = SKB_EXT_ERR(skb);
+	memset(&serr->ee, 0, sizeof(serr->ee));
+	serr->ee.ee_errno = 0;
+	serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
+	serr->ee.ee_info = 0;
+	WARN_ON(!skb_zcookie_add(skb, cookie));
+
+	__skb_queue_tail(q, skb);
+
+	spin_unlock_irqrestore(&q->lock, flags);
+	sk->sk_error_report(sk);
+
+	mm_unaccount_pinned_pages(&znotif->z_mmp);
+}
+
 /*
  * This relies on dma_map_sg() not touching sg[].page during merging.
  */
 static void rds_message_purge(struct rds_message *rm)
 {
 	unsigned long i, flags;
+	bool zcopy = false;
 
 	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
 		return;
 
-	for (i = 0; i < rm->data.op_nents; i++) {
-		rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
-		/* XXX will have to put_page for page refs */
-		__free_page(sg_page(&rm->data.op_sg[i]));
-	}
-	rm->data.op_nents = 0;
 	spin_lock_irqsave(&rm->m_rs_lock, flags);
 	if (rm->m_rs) {
-		sock_put(rds_rs_to_sk(rm->m_rs));
+		struct rds_sock *rs = rm->m_rs;
+
+		if (rm->data.op_mmp_znotifier) {
+			zcopy = true;
+			rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+			rm->data.op_mmp_znotifier = NULL;
+		}
+		sock_put(rds_rs_to_sk(rs));
 		rm->m_rs = NULL;
 	}
 	spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 
+	for (i = 0; i < rm->data.op_nents; i++) {
+		/* XXX will have to put_page for page refs */
+		if (!zcopy)
+			__free_page(sg_page(&rm->data.op_sg[i]));
+		else
+			put_page(sg_page(&rm->data.op_sg[i]));
+	}
+	rm->data.op_nents = 0;
+
 	if (rm->rdma.op_active)
 		rds_rdma_free_op(&rm->rdma);
 	if (rm->rdma.op_rdma_mr)
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 7301b9b01890..24576bc4a5e9 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -356,6 +356,19 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 #define RDS_MSG_PAGEVEC		7
 #define RDS_MSG_FLUSH		8
 
+struct rds_znotifier {
+	struct list_head	z_list;
+	struct mmpin		z_mmp;
+	u32			z_cookie;
+};
+
+#define	RDS_ZCOPY_SKB(__skb)	((struct rds_znotifier *)&((__skb)->cb[0]))
+
+static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z)
+{
+	return container_of((void *)z, struct sk_buff, cb);
+}
+
 struct rds_message {
 	refcount_t		m_refcount;
 	struct list_head	m_sock_item;
@@ -436,6 +449,7 @@ struct rds_message {
 			unsigned int		op_count;
 			unsigned int		op_dmasg;
 			unsigned int		op_dmaoff;
+			struct rds_znotifier	*op_mmp_znotifier;
 			struct scatterlist	*op_sg;
 		} data;
 	};
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b25bcfe411ca..b080961464df 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -594,6 +594,8 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 	if (msg_flags & MSG_OOB)
 		goto out;
+	if (msg_flags & MSG_ERRQUEUE)
+		return sock_recv_errqueue(sk, msg, size, SOL_IP, IP_RECVERR);
 
 	while (1) {
 		/* If there are pending notifications, do those - and nothing else */

From 0cebaccef3acbdfbc2d85880a2efb765d2f4e2e3 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:36 -0800
Subject: [PATCH 0178/1678] rds: zerocopy Tx support.

If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and,
if the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
application pages sent down with rds_sendmsg() are pinned.

The pinning uses the accounting infrastructure added by
Commit a91dbff551a6 ("sock: ulimit on MSG_ZEROCOPY pages")

The payload bytes in the message may not be modified for the
duration that the message has been pinned. A multi-threaded
application using this infrastructure may thus need to be notified
about send-completion so that it can free/reuse the buffers
passed to rds_sendmsg(). Notification of send-completion will
identify each message-buffer by a cookie that the application
must specify as ancillary data to rds_sendmsg().
The ancillary data in this case has cmsg_level == SOL_RDS
and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rds.h |  1 +
 net/rds/message.c        | 51 +++++++++++++++++++++++++++++++++++++++-
 net/rds/rds.h            |  3 ++-
 net/rds/send.c           | 44 +++++++++++++++++++++++++++++-----
 4 files changed, 91 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index e71d4491f225..12e3bca32cad 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -103,6 +103,7 @@
 #define RDS_CMSG_MASKED_ATOMIC_FADD	8
 #define RDS_CMSG_MASKED_ATOMIC_CSWP	9
 #define RDS_CMSG_RXPATH_LATENCY		11
+#define	RDS_CMSG_ZCOPY_COOKIE		12
 
 #define RDS_INFO_FIRST			10000
 #define RDS_INFO_COUNTERS		10000
diff --git a/net/rds/message.c b/net/rds/message.c
index bf1a656b198a..651834513481 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -341,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	return rm;
 }
 
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+			       bool zcopy)
 {
 	unsigned long to_copy, nbytes;
 	unsigned long sg_off;
 	struct scatterlist *sg;
 	int ret = 0;
+	int length = iov_iter_count(from);
 
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -356,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
 	sg = rm->data.op_sg;
 	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
+	if (zcopy) {
+		int total_copied = 0;
+		struct sk_buff *skb;
+
+		skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
+				GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+		rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+		if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+					    length)) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		while (iov_iter_count(from)) {
+			struct page *pages;
+			size_t start;
+			ssize_t copied;
+
+			copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+						    1, &start);
+			if (copied < 0) {
+				struct mmpin *mmp;
+				int i;
+
+				for (i = 0; i < rm->data.op_nents; i++)
+					put_page(sg_page(&rm->data.op_sg[i]));
+				mmp = &rm->data.op_mmp_znotifier->z_mmp;
+				mm_unaccount_pinned_pages(mmp);
+				ret = -EFAULT;
+				goto err;
+			}
+			total_copied += copied;
+			iov_iter_advance(from, copied);
+			length -= copied;
+			sg_set_page(sg, pages, copied, start);
+			rm->data.op_nents++;
+			sg++;
+		}
+		WARN_ON_ONCE(length != 0);
+		return ret;
+err:
+		consume_skb(skb);
+		rm->data.op_mmp_znotifier = NULL;
+		return ret;
+	} /* zcopy */
+
 	while (iov_iter_count(from)) {
 		if (!sg_page(sg)) {
 			ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 24576bc4a5e9..31cd38852050 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -785,7 +785,8 @@ rds_conn_connecting(struct rds_connection *conn)
 /* message.c */
 struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
 struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+			       bool zcopy);
 struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
 void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
 				 __be16 dport, u64 seq);
diff --git a/net/rds/send.c b/net/rds/send.c
index e8f3ff471b15..028ab598ac1b 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -875,12 +875,13 @@ out:
  * rds_message is getting to be quite complicated, and we'd like to allocate
  * it all in one go. This figures out how big it needs to be up front.
  */
-static int rds_rm_size(struct msghdr *msg, int data_len)
+static int rds_rm_size(struct msghdr *msg, int num_sgs)
 {
 	struct cmsghdr *cmsg;
 	int size = 0;
 	int cmsg_groups = 0;
 	int retval;
+	bool zcopy_cookie = false;
 
 	for_each_cmsghdr(cmsg, msg) {
 		if (!CMSG_OK(msg, cmsg))
@@ -899,6 +900,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 
 			break;
 
+		case RDS_CMSG_ZCOPY_COOKIE:
+			zcopy_cookie = true;
 		case RDS_CMSG_RDMA_DEST:
 		case RDS_CMSG_RDMA_MAP:
 			cmsg_groups |= 2;
@@ -919,7 +922,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 
 	}
 
-	size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
+	if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
+		return -EINVAL;
+
+	size += num_sgs * sizeof(struct scatterlist);
 
 	/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
 	if (cmsg_groups == 3)
@@ -928,6 +934,18 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
 	return size;
 }
 
+static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
+			  struct cmsghdr *cmsg)
+{
+	u32 *cookie;
+
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
+		return -EINVAL;
+	cookie = CMSG_DATA(cmsg);
+	rm->data.op_mmp_znotifier->z_cookie = *cookie;
+	return 0;
+}
+
 static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			 struct msghdr *msg, int *allocated_mr)
 {
@@ -970,6 +988,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
 			ret = rds_cmsg_atomic(rs, rm, cmsg);
 			break;
 
+		case RDS_CMSG_ZCOPY_COOKIE:
+			ret = rds_cmsg_zcopy(rs, rm, cmsg);
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -1040,10 +1062,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	long timeo = sock_sndtimeo(sk, nonblock);
 	struct rds_conn_path *cpath;
 	size_t total_payload_len = payload_len, rdma_payload_len = 0;
+	bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
+		      sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
+	int num_sgs = ceil(payload_len, PAGE_SIZE);
 
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
-	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -1087,8 +1112,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		goto out;
 	}
 
+	if (zcopy) {
+		if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+		num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
+	}
 	/* size of rm including all sgs */
-	ret = rds_rm_size(msg, payload_len);
+	ret = rds_rm_size(msg, num_sgs);
 	if (ret < 0)
 		goto out;
 
@@ -1100,12 +1132,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
 	/* Attach data to the rm */
 	if (payload_len) {
-		rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+		rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
 		if (!rm->data.op_sg) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		ret = rds_message_copy_from_user(rm, &msg->msg_iter);
+		ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
 		if (ret)
 			goto out;
 	}

From b16ac920403e5623150e428e7f5429e8eb607308 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:37 -0800
Subject: [PATCH 0179/1678] selftests/net: add support for PF_RDS sockets

Add support for basic PF_RDS client-server testing in msg_zerocopy

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/msg_zerocopy.c | 65 +++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index e11fe84de0fd..7a5b35362f84 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -14,6 +14,9 @@
  * - SOCK_DGRAM
  * - SOCK_RAW
  *
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
  * Start this program on two connected hosts, one in send mode and
  * the other with option '-r' to put it in receiver mode.
  *
@@ -53,6 +56,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <linux/rds.h>
 
 #ifndef SO_EE_ORIGIN_ZEROCOPY
 #define SO_EE_ORIGIN_ZEROCOPY		5
@@ -300,10 +304,15 @@ static int do_setup_tx(int domain, int type, int protocol)
 	if (cfg_zerocopy)
 		do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
 
-	if (domain != PF_PACKET)
+	if (domain != PF_PACKET && domain != PF_RDS)
 		if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
 			error(1, errno, "connect");
 
+	if (domain == PF_RDS) {
+		if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+			error(1, errno, "bind");
+	}
+
 	return fd;
 }
 
@@ -444,6 +453,13 @@ static void do_tx(int domain, int type, int protocol)
 		msg.msg_iovlen++;
 	}
 
+	if (domain == PF_RDS) {
+		msg.msg_name = &cfg_dst_addr;
+		msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
+				    sizeof(struct sockaddr_in) :
+				    sizeof(struct sockaddr_in6));
+	}
+
 	iov[2].iov_base = payload;
 	iov[2].iov_len = cfg_payload_len;
 	msg.msg_iovlen++;
@@ -555,6 +571,40 @@ static void do_flush_datagram(int fd, int type)
 	bytes += cfg_payload_len;
 }
 
+
+static void do_recvmsg(int fd)
+{
+	int ret, off = 0;
+	char *buf;
+	struct iovec iov;
+	struct msghdr msg;
+	struct sockaddr_storage din;
+
+	buf = calloc(cfg_payload_len, sizeof(char));
+	iov.iov_base = buf;
+	iov.iov_len = cfg_payload_len;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = &din;
+	msg.msg_namelen = sizeof(din);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	ret = recvmsg(fd, &msg, MSG_TRUNC);
+
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != cfg_payload_len)
+		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+
+	if (memcmp(buf + off, payload, ret))
+		error(1, 0, "recv: data mismatch");
+
+	free(buf);
+	packets++;
+	bytes += cfg_payload_len;
+}
+
 static void do_rx(int domain, int type, int protocol)
 {
 	uint64_t tstop;
@@ -566,6 +616,8 @@ static void do_rx(int domain, int type, int protocol)
 	do {
 		if (type == SOCK_STREAM)
 			do_flush_tcp(fd);
+		else if (domain == PF_RDS)
+			do_recvmsg(fd);
 		else
 			do_flush_datagram(fd, type);
 
@@ -610,6 +662,7 @@ static void parse_opts(int argc, char **argv)
 				    40 /* max tcp options */;
 	int c;
 	char *daddr = NULL, *saddr = NULL;
+	char *cfg_test;
 
 	cfg_payload_len = max_payload_len;
 
@@ -667,6 +720,14 @@ static void parse_opts(int argc, char **argv)
 			break;
 		}
 	}
+
+	cfg_test = argv[argc - 1];
+	if (strcmp(cfg_test, "rds") == 0) {
+		if (!daddr)
+			error(1, 0, "-D <server addr> required for PF_RDS\n");
+		if (!cfg_rx && !saddr)
+			error(1, 0, "-S <client addr> required for PF_RDS\n");
+	}
 	setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
 	setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
 
@@ -699,6 +760,8 @@ int main(int argc, char **argv)
 		do_test(cfg_family, SOCK_STREAM, 0);
 	else if (!strcmp(cfg_test, "udp"))
 		do_test(cfg_family, SOCK_DGRAM, 0);
+	else if (!strcmp(cfg_test, "rds"))
+		do_test(PF_RDS, SOCK_SEQPACKET, 0);
 	else
 		error(1, 0, "unknown cfg_test %s", cfg_test);
 

From dfb8434b0a94651dac7adf373900561d8f6499e4 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Feb 2018 10:49:38 -0800
Subject: [PATCH 0180/1678] selftests/net: add zerocopy support for PF_RDS test
 case

Send a cookie with sendmsg() on PF_RDS sockets, and process the
returned batched cookies in do_recv_completion()

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/msg_zerocopy.c | 68 ++++++++++++++++++++--
 1 file changed, 64 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7a5b35362f84..5cc2a53bb71c 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -168,17 +168,39 @@ static int do_accept(int fd)
 	return fd;
 }
 
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+	struct cmsghdr *cm;
+
+	if (!msg->msg_control)
+		error(1, errno, "NULL cookie");
+	cm = (void *)msg->msg_control;
+	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+	cm->cmsg_level = SOL_RDS;
+	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 {
 	int ret, len, i, flags;
+	static uint32_t cookie;
+	char ckbuf[CMSG_SPACE(sizeof(cookie))];
 
 	len = 0;
 	for (i = 0; i < msg->msg_iovlen; i++)
 		len += msg->msg_iov[i].iov_len;
 
 	flags = MSG_DONTWAIT;
-	if (do_zerocopy)
+	if (do_zerocopy) {
 		flags |= MSG_ZEROCOPY;
+		if (domain == PF_RDS) {
+			memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+			msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+			msg->msg_control = (struct cmsghdr *)ckbuf;
+			add_zcopy_cookie(msg, ++cookie);
+		}
+	}
 
 	ret = sendmsg(fd, msg, flags);
 	if (ret == -1 && errno == EAGAIN)
@@ -194,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
 		if (do_zerocopy && ret)
 			expected_completions++;
 	}
+	if (do_zerocopy && domain == PF_RDS) {
+		msg->msg_control = NULL;
+		msg->msg_controllen = 0;
+	}
 
 	return true;
 }
@@ -220,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg)
 		msg->msg_iov[0].iov_len = payload_len + extra_len;
 		extra_len = 0;
 
-		do_sendmsg(fd, msg, do_zerocopy);
+		do_sendmsg(fd, msg, do_zerocopy,
+			   (cfg_dst_addr.ss_family == AF_INET ?
+			    PF_INET : PF_INET6));
 	}
 
 	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
@@ -316,6 +344,26 @@ static int do_setup_tx(int domain, int type, int protocol)
 	return fd;
 }
 
+static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
+				       uint32_t *ckbuf, size_t nbytes)
+{
+	int ncookies, i;
+
+	if (serr->ee_errno != 0)
+		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+	ncookies = serr->ee_data;
+	if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
+		error(1, 0, "Returned %d cookies, max expected %d\n",
+		      ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
+	if (nbytes != ncookies * sizeof(uint32_t))
+		error(1, 0, "Expected %d cookies, got %ld\n",
+		      ncookies, nbytes/sizeof(uint32_t));
+	for (i = 0; i < ncookies; i++)
+		if (cfg_verbose >= 2)
+			fprintf(stderr, "%d\n", ckbuf[i]);
+	return ncookies;
+}
+
 static bool do_recv_completion(int fd)
 {
 	struct sock_extended_err *serr;
@@ -324,10 +372,17 @@ static bool do_recv_completion(int fd)
 	uint32_t hi, lo, range;
 	int ret, zerocopy;
 	char control[100];
+	uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
+	struct iovec iov;
 
 	msg.msg_control = control;
 	msg.msg_controllen = sizeof(control);
 
+	iov.iov_base = ckbuf;
+	iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
 	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
 	if (ret == -1 && errno == EAGAIN)
 		return false;
@@ -346,6 +401,11 @@ static bool do_recv_completion(int fd)
 		      cm->cmsg_level, cm->cmsg_type);
 
 	serr = (void *) CMSG_DATA(cm);
+
+	if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
+		completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
+		return true;
+	}
 	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
 		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
 	if (serr->ee_errno != 0)
@@ -470,7 +530,7 @@ static void do_tx(int domain, int type, int protocol)
 		if (cfg_cork)
 			do_sendmsg_corked(fd, &msg);
 		else
-			do_sendmsg(fd, &msg, cfg_zerocopy);
+			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
 
 		while (!do_poll(fd, POLLOUT)) {
 			if (cfg_zerocopy)

From 1af85155813622767d223af6d4dff283ebeea7a7 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:53 -0500
Subject: [PATCH 0181/1678] net: sched: act: fix code style

This patch is used by subsequent patches. It fixes code style issues
caught by checkpatch.

Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h  |  5 +++--
 net/sched/act_api.c    | 12 ++++++------
 net/sched/act_mirred.c |  6 +++---
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 6ed9692f20bd..32ef544f4ddc 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,12 +87,13 @@ struct tc_action_ops {
 		       struct tcf_result *);
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
-	int     (*lookup)(struct net *, struct tc_action **, u32);
+	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
 			int bind);
 	int     (*walk)(struct net *, struct sk_buff *,
-			struct netlink_callback *, int, const struct tc_action_ops *);
+			struct netlink_callback *, int,
+			const struct tc_action_ops *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 624995564e5a..a32e6c2edbf6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -621,7 +621,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 			goto err_out;
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
-		if (kind == NULL)
+		if (!kind)
 			goto err_out;
 		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
 			goto err_out;
@@ -822,7 +822,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (nest == NULL)
+	if (!nest)
 		goto out_nlmsg_trim;
 
 	if (tcf_action_dump(skb, actions, bind, ref) < 0)
@@ -934,7 +934,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (nest == NULL)
+	if (!nest)
 		goto out_module_put;
 
 	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
@@ -1007,10 +1007,10 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
-		if (tb[1] != NULL)
+		if (tb[1])
 			return tca_action_flush(net, tb[1], n, portid);
-		else
-			return -EINVAL;
+
+		return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e6ff88f72900..abcd5f12b913 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -80,12 +80,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	int ret;
 
-	if (nla == NULL)
+	if (!nla)
 		return -EINVAL;
 	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
 	if (ret < 0)
 		return ret;
-	if (tb[TCA_MIRRED_PARMS] == NULL)
+	if (!tb[TCA_MIRRED_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
@@ -117,7 +117,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		if (dev == NULL)
+		if (!dev)
 			return -EINVAL;
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);

From aea0d727899140820a631bac78f36e9d9ef15ef6 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:54 -0500
Subject: [PATCH 0182/1678] net: sched: act: add extack to init

This patch adds extack to tcf_action_init and tcf_action_init_1
functions. These are necessary to make individual extack handling in
each act implementation.

Based on work by David Ahern <dsahern@gmail.com>

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  5 +++--
 net/sched/act_api.c   | 17 +++++++++++------
 net/sched/cls_api.c   |  4 ++--
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 32ef544f4ddc..41d95930ffbc 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -163,10 +163,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions);
+		    struct list_head *actions, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind);
+				    char *name, int ovr, int bind,
+				    struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a32e6c2edbf6..662574646256 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -605,7 +605,8 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
-				    char *name, int ovr, int bind)
+				    char *name, int ovr, int bind,
+				    struct netlink_ext_ack *extack)
 {
 	struct tc_action *a;
 	struct tc_action_ops *a_o;
@@ -726,7 +727,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions)
+		    struct list_head *actions, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -738,7 +739,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		return err;
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
+		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
+					extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -1062,12 +1064,14 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-			  struct nlmsghdr *n, u32 portid, int ovr)
+			  struct nlmsghdr *n, u32 portid, int ovr,
+			  struct netlink_ext_ack *extack)
 {
 	int ret = 0;
 	LIST_HEAD(actions);
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+			      extack);
 	if (ret)
 		return ret;
 
@@ -1115,7 +1119,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
+		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+				     extack);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2bc1bc23d42e..f21610c5da1a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1434,7 +1434,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND);
+						TCA_ACT_BIND, extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -1447,7 +1447,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions);
+					      &actions, extack);
 			if (err)
 				return err;
 			list_for_each_entry(act, &actions, list)

From 84ae017a00776486390e2c0cceb4f717c3f809c1 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:55 -0500
Subject: [PATCH 0183/1678] net: sched: act: handle generic action errors

This patch adds extack support for generic act handling. The extack
will be set deeper to each called function which is not part of netdev
core api.

Based on work by David Ahern <dsahern@gmail.com>

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 93 +++++++++++++++++++++++++++++----------------
 1 file changed, 61 insertions(+), 32 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 662574646256..8e77ddd9f0ad 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -617,31 +617,40 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	int err;
 
 	if (name == NULL) {
-		err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+		err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
 		if (err < 0)
 			goto err_out;
 		err = -EINVAL;
 		kind = tb[TCA_ACT_KIND];
-		if (!kind)
+		if (!kind) {
+			NL_SET_ERR_MSG(extack, "TC action kind must be specified");
 			goto err_out;
-		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+		}
+		if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+			NL_SET_ERR_MSG(extack, "TC action name too long");
 			goto err_out;
+		}
 		if (tb[TCA_ACT_COOKIE]) {
 			int cklen = nla_len(tb[TCA_ACT_COOKIE]);
 
-			if (cklen > TC_COOKIE_MAX_SIZE)
+			if (cklen > TC_COOKIE_MAX_SIZE) {
+				NL_SET_ERR_MSG(extack, "TC cookie size above the maximum");
 				goto err_out;
+			}
 
 			cookie = nla_memdup_cookie(tb);
 			if (!cookie) {
+				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
 				err = -ENOMEM;
 				goto err_out;
 			}
 		}
 	} else {
-		err = -EINVAL;
-		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+		if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+			NL_SET_ERR_MSG(extack, "TC action name too long");
+			err = -EINVAL;
 			goto err_out;
+		}
 	}
 
 	a_o = tc_lookup_action_n(act_name);
@@ -664,6 +673,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 			goto err_mod;
 		}
 #endif
+		NL_SET_ERR_MSG(extack, "Failed to load TC action module");
 		err = -ENOENT;
 		goto err_out;
 	}
@@ -698,6 +708,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 
 			list_add_tail(&a->list, &actions);
 			tcf_action_destroy(&actions, bind);
+			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
 	}
@@ -734,7 +745,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 	int err;
 	int i;
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (err < 0)
 		return err;
 
@@ -842,7 +853,8 @@ out_nlmsg_trim:
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-	       struct list_head *actions, int event)
+	       struct list_head *actions, int event,
+	       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 
@@ -851,6 +863,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 		return -ENOBUFS;
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
 			 0, 0) <= 0) {
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -859,7 +872,8 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 }
 
 static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
-					  struct nlmsghdr *n, u32 portid)
+					  struct nlmsghdr *n, u32 portid,
+					  struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX + 1];
 	const struct tc_action_ops *ops;
@@ -867,20 +881,24 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
 	int index;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
 	if (err < 0)
 		goto err_out;
 
 	err = -EINVAL;
 	if (tb[TCA_ACT_INDEX] == NULL ||
-	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
+		NL_SET_ERR_MSG(extack, "Invalid TC action index value");
 		goto err_out;
+	}
 	index = nla_get_u32(tb[TCA_ACT_INDEX]);
 
 	err = -EINVAL;
 	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
-	if (!ops) /* could happen in batch of actions */
+	if (!ops) { /* could happen in batch of actions */
+		NL_SET_ERR_MSG(extack, "Specified TC action not found");
 		goto err_out;
+	}
 	err = -ENOENT;
 	if (ops->lookup(net, &a, index) == 0)
 		goto err_mod;
@@ -895,7 +913,8 @@ err_out:
 }
 
 static int tca_action_flush(struct net *net, struct nlattr *nla,
-			    struct nlmsghdr *n, u32 portid)
+			    struct nlmsghdr *n, u32 portid,
+			    struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	unsigned char *b;
@@ -909,35 +928,39 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	int err = -ENOMEM;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb) {
-		pr_debug("tca_action_flush: failed skb alloc\n");
+	if (!skb)
 		return err;
-	}
 
 	b = skb_tail_pointer(skb);
 
-	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, NULL);
+	err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL, extack);
 	if (err < 0)
 		goto err_out;
 
 	err = -EINVAL;
 	kind = tb[TCA_ACT_KIND];
 	ops = tc_lookup_action(kind);
-	if (!ops) /*some idjot trying to flush unknown action */
+	if (!ops) { /*some idjot trying to flush unknown action */
+		NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
 		goto err_out;
+	}
 
 	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
 			sizeof(*t), 0);
-	if (!nlh)
+	if (!nlh) {
+		NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
 		goto out_module_put;
+	}
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
-	if (!nest)
+	if (!nest) {
+		NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
 		goto out_module_put;
+	}
 
 	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
 	if (err <= 0) {
@@ -954,6 +977,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 			     n->nlmsg_flags & NLM_F_ECHO);
 	if (err > 0)
 		return 0;
+	if (err < 0)
+		NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
 
 	return err;
 
@@ -966,7 +991,7 @@ err_out:
 
 static int
 tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid)
+	       u32 portid, struct netlink_ext_ack *extack)
 {
 	int ret;
 	struct sk_buff *skb;
@@ -977,6 +1002,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
 			 0, 1) <= 0) {
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -984,6 +1010,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 	/* now do the delete */
 	ret = tcf_action_destroy(actions, 0);
 	if (ret < 0) {
+		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
 		kfree_skb(skb);
 		return ret;
 	}
@@ -997,26 +1024,27 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 static int
 tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
-	      u32 portid, int event)
+	      u32 portid, int event, struct netlink_ext_ack *extack)
 {
 	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	LIST_HEAD(actions);
 
-	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, NULL);
+	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (ret < 0)
 		return ret;
 
 	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
 		if (tb[1])
-			return tca_action_flush(net, tb[1], n, portid);
+			return tca_action_flush(net, tb[1], n, portid, extack);
 
+		NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
 		return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_get_1(net, tb[i], n, portid);
+		act = tcf_action_get_1(net, tb[i], n, portid, extack);
 		if (IS_ERR(act)) {
 			ret = PTR_ERR(act);
 			goto err;
@@ -1026,9 +1054,9 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	}
 
 	if (event == RTM_GETACTION)
-		ret = tcf_get_notify(net, portid, n, &actions, event);
+		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
 	else { /* delete */
-		ret = tcf_del_notify(net, n, &actions, portid);
+		ret = tcf_del_notify(net, n, &actions, portid, extack);
 		if (ret)
 			goto err;
 		return ret;
@@ -1041,7 +1069,7 @@ err:
 
 static int
 tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid)
+	       u32 portid, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	int err = 0;
@@ -1052,6 +1080,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
 			 RTM_NEWACTION, 0, 0) <= 0) {
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -1075,7 +1104,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	if (ret)
 		return ret;
 
-	return tcf_add_notify(net, n, &actions, portid);
+	return tcf_add_notify(net, n, &actions, portid, extack);
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
@@ -1103,7 +1132,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 		return ret;
 
 	if (tca[TCA_ACT_TAB] == NULL) {
-		pr_notice("tc_ctl_action: received NO action attribs\n");
+		NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
 		return -EINVAL;
 	}
 
@@ -1126,11 +1155,11 @@ replay:
 		break;
 	case RTM_DELACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-				    portid, RTM_DELACTION);
+				    portid, RTM_DELACTION, extack);
 		break;
 	case RTM_GETACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-				    portid, RTM_GETACTION);
+				    portid, RTM_GETACTION, extack);
 		break;
 	default:
 		BUG();

From 589dad6d71a72dd7912e5070c63f6bf1f561b5cf Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:56 -0500
Subject: [PATCH 0184/1678] net: sched: act: add extack to init callback

This patch adds extack support for act init callback api. This
prepares to handle extack support inside each specific act
implementation.

Based on work by David Ahern <dsahern@gmail.com>

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      | 2 +-
 net/sched/act_api.c        | 5 +++--
 net/sched/act_bpf.c        | 2 +-
 net/sched/act_connmark.c   | 3 ++-
 net/sched/act_csum.c       | 2 +-
 net/sched/act_gact.c       | 2 +-
 net/sched/act_ife.c        | 2 +-
 net/sched/act_ipt.c        | 4 ++--
 net/sched/act_mirred.c     | 2 +-
 net/sched/act_nat.c        | 3 ++-
 net/sched/act_pedit.c      | 2 +-
 net/sched/act_police.c     | 3 ++-
 net/sched/act_sample.c     | 2 +-
 net/sched/act_simple.c     | 2 +-
 net/sched/act_skbedit.c    | 2 +-
 net/sched/act_skbmod.c     | 2 +-
 net/sched/act_tunnel_key.c | 2 +-
 net/sched/act_vlan.c       | 2 +-
 18 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 41d95930ffbc..3717e0f2bb1b 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -90,7 +90,7 @@ struct tc_action_ops {
 	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
-			int bind);
+			int bind, struct netlink_ext_ack *extack);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int,
 			const struct tc_action_ops *);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8e77ddd9f0ad..576a0c311e5e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -680,9 +680,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 
 	/* backward compatibility for policer */
 	if (name == NULL)
-		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
+		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
+				extack);
 	else
-		err = a_o->init(net, nla, est, &a, ovr, bind);
+		err = a_o->init(net, nla, est, &a, ovr, bind, extack);
 	if (err < 0)
 		goto err_mod;
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b3f2c15affa7..b3ebfa9598e2 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -272,7 +272,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
-			int replace, int bind)
+			int replace, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2b15ba84e0c8..20e0215360b5 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,8 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
-			     int ovr, int bind)
+			     int ovr, int bind,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 	struct nlattr *tb[TCA_CONNMARK_MAX + 1];
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b7ba9b06b147..3b8c48bb2683 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind)
+			 int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 	struct tcf_csum_params *params_old, *params_new;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b56986d41c87..912f3398f1c1 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind)
+			 int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 	struct nlattr *tb[TCA_GACT_MAX + 1];
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5954e992685a..e5127d400737 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -447,7 +447,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
-			int ovr, int bind)
+			int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 06e380ae0928..6894cfa83863 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -193,7 +193,7 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind)
+			int bind, struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
 			      bind);
@@ -201,7 +201,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind)
+		       int bind, struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
 			      bind);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index abcd5f12b913..7ccd4c71179f 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -69,7 +69,7 @@ static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind)
+			   int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 98c6a4b2f523..7e5ebd7f52a6 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -37,7 +37,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 };
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-			struct tc_action **a, int ovr, int bind)
+			struct tc_action **a, int ovr, int bind,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 349beaffb29e..bb2c35ed6f10 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,7 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  int ovr, int bind)
+			  int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 95d3c9097b25..6b6facbe251b 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -74,7 +74,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
-			       int ovr, int bind)
+			       int ovr, int bind,
+			       struct netlink_ext_ack *extack)
 {
 	int ret = 0, err;
 	struct nlattr *tb[TCA_POLICE_MAX + 1];
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1ba0df238756..f4579ceba1f6 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind)
+			   int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 425eac11f6da..b0346347c5f0 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind)
+			 int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 5a3f691bb545..7651c9d2182d 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -66,7 +66,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
-			    int ovr, int bind)
+			    int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index fa975262dbac..1266449aa8ea 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind)
+			   int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0e23aac09ad6..dde01eca7ed0 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -70,7 +70,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind)
+			   int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index e1a1b3f3983a..6c387310b1b6 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind)
+			 int ovr, int bind, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];

From 331a9295de23a9428adb7f593d0701d393a2079e Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:57 -0500
Subject: [PATCH 0185/1678] net: sched: act: add extack for lookup callback

This patch adds extack support for act lookup callback api. This
prepares to handle extack support inside each specific act
implementation.

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      | 3 ++-
 net/sched/act_api.c        | 2 +-
 net/sched/act_bpf.c        | 3 ++-
 net/sched/act_connmark.c   | 3 ++-
 net/sched/act_csum.c       | 3 ++-
 net/sched/act_gact.c       | 3 ++-
 net/sched/act_ife.c        | 3 ++-
 net/sched/act_ipt.c        | 6 ++++--
 net/sched/act_mirred.c     | 3 ++-
 net/sched/act_nat.c        | 3 ++-
 net/sched/act_pedit.c      | 3 ++-
 net/sched/act_police.c     | 3 ++-
 net/sched/act_sample.c     | 3 ++-
 net/sched/act_simple.c     | 3 ++-
 net/sched/act_skbedit.c    | 3 ++-
 net/sched/act_skbmod.c     | 3 ++-
 net/sched/act_tunnel_key.c | 3 ++-
 net/sched/act_vlan.c       | 3 ++-
 18 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3717e0f2bb1b..0bd65db506ba 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -87,7 +87,8 @@ struct tc_action_ops {
 		       struct tcf_result *);
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
-	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
+	int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
 			int bind, struct netlink_ext_ack *extack);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 576a0c311e5e..74ed1e288e57 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -901,7 +901,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
 		goto err_out;
 	}
 	err = -ENOENT;
-	if (ops->lookup(net, &a, index) == 0)
+	if (ops->lookup(net, &a, index, extack) == 0)
 		goto err_mod;
 
 	module_put(ops->owner);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b3ebfa9598e2..d9654b863347 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -374,7 +374,8 @@ static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 20e0215360b5..0504b7600fb6 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -184,7 +184,8 @@ static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
+			       struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3b8c48bb2683..bdd17b9ef034 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -638,7 +638,8 @@ static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 912f3398f1c1..e1e69e38f4b0 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -208,7 +208,8 @@ static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index e5127d400737..0b70fb0cc609 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -831,7 +831,8 @@ static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6894cfa83863..f29af79a2d1f 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -310,7 +310,8 @@ static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
@@ -358,7 +359,8 @@ static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7ccd4c71179f..9980c6affb5e 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -272,7 +272,8 @@ static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7e5ebd7f52a6..6f6d7667ef9a 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -285,7 +285,8 @@ static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index bb2c35ed6f10..308b2680a6d9 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -426,7 +426,8 @@ static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 6b6facbe251b..1292f880eab0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -305,7 +305,8 @@ nla_put_failure:
 	return -1;
 }
 
-static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index f4579ceba1f6..22379b2cfd1a 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -209,7 +209,8 @@ static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index b0346347c5f0..3ebf71470977 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -177,7 +177,8 @@ static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 7651c9d2182d..ff1970e14016 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -215,7 +215,8 @@ static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
+			      struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 1266449aa8ea..110d7c1f823d 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -239,7 +239,8 @@ static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index dde01eca7ed0..65a19928f1a9 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -298,7 +298,8 @@ static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 6c387310b1b6..03dcbbc5ffd2 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -274,7 +274,8 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
 	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
-static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
+static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 

From 417801055b8cb4c052e989289ccf24a673178bbc Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:58 -0500
Subject: [PATCH 0186/1678] net: sched: act: add extack for walk callback

This patch adds extack support for act walker callback api. This
prepares to handle extack support inside each specific act
implementation.

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      | 3 ++-
 net/sched/act_api.c        | 4 ++--
 net/sched/act_bpf.c        | 3 ++-
 net/sched/act_connmark.c   | 3 ++-
 net/sched/act_csum.c       | 3 ++-
 net/sched/act_gact.c       | 3 ++-
 net/sched/act_ife.c        | 3 ++-
 net/sched/act_ipt.c        | 6 ++++--
 net/sched/act_mirred.c     | 3 ++-
 net/sched/act_nat.c        | 3 ++-
 net/sched/act_pedit.c      | 3 ++-
 net/sched/act_police.c     | 3 ++-
 net/sched/act_sample.c     | 3 ++-
 net/sched/act_simple.c     | 3 ++-
 net/sched/act_skbedit.c    | 3 ++-
 net/sched/act_skbmod.c     | 3 ++-
 net/sched/act_tunnel_key.c | 3 ++-
 net/sched/act_vlan.c       | 3 ++-
 18 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 0bd65db506ba..ab3529255377 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -94,7 +94,8 @@ struct tc_action_ops {
 			int bind, struct netlink_ext_ack *extack);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int,
-			const struct tc_action_ops *);
+			const struct tc_action_ops *,
+			struct netlink_ext_ack *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 74ed1e288e57..ab107997b259 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -963,7 +963,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 		goto out_module_put;
 	}
 
-	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
+	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
 	if (err <= 0) {
 		nla_nest_cancel(skb, nest);
 		goto out_module_put;
@@ -1255,7 +1255,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (nest == NULL)
 		goto out_module_put;
 
-	ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
+	ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
 	if (ret < 0)
 		goto out_module_put;
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index d9654b863347..7e01e2c710c4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -367,7 +367,8 @@ static void tcf_bpf_cleanup(struct tc_action *act)
 
 static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 0504b7600fb6..cb722da0bb15 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -177,7 +177,8 @@ nla_put_failure:
 
 static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
 			       struct netlink_callback *cb, int type,
-			       const struct tc_action_ops *ops)
+			       const struct tc_action_ops *ops,
+			       struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index bdd17b9ef034..3e8efadb750f 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -631,7 +631,8 @@ static void tcf_csum_cleanup(struct tc_action *a)
 
 static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e1e69e38f4b0..d96ebe4bb65a 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -201,7 +201,8 @@ nla_put_failure:
 
 static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 0b70fb0cc609..b777e381e0dd 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -824,7 +824,8 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
 
 static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index f29af79a2d1f..f33a8cc5dee6 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -303,7 +303,8 @@ nla_put_failure:
 
 static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
@@ -352,7 +353,8 @@ static struct pernet_operations ipt_net_ops = {
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
 			 struct netlink_callback *cb, int type,
-			 const struct tc_action_ops *ops)
+			 const struct tc_action_ops *ops,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 9980c6affb5e..3dcd295ea6a7 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -265,7 +265,8 @@ nla_put_failure:
 
 static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 6f6d7667ef9a..67243cdc0588 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -278,7 +278,8 @@ nla_put_failure:
 
 static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
 			  struct netlink_callback *cb, int type,
-			  const struct tc_action_ops *ops)
+			  const struct tc_action_ops *ops,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 308b2680a6d9..6d6481f6bffa 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -419,7 +419,8 @@ nla_put_failure:
 
 static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
 			    struct netlink_callback *cb, int type,
-			    const struct tc_action_ops *ops)
+			    const struct tc_action_ops *ops,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 1292f880eab0..ff803414a736 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -58,7 +58,8 @@ static struct tc_action_ops act_police_ops;
 
 static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
 				 struct netlink_callback *cb, int type,
-				 const struct tc_action_ops *ops)
+				 const struct tc_action_ops *ops,
+				 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 22379b2cfd1a..7a2b6a33f239 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -202,7 +202,8 @@ nla_put_failure:
 
 static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3ebf71470977..3f5474d20702 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -170,7 +170,8 @@ nla_put_failure:
 
 static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ff1970e14016..d99b6f1f5181 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -208,7 +208,8 @@ nla_put_failure:
 
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 			      struct netlink_callback *cb, int type,
-			      const struct tc_action_ops *ops)
+			      const struct tc_action_ops *ops,
+			      struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 110d7c1f823d..369ea85d0f02 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -232,7 +232,8 @@ nla_put_failure:
 
 static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 65a19928f1a9..bced6fd00d43 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -291,7 +291,8 @@ nla_put_failure:
 
 static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
 			     struct netlink_callback *cb, int type,
-			     const struct tc_action_ops *ops)
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 03dcbbc5ffd2..7cf409443d02 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -267,7 +267,8 @@ nla_put_failure:
 
 static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
 			   struct netlink_callback *cb, int type,
-			   const struct tc_action_ops *ops)
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 

From b36201455aa0749e8708ef97ed9c1c9ece29a113 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:54:59 -0500
Subject: [PATCH 0187/1678] net: sched: act: handle extack in
 tcf_generic_walker

This patch adds extack handling for a common used TC act function
"tcf_generic_walker()" to add an extack message on failures.
The tcf_generic_walker() function can fail if get a invalid command
different than DEL and GET. The naming "action" here is wrong, the
correct naming would be command.

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      | 3 ++-
 net/sched/act_api.c        | 6 ++++--
 net/sched/act_bpf.c        | 2 +-
 net/sched/act_connmark.c   | 2 +-
 net/sched/act_csum.c       | 2 +-
 net/sched/act_gact.c       | 2 +-
 net/sched/act_ife.c        | 2 +-
 net/sched/act_ipt.c        | 4 ++--
 net/sched/act_mirred.c     | 2 +-
 net/sched/act_nat.c        | 2 +-
 net/sched/act_pedit.c      | 2 +-
 net/sched/act_police.c     | 2 +-
 net/sched/act_sample.c     | 2 +-
 net/sched/act_simple.c     | 2 +-
 net/sched/act_skbedit.c    | 2 +-
 net/sched/act_skbmod.c     | 2 +-
 net/sched/act_tunnel_key.c | 2 +-
 net/sched/act_vlan.c       | 2 +-
 18 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index ab3529255377..9c2f22695025 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -140,7 +140,8 @@ static inline void tc_action_net_exit(struct list_head *net_list,
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 		       struct netlink_callback *cb, int type,
-		       const struct tc_action_ops *ops);
+		       const struct tc_action_ops *ops,
+		       struct netlink_ext_ack *extack);
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
 bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 		    int bind);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ab107997b259..1f65d6ada9ff 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -202,7 +202,8 @@ nla_put_failure:
 
 int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 		       struct netlink_callback *cb, int type,
-		       const struct tc_action_ops *ops)
+		       const struct tc_action_ops *ops,
+		       struct netlink_ext_ack *extack)
 {
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
@@ -211,7 +212,8 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 	} else if (type == RTM_GETACTION) {
 		return tcf_dump_walker(idrinfo, skb, cb);
 	} else {
-		WARN(1, "tcf_generic_walker: unknown action %d\n", type);
+		WARN(1, "tcf_generic_walker: unknown command %d\n", type);
+		NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
 		return -EINVAL;
 	}
 }
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 7e01e2c710c4..cb3c5d403c88 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -372,7 +372,7 @@ static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index cb722da0bb15..e4b880fa51fe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -182,7 +182,7 @@ static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3e8efadb750f..d5c2e528d150 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -636,7 +636,7 @@ static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index d96ebe4bb65a..f072bcf33760 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -206,7 +206,7 @@ static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index b777e381e0dd..a5994cf0512b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -829,7 +829,7 @@ static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index f33a8cc5dee6..9784629090ad 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -308,7 +308,7 @@ static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
@@ -358,7 +358,7 @@ static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3dcd295ea6a7..05c2ebe92eca 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -270,7 +270,7 @@ static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 67243cdc0588..4b5848b6c252 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -283,7 +283,7 @@ static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 6d6481f6bffa..094303c27c5e 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -424,7 +424,7 @@ static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ff803414a736..ff55bd6c7db0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -63,7 +63,7 @@ static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 7a2b6a33f239..9765145aaf40 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -207,7 +207,7 @@ static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3f5474d20702..8244e221fe4f 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -175,7 +175,7 @@ static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index d99b6f1f5181..ddf69fc01bdf 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -213,7 +213,7 @@ static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 369ea85d0f02..a406f191cb84 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -237,7 +237,7 @@ static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index bced6fd00d43..41ff9d0e5c62 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -296,7 +296,7 @@ static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 7cf409443d02..71411a255f04 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -272,7 +272,7 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tcf_generic_walker(tn, skb, cb, type, ops);
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
 }
 
 static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,

From 1d4760c75d4f8c7c73f040f0261a09cf1481fdec Mon Sep 17 00:00:00 2001
From: Alexander Aring <aring@mojatatu.com>
Date: Thu, 15 Feb 2018 10:55:00 -0500
Subject: [PATCH 0188/1678] net: sched: act: mirred: add extack support

This patch adds extack support for TC mirred action.

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 05c2ebe92eca..fd34015331ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -80,13 +80,17 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	int ret;
 
-	if (!nla)
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
 		return -EINVAL;
-	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, NULL);
+	}
+	ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy, extack);
 	if (ret < 0)
 		return ret;
-	if (!tb[TCA_MIRRED_PARMS])
+	if (!tb[TCA_MIRRED_PARMS]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing required mirred parameters");
 		return -EINVAL;
+	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
 	exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -102,6 +106,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
 		return -EINVAL;
 	}
 	if (parm->ifindex) {
@@ -117,8 +122,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 
 	if (!exists) {
-		if (!dev)
+		if (!dev) {
+			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
 			return -EINVAL;
+		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
 		if (ret)

From 014f9008989c0d6353e15621c6d3c18d6dfc1ab2 Mon Sep 17 00:00:00 2001
From: Michael Rapson <michael.rapson@netronome.com>
Date: Thu, 15 Feb 2018 20:19:08 -0800
Subject: [PATCH 0189/1678] nfp: standardize FW header whitespace

The nfp_net_ctrl.h file used spaces for indentation in the past but
tabs have crept in.  Host driver files use tabs for indentation by
default, so let's convert to tabs for consistency across the file
and our drivers.

Signed-off-by: Michael Rapson <michael.rapson@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 278 +++++++++---------
 1 file changed, 139 insertions(+), 139 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 4499a7333078..bb63c115537d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2017 Netronome Systems, Inc.
+ * Copyright (C) 2015-2018 Netronome Systems, Inc.
  *
  * This software is dual licensed under the GNU General License Version 2,
  * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -51,12 +51,12 @@
  * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
-#define NFP_NET_CFG_BAR_SZ              (32 * 1024)
+#define NFP_NET_CFG_BAR_SZ		(32 * 1024)
 
 /**
  * Offset in Freelist buffer where packet starts on RX
  */
-#define NFP_NET_RX_OFFSET               32
+#define NFP_NET_RX_OFFSET		32
 
 /**
  * LSO parameters
@@ -75,65 +75,65 @@
 #define NFP_NET_META_PORTID		5
 #define NFP_NET_META_CSUM		6 /* checksum complete type */
 
-#define	NFP_META_PORT_ID_CTRL		~0U
+#define NFP_META_PORT_ID_CTRL		~0U
 
 /**
  * Hash type pre-pended when a RSS hash was computed
  */
-#define NFP_NET_RSS_NONE                0
-#define NFP_NET_RSS_IPV4                1
-#define NFP_NET_RSS_IPV6                2
-#define NFP_NET_RSS_IPV6_EX             3
-#define NFP_NET_RSS_IPV4_TCP            4
-#define NFP_NET_RSS_IPV6_TCP            5
-#define NFP_NET_RSS_IPV6_EX_TCP         6
-#define NFP_NET_RSS_IPV4_UDP            7
-#define NFP_NET_RSS_IPV6_UDP            8
-#define NFP_NET_RSS_IPV6_EX_UDP         9
+#define NFP_NET_RSS_NONE		0
+#define NFP_NET_RSS_IPV4		1
+#define NFP_NET_RSS_IPV6		2
+#define NFP_NET_RSS_IPV6_EX		3
+#define NFP_NET_RSS_IPV4_TCP		4
+#define NFP_NET_RSS_IPV6_TCP		5
+#define NFP_NET_RSS_IPV6_EX_TCP		6
+#define NFP_NET_RSS_IPV4_UDP		7
+#define NFP_NET_RSS_IPV6_UDP		8
+#define NFP_NET_RSS_IPV6_EX_UDP		9
 
 /**
  * Ring counts
- * %NFP_NET_TXR_MAX:         Maximum number of TX rings
- * %NFP_NET_RXR_MAX:         Maximum number of RX rings
+ * %NFP_NET_TXR_MAX:	     Maximum number of TX rings
+ * %NFP_NET_RXR_MAX:	     Maximum number of RX rings
  */
-#define NFP_NET_TXR_MAX                 64
-#define NFP_NET_RXR_MAX                 64
+#define NFP_NET_TXR_MAX			64
+#define NFP_NET_RXR_MAX			64
 
 /**
  * Read/Write config words (0x0000 - 0x002c)
- * %NFP_NET_CFG_CTRL:        Global control
+ * %NFP_NET_CFG_CTRL:	     Global control
  * %NFP_NET_CFG_UPDATE:      Indicate which fields are updated
  * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings
  * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings
- * %NFP_NET_CFG_MTU:         Set MTU size
+ * %NFP_NET_CFG_MTU:	     Set MTU size
  * %NFP_NET_CFG_FLBUFSZ:     Set freelist buffer size (must be larger than MTU)
- * %NFP_NET_CFG_EXN:         MSI-X table entry for exceptions
- * %NFP_NET_CFG_LSC:         MSI-X table entry for link state changes
+ * %NFP_NET_CFG_EXN:	     MSI-X table entry for exceptions
+ * %NFP_NET_CFG_LSC:	     MSI-X table entry for link state changes
  * %NFP_NET_CFG_MACADDR:     MAC address
  *
  * TODO:
  * - define Error details in UPDATE
  */
-#define NFP_NET_CFG_CTRL                0x0000
-#define   NFP_NET_CFG_CTRL_ENABLE         (0x1 <<  0) /* Global enable */
-#define   NFP_NET_CFG_CTRL_PROMISC        (0x1 <<  1) /* Enable Promisc mode */
-#define   NFP_NET_CFG_CTRL_L2BC           (0x1 <<  2) /* Allow L2 Broadcast */
-#define   NFP_NET_CFG_CTRL_L2MC           (0x1 <<  3) /* Allow L2 Multicast */
-#define   NFP_NET_CFG_CTRL_RXCSUM         (0x1 <<  4) /* Enable RX Checksum */
-#define   NFP_NET_CFG_CTRL_TXCSUM         (0x1 <<  5) /* Enable TX Checksum */
-#define   NFP_NET_CFG_CTRL_RXVLAN         (0x1 <<  6) /* Enable VLAN strip */
-#define   NFP_NET_CFG_CTRL_TXVLAN         (0x1 <<  7) /* Enable VLAN insert */
-#define   NFP_NET_CFG_CTRL_SCATTER        (0x1 <<  8) /* Scatter DMA */
-#define   NFP_NET_CFG_CTRL_GATHER         (0x1 <<  9) /* Gather DMA */
-#define   NFP_NET_CFG_CTRL_LSO            (0x1 << 10) /* LSO/TSO (version 1) */
+#define NFP_NET_CFG_CTRL		0x0000
+#define   NFP_NET_CFG_CTRL_ENABLE	  (0x1 <<  0) /* Global enable */
+#define   NFP_NET_CFG_CTRL_PROMISC	  (0x1 <<  1) /* Enable Promisc mode */
+#define   NFP_NET_CFG_CTRL_L2BC		  (0x1 <<  2) /* Allow L2 Broadcast */
+#define   NFP_NET_CFG_CTRL_L2MC		  (0x1 <<  3) /* Allow L2 Multicast */
+#define   NFP_NET_CFG_CTRL_RXCSUM	  (0x1 <<  4) /* Enable RX Checksum */
+#define   NFP_NET_CFG_CTRL_TXCSUM	  (0x1 <<  5) /* Enable TX Checksum */
+#define   NFP_NET_CFG_CTRL_RXVLAN	  (0x1 <<  6) /* Enable VLAN strip */
+#define   NFP_NET_CFG_CTRL_TXVLAN	  (0x1 <<  7) /* Enable VLAN insert */
+#define   NFP_NET_CFG_CTRL_SCATTER	  (0x1 <<  8) /* Scatter DMA */
+#define   NFP_NET_CFG_CTRL_GATHER	  (0x1 <<  9) /* Gather DMA */
+#define   NFP_NET_CFG_CTRL_LSO		  (0x1 << 10) /* LSO/TSO (version 1) */
 #define   NFP_NET_CFG_CTRL_CTAG_FILTER	  (0x1 << 11) /* VLAN CTAG filtering */
-#define   NFP_NET_CFG_CTRL_RINGCFG        (0x1 << 16) /* Ring runtime changes */
+#define   NFP_NET_CFG_CTRL_RINGCFG	  (0x1 << 16) /* Ring runtime changes */
 #define   NFP_NET_CFG_CTRL_RSS		  (0x1 << 17) /* RSS (version 1) */
-#define   NFP_NET_CFG_CTRL_IRQMOD         (0x1 << 18) /* Interrupt moderation */
-#define   NFP_NET_CFG_CTRL_RINGPRIO       (0x1 << 19) /* Ring priorities */
-#define   NFP_NET_CFG_CTRL_MSIXAUTO       (0x1 << 20) /* MSI-X auto-masking */
-#define   NFP_NET_CFG_CTRL_TXRWB          (0x1 << 21) /* Write-back of TX ring*/
-#define   NFP_NET_CFG_CTRL_L2SWITCH       (0x1 << 22) /* L2 Switch */
+#define   NFP_NET_CFG_CTRL_IRQMOD	  (0x1 << 18) /* Interrupt moderation */
+#define   NFP_NET_CFG_CTRL_RINGPRIO	  (0x1 << 19) /* Ring priorities */
+#define   NFP_NET_CFG_CTRL_MSIXAUTO	  (0x1 << 20) /* MSI-X auto-masking */
+#define   NFP_NET_CFG_CTRL_TXRWB	  (0x1 << 21) /* Write-back of TX ring*/
+#define   NFP_NET_CFG_CTRL_L2SWITCH	  (0x1 << 22) /* L2 Switch */
 #define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define   NFP_NET_CFG_CTRL_VXLAN	  (0x1 << 24) /* VXLAN tunnel support */
 #define   NFP_NET_CFG_CTRL_NVGRE	  (0x1 << 25) /* NVGRE tunnel support */
@@ -152,35 +152,35 @@
 #define NFP_NET_CFG_CTRL_CHAIN_META	(NFP_NET_CFG_CTRL_RSS2 | \
 					 NFP_NET_CFG_CTRL_CSUM_COMPLETE)
 
-#define NFP_NET_CFG_UPDATE              0x0004
-#define   NFP_NET_CFG_UPDATE_GEN          (0x1 <<  0) /* General update */
-#define   NFP_NET_CFG_UPDATE_RING         (0x1 <<  1) /* Ring config change */
-#define   NFP_NET_CFG_UPDATE_RSS          (0x1 <<  2) /* RSS config change */
-#define   NFP_NET_CFG_UPDATE_TXRPRIO      (0x1 <<  3) /* TX Ring prio change */
-#define   NFP_NET_CFG_UPDATE_RXRPRIO      (0x1 <<  4) /* RX Ring prio change */
-#define   NFP_NET_CFG_UPDATE_MSIX         (0x1 <<  5) /* MSI-X change */
-#define   NFP_NET_CFG_UPDATE_L2SWITCH     (0x1 <<  6) /* Switch changes */
-#define   NFP_NET_CFG_UPDATE_RESET        (0x1 <<  7) /* Update due to FLR */
-#define   NFP_NET_CFG_UPDATE_IRQMOD       (0x1 <<  8) /* IRQ mod change */
+#define NFP_NET_CFG_UPDATE		0x0004
+#define   NFP_NET_CFG_UPDATE_GEN	  (0x1 <<  0) /* General update */
+#define   NFP_NET_CFG_UPDATE_RING	  (0x1 <<  1) /* Ring config change */
+#define   NFP_NET_CFG_UPDATE_RSS	  (0x1 <<  2) /* RSS config change */
+#define   NFP_NET_CFG_UPDATE_TXRPRIO	  (0x1 <<  3) /* TX Ring prio change */
+#define   NFP_NET_CFG_UPDATE_RXRPRIO	  (0x1 <<  4) /* RX Ring prio change */
+#define   NFP_NET_CFG_UPDATE_MSIX	  (0x1 <<  5) /* MSI-X change */
+#define   NFP_NET_CFG_UPDATE_L2SWITCH	  (0x1 <<  6) /* Switch changes */
+#define   NFP_NET_CFG_UPDATE_RESET	  (0x1 <<  7) /* Update due to FLR */
+#define   NFP_NET_CFG_UPDATE_IRQMOD	  (0x1 <<  8) /* IRQ mod change */
 #define   NFP_NET_CFG_UPDATE_VXLAN	  (0x1 <<  9) /* VXLAN port change */
 #define   NFP_NET_CFG_UPDATE_BPF	  (0x1 << 10) /* BPF program load */
 #define   NFP_NET_CFG_UPDATE_MACADDR	  (0x1 << 11) /* MAC address change */
 #define   NFP_NET_CFG_UPDATE_MBOX	  (0x1 << 12) /* Mailbox update */
 #define   NFP_NET_CFG_UPDATE_VF		  (0x1 << 13) /* VF settings change */
-#define   NFP_NET_CFG_UPDATE_ERR          (0x1 << 31) /* A error occurred */
-#define NFP_NET_CFG_TXRS_ENABLE         0x0008
-#define NFP_NET_CFG_RXRS_ENABLE         0x0010
-#define NFP_NET_CFG_MTU                 0x0018
-#define NFP_NET_CFG_FLBUFSZ             0x001c
-#define NFP_NET_CFG_EXN                 0x001f
-#define NFP_NET_CFG_LSC                 0x0020
-#define NFP_NET_CFG_MACADDR             0x0024
+#define   NFP_NET_CFG_UPDATE_ERR	  (0x1 << 31) /* A error occurred */
+#define NFP_NET_CFG_TXRS_ENABLE		0x0008
+#define NFP_NET_CFG_RXRS_ENABLE		0x0010
+#define NFP_NET_CFG_MTU			0x0018
+#define NFP_NET_CFG_FLBUFSZ		0x001c
+#define NFP_NET_CFG_EXN			0x001f
+#define NFP_NET_CFG_LSC			0x0020
+#define NFP_NET_CFG_MACADDR		0x0024
 
 /**
  * Read-only words (0x0030 - 0x0050):
  * %NFP_NET_CFG_VERSION:     Firmware version number
- * %NFP_NET_CFG_STS:         Status
- * %NFP_NET_CFG_CAP:         Capabilities (same bits as %NFP_NET_CFG_CTRL)
+ * %NFP_NET_CFG_STS:	     Status
+ * %NFP_NET_CFG_CAP:	     Capabilities (same bits as %NFP_NET_CFG_CTRL)
  * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
  * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
  * %NFP_NET_CFG_MAX_MTU:     Maximum support MTU
@@ -190,37 +190,37 @@
  * TODO:
  * - define more STS bits
  */
-#define NFP_NET_CFG_VERSION             0x0030
+#define NFP_NET_CFG_VERSION		0x0030
 #define   NFP_NET_CFG_VERSION_RESERVED_MASK	(0xff << 24)
 #define   NFP_NET_CFG_VERSION_CLASS_MASK  (0xff << 16)
-#define   NFP_NET_CFG_VERSION_CLASS(x)    (((x) & 0xff) << 16)
+#define   NFP_NET_CFG_VERSION_CLASS(x)	  (((x) & 0xff) << 16)
 #define   NFP_NET_CFG_VERSION_CLASS_GENERIC	0
 #define   NFP_NET_CFG_VERSION_MAJOR_MASK  (0xff <<  8)
-#define   NFP_NET_CFG_VERSION_MAJOR(x)    (((x) & 0xff) <<  8)
+#define   NFP_NET_CFG_VERSION_MAJOR(x)	  (((x) & 0xff) <<  8)
 #define   NFP_NET_CFG_VERSION_MINOR_MASK  (0xff <<  0)
-#define   NFP_NET_CFG_VERSION_MINOR(x)    (((x) & 0xff) <<  0)
-#define NFP_NET_CFG_STS                 0x0034
-#define   NFP_NET_CFG_STS_LINK            (0x1 << 0) /* Link up or down */
+#define   NFP_NET_CFG_VERSION_MINOR(x)	  (((x) & 0xff) <<  0)
+#define NFP_NET_CFG_STS			0x0034
+#define   NFP_NET_CFG_STS_LINK		  (0x1 << 0) /* Link up or down */
 /* Link rate */
 #define   NFP_NET_CFG_STS_LINK_RATE_SHIFT 1
 #define   NFP_NET_CFG_STS_LINK_RATE_MASK  0xF
-#define   NFP_NET_CFG_STS_LINK_RATE       \
+#define   NFP_NET_CFG_STS_LINK_RATE	  \
 	(NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT)
 #define   NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED   0
-#define   NFP_NET_CFG_STS_LINK_RATE_UNKNOWN       1
-#define   NFP_NET_CFG_STS_LINK_RATE_1G            2
-#define   NFP_NET_CFG_STS_LINK_RATE_10G           3
-#define   NFP_NET_CFG_STS_LINK_RATE_25G           4
-#define   NFP_NET_CFG_STS_LINK_RATE_40G           5
-#define   NFP_NET_CFG_STS_LINK_RATE_50G           6
-#define   NFP_NET_CFG_STS_LINK_RATE_100G          7
-#define NFP_NET_CFG_CAP                 0x0038
-#define NFP_NET_CFG_MAX_TXRINGS         0x003c
-#define NFP_NET_CFG_MAX_RXRINGS         0x0040
-#define NFP_NET_CFG_MAX_MTU             0x0044
+#define   NFP_NET_CFG_STS_LINK_RATE_UNKNOWN	  1
+#define   NFP_NET_CFG_STS_LINK_RATE_1G		  2
+#define   NFP_NET_CFG_STS_LINK_RATE_10G		  3
+#define   NFP_NET_CFG_STS_LINK_RATE_25G		  4
+#define   NFP_NET_CFG_STS_LINK_RATE_40G		  5
+#define   NFP_NET_CFG_STS_LINK_RATE_50G		  6
+#define   NFP_NET_CFG_STS_LINK_RATE_100G	  7
+#define NFP_NET_CFG_CAP			0x0038
+#define NFP_NET_CFG_MAX_TXRINGS		0x003c
+#define NFP_NET_CFG_MAX_RXRINGS		0x0040
+#define NFP_NET_CFG_MAX_MTU		0x0044
 /* Next two words are being used by VFs for solving THB350 issue */
-#define NFP_NET_CFG_START_TXQ           0x0048
-#define NFP_NET_CFG_START_RXQ           0x004c
+#define NFP_NET_CFG_START_TXQ		0x0048
+#define NFP_NET_CFG_START_RXQ		0x004c
 
 /**
  * Prepend configuration
@@ -280,8 +280,8 @@
 /**
  * 40B reserved for future use (0x0098 - 0x00c0)
  */
-#define NFP_NET_CFG_RESERVED            0x0098
-#define NFP_NET_CFG_RESERVED_SZ         0x0028
+#define NFP_NET_CFG_RESERVED		0x0098
+#define NFP_NET_CFG_RESERVED_SZ		0x0028
 
 /**
  * RSS configuration (0x0100 - 0x01ac):
@@ -290,26 +290,26 @@
  * %NFP_NET_CFG_RSS_KEY:     RSS "secret" key
  * %NFP_NET_CFG_RSS_ITBL:    RSS indirection table
  */
-#define NFP_NET_CFG_RSS_BASE            0x0100
-#define NFP_NET_CFG_RSS_CTRL            NFP_NET_CFG_RSS_BASE
-#define   NFP_NET_CFG_RSS_MASK            (0x7f)
-#define   NFP_NET_CFG_RSS_MASK_of(_x)     ((_x) & 0x7f)
-#define   NFP_NET_CFG_RSS_IPV4            (1 <<  8) /* RSS for IPv4 */
-#define   NFP_NET_CFG_RSS_IPV6            (1 <<  9) /* RSS for IPv6 */
-#define   NFP_NET_CFG_RSS_IPV4_TCP        (1 << 10) /* RSS for IPv4/TCP */
-#define   NFP_NET_CFG_RSS_IPV4_UDP        (1 << 11) /* RSS for IPv4/UDP */
-#define   NFP_NET_CFG_RSS_IPV6_TCP        (1 << 12) /* RSS for IPv6/TCP */
-#define   NFP_NET_CFG_RSS_IPV6_UDP        (1 << 13) /* RSS for IPv6/UDP */
+#define NFP_NET_CFG_RSS_BASE		0x0100
+#define NFP_NET_CFG_RSS_CTRL		NFP_NET_CFG_RSS_BASE
+#define   NFP_NET_CFG_RSS_MASK		  (0x7f)
+#define   NFP_NET_CFG_RSS_MASK_of(_x)	  ((_x) & 0x7f)
+#define   NFP_NET_CFG_RSS_IPV4		  (1 <<  8) /* RSS for IPv4 */
+#define   NFP_NET_CFG_RSS_IPV6		  (1 <<  9) /* RSS for IPv6 */
+#define   NFP_NET_CFG_RSS_IPV4_TCP	  (1 << 10) /* RSS for IPv4/TCP */
+#define   NFP_NET_CFG_RSS_IPV4_UDP	  (1 << 11) /* RSS for IPv4/UDP */
+#define   NFP_NET_CFG_RSS_IPV6_TCP	  (1 << 12) /* RSS for IPv6/TCP */
+#define   NFP_NET_CFG_RSS_IPV6_UDP	  (1 << 13) /* RSS for IPv6/UDP */
 #define   NFP_NET_CFG_RSS_HFUNC		  0xff000000
-#define   NFP_NET_CFG_RSS_TOEPLITZ        (1 << 24) /* Use Toeplitz hash */
+#define   NFP_NET_CFG_RSS_TOEPLITZ	  (1 << 24) /* Use Toeplitz hash */
 #define   NFP_NET_CFG_RSS_XOR		  (1 << 25) /* Use XOR as hash */
 #define   NFP_NET_CFG_RSS_CRC32		  (1 << 26) /* Use CRC32 as hash */
 #define   NFP_NET_CFG_RSS_HFUNCS	  3
-#define NFP_NET_CFG_RSS_KEY             (NFP_NET_CFG_RSS_BASE + 0x4)
-#define NFP_NET_CFG_RSS_KEY_SZ          0x28
-#define NFP_NET_CFG_RSS_ITBL            (NFP_NET_CFG_RSS_BASE + 0x4 + \
+#define NFP_NET_CFG_RSS_KEY		(NFP_NET_CFG_RSS_BASE + 0x4)
+#define NFP_NET_CFG_RSS_KEY_SZ		0x28
+#define NFP_NET_CFG_RSS_ITBL		(NFP_NET_CFG_RSS_BASE + 0x4 + \
 					 NFP_NET_CFG_RSS_KEY_SZ)
-#define NFP_NET_CFG_RSS_ITBL_SZ         0x80
+#define NFP_NET_CFG_RSS_ITBL_SZ		0x80
 
 /**
  * TX ring configuration (0x200 - 0x800)
@@ -321,13 +321,13 @@
  * %NFP_NET_CFG_TXR_PRIO:    Per TX ring priority (1B entries)
  * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet
  */
-#define NFP_NET_CFG_TXR_BASE            0x0200
-#define NFP_NET_CFG_TXR_ADDR(_x)        (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_WB_ADDR(_x)     (NFP_NET_CFG_TXR_BASE + 0x200 + \
+#define NFP_NET_CFG_TXR_BASE		0x0200
+#define NFP_NET_CFG_TXR_ADDR(_x)	(NFP_NET_CFG_TXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_TXR_WB_ADDR(_x)	(NFP_NET_CFG_TXR_BASE + 0x200 + \
 					 ((_x) * 0x8))
-#define NFP_NET_CFG_TXR_SZ(_x)          (NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
-#define NFP_NET_CFG_TXR_VEC(_x)         (NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
-#define NFP_NET_CFG_TXR_PRIO(_x)        (NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
+#define NFP_NET_CFG_TXR_SZ(_x)		(NFP_NET_CFG_TXR_BASE + 0x400 + (_x))
+#define NFP_NET_CFG_TXR_VEC(_x)		(NFP_NET_CFG_TXR_BASE + 0x440 + (_x))
+#define NFP_NET_CFG_TXR_PRIO(_x)	(NFP_NET_CFG_TXR_BASE + 0x480 + (_x))
 #define NFP_NET_CFG_TXR_IRQ_MOD(_x)	(NFP_NET_CFG_TXR_BASE + 0x500 + \
 					 ((_x) * 0x4))
 
@@ -340,11 +340,11 @@
  * %NFP_NET_CFG_RXR_PRIO:    Per RX ring priority (1B entries)
  * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries)
  */
-#define NFP_NET_CFG_RXR_BASE            0x0800
-#define NFP_NET_CFG_RXR_ADDR(_x)        (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
-#define NFP_NET_CFG_RXR_SZ(_x)          (NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
-#define NFP_NET_CFG_RXR_VEC(_x)         (NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
-#define NFP_NET_CFG_RXR_PRIO(_x)        (NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
+#define NFP_NET_CFG_RXR_BASE		0x0800
+#define NFP_NET_CFG_RXR_ADDR(_x)	(NFP_NET_CFG_RXR_BASE + ((_x) * 0x8))
+#define NFP_NET_CFG_RXR_SZ(_x)		(NFP_NET_CFG_RXR_BASE + 0x200 + (_x))
+#define NFP_NET_CFG_RXR_VEC(_x)		(NFP_NET_CFG_RXR_BASE + 0x240 + (_x))
+#define NFP_NET_CFG_RXR_PRIO(_x)	(NFP_NET_CFG_RXR_BASE + 0x280 + (_x))
 #define NFP_NET_CFG_RXR_IRQ_MOD(_x)	(NFP_NET_CFG_RXR_BASE + 0x300 + \
 					 ((_x) * 0x4))
 
@@ -358,36 +358,36 @@
  * the MSI-X entry and the host driver must clear the register to
  * re-enable the interrupt.
  */
-#define NFP_NET_CFG_ICR_BASE            0x0c00
-#define NFP_NET_CFG_ICR(_x)             (NFP_NET_CFG_ICR_BASE + (_x))
-#define   NFP_NET_CFG_ICR_UNMASKED      0x0
-#define   NFP_NET_CFG_ICR_RXTX          0x1
-#define   NFP_NET_CFG_ICR_LSC           0x2
+#define NFP_NET_CFG_ICR_BASE		0x0c00
+#define NFP_NET_CFG_ICR(_x)		(NFP_NET_CFG_ICR_BASE + (_x))
+#define   NFP_NET_CFG_ICR_UNMASKED	0x0
+#define   NFP_NET_CFG_ICR_RXTX		0x1
+#define   NFP_NET_CFG_ICR_LSC		0x2
 
 /**
  * General device stats (0x0d00 - 0x0d90)
  * all counters are 64bit.
  */
-#define NFP_NET_CFG_STATS_BASE          0x0d00
-#define NFP_NET_CFG_STATS_RX_DISCARDS   (NFP_NET_CFG_STATS_BASE + 0x00)
-#define NFP_NET_CFG_STATS_RX_ERRORS     (NFP_NET_CFG_STATS_BASE + 0x08)
-#define NFP_NET_CFG_STATS_RX_OCTETS     (NFP_NET_CFG_STATS_BASE + 0x10)
-#define NFP_NET_CFG_STATS_RX_UC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x18)
-#define NFP_NET_CFG_STATS_RX_MC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x20)
-#define NFP_NET_CFG_STATS_RX_BC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x28)
-#define NFP_NET_CFG_STATS_RX_FRAMES     (NFP_NET_CFG_STATS_BASE + 0x30)
-#define NFP_NET_CFG_STATS_RX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x38)
-#define NFP_NET_CFG_STATS_RX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x40)
+#define NFP_NET_CFG_STATS_BASE		0x0d00
+#define NFP_NET_CFG_STATS_RX_DISCARDS	(NFP_NET_CFG_STATS_BASE + 0x00)
+#define NFP_NET_CFG_STATS_RX_ERRORS	(NFP_NET_CFG_STATS_BASE + 0x08)
+#define NFP_NET_CFG_STATS_RX_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x10)
+#define NFP_NET_CFG_STATS_RX_UC_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x18)
+#define NFP_NET_CFG_STATS_RX_MC_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x20)
+#define NFP_NET_CFG_STATS_RX_BC_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x28)
+#define NFP_NET_CFG_STATS_RX_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x30)
+#define NFP_NET_CFG_STATS_RX_MC_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x38)
+#define NFP_NET_CFG_STATS_RX_BC_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x40)
 
-#define NFP_NET_CFG_STATS_TX_DISCARDS   (NFP_NET_CFG_STATS_BASE + 0x48)
-#define NFP_NET_CFG_STATS_TX_ERRORS     (NFP_NET_CFG_STATS_BASE + 0x50)
-#define NFP_NET_CFG_STATS_TX_OCTETS     (NFP_NET_CFG_STATS_BASE + 0x58)
-#define NFP_NET_CFG_STATS_TX_UC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x60)
-#define NFP_NET_CFG_STATS_TX_MC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x68)
-#define NFP_NET_CFG_STATS_TX_BC_OCTETS  (NFP_NET_CFG_STATS_BASE + 0x70)
-#define NFP_NET_CFG_STATS_TX_FRAMES     (NFP_NET_CFG_STATS_BASE + 0x78)
-#define NFP_NET_CFG_STATS_TX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x80)
-#define NFP_NET_CFG_STATS_TX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x88)
+#define NFP_NET_CFG_STATS_TX_DISCARDS	(NFP_NET_CFG_STATS_BASE + 0x48)
+#define NFP_NET_CFG_STATS_TX_ERRORS	(NFP_NET_CFG_STATS_BASE + 0x50)
+#define NFP_NET_CFG_STATS_TX_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x58)
+#define NFP_NET_CFG_STATS_TX_UC_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x60)
+#define NFP_NET_CFG_STATS_TX_MC_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x68)
+#define NFP_NET_CFG_STATS_TX_BC_OCTETS	(NFP_NET_CFG_STATS_BASE + 0x70)
+#define NFP_NET_CFG_STATS_TX_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x78)
+#define NFP_NET_CFG_STATS_TX_MC_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x80)
+#define NFP_NET_CFG_STATS_TX_BC_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x88)
 
 #define NFP_NET_CFG_STATS_APP0_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x90)
 #define NFP_NET_CFG_STATS_APP0_BYTES	(NFP_NET_CFG_STATS_BASE + 0x98)
@@ -404,11 +404,11 @@
  * %NFP_NET_CFG_TXR_STATS:   TX ring statistics (Packet and Byte count)
  * %NFP_NET_CFG_RXR_STATS:   RX ring statistics (Packet and Byte count)
  */
-#define NFP_NET_CFG_TXR_STATS_BASE      0x1000
-#define NFP_NET_CFG_TXR_STATS(_x)       (NFP_NET_CFG_TXR_STATS_BASE + \
+#define NFP_NET_CFG_TXR_STATS_BASE	0x1000
+#define NFP_NET_CFG_TXR_STATS(_x)	(NFP_NET_CFG_TXR_STATS_BASE + \
 					 ((_x) * 0x10))
-#define NFP_NET_CFG_RXR_STATS_BASE      0x1400
-#define NFP_NET_CFG_RXR_STATS(_x)       (NFP_NET_CFG_RXR_STATS_BASE + \
+#define NFP_NET_CFG_RXR_STATS_BASE	0x1400
+#define NFP_NET_CFG_RXR_STATS(_x)	(NFP_NET_CFG_RXR_STATS_BASE + \
 					 ((_x) * 0x10))
 
 /**
@@ -444,7 +444,7 @@
  * %NFP_NET_CFG_TLV_TYPE:	Offset of type within the TLV
  * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
  * %NFP_NET_CFG_TLV_LENGTH:	Offset of length within the TLV
- * %NFP_NET_CFG_TLV_LENGTH_INC:	TLV length increments
+ * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
  * %NFP_NET_CFG_TLV_VALUE:	Offset of value with the TLV
  *
  * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
@@ -457,12 +457,12 @@
  * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
  */
 #define NFP_NET_CFG_TLV_TYPE		0x00
-#define   NFP_NET_CFG_TLV_TYPE_REQUIRED	  0x8000
+#define   NFP_NET_CFG_TLV_TYPE_REQUIRED   0x8000
 #define NFP_NET_CFG_TLV_LENGTH		0x02
 #define   NFP_NET_CFG_TLV_LENGTH_INC	  4
 #define NFP_NET_CFG_TLV_VALUE		0x04
 
-#define NFP_NET_CFG_TLV_HEADER_REQUIRED	0x80000000
+#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
 #define NFP_NET_CFG_TLV_HEADER_TYPE	0x7fff0000
 #define NFP_NET_CFG_TLV_HEADER_LENGTH	0x0000ffff
 

From ffa61202fe2972577794004f79652360b5f4ddb0 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Thu, 15 Feb 2018 20:19:09 -0800
Subject: [PATCH 0190/1678] nfp: flower: implement tcp flag match offload

Implement tcp flag match offloading. Current tcp flag match support include
FIN, SYN, RST, PSH and URG flags, other flags are unsupported. The PSH and
URG flags are only set in the hardware fast path when used in combination
with the SYN, RST and PSH flags.

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/flower/cmsg.h  | 11 ++++--
 .../net/ethernet/netronome/nfp/flower/main.h  |  1 +
 .../net/ethernet/netronome/nfp/flower/match.c | 20 +++++++++++
 .../ethernet/netronome/nfp/flower/offload.c   | 34 +++++++++++++++++++
 4 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index adfe474c2cf0..28c1cd5b823b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -61,6 +61,13 @@
 #define NFP_FLOWER_MASK_MPLS_BOS	BIT(8)
 #define NFP_FLOWER_MASK_MPLS_Q		BIT(0)
 
+/* Compressed HW representation of TCP Flags */
+#define NFP_FL_TCP_FLAG_URG		BIT(4)
+#define NFP_FL_TCP_FLAG_PSH		BIT(3)
+#define NFP_FL_TCP_FLAG_RST		BIT(2)
+#define NFP_FL_TCP_FLAG_SYN		BIT(1)
+#define NFP_FL_TCP_FLAG_FIN		BIT(0)
+
 #define NFP_FL_SC_ACT_DROP		0x80000000
 #define NFP_FL_SC_ACT_USER		0x7D000000
 #define NFP_FL_SC_ACT_POPV		0x6A000000
@@ -257,7 +264,7 @@ struct nfp_flower_tp_ports {
  *    3                   2                   1
  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |    DSCP   |ECN|   protocol    |           reserved            |
+ * |    DSCP   |ECN|   protocol    |      ttl      |     flags     |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                        ipv4_addr_src                          |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -268,7 +275,7 @@ struct nfp_flower_ipv4 {
 	u8 tos;
 	u8 proto;
 	u8 ttl;
-	u8 reserved;
+	u8 flags;
 	__be32 ipv4_src;
 	__be32 ipv4_dst;
 };
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 332ff0fdc038..c5cebf6fb1d3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -41,6 +41,7 @@
 #include <linux/time64.h>
 #include <linux/types.h>
 #include <net/pkt_cls.h>
+#include <net/tcp.h>
 #include <linux/workqueue.h>
 
 struct net_device;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 37c2ecae2a7a..b3bc8279d4fb 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -181,6 +181,26 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
 		frame->tos = flow_ip->tos;
 		frame->ttl = flow_ip->ttl;
 	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_dissector_key_tcp *tcp;
+		u32 tcp_flags;
+
+		tcp = skb_flow_dissector_target(flow->dissector,
+						FLOW_DISSECTOR_KEY_TCP, target);
+		tcp_flags = be16_to_cpu(tcp->flags);
+
+		if (tcp_flags & TCPHDR_FIN)
+			frame->flags |= NFP_FL_TCP_FLAG_FIN;
+		if (tcp_flags & TCPHDR_SYN)
+			frame->flags |= NFP_FL_TCP_FLAG_SYN;
+		if (tcp_flags & TCPHDR_RST)
+			frame->flags |= NFP_FL_TCP_FLAG_RST;
+		if (tcp_flags & TCPHDR_PSH)
+			frame->flags |= NFP_FL_TCP_FLAG_PSH;
+		if (tcp_flags & TCPHDR_URG)
+			frame->flags |= NFP_FL_TCP_FLAG_URG;
+	}
 }
 
 static void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index eb5c13dea8f5..f3586c519805 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -44,11 +44,16 @@
 #include "../nfp_net.h"
 #include "../nfp_port.h"
 
+#define NFP_FLOWER_SUPPORTED_TCPFLAGS \
+	(TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
+	 TCPHDR_PSH | TCPHDR_URG)
+
 #define NFP_FLOWER_WHITELIST_DISSECTOR \
 	(BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
 	 BIT(FLOW_DISSECTOR_KEY_BASIC) | \
 	 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \
 	 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \
+	 BIT(FLOW_DISSECTOR_KEY_TCP) | \
 	 BIT(FLOW_DISSECTOR_KEY_PORTS) | \
 	 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
 	 BIT(FLOW_DISSECTOR_KEY_VLAN) | \
@@ -288,6 +293,35 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		}
 	}
 
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_dissector_key_tcp *tcp;
+		u32 tcp_flags;
+
+		tcp = skb_flow_dissector_target(flow->dissector,
+						FLOW_DISSECTOR_KEY_TCP,
+						flow->key);
+		tcp_flags = be16_to_cpu(tcp->flags);
+
+		if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
+			return -EOPNOTSUPP;
+
+		/* We only support PSH and URG flags when either
+		 * FIN, SYN or RST is present as well.
+		 */
+		if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) &&
+		    !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST)))
+			return -EOPNOTSUPP;
+
+		/* We need to store TCP flags in the IPv4 key space, thus
+		 * we need to ensure we include a IPv4 key layer if we have
+		 * not done so already.
+		 */
+		if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) {
+			key_layer |= NFP_FLOWER_LAYER_IPV4;
+			key_size += sizeof(struct nfp_flower_ipv4);
+		}
+	}
+
 	ret_key_ls->key_layer = key_layer;
 	ret_key_ls->key_layer_two = key_layer_two;
 	ret_key_ls->key_size = key_size;

From 75efa06f457bbed3931bf693b7137cf4da3b5c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Fri, 16 Feb 2018 17:10:08 +0100
Subject: [PATCH 0191/1678] ravb: add support for changing MTU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow for changing the MTU within the limit of the maximum size of a
descriptor (2048 bytes). Add the callback to change MTU from user-space
and take the configurable MTU into account when configuring the
hardware.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb.h      |  1 +
 drivers/net/ethernet/renesas/ravb_main.c | 34 +++++++++++++++++++-----
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 96a27b00c90e..b81f4faf7b10 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -1018,6 +1018,7 @@ struct ravb_private {
 	u32 dirty_rx[NUM_RX_QUEUE];	/* Producer ring indices */
 	u32 cur_tx[NUM_TX_QUEUE];
 	u32 dirty_tx[NUM_TX_QUEUE];
+	u32 rx_buf_sz;			/* Based on MTU+slack. */
 	struct napi_struct napi[NUM_RX_QUEUE];
 	struct work_struct work;
 	/* MII transceiver section. */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index c87f57ca4437..34e841306e04 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
 					       le32_to_cpu(desc->dptr)))
 				dma_unmap_single(ndev->dev.parent,
 						 le32_to_cpu(desc->dptr),
-						 PKT_BUF_SZ,
+						 priv->rx_buf_sz,
 						 DMA_FROM_DEVICE);
 		}
 		ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
 	for (i = 0; i < priv->num_rx_ring[q]; i++) {
 		/* RX descriptor */
 		rx_desc = &priv->rx_ring[q][i];
-		rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+		rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
 		dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
-					  PKT_BUF_SZ,
+					  priv->rx_buf_sz,
 					  DMA_FROM_DEVICE);
 		/* We just set the data size to 0 for a failed mapping which
 		 * should prevent DMA from happening...
@@ -346,6 +346,10 @@ static int ravb_ring_init(struct net_device *ndev, int q)
 	int ring_size;
 	int i;
 
+	/* +16 gets room from the status from the card. */
+	priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
+		ETH_HLEN + VLAN_HLEN;
+
 	/* Allocate RX and TX skb rings */
 	priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
 				  sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -355,7 +359,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
 		goto error;
 
 	for (i = 0; i < priv->num_rx_ring[q]; i++) {
-		skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1);
+		skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
 		if (!skb)
 			goto error;
 		ravb_set_buffer_align(skb);
@@ -586,7 +590,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 			skb = priv->rx_skb[q][entry];
 			priv->rx_skb[q][entry] = NULL;
 			dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
-					 PKT_BUF_SZ,
+					 priv->rx_buf_sz,
 					 DMA_FROM_DEVICE);
 			get_ts &= (q == RAVB_NC) ?
 					RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -619,11 +623,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
 	for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
 		entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
 		desc = &priv->rx_ring[q][entry];
-		desc->ds_cc = cpu_to_le16(PKT_BUF_SZ);
+		desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
 
 		if (!priv->rx_skb[q][entry]) {
 			skb = netdev_alloc_skb(ndev,
-					       PKT_BUF_SZ + RAVB_ALIGN - 1);
+					       priv->rx_buf_sz +
+					       RAVB_ALIGN - 1);
 			if (!skb)
 				break;	/* Better luck next round. */
 			ravb_set_buffer_align(skb);
@@ -1854,6 +1859,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 	return phy_mii_ioctl(phydev, req, cmd);
 }
 
+static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	if (netif_running(ndev))
+		return -EBUSY;
+
+	ndev->mtu = new_mtu;
+	netdev_update_features(ndev);
+
+	return 0;
+}
+
 static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
@@ -1895,6 +1911,7 @@ static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_set_rx_mode	= ravb_set_rx_mode,
 	.ndo_tx_timeout		= ravb_tx_timeout,
 	.ndo_do_ioctl		= ravb_do_ioctl,
+	.ndo_change_mtu		= ravb_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_set_features	= ravb_set_features,
@@ -2117,6 +2134,9 @@ static int ravb_probe(struct platform_device *pdev)
 		goto out_release;
 	}
 
+	ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
+	ndev->min_mtu = ETH_MIN_MTU;
+
 	/* Set function */
 	ndev->netdev_ops = &ravb_netdev_ops;
 	ndev->ethtool_ops = &ravb_ethtool_ops;

From b2d12101bff95c27960c69f3e54a98afdc205bb3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 16 Feb 2018 16:55:05 +0000
Subject: [PATCH 0192/1678] net: dsa: mv88e6xxx: avoid unintended sign
 extension on a 16 bit shift

The shifting of timehi by 16 bits to the left will be promoted to
a 32 bit signed int and then sign-extended to an u64. If the top bit
of timehi is set then all then all the upper bits of ns end up as also
being set because of the sign-extension. Fix this by making timehi and
timelo u64.  Also move the declaration of ns.

Detected by CoverityScan, CID#1465288 ("Unintended sign extension")

Fixes: c6fe0ad2c349 ("net: dsa: mv88e6xxx: add rx/tx timestamping support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/hwtstamp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index b251d534b70d..2149d332dea0 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -293,7 +293,8 @@ static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
 			       struct sk_buff *skb, u16 reg,
 			       struct sk_buff_head *rxq)
 {
-	u16 buf[4] = { 0 }, status, timelo, timehi, seq_id;
+	u16 buf[4] = { 0 }, status, seq_id;
+	u64 ns, timelo, timehi;
 	struct skb_shared_hwtstamps *shwt;
 	int err;
 
@@ -321,7 +322,7 @@ static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
 	 */
 	for ( ; skb; skb = skb_dequeue(rxq)) {
 		if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
-			u64 ns = timehi << 16 | timelo;
+			ns = timehi << 16 | timelo;
 
 			mutex_lock(&chip->reg_lock);
 			ns = timecounter_cyc2time(&chip->tstamp_tc, ns);

From 1cbec07649ec8c267cdfb486ab4ee73949974ca4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 16 Feb 2018 11:03:03 -0800
Subject: [PATCH 0193/1678] net: Only honor ifindex in IP_PKTINFO if non-0

Only allow ifindex from IP_PKTINFO to override SO_BINDTODEVICE settings
if the index is actually set in the message.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 008be04ac1cc..9dca0fb8c482 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 				return -EINVAL;
-			ipc->oif = src_info->ipi6_ifindex;
+			if (src_info->ipi6_ifindex)
+				ipc->oif = src_info->ipi6_ifindex;
 			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 			continue;
 		}
@@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 				return -EINVAL;
 			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
-			ipc->oif = info->ipi_ifindex;
+			if (info->ipi_ifindex)
+				ipc->oif = info->ipi_ifindex;
 			ipc->addr = info->ipi_spec_dst.s_addr;
 			break;
 		}

From 1ec010e705934c8acbe7dbf31afc81e60e3d828b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 16 Feb 2018 11:03:07 +0100
Subject: [PATCH 0194/1678] tun: export flags, uid, gid, queue information over
 netlink

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c            | 56 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/if_link.h | 18 ++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8e9a0ac644d2..86b16013e9c5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2291,11 +2291,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
 	return -EINVAL;
 }
 
+static size_t tun_get_size(const struct net_device *dev)
+{
+	BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
+	BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
+
+	return nla_total_size(sizeof(uid_t)) + /* OWNER */
+	       nla_total_size(sizeof(gid_t)) + /* GROUP */
+	       nla_total_size(sizeof(u8)) + /* TYPE */
+	       nla_total_size(sizeof(u8)) + /* PI */
+	       nla_total_size(sizeof(u8)) + /* VNET_HDR */
+	       nla_total_size(sizeof(u8)) + /* PERSIST */
+	       nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
+	       nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
+	       nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
+	       0;
+}
+
+static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+
+	if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
+		goto nla_put_failure;
+	if (uid_valid(tun->owner) &&
+	    nla_put_u32(skb, IFLA_TUN_OWNER,
+			from_kuid_munged(current_user_ns(), tun->owner)))
+		goto nla_put_failure;
+	if (gid_valid(tun->group) &&
+	    nla_put_u32(skb, IFLA_TUN_GROUP,
+			from_kgid_munged(current_user_ns(), tun->group)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
+		       !!(tun->flags & IFF_MULTI_QUEUE)))
+		goto nla_put_failure;
+	if (tun->flags & IFF_MULTI_QUEUE) {
+		if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
+				tun->numdisabled))
+			goto nla_put_failure;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static struct rtnl_link_ops tun_link_ops __read_mostly = {
 	.kind		= DRV_NAME,
 	.priv_size	= sizeof(struct tun_struct),
 	.setup		= tun_setup,
 	.validate	= tun_validate,
+	.get_size       = tun_get_size,
+	.fill_info      = tun_fill_info,
 };
 
 static void tun_sock_write_space(struct sock *sk)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6d9447700e18..11d0c0ea2bfa 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -941,4 +941,22 @@ enum {
 	IFLA_EVENT_BONDING_OPTIONS,	/* change in bonding options */
 };
 
+/* tun section */
+
+enum {
+	IFLA_TUN_UNSPEC,
+	IFLA_TUN_OWNER,
+	IFLA_TUN_GROUP,
+	IFLA_TUN_TYPE,
+	IFLA_TUN_PI,
+	IFLA_TUN_VNET_HDR,
+	IFLA_TUN_PERSIST,
+	IFLA_TUN_MULTI_QUEUE,
+	IFLA_TUN_NUM_QUEUES,
+	IFLA_TUN_NUM_DISABLED_QUEUES,
+	__IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */

From 7a9b3ec1e19f691f6e69429d851a4084b86e6219 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 31 Jan 2018 23:07:06 +0100
Subject: [PATCH 0195/1678] nl80211: remove unnecessary genlmsg_cancel() calls

If we free the message immediately, there's no reason to
trim it back to the previous size.

Done with spatch:

@@
identifier msg, hdr;
@@
-if (hdr)
-  genlmsg_cancel(msg, hdr);
... when != msg;
 nlmsg_free(msg);

@@
identifier msg, hdr;
@@
-genlmsg_cancel(msg, hdr);
... when != msg;
 nlmsg_free(msg);

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cc6ec5bab676..412ed8676306 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5847,7 +5847,6 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
 	return genlmsg_reply(msg, info);
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
  out:
 	nlmsg_free(msg);
 	return -ENOBUFS;
@@ -6328,7 +6327,6 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
 nla_put_failure_rcu:
 	rcu_read_unlock();
 nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 put_failure:
 	nlmsg_free(msg);
 	return -EMSGSIZE;
@@ -13732,7 +13730,6 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
 	return;
 
 nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -13780,7 +13777,6 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -13868,7 +13864,6 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -13944,7 +13939,6 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -13984,7 +13978,6 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14014,7 +14007,6 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14051,7 +14043,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14084,7 +14075,6 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14125,7 +14115,6 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
@@ -14164,7 +14153,6 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14219,7 +14207,6 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
 	return;
 
 nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14265,7 +14252,6 @@ static void nl80211_send_remain_on_chan_event(
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14379,7 +14365,6 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_conn_failed);
@@ -14416,7 +14401,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 	return true;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 	return true;
 }
@@ -14500,7 +14484,6 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 	return -ENOBUFS;
 }
@@ -14544,7 +14527,6 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
@@ -14753,7 +14735,6 @@ static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14811,7 +14792,6 @@ nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14864,7 +14844,6 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -14946,7 +14925,6 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 
@@ -15041,7 +15019,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_probe_status);
@@ -15086,8 +15063,6 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 
  nla_put_failure:
 	spin_unlock_bh(&rdev->beacon_registrations_lock);
-	if (hdr)
-		genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_report_obss_beacon);
@@ -15303,7 +15278,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
 	return;
 
  nla_put_failure:
-	genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_tdls_oper_request);
@@ -15448,8 +15422,6 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
 	return;
 
  nla_put_failure:
-	if (hdr)
-		genlmsg_cancel(msg, hdr);
 	nlmsg_free(msg);
 }
 EXPORT_SYMBOL(cfg80211_crit_proto_stopped);

From db8d93a7a355121d49777c059afbca23c53c8628 Mon Sep 17 00:00:00 2001
From: Srinivas Dasari <dasaris@codeaurora.org>
Date: Fri, 2 Feb 2018 11:15:27 +0200
Subject: [PATCH 0196/1678] nl80211: Fix external_auth check for offloaded
 authentication

Unfortunately removal of the ext_feature flag in the last revision of
the patch ended up negating the comparison and prevented the command
from being processed (either nl80211_external_auth() or
rdev_external_auth() returns -EOPNOTSUPP). Fix this by adding back the
lost '!'.

Fixes: 40cbfa90218b ("cfg80211/nl80211: Optional authentication offload to userspace")
Signed-off-by: Srinivas Dasari <dasaris@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 412ed8676306..c6f256b29c73 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12484,7 +12484,7 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
 	struct net_device *dev = info->user_ptr[1];
 	struct cfg80211_external_auth_params params;
 
-	if (rdev->ops->external_auth)
+	if (!rdev->ops->external_auth)
 		return -EOPNOTSUPP;
 
 	if (!info->attrs[NL80211_ATTR_SSID])

From 11b05ba34d89808ca99180d2656438a319670c56 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 2 Feb 2018 16:31:42 +0000
Subject: [PATCH 0197/1678] mac80211: remove redundant initialization to
 pointer 'hdr'

The pointer hrd is being initialized with a value that is never read
and re-assigned a little later, hence the initialization is redundant
and can be removed.

Cleans up clang warning:
net/mac80211/tx.c:1924:24: warning: Value stored to 'hdr' during its
initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 25904af38839..5decd0fb247c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1921,7 +1921,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_hdr *hdr;
 	int headroom;
 	bool may_encrypt;
 

From c4b50cd31d25c3d17886ffc47ca4a9a12c6dc9bf Mon Sep 17 00:00:00 2001
From: Venkateswara Naralasetty <vnaralas@codeaurora.org>
Date: Tue, 13 Feb 2018 11:03:06 +0530
Subject: [PATCH 0198/1678] cfg80211: send ack_signal to user in probe client
 response

This patch provides support to get ack signal in probe client response
and in station info from user.

Signed-off-by: Venkateswara Naralasetty <vnaralas@codeaurora.org>
[squash in compilation fixes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 3 ++-
 include/net/cfg80211.h                      | 7 ++++++-
 include/uapi/linux/nl80211.h                | 3 +++
 net/mac80211/status.c                       | 2 +-
 net/wireless/nl80211.c                      | 8 ++++++--
 5 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 768f63f38341..b799a5384abb 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -1599,7 +1599,8 @@ static void wil_probe_client_handle(struct wil6210_priv *wil,
 	 */
 	bool alive = (sta->status == wil_sta_connected);
 
-	cfg80211_probe_status(ndev, sta->addr, req->cookie, alive, GFP_KERNEL);
+	cfg80211_probe_status(ndev, sta->addr, req->cookie, alive,
+			      0, false, GFP_KERNEL);
 }
 
 static struct list_head *next_probe_client(struct wil6210_priv *wil)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7d49cd0cf92d..56e905cd4b07 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1147,6 +1147,7 @@ struct cfg80211_tid_stats {
  * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
  * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
  *	(IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
+ * @ack_signal: signal strength (in dBm) of the last ACK frame.
  */
 struct station_info {
 	u64 filled;
@@ -1191,6 +1192,7 @@ struct station_info {
 	u64 rx_duration;
 	u8 rx_beacon_signal_avg;
 	struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
+	s8 ack_signal;
 };
 
 #if IS_ENABLED(CONFIG_CFG80211)
@@ -5838,10 +5840,13 @@ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
  * @addr: the address of the peer
  * @cookie: the cookie filled in @probe_client previously
  * @acked: indicates whether probe was acked or not
+ * @ack_signal: signal strength (in dBm) of the ACK frame.
+ * @is_valid_ack_signal: indicates the ack_signal is valid or not.
  * @gfp: allocation flags
  */
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
-			   u64 cookie, bool acked, gfp_t gfp);
+			   u64 cookie, bool acked, s32 ack_signal,
+			   bool is_valid_ack_signal, gfp_t gfp);
 
 /**
  * cfg80211_report_obss_beacon - report beacon from other APs
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ca3d5a613fc0..c13c84304be3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2626,6 +2626,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_EXTERNAL_AUTH_SUPPORT,
 
 	NL80211_ATTR_NSS,
+	NL80211_ATTR_ACK_SIGNAL,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -2947,6 +2948,7 @@ enum nl80211_sta_bss_param {
  * @NL80211_STA_INFO_RX_DURATION: aggregate PPDU duration for all frames
  *	received from the station (u64, usec)
  * @NL80211_STA_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_STA_INFO_ACK_SIGNAL: signal strength of the last ACK frame(u8, dBm)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2985,6 +2987,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_TID_STATS,
 	NL80211_STA_INFO_RX_DURATION,
 	NL80211_STA_INFO_PAD,
+	NL80211_STA_INFO_ACK_SIGNAL,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index da7427a41529..d74d44e65bd7 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -486,7 +486,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
 			if (ieee80211_is_nullfunc(hdr->frame_control) ||
 			    ieee80211_is_qos_nullfunc(hdr->frame_control))
 				cfg80211_probe_status(sdata->dev, hdr->addr1,
-						      cookie, acked,
+						      cookie, acked, 0, false,
 						      GFP_ATOMIC);
 			else
 				cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c6f256b29c73..050ff61b06a3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4486,6 +4486,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
 	PUT_SINFO_U64(BEACON_RX, rx_beacon);
 	PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+	PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
 
 #undef PUT_SINFO
 #undef PUT_SINFO_U64
@@ -14984,7 +14985,8 @@ nla_put_failure:
 EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);
 
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
-			   u64 cookie, bool acked, gfp_t gfp)
+			   u64 cookie, bool acked, s32 ack_signal,
+			   bool is_valid_ack_signal, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -15009,7 +15011,9 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
 	    nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
 			      NL80211_ATTR_PAD) ||
-	    (acked && nla_put_flag(msg, NL80211_ATTR_ACK)))
+	    (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
+	    (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
+						ack_signal)))
 		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);

From a78b26fffd2368fcd079802897f4c97f9baea833 Mon Sep 17 00:00:00 2001
From: Venkateswara Naralasetty <vnaralas@codeaurora.org>
Date: Tue, 13 Feb 2018 11:04:46 +0530
Subject: [PATCH 0199/1678] mac80211: Add tx ack signal support in sta info

This allows users to get ack signal strength of
last transmitted frame.

Signed-off-by: Venkateswara Naralasetty <vnaralas@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h  |  1 +
 net/mac80211/sta_info.c |  6 ++++++
 net/mac80211/sta_info.h |  2 ++
 net/mac80211/status.c   | 13 +++++++++++--
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 906e90223066..854037b8163e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -934,6 +934,7 @@ struct ieee80211_tx_info {
 			u8 ampdu_len;
 			u8 antenna;
 			u16 tx_time;
+			bool is_valid_ack_signal;
 			void *status_driver_data[19 / sizeof(void *)];
 		} status;
 		struct {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 0c5627f8a104..0bc40c719a55 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2287,6 +2287,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
 		sinfo->expected_throughput = thr;
 	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+	    sta->status_stats.ack_signal_filled) {
+		sinfo->ack_signal = sta->status_stats.last_ack_signal;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+	}
 }
 
 u32 sta_get_expected_throughput(struct sta_info *sta)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index cd53619435b6..f64eb86ca64b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -548,6 +548,8 @@ struct sta_info {
 		u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
 		u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
 		unsigned long last_ack;
+		s8 last_ack_signal;
+		bool ack_signal_filled;
 	} status_stats;
 
 	/* Updated from TX path only, no locking requirements */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index d74d44e65bd7..743e89c5926c 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -187,9 +187,16 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
 	struct ieee80211_mgmt *mgmt = (void *) skb->data;
 	struct ieee80211_local *local = sta->local;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb);
 
-	if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+	if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 		sta->status_stats.last_ack = jiffies;
+		if (txinfo->status.is_valid_ack_signal) {
+			sta->status_stats.last_ack_signal =
+					 (s8)txinfo->status.ack_signal;
+			sta->status_stats.ack_signal_filled = true;
+		}
+	}
 
 	if (ieee80211_is_data_qos(mgmt->frame_control)) {
 		struct ieee80211_hdr *hdr = (void *) skb->data;
@@ -486,7 +493,9 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
 			if (ieee80211_is_nullfunc(hdr->frame_control) ||
 			    ieee80211_is_qos_nullfunc(hdr->frame_control))
 				cfg80211_probe_status(sdata->dev, hdr->addr1,
-						      cookie, acked, 0, false,
+						      cookie, acked,
+						      info->status.ack_signal,
+						      info->status.is_valid_ack_signal,
 						      GFP_ATOMIC);
 			else
 				cfg80211_mgmt_tx_status(&sdata->wdev, cookie,

From 22e76844c566e474a9a3e0c2206e45766b5941b7 Mon Sep 17 00:00:00 2001
From: Srinivas Dasari <dasaris@codeaurora.org>
Date: Tue, 6 Feb 2018 19:49:35 +0530
Subject: [PATCH 0200/1678] ieee80211: Increase PMK maximum length to 64 bytes

Increase the PMK maximum length to 64 bytes to accommodate
the key length used in DPP with the NIST P-521 and
Brainpool 512 curves.

Signed-off-by: Srinivas Dasari <dasaris@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ee6657a0ed69..e4cba332b705 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2111,7 +2111,7 @@ enum ieee80211_key_len {
 #define FILS_ERP_MAX_REALM_LEN		253
 #define FILS_ERP_MAX_RRK_LEN		64
 
-#define PMK_MAX_LEN			48
+#define PMK_MAX_LEN			64
 
 /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */
 enum ieee80211_pub_actioncode {

From e3f9f41757f5ce1e95ef3bc3bfb72bbcdb23ece2 Mon Sep 17 00:00:00 2001
From: Andrea Parri <parri.andrea@gmail.com>
Date: Fri, 16 Feb 2018 12:06:13 +0100
Subject: [PATCH 0201/1678] ptr_ring: Remove now-redundant
 smp_read_barrier_depends()

Because READ_ONCE() now implies smp_read_barrier_depends(), the
smp_read_barrier_depends() in __ptr_ring_consume() is redundant;
this commit removes it and updates the comments.

Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: <linux-kernel@vger.kernel.org>
Cc: <netdev@vger.kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index b884b7794187..ddfed1dce936 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -296,13 +296,14 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r)
 {
 	void *ptr;
 
+	/* The READ_ONCE in __ptr_ring_peek guarantees that anyone
+	 * accessing data through the pointer is up to date. Pairs
+	 * with smp_wmb in __ptr_ring_produce.
+	 */
 	ptr = __ptr_ring_peek(r);
 	if (ptr)
 		__ptr_ring_discard_one(r);
 
-	/* Make sure anyone accessing data through the pointer is up to date. */
-	/* Pairs with smp_wmb in __ptr_ring_produce. */
-	smp_read_barrier_depends();
 	return ptr;
 }
 

From e21fed5864e408732653e0aa067b6669b7d88cda Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 16 Feb 2018 11:47:39 -0600
Subject: [PATCH 0202/1678] net: dsa: mv88e6xxx: hwtstamp: remove unnecessary
 range checking tests

_port_ is already known to be a valid index in the callers [1]. So
these checks are unnecessary.

[1] https://lkml.org/lkml/2018/2/16/469

Addresses-Coverity-ID: 1465287
Addresses-Coverity-ID: 1465291
Suggested-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/hwtstamp.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index 2149d332dea0..ac7694c71266 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -179,9 +179,6 @@ int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
 	if (!chip->info->ptp_support)
 		return -EOPNOTSUPP;
 
-	if (port < 0 || port >= mv88e6xxx_num_ports(chip))
-		return -EINVAL;
-
 	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
 		return -EFAULT;
 
@@ -206,9 +203,6 @@ int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
 	if (!chip->info->ptp_support)
 		return -EOPNOTSUPP;
 
-	if (port < 0 || port >= mv88e6xxx_num_ports(chip))
-		return -EINVAL;
-
 	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
 		-EFAULT : 0;
 }
@@ -255,9 +249,6 @@ static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
 	if (!chip->info->ptp_support)
 		return NULL;
 
-	if (port < 0 || port >= mv88e6xxx_num_ports(chip))
-		return NULL;
-
 	hdr = parse_ptp_header(skb, type);
 	if (!hdr)
 		return NULL;

From 11184a5f6117de49390d4862fbc7a28c7ebfdce0 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 17 Feb 2018 15:08:18 +0100
Subject: [PATCH 0203/1678] net: stmmac: dwmac-meson8b: simplify clock
 registration

To goal of this patch is to simplify the registration of the RGMII TX
clock (and it's parent clocks). This is achieved by:
- introducing the meson8b_dwmac_register_clk helper-function to remove
  code duplication when registering a single clock (this saves a few
  lines since we have 4 clocks internally)
- using devm_add_action_or_reset to disable the RGMII TX clock
  automatically when needed. This also allows us to re-use the standard
  stmmac_pltfr_remove function.
- devm_kasprintf() and devm_kstrdup() are not used anymore to generate
  the clock name (these are replaced by a variable on the stack) because
  the common clock framework already uses kstrdup() internally.

No functional changes intended.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/stmicro/stmmac/dwmac-meson8b.c   | 156 ++++++++----------
 1 file changed, 65 insertions(+), 91 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 5270d26f0bc6..84a9a900e74e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -55,17 +55,11 @@ struct meson8b_dwmac {
 	phy_interface_t		phy_mode;
 
 	struct clk_mux		m250_mux;
-	struct clk		*m250_mux_clk;
-	struct clk		*m250_mux_parent[MUX_CLK_NUM_PARENTS];
-
 	struct clk_divider	m250_div;
-	struct clk		*m250_div_clk;
-
 	struct clk_fixed_factor	fixed_div2;
-	struct clk		*fixed_div2_clk;
-
 	struct clk_gate		rgmii_tx_en;
-	struct clk		*rgmii_tx_en_clk;
+
+	struct clk		*rgmii_tx_clk;
 
 	u32			tx_delay_ns;
 };
@@ -82,106 +76,95 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
 	writel(data, dwmac->regs + reg);
 }
 
+static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
+					      const char *name_suffix,
+					      const char **parent_names,
+					      int num_parents,
+					      const struct clk_ops *ops,
+					      struct clk_hw *hw)
+{
+	struct device *dev = &dwmac->pdev->dev;
+	struct clk_init_data init;
+	char clk_name[32];
+
+	snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dev),
+		 name_suffix);
+
+	init.name = clk_name;
+	init.ops = ops;
+	init.flags = CLK_SET_RATE_PARENT;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+
+	hw->init = &init;
+
+	return devm_clk_register(dev, hw);
+}
+
 static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 {
-	struct clk_init_data init;
 	int i, ret;
+	struct clk *clk;
 	struct device *dev = &dwmac->pdev->dev;
-	char clk_name[32];
-	const char *clk_div_parents[1];
-	const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
+	const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
 
 	/* get the mux parents from DT */
 	for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
 		char name[16];
 
 		snprintf(name, sizeof(name), "clkin%d", i);
-		dwmac->m250_mux_parent[i] = devm_clk_get(dev, name);
-		if (IS_ERR(dwmac->m250_mux_parent[i])) {
-			ret = PTR_ERR(dwmac->m250_mux_parent[i]);
+		clk = devm_clk_get(dev, name);
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
 			if (ret != -EPROBE_DEFER)
 				dev_err(dev, "Missing clock %s\n", name);
 			return ret;
 		}
 
-		mux_parent_names[i] =
-			__clk_get_name(dwmac->m250_mux_parent[i]);
+		mux_parent_names[i] = __clk_get_name(clk);
 	}
 
-	/* create the m250_mux */
-	snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
-	init.name = clk_name;
-	init.ops = &clk_mux_ops;
-	init.flags = CLK_SET_RATE_PARENT;
-	init.parent_names = mux_parent_names;
-	init.num_parents = MUX_CLK_NUM_PARENTS;
-
 	dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0;
 	dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
 	dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
-	dwmac->m250_mux.flags = 0;
-	dwmac->m250_mux.table = NULL;
-	dwmac->m250_mux.hw.init = &init;
-
-	dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw);
-	if (WARN_ON(IS_ERR(dwmac->m250_mux_clk)))
-		return PTR_ERR(dwmac->m250_mux_clk);
-
-	/* create the m250_div */
-	snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
-	init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
-	init.ops = &clk_divider_ops;
-	init.flags = CLK_SET_RATE_PARENT;
-	clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
-	init.parent_names = clk_div_parents;
-	init.num_parents = ARRAY_SIZE(clk_div_parents);
+	clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
+					 MUX_CLK_NUM_PARENTS, &clk_mux_ops,
+					 &dwmac->m250_mux.hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
 
+	parent_name = __clk_get_name(clk);
 	dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
 	dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
 	dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
-	dwmac->m250_div.hw.init = &init;
 	dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
 				CLK_DIVIDER_ALLOW_ZERO |
 				CLK_DIVIDER_ROUND_CLOSEST;
+	clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
+					 &clk_divider_ops,
+					 &dwmac->m250_div.hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
 
-	dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
-	if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))
-		return PTR_ERR(dwmac->m250_div_clk);
-
-	/* create the fixed_div2 */
-	snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev));
-	init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
-	init.ops = &clk_fixed_factor_ops;
-	init.flags = CLK_SET_RATE_PARENT;
-	clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk);
-	init.parent_names = clk_div_parents;
-	init.num_parents = ARRAY_SIZE(clk_div_parents);
-
+	parent_name = __clk_get_name(clk);
 	dwmac->fixed_div2.mult = 1;
 	dwmac->fixed_div2.div = 2;
-	dwmac->fixed_div2.hw.init = &init;
-
-	dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw);
-	if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk)))
-		return PTR_ERR(dwmac->fixed_div2_clk);
-
-	/* create the rgmii_tx_en */
-	init.name = devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en",
-				   dev_name(dev));
-	init.ops = &clk_gate_ops;
-	init.flags = CLK_SET_RATE_PARENT;
-	clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk);
-	init.parent_names = clk_div_parents;
-	init.num_parents = ARRAY_SIZE(clk_div_parents);
+	clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
+					 &clk_fixed_factor_ops,
+					 &dwmac->fixed_div2.hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
 
+	parent_name = __clk_get_name(clk);
 	dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
 	dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
-	dwmac->rgmii_tx_en.hw.init = &init;
+	clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
+					 &clk_gate_ops,
+					 &dwmac->rgmii_tx_en.hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
 
-	dwmac->rgmii_tx_en_clk = devm_clk_register(dev,
-						   &dwmac->rgmii_tx_en.hw);
-	if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk)))
-		return PTR_ERR(dwmac->rgmii_tx_en_clk);
+	dwmac->rgmii_tx_clk = clk;
 
 	return 0;
 }
@@ -219,19 +202,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 		 * a register) based on the line-speed (125MHz for Gbit speeds,
 		 * 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s).
 		 */
-		ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000);
+		ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
 		if (ret) {
 			dev_err(&dwmac->pdev->dev,
 				"failed to set RGMII TX clock\n");
 			return ret;
 		}
 
-		ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk);
+		ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
 		if (ret) {
 			dev_err(&dwmac->pdev->dev,
 				"failed to enable the RGMII TX clock\n");
 			return ret;
 		}
+
+		devm_add_action_or_reset(&dwmac->pdev->dev,
+					(void(*)(void *))clk_disable_unprepare,
+					dwmac->rgmii_tx_clk);
 		break;
 
 	case PHY_INTERFACE_MODE_RMII:
@@ -317,29 +304,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
-		goto err_clk_disable;
+		goto err_remove_config_dt;
 
 	return 0;
 
-err_clk_disable:
-	if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
-		clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
 err_remove_config_dt:
 	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
 
-static int meson8b_dwmac_remove(struct platform_device *pdev)
-{
-	struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-
-	if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
-		clk_disable_unprepare(dwmac->rgmii_tx_en_clk);
-
-	return stmmac_pltfr_remove(pdev);
-}
-
 static const struct of_device_id meson8b_dwmac_match[] = {
 	{ .compatible = "amlogic,meson8b-dwmac" },
 	{ .compatible = "amlogic,meson-gxbb-dwmac" },
@@ -349,7 +323,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match);
 
 static struct platform_driver meson8b_dwmac_driver = {
 	.probe  = meson8b_dwmac_probe,
-	.remove = meson8b_dwmac_remove,
+	.remove = stmmac_pltfr_remove,
 	.driver = {
 		.name           = "meson8b-dwmac",
 		.pm		= &stmmac_pltfr_pm_ops,

From b756371e10d7809d77ac7f1f0b37a29e17e6a889 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 17 Feb 2018 15:08:19 +0100
Subject: [PATCH 0204/1678] net: stmmac: dwmac-meson8b: only keep struct device
 around

Nothing in the dwmac-meson8b driver (except .probe itself) requires the
platform_device anymore after .probe has finished. Replace it with a
pointer to struct device since this is what the functions inside the
driver are actually accessing.
No functional changes.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/stmicro/stmmac/dwmac-meson8b.c   | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 84a9a900e74e..0dfce35c5583 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -48,7 +48,7 @@
 #define MUX_CLK_NUM_PARENTS		2
 
 struct meson8b_dwmac {
-	struct platform_device	*pdev;
+	struct device		*dev;
 
 	void __iomem		*regs;
 
@@ -83,11 +83,10 @@ static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
 					      const struct clk_ops *ops,
 					      struct clk_hw *hw)
 {
-	struct device *dev = &dwmac->pdev->dev;
 	struct clk_init_data init;
 	char clk_name[32];
 
-	snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dev),
+	snprintf(clk_name, sizeof(clk_name), "%s#%s", dev_name(dwmac->dev),
 		 name_suffix);
 
 	init.name = clk_name;
@@ -98,14 +97,14 @@ static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac,
 
 	hw->init = &init;
 
-	return devm_clk_register(dev, hw);
+	return devm_clk_register(dwmac->dev, hw);
 }
 
 static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 {
 	int i, ret;
 	struct clk *clk;
-	struct device *dev = &dwmac->pdev->dev;
+	struct device *dev = dwmac->dev;
 	const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
 
 	/* get the mux parents from DT */
@@ -204,19 +203,19 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 		 */
 		ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000);
 		if (ret) {
-			dev_err(&dwmac->pdev->dev,
+			dev_err(dwmac->dev,
 				"failed to set RGMII TX clock\n");
 			return ret;
 		}
 
 		ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
 		if (ret) {
-			dev_err(&dwmac->pdev->dev,
+			dev_err(dwmac->dev,
 				"failed to enable the RGMII TX clock\n");
 			return ret;
 		}
 
-		devm_add_action_or_reset(&dwmac->pdev->dev,
+		devm_add_action_or_reset(dwmac->dev,
 					(void(*)(void *))clk_disable_unprepare,
 					dwmac->rgmii_tx_clk);
 		break;
@@ -238,7 +237,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 		break;
 
 	default:
-		dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n",
+		dev_err(dwmac->dev, "unsupported phy-mode %s\n",
 			phy_modes(dwmac->phy_mode));
 		return -EINVAL;
 	}
@@ -279,7 +278,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 		goto err_remove_config_dt;
 	}
 
-	dwmac->pdev = pdev;
+	dwmac->dev = &pdev->dev;
 	dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
 	if (dwmac->phy_mode < 0) {
 		dev_err(&pdev->dev, "missing phy-mode property\n");

From 8076759dc7d8fc2e7b6f30ea754e85e5b31e0be5 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 17 Feb 2018 15:08:20 +0100
Subject: [PATCH 0205/1678] net: stmmac: dwmac-meson8b: make the clock
 configurations private

The common clock framework needs access to the "clock configuration"
structs during runtime.
However, only the common clock framework should access these. Ensure
this by moving the configuration structs out of struct meson8b_dwmac,
so only meson8b_init_rgmii_tx_clk() and the common clock framework know
about these configurations.

Suggested-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/stmicro/stmmac/dwmac-meson8b.c   | 45 ++++++++++---------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 0dfce35c5583..2d5d4aea3bcb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -49,19 +49,17 @@
 
 struct meson8b_dwmac {
 	struct device		*dev;
-
 	void __iomem		*regs;
-
 	phy_interface_t		phy_mode;
+	struct clk		*rgmii_tx_clk;
+	u32			tx_delay_ns;
+};
 
+struct meson8b_dwmac_clk_configs {
 	struct clk_mux		m250_mux;
 	struct clk_divider	m250_div;
 	struct clk_fixed_factor	fixed_div2;
 	struct clk_gate		rgmii_tx_en;
-
-	struct clk		*rgmii_tx_clk;
-
-	u32			tx_delay_ns;
 };
 
 static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
@@ -106,6 +104,11 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 	struct clk *clk;
 	struct device *dev = dwmac->dev;
 	const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
+	struct meson8b_dwmac_clk_configs *clk_configs;
+
+	clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
+	if (!clk_configs)
+		return -ENOMEM;
 
 	/* get the mux parents from DT */
 	for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) {
@@ -123,43 +126,43 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
 		mux_parent_names[i] = __clk_get_name(clk);
 	}
 
-	dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0;
-	dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
-	dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
+	clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0;
+	clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT;
+	clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK;
 	clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names,
 					 MUX_CLK_NUM_PARENTS, &clk_mux_ops,
-					 &dwmac->m250_mux.hw);
+					 &clk_configs->m250_mux.hw);
 	if (WARN_ON(IS_ERR(clk)))
 		return PTR_ERR(clk);
 
 	parent_name = __clk_get_name(clk);
-	dwmac->m250_div.reg = dwmac->regs + PRG_ETH0;
-	dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
-	dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
-	dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+	clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
+	clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
+	clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
+	clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
 				CLK_DIVIDER_ALLOW_ZERO |
 				CLK_DIVIDER_ROUND_CLOSEST;
 	clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
 					 &clk_divider_ops,
-					 &dwmac->m250_div.hw);
+					 &clk_configs->m250_div.hw);
 	if (WARN_ON(IS_ERR(clk)))
 		return PTR_ERR(clk);
 
 	parent_name = __clk_get_name(clk);
-	dwmac->fixed_div2.mult = 1;
-	dwmac->fixed_div2.div = 2;
+	clk_configs->fixed_div2.mult = 1;
+	clk_configs->fixed_div2.div = 2;
 	clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1,
 					 &clk_fixed_factor_ops,
-					 &dwmac->fixed_div2.hw);
+					 &clk_configs->fixed_div2.hw);
 	if (WARN_ON(IS_ERR(clk)))
 		return PTR_ERR(clk);
 
 	parent_name = __clk_get_name(clk);
-	dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
-	dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
+	clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0;
+	clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN;
 	clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1,
 					 &clk_gate_ops,
-					 &dwmac->rgmii_tx_en.hw);
+					 &clk_configs->rgmii_tx_en.hw);
 	if (WARN_ON(IS_ERR(clk)))
 		return PTR_ERR(clk);
 

From 607ea03221ff84943e840d3707dbeb0b4b88959f Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sun, 18 Feb 2018 18:21:05 +0300
Subject: [PATCH 0206/1678] sh_eth: simplify sh_eth_check_reset()

The *while* loop in this function  can be turned into a normal *for* loop.
And getting rid  of the  single return point saves us a few more LoCs...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a197e11f3a56..c17654f5ce32 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -962,20 +962,16 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
 
 static int sh_eth_check_reset(struct net_device *ndev)
 {
-	int ret = 0;
-	int cnt = 100;
+	int cnt;
 
-	while (cnt > 0) {
+	for (cnt = 100; cnt > 0; cnt--) {
 		if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
-			break;
+			return 0;
 		mdelay(1);
-		cnt--;
 	}
-	if (cnt <= 0) {
-		netdev_err(ndev, "Device reset failed\n");
-		ret = -ETIMEDOUT;
-	}
-	return ret;
+
+	netdev_err(ndev, "Device reset failed\n");
+	return -ETIMEDOUT;
 }
 
 static int sh_eth_reset(struct net_device *ndev)

From 3021efb440d02bf5b952b6d151c7ffee9bdd49fe Mon Sep 17 00:00:00 2001
From: Jake Moroni <mail@jakemoroni.com>
Date: Sun, 18 Feb 2018 15:26:04 -0500
Subject: [PATCH 0207/1678] dpaa_eth: fix pause capability advertisement logic

The ADVERTISED_Asym_Pause bit was being improperly set when both
rx and tx pause were enabled. When rx and tx are both enabled, only
the ADVERTISED_Pause bit is supposed to be set.

Signed-off-by: Jake Moroni <mail@jakemoroni.com>
Acked-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index faea674094b9..85306d1b2acf 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
 	if (epause->rx_pause)
 		newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
 	if (epause->tx_pause)
-		newadv |= ADVERTISED_Asym_Pause;
+		newadv ^= ADVERTISED_Asym_Pause;
 
 	oldadv = phydev->advertising &
 			(ADVERTISED_Pause | ADVERTISED_Asym_Pause);

From 753d525a08f2e18d50da9a909d48ecb7596683b3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:48:04 +0300
Subject: [PATCH 0208/1678] net: Convert inet6_net_ops

init method initializes sysctl defaults, allocates
percpu arrays and creates /proc entries.
exit method reverts the above.

There are no pernet_operations, which are interested
in the above entities of foreign net namespace, so
inet6_net_ops are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c1e292db04db..dbbe04018813 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -857,6 +857,7 @@ static void __net_exit inet6_net_exit(struct net *net)
 static struct pernet_operations inet6_net_ops = {
 	.init = inet6_net_init,
 	.exit = inet6_net_exit,
+	.async = true,
 };
 
 static const struct ipv6_stub ipv6_stub_impl = {

From 9c537ca1554e5d7db69a3b606801a2101e02edda Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:48:14 +0300
Subject: [PATCH 0209/1678] net: Convert cfg80211_pernet_ops

This patch finishes converting pernet_operations
registered in net/wireless directory.

These pernet_operations have only exit method,
which moves devices to init_net. This action
is not pernet_operations-specific, and function
cfg80211_switch_netns() may be called all time
during the system life. All necessary protection
against concurrent cfg80211_pernet_exit() is made
by rtnl_lock(). So, cfg80211_pernet_ops is able
to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index a6f3cac8c640..670aa229168a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1340,6 +1340,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg80211_pernet_ops = {
 	.exit = cfg80211_pernet_exit,
+	.async = true,
 };
 
 static int __init cfg80211_init(void)

From b01a59a4884ed2d95b8db4741e552a689c18989b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:48:37 +0300
Subject: [PATCH 0210/1678] net: Convert ip6mr_net_ops

These pernet_operations create and destroy /proc entries,
populate and depopulate net::rules_ops and multiroute table.
All the structures are pernet, and they are not touched
by foreign net pernet_operations. So, it's possible to mark
them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..295eb5ecaee5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1397,6 +1397,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 static struct pernet_operations ip6mr_net_ops = {
 	.init = ip6mr_net_init,
 	.exit = ip6mr_net_exit,
+	.async = true,
 };
 
 int __init ip6_mr_init(void)

From 1a2e93329dd413585798c122aac0607093b2d379 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:48:57 +0300
Subject: [PATCH 0211/1678] net: Convert icmpv6_sk_ops, ndisc_net_ops and
 igmp6_net_ops

These pernet_operations create and destroy net::ipv6.icmp_sk
socket, used to send ICMP or error reply.

Nobody can dereference the socket to handle a packet before
net is initialized, as there is no routing; nobody can do
that in parallel with exit, as all of devices are moved
to init_net or destroyed and there are no packets it-flight.
So, it's possible to mark these pernet_operations as async.

The same for ndisc_net_ops and for igmp6_net_ops. The last
one also creates and destroys /proc entries.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c  | 1 +
 net/ipv6/mcast.c | 1 +
 net/ipv6/ndisc.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..4fa4f1b150a4 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -997,6 +997,7 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
 static struct pernet_operations icmpv6_sk_ops = {
 	.init = icmpv6_sk_init,
 	.exit = icmpv6_sk_exit,
+	.async = true,
 };
 
 int __init icmpv6_init(void)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..d9bb933dd5c4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2997,6 +2997,7 @@ static void __net_exit igmp6_net_exit(struct net *net)
 static struct pernet_operations igmp6_net_ops = {
 	.init = igmp6_net_init,
 	.exit = igmp6_net_exit,
+	.async = true,
 };
 
 int __init igmp6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f61a5b613b52..0a19ce3a6f7f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1882,6 +1882,7 @@ static void __net_exit ndisc_net_exit(struct net *net)
 static struct pernet_operations ndisc_net_ops = {
 	.init = ndisc_net_init,
 	.exit = ndisc_net_exit,
+	.async = true,
 };
 
 int __init ndisc_init(void)

From 509114112d0b71997dbe58d4f2976eeddf8eacc4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:49:10 +0300
Subject: [PATCH 0212/1678] net: Convert raw6_net_ops, udplite6_net_ops,
 ipv6_proc_ops, if6_proc_net_ops and ip6_route_net_late_ops

These pernet_operations create and destroy /proc entries
and safely may be converted and safely may be mark as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 +
 net/ipv6/proc.c     | 1 +
 net/ipv6/raw.c      | 1 +
 net/ipv6/route.c    | 2 ++
 4 files changed, 5 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8c17f8d8d5d9..4facfe0b1888 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4257,6 +4257,7 @@ static void __net_exit if6_proc_net_exit(struct net *net)
 static struct pernet_operations if6_proc_net_ops = {
 	.init = if6_proc_net_init,
 	.exit = if6_proc_net_exit,
+	.async = true,
 };
 
 int __init if6_proc_init(void)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..b8858c546f41 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -343,6 +343,7 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
 static struct pernet_operations ipv6_proc_ops = {
 	.init = ipv6_proc_init_net,
 	.exit = ipv6_proc_exit_net,
+	.async = true,
 };
 
 int __init ipv6_misc_proc_init(void)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..10a4ac4933b7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1332,6 +1332,7 @@ static void __net_exit raw6_exit_net(struct net *net)
 static struct pernet_operations raw6_net_ops = {
 	.init = raw6_init_net,
 	.exit = raw6_exit_net,
+	.async = true,
 };
 
 int __init raw6_proc_init(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0baae26db8f..1be84ef23f43 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4972,6 +4972,7 @@ static void __net_exit ip6_route_net_exit_late(struct net *net)
 static struct pernet_operations ip6_route_net_ops = {
 	.init = ip6_route_net_init,
 	.exit = ip6_route_net_exit,
+	.async = true,
 };
 
 static int __net_init ipv6_inetpeer_init(struct net *net)
@@ -5002,6 +5003,7 @@ static struct pernet_operations ipv6_inetpeer_ops = {
 static struct pernet_operations ip6_route_net_late_ops = {
 	.init = ip6_route_net_init_late,
 	.exit = ip6_route_net_exit_late,
+	.async = true,
 };
 
 static struct notifier_block ip6_route_dev_notifier = {

From 85ca51b2a23969dccb5abff31eb54560db8195ca Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:49:20 +0300
Subject: [PATCH 0213/1678] net: Convert ipv6_inetpeer_ops

net->ipv6.peers is dereferenced in three places via inet_getpeer_v6(),
and it's used to handle skb. All the users of inet_getpeer_v6() do not
look like be able to be called from foreign net pernet_operations, so
we may mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1be84ef23f43..aa709b644945 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4998,6 +4998,7 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
 static struct pernet_operations ipv6_inetpeer_ops = {
 	.init	=	ipv6_inetpeer_init,
 	.exit	=	ipv6_inetpeer_exit,
+	.async	=	true,
 };
 
 static struct pernet_operations ip6_route_net_late_ops = {

From 7b7dd180b85b9c39c426cfaade73f7d0409c7f85 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:49:31 +0300
Subject: [PATCH 0214/1678] net: Convert fib6_rules_net_ops

These pernet_operations register and unregister
net::ipv6.fib6_rules_ops, which are used for
routing. It looks like there are no pernet_operations,
which send ipv6 packages to another net, so we
are able to mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..95a2c9e8699a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -368,6 +368,7 @@ static void __net_exit fib6_rules_net_exit(struct net *net)
 static struct pernet_operations fib6_rules_net_ops = {
 	.init = fib6_rules_net_init,
 	.exit = fib6_rules_net_exit,
+	.async = true,
 };
 
 int __init fib6_rules_init(void)

From fef65a2c6c3417fcf2fb0bebd8c27355d1b14a12 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:49:40 +0300
Subject: [PATCH 0215/1678] net: Convert tcpv6_net_ops

These pernet_operations create and destroy net::ipv6.tcp_sk
socket, which is used in tcp_v6_send_response() only. It looks
like foreign pernet_operations don't want to set ipv6 connection
inside destroyed net, so this socket may be created in destroyed
in parallel with anything else. inet_twsk_purge() is also safe
for that, as described in patch for tcp_sk_ops. So, it's possible
to mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 883df0ad5bfe..5425d7b100ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2007,6 +2007,7 @@ static struct pernet_operations tcpv6_net_ops = {
 	.init	    = tcpv6_net_init,
 	.exit	    = tcpv6_net_exit,
 	.exit_batch = tcpv6_net_exit_batch,
+	.async	    = true,
 };
 
 int __init tcpv6_init(void)

From 58708caef56b04da2c91d6aaba49c783f22055c2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:49:49 +0300
Subject: [PATCH 0216/1678] net: Convert ipv6_sysctl_net_ops

These pernet_operations create and destroy sysctl tables.
They are not touched by another net pernet_operations.
So, it's possible to execute them in parallel with others.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8ac6a64..262f791f1b9b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -251,6 +251,7 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
 static struct pernet_operations ipv6_sysctl_net_ops = {
 	.init = ipv6_sysctl_net_init,
 	.exit = ipv6_sysctl_net_exit,
+	.async = true,
 };
 
 static struct ctl_table_header *ip6_header;

From ac34cb6c0c4d327d69e588d7a42bbc6428e2fdfd Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:49:59 +0300
Subject: [PATCH 0217/1678] net: Convert ping_v6_net_ops

These pernet_operations only register and unregister /proc
entries, so it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ping.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d12c55dad7d1..318c6e914234 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -240,6 +240,7 @@ static void __net_init ping_v6_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v6_net_ops = {
 	.init = ping_v6_proc_init_net,
 	.exit = ping_v6_proc_exit_net,
+	.async = true,
 };
 #endif
 

From a7852a76f414f69631ed3adc4b001c633829306d Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:50:09 +0300
Subject: [PATCH 0218/1678] net: Convert ip6_flowlabel_net_ops

These pernet_operations create and destroy /proc entries.
ip6_fl_purge() makes almost the same actions as timer
ip6_fl_gc_timer does, and as it can be executed in parallel
with ip6_fl_purge(), two parallel ip6_fl_purge() may be
executed. So, we can mark it async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..6ddf52282894 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -873,6 +873,7 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 static struct pernet_operations ip6_flowlabel_net_ops = {
 	.init = ip6_flowlabel_proc_init,
 	.exit = ip6_flowlabel_net_exit,
+	.async = true,
 };
 
 int ip6_flowlabel_init(void)

From b489141369f78ead6ed540cff29ac1974852cd7f Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:50:18 +0300
Subject: [PATCH 0219/1678] net: Convert xfrm6_net_ops

These pernet_operations create sysctl tables and
initialize net::xfrm.xfrm6_dst_ops used for routing.
It doesn't look like another pernet_operations send
ipv6 packets to foreign net namespaces, so it should
be safe to mark the pernet_operations as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/xfrm6_policy.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 09fb44ee3b45..88cd0c90fa81 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -395,6 +395,7 @@ static void __net_exit xfrm6_net_exit(struct net *net)
 static struct pernet_operations xfrm6_net_ops = {
 	.init	= xfrm6_net_init,
 	.exit	= xfrm6_net_exit,
+	.async	= true,
 };
 
 int __init xfrm6_init(void)

From d16784d9fb2a91e96374df7cb689f52cef55371b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:50:28 +0300
Subject: [PATCH 0220/1678] net: Convert fib6_net_ops, ipv6_addr_label_ops and
 ip6_segments_ops

These pernet_operations register and unregister tables
and lists for packets forwarding. All of the entities
are per-net. Init methods makes simple initializations,
and since net is not visible for foreigners at the time
it is working, it can't race with anything. Exit method
is executed when there are only local devices, and there
mustn't be packets in-flight. Also, it looks like no one
pernet_operations want to send ipv6 packets to foreign
net. The same reasons are for ipv6_addr_label_ops and
ip6_segments_ops. So, we are able to mark all them as
async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrlabel.c | 1 +
 net/ipv6/ip6_fib.c   | 1 +
 net/ipv6/seg6.c      | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced37ad71..ba2e63633370 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -344,6 +344,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
 static struct pernet_operations ipv6_addr_label_ops = {
 	.init = ip6addrlbl_net_init,
 	.exit = ip6addrlbl_net_exit,
+	.async = true,
 };
 
 int __init ipv6_addr_label_init(void)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..cab95cf3b39f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2160,6 +2160,7 @@ static void fib6_net_exit(struct net *net)
 static struct pernet_operations fib6_net_ops = {
 	.init = fib6_net_init,
 	.exit = fib6_net_exit,
+	.async = true,
 };
 
 int __init fib6_init(void)
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 7f5621d09571..c3f13c3bd8a9 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -395,6 +395,7 @@ static void __net_exit seg6_net_exit(struct net *net)
 static struct pernet_operations ip6_segments_ops = {
 	.init = seg6_net_init,
 	.exit = seg6_net_exit,
+	.async = true,
 };
 
 static const struct genl_ops seg6_genl_ops[] = {

From 5fc094f5b8c9b8e5012b7ccf303be3b123563a58 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:50:37 +0300
Subject: [PATCH 0221/1678] net: Convert ip6_frags_ops

Exit methods calls inet_frags_exit_net() with global ip6_frags
as argument. So, after we make the pernet_operations async,
a pair of exit methods may be called to iterate this hash table.
Since there is inet_frag_worker(), which already may work
in parallel with inet_frags_exit_net(), and it can make the same
cleanup, that inet_frags_exit_net() does, it's safe. So we may
mark these pernet_operations as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..b5da69c83123 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -733,6 +733,7 @@ static void __net_exit ipv6_frags_exit_net(struct net *net)
 static struct pernet_operations ip6_frags_ops = {
 	.init = ipv6_frags_init_net,
 	.exit = ipv6_frags_exit_net,
+	.async = true,
 };
 
 int __init ipv6_frag_init(void)

From 4d6b80762b9384db3281f792a71746b388f395be Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:50:45 +0300
Subject: [PATCH 0222/1678] net: Convert ip_tables_net_ops, udplite6_net_ops
 and xt_net_ops

ip_tables_net_ops and udplite6_net_ops create and destroy /proc entries.
xt_net_ops does nothing.

So, we are able to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 1 +
 net/ipv6/udplite.c             | 1 +
 net/netfilter/x_tables.c       | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 9a71f3149507..39a7cf9160e6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1911,6 +1911,7 @@ static void __net_exit ip_tables_net_exit(struct net *net)
 static struct pernet_operations ip_tables_net_ops = {
 	.init = ip_tables_net_init,
 	.exit = ip_tables_net_exit,
+	.async = true,
 };
 
 static int __init ip_tables_init(void)
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 14ae32bb1f3d..f3839780dc31 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -123,6 +123,7 @@ static void __net_exit udplite6_proc_exit_net(struct net *net)
 static struct pernet_operations udplite6_net_ops = {
 	.init = udplite6_proc_init_net,
 	.exit = udplite6_proc_exit_net,
+	.async = true,
 };
 
 int __init udplite6_proc_init(void)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2f685ee1f9c8..a6a435d7c8f4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1765,6 +1765,7 @@ static void __net_exit xt_net_exit(struct net *net)
 static struct pernet_operations xt_net_ops = {
 	.init = xt_net_init,
 	.exit = xt_net_exit,
+	.async = true,
 };
 
 static int __init xt_init(void)

From da349fad8045cc44cd9b1752b0e2d943df62cabf Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 11:50:54 +0300
Subject: [PATCH 0223/1678] net: Convert iptable_filter_net_ops

These pernet_operations register and unregister
net::ipv4.iptable_filter table. Since there are
no packets in-flight at the time of exit method
is working, iptables rules should not be touched.
Also, pernet_operations should not send ipv4
packets each other. So, it's safe to mark them
async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 9ac92ea7b93c..c1c136a93911 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -87,6 +87,7 @@ static void __net_exit iptable_filter_net_exit(struct net *net)
 static struct pernet_operations iptable_filter_net_ops = {
 	.init = iptable_filter_net_init,
 	.exit = iptable_filter_net_exit,
+	.async = true,
 };
 
 static int __init iptable_filter_init(void)

From 96c252bf1c5c6d7e2dac3dea42f3f0a9c939d20e Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Mon, 19 Feb 2018 12:48:21 +0100
Subject: [PATCH 0224/1678] tipc: fix bug on error path in
 tipc_topsrv_kern_subscr()

In commit cc1ea9ffadf7 ("tipc: eliminate struct tipc_subscriber") we
re-introduced an old bug on the error path in the function
tipc_topsrv_kern_subscr(). We now re-introduce the correction too.

Reported-by: syzbot+f62e0f2a0ef578703946@syzkaller.appspotmail.com
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/topsrv.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 02013e00f287..25925be1cc08 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -580,9 +580,10 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
 	*conid = con->conid;
 	con->sock = NULL;
 	rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
-	if (rc < 0)
-		tipc_conn_close(con);
-	return !rc;
+	if (rc >= 0)
+		return true;
+	conn_put(con);
+	return false;
 }
 
 void tipc_topsrv_kern_unsubscr(struct net *net, int conid)

From 26736a08ee0fb89a4f09bfb2c9f0805028ff63aa Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 19 Feb 2018 19:02:24 +0100
Subject: [PATCH 0225/1678] tipc: don't call sock_release() in atomic context

syzbot reported a scheduling while atomic issue at netns
destruction time:

BUG: sleeping function called from invalid context at net/core/sock.c:2769
in_atomic(): 1, irqs_disabled(): 0, pid: 85, name: kworker/u4:3
5 locks held by kworker/u4:3/85:
  #0:  ((wq_completion)"%s""netns"){+.+.}, at: [<00000000c9792deb>]
process_one_work+0xaaf/0x1af0 kernel/workqueue.c:2084
  #1:  (net_cleanup_work){+.+.}, at: [<00000000adc12e2a>]
process_one_work+0xb01/0x1af0 kernel/workqueue.c:2088
  #2:  (net_sem){++++}, at: [<000000009ccb5669>] cleanup_net+0x23f/0xd20
net/core/net_namespace.c:494
  #3:  (net_mutex){+.+.}, at: [<00000000a92767d9>] cleanup_net+0xa7d/0xd20
net/core/net_namespace.c:496
  #4:  (&(&srv->idr_lock)->rlock){+...}, at: [<000000001343e568>]
spin_lock_bh include/linux/spinlock.h:315 [inline]
  #4:  (&(&srv->idr_lock)->rlock){+...}, at: [<000000001343e568>]
tipc_topsrv_stop+0x231/0x610 net/tipc/topsrv.c:685
CPU: 0 PID: 85 Comm: kworker/u4:3 Not tainted 4.16.0-rc1+ #230
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Workqueue: netns cleanup_net
Call Trace:
  __dump_stack lib/dump_stack.c:17 [inline]
  dump_stack+0x194/0x257 lib/dump_stack.c:53
  ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6128
  __might_sleep+0x95/0x190 kernel/sched/core.c:6081
  lock_sock_nested+0x37/0x110 net/core/sock.c:2769
  lock_sock include/net/sock.h:1463 [inline]
  tipc_release+0x103/0xff0 net/tipc/socket.c:572
  sock_release+0x8d/0x1e0 net/socket.c:594
  tipc_topsrv_stop+0x3c0/0x610 net/tipc/topsrv.c:696
  tipc_exit_net+0x15/0x40 net/tipc/core.c:96
  ops_exit_list.isra.6+0xae/0x150 net/core/net_namespace.c:148
  cleanup_net+0x6ba/0xd20 net/core/net_namespace.c:529
  process_one_work+0xbbf/0x1af0 kernel/workqueue.c:2113
  worker_thread+0x223/0x1990 kernel/workqueue.c:2247
  kthread+0x33c/0x400 kernel/kthread.c:238
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:429

This is caused by tipc_topsrv_stop() releasing the listener socket
with the idr lock held. This changeset addresses the issue moving
the release operation outside such lock.

Reported-and-tested-by: syzbot+749d9d87c294c00ca856@syzkaller.appspotmail.com
Fixes: 0ef897be12b8 ("tipc: separate topology server listener socket from subcsriber sockets")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by:  ///jon
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/topsrv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 25925be1cc08..c8e34ef22c30 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -694,9 +694,9 @@ void tipc_topsrv_stop(struct net *net)
 	}
 	__module_get(lsock->ops->owner);
 	__module_get(lsock->sk->sk_prot_creator->owner);
-	sock_release(lsock);
 	srv->listener = NULL;
 	spin_unlock_bh(&srv->idr_lock);
+	sock_release(lsock);
 	tipc_topsrv_work_stop(srv);
 	idr_destroy(&srv->conn_idr);
 	kfree(srv);

From ffc385b95adb0e601f6858b06401adabedf59f81 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Sun, 18 Feb 2018 10:08:41 -0600
Subject: [PATCH 0226/1678] ibmvnic: Keep track of supplementary TX descriptors

Supplementary TX descriptors were not being accounted for, which
was resulting in an overflow of the hardware device's transmit
queue. Keep track of those descriptors now when determining
how many entries remain on the TX queue.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++--
 drivers/net/ethernet/ibm/ibmvnic.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 996f47568f9e..64770a1c406d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1467,6 +1467,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if ((*hdrs >> 7) & 1) {
 		build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
 		tx_crq.v1.n_crq_elem = num_entries;
+		tx_buff->num_entries = num_entries;
 		tx_buff->indir_arr[0] = tx_crq;
 		tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
 						    sizeof(tx_buff->indir_arr),
@@ -1515,7 +1516,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto out;
 	}
 
-	if (atomic_inc_return(&tx_scrq->used)
+	if (atomic_add_return(num_entries, &tx_scrq->used)
 					>= adapter->req_tx_entries_per_subcrq) {
 		netdev_info(netdev, "Stopping queue %d\n", queue_num);
 		netif_stop_subqueue(netdev, queue_num);
@@ -2468,6 +2469,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 restart_loop:
 	while (pending_scrq(adapter, scrq)) {
 		unsigned int pool = scrq->pool_index;
+		int num_entries = 0;
 
 		next = ibmvnic_next_scrq(adapter, scrq);
 		for (i = 0; i < next->tx_comp.num_comps; i++) {
@@ -2498,6 +2500,8 @@ restart_loop:
 				txbuff->skb = NULL;
 			}
 
+			num_entries += txbuff->num_entries;
+
 			adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
 						     producer_index] = index;
 			adapter->tx_pool[pool].producer_index =
@@ -2507,7 +2511,7 @@ restart_loop:
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
 
-		if (atomic_sub_return(next->tx_comp.num_comps, &scrq->used) <=
+		if (atomic_sub_return(num_entries, &scrq->used) <=
 		    (adapter->req_tx_entries_per_subcrq / 2) &&
 		    __netif_subqueue_stopped(adapter->netdev,
 					     scrq->pool_index)) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index fe21a6e2ddae..2f51458ccdc3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -909,6 +909,7 @@ struct ibmvnic_tx_buff {
 	union sub_crq indir_arr[6];
 	u8 hdr_data[140];
 	dma_addr_t indir_dma;
+	int num_entries;
 };
 
 struct ibmvnic_tx_pool {

From 19efbd93e6fb05eab81856b4fc8d64211dd37088 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 12:58:38 +0300
Subject: [PATCH 0227/1678] net: Kill net_mutex

We take net_mutex, when there are !async pernet_operations
registered, and read locking of net_sem is not enough. But
we may get rid of taking the mutex, and just change the logic
to write lock net_sem in such cases. This obviously reduces
the number of lock operations, we do.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h   |  1 -
 include/net/net_namespace.h | 11 +++++---
 net/core/net_namespace.c    | 53 ++++++++++++++++++++++---------------
 3 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index e9ee9ad0a681..3573b4bf2fdf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -35,7 +35,6 @@ extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
-extern struct mutex net_mutex;
 extern struct rw_semaphore net_sem;
 
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9158ec1ad06f..115b01b92f4d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,8 +60,11 @@ struct net {
 
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	cleanup_list;	/* namespaces on death row */
-	struct list_head	exit_list;	/* Use only net_mutex */
-
+	struct list_head	exit_list;	/* To linked to call pernet exit
+						 * methods on dead net (net_sem
+						 * read locked), or to unregister
+						 * pernet ops (net_sem wr locked).
+						 */
 	struct user_namespace   *user_ns;	/* Owning user namespace */
 	struct ucounts		*ucounts;
 	spinlock_t		nsid_lock;
@@ -89,7 +92,7 @@ struct net {
 	/* core fib_rules */
 	struct list_head	rules_ops;
 
-	struct list_head	fib_notifier_ops;  /* protected by net_mutex */
+	struct list_head	fib_notifier_ops;  /* protected by net_sem */
 
 	struct net_device       *loopback_dev;          /* The loopback */
 	struct netns_core	core;
@@ -316,7 +319,7 @@ struct pernet_operations {
 	/*
 	 * Indicates above methods are allowed to be executed in parallel
 	 * with methods of any other pernet_operations, i.e. they are not
-	 * need synchronization via net_mutex.
+	 * need write locked net_sem.
 	 */
 	bool async;
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bcab9a938d6f..e89a516620dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -29,8 +29,6 @@
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
-/* Used only if there are !async pernet_operations registered */
-DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
@@ -407,6 +405,7 @@ struct net *copy_net_ns(unsigned long flags,
 {
 	struct ucounts *ucounts;
 	struct net *net;
+	unsigned write;
 	int rv;
 
 	if (!(flags & CLONE_NEWNET))
@@ -424,20 +423,26 @@ struct net *copy_net_ns(unsigned long flags,
 	refcount_set(&net->passive, 1);
 	net->ucounts = ucounts;
 	get_user_ns(user_ns);
-
-	rv = down_read_killable(&net_sem);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		rv = down_write_killable(&net_sem);
+	else
+		rv = down_read_killable(&net_sem);
 	if (rv < 0)
 		goto put_userns;
-	if (nr_sync_pernet_ops) {
-		rv = mutex_lock_killable(&net_mutex);
-		if (rv < 0)
-			goto up_read;
+
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
 	}
 	rv = setup_net(net, user_ns);
-	if (nr_sync_pernet_ops)
-		mutex_unlock(&net_mutex);
-up_read:
-	up_read(&net_sem);
+
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
+
 	if (rv < 0) {
 put_userns:
 		put_user_ns(user_ns);
@@ -485,15 +490,23 @@ static void cleanup_net(struct work_struct *work)
 	struct net *net, *tmp, *last;
 	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
+	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
 	spin_lock_irq(&cleanup_list_lock);
 	list_replace_init(&cleanup_list, &net_kill_list);
 	spin_unlock_irq(&cleanup_list_lock);
+again:
+	write = READ_ONCE(nr_sync_pernet_ops);
+	if (write)
+		down_write(&net_sem);
+	else
+		down_read(&net_sem);
 
-	down_read(&net_sem);
-	if (nr_sync_pernet_ops)
-		mutex_lock(&net_mutex);
+	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
+		up_read(&net_sem);
+		goto again;
+	}
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
@@ -528,14 +541,14 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_exit_list(ops, &net_exit_list);
 
-	if (nr_sync_pernet_ops)
-		mutex_unlock(&net_mutex);
-
 	/* Free the net generic variables */
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	up_read(&net_sem);
+	if (write)
+		up_write(&net_sem);
+	else
+		up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -563,8 +576,6 @@ static void cleanup_net(struct work_struct *work)
 void net_ns_barrier(void)
 {
 	down_write(&net_sem);
-	mutex_lock(&net_mutex);
-	mutex_unlock(&net_mutex);
 	up_write(&net_sem);
 }
 EXPORT_SYMBOL(net_ns_barrier);

From 65b7b5b90fcd17b25ef43b0cd02bda47bf286675 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 12:58:45 +0300
Subject: [PATCH 0228/1678] net: Make cleanup_list and net::cleanup_list of
 llist type

This simplifies cleanup queueing and makes cleanup lists
to use llist primitives. Since llist has its own cmpxchg()
ordering, cleanup_list_lock is not more need.

Also, struct llist_node is smaller, than struct list_head,
so we save some bytes in struct net with this patch.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  3 ++-
 net/core/net_namespace.c    | 20 ++++++--------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 115b01b92f4d..d4417495773a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -59,12 +59,13 @@ struct net {
 	atomic64_t		cookie_gen;
 
 	struct list_head	list;		/* list of network namespaces */
-	struct list_head	cleanup_list;	/* namespaces on death row */
 	struct list_head	exit_list;	/* To linked to call pernet exit
 						 * methods on dead net (net_sem
 						 * read locked), or to unregister
 						 * pernet ops (net_sem wr locked).
 						 */
+	struct llist_node	cleanup_list;	/* namespaces on death row */
+
 	struct user_namespace   *user_ns;	/* Owning user namespace */
 	struct ucounts		*ucounts;
 	spinlock_t		nsid_lock;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e89a516620dd..abf8a46e94e2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -481,21 +481,18 @@ static void unhash_nsid(struct net *net, struct net *last)
 	spin_unlock_bh(&net->nsid_lock);
 }
 
-static DEFINE_SPINLOCK(cleanup_list_lock);
-static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
+static LLIST_HEAD(cleanup_list);
 
 static void cleanup_net(struct work_struct *work)
 {
 	const struct pernet_operations *ops;
 	struct net *net, *tmp, *last;
-	struct list_head net_kill_list;
+	struct llist_node *net_kill_list;
 	LIST_HEAD(net_exit_list);
 	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
-	spin_lock_irq(&cleanup_list_lock);
-	list_replace_init(&cleanup_list, &net_kill_list);
-	spin_unlock_irq(&cleanup_list_lock);
+	net_kill_list = llist_del_all(&cleanup_list);
 again:
 	write = READ_ONCE(nr_sync_pernet_ops);
 	if (write)
@@ -510,7 +507,7 @@ again:
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
-	list_for_each_entry(net, &net_kill_list, cleanup_list)
+	llist_for_each_entry(net, net_kill_list, cleanup_list)
 		list_del_rcu(&net->list);
 	/* Cache last net. After we unlock rtnl, no one new net
 	 * added to net_namespace_list can assign nsid pointer
@@ -525,7 +522,7 @@ again:
 	last = list_last_entry(&net_namespace_list, struct net, list);
 	rtnl_unlock();
 
-	list_for_each_entry(net, &net_kill_list, cleanup_list) {
+	llist_for_each_entry(net, net_kill_list, cleanup_list) {
 		unhash_nsid(net, last);
 		list_add_tail(&net->exit_list, &net_exit_list);
 	}
@@ -585,12 +582,7 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
-	unsigned long flags;
-
-	spin_lock_irqsave(&cleanup_list_lock, flags);
-	list_add(&net->cleanup_list, &cleanup_list);
-	spin_unlock_irqrestore(&cleanup_list_lock, flags);
-
+	llist_add(&net->cleanup_list, &cleanup_list);
 	queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);

From 8349efd903394422d1598d196b6be70c410cf8f5 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Feb 2018 12:58:54 +0300
Subject: [PATCH 0229/1678] net: Queue net_cleanup_work only if there is first
 net added

When llist_add() returns false, cleanup_net() hasn't made its
llist_del_all(), while the work has already been scheduled
by the first queuer. So, we may skip queue_work() in this case.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index abf8a46e94e2..27a55236ad64 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -582,8 +582,8 @@ static DECLARE_WORK(net_cleanup_work, cleanup_net);
 void __put_net(struct net *net)
 {
 	/* Cleanup the network namespace in process context */
-	llist_add(&net->cleanup_list, &cleanup_list);
-	queue_work(netns_wq, &net_cleanup_work);
+	if (llist_add(&net->cleanup_list, &cleanup_list))
+		queue_work(netns_wq, &net_cleanup_work);
 }
 EXPORT_SYMBOL_GPL(__put_net);
 

From cc944ead839a28b07cea4f90878f2b7e0cb71a02 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Tue, 20 Feb 2018 08:44:20 +0100
Subject: [PATCH 0230/1678] devlink: Move size validation to core

Currently the size validation is done via a cb, which is unneeded. The
size validation can be moved to core. The next patch will perform cleanup.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 18d385ed8237..88e846779269 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2338,6 +2338,32 @@ out:
 	resource->size_valid = size_valid;
 }
 
+static int
+devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
+			       struct netlink_ext_ack *extack)
+{
+	u64 reminder;
+	int err = 0;
+
+	if (size > resource->size_params->size_max) {
+		NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+		err = -EINVAL;
+	}
+
+	if (size < resource->size_params->size_min) {
+		NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+		err = -EINVAL;
+	}
+
+	div64_u64_rem(size, resource->size_params->size_granularity, &reminder);
+	if (reminder) {
+		NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
 static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
 				       struct genl_info *info)
 {
@@ -2356,12 +2382,8 @@ static int devlink_nl_cmd_resource_set(struct sk_buff *skb,
 	if (!resource)
 		return -EINVAL;
 
-	if (!resource->resource_ops->size_validate)
-		return -EINVAL;
-
 	size = nla_get_u64(info->attrs[DEVLINK_ATTR_RESOURCE_SIZE]);
-	err = resource->resource_ops->size_validate(devlink, size,
-						    info->extack);
+	err = devlink_resource_validate_size(resource, size, info->extack);
 	if (err)
 		return err;
 

From 4f4bbf7c4e3d4bd14987a13041c6b5b1ea59e21f Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Tue, 20 Feb 2018 08:44:21 +0100
Subject: [PATCH 0231/1678] devlink: Perform cleanup of resource_set cb

After adding size validation logic into core cleanup is required.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 83 +------------------
 include/net/devlink.h                         |  4 -
 2 files changed, 3 insertions(+), 84 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5e8ea712caa2..39196625ae8e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3798,70 +3798,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.resource_query_enable		= 1,
 };
 
-static bool
-mlxsw_sp_resource_kvd_granularity_validate(struct netlink_ext_ack *extack,
-					   u64 size)
-{
-	const struct mlxsw_config_profile *profile;
-
-	profile = &mlxsw_sp_config_profile;
-	if (size % profile->kvd_hash_granularity) {
-		NL_SET_ERR_MSG_MOD(extack, "resource set with wrong granularity");
-		return false;
-	}
-	return true;
-}
-
-static int
-mlxsw_sp_resource_kvd_size_validate(struct devlink *devlink, u64 size,
-				    struct netlink_ext_ack *extack)
-{
-	NL_SET_ERR_MSG_MOD(extack, "kvd size cannot be changed");
-	return -EINVAL;
-}
-
-static int
-mlxsw_sp_resource_kvd_linear_size_validate(struct devlink *devlink, u64 size,
-					   struct netlink_ext_ack *extack)
-{
-	if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_single_size_validate(struct devlink *devlink, u64 size,
-						struct netlink_ext_ack *extack)
-{
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
-	if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
-		return -EINVAL;
-
-	if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE)) {
-		NL_SET_ERR_MSG_MOD(extack, "hash single size is smaller than minimum");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static int
-mlxsw_sp_resource_kvd_hash_double_size_validate(struct devlink *devlink, u64 size,
-						struct netlink_ext_ack *extack)
-{
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-
-	if (!mlxsw_sp_resource_kvd_granularity_validate(extack, size))
-		return -EINVAL;
-
-	if (size < MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE)) {
-		NL_SET_ERR_MSG_MOD(extack, "hash double size is smaller than minimum");
-		return -EINVAL;
-	}
-	return 0;
-}
-
 static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
@@ -3870,23 +3806,10 @@ static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
 	return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
 }
 
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_ops = {
-	.size_validate = mlxsw_sp_resource_kvd_size_validate,
-};
-
 static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
-	.size_validate = mlxsw_sp_resource_kvd_linear_size_validate,
 	.occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
 };
 
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_single_ops = {
-	.size_validate = mlxsw_sp_resource_kvd_hash_single_size_validate,
-};
-
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_hash_double_ops = {
-	.size_validate = mlxsw_sp_resource_kvd_hash_double_size_validate,
-};
-
 static struct devlink_resource_size_params mlxsw_sp_kvd_size_params;
 static struct devlink_resource_size_params mlxsw_sp_linear_size_params;
 static struct devlink_resource_size_params mlxsw_sp_hash_single_size_params;
@@ -3948,7 +3871,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 					MLXSW_SP_RESOURCE_KVD,
 					DEVLINK_RESOURCE_ID_PARENT_TOP,
 					&mlxsw_sp_kvd_size_params,
-					&mlxsw_sp_resource_kvd_ops);
+					NULL);
 	if (err)
 		return err;
 
@@ -3972,7 +3895,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 					MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
 					MLXSW_SP_RESOURCE_KVD,
 					&mlxsw_sp_hash_double_size_params,
-					&mlxsw_sp_resource_kvd_hash_double_ops);
+					NULL);
 	if (err)
 		return err;
 
@@ -3982,7 +3905,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 					MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
 					MLXSW_SP_RESOURCE_KVD,
 					&mlxsw_sp_hash_single_size_params,
-					&mlxsw_sp_resource_kvd_hash_single_ops);
+					NULL);
 	if (err)
 		return err;
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6545b03e97f7..8d1c3f276dea 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -234,13 +234,9 @@ struct devlink_dpipe_headers {
 /**
  * struct devlink_resource_ops - resource ops
  * @occ_get: get the occupied size
- * @size_validate: validate the size of the resource before update, reload
- *                 is needed for changes to take place
  */
 struct devlink_resource_ops {
 	u64 (*occ_get)(struct devlink *devlink);
-	int (*size_validate)(struct devlink *devlink, u64 size,
-			     struct netlink_ext_ack *extack);
 };
 
 /**

From 51d3c08e33712e4867970fc5bafc73df3265ed04 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Tue, 20 Feb 2018 08:44:22 +0100
Subject: [PATCH 0232/1678] mlxsw: spectrum_kvdl: Add support for linear
 division resources

The linear part of the KVD memory is sub-divided into multiple parts. This
patch exposes this internal partitions via the resource interface.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    |  4 +
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  7 ++
 .../ethernet/mellanox/mlxsw/spectrum_kvdl.c   | 77 ++++++++++++++++++-
 3 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 39196625ae8e..bfde93910f82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3885,6 +3885,10 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 	if (err)
 		return err;
 
+	err = mlxsw_sp_kvdl_resources_register(devlink);
+	if  (err)
+		return err;
+
 	double_size = kvd_size - linear_size;
 	double_size *= profile->kvd_hash_double_parts;
 	double_size /= profile->kvd_hash_double_parts +
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 6718a1f0482c..675e03a892ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -70,12 +70,18 @@
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR "linear"
 #define MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE "hash_single"
 #define MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE "hash_double"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES "singles"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS "chunks"
+#define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
 
 enum mlxsw_sp_resource_id {
 	MLXSW_SP_RESOURCE_KVD,
 	MLXSW_SP_RESOURCE_KVD_LINEAR,
 	MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
 	MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
+	MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+	MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+	MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
 };
 
 struct mlxsw_sp_port;
@@ -433,6 +439,7 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
 				   unsigned int entry_count,
 				   unsigned int *p_alloc_size);
 u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
 
 struct mlxsw_sp_acl_rule_info {
 	unsigned int priority;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 55f9d2d70f9e..d4335055ff85 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -188,21 +188,27 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+enum mlxsw_sp_kvdl_part_id {
+	MLXSW_SP_KVDL_PART_SINGLE,
+	MLXSW_SP_KVDL_PART_CHUNKS,
+	MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
+};
+
 static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
 	{
-		.part_index	= 0,
+		.part_index	= MLXSW_SP_KVDL_PART_SINGLE,
 		.start_index	= MLXSW_SP_KVDL_SINGLE_BASE,
 		.end_index	= MLXSW_SP_KVDL_SINGLE_END,
 		.alloc_size	= 1,
 	},
 	{
-		.part_index	= 1,
+		.part_index	= MLXSW_SP_KVDL_PART_CHUNKS,
 		.start_index	= MLXSW_SP_KVDL_CHUNKS_BASE,
 		.end_index	= MLXSW_SP_KVDL_CHUNKS_END,
 		.alloc_size	= MLXSW_SP_CHUNK_MAX,
 	},
 	{
-		.part_index	= 2,
+		.part_index	= MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
 		.start_index	= MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
 		.end_index	= MLXSW_SP_KVDL_LARGE_CHUNKS_END,
 		.alloc_size	= MLXSW_SP_LARGE_CHUNK_MAX,
@@ -312,6 +318,71 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
 	return occ;
 }
 
+static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
+	.size_min = 0,
+	.size_granularity = 1,
+	.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
+	.size_min = 0,
+	.size_granularity = MLXSW_SP_CHUNK_MAX,
+	.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
+	.size_min = 0,
+	.size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
+	.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
+};
+
+static void
+mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	u32 kvdl_max_size;
+
+	kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+	mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
+	mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
+	mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
+}
+
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
+{
+	int err;
+
+	mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+					false, MLXSW_SP_KVDL_SINGLE_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&mlxsw_sp_kvdl_single_size_params,
+					NULL);
+	if (err)
+		return err;
+
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+					false, MLXSW_SP_KVDL_CHUNKS_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&mlxsw_sp_kvdl_chunks_size_params,
+					NULL);
+	if (err)
+		return err;
+
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+					false, MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&mlxsw_sp_kvdl_large_chunks_size_params,
+					NULL);
+	return err;
+}
+
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_kvdl *kvdl;

From 887839e6960ddae0f34e2dd1b631d41e5672c738 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Tue, 20 Feb 2018 08:44:23 +0100
Subject: [PATCH 0233/1678] mlxsw: spectrum_kvdl: Add support for dynamic
 partition set

Add support for dynamic partition set via the resource interface.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_kvdl.c   | 58 +++++++++++++++++--
 1 file changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index d4335055ff85..859966e5087b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -67,7 +67,7 @@ struct mlxsw_sp_kvdl_part_info {
 
 struct mlxsw_sp_kvdl_part {
 	struct list_head list;
-	const struct mlxsw_sp_kvdl_part_info *info;
+	struct mlxsw_sp_kvdl_part_info *info;
 	unsigned long usage[0];	/* Entries */
 };
 
@@ -228,27 +228,74 @@ mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
 	return NULL;
 }
 
+static void
+mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_kvdl_part *part, unsigned int size)
+{
+	struct mlxsw_sp_kvdl_part_info *info = part->info;
+
+	if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
+		info->end_index = size - 1;
+	} else  {
+		struct mlxsw_sp_kvdl_part *last_part;
+
+		last_part = list_next_entry(part, list);
+		info->start_index = last_part->info->end_index + 1;
+		info->end_index = info->start_index + size - 1;
+	}
+}
+
 static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
 				   unsigned int part_index)
 {
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 	const struct mlxsw_sp_kvdl_part_info *info;
+	enum mlxsw_sp_resource_id resource_id;
 	struct mlxsw_sp_kvdl_part *part;
+	bool need_update = true;
 	unsigned int nr_entries;
 	size_t usage_size;
+	u64 resource_size;
+	int err;
 
 	info = &kvdl_parts_info[part_index];
 
-	nr_entries = (info->end_index - info->start_index + 1) /
-		     info->alloc_size;
+	switch (part_index) {
+	case MLXSW_SP_KVDL_PART_SINGLE:
+		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
+		break;
+	case MLXSW_SP_KVDL_PART_CHUNKS:
+		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
+		break;
+	case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
+		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
+		break;
+	}
+
+	err = devlink_resource_size_get(devlink, resource_id, &resource_size);
+	if (err) {
+		need_update = false;
+		resource_size = info->end_index - info->start_index + 1;
+	}
+
+	nr_entries = resource_size / info->alloc_size;
 	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
 	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
 	if (!part)
 		return -ENOMEM;
 
-	part->info = info;
-	list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+	part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
+	if (!part->info)
+		goto err_part_info_alloc;
 
+	list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
+	if (need_update)
+		mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
 	return 0;
+
+err_part_info_alloc:
+	kfree(part);
+	return -ENOMEM;
 }
 
 static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
@@ -261,6 +308,7 @@ static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
 		return;
 
 	list_del(&part->list);
+	kfree(part->info);
 	kfree(part);
 }
 

From 7f47b19bd744c7945ad554fc00665fdbd13794bc Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Tue, 20 Feb 2018 08:44:24 +0100
Subject: [PATCH 0234/1678] mlxsw: spectrum_kvdl: Add support for per part
 occupancy

Add support for calculating occupancy for separate kvdl parts.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_kvdl.c   | 58 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 859966e5087b..d27fa57ad3c3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -366,6 +366,58 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
 	return occ;
 }
 
+u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	struct mlxsw_sp_kvdl_part *part;
+
+	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
+	if (!part)
+		return -EINVAL;
+
+	return mlxsw_sp_kvdl_part_occ(part);
+}
+
+u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	struct mlxsw_sp_kvdl_part *part;
+
+	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
+	if (!part)
+		return -EINVAL;
+
+	return mlxsw_sp_kvdl_part_occ(part);
+}
+
+u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	struct mlxsw_sp_kvdl_part *part;
+
+	part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
+				       MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
+	if (!part)
+		return -EINVAL;
+
+	return mlxsw_sp_kvdl_part_occ(part);
+}
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
+	.occ_get = mlxsw_sp_kvdl_single_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
+	.occ_get = mlxsw_sp_kvdl_chunks_occ_get,
+};
+
+static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
+	.occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
+};
+
 static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
 	.size_min = 0,
 	.size_granularity = 1,
@@ -409,7 +461,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					&mlxsw_sp_kvdl_single_size_params,
-					NULL);
+					&mlxsw_sp_kvdl_single_ops);
 	if (err)
 		return err;
 
@@ -418,7 +470,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					&mlxsw_sp_kvdl_chunks_size_params,
-					NULL);
+					&mlxsw_sp_kvdl_chunks_ops);
 	if (err)
 		return err;
 
@@ -427,7 +479,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					&mlxsw_sp_kvdl_large_chunks_size_params,
-					NULL);
+					&mlxsw_sp_kvdl_chunks_large_ops);
 	return err;
 }
 

From 3fef2b6290e85de93a7096381e77f9a1b7544278 Mon Sep 17 00:00:00 2001
From: Antonio Cardace <anto.cardace@gmail.com>
Date: Mon, 19 Feb 2018 11:37:15 +0000
Subject: [PATCH 0235/1678] x25: use %*ph to print small buffer

Use %*ph format to print small buffer as hex string.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Antonio Cardace <anto.cardace@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_subr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index db0b1315d577..9c214ec681ac 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -335,8 +335,7 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q,
 		}
 	}
 
-	pr_debug("invalid PLP frame %02X %02X %02X\n",
-	       frame[0], frame[1], frame[2]);
+	pr_debug("invalid PLP frame %3ph\n", frame);
 
 	return X25_ILLEGAL;
 }

From 8d212a9ea65af7082adc577ec46b6d1372d0a8f3 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:09 +0100
Subject: [PATCH 0236/1678] net: stmmac: set MSS for each tx DMA channel

The DMA engine in dwmac4 can segment a large TSO packet to several
smaller packets of (max) size Maximum Segment Size (MSS).

The DMA engine fetches and saves the MSS via a context descriptor.

This context decriptor has to be provided to each tx DMA channel.
To ensure that this is done, move struct member mss from stmmac_priv
to stmmac_tx_queue.

stmmac_reset_queues_param() now also resets mss, together with other
queue parameters, so reset of mss value can be removed from
stmmac_resume().

init_dma_tx_desc_rings() now also resets mss, together with other
queue parameters, so reset of mss value can be removed from
stmmac_open().

This fixes tx queue timeouts for dwmac4, with DT property
snps,tx-queues-to-use > 1, when running iperf3 with multiple threads.

Fixes: ce736788e8a9 ("net: stmmac: adding multiple buffers for TX")
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 +++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index a916e13624eb..75161e1b7e55 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -58,6 +58,7 @@ struct stmmac_tx_queue {
 	unsigned int dirty_tx;
 	dma_addr_t dma_tx_phy;
 	u32 tx_tail_addr;
+	u32 mss;
 };
 
 struct stmmac_rx_queue {
@@ -138,7 +139,6 @@ struct stmmac_priv {
 	spinlock_t ptp_lock;
 	void __iomem *mmcaddr;
 	void __iomem *ptpaddr;
-	u32 mss;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dbgfs_dir;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7ad841434ec8..d38bf38f12f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1355,6 +1355,7 @@ static int init_dma_tx_desc_rings(struct net_device *dev)
 
 		tx_q->dirty_tx = 0;
 		tx_q->cur_tx = 0;
+		tx_q->mss = 0;
 
 		netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
@@ -1946,6 +1947,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
 						     (i == DMA_TX_SIZE - 1));
 	tx_q->dirty_tx = 0;
 	tx_q->cur_tx = 0;
+	tx_q->mss = 0;
 	netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, chan));
 	stmmac_start_tx_dma(priv, chan);
 
@@ -2632,7 +2634,6 @@ static int stmmac_open(struct net_device *dev)
 
 	priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
 	priv->rx_copybreak = STMMAC_RX_COPYBREAK;
-	priv->mss = 0;
 
 	ret = alloc_dma_desc_resources(priv);
 	if (ret < 0) {
@@ -2872,10 +2873,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	mss = skb_shinfo(skb)->gso_size;
 
 	/* set new MSS value if needed */
-	if (mss != priv->mss) {
+	if (mss != tx_q->mss) {
 		mss_desc = tx_q->dma_tx + tx_q->cur_tx;
 		priv->hw->desc->set_mss(mss_desc, mss);
-		priv->mss = mss;
+		tx_q->mss = mss;
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
 	}
 
@@ -4436,6 +4437,7 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
 
 		tx_q->cur_tx = 0;
 		tx_q->dirty_tx = 0;
+		tx_q->mss = 0;
 	}
 }
 
@@ -4481,11 +4483,6 @@ int stmmac_resume(struct device *dev)
 
 	stmmac_reset_queues_param(priv);
 
-	/* reset private mss value to force mss context settings at
-	 * next tso xmit (only used for gmac4).
-	 */
-	priv->mss = 0;
-
 	stmmac_clear_descriptors(priv);
 
 	stmmac_hw_setup(ndev, false);

From f66b533d2991ed4613b48a0516811413a37a3020 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:10 +0100
Subject: [PATCH 0237/1678] net: stmmac: do not clear tx_skbuff entries in
 stmmac_xmit()/stmmac_tso_xmit()

tx_skbuff is initialized to NULL in init_dma_tx_desc_rings(), which is
called from ndo_open().

stmmac_tx_clean() frees any non-NULL skb, and sets the tx_skbuff
entry to NULL. Hence, there is no need to set skbuff entries to NULL
in stmmac_xmit()/stmmac_tso_xmit(), and doing so falsely gives the
reader the impression that it is needed.
Do not clear tx_skbuff entries in stmmac_xmit()/stmmac_tso_xmit().

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d38bf38f12f5..24afe7733cde 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2927,7 +2927,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
 		tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
-		tx_q->tx_skbuff[tx_q->cur_tx] = NULL;
 		tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
 	}
 
@@ -3102,8 +3101,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (dma_mapping_error(priv->device, des))
 			goto dma_map_err; /* should reuse desc w/o issues */
 
-		tx_q->tx_skbuff[entry] = NULL;
-
 		tx_q->tx_skbuff_dma[entry].buf = des;
 		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
 			desc->des0 = cpu_to_le32(des);

From b4c9784cbff43a2b50f6db6e2400c8d003c21546 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:11 +0100
Subject: [PATCH 0238/1678] net: stmmac: WARN if tx_skbuff entries are reused
 before cleared

The current code assumes that a tx_skbuff entry has been cleared
by stmmac_tx_clean() before stmmac_xmit()/stmmac_tso_xmit()
assigns a new skb to that entry. However, since we never check
the current value before overwriting it, it is theoretically
possible that a non-NULL value is overwritten.

Add WARN_ONs to verify that each entry in tx_skbuff is NULL
before it is assigned a new value.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 24afe7733cde..9881df126227 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2794,6 +2794,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 
 	while (tmp_len > 0) {
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+		WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
 		desc = tx_q->dma_tx + tx_q->cur_tx;
 
 		desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
@@ -2878,6 +2879,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		priv->hw->desc->set_mss(mss_desc, mss);
 		tx_q->mss = mss;
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
+		WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
 	}
 
 	if (netif_msg_tx_queued(priv)) {
@@ -2888,6 +2890,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	first_entry = tx_q->cur_tx;
+	WARN_ON(tx_q->tx_skbuff[first_entry]);
 
 	desc = tx_q->dma_tx + first_entry;
 	first = desc;
@@ -3062,6 +3065,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	entry = tx_q->cur_tx;
 	first_entry = entry;
+	WARN_ON(tx_q->tx_skbuff[first_entry]);
 
 	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 
@@ -3090,6 +3094,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		bool last_segment = (i == (nfrags - 1));
 
 		entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE);
+		WARN_ON(tx_q->tx_skbuff[entry]);
 
 		if (likely(priv->extend_desc))
 			desc = (struct dma_desc *)(tx_q->dma_etx + entry);

From e5a019921af1236927d0fa3c38bbc1d74a579ed5 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:12 +0100
Subject: [PATCH 0239/1678] net: stmmac: rename dwmac4_tx_queue_routing() to
 match reality

Looking at dwmac4_tx_queue_routing(), it is obvious that it
sets up rx queue routing.

Rename dwmac4_tx_queue_routing() to dwmac4_rx_queue_routing()
to better match reality.

Fixes: abe80fdc6ee6 ("net: stmmac: RX queue routing configuration")
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 63795ecafc8d..46b9ae20ff6c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -120,7 +120,7 @@ static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
 	writel(value, ioaddr + base_register);
 }
 
-static void dwmac4_tx_queue_routing(struct mac_device_info *hw,
+static void dwmac4_rx_queue_routing(struct mac_device_info *hw,
 				    u8 packet, u32 queue)
 {
 	void __iomem *ioaddr = hw->pcsr;
@@ -713,7 +713,7 @@ static const struct stmmac_ops dwmac4_ops = {
 	.rx_queue_enable = dwmac4_rx_queue_enable,
 	.rx_queue_prio = dwmac4_rx_queue_priority,
 	.tx_queue_prio = dwmac4_tx_queue_priority,
-	.rx_queue_routing = dwmac4_tx_queue_routing,
+	.rx_queue_routing = dwmac4_rx_queue_routing,
 	.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
 	.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
 	.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
@@ -744,7 +744,7 @@ static const struct stmmac_ops dwmac410_ops = {
 	.rx_queue_enable = dwmac4_rx_queue_enable,
 	.rx_queue_prio = dwmac4_rx_queue_priority,
 	.tx_queue_prio = dwmac4_tx_queue_priority,
-	.rx_queue_routing = dwmac4_tx_queue_routing,
+	.rx_queue_routing = dwmac4_rx_queue_routing,
 	.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
 	.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
 	.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,

From 13138de01400762f706c5e956e70660770d61962 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:13 +0100
Subject: [PATCH 0240/1678] net: stmmac: call correct function in
 stmmac_mac_config_rx_queues_routing()

stmmac_mac_config_rx_queues_routing() incorrectly calls rx_queue_prio()
instead of rx_queue_routing().

This looks like a copy paste issue, since
stmmac_mac_config_rx_queues_prio() already calls rx_queue_prio(),
and both stmmac_mac_config_rx_queues_routing() and
stmmac_mac_config_rx_queues_prio() are very similar in structure.

Fixes: abe80fdc6ee6 ("net: stmmac: RX queue routing configuration")
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9881df126227..c8d86d77e03d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2432,7 +2432,7 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
 			continue;
 
 		packet = priv->plat->rx_queues_cfg[queue].pkt_route;
-		priv->hw->mac->rx_queue_prio(priv->hw, packet, queue);
+		priv->hw->mac->rx_queue_routing(priv->hw, packet, queue);
 	}
 }
 

From 2ee2132ffb83e38338a85fb6b4eea63d7bc9660c Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:14 +0100
Subject: [PATCH 0241/1678] net: stmmac: add error handling in
 stmmac_mtl_setup()

The device tree binding for stmmac says:

- Multiple TX Queues parameters: below the list of all the parameters to
                                 configure the multiple TX queues:
        - snps,tx-queues-to-use: number of TX queues to be used in the driver
	[...]
        - For each TX queue
		[...]

However, if one specifies snps,tx-queues-to-use = 2,
but omits the queue subnodes, or defines just one queue subnode,
since the driver appears to initialize queues with sane default
values, we will get tx queue timeouts.

This is because the initialization code only initializes
as many queues as it finds subnodes. Potentially leaving
some queues uninitialized.

To avoid hard to debug issues, return an error if the number
of subnodes differ from snps,tx-queues-to-use/snps,rx-queues-to-use.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/stmicro/stmmac/stmmac_platform.c | 29 +++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 05f122b8424a..bcfac84cf6fb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -135,13 +135,14 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
  * stmmac_mtl_setup - parse DT parameters for multiple queues configuration
  * @pdev: platform device
  */
-static void stmmac_mtl_setup(struct platform_device *pdev,
-			     struct plat_stmmacenet_data *plat)
+static int stmmac_mtl_setup(struct platform_device *pdev,
+			    struct plat_stmmacenet_data *plat)
 {
 	struct device_node *q_node;
 	struct device_node *rx_node;
 	struct device_node *tx_node;
 	u8 queue = 0;
+	int ret = 0;
 
 	/* For backwards-compatibility with device trees that don't have any
 	 * snps,mtl-rx-config or snps,mtl-tx-config properties, we fall back
@@ -159,12 +160,12 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 
 	rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
 	if (!rx_node)
-		return;
+		return ret;
 
 	tx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-tx-config", 0);
 	if (!tx_node) {
 		of_node_put(rx_node);
-		return;
+		return ret;
 	}
 
 	/* Processing RX queues common config */
@@ -220,6 +221,11 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 
 		queue++;
 	}
+	if (queue != plat->rx_queues_to_use) {
+		ret = -EINVAL;
+		dev_err(&pdev->dev, "Not all RX queues were configured\n");
+		goto out;
+	}
 
 	/* Processing TX queues common config */
 	if (of_property_read_u32(tx_node, "snps,tx-queues-to-use",
@@ -281,10 +287,18 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 
 		queue++;
 	}
+	if (queue != plat->tx_queues_to_use) {
+		ret = -EINVAL;
+		dev_err(&pdev->dev, "Not all TX queues were configured\n");
+		goto out;
+	}
 
+out:
 	of_node_put(rx_node);
 	of_node_put(tx_node);
 	of_node_put(q_node);
+
+	return ret;
 }
 
 /**
@@ -376,6 +390,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	struct device_node *np = pdev->dev.of_node;
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_dma_cfg *dma_cfg;
+	int rc;
 
 	plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
 	if (!plat)
@@ -499,7 +514,11 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 
 	plat->axi = stmmac_axi_setup(pdev);
 
-	stmmac_mtl_setup(pdev, plat);
+	rc = stmmac_mtl_setup(pdev, plat);
+	if (rc) {
+		stmmac_remove_config_dt(pdev, plat);
+		return ERR_PTR(rc);
+	}
 
 	/* clock setup */
 	plat->stmmac_clk = devm_clk_get(&pdev->dev,

From ce339abc9a40af75cd1e70b3160f11bda78f3284 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 19 Feb 2018 18:11:15 +0100
Subject: [PATCH 0242/1678] net: stmmac: honor error code from stmmac_dt_phy()

Honor error code from stmmac_dt_phy() instead of always
returning -ENODEV.

No functional change intended.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index bcfac84cf6fb..ebd3e5ffa73c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -417,8 +417,9 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 		dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
 
 	/* To Configure PHY by using all device-tree supported properties */
-	if (stmmac_dt_phy(plat, np, &pdev->dev))
-		return ERR_PTR(-ENODEV);
+	rc = stmmac_dt_phy(plat, np, &pdev->dev);
+	if (rc)
+		return ERR_PTR(rc);
 
 	of_property_read_u32(np, "tx-fifo-depth", &plat->tx_fifo_size);
 

From 022ddbca86ce692518bc1809e2dfe27add669608 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Feb 2018 07:23:03 +0100
Subject: [PATCH 0243/1678] r8169: remove some WOL-related dead code

Commit bde135a672bf "r8169: only enable PCI wakeups when WOL is active"
removed the only user of flag RTL_FEATURE_WOL. So let's remove some
now dead code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 39 ++--------------------------
 1 file changed, 2 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0bf7d1759250..2f3e3ae08664 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -736,9 +736,8 @@ struct ring_info {
 };
 
 enum features {
-	RTL_FEATURE_WOL		= (1 << 0),
-	RTL_FEATURE_MSI		= (1 << 1),
-	RTL_FEATURE_GMII	= (1 << 2),
+	RTL_FEATURE_MSI		= (1 << 0),
+	RTL_FEATURE_GMII	= (1 << 1),
 };
 
 struct rtl8169_counters {
@@ -1859,10 +1858,6 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 	rtl_lock_work(tp);
 
-	if (wol->wolopts)
-		tp->features |= RTL_FEATURE_WOL;
-	else
-		tp->features &= ~RTL_FEATURE_WOL;
 	if (pm_runtime_active(d))
 		__rtl8169_set_wol(tp, wol->wolopts);
 	else
@@ -8521,36 +8516,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	RTL_W8(Cfg9346, Cfg9346_Unlock);
 	RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
 	RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_34:
-	case RTL_GIGA_MAC_VER_35:
-	case RTL_GIGA_MAC_VER_36:
-	case RTL_GIGA_MAC_VER_37:
-	case RTL_GIGA_MAC_VER_38:
-	case RTL_GIGA_MAC_VER_40:
-	case RTL_GIGA_MAC_VER_41:
-	case RTL_GIGA_MAC_VER_42:
-	case RTL_GIGA_MAC_VER_43:
-	case RTL_GIGA_MAC_VER_44:
-	case RTL_GIGA_MAC_VER_45:
-	case RTL_GIGA_MAC_VER_46:
-	case RTL_GIGA_MAC_VER_47:
-	case RTL_GIGA_MAC_VER_48:
-	case RTL_GIGA_MAC_VER_49:
-	case RTL_GIGA_MAC_VER_50:
-	case RTL_GIGA_MAC_VER_51:
-		if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2)
-			tp->features |= RTL_FEATURE_WOL;
-		if ((RTL_R8(Config3) & LinkUp) != 0)
-			tp->features |= RTL_FEATURE_WOL;
-		break;
-	default:
-		if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
-			tp->features |= RTL_FEATURE_WOL;
-		break;
-	}
-	if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
-		tp->features |= RTL_FEATURE_WOL;
 	tp->features |= rtl_try_msi(tp, cfg);
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 

From 1f74edf9a5b12447b6be96130b334e8bb98d7e69 Mon Sep 17 00:00:00 2001
From: Xue Liu <liuxuenetmail@gmail.com>
Date: Tue, 20 Feb 2018 12:55:56 +0100
Subject: [PATCH 0244/1678] ieee802154: Add device tree documentation for
 MCR20A

Signed-off-by: Xue Liu <liuxuenetmail@gmail.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
---
 .../bindings/net/ieee802154/mcr20a.txt        | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt

diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
new file mode 100644
index 000000000000..2aaef567c5be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
@@ -0,0 +1,23 @@
+* MCR20A IEEE 802.15.4 *
+
+Required properties:
+  - compatible:		should be "nxp,mcr20a"
+  - spi-max-frequency:	maximal bus speed, should be set to a frequency
+			lower than 9000000 depends sync or async operation mode
+  - reg:		the chipselect index
+  - interrupts:		the interrupt generated by the device. Non high-level
+			can occur deadlocks while handling isr.
+
+Optional properties:
+  - rst_b-gpio:		GPIO spec for the RST_B pin
+
+Example:
+
+	mcr20a@0 {
+		compatible = "nxp,mcr20a";
+		spi-max-frequency = <9000000>;
+		reg = <0>;
+		interrupts = <17 2>;
+		interrupt-parent = <&gpio>;
+		rst_b-gpio = <&gpio 27 1>
+	};

From 962d0ca24855f2a3ff6fd093ff20bedbd5284823 Mon Sep 17 00:00:00 2001
From: Xue Liu <liuxuenetmail@gmail.com>
Date: Tue, 20 Feb 2018 12:55:57 +0100
Subject: [PATCH 0245/1678] ieee802154: Add entry in MAINTAINTERS for MCR20a
 driver

Signed-off-by: Xue Liu <liuxuenetmail@gmail.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3bdc260e36b7..38d58040a603 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8593,6 +8593,15 @@ S:	Maintained
 F:	Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531
 F:	drivers/iio/potentiometer/mcp4531.c
 
+MCR20A IEEE-802.15.4 RADIO DRIVER
+M:	Xue Liu <liuxuenetmail@gmail.com>
+L:	linux-wpan@vger.kernel.org
+W:	https://github.com/xueliu/mcr20a-linux
+S:	Maintained
+F:	drivers/net/ieee802154/mcr20a.c
+F:	drivers/net/ieee802154/mcr20a.h
+F:	Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
+
 MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
 M:	William Breathitt Gray <vilhelm.gray@gmail.com>
 L:	linux-iio@vger.kernel.org

From 8c6ad9cc51574c8f20b5777e97b42457802ac36d Mon Sep 17 00:00:00 2001
From: Xue Liu <liuxuenetmail@gmail.com>
Date: Tue, 20 Feb 2018 12:55:58 +0100
Subject: [PATCH 0246/1678] ieee802154: Add NXP MCR20A IEEE 802.15.4
 transceiver driver

The MCR20AVHM transceiver (or MCR20A) is a low power,
high-performance 2.4 GHz, IEEE 802.15.4 compliant transceiver.

This driver implements a subset of ieee802154_ops.
It has no support for CSMA due to lack of hardware support.
It has currently no support for its proprietary Dual-PAN feature.

https://www.nxp.com/docs/en/reference-manual/MCR20RM.pdf

Signed-off-by: Xue Liu <liuxuenetmail@gmail.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
---
 drivers/net/ieee802154/Kconfig  |   11 +
 drivers/net/ieee802154/Makefile |    1 +
 drivers/net/ieee802154/mcr20a.c | 1413 +++++++++++++++++++++++++++++++
 drivers/net/ieee802154/mcr20a.h |  498 +++++++++++
 4 files changed, 1923 insertions(+)
 create mode 100644 drivers/net/ieee802154/mcr20a.c
 create mode 100644 drivers/net/ieee802154/mcr20a.h

diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig
index 303ba4133920..8782f5655e3f 100644
--- a/drivers/net/ieee802154/Kconfig
+++ b/drivers/net/ieee802154/Kconfig
@@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS
 	  exposes a debugfs node for each CA8210 instance which allows
 	  direct use of the Cascoda API, exposing the 802.15.4 MAC
 	  management entities.
+
+config IEEE802154_MCR20A
+       tristate "MCR20A transceiver driver"
+       depends on IEEE802154_DRIVERS && MAC802154
+       depends on SPI
+	---help---
+	  Say Y here to enable the MCR20A SPI 802.15.4 wireless
+	  controller.
+
+	  This driver can also be built as a module. To do so, say M here.
+	  the module will be called 'mcr20a'.
diff --git a/drivers/net/ieee802154/Makefile b/drivers/net/ieee802154/Makefile
index bea1de5e726c..104744d5a668 100644
--- a/drivers/net/ieee802154/Makefile
+++ b/drivers/net/ieee802154/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o
 obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o
 obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o
 obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o
+obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
new file mode 100644
index 000000000000..d9eb22a52551
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -0,0 +1,1413 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/ieee802154.h>
+#include <linux/debugfs.h>
+
+#include <net/mac802154.h>
+#include <net/cfg802154.h>
+
+#include <linux/device.h>
+
+#include "mcr20a.h"
+
+#define	SPI_COMMAND_BUFFER		3
+
+#define REGISTER_READ			BIT(7)
+#define REGISTER_WRITE			(0 << 7)
+#define REGISTER_ACCESS			(0 << 6)
+#define PACKET_BUFF_BURST_ACCESS	BIT(6)
+#define PACKET_BUFF_BYTE_ACCESS		BIT(5)
+
+#define MCR20A_WRITE_REG(x)		(x)
+#define MCR20A_READ_REG(x)		(REGISTER_READ | (x))
+#define MCR20A_BURST_READ_PACKET_BUF	(0xC0)
+#define MCR20A_BURST_WRITE_PACKET_BUF	(0x40)
+
+#define MCR20A_CMD_REG		0x80
+#define MCR20A_CMD_REG_MASK	0x3f
+#define MCR20A_CMD_WRITE	0x40
+#define MCR20A_CMD_FB		0x20
+
+/* Number of Interrupt Request Status Register */
+#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */
+
+/* MCR20A CCA Type */
+enum {
+	MCR20A_CCA_ED,	  // energy detect - CCA bit not active,
+			  // not to be used for T and CCCA sequences
+	MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE
+	MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE
+	MCR20A_CCA_MODE3
+};
+
+enum {
+	MCR20A_XCVSEQ_IDLE	= 0x00,
+	MCR20A_XCVSEQ_RX	= 0x01,
+	MCR20A_XCVSEQ_TX	= 0x02,
+	MCR20A_XCVSEQ_CCA	= 0x03,
+	MCR20A_XCVSEQ_TR	= 0x04,
+	MCR20A_XCVSEQ_CCCA	= 0x05,
+};
+
+/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */
+#define	MCR20A_MIN_CHANNEL	(11)
+#define	MCR20A_MAX_CHANNEL	(26)
+#define	MCR20A_CHANNEL_SPACING	(5)
+
+/* MCR20A CCA Threshold constans */
+#define MCR20A_MIN_CCA_THRESHOLD (0x6EU)
+#define MCR20A_MAX_CCA_THRESHOLD (0x00U)
+
+/* version 0C */
+#define MCR20A_OVERWRITE_VERSION (0x0C)
+
+/* MCR20A PLL configurations */
+static const u8  PLL_INT[16] = {
+	/* 2405 */ 0x0B,	/* 2410 */ 0x0B,	/* 2415 */ 0x0B,
+	/* 2420 */ 0x0B,	/* 2425 */ 0x0B,	/* 2430 */ 0x0B,
+	/* 2435 */ 0x0C,	/* 2440 */ 0x0C,	/* 2445 */ 0x0C,
+	/* 2450 */ 0x0C,	/* 2455 */ 0x0C,	/* 2460 */ 0x0C,
+	/* 2465 */ 0x0D,	/* 2470 */ 0x0D,	/* 2475 */ 0x0D,
+	/* 2480 */ 0x0D
+};
+
+static const u8 PLL_FRAC[16] = {
+	/* 2405 */ 0x28,	/* 2410 */ 0x50,	/* 2415 */ 0x78,
+	/* 2420 */ 0xA0,	/* 2425 */ 0xC8,	/* 2430 */ 0xF0,
+	/* 2435 */ 0x18,	/* 2440 */ 0x40,	/* 2445 */ 0x68,
+	/* 2450 */ 0x90,	/* 2455 */ 0xB8,	/* 2460 */ 0xE0,
+	/* 2465 */ 0x08,	/* 2470 */ 0x30,	/* 2475 */ 0x58,
+	/* 2480 */ 0x80
+};
+
+static const struct reg_sequence mar20a_iar_overwrites[] = {
+	{ IAR_MISC_PAD_CTRL,	0x02 },
+	{ IAR_VCO_CTRL1,	0xB3 },
+	{ IAR_VCO_CTRL2,	0x07 },
+	{ IAR_PA_TUNING,	0x71 },
+	{ IAR_CHF_IBUF,		0x2F },
+	{ IAR_CHF_QBUF,		0x2F },
+	{ IAR_CHF_IRIN,		0x24 },
+	{ IAR_CHF_QRIN,		0x24 },
+	{ IAR_CHF_IL,		0x24 },
+	{ IAR_CHF_QL,		0x24 },
+	{ IAR_CHF_CC1,		0x32 },
+	{ IAR_CHF_CCL,		0x1D },
+	{ IAR_CHF_CC2,		0x2D },
+	{ IAR_CHF_IROUT,	0x24 },
+	{ IAR_CHF_QROUT,	0x24 },
+	{ IAR_PA_CAL,		0x28 },
+	{ IAR_AGC_THR1,		0x55 },
+	{ IAR_AGC_THR2,		0x2D },
+	{ IAR_ATT_RSSI1,	0x5F },
+	{ IAR_ATT_RSSI2,	0x8F },
+	{ IAR_RSSI_OFFSET,	0x61 },
+	{ IAR_CHF_PMA_GAIN,	0x03 },
+	{ IAR_CCA1_THRESH,	0x50 },
+	{ IAR_CORR_NVAL,	0x13 },
+	{ IAR_ACKDELAY,		0x3D },
+};
+
+#define MCR20A_VALID_CHANNELS (0x07FFF800)
+
+struct mcr20a_platform_data {
+	int rst_gpio;
+};
+
+#define MCR20A_MAX_BUF		(127)
+
+#define printdev(X) (&X->spi->dev)
+
+/* regmap information for Direct Access Register (DAR) access */
+#define MCR20A_DAR_WRITE	0x01
+#define MCR20A_DAR_READ		0x00
+#define MCR20A_DAR_NUMREGS	0x3F
+
+/* regmap information for Indirect Access Register (IAR) access */
+#define MCR20A_IAR_ACCESS	0x80
+#define MCR20A_IAR_NUMREGS	0xBEFF
+
+/* Read/Write SPI Commands for DAR and IAR registers. */
+#define MCR20A_READSHORT(reg)	((reg) << 1)
+#define MCR20A_WRITESHORT(reg)	((reg) << 1 | 1)
+#define MCR20A_READLONG(reg)	(1 << 15 | (reg) << 5)
+#define MCR20A_WRITELONG(reg)	(1 << 15 | (reg) << 5 | 1 << 4)
+
+/* Type definitions for link configuration of instantiable layers  */
+#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12)
+
+static bool
+mcr20a_dar_writeable(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case DAR_IRQ_STS1:
+	case DAR_IRQ_STS2:
+	case DAR_IRQ_STS3:
+	case DAR_PHY_CTRL1:
+	case DAR_PHY_CTRL2:
+	case DAR_PHY_CTRL3:
+	case DAR_PHY_CTRL4:
+	case DAR_SRC_CTRL:
+	case DAR_SRC_ADDRS_SUM_LSB:
+	case DAR_SRC_ADDRS_SUM_MSB:
+	case DAR_T3CMP_LSB:
+	case DAR_T3CMP_MSB:
+	case DAR_T3CMP_USB:
+	case DAR_T2PRIMECMP_LSB:
+	case DAR_T2PRIMECMP_MSB:
+	case DAR_T1CMP_LSB:
+	case DAR_T1CMP_MSB:
+	case DAR_T1CMP_USB:
+	case DAR_T2CMP_LSB:
+	case DAR_T2CMP_MSB:
+	case DAR_T2CMP_USB:
+	case DAR_T4CMP_LSB:
+	case DAR_T4CMP_MSB:
+	case DAR_T4CMP_USB:
+	case DAR_PLL_INT0:
+	case DAR_PLL_FRAC0_LSB:
+	case DAR_PLL_FRAC0_MSB:
+	case DAR_PA_PWR:
+	/* no DAR_ACM */
+	case DAR_OVERWRITE_VER:
+	case DAR_CLK_OUT_CTRL:
+	case DAR_PWR_MODES:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool
+mcr20a_dar_readable(struct device *dev, unsigned int reg)
+{
+	bool rc;
+
+	/* all writeable are also readable */
+	rc = mcr20a_dar_writeable(dev, reg);
+	if (rc)
+		return rc;
+
+	/* readonly regs */
+	switch (reg) {
+	case DAR_RX_FRM_LEN:
+	case DAR_CCA1_ED_FNL:
+	case DAR_EVENT_TMR_LSB:
+	case DAR_EVENT_TMR_MSB:
+	case DAR_EVENT_TMR_USB:
+	case DAR_TIMESTAMP_LSB:
+	case DAR_TIMESTAMP_MSB:
+	case DAR_TIMESTAMP_USB:
+	case DAR_SEQ_STATE:
+	case DAR_LQI_VALUE:
+	case DAR_RSSI_CCA_CONT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool
+mcr20a_dar_volatile(struct device *dev, unsigned int reg)
+{
+	/* can be changed during runtime */
+	switch (reg) {
+	case DAR_IRQ_STS1:
+	case DAR_IRQ_STS2:
+	case DAR_IRQ_STS3:
+	/* use them in spi_async and regmap so it's volatile */
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool
+mcr20a_dar_precious(struct device *dev, unsigned int reg)
+{
+	/* don't clear irq line on read */
+	switch (reg) {
+	case DAR_IRQ_STS1:
+	case DAR_IRQ_STS2:
+	case DAR_IRQ_STS3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config mcr20a_dar_regmap = {
+	.name			= "mcr20a_dar",
+	.reg_bits		= 8,
+	.val_bits		= 8,
+	.write_flag_mask	= REGISTER_ACCESS | REGISTER_WRITE,
+	.read_flag_mask		= REGISTER_ACCESS | REGISTER_READ,
+	.cache_type		= REGCACHE_RBTREE,
+	.writeable_reg		= mcr20a_dar_writeable,
+	.readable_reg		= mcr20a_dar_readable,
+	.volatile_reg		= mcr20a_dar_volatile,
+	.precious_reg		= mcr20a_dar_precious,
+	.fast_io		= true,
+	.can_multi_write	= true,
+};
+
+static bool
+mcr20a_iar_writeable(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case IAR_XTAL_TRIM:
+	case IAR_PMC_LP_TRIM:
+	case IAR_MACPANID0_LSB:
+	case IAR_MACPANID0_MSB:
+	case IAR_MACSHORTADDRS0_LSB:
+	case IAR_MACSHORTADDRS0_MSB:
+	case IAR_MACLONGADDRS0_0:
+	case IAR_MACLONGADDRS0_8:
+	case IAR_MACLONGADDRS0_16:
+	case IAR_MACLONGADDRS0_24:
+	case IAR_MACLONGADDRS0_32:
+	case IAR_MACLONGADDRS0_40:
+	case IAR_MACLONGADDRS0_48:
+	case IAR_MACLONGADDRS0_56:
+	case IAR_RX_FRAME_FILTER:
+	case IAR_PLL_INT1:
+	case IAR_PLL_FRAC1_LSB:
+	case IAR_PLL_FRAC1_MSB:
+	case IAR_MACPANID1_LSB:
+	case IAR_MACPANID1_MSB:
+	case IAR_MACSHORTADDRS1_LSB:
+	case IAR_MACSHORTADDRS1_MSB:
+	case IAR_MACLONGADDRS1_0:
+	case IAR_MACLONGADDRS1_8:
+	case IAR_MACLONGADDRS1_16:
+	case IAR_MACLONGADDRS1_24:
+	case IAR_MACLONGADDRS1_32:
+	case IAR_MACLONGADDRS1_40:
+	case IAR_MACLONGADDRS1_48:
+	case IAR_MACLONGADDRS1_56:
+	case IAR_DUAL_PAN_CTRL:
+	case IAR_DUAL_PAN_DWELL:
+	case IAR_CCA1_THRESH:
+	case IAR_CCA1_ED_OFFSET_COMP:
+	case IAR_LQI_OFFSET_COMP:
+	case IAR_CCA_CTRL:
+	case IAR_CCA2_CORR_PEAKS:
+	case IAR_CCA2_CORR_THRESH:
+	case IAR_TMR_PRESCALE:
+	case IAR_ANT_PAD_CTRL:
+	case IAR_MISC_PAD_CTRL:
+	case IAR_BSM_CTRL:
+	case IAR_RNG:
+	case IAR_RX_WTR_MARK:
+	case IAR_SOFT_RESET:
+	case IAR_TXDELAY:
+	case IAR_ACKDELAY:
+	case IAR_CORR_NVAL:
+	case IAR_ANT_AGC_CTRL:
+	case IAR_AGC_THR1:
+	case IAR_AGC_THR2:
+	case IAR_PA_CAL:
+	case IAR_ATT_RSSI1:
+	case IAR_ATT_RSSI2:
+	case IAR_RSSI_OFFSET:
+	case IAR_XTAL_CTRL:
+	case IAR_CHF_PMA_GAIN:
+	case IAR_CHF_IBUF:
+	case IAR_CHF_QBUF:
+	case IAR_CHF_IRIN:
+	case IAR_CHF_QRIN:
+	case IAR_CHF_IL:
+	case IAR_CHF_QL:
+	case IAR_CHF_CC1:
+	case IAR_CHF_CCL:
+	case IAR_CHF_CC2:
+	case IAR_CHF_IROUT:
+	case IAR_CHF_QROUT:
+	case IAR_PA_TUNING:
+	case IAR_VCO_CTRL1:
+	case IAR_VCO_CTRL2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool
+mcr20a_iar_readable(struct device *dev, unsigned int reg)
+{
+	bool rc;
+
+	/* all writeable are also readable */
+	rc = mcr20a_iar_writeable(dev, reg);
+	if (rc)
+		return rc;
+
+	/* readonly regs */
+	switch (reg) {
+	case IAR_PART_ID:
+	case IAR_DUAL_PAN_STS:
+	case IAR_RX_BYTE_COUNT:
+	case IAR_FILTERFAIL_CODE1:
+	case IAR_FILTERFAIL_CODE2:
+	case IAR_RSSI:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool
+mcr20a_iar_volatile(struct device *dev, unsigned int reg)
+{
+/* can be changed during runtime */
+	switch (reg) {
+	case IAR_DUAL_PAN_STS:
+	case IAR_RX_BYTE_COUNT:
+	case IAR_FILTERFAIL_CODE1:
+	case IAR_FILTERFAIL_CODE2:
+	case IAR_RSSI:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config mcr20a_iar_regmap = {
+	.name			= "mcr20a_iar",
+	.reg_bits		= 16,
+	.val_bits		= 8,
+	.write_flag_mask	= REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX,
+	.read_flag_mask		= REGISTER_ACCESS | REGISTER_READ  | IAR_INDEX,
+	.cache_type		= REGCACHE_RBTREE,
+	.writeable_reg		= mcr20a_iar_writeable,
+	.readable_reg		= mcr20a_iar_readable,
+	.volatile_reg		= mcr20a_iar_volatile,
+	.fast_io		= true,
+};
+
+struct mcr20a_local {
+	struct spi_device *spi;
+
+	struct ieee802154_hw *hw;
+	struct mcr20a_platform_data *pdata;
+	struct regmap *regmap_dar;
+	struct regmap *regmap_iar;
+
+	u8 *buf;
+
+	bool is_tx;
+
+	/* for writing tx buffer */
+	struct spi_message tx_buf_msg;
+	u8 tx_header[1];
+	/* burst buffer write command */
+	struct spi_transfer tx_xfer_header;
+	u8 tx_len[1];
+	/* len of tx packet */
+	struct spi_transfer tx_xfer_len;
+	/* data of tx packet */
+	struct spi_transfer tx_xfer_buf;
+	struct sk_buff *tx_skb;
+
+	/* for read length rxfifo */
+	struct spi_message reg_msg;
+	u8 reg_cmd[1];
+	u8 reg_data[MCR20A_IRQSTS_NUM];
+	struct spi_transfer reg_xfer_cmd;
+	struct spi_transfer reg_xfer_data;
+
+	/* receive handling */
+	struct spi_message rx_buf_msg;
+	u8 rx_header[1];
+	struct spi_transfer rx_xfer_header;
+	u8 rx_lqi[1];
+	struct spi_transfer rx_xfer_lqi;
+	u8 rx_buf[MCR20A_MAX_BUF];
+	struct spi_transfer rx_xfer_buf;
+
+	/* isr handling for reading intstat */
+	struct spi_message irq_msg;
+	u8 irq_header[1];
+	u8 irq_data[MCR20A_IRQSTS_NUM];
+	struct spi_transfer irq_xfer_data;
+	struct spi_transfer irq_xfer_header;
+};
+
+static void
+mcr20a_write_tx_buf_complete(void *context)
+{
+	struct mcr20a_local *lp = context;
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	lp->reg_msg.complete = NULL;
+	lp->reg_cmd[0]	= MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+	lp->reg_data[0] = MCR20A_XCVSEQ_TX;
+	lp->reg_xfer_data.len = 1;
+
+	ret = spi_async(lp->spi, &lp->reg_msg);
+	if (ret)
+		dev_err(printdev(lp), "failed to set SEQ TX\n");
+}
+
+static int
+mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+	struct mcr20a_local *lp = hw->priv;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	lp->tx_skb = skb;
+
+	print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1,
+			     skb->data, skb->len, 0);
+
+	lp->is_tx = 1;
+
+	lp->reg_msg.complete	= NULL;
+	lp->reg_cmd[0]		= MCR20A_WRITE_REG(DAR_PHY_CTRL1);
+	lp->reg_data[0]		= MCR20A_XCVSEQ_IDLE;
+	lp->reg_xfer_data.len	= 1;
+
+	return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_ed(struct ieee802154_hw *hw, u8 *level)
+{
+	WARN_ON(!level);
+	*level = 0xbe;
+	return 0;
+}
+
+static int
+mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+	struct mcr20a_local *lp = hw->priv;
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* freqency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */
+	ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]);
+	if (ret)
+		return ret;
+	ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00);
+	if (ret)
+		return ret;
+	ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB,
+			   PLL_FRAC[channel - 11]);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int
+mcr20a_start(struct ieee802154_hw *hw)
+{
+	struct mcr20a_local *lp = hw->priv;
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* No slotted operation */
+	dev_dbg(printdev(lp), "no slotted operation\n");
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_SLOTTED, 0x0);
+
+	/* enable irq */
+	enable_irq(lp->spi->irq);
+
+	/* Unmask SEQ interrupt */
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
+				 DAR_PHY_CTRL2_SEQMSK, 0x0);
+
+	/* Start the RX sequence */
+	dev_dbg(printdev(lp), "start the RX sequence\n");
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+	return 0;
+}
+
+static void
+mcr20a_stop(struct ieee802154_hw *hw)
+{
+	struct mcr20a_local *lp = hw->priv;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* stop all running sequence */
+	regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+			   DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+	/* disable irq */
+	disable_irq(lp->spi->irq);
+}
+
+static int
+mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw,
+			struct ieee802154_hw_addr_filt *filt,
+			unsigned long changed)
+{
+	struct mcr20a_local *lp = hw->priv;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	if (changed & IEEE802154_AFILT_SADDR_CHANGED) {
+		u16 addr = le16_to_cpu(filt->short_addr);
+
+		regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr);
+		regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8);
+	}
+
+	if (changed & IEEE802154_AFILT_PANID_CHANGED) {
+		u16 pan = le16_to_cpu(filt->pan_id);
+
+		regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan);
+		regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8);
+	}
+
+	if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) {
+		u8 addr[8], i;
+
+		memcpy(addr, &filt->ieee_addr, 8);
+		for (i = 0; i < 8; i++)
+			regmap_write(lp->regmap_iar,
+				     IAR_MACLONGADDRS0_0 + i, addr[i]);
+	}
+
+	if (changed & IEEE802154_AFILT_PANC_CHANGED) {
+		if (filt->pan_coord) {
+			regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+					   DAR_PHY_CTRL4_PANCORDNTR0, 0x10);
+		} else {
+			regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+					   DAR_PHY_CTRL4_PANCORDNTR0, 0x00);
+		}
+	}
+
+	return 0;
+}
+
+/* -30 dBm to 10 dBm */
+#define MCR20A_MAX_TX_POWERS 0x14
+static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = {
+	-3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400,
+	-1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000
+};
+
+static int
+mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+	struct mcr20a_local *lp = hw->priv;
+	u32 i;
+
+	dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm);
+
+	for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) {
+		if (lp->hw->phy->supported.tx_powers[i] == mbm)
+			return regmap_write(lp->regmap_dar, DAR_PA_PWR,
+					    ((i + 8) & 0x1F));
+	}
+
+	return -EINVAL;
+}
+
+#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD
+static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1];
+
+static int
+mcr20a_set_cca_mode(struct ieee802154_hw *hw,
+		    const struct wpan_phy_cca *cca)
+{
+	struct mcr20a_local *lp = hw->priv;
+	unsigned int cca_mode = 0xff;
+	bool cca_mode_and = false;
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* mapping 802.15.4 to driver spec */
+	switch (cca->mode) {
+	case NL802154_CCA_ENERGY:
+		cca_mode = MCR20A_CCA_MODE1;
+		break;
+	case NL802154_CCA_CARRIER:
+		cca_mode = MCR20A_CCA_MODE2;
+		break;
+	case NL802154_CCA_ENERGY_CARRIER:
+		switch (cca->opt) {
+		case NL802154_CCA_OPT_ENERGY_CARRIER_AND:
+			cca_mode = MCR20A_CCA_MODE3;
+			cca_mode_and = true;
+			break;
+		case NL802154_CCA_OPT_ENERGY_CARRIER_OR:
+			cca_mode = MCR20A_CCA_MODE3;
+			cca_mode_and = false;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+				 DAR_PHY_CTRL4_CCATYPE_MASK,
+				 cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT);
+	if (ret < 0)
+		return ret;
+
+	if (cca_mode == MCR20A_CCA_MODE3) {
+		if (cca_mode_and) {
+			ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL,
+						 IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+						 0x08);
+		} else {
+			ret = regmap_update_bits(lp->regmap_iar,
+						 IAR_CCA_CTRL,
+						 IAR_CCA_CTRL_CCA3_AND_NOT_OR,
+						 0x00);
+		}
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+static int
+mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
+{
+	struct mcr20a_local *lp = hw->priv;
+	u32 i;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) {
+		if (hw->phy->supported.cca_ed_levels[i] == mbm)
+			return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i);
+	}
+
+	return 0;
+}
+
+static int
+mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
+{
+	struct mcr20a_local *lp = hw->priv;
+	int ret;
+	u8 rx_frame_filter_reg = 0x0;
+	u8 val;
+
+	dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
+
+	if (on) {
+		/* All frame types accepted*/
+		val |= DAR_PHY_CTRL4_PROMISCUOUS;
+		rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
+		rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
+				  IAR_RX_FRAME_FLT_NS_FT);
+
+		ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+					 DAR_PHY_CTRL4_PROMISCUOUS,
+					 DAR_PHY_CTRL4_PROMISCUOUS);
+		if (ret < 0)
+			return ret;
+
+		ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+				   rx_frame_filter_reg);
+		if (ret < 0)
+			return ret;
+	} else {
+		ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4,
+					 DAR_PHY_CTRL4_PROMISCUOUS, 0x0);
+		if (ret < 0)
+			return ret;
+
+		ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+				   IAR_RX_FRAME_FLT_FRM_VER |
+				   IAR_RX_FRAME_FLT_BEACON_FT |
+				   IAR_RX_FRAME_FLT_DATA_FT |
+				   IAR_RX_FRAME_FLT_CMD_FT);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static const struct ieee802154_ops mcr20a_hw_ops = {
+	.owner			= THIS_MODULE,
+	.xmit_async		= mcr20a_xmit,
+	.ed			= mcr20a_ed,
+	.set_channel		= mcr20a_set_channel,
+	.start			= mcr20a_start,
+	.stop			= mcr20a_stop,
+	.set_hw_addr_filt	= mcr20a_set_hw_addr_filt,
+	.set_txpower		= mcr20a_set_txpower,
+	.set_cca_mode		= mcr20a_set_cca_mode,
+	.set_cca_ed_level	= mcr20a_set_cca_ed_level,
+	.set_promiscuous_mode	= mcr20a_set_promiscuous_mode,
+};
+
+static int
+mcr20a_request_rx(struct mcr20a_local *lp)
+{
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* Start the RX sequence */
+	regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+
+	return 0;
+}
+
+static void
+mcr20a_handle_rx_read_buf_complete(void *context)
+{
+	struct mcr20a_local *lp = context;
+	u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+	struct sk_buff *skb;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	dev_dbg(printdev(lp), "RX is done\n");
+
+	if (!ieee802154_is_valid_psdu_len(len)) {
+		dev_vdbg(&lp->spi->dev, "corrupted frame received\n");
+		len = IEEE802154_MTU;
+	}
+
+	len = len - 2;  /* get rid of frame check field */
+
+	skb = dev_alloc_skb(len);
+	if (!skb)
+		return;
+
+	memcpy(skb_put(skb, len), lp->rx_buf, len);
+	ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]);
+
+	print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1,
+			     lp->rx_buf, len, 0);
+	pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]);
+
+	/* start RX sequence */
+	mcr20a_request_rx(lp);
+}
+
+static void
+mcr20a_handle_rx_read_len_complete(void *context)
+{
+	struct mcr20a_local *lp = context;
+	u8 len;
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* get the length of received frame */
+	len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK;
+	dev_dbg(printdev(lp), "frame len : %d\n", len);
+
+	/* prepare to read the rx buf */
+	lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+	lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF;
+	lp->rx_xfer_buf.len = len;
+
+	ret = spi_async(lp->spi, &lp->rx_buf_msg);
+	if (ret)
+		dev_err(printdev(lp), "failed to read rx buffer length\n");
+}
+
+static int
+mcr20a_handle_rx(struct mcr20a_local *lp)
+{
+	dev_dbg(printdev(lp), "%s\n", __func__);
+	lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete;
+	lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN);
+	lp->reg_xfer_data.len	= 1;
+
+	return spi_async(lp->spi, &lp->reg_msg);
+}
+
+static int
+mcr20a_handle_tx_complete(struct mcr20a_local *lp)
+{
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	ieee802154_xmit_complete(lp->hw, lp->tx_skb, false);
+
+	return mcr20a_request_rx(lp);
+}
+
+static int
+mcr20a_handle_tx(struct mcr20a_local *lp)
+{
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* write tx buffer */
+	lp->tx_header[0]	= MCR20A_BURST_WRITE_PACKET_BUF;
+	/* add 2 bytes of FCS */
+	lp->tx_len[0]		= lp->tx_skb->len + 2;
+	lp->tx_xfer_buf.tx_buf	= lp->tx_skb->data;
+	/* add 1 byte psduLength */
+	lp->tx_xfer_buf.len	= lp->tx_skb->len + 1;
+
+	ret = spi_async(lp->spi, &lp->tx_buf_msg);
+	if (ret) {
+		dev_err(printdev(lp), "SPI write Failed for TX buf\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+mcr20a_irq_clean_complete(void *context)
+{
+	struct mcr20a_local *lp = context;
+	u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	enable_irq(lp->spi->irq);
+
+	dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n",
+		lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]);
+
+	switch (seq_state) {
+	/* TX IRQ, RX IRQ and SEQ IRQ */
+	case (0x03):
+		if (lp->is_tx) {
+			lp->is_tx = 0;
+			dev_dbg(printdev(lp), "TX is done. No ACK\n");
+			mcr20a_handle_tx_complete(lp);
+		}
+		break;
+	case (0x05):
+			/* rx is starting */
+			dev_dbg(printdev(lp), "RX is starting\n");
+			mcr20a_handle_rx(lp);
+		break;
+	case (0x07):
+		if (lp->is_tx) {
+			/* tx is done */
+			lp->is_tx = 0;
+			dev_dbg(printdev(lp), "TX is done. Get ACK\n");
+			mcr20a_handle_tx_complete(lp);
+		} else {
+			/* rx is starting */
+			dev_dbg(printdev(lp), "RX is starting\n");
+			mcr20a_handle_rx(lp);
+		}
+		break;
+	case (0x01):
+		if (lp->is_tx) {
+			dev_dbg(printdev(lp), "TX is starting\n");
+			mcr20a_handle_tx(lp);
+		} else {
+			dev_dbg(printdev(lp), "MCR20A is stop\n");
+		}
+		break;
+	}
+}
+
+static void mcr20a_irq_status_complete(void *context)
+{
+	int ret;
+	struct mcr20a_local *lp = context;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+	regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE);
+
+	lp->reg_msg.complete = mcr20a_irq_clean_complete;
+	lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1);
+	memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM);
+	lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM;
+
+	ret = spi_async(lp->spi, &lp->reg_msg);
+
+	if (ret)
+		dev_err(printdev(lp), "failed to clean irq status\n");
+}
+
+static irqreturn_t mcr20a_irq_isr(int irq, void *data)
+{
+	struct mcr20a_local *lp = data;
+	int ret;
+
+	disable_irq_nosync(irq);
+
+	lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1);
+	/* read IRQSTSx */
+	ret = spi_async(lp->spi, &lp->irq_msg);
+	if (ret) {
+		enable_irq(irq);
+		return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int mcr20a_get_platform_data(struct spi_device *spi,
+				    struct mcr20a_platform_data *pdata)
+{
+	int ret = 0;
+
+	if (!spi->dev.of_node)
+		return -EINVAL;
+
+	pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0);
+	dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio);
+
+	return ret;
+}
+
+static void mcr20a_hw_setup(struct mcr20a_local *lp)
+{
+	u8 i;
+	struct ieee802154_hw *hw = lp->hw;
+	struct wpan_phy *phy = lp->hw->phy;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	phy->symbol_duration = 16;
+	phy->lifs_period = 40;
+	phy->sifs_period = 12;
+
+	hw->flags = IEEE802154_HW_TX_OMIT_CKSUM |
+			IEEE802154_HW_AFILT |
+			IEEE802154_HW_PROMISCUOUS;
+
+	phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+			WPAN_PHY_FLAG_CCA_MODE;
+
+	phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+		BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
+	phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+		BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+	/* initiating cca_ed_levels */
+	for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1;
+	      ++i) {
+		mcr20a_ed_levels[i] =  -i * 100;
+	}
+
+	phy->supported.cca_ed_levels = mcr20a_ed_levels;
+	phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels);
+
+	phy->cca.mode = NL802154_CCA_ENERGY;
+
+	phy->supported.channels[0] = MCR20A_VALID_CHANNELS;
+	phy->current_page = 0;
+	/* MCR20A default reset value */
+	phy->current_channel = 20;
+	phy->symbol_duration = 16;
+	phy->supported.tx_powers = mcr20a_powers;
+	phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers);
+	phy->cca_ed_level = phy->supported.cca_ed_levels[75];
+	phy->transmit_power = phy->supported.tx_powers[0x0F];
+}
+
+static void
+mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp)
+{
+	spi_message_init(&lp->tx_buf_msg);
+	lp->tx_buf_msg.context = lp;
+	lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete;
+
+	lp->tx_xfer_header.len = 1;
+	lp->tx_xfer_header.tx_buf = lp->tx_header;
+
+	lp->tx_xfer_len.len = 1;
+	lp->tx_xfer_len.tx_buf = lp->tx_len;
+
+	spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg);
+	spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg);
+	spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg);
+}
+
+static void
+mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp)
+{
+	spi_message_init(&lp->reg_msg);
+	lp->reg_msg.context = lp;
+
+	lp->reg_xfer_cmd.len = 1;
+	lp->reg_xfer_cmd.tx_buf = lp->reg_cmd;
+	lp->reg_xfer_cmd.rx_buf = lp->reg_cmd;
+
+	lp->reg_xfer_data.rx_buf = lp->reg_data;
+	lp->reg_xfer_data.tx_buf = lp->reg_data;
+
+	spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg);
+	spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg);
+
+	spi_message_init(&lp->rx_buf_msg);
+	lp->rx_buf_msg.context = lp;
+	lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete;
+	lp->rx_xfer_header.len = 1;
+	lp->rx_xfer_header.tx_buf = lp->rx_header;
+	lp->rx_xfer_header.rx_buf = lp->rx_header;
+
+	lp->rx_xfer_buf.rx_buf = lp->rx_buf;
+
+	lp->rx_xfer_lqi.len = 1;
+	lp->rx_xfer_lqi.rx_buf = lp->rx_lqi;
+
+	spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg);
+	spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg);
+	spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg);
+}
+
+static void
+mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp)
+{
+	spi_message_init(&lp->irq_msg);
+	lp->irq_msg.context		= lp;
+	lp->irq_msg.complete	= mcr20a_irq_status_complete;
+	lp->irq_xfer_header.len	= 1;
+	lp->irq_xfer_header.tx_buf = lp->irq_header;
+	lp->irq_xfer_header.rx_buf = lp->irq_header;
+
+	lp->irq_xfer_data.len	= MCR20A_IRQSTS_NUM;
+	lp->irq_xfer_data.rx_buf = lp->irq_data;
+
+	spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg);
+	spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg);
+}
+
+static int
+mcr20a_phy_init(struct mcr20a_local *lp)
+{
+	u8 index;
+	unsigned int phy_reg = 0;
+	int ret;
+
+	dev_dbg(printdev(lp), "%s\n", __func__);
+
+	/* Disable Tristate on COCO MISO for SPI reads */
+	ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02);
+	if (ret)
+		goto err_ret;
+
+	/* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts
+	 * immediately after init
+	 */
+	ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF);
+	if (ret)
+		goto err_ret;
+
+	/* Clear all PP IRQ bits in IRQSTS2 */
+	ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2,
+			   DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ |
+			   DAR_IRQSTS2_WAKE_IRQ);
+	if (ret)
+		goto err_ret;
+
+	/* Disable all timer interrupts */
+	ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF);
+	if (ret)
+		goto err_ret;
+
+	/*  PHY_CTRL1 : default HW settings + AUTOACK enabled */
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
+				 DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK);
+
+	/*  PHY_CTRL2 : disable all interrupts */
+	ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF);
+	if (ret)
+		goto err_ret;
+
+	/* PHY_CTRL3 : disable all timers and remaining interrupts */
+	ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3,
+			   DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK |
+			   DAR_PHY_CTRL3_WAKE_MSK);
+	if (ret)
+		goto err_ret;
+
+	/* SRC_CTRL : enable Acknowledge Frame Pending and
+	 * Source Address Matching Enable
+	 */
+	ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL,
+			   DAR_SRC_CTRL_ACK_FRM_PND |
+			   (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT));
+	if (ret)
+		goto err_ret;
+
+	/*  RX_FRAME_FILTER */
+	/*  FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */
+	ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER,
+			   IAR_RX_FRAME_FLT_FRM_VER |
+			   IAR_RX_FRAME_FLT_BEACON_FT |
+			   IAR_RX_FRAME_FLT_DATA_FT |
+			   IAR_RX_FRAME_FLT_CMD_FT);
+	if (ret)
+		goto err_ret;
+
+	dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n",
+		 MCR20A_OVERWRITE_VERSION);
+
+	/* Overwrites direct registers  */
+	ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER,
+			   MCR20A_OVERWRITE_VERSION);
+	if (ret)
+		goto err_ret;
+
+	/* Overwrites indirect registers  */
+	ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites,
+				     ARRAY_SIZE(mar20a_iar_overwrites));
+	if (ret)
+		goto err_ret;
+
+	/* Clear HW indirect queue */
+	dev_dbg(printdev(lp), "clear HW indirect queue\n");
+	for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) {
+		phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) <<
+			       DAR_SRC_CTRL_INDEX_SHIFT)
+			      | (DAR_SRC_CTRL_SRCADDR_EN)
+			      | (DAR_SRC_CTRL_INDEX_DISABLE));
+		ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg);
+		if (ret)
+			goto err_ret;
+		phy_reg = 0;
+	}
+
+	/* Assign HW Indirect hash table to PAN0 */
+	ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg);
+	if (ret)
+		goto err_ret;
+
+	/* Clear current lvl */
+	phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK;
+
+	/* Set new lvl */
+	phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE <<
+		IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT;
+	ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg);
+	if (ret)
+		goto err_ret;
+
+	/* Set CCA threshold to -75 dBm */
+	ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B);
+	if (ret)
+		goto err_ret;
+
+	/* Set prescaller to obtain 1 symbol (16us) timebase */
+	ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05);
+	if (ret)
+		goto err_ret;
+
+	/* Enable autodoze mode. */
+	ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES,
+				 DAR_PWR_MODES_AUTODOZE,
+				 DAR_PWR_MODES_AUTODOZE);
+	if (ret)
+		goto err_ret;
+
+	/* Disable clk_out */
+	ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL,
+				 DAR_CLK_OUT_CTRL_EN, 0x0);
+	if (ret)
+		goto err_ret;
+
+	return 0;
+
+err_ret:
+	return ret;
+}
+
+static int
+mcr20a_probe(struct spi_device *spi)
+{
+	struct ieee802154_hw *hw;
+	struct mcr20a_local *lp;
+	struct mcr20a_platform_data *pdata;
+	int irq_type;
+	int ret = -ENOMEM;
+
+	dev_dbg(&spi->dev, "%s\n", __func__);
+
+	if (!spi->irq) {
+		dev_err(&spi->dev, "no IRQ specified\n");
+		return -EINVAL;
+	}
+
+	pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	/* set mcr20a platform data */
+	ret = mcr20a_get_platform_data(spi, pdata);
+	if (ret < 0) {
+		dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n");
+		return ret;
+	}
+
+	/* init reset gpio */
+	if (gpio_is_valid(pdata->rst_gpio)) {
+		ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio,
+					    GPIOF_OUT_INIT_HIGH, "reset");
+		if (ret)
+			return ret;
+	}
+
+	/* reset mcr20a */
+	if (gpio_is_valid(pdata->rst_gpio)) {
+		usleep_range(10, 20);
+		gpio_set_value_cansleep(pdata->rst_gpio, 0);
+		usleep_range(10, 20);
+		gpio_set_value_cansleep(pdata->rst_gpio, 1);
+		usleep_range(120, 240);
+	}
+
+	/* allocate ieee802154_hw and private data */
+	hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops);
+	if (!hw) {
+		dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n");
+		return -ENOMEM;
+	}
+
+	/* init mcr20a local data */
+	lp = hw->priv;
+	lp->hw = hw;
+	lp->spi = spi;
+	lp->spi->dev.platform_data = pdata;
+	lp->pdata = pdata;
+
+	/* init ieee802154_hw */
+	hw->parent = &spi->dev;
+	ieee802154_random_extended_addr(&hw->phy->perm_extended_addr);
+
+	/* init buf */
+	lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL);
+
+	if (!lp->buf)
+		return -ENOMEM;
+
+	mcr20a_setup_tx_spi_messages(lp);
+	mcr20a_setup_rx_spi_messages(lp);
+	mcr20a_setup_irq_spi_messages(lp);
+
+	/* setup regmap */
+	lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap);
+	if (IS_ERR(lp->regmap_dar)) {
+		ret = PTR_ERR(lp->regmap_dar);
+		dev_err(&spi->dev, "Failed to allocate dar map: %d\n",
+			ret);
+		goto free_dev;
+	}
+
+	lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap);
+	if (IS_ERR(lp->regmap_iar)) {
+		ret = PTR_ERR(lp->regmap_iar);
+		dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret);
+		goto free_dev;
+	}
+
+	mcr20a_hw_setup(lp);
+
+	spi_set_drvdata(spi, lp);
+
+	ret = mcr20a_phy_init(lp);
+	if (ret < 0) {
+		dev_crit(&spi->dev, "mcr20a_phy_init failed\n");
+		goto free_dev;
+	}
+
+	irq_type = irq_get_trigger_type(spi->irq);
+	if (!irq_type)
+		irq_type = IRQF_TRIGGER_FALLING;
+
+	ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr,
+			       irq_type, dev_name(&spi->dev), lp);
+	if (ret) {
+		dev_err(&spi->dev, "could not request_irq for mcr20a\n");
+		ret = -ENODEV;
+		goto free_dev;
+	}
+
+	/* disable_irq by default and wait for starting hardware */
+	disable_irq(spi->irq);
+
+	ret = ieee802154_register_hw(hw);
+	if (ret) {
+		dev_crit(&spi->dev, "ieee802154_register_hw failed\n");
+		goto free_dev;
+	}
+
+	return ret;
+
+free_dev:
+	ieee802154_free_hw(lp->hw);
+
+	return ret;
+}
+
+static int mcr20a_remove(struct spi_device *spi)
+{
+	struct mcr20a_local *lp = spi_get_drvdata(spi);
+
+	dev_dbg(&spi->dev, "%s\n", __func__);
+
+	ieee802154_unregister_hw(lp->hw);
+	ieee802154_free_hw(lp->hw);
+
+	return 0;
+}
+
+static const struct of_device_id mcr20a_of_match[] = {
+	{ .compatible = "nxp,mcr20a", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, mcr20a_of_match);
+
+static const struct spi_device_id mcr20a_device_id[] = {
+	{ .name = "mcr20a", },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
+
+static struct spi_driver mcr20a_driver = {
+	.id_table = mcr20a_device_id,
+	.driver = {
+		.of_match_table = of_match_ptr(mcr20a_of_match),
+		.name	= "mcr20a",
+	},
+	.probe      = mcr20a_probe,
+	.remove     = mcr20a_remove,
+};
+
+module_spi_driver(mcr20a_driver);
+
+MODULE_DESCRIPTION("MCR20A Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>");
diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h
new file mode 100644
index 000000000000..6da4fd00b3c5
--- /dev/null
+++ b/drivers/net/ieee802154/mcr20a.h
@@ -0,0 +1,498 @@
+/*
+ * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller
+ *
+ * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef _MCR20A_H
+#define _MCR20A_H
+
+/* Direct Accress Register */
+#define DAR_IRQ_STS1		0x00
+#define DAR_IRQ_STS2		0x01
+#define DAR_IRQ_STS3		0x02
+#define DAR_PHY_CTRL1		0x03
+#define DAR_PHY_CTRL2		0x04
+#define DAR_PHY_CTRL3		0x05
+#define DAR_RX_FRM_LEN		0x06
+#define DAR_PHY_CTRL4		0x07
+#define DAR_SRC_CTRL		0x08
+#define DAR_SRC_ADDRS_SUM_LSB	0x09
+#define DAR_SRC_ADDRS_SUM_MSB	0x0A
+#define DAR_CCA1_ED_FNL		0x0B
+#define DAR_EVENT_TMR_LSB	0x0C
+#define DAR_EVENT_TMR_MSB	0x0D
+#define DAR_EVENT_TMR_USB	0x0E
+#define DAR_TIMESTAMP_LSB	0x0F
+#define DAR_TIMESTAMP_MSB	0x10
+#define DAR_TIMESTAMP_USB	0x11
+#define DAR_T3CMP_LSB		0x12
+#define DAR_T3CMP_MSB		0x13
+#define DAR_T3CMP_USB		0x14
+#define DAR_T2PRIMECMP_LSB	0x15
+#define DAR_T2PRIMECMP_MSB	0x16
+#define DAR_T1CMP_LSB		0x17
+#define DAR_T1CMP_MSB		0x18
+#define DAR_T1CMP_USB		0x19
+#define DAR_T2CMP_LSB		0x1A
+#define DAR_T2CMP_MSB		0x1B
+#define DAR_T2CMP_USB		0x1C
+#define DAR_T4CMP_LSB		0x1D
+#define DAR_T4CMP_MSB		0x1E
+#define DAR_T4CMP_USB		0x1F
+#define DAR_PLL_INT0		0x20
+#define DAR_PLL_FRAC0_LSB	0x21
+#define DAR_PLL_FRAC0_MSB	0x22
+#define DAR_PA_PWR		0x23
+#define DAR_SEQ_STATE		0x24
+#define DAR_LQI_VALUE		0x25
+#define DAR_RSSI_CCA_CONT	0x26
+/*------------------            0x27 */
+#define DAR_ASM_CTRL1		0x28
+#define DAR_ASM_CTRL2		0x29
+#define DAR_ASM_DATA_0		0x2A
+#define DAR_ASM_DATA_1		0x2B
+#define DAR_ASM_DATA_2		0x2C
+#define DAR_ASM_DATA_3		0x2D
+#define DAR_ASM_DATA_4		0x2E
+#define DAR_ASM_DATA_5		0x2F
+#define DAR_ASM_DATA_6		0x30
+#define DAR_ASM_DATA_7		0x31
+#define DAR_ASM_DATA_8		0x32
+#define DAR_ASM_DATA_9		0x33
+#define DAR_ASM_DATA_A		0x34
+#define DAR_ASM_DATA_B		0x35
+#define DAR_ASM_DATA_C		0x36
+#define DAR_ASM_DATA_D		0x37
+#define DAR_ASM_DATA_E		0x38
+#define DAR_ASM_DATA_F		0x39
+/*-----------------------       0x3A */
+#define DAR_OVERWRITE_VER	0x3B
+#define DAR_CLK_OUT_CTRL	0x3C
+#define DAR_PWR_MODES		0x3D
+#define IAR_INDEX		0x3E
+#define IAR_DATA		0x3F
+
+/* Indirect Resgister Memory */
+#define IAR_PART_ID		0x00
+#define IAR_XTAL_TRIM		0x01
+#define IAR_PMC_LP_TRIM		0x02
+#define IAR_MACPANID0_LSB	0x03
+#define IAR_MACPANID0_MSB	0x04
+#define IAR_MACSHORTADDRS0_LSB	0x05
+#define IAR_MACSHORTADDRS0_MSB	0x06
+#define IAR_MACLONGADDRS0_0	0x07
+#define IAR_MACLONGADDRS0_8	0x08
+#define IAR_MACLONGADDRS0_16	0x09
+#define IAR_MACLONGADDRS0_24	0x0A
+#define IAR_MACLONGADDRS0_32	0x0B
+#define IAR_MACLONGADDRS0_40	0x0C
+#define IAR_MACLONGADDRS0_48	0x0D
+#define IAR_MACLONGADDRS0_56	0x0E
+#define IAR_RX_FRAME_FILTER	0x0F
+#define IAR_PLL_INT1		0x10
+#define IAR_PLL_FRAC1_LSB	0x11
+#define IAR_PLL_FRAC1_MSB	0x12
+#define IAR_MACPANID1_LSB	0x13
+#define IAR_MACPANID1_MSB	0x14
+#define IAR_MACSHORTADDRS1_LSB	0x15
+#define IAR_MACSHORTADDRS1_MSB	0x16
+#define IAR_MACLONGADDRS1_0	0x17
+#define IAR_MACLONGADDRS1_8	0x18
+#define IAR_MACLONGADDRS1_16	0x19
+#define IAR_MACLONGADDRS1_24	0x1A
+#define IAR_MACLONGADDRS1_32	0x1B
+#define IAR_MACLONGADDRS1_40	0x1C
+#define IAR_MACLONGADDRS1_48	0x1D
+#define IAR_MACLONGADDRS1_56	0x1E
+#define IAR_DUAL_PAN_CTRL	0x1F
+#define IAR_DUAL_PAN_DWELL	0x20
+#define IAR_DUAL_PAN_STS	0x21
+#define IAR_CCA1_THRESH		0x22
+#define IAR_CCA1_ED_OFFSET_COMP	0x23
+#define IAR_LQI_OFFSET_COMP	0x24
+#define IAR_CCA_CTRL		0x25
+#define IAR_CCA2_CORR_PEAKS	0x26
+#define IAR_CCA2_CORR_THRESH	0x27
+#define IAR_TMR_PRESCALE	0x28
+/*--------------------          0x29 */
+#define IAR_GPIO_DATA		0x2A
+#define IAR_GPIO_DIR		0x2B
+#define IAR_GPIO_PUL_EN		0x2C
+#define IAR_GPIO_PUL_SEL	0x2D
+#define IAR_GPIO_DS		0x2E
+/*------------------            0x2F */
+#define IAR_ANT_PAD_CTRL	0x30
+#define IAR_MISC_PAD_CTRL	0x31
+#define IAR_BSM_CTRL		0x32
+/*-------------------           0x33 */
+#define IAR_RNG			0x34
+#define IAR_RX_BYTE_COUNT	0x35
+#define IAR_RX_WTR_MARK		0x36
+#define IAR_SOFT_RESET		0x37
+#define IAR_TXDELAY		0x38
+#define IAR_ACKDELAY		0x39
+#define IAR_SEQ_MGR_CTRL	0x3A
+#define IAR_SEQ_MGR_STS		0x3B
+#define IAR_SEQ_T_STS		0x3C
+#define IAR_ABORT_STS		0x3D
+#define IAR_CCCA_BUSY_CNT	0x3E
+#define IAR_SRC_ADDR_CHECKSUM1	0x3F
+#define IAR_SRC_ADDR_CHECKSUM2	0x40
+#define IAR_SRC_TBL_VALID1	0x41
+#define IAR_SRC_TBL_VALID2	0x42
+#define IAR_FILTERFAIL_CODE1	0x43
+#define IAR_FILTERFAIL_CODE2	0x44
+#define IAR_SLOT_PRELOAD	0x45
+/*--------------------          0x46 */
+#define IAR_CORR_VT		0x47
+#define IAR_SYNC_CTRL		0x48
+#define IAR_PN_LSB_0		0x49
+#define IAR_PN_LSB_1		0x4A
+#define IAR_PN_MSB_0		0x4B
+#define IAR_PN_MSB_1		0x4C
+#define IAR_CORR_NVAL		0x4D
+#define IAR_TX_MODE_CTRL	0x4E
+#define IAR_SNF_THR		0x4F
+#define IAR_FAD_THR		0x50
+#define IAR_ANT_AGC_CTRL	0x51
+#define IAR_AGC_THR1		0x52
+#define IAR_AGC_THR2		0x53
+#define IAR_AGC_HYS		0x54
+#define IAR_AFC			0x55
+/*-------------------           0x56 */
+/*-------------------           0x57 */
+#define IAR_PHY_STS		0x58
+#define IAR_RX_MAX_CORR		0x59
+#define IAR_RX_MAX_PREAMBLE	0x5A
+#define IAR_RSSI		0x5B
+/*-------------------           0x5C */
+/*-------------------           0x5D */
+#define IAR_PLL_DIG_CTRL	0x5E
+#define IAR_VCO_CAL		0x5F
+#define IAR_VCO_BEST_DIFF	0x60
+#define IAR_VCO_BIAS		0x61
+#define IAR_KMOD_CTRL		0x62
+#define IAR_KMOD_CAL		0x63
+#define IAR_PA_CAL		0x64
+#define IAR_PA_PWRCAL		0x65
+#define IAR_ATT_RSSI1		0x66
+#define IAR_ATT_RSSI2		0x67
+#define IAR_RSSI_OFFSET		0x68
+#define IAR_RSSI_SLOPE		0x69
+#define IAR_RSSI_CAL1		0x6A
+#define IAR_RSSI_CAL2		0x6B
+/*-------------------           0x6C */
+/*-------------------           0x6D */
+#define IAR_XTAL_CTRL		0x6E
+#define IAR_XTAL_COMP_MIN	0x6F
+#define IAR_XTAL_COMP_MAX	0x70
+#define IAR_XTAL_GM		0x71
+/*-------------------           0x72 */
+/*-------------------           0x73 */
+#define IAR_LNA_TUNE		0x74
+#define IAR_LNA_AGCGAIN		0x75
+/*-------------------           0x76 */
+/*-------------------           0x77 */
+#define IAR_CHF_PMA_GAIN	0x78
+#define IAR_CHF_IBUF		0x79
+#define IAR_CHF_QBUF		0x7A
+#define IAR_CHF_IRIN		0x7B
+#define IAR_CHF_QRIN		0x7C
+#define IAR_CHF_IL		0x7D
+#define IAR_CHF_QL		0x7E
+#define IAR_CHF_CC1		0x7F
+#define IAR_CHF_CCL		0x80
+#define IAR_CHF_CC2		0x81
+#define IAR_CHF_IROUT		0x82
+#define IAR_CHF_QROUT		0x83
+/*-------------------           0x84 */
+/*-------------------           0x85 */
+#define IAR_RSSI_CTRL		0x86
+/*-------------------           0x87 */
+/*-------------------           0x88 */
+#define IAR_PA_BIAS		0x89
+#define IAR_PA_TUNING		0x8A
+/*-------------------           0x8B */
+/*-------------------           0x8C */
+#define IAR_PMC_HP_TRIM		0x8D
+#define IAR_VREGA_TRIM		0x8E
+/*-------------------           0x8F */
+/*-------------------           0x90 */
+#define IAR_VCO_CTRL1		0x91
+#define IAR_VCO_CTRL2		0x92
+/*-------------------           0x93 */
+/*-------------------           0x94 */
+#define IAR_ANA_SPARE_OUT1	0x95
+#define IAR_ANA_SPARE_OUT2	0x96
+#define IAR_ANA_SPARE_IN	0x97
+#define IAR_MISCELLANEOUS	0x98
+/*-------------------           0x99 */
+#define IAR_SEQ_MGR_OVRD0	0x9A
+#define IAR_SEQ_MGR_OVRD1	0x9B
+#define IAR_SEQ_MGR_OVRD2	0x9C
+#define IAR_SEQ_MGR_OVRD3	0x9D
+#define IAR_SEQ_MGR_OVRD4	0x9E
+#define IAR_SEQ_MGR_OVRD5	0x9F
+#define IAR_SEQ_MGR_OVRD6	0xA0
+#define IAR_SEQ_MGR_OVRD7	0xA1
+/*-------------------           0xA2 */
+#define IAR_TESTMODE_CTRL	0xA3
+#define IAR_DTM_CTRL1		0xA4
+#define IAR_DTM_CTRL2		0xA5
+#define IAR_ATM_CTRL1		0xA6
+#define IAR_ATM_CTRL2		0xA7
+#define IAR_ATM_CTRL3		0xA8
+/*-------------------           0xA9 */
+#define IAR_LIM_FE_TEST_CTRL	0xAA
+#define IAR_CHF_TEST_CTRL	0xAB
+#define IAR_VCO_TEST_CTRL	0xAC
+#define IAR_PLL_TEST_CTRL	0xAD
+#define IAR_PA_TEST_CTRL	0xAE
+#define IAR_PMC_TEST_CTRL	0xAF
+#define IAR_SCAN_DTM_PROTECT_1	0xFE
+#define IAR_SCAN_DTM_PROTECT_0	0xFF
+
+/* IRQSTS1 bits */
+#define DAR_IRQSTS1_RX_FRM_PEND		BIT(7)
+#define DAR_IRQSTS1_PLL_UNLOCK_IRQ	BIT(6)
+#define DAR_IRQSTS1_FILTERFAIL_IRQ	BIT(5)
+#define DAR_IRQSTS1_RXWTRMRKIRQ		BIT(4)
+#define DAR_IRQSTS1_CCAIRQ		BIT(3)
+#define DAR_IRQSTS1_RXIRQ		BIT(2)
+#define DAR_IRQSTS1_TXIRQ		BIT(1)
+#define DAR_IRQSTS1_SEQIRQ		BIT(0)
+
+/* IRQSTS2 bits */
+#define DAR_IRQSTS2_CRCVALID		BIT(7)
+#define DAR_IRQSTS2_CCA			BIT(6)
+#define DAR_IRQSTS2_SRCADDR		BIT(5)
+#define DAR_IRQSTS2_PI			BIT(4)
+#define DAR_IRQSTS2_TMRSTATUS		BIT(3)
+#define DAR_IRQSTS2_ASM_IRQ		BIT(2)
+#define DAR_IRQSTS2_PB_ERR_IRQ		BIT(1)
+#define DAR_IRQSTS2_WAKE_IRQ		BIT(0)
+
+/* IRQSTS3 bits */
+#define DAR_IRQSTS3_TMR4MSK		BIT(7)
+#define DAR_IRQSTS3_TMR3MSK		BIT(6)
+#define DAR_IRQSTS3_TMR2MSK		BIT(5)
+#define DAR_IRQSTS3_TMR1MSK		BIT(4)
+#define DAR_IRQSTS3_TMR4IRQ		BIT(3)
+#define DAR_IRQSTS3_TMR3IRQ		BIT(2)
+#define DAR_IRQSTS3_TMR2IRQ		BIT(1)
+#define DAR_IRQSTS3_TMR1IRQ		BIT(0)
+
+/* PHY_CTRL1 bits */
+#define DAR_PHY_CTRL1_TMRTRIGEN		BIT(7)
+#define DAR_PHY_CTRL1_SLOTTED		BIT(6)
+#define DAR_PHY_CTRL1_CCABFRTX		BIT(5)
+#define DAR_PHY_CTRL1_CCABFRTX_SHIFT	5
+#define DAR_PHY_CTRL1_RXACKRQD		BIT(4)
+#define DAR_PHY_CTRL1_AUTOACK		BIT(3)
+#define DAR_PHY_CTRL1_XCVSEQ_MASK	0x07
+
+/* PHY_CTRL2 bits */
+#define DAR_PHY_CTRL2_CRC_MSK		BIT(7)
+#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK	BIT(6)
+#define DAR_PHY_CTRL2_FILTERFAIL_MSK	BIT(5)
+#define DAR_PHY_CTRL2_RX_WMRK_MSK	BIT(4)
+#define DAR_PHY_CTRL2_CCAMSK		BIT(3)
+#define DAR_PHY_CTRL2_RXMSK		BIT(2)
+#define DAR_PHY_CTRL2_TXMSK		BIT(1)
+#define DAR_PHY_CTRL2_SEQMSK		BIT(0)
+
+/* PHY_CTRL3 bits */
+#define DAR_PHY_CTRL3_TMR4CMP_EN	BIT(7)
+#define DAR_PHY_CTRL3_TMR3CMP_EN	BIT(6)
+#define DAR_PHY_CTRL3_TMR2CMP_EN	BIT(5)
+#define DAR_PHY_CTRL3_TMR1CMP_EN	BIT(4)
+#define DAR_PHY_CTRL3_ASM_MSK		BIT(2)
+#define DAR_PHY_CTRL3_PB_ERR_MSK	BIT(1)
+#define DAR_PHY_CTRL3_WAKE_MSK		BIT(0)
+
+/* RX_FRM_LEN bits */
+#define DAR_RX_FRAME_LENGTH_MASK	(0x7F)
+
+/* PHY_CTRL4 bits */
+#define DAR_PHY_CTRL4_TRCV_MSK		BIT(7)
+#define DAR_PHY_CTRL4_TC3TMOUT		BIT(6)
+#define DAR_PHY_CTRL4_PANCORDNTR0	BIT(5)
+#define DAR_PHY_CTRL4_CCATYPE		(3)
+#define DAR_PHY_CTRL4_CCATYPE_SHIFT	(3)
+#define DAR_PHY_CTRL4_CCATYPE_MASK	(0x18)
+#define DAR_PHY_CTRL4_TMRLOAD		BIT(2)
+#define DAR_PHY_CTRL4_PROMISCUOUS	BIT(1)
+#define DAR_PHY_CTRL4_TC2PRIME_EN	BIT(0)
+
+/* SRC_CTRL bits */
+#define DAR_SRC_CTRL_INDEX		(0x0F)
+#define DAR_SRC_CTRL_INDEX_SHIFT	(4)
+#define DAR_SRC_CTRL_ACK_FRM_PND	BIT(3)
+#define DAR_SRC_CTRL_SRCADDR_EN		BIT(2)
+#define DAR_SRC_CTRL_INDEX_EN		BIT(1)
+#define DAR_SRC_CTRL_INDEX_DISABLE	BIT(0)
+
+/* DAR_ASM_CTRL1 bits */
+#define DAR_ASM_CTRL1_CLEAR		BIT(7)
+#define DAR_ASM_CTRL1_START		BIT(6)
+#define DAR_ASM_CTRL1_SELFTST		BIT(5)
+#define DAR_ASM_CTRL1_CTR		BIT(4)
+#define DAR_ASM_CTRL1_CBC		BIT(3)
+#define DAR_ASM_CTRL1_AES		BIT(2)
+#define DAR_ASM_CTRL1_LOAD_MAC		BIT(1)
+
+/* DAR_ASM_CTRL2 bits */
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL		(7)
+#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT	(5)
+#define DAR_ASM_CTRL2_TSTPAS			BIT(1)
+
+/* DAR_CLK_OUT_CTRL bits */
+#define DAR_CLK_OUT_CTRL_EXTEND		BIT(7)
+#define DAR_CLK_OUT_CTRL_HIZ		BIT(6)
+#define DAR_CLK_OUT_CTRL_SR		BIT(5)
+#define DAR_CLK_OUT_CTRL_DS		BIT(4)
+#define DAR_CLK_OUT_CTRL_EN		BIT(3)
+#define DAR_CLK_OUT_CTRL_DIV		(7)
+
+/* DAR_PWR_MODES bits */
+#define DAR_PWR_MODES_XTAL_READY	BIT(5)
+#define DAR_PWR_MODES_XTALEN		BIT(4)
+#define DAR_PWR_MODES_ASM_CLK_EN	BIT(3)
+#define DAR_PWR_MODES_AUTODOZE		BIT(1)
+#define DAR_PWR_MODES_PMC_MODE		BIT(0)
+
+/* RX_FRAME_FILTER bits */
+#define IAR_RX_FRAME_FLT_FRM_VER		(0xC0)
+#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT		(6)
+#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS	BIT(5)
+#define IAR_RX_FRAME_FLT_NS_FT			BIT(4)
+#define IAR_RX_FRAME_FLT_CMD_FT			BIT(3)
+#define IAR_RX_FRAME_FLT_ACK_FT			BIT(2)
+#define IAR_RX_FRAME_FLT_DATA_FT		BIT(1)
+#define IAR_RX_FRAME_FLT_BEACON_FT		BIT(0)
+
+/* DUAL_PAN_CTRL bits */
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK	(0xF0)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT	(4)
+#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK	BIT(3)
+#define IAR_DUAL_PAN_CTRL_PANCORDNTR1		BIT(2)
+#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO		BIT(1)
+#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK	BIT(0)
+
+/* DUAL_PAN_STS bits */
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN1		BIT(7)
+#define IAR_DUAL_PAN_STS_RECD_ON_PAN0		BIT(6)
+#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN	(0x3F)
+
+/* CCA_CTRL bits */
+#define IAR_CCA_CTRL_AGC_FRZ_EN			BIT(6)
+#define IAR_CCA_CTRL_CONT_RSSI_EN		BIT(5)
+#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR	BIT(4)
+#define IAR_CCA_CTRL_CCA3_AND_NOT_OR	BIT(3)
+#define IAR_CCA_CTRL_POWER_COMP_EN_LQI	BIT(2)
+#define IAR_CCA_CTRL_POWER_COMP_EN_ED	BIT(1)
+#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1	BIT(0)
+
+/* ANT_PAD_CTRL bits */
+#define IAR_ANT_PAD_CTRL_ANTX_POL	(0x0F)
+#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT	(4)
+#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE	BIT(3)
+#define IAR_ANT_PAD_CTRL_ANTX_HZ	BIT(2)
+#define IAR_ANT_PAD_CTRL_ANTX_EN	(3)
+
+/* MISC_PAD_CTRL bits */
+#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN	BIT(3)
+#define IAR_MISC_PAD_CTRL_IRQ_B_OD	BIT(2)
+#define IAR_MISC_PAD_CTRL_NON_GPIO_DS	BIT(1)
+#define IAR_MISC_PAD_CTRL_ANTX_CURR	(1)
+
+/* ANT_AGC_CTRL bits */
+#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT	(0)
+#define IAR_ANT_AGC_CTRL_FAD_EN_MASK	(1)
+#define IAR_ANT_AGC_CTRL_ANTX_SHIFT	(1)
+#define IAR_ANT_AGC_CTRL_ANTX_MASK	BIT(AR_ANT_AGC_CTRL_ANTX_SHIFT)
+
+/* BSM_CTRL bits */
+#define BSM_CTRL_BSM_EN		(1)
+
+/* SOFT_RESET bits */
+#define IAR_SOFT_RESET_SOG_RST		BIT(7)
+#define IAR_SOFT_RESET_REGS_RST		BIT(4)
+#define IAR_SOFT_RESET_PLL_RST		BIT(3)
+#define IAR_SOFT_RESET_TX_RST		BIT(2)
+#define IAR_SOFT_RESET_RX_RST		BIT(1)
+#define IAR_SOFT_RESET_SEQ_MGR_RST	BIT(0)
+
+/* SEQ_MGR_CTRL bits */
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL		(3)
+#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT	(6)
+#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE		BIT(5)
+#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE		BIT(4)
+#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH	BIT(3)
+#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT	BIT(2)
+#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS		BIT(1)
+#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD		BIT(0)
+
+/* SEQ_MGR_STS bits */
+#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED	BIT(7)
+#define IAR_SEQ_MGR_STS_RX_MODE			BIT(6)
+#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING	BIT(5)
+#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT		BIT(4)
+#define IAR_SEQ_MGR_STS_SEQ_IDLE		BIT(3)
+#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL		(7)
+
+/* ABORT_STS bits */
+#define IAR_ABORT_STS_PLL_ABORTED	BIT(2)
+#define IAR_ABORT_STS_TC3_ABORTED	BIT(1)
+#define IAR_ABORT_STS_SW_ABORTED	BIT(0)
+
+/* IAR_FILTERFAIL_CODE2 bits */
+#define IAR_FILTERFAIL_CODE2_PAN_SEL	BIT(7)
+#define IAR_FILTERFAIL_CODE2_9_8	(3)
+
+/* PHY_STS bits */
+#define IAR_PHY_STS_PLL_UNLOCK		BIT(7)
+#define IAR_PHY_STS_PLL_LOCK_ERR	BIT(6)
+#define IAR_PHY_STS_PLL_LOCK		BIT(5)
+#define IAR_PHY_STS_CRCVALID		BIT(3)
+#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL	BIT(2)
+#define IAR_PHY_STS_SFD_DET		BIT(1)
+#define IAR_PHY_STS_PREAMBLE_DET	BIT(0)
+
+/* TESTMODE_CTRL bits */
+#define IAR_TEST_MODE_CTRL_HOT_ANT		BIT(4)
+#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN	BIT(3)
+#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN		BIT(2)
+#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN	BIT(1)
+#define IAR_TEST_MODE_CTRL_FPGA_EN		BIT(0)
+
+/* DTM_CTRL1 bits */
+#define IAR_DTM_CTRL1_ATM_LOCKED	BIT(7)
+#define IAR_DTM_CTRL1_DTM_EN		BIT(6)
+#define IAR_DTM_CTRL1_PAGE5		BIT(5)
+#define IAR_DTM_CTRL1_PAGE4		BIT(4)
+#define IAR_DTM_CTRL1_PAGE3		BIT(3)
+#define IAR_DTM_CTRL1_PAGE2		BIT(2)
+#define IAR_DTM_CTRL1_PAGE1		BIT(1)
+#define IAR_DTM_CTRL1_PAGE0		BIT(0)
+
+/* TX_MODE_CTRL */
+#define IAR_TX_MODE_CTRL_TX_INV		BIT(4)
+#define IAR_TX_MODE_CTRL_BT_EN		BIT(3)
+#define IAR_TX_MODE_CTRL_DTS2		BIT(2)
+#define IAR_TX_MODE_CTRL_DTS1		BIT(1)
+#define IAR_TX_MODE_CTRL_DTS0		BIT(0)
+
+#define TX_MODE_CTRL_DTS_MASK	(7)
+
+#endif /* _MCR20A_H */

From ccc007e4a746bb592d3e72106f00241f81d51410 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Thu, 15 Feb 2018 19:42:43 +0200
Subject: [PATCH 0247/1678] net: sched: add em_ipt ematch for calling xtables
 matches

The commit a new tc ematch for using netfilter xtable matches.

This allows early classification as well as mirroning/redirecting traffic
based on logic implemented in netfilter extensions.

Current supported use case is classification based on the incoming IPSec
state used during decpsulation using the 'policy' iptables extension
(xt_policy).

The module dynamically fetches the netfilter match module and calls
it using a fake xt_action_param structure based on validated userspace
provided parameters.

As the xt_policy match does not access skb->data, no skb modifications
are needed on match.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h             |   3 +-
 include/uapi/linux/tc_ematch/tc_em_ipt.h |  20 ++
 net/sched/Kconfig                        |  12 ++
 net/sched/Makefile                       |   1 +
 net/sched/em_ipt.c                       | 257 +++++++++++++++++++++++
 5 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/tc_ematch/tc_em_ipt.h
 create mode 100644 net/sched/em_ipt.c

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 46c506615f4a..7cafb26df555 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -555,7 +555,8 @@ enum {
 #define	TCF_EM_VLAN		6
 #define	TCF_EM_CANID		7
 #define	TCF_EM_IPSET		8
-#define	TCF_EM_MAX		8
+#define	TCF_EM_IPT		9
+#define	TCF_EM_MAX		9
 
 enum {
 	TCF_EM_PROG_TC
diff --git a/include/uapi/linux/tc_ematch/tc_em_ipt.h b/include/uapi/linux/tc_ematch/tc_em_ipt.h
new file mode 100644
index 000000000000..49a65530992c
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/tc_em_ipt.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_TC_EM_IPT_H
+#define __LINUX_TC_EM_IPT_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+enum {
+	TCA_EM_IPT_UNSPEC,
+	TCA_EM_IPT_HOOK,
+	TCA_EM_IPT_MATCH_NAME,
+	TCA_EM_IPT_MATCH_REVISION,
+	TCA_EM_IPT_NFPROTO,
+	TCA_EM_IPT_MATCH_DATA,
+	__TCA_EM_IPT_MAX
+};
+
+#define TCA_EM_IPT_MAX (__TCA_EM_IPT_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f24a6ae6819a..a01169fb5325 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -658,6 +658,18 @@ config NET_EMATCH_IPSET
 	  To compile this code as a module, choose M here: the
 	  module will be called em_ipset.
 
+config NET_EMATCH_IPT
+	tristate "IPtables Matches"
+	depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
+	---help---
+	  Say Y here to be able to classify packets based on iptables
+	  matches.
+	  Current supported match is "policy" which allows packet classification
+	  based on IPsec policy that was used during decapsulation
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_ipt.
+
 config NET_CLS_ACT
 	bool "Actions"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 5b635447e3f8..8811d3804878 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -75,3 +75,4 @@ obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
 obj-$(CONFIG_NET_EMATCH_TEXT)	+= em_text.o
 obj-$(CONFIG_NET_EMATCH_CANID)	+= em_canid.o
 obj-$(CONFIG_NET_EMATCH_IPSET)	+= em_ipset.o
+obj-$(CONFIG_NET_EMATCH_IPT)	+= em_ipt.o
diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
new file mode 100644
index 000000000000..a5f34e930eff
--- /dev/null
+++ b/net/sched/em_ipt.c
@@ -0,0 +1,257 @@
+/*
+ * net/sched/em_ipt.c IPtables matches Ematch
+ *
+ * (c) 2018 Eyal Birger <eyal.birger@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_ipt.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/pkt_cls.h>
+
+struct em_ipt_match {
+	const struct xt_match *match;
+	u32 hook;
+	u8 match_data[0] __aligned(8);
+};
+
+struct em_ipt_xt_match {
+	char *match_name;
+	int (*validate_match_data)(struct nlattr **tb, u8 mrev);
+};
+
+static const struct nla_policy em_ipt_policy[TCA_EM_IPT_MAX + 1] = {
+	[TCA_EM_IPT_MATCH_NAME]		= { .type = NLA_STRING,
+					    .len = XT_EXTENSION_MAXNAMELEN },
+	[TCA_EM_IPT_MATCH_REVISION]	= { .type = NLA_U8 },
+	[TCA_EM_IPT_HOOK]		= { .type = NLA_U32 },
+	[TCA_EM_IPT_NFPROTO]		= { .type = NLA_U8 },
+	[TCA_EM_IPT_MATCH_DATA]		= { .type = NLA_UNSPEC },
+};
+
+static int check_match(struct net *net, struct em_ipt_match *im, int mdata_len)
+{
+	struct xt_mtchk_param mtpar = {};
+	union {
+		struct ipt_entry e4;
+		struct ip6t_entry e6;
+	} e = {};
+
+	mtpar.net	= net;
+	mtpar.table	= "filter";
+	mtpar.hook_mask	= 1 << im->hook;
+	mtpar.family	= im->match->family;
+	mtpar.match	= im->match;
+	mtpar.entryinfo = &e;
+	mtpar.matchinfo	= (void *)im->match_data;
+	return xt_check_match(&mtpar, mdata_len, 0, 0);
+}
+
+static int policy_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+	if (mrev != 0) {
+		pr_err("only policy match revision 0 supported");
+		return -EINVAL;
+	}
+
+	if (nla_get_u32(tb[TCA_EM_IPT_HOOK]) != NF_INET_PRE_ROUTING) {
+		pr_err("policy can only be matched on NF_INET_PRE_ROUTING");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
+	{
+		.match_name = "policy",
+		.validate_match_data = policy_validate_match_data
+	},
+	{}
+};
+
+static struct xt_match *get_xt_match(struct nlattr **tb)
+{
+	const struct em_ipt_xt_match *m;
+	struct nlattr *mname_attr;
+	u8 nfproto, mrev = 0;
+	int ret;
+
+	mname_attr = tb[TCA_EM_IPT_MATCH_NAME];
+	for (m = em_ipt_xt_matches; m->match_name; m++) {
+		if (!nla_strcmp(mname_attr, m->match_name))
+			break;
+	}
+
+	if (!m->match_name) {
+		pr_err("Unsupported xt match");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (tb[TCA_EM_IPT_MATCH_REVISION])
+		mrev = nla_get_u8(tb[TCA_EM_IPT_MATCH_REVISION]);
+
+	ret = m->validate_match_data(tb, mrev);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+	return xt_request_find_match(nfproto, m->match_name, mrev);
+}
+
+static int em_ipt_change(struct net *net, void *data, int data_len,
+			 struct tcf_ematch *em)
+{
+	struct nlattr *tb[TCA_EM_IPT_MAX + 1];
+	struct em_ipt_match *im = NULL;
+	struct xt_match *match;
+	int mdata_len, ret;
+
+	ret = nla_parse(tb, TCA_EM_IPT_MAX, data, data_len, em_ipt_policy,
+			NULL);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_EM_IPT_HOOK] || !tb[TCA_EM_IPT_MATCH_NAME] ||
+	    !tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
+		return -EINVAL;
+
+	match = get_xt_match(tb);
+	if (IS_ERR(match)) {
+		pr_err("unable to load match\n");
+		return PTR_ERR(match);
+	}
+
+	mdata_len = XT_ALIGN(nla_len(tb[TCA_EM_IPT_MATCH_DATA]));
+	im = kzalloc(sizeof(*im) + mdata_len, GFP_KERNEL);
+	if (!im) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	im->match = match;
+	im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+	nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
+
+	ret = check_match(net, im, mdata_len);
+	if (ret)
+		goto err;
+
+	em->datalen = sizeof(*im) + mdata_len;
+	em->data = (unsigned long)im;
+	return 0;
+
+err:
+	kfree(im);
+	module_put(match->me);
+	return ret;
+}
+
+static void em_ipt_destroy(struct tcf_ematch *em)
+{
+	struct em_ipt_match *im = (void *)em->data;
+
+	if (!im)
+		return;
+
+	if (im->match->destroy) {
+		struct xt_mtdtor_param par = {
+			.net = em->net,
+			.match = im->match,
+			.matchinfo = im->match_data,
+			.family = im->match->family
+		};
+		im->match->destroy(&par);
+	}
+	module_put(im->match->me);
+	kfree((void *)im);
+}
+
+static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	const struct em_ipt_match *im = (const void *)em->data;
+	struct xt_action_param acpar = {};
+	struct net_device *indev = NULL;
+	struct nf_hook_state state;
+	int ret;
+
+	rcu_read_lock();
+
+	if (skb->skb_iif)
+		indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+
+	nf_hook_state_init(&state, im->hook, im->match->family,
+			   indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
+
+	acpar.match = im->match;
+	acpar.matchinfo = im->match_data;
+	acpar.state = &state;
+
+	ret = im->match->match(skb, &acpar);
+
+	rcu_read_unlock();
+	return ret;
+}
+
+static int em_ipt_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct em_ipt_match *im = (void *)em->data;
+
+	if (nla_put_string(skb, TCA_EM_IPT_MATCH_NAME, im->match->name) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u32(skb, TCA_EM_IPT_HOOK, im->hook) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+		return -EMSGSIZE;
+	if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
+		    im->match->usersize ?: im->match->matchsize,
+		    im->match_data) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static struct tcf_ematch_ops em_ipt_ops = {
+	.kind	  = TCF_EM_IPT,
+	.change	  = em_ipt_change,
+	.destroy  = em_ipt_destroy,
+	.match	  = em_ipt_match,
+	.dump	  = em_ipt_dump,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_ipt_ops.link)
+};
+
+static int __init init_em_ipt(void)
+{
+	return tcf_em_register(&em_ipt_ops);
+}
+
+static void __exit exit_em_ipt(void)
+{
+	tcf_em_unregister(&em_ipt_ops);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eyal Birger <eyal.birger@gmail.com>");
+MODULE_DESCRIPTION("TC extended match for IPtables matches");
+
+module_init(init_em_ipt);
+module_exit(exit_em_ipt);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_IPT);

From 4be83e5aa21e667b0c25e2e87e5bb9bf260d870d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Feb 2018 07:30:16 +0100
Subject: [PATCH 0248/1678] r8169: remove not needed PHY soft reset in
 rtl8168e_2_hw_phy_config

rtl8169_init_phy() resets the PHY anyway after applying the chip-specific
PHY configuration. So we don't need to soft-reset the PHY as part of the
chip-specific configuration.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 2f3e3ae08664..c16b97a56d9f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -3800,8 +3800,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy(tp, 0x1f, 0x0005);
 	rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
 	rtl_writephy(tp, 0x1f, 0x0000);
-	/* soft-reset phy */
-	rtl_writephy(tp, MII_BMCR, BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
 
 	/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
 	rtl_rar_exgmac_set(tp, tp->dev->dev_addr);

From 73219de29486637c82925e963380bda020e4edfb Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sun, 18 Feb 2018 21:39:17 -0500
Subject: [PATCH 0249/1678] net/8390: Remove redundant make dependencies

The hydra, zorro8390 and mcf8390 drivers all #include "lib8390.c" and
have no need for 8390.o. modinfo confirms no dependency on 8390.ko.
Drop the redundant dependency from the Makefile. objdump confirms
that this patch has no effect on the module binaries.

The superfluous additions of 8390.o were introduced in
commit 644570b83026 ("8390: Move the 8390 related drivers").

Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/8390/Makefile b/drivers/net/ethernet/8390/Makefile
index f975c2fc88a3..1d650e66cc6e 100644
--- a/drivers/net/ethernet/8390/Makefile
+++ b/drivers/net/ethernet/8390/Makefile
@@ -7,8 +7,8 @@ obj-$(CONFIG_MAC8390) += mac8390.o
 obj-$(CONFIG_APNE) += apne.o 8390.o
 obj-$(CONFIG_ARM_ETHERH) += etherh.o
 obj-$(CONFIG_AX88796) += ax88796.o
-obj-$(CONFIG_HYDRA) += hydra.o 8390.o
-obj-$(CONFIG_MCF8390) += mcf8390.o 8390.o
+obj-$(CONFIG_HYDRA) += hydra.o
+obj-$(CONFIG_MCF8390) += mcf8390.o
 obj-$(CONFIG_NE2000) += ne.o 8390p.o
 obj-$(CONFIG_NE2K_PCI) += ne2k-pci.o 8390.o
 obj-$(CONFIG_PCMCIA_AXNET) += axnet_cs.o 8390.o
@@ -16,4 +16,4 @@ obj-$(CONFIG_PCMCIA_PCNET) += pcnet_cs.o 8390.o
 obj-$(CONFIG_STNIC) += stnic.o 8390.o
 obj-$(CONFIG_ULTRA) += smc-ultra.o 8390.o
 obj-$(CONFIG_WD80x3) += wd.o 8390.o
-obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o
+obj-$(CONFIG_ZORRO8390) += zorro8390.o

From 646fe03b0d5fb678f0901e25eb184ae8860744fa Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sun, 18 Feb 2018 21:39:17 -0500
Subject: [PATCH 0250/1678] net/8390: Fix msg_enable patch snafu

The lib8390 module parameter 'msg_enable' doesn't do anything useful:
it causes an ancient version string to be logged.

Remove redundant code that logs the same string.

In ne.c and wd.c, the value of ei_local->msg_enable is used before
being assigned. Use ne_msg_enable and wd_msg_enable, respectively.

Most of the other 8390 drivers never assign ei_local->msg_enable.
Use the 'msg_enable' module parameter from lib8390 as the default
value.

Eliminate the pointless static and local variables.

Clean up an indentation mistake.

All of these issues originated from the same patch.

Cc: Russell King <linux@armlinux.org.uk>
Fixes: c45f812f0280 ("8390 : Replace ei_debug with msg_enable/NETIF_MSG_* feature")
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/ax88796.c   |  3 ---
 drivers/net/ethernet/8390/axnet_cs.c  |  2 --
 drivers/net/ethernet/8390/etherh.c    | 17 -----------------
 drivers/net/ethernet/8390/hydra.c     |  4 ----
 drivers/net/ethernet/8390/lib8390.c   |  2 ++
 drivers/net/ethernet/8390/mac8390.c   |  8 --------
 drivers/net/ethernet/8390/mcf8390.c   |  4 ----
 drivers/net/ethernet/8390/ne.c        |  2 +-
 drivers/net/ethernet/8390/pcnet_cs.c  |  4 ----
 drivers/net/ethernet/8390/wd.c        |  2 +-
 drivers/net/ethernet/8390/zorro8390.c |  5 -----
 11 files changed, 4 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 245554707163..da61cf3cb3a9 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -77,8 +77,6 @@ static unsigned char version[] = "ax88796.c: Copyright 2005,2007 Simtec Electron
 
 #define AX_GPOC_PPDSET	BIT(6)
 
-static u32 ax_msg_enable;
-
 /* device private data */
 
 struct ax_device {
@@ -747,7 +745,6 @@ static int ax_init_dev(struct net_device *dev)
 	ei_local->block_output = &ax_block_output;
 	ei_local->get_8390_hdr = &ax_get_8390_hdr;
 	ei_local->priv = 0;
-	ei_local->msg_enable = ax_msg_enable;
 
 	dev->netdev_ops = &ax_netdev_ops;
 	dev->ethtool_ops = &ax_ethtool_ops;
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 7bddb8efb6d5..d422a124cd7c 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -104,7 +104,6 @@ static void AX88190_init(struct net_device *dev, int startp);
 static int ax_open(struct net_device *dev);
 static int ax_close(struct net_device *dev);
 static irqreturn_t ax_interrupt(int irq, void *dev_id);
-static u32 axnet_msg_enable;
 
 /*====================================================================*/
 
@@ -151,7 +150,6 @@ static int axnet_probe(struct pcmcia_device *link)
 	return -ENOMEM;
 
     ei_local = netdev_priv(dev);
-    ei_local->msg_enable = axnet_msg_enable;
     spin_lock_init(&ei_local->page_lock);
 
     info = PRIV(dev);
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index 11cbf22ad201..32e9627e3880 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -64,8 +64,6 @@ static char version[] =
 
 #include "lib8390.c"
 
-static u32 etherh_msg_enable;
-
 struct etherh_priv {
 	void __iomem	*ioc_fast;
 	void __iomem	*memc;
@@ -501,18 +499,6 @@ etherh_close(struct net_device *dev)
 	return 0;
 }
 
-/*
- * Initialisation
- */
-
-static void __init etherh_banner(void)
-{
-	static int version_printed;
-
-	if ((etherh_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0))
-		pr_info("%s", version);
-}
-
 /*
  * Read the ethernet address string from the on board rom.
  * This is an ascii string...
@@ -671,8 +657,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
 	struct etherh_priv *eh;
 	int ret;
 
-	etherh_banner();
-
 	ret = ecard_request_resources(ec);
 	if (ret)
 		goto out;
@@ -757,7 +741,6 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id)
 	ei_local->block_output  = etherh_block_output;
 	ei_local->get_8390_hdr  = etherh_get_header;
 	ei_local->interface_num = 0;
-	ei_local->msg_enable = etherh_msg_enable;
 
 	etherh_reset(dev);
 	__NS8390_init(dev, 0);
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 8ae249195301..941754ea78ec 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -66,7 +66,6 @@ static void hydra_block_input(struct net_device *dev, int count,
 static void hydra_block_output(struct net_device *dev, int count,
 			       const unsigned char *buf, int start_page);
 static void hydra_remove_one(struct zorro_dev *z);
-static u32 hydra_msg_enable;
 
 static struct zorro_device_id hydra_zorro_tbl[] = {
     { ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
@@ -119,7 +118,6 @@ static int hydra_init(struct zorro_dev *z)
     int start_page, stop_page;
     int j;
     int err;
-    struct ei_device *ei_local;
 
     static u32 hydra_offsets[16] = {
 	0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
@@ -138,8 +136,6 @@ static int hydra_init(struct zorro_dev *z)
     start_page = NESM_START_PG;
     stop_page = NESM_STOP_PG;
 
-    ei_local = netdev_priv(dev);
-    ei_local->msg_enable = hydra_msg_enable;
     dev->base_addr = ioaddr;
     dev->irq = IRQ_AMIGA_PORTS;
 
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index 60f8e2c8e726..5d9bbde9fe68 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -975,6 +975,8 @@ static void ethdev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	spin_lock_init(&ei_local->page_lock);
+
+	ei_local->msg_enable = msg_enable;
 }
 
 /**
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 2f91ce8dc614..abe50338b9f7 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -168,7 +168,6 @@ static void slow_sane_block_output(struct net_device *dev, int count,
 				   const unsigned char *buf, int start_page);
 static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
 static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
-static u32 mac8390_msg_enable;
 
 static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
 {
@@ -299,8 +298,6 @@ static bool __init mac8390_init(struct net_device *dev,
 	int offset;
 	volatile unsigned short *i;
 
-	printk_once(KERN_INFO pr_fmt("%s"), version);
-
 	dev->irq = SLOT2IRQ(ndev->board->slot);
 	/* This is getting to be a habit */
 	dev->base_addr = (ndev->board->slot_addr |
@@ -398,8 +395,6 @@ struct net_device * __init mac8390_probe(int unit)
 	struct net_device *dev;
 	struct nubus_rsrc *ndev = NULL;
 	int err = -ENODEV;
-	struct ei_device *ei_local;
-
 	static unsigned int slots;
 
 	enum mac8390_type cardtype;
@@ -441,9 +436,6 @@ struct net_device * __init mac8390_probe(int unit)
 	if (!ndev)
 		goto out;
 
-	 ei_local = netdev_priv(dev);
-	 ei_local->msg_enable = mac8390_msg_enable;
-
 	err = register_netdev(dev);
 	if (err)
 		goto out;
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index 4bb967bc879e..4ad8031ab669 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -38,7 +38,6 @@ static const char version[] =
 
 #define NESM_START_PG	0x40	/* First page of TX buffer */
 #define NESM_STOP_PG	0x80	/* Last page +1 of RX ring */
-static u32 mcf8390_msg_enable;
 
 #ifdef NE2000_ODDOFFSET
 /*
@@ -407,7 +406,6 @@ static int mcf8390_init(struct net_device *dev)
 static int mcf8390_probe(struct platform_device *pdev)
 {
 	struct net_device *dev;
-	struct ei_device *ei_local;
 	struct resource *mem, *irq;
 	resource_size_t msize;
 	int ret;
@@ -435,8 +433,6 @@ static int mcf8390_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	platform_set_drvdata(pdev, dev);
-	ei_local = netdev_priv(dev);
-	ei_local->msg_enable = mcf8390_msg_enable;
 
 	dev->irq = irq->start;
 	dev->base_addr = mem->start;
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 66f47987e2a2..4cdff6e6af89 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -485,7 +485,7 @@ static int __init ne_probe1(struct net_device *dev, unsigned long ioaddr)
 		mdelay(10);		/* wait 10ms for interrupt to propagate */
 		outb_p(0x00, ioaddr + EN0_IMR); 		/* Mask it again. */
 		dev->irq = probe_irq_off(cookie);
-		if (netif_msg_probe(ei_local))
+		if (ne_msg_enable & NETIF_MSG_PROBE)
 			pr_cont(" autoirq is %d", dev->irq);
 	} else if (dev->irq == 2)
 		/* Fixup for users that don't know that IRQ 2 is really IRQ 9,
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index bcad4a7fac9f..61e43802b9a5 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -66,7 +66,6 @@
 #define PCNET_RDC_TIMEOUT (2*HZ/100)	/* Max wait in jiffies for Tx RDC */
 
 static const char *if_names[] = { "auto", "10baseT", "10base2"};
-static u32 pcnet_msg_enable;
 
 /*====================================================================*/
 
@@ -556,7 +555,6 @@ static int pcnet_config(struct pcmcia_device *link)
     int start_pg, stop_pg, cm_offset;
     int has_shmem = 0;
     struct hw_info *local_hw_info;
-    struct ei_device *ei_local;
 
     dev_dbg(&link->dev, "pcnet_config\n");
 
@@ -606,8 +604,6 @@ static int pcnet_config(struct pcmcia_device *link)
 	mii_phy_probe(dev);
 
     SET_NETDEV_DEV(dev, &link->dev);
-    ei_local = netdev_priv(dev);
-    ei_local->msg_enable = pcnet_msg_enable;
 
     if (register_netdev(dev) != 0) {
 	pr_notice("register_netdev() failed\n");
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index 6efa2722f850..fb17c2c7e1dd 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -299,7 +299,7 @@ static int __init wd_probe1(struct net_device *dev, int ioaddr)
 
 			outb_p(0x00, nic_addr+EN0_IMR);	/* Mask all intrs. again. */
 
-			if (netif_msg_drv(ei_local))
+			if (wd_msg_enable & NETIF_MSG_PROBE)
 				pr_cont(" autoirq is %d", dev->irq);
 			if (dev->irq < 2)
 				dev->irq = word16 ? 10 : 5;
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index 6d93956b293b..35a500a21521 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -44,8 +44,6 @@
 static const char version[] =
 	"8390.c:v1.10cvs 9/23/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
 
-static u32 zorro8390_msg_enable;
-
 #include "lib8390.c"
 
 #define DRV_NAME	"zorro8390"
@@ -296,7 +294,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
 	int err;
 	unsigned char SA_prom[32];
 	int start_page, stop_page;
-	struct ei_device *ei_local = netdev_priv(dev);
 	static u32 zorro8390_offsets[16] = {
 		0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
 		0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
@@ -388,8 +385,6 @@ static int zorro8390_init(struct net_device *dev, unsigned long board,
 	dev->netdev_ops = &zorro8390_netdev_ops;
 	__NS8390_init(dev, 0);
 
-	ei_local->msg_enable = zorro8390_msg_enable;
-
 	err = register_netdev(dev);
 	if (err) {
 		free_irq(IRQ_AMIGA_PORTS, dev);

From 494a973e22954249d35152cce1dcfba6d10c52e4 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sun, 18 Feb 2018 21:39:17 -0500
Subject: [PATCH 0251/1678] net/mac8390: Convert to nubus_driver

This resolves an old bug that constrained this driver to no more than
one card.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Space.c                 |   3 -
 drivers/net/ethernet/8390/mac8390.c | 139 ++++++++++++++--------------
 include/net/Space.h                 |   1 -
 3 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 11fe71278f40..64333ec999ac 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -114,9 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
 #ifdef CONFIG_MVME147_NET	/* MVME147 internal Ethernet */
 	{mvme147lance_probe, 0},
 #endif
-#ifdef CONFIG_MAC8390           /* NuBus NS8390-based cards */
-	{mac8390_probe, 0},
-#endif
 #ifdef CONFIG_MAC89x0
 	{mac89x0_probe, 0},
 #endif
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index abe50338b9f7..8042dd73eb6a 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -123,8 +123,7 @@ enum mac8390_access {
 };
 
 extern int mac8390_memtest(struct net_device *dev);
-static int mac8390_initdev(struct net_device *dev,
-			   struct nubus_rsrc *ndev,
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
 			   enum mac8390_type type);
 
 static int mac8390_open(struct net_device *dev);
@@ -169,7 +168,7 @@ static void slow_sane_block_output(struct net_device *dev, int count,
 static void word_memcpy_tocard(unsigned long tp, const void *fp, int count);
 static void word_memcpy_fromcard(void *tp, unsigned long fp, int count);
 
-static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
+static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres)
 {
 	switch (fres->dr_sw) {
 	case NUBUS_DRSW_3COM:
@@ -235,7 +234,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres)
 	return MAC8390_NONE;
 }
 
-static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
+static enum mac8390_access mac8390_testio(unsigned long membase)
 {
 	unsigned long outdata = 0xA5A0B5B0;
 	unsigned long indata =  0x00000000;
@@ -253,7 +252,7 @@ static enum mac8390_access __init mac8390_testio(volatile unsigned long membase)
 	return ACCESS_UNKNOWN;
 }
 
-static int __init mac8390_memsize(unsigned long membase)
+static int mac8390_memsize(unsigned long membase)
 {
 	unsigned long flags;
 	int i, j;
@@ -289,28 +288,28 @@ static int __init mac8390_memsize(unsigned long membase)
 	return i * 0x1000;
 }
 
-static bool __init mac8390_init(struct net_device *dev,
-				struct nubus_rsrc *ndev,
-				enum mac8390_type cardtype)
+static bool mac8390_rsrc_init(struct net_device *dev,
+			      struct nubus_rsrc *fres,
+			      enum mac8390_type cardtype)
 {
+	struct nubus_board *board = fres->board;
 	struct nubus_dir dir;
 	struct nubus_dirent ent;
 	int offset;
 	volatile unsigned short *i;
 
-	dev->irq = SLOT2IRQ(ndev->board->slot);
+	dev->irq = SLOT2IRQ(board->slot);
 	/* This is getting to be a habit */
-	dev->base_addr = (ndev->board->slot_addr |
-			  ((ndev->board->slot & 0xf) << 20));
+	dev->base_addr = board->slot_addr | ((board->slot & 0xf) << 20);
 
 	/*
 	 * Get some Nubus info - we will trust the card's idea
 	 * of where its memory and registers are.
 	 */
 
-	if (nubus_get_func_dir(ndev, &dir) == -1) {
+	if (nubus_get_func_dir(fres, &dir) == -1) {
 		pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
-		       dev->name, ndev->board->slot);
+		       dev->name, board->slot);
 		return false;
 	}
 
@@ -327,7 +326,7 @@ static bool __init mac8390_init(struct net_device *dev,
 		if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
 				    &ent) == -1) {
 			pr_err("%s: Memory offset resource for slot %X not found!\n",
-			       dev->name, ndev->board->slot);
+			       dev->name, board->slot);
 			return false;
 		}
 		nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -338,7 +337,7 @@ static bool __init mac8390_init(struct net_device *dev,
 		if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
 				    &ent) == -1) {
 			pr_info("%s: Memory length resource for slot %X not found, probing\n",
-				dev->name, ndev->board->slot);
+				dev->name, board->slot);
 			offset = mac8390_memsize(dev->mem_start);
 		} else {
 			nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -348,25 +347,25 @@ static bool __init mac8390_init(struct net_device *dev,
 		switch (cardtype) {
 		case MAC8390_KINETICS:
 		case MAC8390_DAYNA: /* it's the same */
-			dev->base_addr = (int)(ndev->board->slot_addr +
+			dev->base_addr = (int)(board->slot_addr +
 					       DAYNA_8390_BASE);
-			dev->mem_start = (int)(ndev->board->slot_addr +
+			dev->mem_start = (int)(board->slot_addr +
 					       DAYNA_8390_MEM);
 			dev->mem_end = dev->mem_start +
 				       mac8390_memsize(dev->mem_start);
 			break;
 		case MAC8390_INTERLAN:
-			dev->base_addr = (int)(ndev->board->slot_addr +
+			dev->base_addr = (int)(board->slot_addr +
 					       INTERLAN_8390_BASE);
-			dev->mem_start = (int)(ndev->board->slot_addr +
+			dev->mem_start = (int)(board->slot_addr +
 					       INTERLAN_8390_MEM);
 			dev->mem_end = dev->mem_start +
 				       mac8390_memsize(dev->mem_start);
 			break;
 		case MAC8390_CABLETRON:
-			dev->base_addr = (int)(ndev->board->slot_addr +
+			dev->base_addr = (int)(board->slot_addr +
 					       CABLETRON_8390_BASE);
-			dev->mem_start = (int)(ndev->board->slot_addr +
+			dev->mem_start = (int)(board->slot_addr +
 					       CABLETRON_8390_MEM);
 			/* The base address is unreadable if 0x00
 			 * has been written to the command register
@@ -382,7 +381,7 @@ static bool __init mac8390_init(struct net_device *dev,
 
 		default:
 			pr_err("Card type %s is unsupported, sorry\n",
-			       ndev->board->name);
+			       board->name);
 			return false;
 		}
 	}
@@ -390,86 +389,83 @@ static bool __init mac8390_init(struct net_device *dev,
 	return true;
 }
 
-struct net_device * __init mac8390_probe(int unit)
+static int mac8390_device_probe(struct nubus_board *board)
 {
 	struct net_device *dev;
-	struct nubus_rsrc *ndev = NULL;
 	int err = -ENODEV;
-	static unsigned int slots;
-
-	enum mac8390_type cardtype;
-
-	/* probably should check for Nubus instead */
-
-	if (!MACH_IS_MAC)
-		return ERR_PTR(-ENODEV);
+	struct nubus_rsrc *fres;
+	enum mac8390_type cardtype = MAC8390_NONE;
 
 	dev = ____alloc_ei_netdev(0);
 	if (!dev)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	if (unit >= 0)
-		sprintf(dev->name, "eth%d", unit);
+	SET_NETDEV_DEV(dev, &board->dev);
 
-	for_each_func_rsrc(ndev) {
-		if (ndev->category != NUBUS_CAT_NETWORK ||
-		    ndev->type != NUBUS_TYPE_ETHERNET)
+	for_each_board_func_rsrc(board, fres) {
+		if (fres->category != NUBUS_CAT_NETWORK ||
+		    fres->type != NUBUS_TYPE_ETHERNET)
 			continue;
 
-		/* Have we seen it already? */
-		if (slots & (1 << ndev->board->slot))
-			continue;
-		slots |= 1 << ndev->board->slot;
-
-		cardtype = mac8390_ident(ndev);
+		cardtype = mac8390_ident(fres);
 		if (cardtype == MAC8390_NONE)
 			continue;
 
-		if (!mac8390_init(dev, ndev, cardtype))
-			continue;
-
-		/* Do the nasty 8390 stuff */
-		if (!mac8390_initdev(dev, ndev, cardtype))
+		if (mac8390_rsrc_init(dev, fres, cardtype))
 			break;
 	}
+	if (!fres)
+		goto out;
 
-	if (!ndev)
+	err = mac8390_initdev(dev, board, cardtype);
+	if (err)
 		goto out;
 
 	err = register_netdev(dev);
 	if (err)
 		goto out;
-	return dev;
+
+	nubus_set_drvdata(board, dev);
+	return 0;
 
 out:
 	free_netdev(dev);
-	return ERR_PTR(err);
+	return err;
 }
 
-#ifdef MODULE
+static int mac8390_device_remove(struct nubus_board *board)
+{
+	struct net_device *dev = nubus_get_drvdata(board);
+
+	unregister_netdev(dev);
+	free_netdev(dev);
+	return 0;
+}
+
+static struct nubus_driver mac8390_driver = {
+	.probe = mac8390_device_probe,
+	.remove = mac8390_device_remove,
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+	}
+};
+
 MODULE_AUTHOR("David Huggins-Daines <dhd@debian.org> and others");
 MODULE_DESCRIPTION("Macintosh NS8390-based Nubus Ethernet driver");
 MODULE_LICENSE("GPL");
 
-static struct net_device *dev_mac8390;
-
-int __init init_module(void)
+static int __init mac8390_init(void)
 {
-	dev_mac8390 = mac8390_probe(-1);
-	if (IS_ERR(dev_mac8390)) {
-		pr_warn("mac8390: No card found\n");
-		return PTR_ERR(dev_mac8390);
-	}
-	return 0;
+	return nubus_driver_register(&mac8390_driver);
 }
+module_init(mac8390_init);
 
-void __exit cleanup_module(void)
+static void __exit mac8390_exit(void)
 {
-	unregister_netdev(dev_mac8390);
-	free_netdev(dev_mac8390);
+	nubus_driver_unregister(&mac8390_driver);
 }
-
-#endif /* MODULE */
+module_exit(mac8390_exit);
 
 static const struct net_device_ops mac8390_netdev_ops = {
 	.ndo_open 		= mac8390_open,
@@ -485,9 +481,8 @@ static const struct net_device_ops mac8390_netdev_ops = {
 #endif
 };
 
-static int __init mac8390_initdev(struct net_device *dev,
-				  struct nubus_rsrc *ndev,
-				  enum mac8390_type type)
+static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
+			   enum mac8390_type type)
 {
 	static u32 fwrd4_offsets[16] = {
 		0,      4,      8,      12,
@@ -605,7 +600,7 @@ static int __init mac8390_initdev(struct net_device *dev,
 
 	default:
 		pr_err("Card type %s is unsupported, sorry\n",
-		       ndev->board->name);
+		       board->name);
 		return -ENODEV;
 	}
 
@@ -613,7 +608,7 @@ static int __init mac8390_initdev(struct net_device *dev,
 
 	/* Good, done, now spit out some messages */
 	pr_info("%s: %s in slot %X (type %s)\n",
-		dev->name, ndev->board->name, ndev->board->slot,
+		dev->name, board->name, board->slot,
 		cardname[type]);
 	pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
 		dev->dev_addr, dev->irq,
diff --git a/include/net/Space.h b/include/net/Space.h
index 27fb5c937c4f..336da258885a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
 struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
-struct net_device *mac8390_probe(int unit);
 struct net_device *mac89x0_probe(int unit);
 struct net_device *cops_probe(int unit);
 struct net_device *ltpc_probe(void);

From 4a1b27c9e32c3993bb2c9efd8776117ca7eefa78 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sun, 18 Feb 2018 21:39:17 -0500
Subject: [PATCH 0252/1678] net/mac8390: Fix log messages

Use dev_foo() to log the slot number instead of the unexpanded "eth%d"
format string.
Disambiguate the two identical "Card type %s is unsupported" messages.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/mac8390.c | 36 ++++++++++++++---------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index 8042dd73eb6a..b6d735bf8011 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -308,14 +308,14 @@ static bool mac8390_rsrc_init(struct net_device *dev,
 	 */
 
 	if (nubus_get_func_dir(fres, &dir) == -1) {
-		pr_err("%s: Unable to get Nubus functional directory for slot %X!\n",
-		       dev->name, board->slot);
+		dev_err(&board->dev,
+			"Unable to get Nubus functional directory\n");
 		return false;
 	}
 
 	/* Get the MAC address */
 	if (nubus_find_rsrc(&dir, NUBUS_RESID_MAC_ADDRESS, &ent) == -1) {
-		pr_info("%s: Couldn't get MAC address!\n", dev->name);
+		dev_info(&board->dev, "MAC address resource not found\n");
 		return false;
 	}
 
@@ -325,8 +325,8 @@ static bool mac8390_rsrc_init(struct net_device *dev,
 		nubus_rewinddir(&dir);
 		if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_BASEOS,
 				    &ent) == -1) {
-			pr_err("%s: Memory offset resource for slot %X not found!\n",
-			       dev->name, board->slot);
+			dev_err(&board->dev,
+				"Memory offset resource not found\n");
 			return false;
 		}
 		nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -336,8 +336,8 @@ static bool mac8390_rsrc_init(struct net_device *dev,
 		nubus_rewinddir(&dir);
 		if (nubus_find_rsrc(&dir, NUBUS_RESID_MINOR_LENGTH,
 				    &ent) == -1) {
-			pr_info("%s: Memory length resource for slot %X not found, probing\n",
-				dev->name, board->slot);
+			dev_info(&board->dev,
+				 "Memory length resource not found, probing\n");
 			offset = mac8390_memsize(dev->mem_start);
 		} else {
 			nubus_get_rsrc_mem(&offset, &ent, 4);
@@ -380,8 +380,8 @@ static bool mac8390_rsrc_init(struct net_device *dev,
 			break;
 
 		default:
-			pr_err("Card type %s is unsupported, sorry\n",
-			       board->name);
+			dev_err(&board->dev,
+				"No known base address for card type\n");
 			return false;
 		}
 	}
@@ -533,7 +533,8 @@ static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
 	case MAC8390_APPLE:
 		switch (mac8390_testio(dev->mem_start)) {
 		case ACCESS_UNKNOWN:
-			pr_err("Don't know how to access card memory!\n");
+			dev_err(&board->dev,
+				"Don't know how to access card memory\n");
 			return -ENODEV;
 
 		case ACCESS_16:
@@ -599,21 +600,18 @@ static int mac8390_initdev(struct net_device *dev, struct nubus_board *board,
 		break;
 
 	default:
-		pr_err("Card type %s is unsupported, sorry\n",
-		       board->name);
+		dev_err(&board->dev, "Unsupported card type\n");
 		return -ENODEV;
 	}
 
 	__NS8390_init(dev, 0);
 
 	/* Good, done, now spit out some messages */
-	pr_info("%s: %s in slot %X (type %s)\n",
-		dev->name, board->name, board->slot,
-		cardname[type]);
-	pr_info("MAC %pM IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
-		dev->dev_addr, dev->irq,
-		(unsigned int)(dev->mem_end - dev->mem_start) >> 10,
-		dev->mem_start, access_bitmode ? 32 : 16);
+	dev_info(&board->dev, "%s (type %s)\n", board->name, cardname[type]);
+	dev_info(&board->dev, "MAC %pM, IRQ %d, %d KB shared memory at %#lx, %d-bit access.\n",
+		 dev->dev_addr, dev->irq,
+		 (unsigned int)(dev->mem_end - dev->mem_start) >> 10,
+		 dev->mem_start, access_bitmode ? 32 : 16);
 	return 0;
 }
 

From f905311356ecb9c88aceeb4fa63ee0a244fc2d72 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 19 Feb 2018 12:10:20 -0600
Subject: [PATCH 0253/1678] rds: send: mark expected switch fall-through in
 rds_rm_size

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Addresses-Coverity-ID: 1465362 ("Missing break in switch")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by:  Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/rds/send.c b/net/rds/send.c
index 028ab598ac1b..79d158b3def0 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -902,6 +902,8 @@ static int rds_rm_size(struct msghdr *msg, int num_sgs)
 
 		case RDS_CMSG_ZCOPY_COOKIE:
 			zcopy_cookie = true;
+			/* fall through */
+
 		case RDS_CMSG_RDMA_DEST:
 		case RDS_CMSG_RDMA_MAP:
 			cmsg_groups |= 2;

From 8862541de781329b3e1e553bd936cecf998dc565 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Mon, 19 Feb 2018 13:30:05 -0600
Subject: [PATCH 0254/1678] ibmvnic: Rename active queue count variables

Rename the tx/rx active pool variables to be tx/rx active scrq
counts. The tx/rx pools are per sub-crq so this is a more appropriate
name. This also is a preparatory step for using thiese variables
for handling updates to sub-crqs and napi based on the active
count.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++--------
 drivers/net/ethernet/ibm/ibmvnic.h |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 64770a1c406d..f1166efd8eb7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -461,7 +461,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 	if (!adapter->rx_pool)
 		return;
 
-	for (i = 0; i < adapter->num_active_rx_pools; i++) {
+	for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
 		rx_pool = &adapter->rx_pool[i];
 
 		netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
@@ -484,7 +484,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->rx_pool);
 	adapter->rx_pool = NULL;
-	adapter->num_active_rx_pools = 0;
+	adapter->num_active_rx_scrqs = 0;
 }
 
 static int init_rx_pools(struct net_device *netdev)
@@ -509,7 +509,7 @@ static int init_rx_pools(struct net_device *netdev)
 		return -1;
 	}
 
-	adapter->num_active_rx_pools = 0;
+	adapter->num_active_rx_scrqs = 0;
 
 	for (i = 0; i < rxadd_subcrqs; i++) {
 		rx_pool = &adapter->rx_pool[i];
@@ -554,7 +554,7 @@ static int init_rx_pools(struct net_device *netdev)
 		rx_pool->next_free = 0;
 	}
 
-	adapter->num_active_rx_pools = rxadd_subcrqs;
+	adapter->num_active_rx_scrqs = rxadd_subcrqs;
 
 	return 0;
 }
@@ -613,7 +613,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 	if (!adapter->tx_pool)
 		return;
 
-	for (i = 0; i < adapter->num_active_tx_pools; i++) {
+	for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
 		netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
 		tx_pool = &adapter->tx_pool[i];
 		kfree(tx_pool->tx_buff);
@@ -624,7 +624,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->tx_pool);
 	adapter->tx_pool = NULL;
-	adapter->num_active_tx_pools = 0;
+	adapter->num_active_tx_scrqs = 0;
 }
 
 static int init_tx_pools(struct net_device *netdev)
@@ -641,7 +641,7 @@ static int init_tx_pools(struct net_device *netdev)
 	if (!adapter->tx_pool)
 		return -1;
 
-	adapter->num_active_tx_pools = 0;
+	adapter->num_active_tx_scrqs = 0;
 
 	for (i = 0; i < tx_subcrqs; i++) {
 		tx_pool = &adapter->tx_pool[i];
@@ -690,7 +690,7 @@ static int init_tx_pools(struct net_device *netdev)
 		tx_pool->producer_index = 0;
 	}
 
-	adapter->num_active_tx_pools = tx_subcrqs;
+	adapter->num_active_tx_scrqs = tx_subcrqs;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 2f51458ccdc3..68e712c69211 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1092,8 +1092,8 @@ struct ibmvnic_adapter {
 	u64 opt_rxba_entries_per_subcrq;
 	__be64 tx_rx_desc_req;
 	u8 map_id;
-	u64 num_active_rx_pools;
-	u64 num_active_tx_pools;
+	u64 num_active_rx_scrqs;
+	u64 num_active_tx_scrqs;
 
 	struct tasklet_struct tasklet;
 	enum vnic_state state;

From d9043c102e73b9abc01f2120d47ba8195823172a Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Mon, 19 Feb 2018 13:30:14 -0600
Subject: [PATCH 0255/1678] ibmvnic: Move active sub-crq count settings

Inpreparation for using the active scrq count to track more active
resources, move the setting of the active count to after initialization
occurs in initial driver init and during driver reset.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index f1166efd8eb7..0c9adeffbe46 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -484,7 +484,6 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->rx_pool);
 	adapter->rx_pool = NULL;
-	adapter->num_active_rx_scrqs = 0;
 }
 
 static int init_rx_pools(struct net_device *netdev)
@@ -509,8 +508,6 @@ static int init_rx_pools(struct net_device *netdev)
 		return -1;
 	}
 
-	adapter->num_active_rx_scrqs = 0;
-
 	for (i = 0; i < rxadd_subcrqs; i++) {
 		rx_pool = &adapter->rx_pool[i];
 
@@ -554,8 +551,6 @@ static int init_rx_pools(struct net_device *netdev)
 		rx_pool->next_free = 0;
 	}
 
-	adapter->num_active_rx_scrqs = rxadd_subcrqs;
-
 	return 0;
 }
 
@@ -624,7 +619,6 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->tx_pool);
 	adapter->tx_pool = NULL;
-	adapter->num_active_tx_scrqs = 0;
 }
 
 static int init_tx_pools(struct net_device *netdev)
@@ -641,8 +635,6 @@ static int init_tx_pools(struct net_device *netdev)
 	if (!adapter->tx_pool)
 		return -1;
 
-	adapter->num_active_tx_scrqs = 0;
-
 	for (i = 0; i < tx_subcrqs; i++) {
 		tx_pool = &adapter->tx_pool[i];
 
@@ -690,8 +682,6 @@ static int init_tx_pools(struct net_device *netdev)
 		tx_pool->producer_index = 0;
 	}
 
-	adapter->num_active_tx_scrqs = tx_subcrqs;
-
 	return 0;
 }
 
@@ -991,6 +981,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 		return rc;
 
 	rc = init_tx_pools(netdev);
+
+	adapter->num_active_tx_scrqs = adapter->req_tx_queues;
+	adapter->num_active_rx_scrqs = adapter->req_rx_queues;
+
 	return rc;
 }
 
@@ -1692,6 +1686,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			release_tx_pools(adapter);
 			init_rx_pools(netdev);
 			init_tx_pools(netdev);
+
+			adapter->num_active_tx_scrqs = adapter->req_tx_queues;
+			adapter->num_active_rx_scrqs = adapter->req_rx_queues;
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)

From d7c0ef36bde03af0d119d895bba4cc0362e79dbb Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Mon, 19 Feb 2018 13:30:31 -0600
Subject: [PATCH 0256/1678] ibmvnic: Free and re-allocate scrqs when tx/rx
 scrqs change

When the driver resets it is possible that the number of tx/rx
sub-crqs can change. This patch handles this so that the driver does
not try to access non-existent sub-crqs.

The count for releasing sub crqs depends on the adapter state. The
active queue count is not set in probe, so if we are relasing in probe
state we use the request queue count.

Additionally, a parameter is added to release_sub_crqs() so that
we know if the h_call to free the sub-crq needs to be made. In
the reset path we have to do a reset of the main crq, which is
a free followed by a register of the main crq. The free of main
crq results in all of the sub crq's being free'ed. When updating
sub-crq count in the reset path we do not want to h_free the
sub-crqs, they are already free'ed.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 78 ++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0c9adeffbe46..33cfd277f008 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -90,7 +90,7 @@ MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
 static int ibmvnic_remove(struct vio_dev *);
-static void release_sub_crqs(struct ibmvnic_adapter *);
+static void release_sub_crqs(struct ibmvnic_adapter *, bool);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *);
@@ -740,7 +740,7 @@ static int ibmvnic_login(struct net_device *netdev)
 	do {
 		if (adapter->renegotiate) {
 			adapter->renegotiate = false;
-			release_sub_crqs(adapter);
+			release_sub_crqs(adapter, 1);
 
 			reinit_completion(&adapter->init_done);
 			send_cap_queries(adapter);
@@ -1648,7 +1648,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 	if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
 	    adapter->wait_for_reset) {
 		release_resources(adapter);
-		release_sub_crqs(adapter);
+		release_sub_crqs(adapter, 1);
 		release_crq_queue(adapter);
 	}
 
@@ -2288,24 +2288,27 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
 }
 
 static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
-				  struct ibmvnic_sub_crq_queue *scrq)
+				  struct ibmvnic_sub_crq_queue *scrq,
+				  bool do_h_free)
 {
 	struct device *dev = &adapter->vdev->dev;
 	long rc;
 
 	netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");
 
-	/* Close the sub-crqs */
-	do {
-		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
-					adapter->vdev->unit_address,
-					scrq->crq_num);
-	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+	if (do_h_free) {
+		/* Close the sub-crqs */
+		do {
+			rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+						adapter->vdev->unit_address,
+						scrq->crq_num);
+		} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
 
-	if (rc) {
-		netdev_err(adapter->netdev,
-			   "Failed to release sub-CRQ %16lx, rc = %ld\n",
-			   scrq->crq_num, rc);
+		if (rc) {
+			netdev_err(adapter->netdev,
+				   "Failed to release sub-CRQ %16lx, rc = %ld\n",
+				   scrq->crq_num, rc);
+		}
 	}
 
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
@@ -2373,12 +2376,21 @@ zero_page_failed:
 	return NULL;
 }
 
-static void release_sub_crqs(struct ibmvnic_adapter *adapter)
+static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 {
+	u64 num_tx_scrqs, num_rx_scrqs;
 	int i;
 
+	if (adapter->state == VNIC_PROBED) {
+		num_tx_scrqs = adapter->req_tx_queues;
+		num_rx_scrqs = adapter->req_rx_queues;
+	} else {
+		num_tx_scrqs = adapter->num_active_tx_scrqs;
+		num_rx_scrqs = adapter->num_active_rx_scrqs;
+	}
+
 	if (adapter->tx_scrq) {
-		for (i = 0; i < adapter->req_tx_queues; i++) {
+		for (i = 0; i < num_tx_scrqs; i++) {
 			if (!adapter->tx_scrq[i])
 				continue;
 
@@ -2391,7 +2403,8 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 				adapter->tx_scrq[i]->irq = 0;
 			}
 
-			release_sub_crq_queue(adapter, adapter->tx_scrq[i]);
+			release_sub_crq_queue(adapter, adapter->tx_scrq[i],
+					      do_h_free);
 		}
 
 		kfree(adapter->tx_scrq);
@@ -2399,7 +2412,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 	}
 
 	if (adapter->rx_scrq) {
-		for (i = 0; i < adapter->req_rx_queues; i++) {
+		for (i = 0; i < num_rx_scrqs; i++) {
 			if (!adapter->rx_scrq[i])
 				continue;
 
@@ -2412,7 +2425,8 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
 				adapter->rx_scrq[i]->irq = 0;
 			}
 
-			release_sub_crq_queue(adapter, adapter->rx_scrq[i]);
+			release_sub_crq_queue(adapter, adapter->rx_scrq[i],
+					      do_h_free);
 		}
 
 		kfree(adapter->rx_scrq);
@@ -2622,7 +2636,7 @@ req_tx_irq_failed:
 		free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
 		irq_dispose_mapping(adapter->rx_scrq[j]->irq);
 	}
-	release_sub_crqs(adapter);
+	release_sub_crqs(adapter, 1);
 	return rc;
 }
 
@@ -2704,7 +2718,7 @@ rx_failed:
 	adapter->tx_scrq = NULL;
 tx_failed:
 	for (i = 0; i < registered_queues; i++)
-		release_sub_crq_queue(adapter, allqueues[i]);
+		release_sub_crq_queue(adapter, allqueues[i], 1);
 	kfree(allqueues);
 	return -1;
 }
@@ -4331,6 +4345,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
 	unsigned long timeout = msecs_to_jiffies(30000);
+	u64 old_num_rx_queues, old_num_tx_queues;
 	int rc;
 
 	if (adapter->resetting && !adapter->wait_for_reset) {
@@ -4348,6 +4363,9 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 
 	adapter->from_passive_init = false;
 
+	old_num_rx_queues = adapter->req_rx_queues;
+	old_num_tx_queues = adapter->req_tx_queues;
+
 	init_completion(&adapter->init_done);
 	adapter->init_done_rc = 0;
 	ibmvnic_send_crq_init(adapter);
@@ -4367,10 +4385,18 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 		return -1;
 	}
 
-	if (adapter->resetting && !adapter->wait_for_reset)
-		rc = reset_sub_crq_queues(adapter);
-	else
+	if (adapter->resetting && !adapter->wait_for_reset) {
+		if (adapter->req_rx_queues != old_num_rx_queues ||
+		    adapter->req_tx_queues != old_num_tx_queues) {
+			release_sub_crqs(adapter, 0);
+			rc = init_sub_crqs(adapter);
+		} else {
+			rc = reset_sub_crq_queues(adapter);
+		}
+	} else {
 		rc = init_sub_crqs(adapter);
+	}
+
 	if (rc) {
 		dev_err(dev, "Initialization of sub crqs failed\n");
 		release_crq_queue(adapter);
@@ -4470,7 +4496,7 @@ ibmvnic_register_fail:
 	device_remove_file(&dev->dev, &dev_attr_failover);
 
 ibmvnic_init_fail:
-	release_sub_crqs(adapter);
+	release_sub_crqs(adapter, 1);
 	release_crq_queue(adapter);
 	free_netdev(netdev);
 
@@ -4487,7 +4513,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
 	mutex_lock(&adapter->reset_lock);
 
 	release_resources(adapter);
-	release_sub_crqs(adapter);
+	release_sub_crqs(adapter, 1);
 	release_crq_queue(adapter);
 
 	adapter->state = VNIC_REMOVED;

From 86f669b2b7491b5697d9e63538c960ad7e94c6cc Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Mon, 19 Feb 2018 13:30:39 -0600
Subject: [PATCH 0257/1678] ibmvnic: Make napi usage dynamic

In order to handle the number of rx sub crqs changing during a driver
reset, the ibmvnic driver also needs to update the number of napi.
To do this the code to init and free napi's is moved to their own
routines so they can be called during the reset process.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 70 +++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 33cfd277f008..7dc67de269f5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -730,6 +730,43 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
 	adapter->napi_enabled = false;
 }
 
+static int init_napi(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	adapter->napi = kcalloc(adapter->req_rx_queues,
+				sizeof(struct napi_struct), GFP_KERNEL);
+	if (!adapter->napi)
+		return -ENOMEM;
+
+	for (i = 0; i < adapter->req_rx_queues; i++) {
+		netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
+		netif_napi_add(adapter->netdev, &adapter->napi[i],
+			       ibmvnic_poll, NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+}
+
+static void release_napi(struct ibmvnic_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->napi)
+		return;
+
+	for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
+		if (&adapter->napi[i]) {
+			netdev_dbg(adapter->netdev,
+				   "Releasing napi[%d]\n", i);
+			netif_napi_del(&adapter->napi[i]);
+		}
+	}
+
+	kfree(adapter->napi);
+	adapter->napi = NULL;
+}
+
 static int ibmvnic_login(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -795,8 +832,6 @@ static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
 
 static void release_resources(struct ibmvnic_adapter *adapter)
 {
-	int i;
-
 	release_vpd_data(adapter);
 
 	release_tx_pools(adapter);
@@ -805,19 +840,7 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 	release_stats_token(adapter);
 	release_stats_buffers(adapter);
 	release_error_buffers(adapter);
-
-	if (adapter->napi) {
-		for (i = 0; i < adapter->req_rx_queues; i++) {
-			if (&adapter->napi[i]) {
-				netdev_dbg(adapter->netdev,
-					   "Releasing napi[%d]\n", i);
-				netif_napi_del(&adapter->napi[i]);
-			}
-		}
-	}
-	kfree(adapter->napi);
-	adapter->napi = NULL;
-
+	release_napi(adapter);
 	release_login_rsp_buffer(adapter);
 }
 
@@ -937,7 +960,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
 static int init_resources(struct ibmvnic_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	int i, rc;
+	int rc;
 
 	rc = set_real_num_queues(netdev);
 	if (rc)
@@ -963,16 +986,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 	}
 
 	adapter->map_id = 1;
-	adapter->napi = kcalloc(adapter->req_rx_queues,
-				sizeof(struct napi_struct), GFP_KERNEL);
-	if (!adapter->napi)
-		return -ENOMEM;
 
-	for (i = 0; i < adapter->req_rx_queues; i++) {
-		netdev_dbg(netdev, "Adding napi[%d]\n", i);
-		netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
-			       NAPI_POLL_WEIGHT);
-	}
+	rc = init_napi(adapter);
+	if (rc)
+		return rc;
 
 	send_map_query(adapter);
 
@@ -1687,6 +1704,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			init_rx_pools(netdev);
 			init_tx_pools(netdev);
 
+			release_napi(adapter);
+			init_napi(adapter);
+
 			adapter->num_active_tx_scrqs = adapter->req_tx_queues;
 			adapter->num_active_rx_scrqs = adapter->req_rx_queues;
 		} else {

From abcae546f7b49f02f4b73d6cc8b33c2fc5f2f8a4 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Mon, 19 Feb 2018 13:30:47 -0600
Subject: [PATCH 0258/1678] ibmvnic: Allocate max queues stats buffers

To avoid losing any stats when the number of sub-crqs change, allocate
the max number of stats buffers so a stats buffer exists all possible
sub-crqs.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7dc67de269f5..ad0f9cc1be04 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -361,14 +361,14 @@ static void release_stats_buffers(struct ibmvnic_adapter *adapter)
 static int init_stats_buffers(struct ibmvnic_adapter *adapter)
 {
 	adapter->tx_stats_buffers =
-				kcalloc(adapter->req_tx_queues,
+				kcalloc(IBMVNIC_MAX_QUEUES,
 					sizeof(struct ibmvnic_tx_queue_stats),
 					GFP_KERNEL);
 	if (!adapter->tx_stats_buffers)
 		return -ENOMEM;
 
 	adapter->rx_stats_buffers =
-				kcalloc(adapter->req_rx_queues,
+				kcalloc(IBMVNIC_MAX_QUEUES,
 					sizeof(struct ibmvnic_rx_queue_stats),
 					GFP_KERNEL);
 	if (!adapter->rx_stats_buffers)

From 0a6b2a1dc2a2105f178255fe495eb914b09cb37a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Feb 2018 11:56:47 -0800
Subject: [PATCH 0259/1678] tcp: switch to GSO being always on

Oleksandr Natalenko reported performance issues with BBR without FQ
packet scheduler that were root caused to lack of SG and GSO/TSO on
his configuration.

In this mode, TCP internal pacing has to setup a high resolution timer
for each MSS sent.

We could implement in TCP a strategy similar to the one adopted
in commit fefa569a9d4b ("net_sched: sch_fq: account for schedule/timers drifts")
or decide to finally switch TCP stack to a GSO only mode.

This has many benefits :

1) Most TCP developments are done with TSO in mind.
2) Less high-resolution timers needs to be armed for TCP-pacing
3) GSO can benefit of xmit_more hint
4) Receiver GRO is more effective (as if TSO was used for real on sender)
   -> Lower ACK traffic
5) Write queues have less overhead (one skb holds about 64KB of payload)
6) SACK coalescing just works.
7) rtx rb-tree contains less packets, SACK is cheaper.

This patch implements the minimum patch, but we can remove some legacy
code as follow ups.

Tested:

On 40Gbit link, one netperf -t TCP_STREAM

BBR+fq:
sg on:  26 Gbits/sec
sg off: 15.7 Gbits/sec   (was 2.3 Gbit before patch)

BBR+pfifo_fast:
sg on:  24.2 Gbits/sec
sg off: 14.9 Gbits/sec  (was 0.66 Gbit before patch !!! )

BBR+fq_codel:
sg on:  24.4 Gbits/sec
sg off: 15 Gbits/sec  (was 0.66 Gbit before patch !!! )

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 1 +
 net/core/sock.c    | 2 +-
 net/ipv4/tcp.c     | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 3aa7b7d6e6c7..f0f576ff5603 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -417,6 +417,7 @@ struct sock {
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
+	netdev_features_t	sk_route_forced_caps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	gfp_t			sk_allocation;
diff --git a/net/core/sock.c b/net/core/sock.c
index a1fa4a548f1b..507d8c6c4319 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1777,7 +1777,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	u32 max_segs = 1;
 
 	sk_dst_set(sk, dst);
-	sk->sk_route_caps = dst->dev->features;
+	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48636aee23c3..4b46a2ae46e3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -453,6 +453,7 @@ void tcp_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 
 	sk_sockets_allocated_inc(sk);
+	sk->sk_route_forced_caps = NETIF_F_GSO;
 }
 EXPORT_SYMBOL(tcp_init_sock);
 

From 74d4a8f8d378ddbe7caf3331804c3d5a276a9b1a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Feb 2018 11:56:48 -0800
Subject: [PATCH 0260/1678] tcp: remove sk_can_gso() use

After previous commit, sk_can_gso() is always true.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c       | 34 ++++++++--------------------------
 net/ipv4/tcp_input.c |  3 ---
 2 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4b46a2ae46e3..6f35c12af85a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -898,7 +898,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 new_size_goal, size_goal;
 
-	if (!large_allowed || !sk_can_gso(sk))
+	if (!large_allowed)
 		return mss_now;
 
 	/* Note : tcp_tso_autosize() will eventually split this later */
@@ -1103,27 +1103,11 @@ static int linear_payload_sz(bool first_skb)
 	return 0;
 }
 
-static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
+static int select_size(bool first_skb, bool zc)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	int tmp = tp->mss_cache;
-
-	if (sg) {
-		if (zc)
-			return 0;
-
-		if (sk_can_gso(sk)) {
-			tmp = linear_payload_sz(first_skb);
-		} else {
-			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
-
-			if (tmp >= pgbreak &&
-			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
-				tmp = pgbreak;
-		}
-	}
-
-	return tmp;
+	if (zc)
+		return 0;
+	return linear_payload_sz(first_skb);
 }
 
 void tcp_free_fastopen_req(struct tcp_sock *tp)
@@ -1188,7 +1172,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	int flags, err, copied = 0;
 	int mss_now = 0, size_goal, copied_syn = 0;
 	bool process_backlog = false;
-	bool sg, zc = false;
+	bool zc = false;
 	long timeo;
 
 	flags = msg->msg_flags;
@@ -1269,8 +1253,6 @@ restart:
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		goto do_error;
 
-	sg = !!(sk->sk_route_caps & NETIF_F_SG);
-
 	while (msg_data_left(msg)) {
 		int copy = 0;
 		int max = size_goal;
@@ -1298,7 +1280,7 @@ new_segment:
 				goto restart;
 			}
 			first_skb = tcp_rtx_and_write_queues_empty(sk);
-			linear = select_size(sk, sg, first_skb, zc);
+			linear = select_size(first_skb, zc);
 			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
 						  first_skb);
 			if (!skb)
@@ -1344,7 +1326,7 @@ new_segment:
 
 			if (!skb_can_coalesce(skb, i, pfrag->page,
 					      pfrag->offset)) {
-				if (i >= sysctl_max_skb_frags || !sg) {
+				if (i >= sysctl_max_skb_frags) {
 					tcp_mark_push(tp, skb);
 					goto new_segment;
 				}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a6b48f6253e3..06b9c4765f42 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1358,9 +1358,6 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	int len;
 	int in_sack;
 
-	if (!sk_can_gso(sk))
-		goto fallback;
-
 	/* Normally R but no L won't result in plain S */
 	if (!dup_sack &&
 	    (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS)

From dead7cdb0daec58490891e59f4fae0c5c76fa5f3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Feb 2018 11:56:49 -0800
Subject: [PATCH 0261/1678] tcp: remove sk_check_csum_caps()

Since TCP relies on GSO, we do not need this helper anymore.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  9 ---------
 net/ipv4/tcp.c     | 11 +++--------
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index f0f576ff5603..b9624581d639 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1863,15 +1863,6 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 	sk->sk_route_caps &= ~flags;
 }
 
-static inline bool sk_check_csum_caps(struct sock *sk)
-{
-	return (sk->sk_route_caps & NETIF_F_HW_CSUM) ||
-	       (sk->sk_family == PF_INET &&
-		(sk->sk_route_caps & NETIF_F_IP_CSUM)) ||
-	       (sk->sk_family == PF_INET6 &&
-		(sk->sk_route_caps & NETIF_F_IPV6_CSUM));
-}
-
 static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb,
 					   struct iov_iter *from, char *to,
 					   int copy, int offset)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6f35c12af85a..7c4140271887 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1063,8 +1063,7 @@ EXPORT_SYMBOL_GPL(do_tcp_sendpages);
 int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags)
 {
-	if (!(sk->sk_route_caps & NETIF_F_SG) ||
-	    !sk_check_csum_caps(sk))
+	if (!(sk->sk_route_caps & NETIF_F_SG))
 		return sock_no_sendpage_locked(sk, page, offset, size, flags);
 
 	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
@@ -1190,7 +1189,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			goto out_err;
 		}
 
-		zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+		zc = sk->sk_route_caps & NETIF_F_SG;
 		if (!zc)
 			uarg->zerocopy = 0;
 	}
@@ -1287,11 +1286,7 @@ new_segment:
 				goto wait_for_memory;
 
 			process_backlog = true;
-			/*
-			 * Check whether we can use HW checksum.
-			 */
-			if (sk_check_csum_caps(sk))
-				skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->ip_summed = CHECKSUM_PARTIAL;
 
 			skb_entail(sk, skb);
 			copy = size_goal;

From 65ec60973af97dab72094490b4d2e9b8e169456c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Feb 2018 11:56:50 -0800
Subject: [PATCH 0262/1678] tcp: tcp_sendmsg() only deals with CHECKSUM_PARTIAL

We no longer have skbs with skb->ip_summed == CHECKSUM_NONE
in TCP write queues.

We can remove dead code in tcp_sendmsg().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7c4140271887..a33539798bf6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1254,14 +1254,10 @@ restart:
 
 	while (msg_data_left(msg)) {
 		int copy = 0;
-		int max = size_goal;
 
 		skb = tcp_write_queue_tail(sk);
-		if (skb) {
-			if (skb->ip_summed == CHECKSUM_NONE)
-				max = mss_now;
-			copy = max - skb->len;
-		}
+		if (skb)
+			copy = size_goal - skb->len;
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
 			bool first_skb;
@@ -1290,7 +1286,6 @@ new_segment:
 
 			skb_entail(sk, skb);
 			copy = size_goal;
-			max = size_goal;
 
 			/* All packets are restored as if they have
 			 * already been sent. skb_mstamp isn't set to
@@ -1374,7 +1369,7 @@ new_segment:
 			goto out;
 		}
 
-		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair))
+		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
 			continue;
 
 		if (forced_push(tp)) {

From 4a64fd6ccf127973d1e2b2fc2f8024e550130617 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Feb 2018 11:56:51 -0800
Subject: [PATCH 0263/1678] tcp: remove dead code from tcp_set_skb_tso_segs()

We no longer have skbs with skb->ip_summed == CHECKSUM_NONE
in TCP write queues.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b2bca373f8be..0196923aec42 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1206,7 +1206,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 /* Initialize TSO segments for a packet. */
 static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
 {
-	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+	if (skb->len <= mss_now) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */

From 98be9b12096fb46773b4a509d3822fd17c82218e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Feb 2018 11:56:52 -0800
Subject: [PATCH 0264/1678] tcp: remove dead code after CHECKSUM_PARTIAL
 adoption

Since all skbs in write/rtx queues have CHECKSUM_PARTIAL,
we can remove dead code.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c   | 13 +++----------
 net/ipv4/tcp_output.c | 38 +++++---------------------------------
 2 files changed, 8 insertions(+), 43 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f3e52bc98980..2c6aec2643e8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -561,16 +561,9 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct tcphdr, check);
-	} else {
-		th->check = tcp_v4_check(skb->len, saddr, daddr,
-					 csum_partial(th,
-						      th->doff << 2,
-						      skb->csum));
-	}
+	th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
 }
 
 /* This routine computes an IPv4 TCP checksum. */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0196923aec42..8795d76f987c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1335,21 +1335,9 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	tcp_skb_fragment_eor(skb, buff);
 
-	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Copy and checksum data tail into the new buffer. */
-		buff->csum = csum_partial_copy_nocheck(skb->data + len,
-						       skb_put(buff, nsize),
-						       nsize, 0);
+	skb_split(skb, buff, len);
 
-		skb_trim(skb, len);
-
-		skb->csum = csum_block_sub(skb->csum, buff->csum, len);
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb_split(skb, buff, len);
-	}
-
-	buff->ip_summed = skb->ip_summed;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 
 	buff->tstamp = skb->tstamp;
 	tcp_fragment_tstamp(skb, buff);
@@ -1901,7 +1889,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 
 	tcp_skb_fragment_eor(skb, buff);
 
-	buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
+	buff->ip_summed = CHECKSUM_PARTIAL;
 	skb_split(skb, buff, len);
 	tcp_fragment_tstamp(skb, buff);
 
@@ -2134,7 +2122,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
-	nskb->ip_summed = skb->ip_summed;
+	nskb->ip_summed = CHECKSUM_PARTIAL;
 
 	tcp_insert_write_queue_before(nskb, skb, sk);
 	tcp_highest_sack_replace(sk, skb, nskb);
@@ -2142,14 +2130,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
-		if (nskb->ip_summed) {
-			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-		} else {
-			__wsum csum = skb_copy_and_csum_bits(skb, 0,
-							     skb_put(nskb, copy),
-							     copy, 0);
-			nskb->csum = csum_block_add(nskb->csum, csum, len);
-		}
+		skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
@@ -2166,9 +2147,6 @@ static int tcp_mtu_probe(struct sock *sk)
 						   ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
-				if (skb->ip_summed != CHECKSUM_PARTIAL)
-					skb->csum = csum_partial(skb->data,
-								 skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
 				tcp_set_skb_tso_segs(skb, mss_now);
@@ -2746,12 +2724,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_highest_sack_replace(sk, next_skb, skb);
 
-	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_PARTIAL;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
-
 	/* Update sequence range on original skb. */
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 

From a823fed03b5d940e4d57271222a0b959fc2ab201 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Tue, 20 Feb 2018 21:28:31 +0800
Subject: [PATCH 0265/1678] tcp: remove the hardcode in the definition of TCPF
 Macro

TCPF_ macro depends on the definition of TCP_ macro.
So it is better to define them with TCP_ marco.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp_states.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index 50e78a74d0df..2875e169d744 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -32,21 +32,21 @@ enum {
 
 #define TCP_STATE_MASK	0xF
 
-#define TCP_ACTION_FIN	(1 << 7)
+#define TCP_ACTION_FIN	(1 << TCP_CLOSE)
 
 enum {
-	TCPF_ESTABLISHED = (1 << 1),
-	TCPF_SYN_SENT	 = (1 << 2),
-	TCPF_SYN_RECV	 = (1 << 3),
-	TCPF_FIN_WAIT1	 = (1 << 4),
-	TCPF_FIN_WAIT2	 = (1 << 5),
-	TCPF_TIME_WAIT	 = (1 << 6),
-	TCPF_CLOSE	 = (1 << 7),
-	TCPF_CLOSE_WAIT	 = (1 << 8),
-	TCPF_LAST_ACK	 = (1 << 9),
-	TCPF_LISTEN	 = (1 << 10),
-	TCPF_CLOSING	 = (1 << 11),
-	TCPF_NEW_SYN_RECV = (1 << 12),
+	TCPF_ESTABLISHED = (1 << TCP_ESTABLISHED),
+	TCPF_SYN_SENT	 = (1 << TCP_SYN_SENT),
+	TCPF_SYN_RECV	 = (1 << TCP_SYN_RECV),
+	TCPF_FIN_WAIT1	 = (1 << TCP_FIN_WAIT1),
+	TCPF_FIN_WAIT2	 = (1 << TCP_FIN_WAIT2),
+	TCPF_TIME_WAIT	 = (1 << TCP_TIME_WAIT),
+	TCPF_CLOSE	 = (1 << TCP_CLOSE),
+	TCPF_CLOSE_WAIT	 = (1 << TCP_CLOSE_WAIT),
+	TCPF_LAST_ACK	 = (1 << TCP_LAST_ACK),
+	TCPF_LISTEN	 = (1 << TCP_LISTEN),
+	TCPF_CLOSING	 = (1 << TCP_CLOSING),
+	TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
 };
 
 #endif	/* _LINUX_TCP_STATES_H */

From af9090c2375e6a4a7b3059c9a3d36cb363c35d42 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Tue, 20 Feb 2018 11:04:18 -0600
Subject: [PATCH 0266/1678] ibmvnic: Correct goto target for tx irq
 initialization failure

When a failure occurs during initialization of the tx sub crq
irqs, we should branch to the cleanup of the tx irqs. The current
code branches to the rx irq cleanup and attempts to cleanup the
rx irqs which have not been initialized.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ad0f9cc1be04..1703b881252f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2620,7 +2620,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
 			dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n",
 				scrq->irq, rc);
 			irq_dispose_mapping(scrq->irq);
-			goto req_rx_irq_failed;
+			goto req_tx_irq_failed;
 		}
 	}
 

From cac56209a66ea3b0be67aa2966b2c628b944da1e Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@cumulusnetworks.com>
Date: Tue, 20 Feb 2018 08:55:58 -0500
Subject: [PATCH 0267/1678] net: Allow a rule to track originating protocol

Allow a rule that is being added/deleted/modified or
dumped to contain the originating protocol's id.

The protocol is handled just like a routes originating
protocol is.  This is especially useful because there
is starting to be a plethora of different user space
programs adding rules.

Allow the vrf device to specify that the kernel is the originator
of the rule created for this device.

Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c              | 1 +
 include/net/fib_rules.h        | 3 ++-
 include/uapi/linux/fib_rules.h | 2 +-
 net/core/fib_rules.c           | 7 ++++++-
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 239c78c53e58..951a4b42cb29 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1174,6 +1174,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
 	memset(frh, 0, sizeof(*frh));
 	frh->family = family;
 	frh->action = FR_ACT_TO_TBL;
+	frh->proto = RTPROT_KERNEL;
 
 	if (nla_put_u8(skb, FRA_L3MDEV, 1))
 		goto nla_put_failure;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 648caf90ec07..b166ef07e6d4 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -26,7 +26,8 @@ struct fib_rule {
 	u32			table;
 	u8			action;
 	u8			l3mdev;
-	/* 2 bytes hole, try to use */
+	u8                      proto;
+	/* 1 byte hole, try to use */
 	u32			target;
 	__be64			tun_id;
 	struct fib_rule __rcu	*ctarget;
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 2b642bf9b5a0..925539172d5b 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
 	__u8		tos;
 
 	__u8		table;
+	__u8		proto;
 	__u8		res1;	/* reserved */
-	__u8		res2;	/* reserved */
 	__u8		action;
 
 	__u32		flags;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cb071b8e8d17..88298f18cbae 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -51,6 +51,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 	r->pref = pref;
 	r->table = table;
 	r->flags = flags;
+	r->proto = RTPROT_KERNEL;
 	r->fr_net = ops->fro_net;
 	r->uid_range = fib_kuid_range_unset;
 
@@ -465,6 +466,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 	refcount_set(&rule->refcnt, 1);
 	rule->fr_net = net;
+	rule->proto = frh->proto;
 
 	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
 	                              : fib_default_rule_pref(ops);
@@ -664,6 +666,9 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	list_for_each_entry(rule, &ops->rules_list, list) {
+		if (frh->proto && (frh->proto != rule->proto))
+			continue;
+
 		if (frh->action && (frh->action != rule->action))
 			continue;
 
@@ -808,9 +813,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
 		goto nla_put_failure;
 	frh->res1 = 0;
-	frh->res2 = 0;
 	frh->action = rule->action;
 	frh->flags = rule->flags;
+	frh->proto = rule->proto;
 
 	if (rule->action == FR_ACT_GOTO &&
 	    rcu_access_pointer(rule->ctarget) == NULL)

From 94333fac44d1da19ebdf41704780c1af3cabea61 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Wed, 21 Feb 2018 01:31:13 +0100
Subject: [PATCH 0268/1678] ipvlan: drop ipv6 dependency

IPVlan has an hard dependency on IPv6, refactor the ipvlan code to allow
compiling it with IPv6 disabled, move duplicate code into addr_equal()
and refactor series of if-else into a switch.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig              |  1 -
 drivers/net/ipvlan/ipvlan_core.c | 72 ++++++++++++++++++++++++--------
 drivers/net/ipvlan/ipvlan_main.c | 48 +++++++++++++--------
 3 files changed, 85 insertions(+), 36 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 944ec3c9282c..3234c6618d75 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,7 +149,6 @@ config MACVTAP
 config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
-    depends on IPV6
     depends on NETFILTER
     depends on NET_L3_MASTER_DEV
     ---help---
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index c1f008fe4e1d..1b5dc200b573 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -35,6 +35,7 @@ void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
 }
 EXPORT_SYMBOL_GPL(ipvlan_count_rx);
 
+#if IS_ENABLED(CONFIG_IPV6)
 static u8 ipvlan_get_v6_hash(const void *iaddr)
 {
 	const struct in6_addr *ip6_addr = iaddr;
@@ -42,6 +43,12 @@ static u8 ipvlan_get_v6_hash(const void *iaddr)
 	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
 	       IPVLAN_HASH_MASK;
 }
+#else
+static u8 ipvlan_get_v6_hash(const void *iaddr)
+{
+	return 0;
+}
+#endif
 
 static u8 ipvlan_get_v4_hash(const void *iaddr)
 {
@@ -51,6 +58,23 @@ static u8 ipvlan_get_v4_hash(const void *iaddr)
 	       IPVLAN_HASH_MASK;
 }
 
+static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
+{
+	if (!is_v6 && addr->atype == IPVL_IPV4) {
+		struct in_addr *i4addr = (struct in_addr *)iaddr;
+
+		return addr->ip4addr.s_addr == i4addr->s_addr;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (is_v6 && addr->atype == IPVL_IPV6) {
+		struct in6_addr *i6addr = (struct in6_addr *)iaddr;
+
+		return ipv6_addr_equal(&addr->ip6addr, i6addr);
+#endif
+	}
+
+	return false;
+}
+
 static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
 					       const void *iaddr, bool is_v6)
 {
@@ -59,15 +83,9 @@ static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
 
 	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
 	       ipvlan_get_v4_hash(iaddr);
-	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
-		if (is_v6 && addr->atype == IPVL_IPV6 &&
-		    ipv6_addr_equal(&addr->ip6addr, iaddr))
+	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
+		if (addr_equal(is_v6, addr, iaddr))
 			return addr;
-		else if (!is_v6 && addr->atype == IPVL_IPV4 &&
-			 addr->ip4addr.s_addr ==
-				((struct in_addr *)iaddr)->s_addr)
-			return addr;
-	}
 	return NULL;
 }
 
@@ -93,13 +111,9 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 {
 	struct ipvl_addr *addr;
 
-	list_for_each_entry(addr, &ipvlan->addrs, anode) {
-		if ((is_v6 && addr->atype == IPVL_IPV6 &&
-		    ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
-		    (!is_v6 && addr->atype == IPVL_IPV4 &&
-		    addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
+	list_for_each_entry(addr, &ipvlan->addrs, anode)
+		if (addr_equal(is_v6, addr, iaddr))
 			return addr;
-	}
 	return NULL;
 }
 
@@ -150,6 +164,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
 		lyr3h = ip4h;
 		break;
 	}
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6): {
 		struct ipv6hdr *ip6h;
 
@@ -188,6 +203,7 @@ static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int
 		}
 		break;
 	}
+#endif
 	default:
 		return NULL;
 	}
@@ -337,14 +353,18 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
 {
 	struct ipvl_addr *addr = NULL;
 
-	if (addr_type == IPVL_IPV6) {
+	switch (addr_type) {
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPVL_IPV6: {
 		struct ipv6hdr *ip6h;
 		struct in6_addr *i6addr;
 
 		ip6h = (struct ipv6hdr *)lyr3h;
 		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
 		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
-	} else if (addr_type == IPVL_ICMPV6) {
+		break;
+	}
+	case IPVL_ICMPV6: {
 		struct nd_msg *ndmh;
 		struct in6_addr *i6addr;
 
@@ -356,14 +376,19 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
 			i6addr = &ndmh->target;
 			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
 		}
-	} else if (addr_type == IPVL_IPV4) {
+		break;
+	}
+#endif
+	case IPVL_IPV4: {
 		struct iphdr *ip4h;
 		__be32 *i4addr;
 
 		ip4h = (struct iphdr *)lyr3h;
 		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
 		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
-	} else if (addr_type == IPVL_ARP) {
+		break;
+	}
+	case IPVL_ARP: {
 		struct arphdr *arph;
 		unsigned char *arp_ptr;
 		__be32 dip;
@@ -377,6 +402,8 @@ static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
 
 		memcpy(&dip, arp_ptr, 4);
 		addr = ipvlan_ht_addr_lookup(port, &dip, false);
+		break;
+	}
 	}
 
 	return addr;
@@ -420,6 +447,7 @@ out:
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 {
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -456,6 +484,12 @@ err:
 out:
 	return ret;
 }
+#else
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+	return NET_XMIT_DROP;
+}
+#endif
 
 static int ipvlan_process_outbound(struct sk_buff *skb)
 {
@@ -759,6 +793,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
 			goto out;
 		break;
 	}
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 	{
 		struct dst_entry *dst;
@@ -778,6 +813,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
 		skb_dst_set(skb, dst);
 		break;
 	}
+#endif
 	default:
 		break;
 	}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 2469df118fbf..67c91ceda979 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -22,12 +22,14 @@ static const struct nf_hook_ops ipvl_nfops[] = {
 		.hooknum  = NF_INET_LOCAL_IN,
 		.priority = INT_MAX,
 	},
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.hook     = ipvlan_nf_input,
 		.pf       = NFPROTO_IPV6,
 		.hooknum  = NF_INET_LOCAL_IN,
 		.priority = INT_MAX,
 	},
+#endif
 };
 
 static const struct l3mdev_ops ipvl_l3mdev_ops = {
@@ -800,12 +802,14 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 		return -ENOMEM;
 
 	addr->master = ipvlan;
-	if (is_v6) {
-		memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
-		addr->atype = IPVL_IPV6;
-	} else {
+	if (!is_v6) {
 		memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr));
 		addr->atype = IPVL_IPV4;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr));
+		addr->atype = IPVL_IPV6;
+#endif
 	}
 	list_add_tail(&addr->anode, &ipvlan->addrs);
 
@@ -833,6 +837,20 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 	return;
 }
 
+static bool ipvlan_is_valid_dev(const struct net_device *dev)
+{
+	struct ipvl_dev *ipvlan = netdev_priv(dev);
+
+	if (!netif_is_ipvlan(dev))
+		return false;
+
+	if (!ipvlan || !ipvlan->port)
+		return false;
+
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
 	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
@@ -850,19 +868,6 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 	return ipvlan_del_addr(ipvlan, ip6_addr, true);
 }
 
-static bool ipvlan_is_valid_dev(const struct net_device *dev)
-{
-	struct ipvl_dev *ipvlan = netdev_priv(dev);
-
-	if (!netif_is_ipvlan(dev))
-		return false;
-
-	if (!ipvlan || !ipvlan->port)
-		return false;
-
-	return true;
-}
-
 static int ipvlan_addr6_event(struct notifier_block *unused,
 			      unsigned long event, void *ptr)
 {
@@ -913,6 +918,7 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 
 	return NOTIFY_OK;
 }
+#endif
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
@@ -993,6 +999,7 @@ static struct notifier_block ipvlan_notifier_block __read_mostly = {
 	.notifier_call = ipvlan_device_event,
 };
 
+#if IS_ENABLED(CONFIG_IPV6)
 static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
 	.notifier_call = ipvlan_addr6_event,
 };
@@ -1000,6 +1007,7 @@ static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
 static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = {
 	.notifier_call = ipvlan_addr6_validator_event,
 };
+#endif
 
 static void ipvlan_ns_exit(struct net *net)
 {
@@ -1024,9 +1032,11 @@ static int __init ipvlan_init_module(void)
 
 	ipvlan_init_secret();
 	register_netdevice_notifier(&ipvlan_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
 	register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
 	register_inet6addr_validator_notifier(
 	    &ipvlan_addr6_vtor_notifier_block);
+#endif
 	register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
 	register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block);
 
@@ -1045,9 +1055,11 @@ error:
 	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
 	unregister_inetaddr_validator_notifier(
 	    &ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
 	unregister_inet6addr_validator_notifier(
 	    &ipvlan_addr6_vtor_notifier_block);
+#endif
 	unregister_netdevice_notifier(&ipvlan_notifier_block);
 	return err;
 }
@@ -1060,9 +1072,11 @@ static void __exit ipvlan_cleanup_module(void)
 	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
 	unregister_inetaddr_validator_notifier(
 	    &ipvlan_addr4_vtor_notifier_block);
+#if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
 	unregister_inet6addr_validator_notifier(
 	    &ipvlan_addr6_vtor_notifier_block);
+#endif
 }
 
 module_init(ipvlan_init_module);

From 218798f407875e8a1ce9164f5e0c50f02105dc64 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Wed, 21 Feb 2018 01:31:14 +0100
Subject: [PATCH 0269/1678] ipvlan: selects master_l3 device instead of
 depending on it

The L3 Master device is just a glue between the core networking code and
device drivers, so it should be selected automatically rather than
requiring to be enabled explicitly.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 3234c6618d75..d88b78a17440 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -150,7 +150,7 @@ config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
     depends on NETFILTER
-    depends on NET_L3_MASTER_DEV
+    select NET_L3_MASTER_DEV
     ---help---
       This allows one to create virtual devices off of a main interface
       and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)

From aa9029479ef3fb88145cad3220e24a110c6ae723 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Wed, 21 Feb 2018 18:21:10 -0600
Subject: [PATCH 0270/1678] ibmvnic: Fix TX descriptor tracking

With the recent change, transmissions that only needed
one descriptor were being missed. The result is that such
packets were tracked as outstanding transmissions but never
removed when its completion notification was received.

Fixes: ffc385b95adb ("ibmvnic: Keep track of supplementary TX descriptors")
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 1703b881252f..4f7b38b6e2e0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1478,7 +1478,6 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if ((*hdrs >> 7) & 1) {
 		build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
 		tx_crq.v1.n_crq_elem = num_entries;
-		tx_buff->num_entries = num_entries;
 		tx_buff->indir_arr[0] = tx_crq;
 		tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
 						    sizeof(tx_buff->indir_arr),
@@ -1533,6 +1532,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		netif_stop_subqueue(netdev, queue_num);
 	}
 
+	tx_buff->num_entries = num_entries;
 	tx_packets++;
 	tx_bytes += skb->len;
 	txq->trans_start = jiffies;

From 2510babcfaf04d50d69946df31f0e9690c518144 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 22 Feb 2018 01:51:49 +0100
Subject: [PATCH 0271/1678] net: dsa: mv88e6xxx: scratch registers and external
 MDIO pins

MV88E6352 and later switches support GPIO control through the "Scratch
& Misc" global2 register. Two of the pins controlled this way on the
mv88e6390 family are the external MDIO pins. They can either by used
as part of the MII interface for port 0, GPIOs, or MDIO. Add a
function to configure them for MDIO, if possible, and call it when
registering the external MDIO bus.

Suggested-by: Russell King <rmk@armlinux.org.uk>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c            |  9 ++++
 drivers/net/dsa/mv88e6xxx/global2.h         | 14 ++++++
 drivers/net/dsa/mv88e6xxx/global2_scratch.c | 51 +++++++++++++++++++++
 3 files changed, 74 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 39c7ad7e490f..e1b5c5c66fce 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2165,6 +2165,15 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
 	struct mii_bus *bus;
 	int err;
 
+	if (external) {
+		mutex_lock(&chip->reg_lock);
+		err = mv88e6xxx_g2_scratch_gpio_set_smi(chip, true);
+		mutex_unlock(&chip->reg_lock);
+
+		if (err)
+			return err;
+	}
+
 	bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus));
 	if (!bus)
 		return -ENOMEM;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 25f92b3d7157..d85c91036c0f 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -266,6 +266,11 @@
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL5		0x6D
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL6		0x6E
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL7		0x6F
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA0	0x70
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1	0x71
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU	BIT(2)
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2	0x72
+#define MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK	0x3
 
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO	0
 #define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG	1
@@ -325,6 +330,9 @@ extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops;
 
 extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops;
 
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+				      bool external);
+
 #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
@@ -465,6 +473,12 @@ static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {};
 
 static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {};
 
+static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+						    bool external)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
 
 #endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
index 0ff12bff9f0e..3f92b8892dc7 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -238,3 +238,54 @@ const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
 	.get_pctl = mv88e6352_g2_scratch_gpio_get_pctl,
 	.set_pctl = mv88e6352_g2_scratch_gpio_set_pctl,
 };
+
+/**
+ * mv88e6xxx_g2_gpio_set_smi - set gpio muxing for external smi
+ * @chip: chip private data
+ * @external: set mux for external smi, or free for gpio usage
+ *
+ * Some mv88e6xxx models have GPIO pins that may be configured as
+ * an external SMI interface, or they may be made free for other
+ * GPIO uses.
+ */
+int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip,
+				      bool external)
+{
+	int misc_cfg = MV88E6352_G2_SCRATCH_MISC_CFG;
+	int config_data1 = MV88E6352_G2_SCRATCH_CONFIG_DATA1;
+	int config_data2 = MV88E6352_G2_SCRATCH_CONFIG_DATA2;
+	bool no_cpu;
+	u8 p0_mode;
+	int err;
+	u8 val;
+
+	err = mv88e6xxx_g2_scratch_read(chip, config_data2, &val);
+	if (err)
+		return err;
+
+	p0_mode = val & MV88E6352_G2_SCRATCH_CONFIG_DATA2_P0_MODE_MASK;
+
+	if (p0_mode == 0x01 || p0_mode == 0x02)
+		return -EBUSY;
+
+	err = mv88e6xxx_g2_scratch_read(chip, config_data1, &val);
+	if (err)
+		return err;
+
+	no_cpu = !!(val & MV88E6352_G2_SCRATCH_CONFIG_DATA1_NO_CPU);
+
+	err = mv88e6xxx_g2_scratch_read(chip, misc_cfg, &val);
+	if (err)
+		return err;
+
+	/* NO_CPU being 0 inverts the meaning of the bit */
+	if (!no_cpu)
+		external = !external;
+
+	if (external)
+		val |= MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+	else
+		val &= ~MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI;
+
+	return mv88e6xxx_g2_scratch_write(chip, misc_cfg, val);
+}

From 82e3be320d1e38a5e91a79d0eb451954c87ab7fe Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 21 Feb 2018 21:33:56 -0600
Subject: [PATCH 0272/1678] ibmvnic: Split counters for scrq/pools/napi

The approach of one counter to rule them all when tracking the number
of active sub-crqs, pools, and napi has problems handling some failover
scenarios. This is due to the split in initializing the sub crqs,
pools and napi in different places and the placement of updating
the active counts.

This patch simplifies this by having a counter for tx and rx
sub-crqs, pools, and napi.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 38 +++++++++++++-----------------
 drivers/net/ethernet/ibm/ibmvnic.h |  7 ++++--
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4f7b38b6e2e0..69d6e01fb767 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -461,7 +461,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 	if (!adapter->rx_pool)
 		return;
 
-	for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
+	for (i = 0; i < adapter->num_active_rx_pools; i++) {
 		rx_pool = &adapter->rx_pool[i];
 
 		netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
@@ -484,6 +484,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->rx_pool);
 	adapter->rx_pool = NULL;
+	adapter->num_active_rx_pools = 0;
 }
 
 static int init_rx_pools(struct net_device *netdev)
@@ -508,6 +509,8 @@ static int init_rx_pools(struct net_device *netdev)
 		return -1;
 	}
 
+	adapter->num_active_rx_pools = rxadd_subcrqs;
+
 	for (i = 0; i < rxadd_subcrqs; i++) {
 		rx_pool = &adapter->rx_pool[i];
 
@@ -608,7 +611,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 	if (!adapter->tx_pool)
 		return;
 
-	for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
+	for (i = 0; i < adapter->num_active_tx_pools; i++) {
 		netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
 		tx_pool = &adapter->tx_pool[i];
 		kfree(tx_pool->tx_buff);
@@ -619,6 +622,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->tx_pool);
 	adapter->tx_pool = NULL;
+	adapter->num_active_tx_pools = 0;
 }
 
 static int init_tx_pools(struct net_device *netdev)
@@ -635,6 +639,8 @@ static int init_tx_pools(struct net_device *netdev)
 	if (!adapter->tx_pool)
 		return -1;
 
+	adapter->num_active_tx_pools = tx_subcrqs;
+
 	for (i = 0; i < tx_subcrqs; i++) {
 		tx_pool = &adapter->tx_pool[i];
 
@@ -745,6 +751,7 @@ static int init_napi(struct ibmvnic_adapter *adapter)
 			       ibmvnic_poll, NAPI_POLL_WEIGHT);
 	}
 
+	adapter->num_active_rx_napi = adapter->req_rx_queues;
 	return 0;
 }
 
@@ -755,7 +762,7 @@ static void release_napi(struct ibmvnic_adapter *adapter)
 	if (!adapter->napi)
 		return;
 
-	for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
+	for (i = 0; i < adapter->num_active_rx_napi; i++) {
 		if (&adapter->napi[i]) {
 			netdev_dbg(adapter->netdev,
 				   "Releasing napi[%d]\n", i);
@@ -765,6 +772,7 @@ static void release_napi(struct ibmvnic_adapter *adapter)
 
 	kfree(adapter->napi);
 	adapter->napi = NULL;
+	adapter->num_active_rx_napi = 0;
 }
 
 static int ibmvnic_login(struct net_device *netdev)
@@ -998,10 +1006,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 		return rc;
 
 	rc = init_tx_pools(netdev);
-
-	adapter->num_active_tx_scrqs = adapter->req_tx_queues;
-	adapter->num_active_rx_scrqs = adapter->req_rx_queues;
-
 	return rc;
 }
 
@@ -1706,9 +1710,6 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
 			release_napi(adapter);
 			init_napi(adapter);
-
-			adapter->num_active_tx_scrqs = adapter->req_tx_queues;
-			adapter->num_active_rx_scrqs = adapter->req_rx_queues;
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)
@@ -2398,19 +2399,10 @@ zero_page_failed:
 
 static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 {
-	u64 num_tx_scrqs, num_rx_scrqs;
 	int i;
 
-	if (adapter->state == VNIC_PROBED) {
-		num_tx_scrqs = adapter->req_tx_queues;
-		num_rx_scrqs = adapter->req_rx_queues;
-	} else {
-		num_tx_scrqs = adapter->num_active_tx_scrqs;
-		num_rx_scrqs = adapter->num_active_rx_scrqs;
-	}
-
 	if (adapter->tx_scrq) {
-		for (i = 0; i < num_tx_scrqs; i++) {
+		for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
 			if (!adapter->tx_scrq[i])
 				continue;
 
@@ -2429,10 +2421,11 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 
 		kfree(adapter->tx_scrq);
 		adapter->tx_scrq = NULL;
+		adapter->num_active_tx_scrqs = 0;
 	}
 
 	if (adapter->rx_scrq) {
-		for (i = 0; i < num_rx_scrqs; i++) {
+		for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
 			if (!adapter->rx_scrq[i])
 				continue;
 
@@ -2451,6 +2444,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
 
 		kfree(adapter->rx_scrq);
 		adapter->rx_scrq = NULL;
+		adapter->num_active_rx_scrqs = 0;
 	}
 }
 
@@ -2718,6 +2712,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
 	for (i = 0; i < adapter->req_tx_queues; i++) {
 		adapter->tx_scrq[i] = allqueues[i];
 		adapter->tx_scrq[i]->pool_index = i;
+		adapter->num_active_tx_scrqs++;
 	}
 
 	adapter->rx_scrq = kcalloc(adapter->req_rx_queues,
@@ -2728,6 +2723,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter)
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues];
 		adapter->rx_scrq[i]->scrq_num = i;
+		adapter->num_active_rx_scrqs++;
 	}
 
 	kfree(allqueues);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 68e712c69211..099c89d49945 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1092,8 +1092,11 @@ struct ibmvnic_adapter {
 	u64 opt_rxba_entries_per_subcrq;
 	__be64 tx_rx_desc_req;
 	u8 map_id;
-	u64 num_active_rx_scrqs;
-	u64 num_active_tx_scrqs;
+	u32 num_active_rx_scrqs;
+	u32 num_active_rx_pools;
+	u32 num_active_rx_napi;
+	u32 num_active_tx_scrqs;
+	u32 num_active_tx_pools;
 
 	struct tasklet_struct tasklet;
 	enum vnic_state state;

From 4fe0de5b143762d327bfaf1d7be7c5b58041a18c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Wed, 31 Jan 2018 19:04:15 -0600
Subject: [PATCH 0273/1678] uapi: Add 802.11 Preauthentication to if_ether

This adds 0x88c7 protocol type to if_ether.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index f8cb5760ea4f..54585906e50a 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -89,6 +89,7 @@
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
 #define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
+#define ETH_P_PREAUTH	0x88C7		/* 802.11 Preauthentication */
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_MACSEC	0x88E5		/* 802.1ae MACsec */
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */

From 7299d6f7bfd1921c0cfb5e202155f1a5cfdb57d0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 19 Feb 2018 14:48:39 +0200
Subject: [PATCH 0274/1678] mac80211: support reporting A-MPDU EOF bit
 value/known

Support getting the EOF bit value reported from hardware
and writing it out to radiotap.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/ieee80211_radiotap.h | 2 ++
 include/net/mac80211.h           | 5 +++++
 net/mac80211/rx.c                | 4 ++++
 3 files changed, 11 insertions(+)

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index d91f9e7f4d71..960236fb1681 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -149,6 +149,8 @@ enum ieee80211_radiotap_ampdu_flags {
 	IEEE80211_RADIOTAP_AMPDU_IS_LAST = 0x0008,
 	IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR = 0x0010,
 	IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN = 0x0020,
+	IEEE80211_RADIOTAP_AMPDU_EOF = 0x0040,
+	IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN = 0x0080,
 };
 
 /* for IEEE80211_RADIOTAP_VHT */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 854037b8163e..649f073eb6df 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1099,6 +1099,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *	the first subframe.
  * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
  *	be done in the hardware.
+ * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
+ *	frame
+ * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR		= BIT(0),
@@ -1125,6 +1128,8 @@ enum mac80211_rx_flags {
 	RX_FLAG_MIC_STRIPPED		= BIT(21),
 	RX_FLAG_ALLOW_SAME_PN		= BIT(22),
 	RX_FLAG_ICV_STRIPPED		= BIT(23),
+	RX_FLAG_AMPDU_EOF_BIT		= BIT(24),
+	RX_FLAG_AMPDU_EOF_BIT_KNOWN	= BIT(25),
 };
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e755f93ad735..478a9c735edb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -439,6 +439,10 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 			flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
 		if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
 			flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+		if (status->flag & RX_FLAG_AMPDU_EOF_BIT_KNOWN)
+			flags |= IEEE80211_RADIOTAP_AMPDU_EOF_KNOWN;
+		if (status->flag & RX_FLAG_AMPDU_EOF_BIT)
+			flags |= IEEE80211_RADIOTAP_AMPDU_EOF;
 		put_unaligned_le16(flags, pos);
 		pos += 2;
 		if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)

From a1f2ba04cc92414b6b933289365eab878b0b2bf4 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Mon, 19 Feb 2018 14:48:40 +0200
Subject: [PATCH 0275/1678] mac80211: add get TID helper

Extracting the TID from the QOS header is common enough
to justify helper.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h          | 12 ++++++++++++
 net/mac80211/iface.c               |  3 +--
 net/mac80211/michael.c             |  2 +-
 net/mac80211/mlme.c                |  2 +-
 net/mac80211/rc80211_minstrel_ht.c |  2 +-
 net/mac80211/rx.c                  |  6 ++----
 net/mac80211/tx.c                  |  9 ++-------
 net/mac80211/wpa.c                 |  8 +++-----
 8 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e4cba332b705..8fe7e4306816 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -8,6 +8,7 @@
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2501,6 +2502,17 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr)
 		return (u8 *)hdr + 24;
 }
 
+/**
+ * ieee80211_get_tid - get qos TID
+ * @hdr: the frame
+ */
+static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr)
+{
+	u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+	return qc[0] & IEEE80211_QOS_CTL_TID_MASK;
+}
+
 /**
  * ieee80211_get_SA - get pointer to SA
  * @hdr: the frame
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5fe01f82df12..d13ba064951f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1324,8 +1324,7 @@ static void ieee80211_iface_work(struct work_struct *work)
 			mutex_lock(&local->sta_mtx);
 			sta = sta_info_get_bss(sdata, mgmt->sa);
 			if (sta) {
-				u16 tid = *ieee80211_get_qos_ctl(hdr) &
-						IEEE80211_QOS_CTL_TID_MASK;
+				u16 tid = ieee80211_get_tid(hdr);
 
 				__ieee80211_stop_rx_ba_session(
 					sta, tid, WLAN_BACK_RECIPIENT,
diff --git a/net/mac80211/michael.c b/net/mac80211/michael.c
index 408649bd4702..37e172701a63 100644
--- a/net/mac80211/michael.c
+++ b/net/mac80211/michael.c
@@ -35,7 +35,7 @@ static void michael_mic_hdr(struct michael_mic_ctx *mctx, const u8 *key,
 	da = ieee80211_get_DA(hdr);
 	sa = ieee80211_get_SA(hdr);
 	if (ieee80211_is_data_qos(hdr->frame_control))
-		tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+		tid = ieee80211_get_tid(hdr);
 	else
 		tid = 0;
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 39b660b9a908..010b127a3937 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2151,7 +2151,7 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
 					   u16 tx_time)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	u16 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	u16 tid = ieee80211_get_tid(hdr);
 	int ac = ieee80211_ac_from_tid(tid);
 	struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
 	unsigned long now = jiffies;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 4a5bdad9f303..fb586b6e5d49 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -669,7 +669,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
 	if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
 		return;
 
-	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	tid = ieee80211_get_tid(hdr);
 	if (likely(sta->ampdu_mlme.tid_tx[tid]))
 		return;
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 478a9c735edb..3dc162ddc3a6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1189,7 +1189,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 
 	ack_policy = *ieee80211_get_qos_ctl(hdr) &
 		     IEEE80211_QOS_CTL_ACK_POLICY_MASK;
-	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	tid = ieee80211_get_tid(hdr);
 
 	tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
 	if (!tid_agg_rx) {
@@ -1528,9 +1528,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
 		   ieee80211_has_pm(hdr->frame_control) &&
 		   (ieee80211_is_data_qos(hdr->frame_control) ||
 		    ieee80211_is_qos_nullfunc(hdr->frame_control))) {
-		u8 tid;
-
-		tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+		u8 tid = ieee80211_get_tid(hdr);
 
 		ieee80211_sta_uapsd_trigger(&rx->sta->sta, tid);
 	}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5decd0fb247c..7643178ef132 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -797,7 +797,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-	u8 *qc;
 	int tid;
 
 	/*
@@ -844,9 +843,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 		return TX_CONTINUE;
 
 	/* include per-STA, per-TID sequence counter */
-
-	qc = ieee80211_get_qos_ctl(hdr);
-	tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+	tid = ieee80211_get_tid(hdr);
 	tx->sta->tx_stats.msdu[tid]++;
 
 	hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -1158,7 +1155,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int tid;
-	u8 *qc;
 
 	memset(tx, 0, sizeof(*tx));
 	tx->skb = skb;
@@ -1198,8 +1194,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 	    !ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) {
 		struct tid_ampdu_tx *tid_tx;
 
-		qc = ieee80211_get_qos_ctl(hdr);
-		tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+		tid = ieee80211_get_tid(hdr);
 
 		tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
 		if (tid_tx) {
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 785056cb76f6..58d0b258b684 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -340,7 +340,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
 	a4_included = ieee80211_has_a4(hdr->frame_control);
 
 	if (ieee80211_is_data_qos(hdr->frame_control))
-		qos_tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+		qos_tid = ieee80211_get_tid(hdr);
 	else
 		qos_tid = 0;
 
@@ -601,8 +601,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
 	aad[23] = 0;
 
 	if (ieee80211_is_data_qos(hdr->frame_control))
-		qos_tid = *ieee80211_get_qos_ctl(hdr) &
-			IEEE80211_QOS_CTL_TID_MASK;
+		qos_tid = ieee80211_get_tid(hdr);
 	else
 		qos_tid = 0;
 
@@ -867,8 +866,7 @@ ieee80211_crypto_cs_decrypt(struct ieee80211_rx_data *rx)
 		return RX_DROP_UNUSABLE;
 
 	if (ieee80211_is_data_qos(hdr->frame_control))
-		qos_tid = *ieee80211_get_qos_ctl(hdr) &
-				IEEE80211_QOS_CTL_TID_MASK;
+		qos_tid = ieee80211_get_tid(hdr);
 	else
 		qos_tid = 0;
 

From 94ba92713f8329c96e0a8e2880b3c1a785d1c95c Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 19 Feb 2018 14:48:41 +0200
Subject: [PATCH 0276/1678] mac80211: Call mgd_prep_tx before transmitting
 deauthentication

In multi channel scenarios, when disassociating from the AP before a
beacon was heard from the AP, it is not guaranteed that the virtual
interface is granted air time for the transmission of the
deauthentication frame. This in turn can lead to various issues as
the AP might never get the deauthentication frame.

To mitigate such possible issues, add a HW flag indicating that the
driver requires mac80211 to call the mgd_prep_tx() driver callback
to make sure that the virtual interface is granted immediate airtime
to be able to transmit the frame, in case that no beacon was heard
from the AP.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 +++++++++++++
 net/mac80211/debugfs.c |  1 +
 net/mac80211/mlme.c    | 16 +++++++++++++++-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 649f073eb6df..dc3e9d9c3527 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6,6 +6,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2069,6 +2070,14 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
  *	TDLS links.
  *
+ * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the
+ *	mgd_prepare_tx() callback to be called before transmission of a
+ *	deauthentication frame in case the association was completed but no
+ *	beacon was heard. This is required in multi-channel scenarios, where the
+ *	virtual interface might not be given air time for the transmission of
+ *	the frame, as it is not synced with the AP/P2P GO yet, and thus the
+ *	deauthentication frame might not be transmitted.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2112,6 +2121,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_REPORTS_LOW_ACK,
 	IEEE80211_HW_SUPPORTS_TX_FRAG,
 	IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+	IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
@@ -3356,6 +3366,9 @@ enum ieee80211_reconfig_type {
  *	management frame prior to having successfully associated to allow the
  *	driver to give it channel time for the transmission, to get a response
  *	and to be able to synchronize with the GO.
+ *	For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211
+ *	would also call this function before transmitting a deauthentication
+ *	frame in case that no beacon was heard from the AP/P2P GO.
  *	The callback will be called before each transmission and upon return
  *	mac80211 will transmit the frame right away.
  *	The callback is optional and can (should!) sleep.
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f466d12a6bc..a75653affbf7 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -212,6 +212,7 @@ static const char *hw_flag_names[] = {
 	FLAG(REPORTS_LOW_ACK),
 	FLAG(SUPPORTS_TX_FRAG),
 	FLAG(SUPPORTS_TDLS_BUFFER_STA),
+	FLAG(DEAUTH_NEED_MGD_TX_PREP),
 #undef FLAG
 };
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 010b127a3937..0024eff9bb84 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7,6 +7,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2008,9 +2009,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 		ieee80211_flush_queues(local, sdata, true);
 
 	/* deauthenticate/disassociate now */
-	if (tx || frame_buf)
+	if (tx || frame_buf) {
+		struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+		/*
+		 * In multi channel scenarios guarantee that the virtual
+		 * interface is granted immediate airtime to transmit the
+		 * deauthentication frame by calling mgd_prepare_tx, if the
+		 * driver requested so.
+		 */
+		if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
+		    !ifmgd->have_beacon)
+			drv_mgd_prepare_tx(sdata->local, sdata);
+
 		ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype,
 					       reason, tx, frame_buf);
+	}
 
 	/* flush out frame - make sure the deauth was actually sent */
 	if (tx)

From f7308991bfeea3f6a4c6281c64fc1ba9dc6e56b3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Feb 2018 19:50:05 -0800
Subject: [PATCH 0277/1678] nfp: add Makefiles to all directories

To be able to build separate objects we need to provide
Kbuild with a Makefile in each directory.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/Makefile             | 2 ++
 drivers/net/ethernet/netronome/nfp/flower/Makefile          | 2 ++
 drivers/net/ethernet/netronome/nfp/nfpcore/Makefile         | 2 ++
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile | 2 ++
 drivers/net/ethernet/netronome/nfp/nic/Makefile             | 2 ++
 5 files changed, 10 insertions(+)
 create mode 100644 drivers/net/ethernet/netronome/nfp/bpf/Makefile
 create mode 100644 drivers/net/ethernet/netronome/nfp/flower/Makefile
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
 create mode 100644 drivers/net/ethernet/netronome/nfp/nic/Makefile

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/Makefile b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/bpf/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/flower/Makefile b/drivers/net/ethernet/netronome/nfp/flower/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects
diff --git a/drivers/net/ethernet/netronome/nfp/nic/Makefile b/drivers/net/ethernet/netronome/nfp/nic/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nic/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects

From 420b93589977ac16018ae5bc0041c0d080f588e8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Feb 2018 19:50:06 -0800
Subject: [PATCH 0278/1678] aquantia: add Makefiles to all directories

To be able to build separate objects we need to provide
Kbuild with a Makefile in each directory.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
new file mode 100644
index 000000000000..805fa28f391a
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# kbuild requires Makefile in a directory to build individual objects

From 70271dadee110adea33d1f32e7793b17c54bc7c4 Mon Sep 17 00:00:00 2001
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Date: Wed, 21 Feb 2018 19:50:07 -0800
Subject: [PATCH 0279/1678] nfp: advertise firmware for mixed 10G/25G mode

The AMDA0099-0001 platform can support the 1x10G + 1x25G mixed mode
operation. Recently, firmware has been added for this configuration
mode.

Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index ab301d56430b..c4b1f344b4da 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -645,6 +645,7 @@ MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_4x10_1x40.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0097-0001_8x10.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw");
 MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw");
+MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw");
 
 MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
 MODULE_LICENSE("GPL");

From 46182452cf5abe95782e100f0719b7497641d263 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Feb 2018 12:44:40 +0100
Subject: [PATCH 0280/1678] dsa: ptp: mark dummy helpers as 'inline'

Declaring a static function in a header leads to a warning every
time that header gets included without the function being used:

In file included from drivers/net/dsa/mv88e6xxx/chip.c:42:
drivers/net/dsa/mv88e6xxx/ptp.h:92:13: error: 'mv88e6xxx_hwtstamp_work' defined but not used [-Werror=unused-function]
 static long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
In file included from drivers/net/dsa/mv88e6xxx/chip.c:38:
drivers/net/dsa/mv88e6xxx/global2.h:355:12: error: 'mv88e6xxx_g2_wait' defined but not used [-Werror=unused-function]
 static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
            ^~~~~~~~~~~~~~~~~
drivers/net/dsa/mv88e6xxx/global2.h:350:12: error: 'mv88e6xxx_g2_update' defined but not used [-Werror=unused-function]
 static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
            ^~~~~~~~~~~~~~~~~~~
drivers/net/dsa/mv88e6xxx/global2.h:345:12: error: 'mv88e6xxx_g2_write' defined but not used [-Werror=unused-function]
 static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
            ^~~~~~~~~~~~~~~~~~
drivers/net/dsa/mv88e6xxx/global2.h:340:12: error: 'mv88e6xxx_g2_read' defined but not used [-Werror=unused-function]
 static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)

This marks all such functions in dsa inline to make sure we don't warn
about them.

Fixes: c6fe0ad2c349 ("net: dsa: mv88e6xxx: add rx/tx timestamping support")
Fixes: 0d632c3d6fe3 ("net: dsa: mv88e6xxx: add accessors for PTP/TAI registers")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global2.h | 8 ++++----
 drivers/net/dsa/mv88e6xxx/ptp.h     | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index d85c91036c0f..aa3f0a736966 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -345,22 +345,22 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
 	return 0;
 }
 
-static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+static inline int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
 {
 	return -EOPNOTSUPP;
 }
 
-static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
 {
 	return -EOPNOTSUPP;
 }
 
-static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+static inline int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
 {
 	return -EOPNOTSUPP;
 }
 
-static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+static inline int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
index 992818ade746..10f271ab650d 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.h
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -89,7 +89,7 @@ void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
 
 #else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
 
-static long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
 {
 	return -1;
 }
@@ -99,7 +99,7 @@ static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
 	return 0;
 }
 
-static void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
 {
 }
 

From ee07862f7b4594d390b978f6636a6a6191632ab3 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Fri, 23 Feb 2018 14:58:41 +0800
Subject: [PATCH 0281/1678] bpf: NULL pointer check is not needed in
 BPF_CGROUP_RUN_PROG_INET_SOCK

sk is already allocated in inet_create/inet6_create, hence when
BPF_CGROUP_RUN_PROG_INET_SOCK is executed sk will never be NULL.

The logic is as bellow,
	sk = sk_alloc();
	if (!sk)
		goto out;
	BPF_CGROUP_RUN_PROG_INET_SOCK(sk);

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7f16e0f8d68..8a4566691c8f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
 ({									       \
 	int __ret = 0;							       \
-	if (cgroup_bpf_enabled && sk) {					       \
+	if (cgroup_bpf_enabled) {					       \
 		__ret = __cgroup_bpf_run_filter_sk(sk,			       \
 						 BPF_CGROUP_INET_SOCK_CREATE); \
 	}								       \

From 7edf6d314cd061e1d0a1b7bc0b511d64322c3f72 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 22 Feb 2018 21:22:40 +0100
Subject: [PATCH 0282/1678] r8169: disable WOL per default

Currently, if BIOS enables WOL in the chip, settings are inconsistent
because the device isn't marked as wakeup-enabled (if not done
explicitly via userspace tools). This causes issues with suspend/
resume because mdio_bus_phy_may_suspend() checks whether device is
wakeup-enabled. In detail MDIO bus access in phy_suspend() can fail
because the MDIO bus is disabled.

In the history of the driver we find two competing approaches:
8f9d5138035d "r8169: remember WOL preferences on driver load" prefers
to preserve what the BIOS may have set, whilst bde135a672bf
"r8169: only enable PCI wakeups when WOL is active" disabled PCI
wakeup per default to work around a bug on one platform.

Seems like nobody complained after the latter patch about non-working
WOL, what makes me think that nobody uses WOL w/o configuring it
explicitly.

My opinion:
Vast majority of users doesn't use WOL even if the BIOS enables it in
the chip. And having WOL being active keeps the PHY(s) from powering
down if being idle.
If somebody needs WOL, he can enable it during boot, e.g. by
configuring systemd.link/WakeOnLan.

Therefore, to make WOL consistent again, disable it per default.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index c16b97a56d9f..91a03d575c75 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -8512,11 +8512,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
 	RTL_W8(Cfg9346, Cfg9346_Unlock);
-	RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
-	RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
 	tp->features |= rtl_try_msi(tp, cfg);
 	RTL_W8(Cfg9346, Cfg9346_Lock);
 
+	/* override BIOS settings, use userspace tools to enable WOL */
+	__rtl8169_set_wol(tp, 0);
+
 	if (rtl_tbi_enabled(tp)) {
 		tp->set_speed = rtl8169_set_speed_tbi;
 		tp->get_link_ksettings = rtl8169_get_link_ksettings_tbi;

From 9dbe7896d95af114fcb5554b53cf796ca9293b13 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 22 Feb 2018 21:37:48 +0100
Subject: [PATCH 0283/1678] r8169: simplify and improve check for dash

r8168_check_dash() returns false anyway for all chip versions not
supporting dash. So we can simplify the check conditions.

In addition change the check functions to return bool instead of int,
because they actually return a bool value.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 39 +++++++---------------------
 1 file changed, 9 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 91a03d575c75..96db3283eecf 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1472,19 +1472,19 @@ static void rtl8168_driver_stop(struct rtl8169_private *tp)
 	}
 }
 
-static int r8168dp_check_dash(struct rtl8169_private *tp)
+static bool r8168dp_check_dash(struct rtl8169_private *tp)
 {
 	u16 reg = rtl8168_get_ocp_reg(tp);
 
-	return (ocp_read(tp, 0x0f, reg) & 0x00008000) ? 1 : 0;
+	return !!(ocp_read(tp, 0x0f, reg) & 0x00008000);
 }
 
-static int r8168ep_check_dash(struct rtl8169_private *tp)
+static bool r8168ep_check_dash(struct rtl8169_private *tp)
 {
-	return (ocp_read(tp, 0x0f, 0x128) & 0x00000001) ? 1 : 0;
+	return !!(ocp_read(tp, 0x0f, 0x128) & 0x00000001);
 }
 
-static int r8168_check_dash(struct rtl8169_private *tp)
+static bool r8168_check_dash(struct rtl8169_private *tp)
 {
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_27:
@@ -1496,7 +1496,7 @@ static int r8168_check_dash(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_51:
 		return r8168ep_check_dash(tp);
 	default:
-		return 0;
+		return false;
 	}
 }
 
@@ -4982,15 +4982,8 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
 
-	if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-	    r8168_check_dash(tp)) {
+	if (r8168_check_dash(tp))
 		return;
-	}
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
 	     tp->mac_version == RTL_GIGA_MAC_VER_24) &&
@@ -8202,15 +8195,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-	    r8168_check_dash(tp)) {
+	if (r8168_check_dash(tp))
 		rtl8168_driver_stop(tp);
-	}
 
 	netif_napi_del(&tp->napi);
 
@@ -8640,15 +8626,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			   rtl_chip_infos[chipset].jumbo_tx_csum ? "ok" : "ko");
 	}
 
-	if ((tp->mac_version == RTL_GIGA_MAC_VER_27 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_28 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_31 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_49 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_50 ||
-	     tp->mac_version == RTL_GIGA_MAC_VER_51) &&
-	    r8168_check_dash(tp)) {
+	if (r8168_check_dash(tp))
 		rtl8168_driver_start(tp);
-	}
 
 	netif_carrier_off(dev);
 

From 79a5b9727a1cceacd49921b78425ebda91836bd6 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 22 Feb 2018 13:40:27 -0800
Subject: [PATCH 0284/1678] rds: rds_msg_zcopy should return error of null
 rm->data.op_mmp_znotifier

if either or both of MSG_ZEROCOPY and SOCK_ZEROCOPY have not been
specified, the rm->data.op_mmp_znotifier allocation will be skipped.
In this case, it is invalid ot pass down a cmsghdr with
RDS_CMSG_ZCOPY_COOKIE, so return EINVAL from rds_msg_zcopy for this
case.

Reported-by: syzbot+f893ae7bb2f6456dfbc3@syzkaller.appspotmail.com
Fixes: 0cebaccef3ac ("rds: zerocopy Tx support.")
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index 79d158b3def0..acad04243b41 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -941,7 +941,8 @@ static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
 {
 	u32 *cookie;
 
-	if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)) ||
+	    !rm->data.op_mmp_znotifier)
 		return -EINVAL;
 	cookie = CMSG_DATA(cmsg);
 	rm->data.op_mmp_znotifier->z_cookie = *cookie;

From 3c69f79233e7c8424abb38bbd0ae9fd9010014af Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Feb 2018 12:22:52 +0000
Subject: [PATCH 0285/1678] atm: idt77252: remove redundant bit-wise or'ing of
 zero

Zero is being bit-wise or'd in a calculation twice; these are redundant
and can be removed.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/idt77252.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 0277f36be85b..6e737142ceaa 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3173,14 +3173,10 @@ static void init_sram(struct idt77252_dev *card)
 				    (u32) 0xffffffff);
 	}
 
-	writel((SAR_FBQ0_LOW << 28) | 0x00000000 | 0x00000000 |
-	       (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
-	writel((SAR_FBQ1_LOW << 28) | 0x00000000 | 0x00000000 |
-	       (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
-	writel((SAR_FBQ2_LOW << 28) | 0x00000000 | 0x00000000 |
-	       (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
-	writel((SAR_FBQ3_LOW << 28) | 0x00000000 | 0x00000000 |
-	       (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
+	writel((SAR_FBQ0_LOW << 28) | (SAR_FB_SIZE_0 / 48), SAR_REG_FBQS0);
+	writel((SAR_FBQ1_LOW << 28) | (SAR_FB_SIZE_1 / 48), SAR_REG_FBQS1);
+	writel((SAR_FBQ2_LOW << 28) | (SAR_FB_SIZE_2 / 48), SAR_REG_FBQS2);
+	writel((SAR_FBQ3_LOW << 28) | (SAR_FB_SIZE_3 / 48), SAR_REG_FBQS3);
 
 	/* Initialize rate table  */
 	for (i = 0; i < 256; i++) {

From cc30c93fa020e13c91f5076e20062df33f944cdc Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 23 Feb 2018 11:56:20 -0500
Subject: [PATCH 0286/1678] selftests/net: ignore background traffic in
 psock_fanout

The packet fanout test generates UDP traffic and reads this with
a pair of packet sockets, testing the various fanout algorithms.

Avoid non-determinism from reading unrelated background traffic.
Fanout decisions are made before unrelated packets can be dropped with
a filter, so that is an insufficient strategy [*]. Run the packet
socket tests in a network namespace, similar to msg_zerocopy.

It it still good practice to install a filter on a packet socket
before accepting traffic. Because this is example code, demonstrate
that pattern. Open the socket initially bound to no protocol, install
a filter, and only then bind to ETH_P_IP.

Another source of non-determinism is hash collisions in FANOUT_HASH.
The hash function used to select a socket in the fanout group includes
the pseudorandom number hashrnd, which is not visible from userspace.
To work around this, the test tries to find a pair of UDP source ports
that do not collide. It gives up too soon (5 times, every 32 runs) and
output is confusing. Increase tries to 20 and revise the error msg.

[*] another approach would be to add a third socket to the fanout
    group and direct all unexpected traffic here. This is possible
    only when reimplementing methods like RR or HASH alongside this
    extra catch-all bucket, using the BPF fanout method.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/in_netns.sh       | 23 +++++++++++++
 tools/testing/selftests/net/psock_fanout.c    | 32 +++++++++++++++----
 tools/testing/selftests/net/run_afpackettests |  4 +--
 3 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100755 tools/testing/selftests/net/in_netns.sh

diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755
index 000000000000..f57a2eaad069
--- /dev/null
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index d4346b16b2c1..bd9b9632c72b 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -50,6 +50,7 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/if_packet.h>
+#include <net/if.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <netinet/udp.h>
@@ -73,14 +74,29 @@
  * @return -1 if mode is bad, a valid socket otherwise */
 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 {
+	struct sockaddr_ll addr = {0};
 	int fd, val;
 
-	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
 	if (fd < 0) {
 		perror("socket packet");
 		exit(1);
 	}
 
+	pair_udp_setfilter(fd);
+
+	addr.sll_family = AF_PACKET;
+	addr.sll_protocol = htons(ETH_P_IP);
+	addr.sll_ifindex = if_nametoindex("lo");
+	if (addr.sll_ifindex == 0) {
+		perror("if_nametoindex");
+		exit(1);
+	}
+	if (bind(fd, (void *) &addr, sizeof(addr))) {
+		perror("bind packet");
+		exit(1);
+	}
+
 	val = (((int) typeflags) << 16) | group_id;
 	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
 		if (close(fd)) {
@@ -90,7 +106,6 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 		return -1;
 	}
 
-	pair_udp_setfilter(fd);
 	return fd;
 }
 
@@ -229,7 +244,7 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[])
 
 	if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
 	    (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
-		fprintf(stderr, "ERROR: incorrect queue lengths\n");
+		fprintf(stderr, "warning: incorrect queue lengths\n");
 		return 1;
 	}
 
@@ -348,7 +363,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
 	uint8_t type = typeflags & 0xFF;
 	int fds[2], fds_udp[2][2], ret;
 
-	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+	fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+		typeflags, PORT_BASE, PORT_BASE + port_off);
 
 	fds[0] = sock_fanout_open(typeflags, 0);
 	fds[1] = sock_fanout_open(typeflags, 0);
@@ -419,7 +435,7 @@ int main(int argc, char **argv)
 	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
 	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
 	const int expect_uniqueid[2][2] = { { 20, 20},  { 20, 20 } };
-	int port_off = 2, tries = 5, ret;
+	int port_off = 2, tries = 20, ret;
 
 	test_control_single();
 	test_control_group();
@@ -428,10 +444,14 @@ int main(int argc, char **argv)
 	/* find a set of ports that do not collide onto the same socket */
 	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
 			    expect_hash[0], expect_hash[1]);
-	while (ret && tries--) {
+	while (ret) {
 		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
 		ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
 				    expect_hash[0], expect_hash[1]);
+		if (!--tries) {
+			fprintf(stderr, "too many collisions\n");
+			return 1;
+		}
 	}
 
 	ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 21fe149e3de1..bea079edc278 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -9,7 +9,7 @@ fi
 echo "--------------------"
 echo "running psock_fanout test"
 echo "--------------------"
-./psock_fanout
+./in_netns.sh ./psock_fanout
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 else
@@ -19,7 +19,7 @@ fi
 echo "--------------------"
 echo "running psock_tpacket test"
 echo "--------------------"
-./psock_tpacket
+./in_netns.sh ./psock_tpacket
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 else

From 22215908d81f61d293e8b128e819a8437f37cc20 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Wed, 27 Sep 2017 07:24:18 +0000
Subject: [PATCH 0287/1678] net/mlx5: E-Switch, Add callback to get representor
 device

Add a callback interface to get a protocol device (per representor type).
The Ethernet representors will expose their netdev via this interface.

This functionality can be later used by IB representor in order to find the
corresponding net device representor.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  | 11 +++++++++
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  5 ++++
 .../mellanox/mlx5/core/eswitch_offloads.c     | 24 +++++++++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 363d8dcb7f17..ea4b255380a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	kfree(ppriv); /* mlx5e_rep_priv */
 }
 
+static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_rep_priv *rpriv;
+
+	rpriv = mlx5e_rep_to_rep_priv(rep);
+
+	return rpriv->netdev;
+}
+
 static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
 
 		rep_if.load = mlx5e_vport_rep_load;
 		rep_if.unload = mlx5e_vport_rep_unload;
+		rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
 		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
 	}
 }
@@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
 
 	rep_if.load = mlx5e_nic_rep_load;
 	rep_if.unload = mlx5e_nic_rep_unload;
+	rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
 	rep_if.priv = rpriv;
 	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 	mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2fa037066b2f..4dfb1da435a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -150,6 +150,7 @@ struct mlx5_eswitch_rep_if {
 	int		       (*load)(struct mlx5_core_dev *dev,
 				       struct mlx5_eswitch_rep *rep);
 	void		       (*unload)(struct mlx5_eswitch_rep *rep);
+	void		       *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
 	void			*priv;
 	bool		       valid;
 };
@@ -286,6 +287,10 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 				       int vport_index,
 				       u8 rep_type);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+				 int vport,
+				 u8 rep_type);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 				 struct mlx5_esw_flow_attr *attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 99f583a15cc3..06623c8e92a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1160,6 +1160,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 
 	rep_if->load   = __rep_if->load;
 	rep_if->unload = __rep_if->unload;
+	rep_if->get_proto_dev = __rep_if->get_proto_dev;
 	rep_if->priv = __rep_if->priv;
 
 	rep_if->valid = true;
@@ -1188,3 +1189,26 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 	rep = &offloads->vport_reps[UPLINK_REP_INDEX];
 	return rep->rep_if[rep_type].priv;
 }
+
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+				 int vport,
+				 u8 rep_type)
+{
+	struct mlx5_esw_offload *offloads = &esw->offloads;
+	struct mlx5_eswitch_rep *rep;
+
+	if (vport == FDB_UPLINK_VPORT)
+		vport = UPLINK_REP_INDEX;
+
+	rep = &offloads->vport_reps[vport];
+
+	if (rep->rep_if[rep_type].valid &&
+	    rep->rep_if[rep_type].get_proto_dev)
+		return rep->rep_if[rep_type].get_proto_dev(rep);
+	return NULL;
+}
+
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
+{
+	return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
+}

From 57cbd893c4c575a24594fa6c0835247506ce26e2 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 16 Jan 2018 14:04:14 +0000
Subject: [PATCH 0288/1678] net/mlx5: E-Switch, Move representors definition to
 a global scope

In preparation for IB representors, move representors structs to a global
scope, also expose functions needed for registration, unregistration,
eswitch mode and creating a flow rule to direct traffic from SQs to the
right VF.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |  6 ++
 .../net/ethernet/mellanox/mlx5/core/eswitch.h | 44 +-------------
 .../mellanox/mlx5/core/eswitch_offloads.c     | 12 ++++
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  6 --
 include/linux/mlx5/driver.h                   |  6 ++
 include/linux/mlx5/eswitch.h                  | 57 +++++++++++++++++++
 6 files changed, 82 insertions(+), 49 deletions(-)
 create mode 100644 include/linux/mlx5/eswitch.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5ecf2cddc16d..aec4653d88bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2175,3 +2175,9 @@ free_out:
 	kvfree(out);
 	return err;
 }
+
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+	return esw->mode;
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 4dfb1da435a4..9c1e1a2d02ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -37,19 +37,9 @@
 #include <linux/if_link.h>
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
+#include <linux/mlx5/eswitch.h>
 #include "lib/mpfs.h"
 
-enum {
-	SRIOV_NONE,
-	SRIOV_LEGACY,
-	SRIOV_OFFLOADS
-};
-
-enum {
-	REP_ETH,
-	NUM_REP_TYPES,
-};
-
 #ifdef CONFIG_MLX5_ESWITCH
 
 #define MLX5_MAX_UC_PER_VPORT(dev) \
@@ -145,24 +135,6 @@ struct mlx5_eswitch_fdb {
 	};
 };
 
-struct mlx5_eswitch_rep;
-struct mlx5_eswitch_rep_if {
-	int		       (*load)(struct mlx5_core_dev *dev,
-				       struct mlx5_eswitch_rep *rep);
-	void		       (*unload)(struct mlx5_eswitch_rep *rep);
-	void		       *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
-	void			*priv;
-	bool		       valid;
-};
-
-struct mlx5_eswitch_rep {
-	struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
-	u16		       vport;
-	u8		       hw_id[ETH_ALEN];
-	u16		       vlan;
-	u32		       vlan_refcount;
-};
-
 struct mlx5_esw_offload {
 	struct mlx5_flow_table *ft_offloads;
 	struct mlx5_flow_group *vport_rx_group;
@@ -232,9 +204,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 				 int vport,
 				 struct ifla_vf_stats *vf_stats);
-struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport,
-				    u32 sqn);
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
 struct mlx5_flow_spec;
@@ -279,18 +248,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-				     int vport_index,
-				     struct mlx5_eswitch_rep_if *rep_if,
-				     u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport_index,
-				       u8 rep_type);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
-void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
-				 int vport,
-				 u8 rep_type);
-void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 				 struct mlx5_esw_flow_attr *attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 06623c8e92a2..92fdb10dd29f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -338,6 +338,7 @@ out:
 	kvfree(spec);
 	return flow_rule;
 }
+EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule);
 
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 {
@@ -1165,6 +1166,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 
 	rep_if->valid = true;
 }
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
 
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 				       int vport_index, u8 rep_type)
@@ -1179,6 +1181,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
 	rep->rep_if[rep_type].valid = false;
 }
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
@@ -1207,8 +1210,17 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
 		return rep->rep_if[rep_type].get_proto_dev(rep);
 	return NULL;
 }
+EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev);
 
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type)
 {
 	return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type);
 }
+EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev);
+
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+						int vport)
+{
+	return &esw->offloads.vport_reps[vport];
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 23e17ac0cba5..ee1a42a078ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -43,12 +43,6 @@
 #define DRIVER_NAME "mlx5_core"
 #define DRIVER_VERSION "5.0-0"
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
-#define MLX5_VPORT_MANAGER(mdev) \
-	(MLX5_CAP_GEN(mdev, vport_group_manager) && \
-	(MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-	 mlx5_core_is_pf(mdev))
-
 extern uint mlx5_core_debug_mask;
 
 #define mlx5_core_dbg(__dev, format, ...)				\
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bfea26af6de5..4814cad7456e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
 	return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
 }
 
+#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+	(MLX5_CAP_GEN(mdev, vport_group_manager) && \
+	 (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+	 mlx5_core_is_pf(mdev))
+
 static inline int mlx5_get_gid_table_len(u16 param)
 {
 	if (param > 4) {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
new file mode 100644
index 000000000000..f62bf486c18c
--- /dev/null
+++ b/include/linux/mlx5/eswitch.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _MLX5_ESWITCH_
+#define _MLX5_ESWITCH_
+
+#include <linux/mlx5/driver.h>
+
+enum {
+	SRIOV_NONE,
+	SRIOV_LEGACY,
+	SRIOV_OFFLOADS
+};
+
+enum {
+	REP_ETH,
+	NUM_REP_TYPES,
+};
+
+struct mlx5_eswitch_rep;
+struct mlx5_eswitch_rep_if {
+	int		       (*load)(struct mlx5_core_dev *dev,
+				       struct mlx5_eswitch_rep *rep);
+	void		       (*unload)(struct mlx5_eswitch_rep *rep);
+	void		       *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+	void			*priv;
+	bool		       valid;
+};
+
+struct mlx5_eswitch_rep {
+	struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES];
+	u16		       vport;
+	u8		       hw_id[ETH_ALEN];
+	u16		       vlan;
+	u32		       vlan_refcount;
+};
+
+void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+				     int vport_index,
+				     struct mlx5_eswitch_rep_if *rep_if,
+				     u8 rep_type);
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+				       int vport_index,
+				       u8 rep_type);
+void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
+				 int vport,
+				 u8 rep_type);
+struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
+						int vport);
+void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_flow_handle *
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
+				    int vport, u32 sqn);
+#endif

From cd3d07e7db69456beaeb90529270a95c7692f4b3 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 23 Jan 2018 11:19:12 +0000
Subject: [PATCH 0289/1678] net/mlx5: E-Switch, Increase number of FTEs in FDB
 in switchdev mode

The max FTE number should be the max number of SQs that can be opened.
Ethernet representors open one SQ each. Once we add IB representor this
will increase (depends on the user). For now lets start with 31
per IB representor and if needed increase in the future.

This increase only affects the number of FTEs in the slow path FDB,
offloaded rules (done via TC on the fast path portion of the FDB)
aren't affected.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 92fdb10dd29f..a5f5339a4e88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -427,6 +427,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 }
 
 #define MAX_PF_SQ 256
+#define MAX_SQ_NVPORTS 32
 
 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 {
@@ -456,7 +457,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	if (err)
 		goto fast_fdb_err;
 
-	table_size = nvports + MAX_PF_SQ + 1;
+	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 1;
 
 	ft_attr.max_fte = table_size;
 	ft_attr.prio = FDB_SLOW_PATH;
@@ -479,7 +480,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
 	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
 
-	ix = nvports + MAX_PF_SQ;
+	ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ;
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 

From f80be5436de78d0231cfb79b368edb9b90c3b057 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 30 Jan 2018 10:46:34 +0000
Subject: [PATCH 0290/1678] net/mlx5: E-Switch, Optimize HW steering tables in
 switchdev mode

Under switchdev mode we insert an eswitch miss rule causing any
unmatched traffic to be sent towards the PF vport. This miss rule can
be optimized if we break it to two, one case is for multicast traffic and
the other for unicast.

Breaking the miss rule into two (unicast and multicast) allows the firmware
to program the hardware in a more efficient way.

Using ConncetX-5 Ex with IXIA and testpmd (which use IB representors):

IXIA -> NIC -> PF -> IB representor -> NIC -> VF:
    - Without this optimization: 9.2 MPPS.
    - With this optimization: 18 MPPS.

VF -> NIC -> IB representor-> PF -> NIC -> IXIA:
    - Without this optimization: 17 MPPS.
    - With this optimization: 23.4 MPPS.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  3 +-
 .../mellanox/mlx5/core/eswitch_offloads.c     | 48 ++++++++++++++++---
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9c1e1a2d02ef..98d2177d0806 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -129,7 +129,8 @@ struct mlx5_eswitch_fdb {
 			struct mlx5_flow_table *fdb;
 			struct mlx5_flow_group *send_to_vport_grp;
 			struct mlx5_flow_group *miss_grp;
-			struct mlx5_flow_handle *miss_rule;
+			struct mlx5_flow_handle *miss_rule_uni;
+			struct mlx5_flow_handle *miss_rule_multi;
 			int vlan_push_pop_refcount;
 		} offloads;
 	};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a5f5339a4e88..0692d280883c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -351,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_spec *spec;
+	void *headers_c;
+	void *headers_v;
 	int err = 0;
+	u8 *dmac_c;
+	u8 *dmac_v;
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
@@ -359,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 		goto out;
 	}
 
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				 outer_headers);
+	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c,
+			      outer_headers.dmac_47_16);
+	dmac_c[0] = 0x01;
+
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = 0;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
@@ -367,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 					&flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		err = PTR_ERR(flow_rule);
-		esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
+		esw_warn(esw->dev,  "FDB: Failed to add unicast miss flow rule err %d\n", err);
 		goto out;
 	}
 
-	esw->fdb_table.offloads.miss_rule = flow_rule;
+	esw->fdb_table.offloads.miss_rule_uni = flow_rule;
+
+	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				 outer_headers);
+	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v,
+			      outer_headers.dmac_47_16);
+	dmac_v[0] = 0x01;
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+					&flow_act, &dest, 1);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err);
+		mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
+		goto out;
+	}
+
+	esw->fdb_table.offloads.miss_rule_multi = flow_rule;
+
 out:
 	kvfree(spec);
 	return err;
@@ -440,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	struct mlx5_flow_group *g;
 	void *match_criteria;
 	u32 *flow_group_in;
+	u8 *dmac;
 
 	esw_debug(esw->dev, "Create offloads FDB Tables\n");
 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
@@ -457,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	if (err)
 		goto fast_fdb_err;
 
-	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 1;
+	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
 
 	ft_attr.max_fte = table_size;
 	ft_attr.prio = FDB_SLOW_PATH;
@@ -494,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 
 	/* create miss group */
 	memset(flow_group_in, 0, inlen);
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+				      match_criteria);
+	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+			    outer_headers.dmac_47_16);
+	dmac[0] = 0x01;
 
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
 
 	g = mlx5_create_flow_group(fdb, flow_group_in);
 	if (IS_ERR(g)) {
@@ -533,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 		return;
 
 	esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
-	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
+	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi);
+	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 

From 5e65b02c00900155833008b7992bbbbc7f0df2ac Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 16 Jan 2018 14:13:46 +0000
Subject: [PATCH 0291/1678] net/mlx5: E-Switch, Add definition of IB
 representor

Create a new representor type: REP_IB. which will be initialized by an IB
device that is used as a logical representor of a eswitch vport (VF or
uplink) just like we have a net device today in switchdev mode.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/eswitch.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index f62bf486c18c..d3c9db492b30 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -16,6 +16,7 @@ enum {
 
 enum {
 	REP_ETH,
+	REP_IB,
 	NUM_REP_TYPES,
 };
 

From fc385b7ac48089ed1c6866cdc0dceb4ae1fa54de Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 16 Jan 2018 14:34:48 +0000
Subject: [PATCH 0292/1678] IB/mlx5: Add basic regiser/unregister representors
 code

Create the basic infrastructure of registering and unregistering
IB representors. The load/unload callbacks are left empty and
proper implementation will be introduced in following patches.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/Makefile  |   1 +
 drivers/infiniband/hw/mlx5/ib_rep.c  | 104 +++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/ib_rep.h  |  49 +++++++++++++
 drivers/infiniband/hw/mlx5/main.c    |  13 ++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   2 +
 5 files changed, 169 insertions(+)
 create mode 100644 drivers/infiniband/hw/mlx5/ib_rep.c
 create mode 100644 drivers/infiniband/hw/mlx5/ib_rep.h

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index bc6299697dda..d42b922bede8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
 mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..adf2439ddacb
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include "ib_rep.h"
+
+static int
+mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+	return 0;
+}
+
+static void
+mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+	return 0;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+}
+
+static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
+{
+	return mlx5_ib_rep_to_dev(rep);
+}
+
+static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+	int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+	int vport;
+
+	for (vport = 1; vport < total_vfs; vport++) {
+		struct mlx5_eswitch_rep_if rep_if = {};
+
+		rep_if.load = mlx5_ib_vport_rep_load;
+		rep_if.unload = mlx5_ib_vport_rep_unload;
+		rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
+	}
+}
+
+static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+	int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev);
+	int vport;
+
+	for (vport = 1; vport < total_vfs; vport++)
+		mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+}
+
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep_if rep_if = {};
+
+	rep_if.load = mlx5_ib_nic_rep_load;
+	rep_if.unload = mlx5_ib_nic_rep_unload;
+	rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+	rep_if.priv = dev;
+
+	mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB);
+
+	mlx5_ib_rep_register_vf_vports(dev);
+}
+
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_eswitch *esw   = dev->mdev->priv.eswitch;
+
+	mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */
+	mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/
+}
+
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+	return mlx5_eswitch_mode(esw);
+}
+
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+					  int vport_index)
+{
+	return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+					  int vport_index)
+{
+	return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
+}
+
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+{
+	return mlx5_eswitch_vport_rep(esw, vport);
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..64ab54b82820
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+					  int vport_index);
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+					   int vport_index);
+void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
+void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
+{
+	return SRIOV_NONE;
+}
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
+					  int vport_index)
+{
+	return NULL;
+}
+
+static inline
+struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
+					   int vport_index)
+{
+	return NULL;
+}
+
+static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+#endif
+
+static inline
+struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+	return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv;
+}
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4236c8086820..725322268530 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -57,6 +57,7 @@
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 #include "cmd.h"
 
 #define DRIVER_NAME "mlx5_ib"
@@ -4905,6 +4906,18 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
+static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+{
+	mlx5_ib_register_vport_reps(dev);
+
+	return 0;
+}
+
+static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+	mlx5_ib_unregister_vport_reps(dev);
+}
+
 static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 			     const struct mlx5_ib_profile *profile,
 			     int stage)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index eafb9751daf6..ec798c6371be 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -743,6 +743,7 @@ enum mlx5_ib_stages {
 	MLX5_IB_STAGE_UMR_RESOURCES,
 	MLX5_IB_STAGE_DELAY_DROP,
 	MLX5_IB_STAGE_CLASS_ATTR,
+	MLX5_IB_STAGE_REP_REG,
 	MLX5_IB_STAGE_MAX,
 };
 
@@ -807,6 +808,7 @@ struct mlx5_ib_dev {
 	struct mlx5_sq_bfreg	fp_bfreg;
 	struct mlx5_ib_delay_drop	delay_drop;
 	const struct mlx5_ib_profile	*profile;
+	struct mlx5_eswitch_rep		*rep;
 
 	/* protect the user_td */
 	struct mutex		lb_mutex;

From 9a4ca38d77054febf35cd568cd86aa41c6477301 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 16 Jan 2018 14:42:35 +0000
Subject: [PATCH 0293/1678] IB/mlx5: Allocate flow DB only on PF IB device

A flow DB is a shared resource between PF and representors,
need to allocate it only when creating the PF IB device.
Once we add IB representors, they will use the flow db which was
created by the PF.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c    | 45 ++++++++++++++++++++--------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  3 +-
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 725322268530..7f65e9909901 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2632,7 +2632,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 							  ibflow);
 	struct mlx5_ib_flow_handler *iter, *tmp;
 
-	mutex_lock(&dev->flow_db.lock);
+	mutex_lock(&dev->flow_db->lock);
 
 	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
 		mlx5_del_flow_rules(iter->rule);
@@ -2643,7 +2643,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 
 	mlx5_del_flow_rules(handler->rule);
 	put_flow_table(dev, handler->prio, true);
-	mutex_unlock(&dev->flow_db.lock);
+	mutex_unlock(&dev->flow_db->lock);
 
 	kfree(handler);
 
@@ -2692,7 +2692,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 					     MLX5_FLOW_NAMESPACE_BYPASS);
 		num_entries = MLX5_FS_MAX_ENTRIES;
 		num_groups = MLX5_FS_MAX_TYPES;
-		prio = &dev->flow_db.prios[priority];
+		prio = &dev->flow_db->prios[priority];
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
 		ns = mlx5_get_flow_namespace(dev->mdev,
@@ -2700,7 +2700,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 		build_leftovers_ft_param(&priority,
 					 &num_entries,
 					 &num_groups);
-		prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
 	} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
 		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
 					allow_sniffer_and_nic_rx_shared_tir))
@@ -2710,7 +2710,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 					     MLX5_FLOW_NAMESPACE_SNIFFER_RX :
 					     MLX5_FLOW_NAMESPACE_SNIFFER_TX);
 
-		prio = &dev->flow_db.sniffer[ft_type];
+		prio = &dev->flow_db->sniffer[ft_type];
 		priority = 0;
 		num_entries = 1;
 		num_groups = 1;
@@ -3000,7 +3000,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	if (!dst)
 		return ERR_PTR(-ENOMEM);
 
-	mutex_lock(&dev->flow_db.lock);
+	mutex_lock(&dev->flow_db->lock);
 
 	ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
 	if (IS_ERR(ft_prio)) {
@@ -3049,7 +3049,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 		goto destroy_ft;
 	}
 
-	mutex_unlock(&dev->flow_db.lock);
+	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
 
 	return &handler->ibflow;
@@ -3059,7 +3059,7 @@ destroy_ft:
 	if (ft_prio_tx)
 		put_flow_table(dev, ft_prio_tx, false);
 unlock:
-	mutex_unlock(&dev->flow_db.lock);
+	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
 	kfree(handler);
 	return ERR_PTR(err);
@@ -3803,7 +3803,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 		goto err_destroy_vport_lag;
 	}
 
-	dev->flow_db.lag_demux_ft = ft;
+	dev->flow_db->lag_demux_ft = ft;
 	return 0;
 
 err_destroy_vport_lag:
@@ -3815,9 +3815,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 
-	if (dev->flow_db.lag_demux_ft) {
-		mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
-		dev->flow_db.lag_demux_ft = NULL;
+	if (dev->flow_db->lag_demux_ft) {
+		mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+		dev->flow_db->lag_demux_ft = NULL;
 
 		mlx5_cmd_destroy_vport_lag(mdev);
 	}
@@ -4565,7 +4565,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 		dev->mdev->priv.eq_table.num_comp_vectors;
 	dev->ib_dev.dev.parent		= &mdev->pdev->dev;
 
-	mutex_init(&dev->flow_db.lock);
 	mutex_init(&dev->cap_mask_mutex);
 	INIT_LIST_HEAD(&dev->qp_list);
 	spin_lock_init(&dev->reset_flow_resource_lock);
@@ -4586,6 +4585,23 @@ err_free_port:
 	return -ENOMEM;
 }
 
+static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+{
+	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+	if (!dev->flow_db)
+		return -ENOMEM;
+
+	mutex_init(&dev->flow_db->lock);
+
+	return 0;
+}
+
+static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
+{
+	kfree(dev->flow_db);
+}
+
 static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
@@ -4974,6 +4990,9 @@ static const struct mlx5_ib_profile pf_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_INIT,
 		     mlx5_ib_stage_init_init,
 		     mlx5_ib_stage_init_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+		     mlx5_ib_stage_flow_db_init,
+		     mlx5_ib_stage_flow_db_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 		     mlx5_ib_stage_caps_init,
 		     NULL),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index ec798c6371be..659bff5e687d 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -731,6 +731,7 @@ struct mlx5_ib_delay_drop {
 
 enum mlx5_ib_stages {
 	MLX5_IB_STAGE_INIT,
+	MLX5_IB_STAGE_FLOW_DB,
 	MLX5_IB_STAGE_CAPS,
 	MLX5_IB_STAGE_ROCE,
 	MLX5_IB_STAGE_DEVICE_RESOURCES,
@@ -798,7 +799,7 @@ struct mlx5_ib_dev {
 	struct srcu_struct      mr_srcu;
 	u32			null_mkey;
 #endif
-	struct mlx5_ib_flow_db	flow_db;
+	struct mlx5_ib_flow_db	*flow_db;
 	/* protect resources needed as part of reset flow */
 	spinlock_t		reset_flow_resource_lock;
 	struct list_head	qp_list;

From 018a94eec0efc04c678f694f903e621622fd0bcf Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 16 Jan 2018 14:44:29 +0000
Subject: [PATCH 0294/1678] IB/mlx5: Add match on vport when in switchdev mode

When we point to a representor, it means we are in switchdev mode.
The flow db is shared between PF and virtual function representors
so each rule created needs to have a match on its specific source port.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7f65e9909901..5624c5eba2ed 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2803,6 +2803,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	if (!flow_is_multicast_only(flow_attr))
 		set_underlay_qp(dev, spec, underlay_qpn);
 
+	if (dev->rep) {
+		void *misc;
+
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters);
+		MLX5_SET(fte_match_set_misc, misc, source_port,
+			 dev->rep->vport);
+		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    misc_parameters);
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+	}
+
 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
 	if (is_drop) {
 		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;

From bcf87f1dbbec0d9abaf89073dd761a41876bc6c1 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 16 Jan 2018 15:02:36 +0000
Subject: [PATCH 0295/1678] IB/mlx5: Listen to netdev register/unresiter events
 in switchdev mode

Currently we listen to netdev register/unregister event based on PCI
device. When in switchdev mode PF and representors share the same PCI
device, so in order to pair ib device and netdev in switchdev mode
compare the netdev that triggered the event to that of the representor.

Expose a function that lets you receive the netdev associated what
a given representor.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.h |  8 ++++++++
 drivers/infiniband/hw/mlx5/main.c   | 13 +++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 64ab54b82820..923ad4cba941 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -17,6 +17,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
 					   int vport_index);
 void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
 void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+					  int vport_index);
 #else /* CONFIG_MLX5_ESWITCH */
 static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
 {
@@ -39,6 +41,12 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
 
 static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
 static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+					  int vport_index)
+{
+	return NULL;
+}
 #endif
 
 static inline
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5624c5eba2ed..bc97c73704c4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -155,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this,
 	case NETDEV_REGISTER:
 	case NETDEV_UNREGISTER:
 		write_lock(&roce->netdev_lock);
+		if (ibdev->rep) {
+			struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
+			struct net_device *rep_ndev;
 
-		if (ndev->dev.parent == &mdev->pdev->dev)
-			roce->netdev = (event == NETDEV_UNREGISTER) ?
+			rep_ndev = mlx5_ib_get_rep_netdev(esw,
+							  ibdev->rep->vport);
+			if (rep_ndev == ndev)
+				roce->netdev = (event == NETDEV_UNREGISTER) ?
 					NULL : ndev;
+		} else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+			roce->netdev = (event == NETDEV_UNREGISTER) ?
+				NULL : ndev;
+		}
 		write_unlock(&roce->netdev_lock);
 		break;
 

From 8e6efa3a31f4a81a4d8817d68110446df383d049 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Mon, 6 Nov 2017 12:22:13 +0000
Subject: [PATCH 0296/1678] IB/mlx5: When in switchdev mode, expose only raw
 packet capabilities

Currently in switchdev mode we allow only for raw packet QPs.
Expose the right capabilities and set the gid table length to 0, also
make sure we don't try to enable RoCE, so split the function
to enable RoCE so representors can enable only the notifier needed for
net device events.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c    | 155 +++++++++++++++++++++------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |   1 +
 2 files changed, 125 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index bc97c73704c4..2b05ba747d39 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -131,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev,
 	int ret;
 
 	memset(&attr, 0, sizeof(attr));
-	ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+	ret = ibdev->query_port(ibdev, port_num, &attr);
 	if (!ret)
 		*state = attr.state;
 	return ret;
@@ -1278,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
 	return ret;
 }
 
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+				  struct ib_port_attr *props)
+{
+	int ret;
+
+	/* Only link layer == ethernet is valid for representors */
+	ret = mlx5_query_port_roce(ibdev, port, props);
+	if (ret || !props)
+		return ret;
+
+	/* We don't support GIDS */
+	props->gid_tbl_len = 0;
+
+	return ret;
+}
+
 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 			     union ib_gid *gid)
 {
@@ -3794,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 	return 0;
 }
 
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+				   struct ib_port_immutable *immutable)
+{
+	struct ib_port_attr attr;
+	int err;
+
+	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+	err = ib_query_port(ibdev, port_num, &attr);
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+	return 0;
+}
+
 static void get_dev_fw_str(struct ib_device *ibdev, char *str)
 {
 	struct mlx5_ib_dev *dev =
@@ -3870,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
 {
 	int err;
 
-	err = mlx5_add_netdev_notifier(dev, port_num);
-	if (err)
-		return err;
-
 	if (MLX5_CAP_GEN(dev->mdev, roce)) {
 		err = mlx5_nic_vport_enable_roce(dev->mdev);
 		if (err)
-			goto err_unregister_netdevice_notifier;
+			return err;
 	}
 
 	err = mlx5_eth_lag_init(dev);
@@ -3890,8 +3921,6 @@ err_disable_roce:
 	if (MLX5_CAP_GEN(dev->mdev, roce))
 		mlx5_nic_vport_disable_roce(dev->mdev);
 
-err_unregister_netdevice_notifier:
-	mlx5_remove_netdev_notifier(dev, port_num);
 	return err;
 }
 
@@ -4664,7 +4693,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
 
 	dev->ib_dev.query_device	= mlx5_ib_query_device;
-	dev->ib_dev.query_port		= mlx5_ib_query_port;
 	dev->ib_dev.get_link_layer	= mlx5_ib_port_link_layer;
 	dev->ib_dev.query_gid		= mlx5_ib_query_gid;
 	dev->ib_dev.add_gid		= mlx5_ib_add_gid;
@@ -4707,7 +4735,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.alloc_mr		= mlx5_ib_alloc_mr;
 	dev->ib_dev.map_mr_sg		= mlx5_ib_map_mr_sg;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
-	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
 	dev->ib_dev.get_vector_affinity	= mlx5_ib_get_vector_affinity;
 	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
@@ -4758,6 +4785,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
+{
+	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+	dev->ib_dev.query_port		= mlx5_ib_query_port;
+
+	return 0;
+}
+
+static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+{
+	dev->ib_dev.get_port_immutable  = mlx5_port_rep_immutable;
+	dev->ib_dev.query_port		= mlx5_ib_rep_query_port;
+
+	return 0;
+}
+
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
+					  u8 port_num)
+{
+	int i;
+
+	for (i = 0; i < dev->num_ports; i++) {
+		dev->roce[i].dev = dev;
+		dev->roce[i].native_port_num = i + 1;
+		dev->roce[i].last_port_state = IB_PORT_DOWN;
+	}
+
+	dev->ib_dev.get_netdev	= mlx5_ib_get_netdev;
+	dev->ib_dev.create_wq	 = mlx5_ib_create_wq;
+	dev->ib_dev.modify_wq	 = mlx5_ib_modify_wq;
+	dev->ib_dev.destroy_wq	 = mlx5_ib_destroy_wq;
+	dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
+	dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
+
+	dev->ib_dev.uverbs_ex_cmd_mask |=
+			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+
+	return mlx5_add_netdev_notifier(dev, port_num);
+}
+
+static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+	u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+	mlx5_remove_netdev_notifier(dev, port_num);
+}
+
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_core_dev *mdev = dev->mdev;
+	enum rdma_link_layer ll;
+	int port_type_cap;
+	int err = 0;
+	u8 port_num;
+
+	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+	if (ll == IB_LINK_LAYER_ETHERNET)
+		err = mlx5_ib_stage_common_roce_init(dev, port_num);
+
+	return err;
+}
+
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+	mlx5_ib_stage_common_roce_cleanup(dev);
+}
+
 static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
@@ -4765,37 +4866,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 	int port_type_cap;
 	u8 port_num;
 	int err;
-	int i;
 
 	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
 	if (ll == IB_LINK_LAYER_ETHERNET) {
-		for (i = 0; i < dev->num_ports; i++) {
-			dev->roce[i].dev = dev;
-			dev->roce[i].native_port_num = i + 1;
-			dev->roce[i].last_port_state = IB_PORT_DOWN;
-		}
-
-		dev->ib_dev.get_netdev	= mlx5_ib_get_netdev;
-		dev->ib_dev.create_wq	 = mlx5_ib_create_wq;
-		dev->ib_dev.modify_wq	 = mlx5_ib_modify_wq;
-		dev->ib_dev.destroy_wq	 = mlx5_ib_destroy_wq;
-		dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
-		dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-		dev->ib_dev.uverbs_ex_cmd_mask |=
-			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
-			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
-			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
-			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
-			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
-		err = mlx5_enable_eth(dev, port_num);
+		err = mlx5_ib_stage_common_roce_init(dev, port_num);
 		if (err)
 			return err;
+
+		err = mlx5_enable_eth(dev, port_num);
+		if (err)
+			goto cleanup;
 	}
 
 	return 0;
+cleanup:
+	mlx5_ib_stage_common_roce_cleanup(dev);
+
+	return err;
 }
 
 static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
@@ -4811,7 +4901,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
 
 	if (ll == IB_LINK_LAYER_ETHERNET) {
 		mlx5_disable_eth(dev);
-		mlx5_remove_netdev_notifier(dev, port_num);
+		mlx5_ib_stage_common_roce_cleanup(dev);
 	}
 }
 
@@ -5017,6 +5107,9 @@ static const struct mlx5_ib_profile pf_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 		     mlx5_ib_stage_caps_init,
 		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+		     mlx5_ib_stage_non_default_cb,
+		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
 		     mlx5_ib_stage_roce_init,
 		     mlx5_ib_stage_roce_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 659bff5e687d..4dd98b1e9165 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -733,6 +733,7 @@ enum mlx5_ib_stages {
 	MLX5_IB_STAGE_INIT,
 	MLX5_IB_STAGE_FLOW_DB,
 	MLX5_IB_STAGE_CAPS,
+	MLX5_IB_STAGE_NON_DEFAULT_CB,
 	MLX5_IB_STAGE_ROCE,
 	MLX5_IB_STAGE_DEVICE_RESOURCES,
 	MLX5_IB_STAGE_ODP,

From 72afcf82477a58f40d748271a4a78755983203ef Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Mon, 22 Jan 2018 15:29:44 +0000
Subject: [PATCH 0297/1678] IB/mlx5: Don't expose MR cache in switchdev mode

When enabling many VFs and switching to switchdev mode, the total amount
of mkeys we try to allocate when loading representors is very large and
may cause timeouts on allocations, the same issues was observed on VFs
and we employ the same fix that was done for them. We avoid allocating
the full MR cache on load but still allow it to be manipulated once the
IB device is loaded.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 556e015678de..a5fad3e87ff7 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 
 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 {
-	if (!mlx5_debugfs_root)
+	if (!mlx5_debugfs_root || dev->rep)
 		return;
 
 	debugfs_remove_recursive(dev->cache.root);
@@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 	struct mlx5_cache_ent *ent;
 	int i;
 
-	if (!mlx5_debugfs_root)
+	if (!mlx5_debugfs_root || dev->rep)
 		return 0;
 
 	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
@@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 			   MLX5_IB_UMR_OCTOWORD;
 		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
 		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+		    !dev->rep &&
 		    mlx5_core_is_pf(dev->mdev))
 			ent->limit = dev->mdev->profile->mr_cache[i].limit;
 		else

From b96c9dde17359520d6a5a8eb6d56d91f22c5a413 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Mon, 29 Jan 2018 10:40:37 +0000
Subject: [PATCH 0298/1678] IB/mlx5: E-Switch, Add rule to forward traffic to
 vport

In order to forward traffic from representor's SQ to the right virtual
function, every time an SQ is created also add the corresponding flow rule
to the FDB.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c  | 20 ++++++++++++++++++++
 drivers/infiniband/hw/mlx5/ib_rep.h  |  8 ++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  1 +
 drivers/infiniband/hw/mlx5/qp.c      | 16 ++++++++++++++++
 4 files changed, 45 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index adf2439ddacb..a5d0c3917568 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -102,3 +102,23 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
 {
 	return mlx5_eswitch_vport_rep(esw, vport);
 }
+
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+			      struct mlx5_ib_sq *sq)
+{
+	struct mlx5_flow_handle *flow_rule;
+	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+
+	if (!dev->rep)
+		return 0;
+
+	flow_rule =
+		mlx5_eswitch_add_send_to_vport_rule(esw,
+						    dev->rep->vport,
+						    sq->base.mqp.qpn);
+	if (IS_ERR(flow_rule))
+		return PTR_ERR(flow_rule);
+	sq->flow_rule = flow_rule;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 923ad4cba941..832cfd382ecc 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -17,6 +17,8 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
 					   int vport_index);
 void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
 void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev);
+int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+			      struct mlx5_ib_sq *sq);
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
 					  int vport_index);
 #else /* CONFIG_MLX5_ESWITCH */
@@ -41,6 +43,12 @@ struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
 
 static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {}
 static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {}
+static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+					    struct mlx5_ib_sq *sq)
+{
+	return 0;
+}
+
 static inline
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
 					  int vport_index)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4dd98b1e9165..86d07670bfeb 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -343,6 +343,7 @@ struct mlx5_ib_sq {
 	struct mlx5_ib_wq	*sq;
 	struct mlx5_ib_ubuffer  ubuffer;
 	struct mlx5_db		*doorbell;
+	struct mlx5_flow_handle	*flow_rule;
 	u32			tisn;
 	u8			state;
 };
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 39d24bf694a8..8aed091036c6 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -36,6 +36,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
+#include "ib_rep.h"
 
 /* not supported currently */
 static int wq_signature;
@@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
 	mlx5_core_destroy_tis(dev->mdev, sq->tisn);
 }
 
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+				       struct mlx5_ib_sq *sq)
+{
+	if (sq->flow_rule)
+		mlx5_del_flow_rules(sq->flow_rule);
+}
+
 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 				   struct mlx5_ib_sq *sq, void *qpin,
 				   struct ib_pd *pd)
@@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 	if (err)
 		goto err_umem;
 
+	err = create_flow_rule_vport_sq(dev, sq);
+	if (err)
+		goto err_flow;
+
 	return 0;
 
+err_flow:
+	mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+
 err_umem:
 	ib_umem_release(sq->ubuffer.umem);
 	sq->ubuffer.umem = NULL;
@@ -1157,6 +1172,7 @@ err_umem:
 static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 				     struct mlx5_ib_sq *sq)
 {
+	destroy_flow_rule_vport_sq(dev, sq);
 	mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
 	ib_umem_release(sq->ubuffer.umem);
 }

From b5ca15ad7e6189f39dff73d786e41d2a507f7b8b Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 23 Jan 2018 11:16:30 +0000
Subject: [PATCH 0299/1678] IB/mlx5: Add proper representors support

This commit adds full support for IB representor:

1) Representors profile, We add two new profiles:
   nic_rep_profile - This profile will be used to create an IB device that
   represents the PF/UPLINK.
   rep_profile - This profile will be used to create an IB device that
   represents VFs. Each VF will be its own representor.
2) Proper load/unload callbacks, Those are called by the E-Switch when
   moving to/from switchdev mode.
3) Different flow DB handling for when we in switchdev mode.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c  |  65 ++++++++++++++
 drivers/infiniband/hw/mlx5/ib_rep.h  |   7 ++
 drivers/infiniband/hw/mlx5/main.c    | 125 ++++++++++++++++++++-------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  25 ++++++
 4 files changed, 192 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index a5d0c3917568..61cc3d7db257 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -5,6 +5,42 @@
 
 #include "ib_rep.h"
 
+static const struct mlx5_ib_profile rep_profile = {
+	STAGE_CREATE(MLX5_IB_STAGE_INIT,
+		     mlx5_ib_stage_init_init,
+		     mlx5_ib_stage_init_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+		     mlx5_ib_stage_rep_flow_db_init,
+		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+		     mlx5_ib_stage_caps_init,
+		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+		     mlx5_ib_stage_rep_non_default_cb,
+		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+		     mlx5_ib_stage_rep_roce_init,
+		     mlx5_ib_stage_rep_roce_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+		     mlx5_ib_stage_dev_res_init,
+		     mlx5_ib_stage_dev_res_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+		     mlx5_ib_stage_counters_init,
+		     mlx5_ib_stage_counters_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+		     mlx5_ib_stage_bfrag_init,
+		     mlx5_ib_stage_bfrag_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+		     mlx5_ib_stage_ib_reg_init,
+		     mlx5_ib_stage_ib_reg_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+		     mlx5_ib_stage_umr_res_init,
+		     mlx5_ib_stage_umr_res_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+		     mlx5_ib_stage_class_attr_init,
+		     NULL),
+};
+
 static int
 mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
@@ -14,17 +50,41 @@ mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 static void
 mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
 {
+	rep->rep_if[REP_IB].priv = NULL;
 }
 
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
+	struct mlx5_ib_dev *ibdev;
+
+	ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+	if (!ibdev)
+		return -ENOMEM;
+
+	ibdev->rep = rep;
+	ibdev->mdev = dev;
+	ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports),
+			       MLX5_CAP_GEN(dev, num_vhca_ports));
+	if (!__mlx5_ib_add(ibdev, &rep_profile))
+		return -EINVAL;
+
+	rep->rep_if[REP_IB].priv = ibdev;
+
 	return 0;
 }
 
 static void
 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
+	struct mlx5_ib_dev *dev;
+
+	if (!rep->rep_if[REP_IB].priv)
+		return;
+
+	dev = mlx5_ib_rep_to_dev(rep);
+	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+	rep->rep_if[REP_IB].priv = NULL;
 }
 
 static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
@@ -98,6 +158,11 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
 	return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
 }
 
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+	return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+}
+
 struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
 {
 	return mlx5_eswitch_vport_rep(esw, vport);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 832cfd382ecc..046fd942fd46 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -13,6 +13,7 @@
 u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
 					  int vport_index);
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
 struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
 					   int vport_index);
 void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev);
@@ -34,6 +35,12 @@ struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
 	return NULL;
 }
 
+static inline
+struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
+{
+	return NULL;
+}
+
 static inline
 struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
 					   int vport_index)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2b05ba747d39..ee55d7d64554 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4554,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
 	mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -4563,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 	kfree(dev->port);
 }
 
-static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 	const char *name;
@@ -4647,12 +4647,26 @@ static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_ib_dev *nic_dev;
+
+	nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch);
+
+	if (!nic_dev)
+		return -EINVAL;
+
+	dev->flow_db = nic_dev->flow_db;
+
+	return 0;
+}
+
 static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
 {
 	kfree(dev->flow_db);
 }
 
-static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 	int err;
@@ -4793,7 +4807,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
-static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
 {
 	dev->ib_dev.get_port_immutable  = mlx5_port_rep_immutable;
 	dev->ib_dev.query_port		= mlx5_ib_rep_query_port;
@@ -4905,12 +4919,12 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
 	}
 }
 
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
 {
 	return create_dev_resources(&dev->devr);
 }
 
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
 {
 	destroy_dev_resources(&dev->devr);
 }
@@ -4922,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
 	return mlx5_ib_odp_init_one(dev);
 }
 
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 {
 	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
 		dev->ib_dev.get_hw_stats	= mlx5_ib_get_hw_stats;
@@ -4934,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
 {
 	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
 		mlx5_ib_dealloc_counters(dev);
@@ -4965,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
 	mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 }
 
-static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 {
 	int err;
 
@@ -4980,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
 	return err;
 }
 
-static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
 	mlx5_free_bfreg(dev->mdev, &dev->bfreg);
 }
 
-static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
 	return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 {
 	ib_unregister_device(&dev->ib_dev);
 }
 
-static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
 {
 	return create_umr_res(dev);
 }
 
-static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
 {
 	destroy_umrc_res(dev);
 }
@@ -5018,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
 	cancel_delay_drop(dev);
 }
 
-static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
 {
 	int err;
 	int i;
@@ -5045,9 +5059,9 @@ static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
 	mlx5_ib_unregister_vport_reps(dev);
 }
 
-static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
-			     const struct mlx5_ib_profile *profile,
-			     int stage)
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+		      const struct mlx5_ib_profile *profile,
+		      int stage)
 {
 	/* Number of stages to cleanup */
 	while (stage) {
@@ -5061,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
 
-static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
-			   const struct mlx5_ib_profile *profile)
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+		    const struct mlx5_ib_profile *profile)
 {
-	struct mlx5_ib_dev *dev;
 	int err;
 	int i;
 
 	printk_once(KERN_INFO "%s", mlx5_version);
 
-	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
-	if (!dev)
-		return NULL;
-
-	dev->mdev = mdev;
-	dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
-			     MLX5_CAP_GEN(mdev, num_vhca_ports));
-
 	for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
 		if (profile->stage[i].init) {
 			err = profile->stage[i].init(dev);
@@ -5145,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = {
 		     NULL),
 };
 
+static const struct mlx5_ib_profile nic_rep_profile = {
+	STAGE_CREATE(MLX5_IB_STAGE_INIT,
+		     mlx5_ib_stage_init_init,
+		     mlx5_ib_stage_init_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
+		     mlx5_ib_stage_flow_db_init,
+		     mlx5_ib_stage_flow_db_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+		     mlx5_ib_stage_caps_init,
+		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+		     mlx5_ib_stage_rep_non_default_cb,
+		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+		     mlx5_ib_stage_rep_roce_init,
+		     mlx5_ib_stage_rep_roce_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+		     mlx5_ib_stage_dev_res_init,
+		     mlx5_ib_stage_dev_res_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+		     mlx5_ib_stage_counters_init,
+		     mlx5_ib_stage_counters_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_UAR,
+		     mlx5_ib_stage_uar_init,
+		     mlx5_ib_stage_uar_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+		     mlx5_ib_stage_bfrag_init,
+		     mlx5_ib_stage_bfrag_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+		     mlx5_ib_stage_ib_reg_init,
+		     mlx5_ib_stage_ib_reg_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
+		     mlx5_ib_stage_umr_res_init,
+		     mlx5_ib_stage_umr_res_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
+		     mlx5_ib_stage_class_attr_init,
+		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
+		     mlx5_ib_stage_rep_reg_init,
+		     mlx5_ib_stage_rep_reg_cleanup),
+};
+
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 {
 	struct mlx5_ib_multiport_info *mpi;
@@ -5190,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 {
 	enum rdma_link_layer ll;
+	struct mlx5_ib_dev *dev;
 	int port_type_cap;
 
+	printk_once(KERN_INFO "%s", mlx5_version);
+
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
@@ -5201,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		return mlx5_ib_add_slave_port(mdev, port_num);
 	}
 
-	return __mlx5_ib_add(mdev, &pf_profile);
+	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+	if (!dev)
+		return NULL;
+
+	dev->mdev = mdev;
+	dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+			     MLX5_CAP_GEN(mdev, num_vhca_ports));
+
+	if (MLX5_VPORT_MANAGER(mdev) &&
+	    mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
+		dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
+
+		return __mlx5_ib_add(dev, &nic_rep_profile);
+	}
+
+	return __mlx5_ib_add(dev, &pf_profile);
 }
 
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 86d07670bfeb..e0bad28e0f09 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1054,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+/* Needed for rep profile */
+int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+		      const struct mlx5_ib_profile *profile,
+		      int stage);
+void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
+		    const struct mlx5_ib_profile *profile);
+
 int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
 			  u8 port, struct ifla_vf_info *info);
 int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,

From c5447c70594b6a8e1e62a9dd9373813cb38f1c69 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 23 Jan 2018 11:24:13 +0000
Subject: [PATCH 0300/1678] net/mlx5: E-Switch, Reload IB interface when
 switching devlink modes

Up until this point it wasn't possible to activate IB representors
when switching to switchdev mode, remove this limitation.

We trigger reload of the PF IB interface in order to make sure that
already allocated resources are invalid and new resources will be opened
correctly with all the limitations of switchdev mode applied (only raw
packet capabilities, without RoCE). We also move the remove/add to a
place where the E-Switch mode is set/unset to better control when to
trigger this action, this will allow the IB side to start in the correct
mode.

For better code reuse, create a function which reloads an interface and
export it.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c   |  8 ++++++++
 .../net/ethernet/mellanox/mlx5/core/eswitch.c   | 17 +++++++++++++++--
 .../mellanox/mlx5/core/eswitch_offloads.c       | 17 ++---------------
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h |  1 +
 4 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 17b723218b0c..b994b80d5714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf)
 }
 EXPORT_SYMBOL(mlx5_unregister_interface);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol)
+{
+	mutex_lock(&mlx5_intf_mutex);
+	mlx5_remove_dev_by_protocol(mdev, protocol);
+	mlx5_add_dev_by_protocol(mdev, protocol);
+	mutex_unlock(&mlx5_intf_mutex);
+}
+
 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
 {
 	struct mlx5_priv *priv = &mdev->priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index aec4653d88bc..964cd8c4fdcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
 	esw->mode = mode;
 
-	if (mode == SRIOV_LEGACY)
+	if (mode == SRIOV_LEGACY) {
 		err = esw_create_legacy_fdb_table(esw, nvfs + 1);
-	else
+	} else {
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
 		err = esw_offloads_init(esw, nvfs + 1);
+	}
+
 	if (err)
 		goto abort;
 
@@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 
 abort:
 	esw->mode = SRIOV_NONE;
+
+	if (mode == SRIOV_OFFLOADS)
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
+
 	return err;
 }
 
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
 	struct esw_mc_addr *mc_promisc;
+	int old_mode;
 	int nvports;
 	int i;
 
@@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 	else if (esw->mode == SRIOV_OFFLOADS)
 		esw_offloads_cleanup(esw, nvports);
 
+	old_mode = esw->mode;
 	esw->mode = SRIOV_NONE;
+
+	if (old_mode == SRIOV_OFFLOADS)
+		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 0692d280883c..0a8303c1b52f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -827,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 {
 	int err;
 
-	/* disable PF RoCE so missed packets don't go through RoCE steering */
-	mlx5_dev_list_lock();
-	mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-	mlx5_dev_list_unlock();
-
 	err = esw_create_offloads_fdb_tables(esw, nvports);
 	if (err)
-		goto create_fdb_err;
+		return err;
 
 	err = esw_create_offloads_table(esw);
 	if (err)
@@ -859,12 +854,6 @@ create_fg_err:
 create_ft_err:
 	esw_destroy_offloads_fdb_tables(esw);
 
-create_fdb_err:
-	/* enable back PF RoCE */
-	mlx5_dev_list_lock();
-	mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-	mlx5_dev_list_unlock();
-
 	return err;
 }
 
@@ -882,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw)
 	}
 
 	/* enable back PF RoCE */
-	mlx5_dev_list_lock();
-	mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-	mlx5_dev_list_unlock();
+	mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index ee1a42a078ee..4e25f2b2e0bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -201,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
 int mlx5_lag_allow(struct mlx5_core_dev *dev);
 int mlx5_lag_forbid(struct mlx5_core_dev *dev);
 
+void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
 #endif /* __MLX5_CORE_H__ */

From ec9c2fb8ceb5b514c4820f732537cb2982de0620 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Mon, 15 Jan 2018 13:11:37 +0000
Subject: [PATCH 0301/1678] IB/mlx5: Disable self loopback check when in
 switchdev mode

When in switchdev mode, there is no need to do self loopback checks
as we can't receive those packets, we insert steering rules to the
eswitch that make sure packets can't be looped back.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/qp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8aed091036c6..5663530ea5fd 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1279,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 	if (tunnel_offload_en)
 		MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
 
+	if (dev->rep)
+		MLX5_SET(tirc, tirc, self_lb_block,
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
 	err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
 
 	kvfree(in);
@@ -1570,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
 
 create_tir:
+	if (dev->rep)
+		MLX5_SET(tirc, tirc, self_lb_block,
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
 	err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
 
 	if (err)

From 1b71af6053af1bd2f849e9fda4f71c1e3f145dcf Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@cumulusnetworks.com>
Date: Fri, 23 Feb 2018 14:01:52 -0500
Subject: [PATCH 0302/1678] net: fib_rules: Add new attribute to set protocol

For ages iproute2 has used `struct rtmsg` as the ancillary header for
FIB rules and in the process set the protocol value to RTPROT_BOOT.
Until ca56209a66 ("net: Allow a rule to track originating protocol")
the kernel rules code ignored the protocol value sent from userspace
and always returned 0 in notifications. To avoid incompatibility with
existing iproute2, send the protocol as a new attribute.

Fixes: cac56209a66 ("net: Allow a rule to track originating protocol")
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c              |  5 ++++-
 include/net/fib_rules.h        |  3 ++-
 include/uapi/linux/fib_rules.h |  5 +++--
 net/core/fib_rules.c           | 15 +++++++++++----
 4 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 951a4b42cb29..9ce0182223a0 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1145,6 +1145,7 @@ static inline size_t vrf_fib_rule_nl_size(void)
 	sz  = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
 	sz += nla_total_size(sizeof(u8));	/* FRA_L3MDEV */
 	sz += nla_total_size(sizeof(u32));	/* FRA_PRIORITY */
+	sz += nla_total_size(sizeof(u8));       /* FRA_PROTOCOL */
 
 	return sz;
 }
@@ -1174,7 +1175,9 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
 	memset(frh, 0, sizeof(*frh));
 	frh->family = family;
 	frh->action = FR_ACT_TO_TBL;
-	frh->proto = RTPROT_KERNEL;
+
+	if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL))
+		goto nla_put_failure;
 
 	if (nla_put_u8(skb, FRA_L3MDEV, 1))
 		goto nla_put_failure;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b166ef07e6d4..b3d216249240 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -109,7 +109,8 @@ struct fib_rule_notifier_info {
 	[FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
 	[FRA_GOTO]	= { .type = NLA_U32 }, \
 	[FRA_L3MDEV]	= { .type = NLA_U8 }, \
-	[FRA_UID_RANGE]	= { .len = sizeof(struct fib_rule_uid_range) }
+	[FRA_UID_RANGE]	= { .len = sizeof(struct fib_rule_uid_range) }, \
+	[FRA_PROTOCOL]  = { .type = NLA_U8 }
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 925539172d5b..77d90ae38114 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -23,8 +23,8 @@ struct fib_rule_hdr {
 	__u8		tos;
 
 	__u8		table;
-	__u8		proto;
-	__u8		res1;	/* reserved */
+	__u8		res1;   /* reserved */
+	__u8		res2;	/* reserved */
 	__u8		action;
 
 	__u32		flags;
@@ -58,6 +58,7 @@ enum {
 	FRA_PAD,
 	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
 	FRA_UID_RANGE,	/* UID range */
+	FRA_PROTOCOL,   /* Originator of the rule */
 	__FRA_MAX
 };
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 88298f18cbae..a6aea805a0a2 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -466,11 +466,13 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 	refcount_set(&rule->refcnt, 1);
 	rule->fr_net = net;
-	rule->proto = frh->proto;
 
 	rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY])
 	                              : fib_default_rule_pref(ops);
 
+	rule->proto = tb[FRA_PROTOCOL] ?
+		nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC;
+
 	if (tb[FRA_IIFNAME]) {
 		struct net_device *dev;
 
@@ -666,7 +668,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	list_for_each_entry(rule, &ops->rules_list, list) {
-		if (frh->proto && (frh->proto != rule->proto))
+		if (tb[FRA_PROTOCOL] &&
+		    (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
 			continue;
 
 		if (frh->action && (frh->action != rule->action))
@@ -786,7 +789,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 			 + nla_total_size(4) /* FRA_FWMARK */
 			 + nla_total_size(4) /* FRA_FWMASK */
 			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
-			 + nla_total_size(sizeof(struct fib_kuid_range));
+			 + nla_total_size(sizeof(struct fib_kuid_range))
+			 + nla_total_size(1); /* FRA_PROTOCOL */
 
 	if (ops->nlmsg_payload)
 		payload += ops->nlmsg_payload(rule);
@@ -813,9 +817,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
 		goto nla_put_failure;
 	frh->res1 = 0;
+	frh->res2 = 0;
 	frh->action = rule->action;
 	frh->flags = rule->flags;
-	frh->proto = rule->proto;
+
+	if (nla_put_u8(skb, FRA_PROTOCOL, rule->proto))
+		goto nla_put_failure;
 
 	if (rule->action == FR_ACT_GOTO &&
 	    rcu_access_pointer(rule->ctarget) == NULL)

From 88e69a1fcc1e67dec3025af64736a84532528242 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 24 Feb 2018 01:07:58 +0100
Subject: [PATCH 0303/1678] bpf, x64: save one byte per shl/shr/sar when imm is
 1

When we shift by one, we can use a different encoding where imm
is not explicitly needed, which saves 1 byte per such op.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92f918d..4bc36bd1b97a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -640,7 +640,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			case BPF_RSH: b3 = 0xE8; break;
 			case BPF_ARSH: b3 = 0xF8; break;
 			}
-			EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+			if (imm32 == 1)
+				EMIT2(0xD1, add_1reg(b3, dst_reg));
+			else
+				EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
 			break;
 
 		case BPF_ALU | BPF_LSH | BPF_X:

From 6fe8b9c1f41dfe3209dabc5bd0726e003a065288 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 24 Feb 2018 01:07:59 +0100
Subject: [PATCH 0304/1678] bpf, x64: save several bytes by using mov over
 movabsq when possible

While analyzing some of the more complex BPF programs from Cilium,
I found that LLVM generally prefers to emit LD_IMM64 instead of MOV32
BPF instructions for loading unsigned 32-bit immediates into a
register. Given we cannot change the current/stable LLVM versions
that are already out there, lets optimize this case such that the
JIT prefers to emit 'mov %eax, imm32' over 'movabsq %rax, imm64'
whenever suitable in order to reduce the image size by 4-5 bytes per
such load in the typical case, reducing image size on some of the
bigger programs by up to 4%. emit_mov_imm32() and emit_mov_imm64()
have been added as helpers.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 125 +++++++++++++++++++++---------------
 1 file changed, 74 insertions(+), 51 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4bc36bd1b97a..f3e5cd8c1e68 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -60,7 +60,12 @@ static bool is_imm8(int value)
 
 static bool is_simm32(s64 value)
 {
-	return value == (s64) (s32) value;
+	return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+	return value == (u64)(u32)value;
 }
 
 /* mov dst, src */
@@ -355,6 +360,68 @@ static void emit_load_skb_data_hlen(u8 **pprog)
 	*pprog = prog;
 }
 
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+			   u32 dst_reg, const u32 imm32)
+{
+	u8 *prog = *pprog;
+	u8 b1, b2, b3;
+	int cnt = 0;
+
+	/* optimization: if imm32 is positive, use 'mov %eax, imm32'
+	 * (which zero-extends imm32) to save 2 bytes.
+	 */
+	if (sign_propagate && (s32)imm32 < 0) {
+		/* 'mov %rax, imm32' sign extends imm32 */
+		b1 = add_1mod(0x48, dst_reg);
+		b2 = 0xC7;
+		b3 = 0xC0;
+		EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+		goto done;
+	}
+
+	/* optimization: if imm32 is zero, use 'xor %eax, %eax'
+	 * to save 3 bytes.
+	 */
+	if (imm32 == 0) {
+		if (is_ereg(dst_reg))
+			EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+		b2 = 0x31; /* xor */
+		b3 = 0xC0;
+		EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+		goto done;
+	}
+
+	/* mov %eax, imm32 */
+	if (is_ereg(dst_reg))
+		EMIT1(add_1mod(0x40, dst_reg));
+	EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+	*pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+			   const u32 imm32_hi, const u32 imm32_lo)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+		/* For emitting plain u32, where sign bit must not be
+		 * propagated LLVM tends to load imm64 over mov32
+		 * directly, so save couple of bytes by just doing
+		 * 'mov %eax, imm32' instead.
+		 */
+		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+	} else {
+		/* movabsq %rax, imm64 */
+		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+		EMIT(imm32_lo, 4);
+		EMIT(imm32_hi, 4);
+	}
+
+	*pprog = prog;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
@@ -377,7 +444,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		const s32 imm32 = insn->imm;
 		u32 dst_reg = insn->dst_reg;
 		u32 src_reg = insn->src_reg;
-		u8 b1 = 0, b2 = 0, b3 = 0;
+		u8 b2 = 0, b3 = 0;
 		s64 jmp_offset;
 		u8 jmp_cond;
 		bool reload_skb_data;
@@ -485,58 +552,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			break;
 
 		case BPF_ALU64 | BPF_MOV | BPF_K:
-			/* optimization: if imm32 is positive,
-			 * use 'mov eax, imm32' (which zero-extends imm32)
-			 * to save 2 bytes
-			 */
-			if (imm32 < 0) {
-				/* 'mov rax, imm32' sign extends imm32 */
-				b1 = add_1mod(0x48, dst_reg);
-				b2 = 0xC7;
-				b3 = 0xC0;
-				EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
-				break;
-			}
-
 		case BPF_ALU | BPF_MOV | BPF_K:
-			/* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
-			 * to save 3 bytes.
-			 */
-			if (imm32 == 0) {
-				if (is_ereg(dst_reg))
-					EMIT1(add_2mod(0x40, dst_reg, dst_reg));
-				b2 = 0x31; /* xor */
-				b3 = 0xC0;
-				EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
-				break;
-			}
-
-			/* mov %eax, imm32 */
-			if (is_ereg(dst_reg))
-				EMIT1(add_1mod(0x40, dst_reg));
-			EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+			emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+				       dst_reg, imm32);
 			break;
 
 		case BPF_LD | BPF_IMM | BPF_DW:
-			/* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
-			 * to save 7 bytes.
-			 */
-			if (insn[0].imm == 0 && insn[1].imm == 0) {
-				b1 = add_2mod(0x48, dst_reg, dst_reg);
-				b2 = 0x31; /* xor */
-				b3 = 0xC0;
-				EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
-				insn++;
-				i++;
-				break;
-			}
-
-			/* movabsq %rax, imm64 */
-			EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
-			EMIT(insn[0].imm, 4);
-			EMIT(insn[1].imm, 4);
-
+			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
 			insn++;
 			i++;
 			break;
@@ -604,7 +626,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 				EMIT_mov(BPF_REG_0, src_reg);
 			else
 				/* mov rax, imm32 */
-				EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+				emit_mov_imm32(&prog, true,
+					       BPF_REG_0, imm32);
 
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
 				EMIT1(add_1mod(0x48, AUX_REG));

From d806a0cf2a1ddb97c91d902ef1c8219e1e2b2c4c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 24 Feb 2018 01:08:00 +0100
Subject: [PATCH 0305/1678] bpf, x64: save several bytes when mul dest is r0/r3
 anyway

Instead of unconditionally performing push/pop on rax/rdx
in case of multiplication, we can save a few bytes in case
of dest register being either BPF r0 (rax) or r3 (rdx)
since the result is written in there anyway.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f3e5cd8c1e68..9895ca383023 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -615,8 +615,10 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU | BPF_MUL | BPF_X:
 		case BPF_ALU64 | BPF_MUL | BPF_K:
 		case BPF_ALU64 | BPF_MUL | BPF_X:
-			EMIT1(0x50); /* push rax */
-			EMIT1(0x52); /* push rdx */
+			if (dst_reg != BPF_REG_0)
+				EMIT1(0x50); /* push rax */
+			if (dst_reg != BPF_REG_3)
+				EMIT1(0x52); /* push rdx */
 
 			/* mov r11, dst_reg */
 			EMIT_mov(AUX_REG, dst_reg);
@@ -636,14 +638,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			/* mul(q) r11 */
 			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
 
-			/* mov r11, rax */
-			EMIT_mov(AUX_REG, BPF_REG_0);
-
-			EMIT1(0x5A); /* pop rdx */
-			EMIT1(0x58); /* pop rax */
-
-			/* mov dst_reg, r11 */
-			EMIT_mov(dst_reg, AUX_REG);
+			if (dst_reg != BPF_REG_3)
+				EMIT1(0x5A); /* pop rdx */
+			if (dst_reg != BPF_REG_0) {
+				/* mov dst_reg, rax */
+				EMIT_mov(dst_reg, BPF_REG_0);
+				EMIT1(0x58); /* pop rax */
+			}
 			break;
 
 			/* shifts */

From 4c38e2f386b4fc5fd95d1203c74819948e2e903d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 24 Feb 2018 01:08:01 +0100
Subject: [PATCH 0306/1678] bpf, x64: save few bytes when mul is in alu32

Add a generic emit_mov_reg() helper in order to reuse it in BPF
multiplication to load the src into rax, we can save a few bytes
in alu32 while doing so.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 43 ++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9895ca383023..5b8fc1326aa1 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -422,6 +422,24 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
 	*pprog = prog;
 }
 
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	if (is64) {
+		/* mov dst, src */
+		EMIT_mov(dst_reg, src_reg);
+	} else {
+		/* mov32 dst, src */
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT1(add_2mod(0x40, dst_reg, src_reg));
+		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+	}
+
+	*pprog = prog;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
@@ -480,16 +498,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
 			break;
 
-			/* mov dst, src */
 		case BPF_ALU64 | BPF_MOV | BPF_X:
-			EMIT_mov(dst_reg, src_reg);
-			break;
-
-			/* mov32 dst, src */
 		case BPF_ALU | BPF_MOV | BPF_X:
-			if (is_ereg(dst_reg) || is_ereg(src_reg))
-				EMIT1(add_2mod(0x40, dst_reg, src_reg));
-			EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+			emit_mov_reg(&prog,
+				     BPF_CLASS(insn->code) == BPF_ALU64,
+				     dst_reg, src_reg);
 			break;
 
 			/* neg dst */
@@ -615,6 +628,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU | BPF_MUL | BPF_X:
 		case BPF_ALU64 | BPF_MUL | BPF_K:
 		case BPF_ALU64 | BPF_MUL | BPF_X:
+		{
+			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
 			if (dst_reg != BPF_REG_0)
 				EMIT1(0x50); /* push rax */
 			if (dst_reg != BPF_REG_3)
@@ -624,14 +640,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			EMIT_mov(AUX_REG, dst_reg);
 
 			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov rax, src_reg */
-				EMIT_mov(BPF_REG_0, src_reg);
+				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
 			else
-				/* mov rax, imm32 */
-				emit_mov_imm32(&prog, true,
-					       BPF_REG_0, imm32);
+				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
 
-			if (BPF_CLASS(insn->code) == BPF_ALU64)
+			if (is64)
 				EMIT1(add_1mod(0x48, AUX_REG));
 			else if (is_ereg(AUX_REG))
 				EMIT1(add_1mod(0x40, AUX_REG));
@@ -646,7 +659,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 				EMIT1(0x58); /* pop rax */
 			}
 			break;
-
+		}
 			/* shifts */
 		case BPF_ALU | BPF_LSH | BPF_K:
 		case BPF_ALU | BPF_RSH | BPF_K:

From 0869175220b339b81de48872c8198c3ed75782e3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 24 Feb 2018 01:08:02 +0100
Subject: [PATCH 0307/1678] bpf, x64: save 5 bytes in prologue when ebpf insns
 came from cbpf

While it's rather cumbersome to reduce prologue for cBPF->eBPF
migrations wrt spill/fill for r15 which is callee saved register
due to bpf_error path in bpf_jit.S that is both used by migrations
as well as native eBPF, we can still trivially save 5 bytes in
prologue for the former since tail calls can never be used there.
cBPF->eBPF migrations also have their own custom prologue in BPF
asm that xors A and X reg anyway, so it's fine we skip this here.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5b8fc1326aa1..70f9748da7aa 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -216,7 +216,7 @@ struct jit_context {
 /* emit x64 prologue code for BPF program and check it's size.
  * bpf_tail_call helper will skip it while jumping into another program
  */
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 {
 	u8 *prog = *pprog;
 	int cnt = 0;
@@ -251,18 +251,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
 	/* mov qword ptr [rbp+24],r15 */
 	EMIT4(0x4C, 0x89, 0x7D, 24);
 
-	/* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
-	 * we need to reset the counter to 0. It's done in two instructions,
-	 * resetting rax register to 0 (xor on eax gets 0 extended), and
-	 * moving it to the counter location.
-	 */
+	if (!ebpf_from_cbpf) {
+		/* Clear the tail call counter (tail_call_cnt): for eBPF tail
+		 * calls we need to reset the counter to 0. It's done in two
+		 * instructions, resetting rax register to 0, and moving it
+		 * to the counter location.
+		 */
 
-	/* xor eax, eax */
-	EMIT2(0x31, 0xc0);
-	/* mov qword ptr [rbp+32], rax */
-	EMIT4(0x48, 0x89, 0x45, 32);
+		/* xor eax, eax */
+		EMIT2(0x31, 0xc0);
+		/* mov qword ptr [rbp+32], rax */
+		EMIT4(0x48, 0x89, 0x45, 32);
+
+		BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+	}
 
-	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
 	*pprog = prog;
 }
 
@@ -453,7 +456,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	int proglen = 0;
 	u8 *prog = temp;
 
-	emit_prologue(&prog, bpf_prog->aux->stack_depth);
+	emit_prologue(&prog, bpf_prog->aux->stack_depth,
+		      bpf_prog_was_classic(bpf_prog));
 
 	if (seen_ld_abs)
 		emit_load_skb_data_hlen(&prog);

From 23d191a82c133c31bb85aa4b4b26851cd4a4b4ac Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 24 Feb 2018 01:08:03 +0100
Subject: [PATCH 0308/1678] bpf: add various jit test cases

Add few test cases that check the rnu-time results under JIT.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 89 +++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 2971ba2829ac..c987d3a2426f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11140,6 +11140,95 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
+	{
+		"jit: lsh, rsh, arsh by 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_1, 0xff),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+			BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jit: mov32 for ldimm64, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+			BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jit: mov32 for ldimm64, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+			BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jit: various mul tests",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+			BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+			BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+			BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+			BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+			BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+			BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+			BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+			BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+			BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+			BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)

From c53507778998d45543b27266742d04cd384de356 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 26 Feb 2018 09:19:12 +0800
Subject: [PATCH 0309/1678] samples/bpf: Add program for CPU state statistics

CPU is active when have running tasks on it and CPUFreq governor can
select different operating points (OPP) according to different workload;
we use 'pstate' to present CPU state which have running tasks with one
specific OPP.  On the other hand, CPU is idle which only idle task on
it, CPUIdle governor can select one specific idle state to power off
hardware logics; we use 'cstate' to present CPU idle state.

Based on trace events 'cpu_idle' and 'cpu_frequency' we can accomplish
the duration statistics for every state.  Every time when CPU enters
into or exits from idle states, the trace event 'cpu_idle' is recorded;
trace event 'cpu_frequency' records the event for CPU OPP changing, so
it's easily to know how long time the CPU stays in the specified OPP,
and the CPU must be not in any idle state.

This patch is to utilize the mentioned trace events for pstate and
cstate statistics.  To achieve more accurate profiling data, the program
uses below sequence to insure CPU running/idle time aren't missed:

- Before profiling the user space program wakes up all CPUs for once, so
  can avoid to missing account time for CPU staying in idle state for
  long time; the program forces to set 'scaling_max_freq' to lowest
  frequency and then restore 'scaling_max_freq' to highest frequency,
  this can ensure the frequency to be set to lowest frequency and later
  after start to run workload the frequency can be easily to be changed
  to higher frequency;

- User space program reads map data and update statistics for every 5s,
  so this is same with other sample bpf programs for avoiding big
  overload introduced by bpf program self;

- When send signal to terminate program, the signal handler wakes up
  all CPUs, set lowest frequency and restore highest frequency to
  'scaling_max_freq'; this is exactly same with the first step so
  avoid to missing account CPU pstate and cstate time during last
  stage.  Finally it reports the latest statistics.

The program has been tested on Hikey board with octa CA53 CPUs, below
is one example for statistics result, the format mainly follows up
Jesper Dangaard Brouer suggestion.

Jesper reminds to 'get printf to pretty print with thousands separators
use %' and setlocale(LC_NUMERIC, "en_US")', tried three different arm64
GCC toolchains (5.4.0 20160609, 6.2.1 20161016, 6.3.0 20170516) but all
of them cannot support printf flag character %' on arm64 platform, so go
back print number without grouping mode.

CPU states statistics:
state(ms)  cstate-0    cstate-1    cstate-2    pstate-0    pstate-1    pstate-2    pstate-3    pstate-4
CPU-0      767         6111        111863      561         31          756         853         190
CPU-1      241         10606       107956      484         125         646         990         85
CPU-2      413         19721       98735       636         84          696         757         89
CPU-3      84          11711       79989       17516       909         4811        5773        341
CPU-4      152         19610       98229       444         53          649         708         1283
CPU-5      185         8781        108697      666         91          671         677         1365
CPU-6      157         21964       95825       581         67          566         684         1284
CPU-7      125         15238       102704      398         20          665         786         1197

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/Makefile       |   4 +
 samples/bpf/cpustat_kern.c | 281 +++++++++++++++++++++++++++++++++++++
 samples/bpf/cpustat_user.c | 219 +++++++++++++++++++++++++++++
 3 files changed, 504 insertions(+)
 create mode 100644 samples/bpf/cpustat_kern.c
 create mode 100644 samples/bpf/cpustat_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index ec3fc8d88e87..2c2a587e0942 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu
 hostprogs-y += xdp_monitor
 hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
+hostprogs-y += cpustat
 
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
 xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -144,6 +146,7 @@ always += xdp_monitor_kern.o
 always += xdp_rxq_info_kern.o
 always += xdp2skb_meta_kern.o
 always += syscall_tp_kern.o
+always += cpustat_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf
 HOSTLOADLIBES_xdp_monitor += -lelf
 HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_cpustat += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
new file mode 100644
index 000000000000..68c84da065b1
--- /dev/null
+++ b/samples/bpf/cpustat_kern.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * The CPU number, cstate number and pstate number are based
+ * on 96boards Hikey with octa CA53 CPUs.
+ *
+ * Every CPU have three idle states for cstate:
+ *   WFI, CPU_OFF, CLUSTER_OFF
+ *
+ * Every CPU have 5 operating points:
+ *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
+ *
+ * This code is based on these assumption and other platforms
+ * need to adjust these definitions.
+ */
+#define MAX_CPU			8
+#define MAX_PSTATE_ENTRIES	5
+#define MAX_CSTATE_ENTRIES	3
+
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
+
+/*
+ * my_map structure is used to record cstate and pstate index and
+ * timestamp (Idx, Ts), when new event incoming we need to update
+ * combination for new state index and timestamp (Idx`, Ts`).
+ *
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
+ *
+ * Every CPU has one below array for recording state index and
+ * timestamp, and record for cstate and pstate saperately:
+ *
+ * +--------------------------+
+ * | cstate timestamp         |
+ * +--------------------------+
+ * | cstate index             |
+ * +--------------------------+
+ * | pstate timestamp         |
+ * +--------------------------+
+ * | pstate index             |
+ * +--------------------------+
+ */
+#define MAP_OFF_CSTATE_TIME	0
+#define MAP_OFF_CSTATE_IDX	1
+#define MAP_OFF_PSTATE_TIME	2
+#define MAP_OFF_PSTATE_IDX	3
+#define MAP_OFF_NUM		4
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = MAX_CPU * MAP_OFF_NUM,
+};
+
+/* cstate_duration records duration time for every idle state per CPU */
+struct bpf_map_def SEC("maps") cstate_duration = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
+};
+
+/* pstate_duration records duration time for every operating point per CPU */
+struct bpf_map_def SEC("maps") pstate_duration = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(u64),
+	.max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
+};
+
+/*
+ * The trace events for cpu_idle and cpu_frequency are taken from:
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
+ *
+ * These two events have same format, so define one common structure.
+ */
+struct cpu_args {
+	u64 pad;
+	u32 state;
+	u32 cpu_id;
+};
+
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
+static u32 find_cpu_pstate_idx(u32 frequency)
+{
+	u32 i;
+
+	for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
+		if (frequency == cpu_opps[i])
+			return i;
+	}
+
+	return i;
+}
+
+SEC("tracepoint/power/cpu_idle")
+int bpf_prog1(struct cpu_args *ctx)
+{
+	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+	u32 key, cpu, pstate_idx;
+	u64 *val;
+
+	if (ctx->cpu_id > MAX_CPU)
+		return 0;
+
+	cpu = ctx->cpu_id;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
+	cts = bpf_map_lookup_elem(&my_map, &key);
+	if (!cts)
+		return 0;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+	cstate = bpf_map_lookup_elem(&my_map, &key);
+	if (!cstate)
+		return 0;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+	pts = bpf_map_lookup_elem(&my_map, &key);
+	if (!pts)
+		return 0;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+	pstate = bpf_map_lookup_elem(&my_map, &key);
+	if (!pstate)
+		return 0;
+
+	prev_state = *cstate;
+	*cstate = ctx->state;
+
+	if (!*cts) {
+		*cts = bpf_ktime_get_ns();
+		return 0;
+	}
+
+	cur_ts = bpf_ktime_get_ns();
+	delta = cur_ts - *cts;
+	*cts = cur_ts;
+
+	/*
+	 * When state doesn't equal to (u32)-1, the cpu will enter
+	 * one idle state; for this case we need to record interval
+	 * for the pstate.
+	 *
+	 *                 OPP2
+	 *            +---------------------+
+	 *     OPP1   |                     |
+	 *   ---------+                     |
+	 *                                  |  Idle state
+	 *                                  +---------------
+	 *
+	 *            |<- pstate duration ->|
+	 *            ^                     ^
+	 *           pts                  cur_ts
+	 */
+	if (ctx->state != (u32)-1) {
+
+		/* record pstate after have first cpu_frequency event */
+		if (!*pts)
+			return 0;
+
+		delta = cur_ts - *pts;
+
+		pstate_idx = find_cpu_pstate_idx(*pstate);
+		if (pstate_idx >= MAX_PSTATE_ENTRIES)
+			return 0;
+
+		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+		val = bpf_map_lookup_elem(&pstate_duration, &key);
+		if (val)
+			__sync_fetch_and_add((long *)val, delta);
+
+	/*
+	 * When state equal to (u32)-1, the cpu just exits from one
+	 * specific idle state; for this case we need to record
+	 * interval for the pstate.
+	 *
+	 *       OPP2
+	 *   -----------+
+	 *              |                          OPP1
+	 *              |                     +-----------
+	 *              |     Idle state      |
+	 *              +---------------------+
+	 *
+	 *              |<- cstate duration ->|
+	 *              ^                     ^
+	 *             cts                  cur_ts
+	 */
+	} else {
+
+		key = cpu * MAX_CSTATE_ENTRIES + prev_state;
+		val = bpf_map_lookup_elem(&cstate_duration, &key);
+		if (val)
+			__sync_fetch_and_add((long *)val, delta);
+	}
+
+	/* Update timestamp for pstate as new start time */
+	if (*pts)
+		*pts = cur_ts;
+
+	return 0;
+}
+
+SEC("tracepoint/power/cpu_frequency")
+int bpf_prog2(struct cpu_args *ctx)
+{
+	u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
+	u32 key, cpu, pstate_idx;
+	u64 *val;
+
+	cpu = ctx->cpu_id;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
+	pts = bpf_map_lookup_elem(&my_map, &key);
+	if (!pts)
+		return 0;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
+	pstate = bpf_map_lookup_elem(&my_map, &key);
+	if (!pstate)
+		return 0;
+
+	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
+	cstate = bpf_map_lookup_elem(&my_map, &key);
+	if (!cstate)
+		return 0;
+
+	prev_state = *pstate;
+	*pstate = ctx->state;
+
+	if (!*pts) {
+		*pts = bpf_ktime_get_ns();
+		return 0;
+	}
+
+	cur_ts = bpf_ktime_get_ns();
+	delta = cur_ts - *pts;
+	*pts = cur_ts;
+
+	/* When CPU is in idle, bail out to skip pstate statistics */
+	if (*cstate != (u32)(-1))
+		return 0;
+
+	/*
+	 * The cpu changes to another different OPP (in below diagram
+	 * change frequency from OPP3 to OPP1), need recording interval
+	 * for previous frequency OPP3 and update timestamp as start
+	 * time for new frequency OPP1.
+	 *
+	 *                 OPP3
+	 *            +---------------------+
+	 *     OPP2   |                     |
+	 *   ---------+                     |
+	 *                                  |    OPP1
+	 *                                  +---------------
+	 *
+	 *            |<- pstate duration ->|
+	 *            ^                     ^
+	 *           pts                  cur_ts
+	 */
+	pstate_idx = find_cpu_pstate_idx(*pstate);
+	if (pstate_idx >= MAX_PSTATE_ENTRIES)
+		return 0;
+
+	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
+	val = bpf_map_lookup_elem(&pstate_duration, &key);
+	if (val)
+		__sync_fetch_and_add((long *)val, delta);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
new file mode 100644
index 000000000000..2b4cd1ae57c5
--- /dev/null
+++ b/samples/bpf/cpustat_user.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sched.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CPU			8
+#define MAX_PSTATE_ENTRIES	5
+#define MAX_CSTATE_ENTRIES	3
+#define MAX_STARS		40
+
+#define CPUFREQ_MAX_SYSFS_PATH	"/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+#define CPUFREQ_LOWEST_FREQ	"208000"
+#define CPUFREQ_HIGHEST_FREQ	"12000000"
+
+struct cpu_stat_data {
+	unsigned long cstate[MAX_CSTATE_ENTRIES];
+	unsigned long pstate[MAX_PSTATE_ENTRIES];
+};
+
+static struct cpu_stat_data stat_data[MAX_CPU];
+
+static void cpu_stat_print(void)
+{
+	int i, j;
+	char state_str[sizeof("cstate-9")];
+	struct cpu_stat_data *data;
+
+	/* Clear screen */
+	printf("\033[2J");
+
+	/* Header */
+	printf("\nCPU states statistics:\n");
+	printf("%-10s ", "state(ms)");
+
+	for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+		sprintf(state_str, "cstate-%d", i);
+		printf("%-11s ", state_str);
+	}
+
+	for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+		sprintf(state_str, "pstate-%d", i);
+		printf("%-11s ", state_str);
+	}
+
+	printf("\n");
+
+	for (j = 0; j < MAX_CPU; j++) {
+		data = &stat_data[j];
+
+		printf("CPU-%-6d ", j);
+		for (i = 0; i < MAX_CSTATE_ENTRIES; i++)
+			printf("%-11ld ", data->cstate[i] / 1000000);
+
+		for (i = 0; i < MAX_PSTATE_ENTRIES; i++)
+			printf("%-11ld ", data->pstate[i] / 1000000);
+
+		printf("\n");
+	}
+}
+
+static void cpu_stat_update(int cstate_fd, int pstate_fd)
+{
+	unsigned long key, value;
+	int c, i;
+
+	for (c = 0; c < MAX_CPU; c++) {
+		for (i = 0; i < MAX_CSTATE_ENTRIES; i++) {
+			key = c * MAX_CSTATE_ENTRIES + i;
+			bpf_map_lookup_elem(cstate_fd, &key, &value);
+			stat_data[c].cstate[i] = value;
+		}
+
+		for (i = 0; i < MAX_PSTATE_ENTRIES; i++) {
+			key = c * MAX_PSTATE_ENTRIES + i;
+			bpf_map_lookup_elem(pstate_fd, &key, &value);
+			stat_data[c].pstate[i] = value;
+		}
+	}
+}
+
+/*
+ * This function is copied from 'idlestat' tool function
+ * idlestat_wake_all() in idlestate.c.
+ *
+ * It sets the self running task affinity to cpus one by one so can wake up
+ * the specific CPU to handle scheduling; this results in all cpus can be
+ * waken up once and produce ftrace event 'trace_cpu_idle'.
+ */
+static int cpu_stat_inject_cpu_idle_event(void)
+{
+	int rcpu, i, ret;
+	cpu_set_t cpumask;
+	cpu_set_t original_cpumask;
+
+	ret = sysconf(_SC_NPROCESSORS_CONF);
+	if (ret < 0)
+		return -1;
+
+	rcpu = sched_getcpu();
+	if (rcpu < 0)
+		return -1;
+
+	/* Keep track of the CPUs we will run on */
+	sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask);
+
+	for (i = 0; i < ret; i++) {
+
+		/* Pointless to wake up ourself */
+		if (i == rcpu)
+			continue;
+
+		/* Pointless to wake CPUs we will not run on */
+		if (!CPU_ISSET(i, &original_cpumask))
+			continue;
+
+		CPU_ZERO(&cpumask);
+		CPU_SET(i, &cpumask);
+
+		sched_setaffinity(0, sizeof(cpumask), &cpumask);
+	}
+
+	/* Enable all the CPUs of the original mask */
+	sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask);
+	return 0;
+}
+
+/*
+ * It's possible to have no any frequency change for long time and cannot
+ * get ftrace event 'trace_cpu_frequency' for long period, this introduces
+ * big deviation for pstate statistics.
+ *
+ * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz
+ * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to
+ * the maximum frequency value 1.2GHz.
+ */
+static int cpu_stat_inject_cpu_frequency_event(void)
+{
+	int len, fd;
+
+	fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY);
+	if (fd < 0) {
+		printf("failed to open scaling_max_freq, errno=%d\n", errno);
+		return fd;
+	}
+
+	len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ));
+	if (len < 0) {
+		printf("failed to open scaling_max_freq, errno=%d\n", errno);
+		goto err;
+	}
+
+	len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ));
+	if (len < 0) {
+		printf("failed to open scaling_max_freq, errno=%d\n", errno);
+		goto err;
+	}
+
+err:
+	close(fd);
+	return len;
+}
+
+static void int_exit(int sig)
+{
+	cpu_stat_inject_cpu_idle_event();
+	cpu_stat_inject_cpu_frequency_event();
+	cpu_stat_update(map_fd[1], map_fd[2]);
+	cpu_stat_print();
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int ret;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	ret = cpu_stat_inject_cpu_idle_event();
+	if (ret < 0)
+		return 1;
+
+	ret = cpu_stat_inject_cpu_frequency_event();
+	if (ret < 0)
+		return 1;
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	while (1) {
+		cpu_stat_update(map_fd[1], map_fd[2]);
+		cpu_stat_print();
+		sleep(5);
+	}
+
+	return 0;
+}

From abb374fe84606ba48cf6c31c14460dfbfb4959e0 Mon Sep 17 00:00:00 2001
From: Ignacio Nunez Hernanz <nacho.nunez@aoifes.com>
Date: Mon, 26 Feb 2018 12:16:46 +0200
Subject: [PATCH 0310/1678] ath10k: make ath10k report discarded packets to
 mac80211

Whenever ath10k firmware discards a packet (HTT_TX_COMPL_STATE_DISCARD
flag), the skb is freed and mac80211 does not get feedback through
ieee80211_tx_status().

Instead, make sure that the IEEE80211_TX_STAT_ACK flag is disabled and
let the packet go through, like ath9k does.

Signed-off-by: Ignacio Nunez Hernanz <nacho.nunez@aoifes.com>
[kvalo@codeaurora.org: rebase patch manually]
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/txrx.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 5b3b021526ab..70e23bbf7171 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -102,11 +102,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 	memset(&info->status, 0, sizeof(info->status));
 	trace_ath10k_txrx_tx_unref(ar, tx_done->msdu_id);
 
-	if (tx_done->status == HTT_TX_COMPL_STATE_DISCARD) {
-		ieee80211_free_txskb(htt->ar->hw, msdu);
-		return 0;
-	}
-
 	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
 		info->flags |= IEEE80211_TX_STAT_ACK;
 
@@ -117,6 +112,13 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 	    (info->flags & IEEE80211_TX_CTL_NO_ACK))
 		info->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED;
 
+	if (tx_done->status == HTT_TX_COMPL_STATE_DISCARD) {
+		if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+			info->flags &= ~IEEE80211_TX_STAT_NOACK_TRANSMITTED;
+		else
+			info->flags &= ~IEEE80211_TX_STAT_ACK;
+	}
+
 	ieee80211_tx_status(htt->ar->hw, msdu);
 	/* we do not own the msdu anymore */
 

From 77b83829ebf6b57eacd0c57a3cb238096ddc86fc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Feb 2018 12:17:06 +0200
Subject: [PATCH 0311/1678] wil6210: fix spelling mistake: "preperation"->
 "preparation"

Trivial fix to spelling mistake in debug error message text.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 0c61a6c13991..04c8651274d9 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -715,7 +715,7 @@ static void wil_bl_prepare_halt(struct wil6210_priv *wil)
 		    offsetof(struct bl_dedicated_registers_v0,
 			     boot_loader_struct_version));
 	if (!tmp) {
-		wil_dbg_misc(wil, "old BL, skipping halt preperation\n");
+		wil_dbg_misc(wil, "old BL, skipping halt preparation\n");
 		return;
 	}
 

From 65b53bfd497b052277f89afb3839ace38190974c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 20 Feb 2018 08:45:12 +0100
Subject: [PATCH 0312/1678] mlxsw: spectrum_switchdev: Allow port enslavement
 to a VLAN-unaware bridge

Up until now we only allowed VLAN devices to be put in a VLAN-unaware
bridge, but some users need the ability to enslave physical ports as
well.

This is achieved by mapping the port and VID 1 to the bridge's vFID,
instead of the port and the VID used by the VLAN device.

The above is valid because as long as the port is not enslaved to a
bridge, VID 1 is guaranteed to be configured as PVID and egress
untagged.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Tested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index f9f53af04fe1..917663adf925 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1882,14 +1882,10 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 				struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
+	struct net_device *dev = bridge_port->dev;
 	u16 vid;
 
-	if (!is_vlan_dev(bridge_port->dev)) {
-		NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge");
-		return -EINVAL;
-	}
-	vid = vlan_dev_vlan_id(bridge_port->dev);
-
+	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
 	if (WARN_ON(!mlxsw_sp_port_vlan))
 		return -EINVAL;
@@ -1912,8 +1908,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
 				 struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-	u16 vid = vlan_dev_vlan_id(bridge_port->dev);
+	struct net_device *dev = bridge_port->dev;
+	u16 vid;
 
+	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
 	if (WARN_ON(!mlxsw_sp_port_vlan))
 		return;

From 294d711ee8c04fb2baa704cae15a7d039bb50615 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 22 Feb 2018 22:58:32 +0100
Subject: [PATCH 0313/1678] net: dsa: mv88e6xxx: Poll when no interrupt defined

Not all boards have the interrupt output from the switch connected to
a GPIO line. In such cases, phylib has to poll the internal PHYs,
rather than receive an interrupt when there is a change in the link
state. phylib polls once per second, and per PHY reads around 4
words. With a switch typically having 4 internal PHYs, this means 16
MDIO transactions per second.

Rather than performing this phylib level polling, have the driver poll
the interrupt status register. If the status register indicates an
interrupt condition processing of interrupts in the same way as if a
GPIO was used.

Polling 10 times a second places less load on the MDIO bus. But rather
than taking on average 0.5s to detect a link change, it takes less
than 0.05s. Additionally, other interrupts, such as the watchdog, ATU
and VTU violations will be reported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 148 ++++++++++++++++++++++---------
 drivers/net/dsa/mv88e6xxx/chip.h |   3 +
 2 files changed, 107 insertions(+), 44 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index e1b5c5c66fce..24486f96dd39 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -253,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d)
 	chip->g1_irq.masked &= ~(1 << n);
 }
 
-static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip)
 {
-	struct mv88e6xxx_chip *chip = dev_id;
 	unsigned int nhandled = 0;
 	unsigned int sub_irq;
 	unsigned int n;
@@ -280,6 +279,13 @@ out:
 	return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
 }
 
+static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id)
+{
+	struct mv88e6xxx_chip *chip = dev_id;
+
+	return mv88e6xxx_g1_irq_thread_work(chip);
+}
+
 static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d)
 {
 	struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
@@ -335,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
-static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
 {
 	int irq, virq;
 	u16 mask;
@@ -344,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
 	mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
 	mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask);
 
-	free_irq(chip->irq, chip);
-
 	for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
 		virq = irq_find_mapping(chip->g1_irq.domain, irq);
 		irq_dispose_mapping(virq);
@@ -354,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
 	irq_domain_remove(chip->g1_irq.domain);
 }
 
-static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
+{
+	mv88e6xxx_g1_irq_free(chip);
+
+	free_irq(chip->irq, chip);
+}
+
+static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip)
 {
 	int err, irq, virq;
 	u16 reg, mask;
@@ -387,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
 	if (err)
 		goto out_disable;
 
-	err = request_threaded_irq(chip->irq, NULL,
-				   mv88e6xxx_g1_irq_thread_fn,
-				   IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
-				   dev_name(chip->dev), chip);
-	if (err)
-		goto out_disable;
-
 	return 0;
 
 out_disable:
@@ -411,6 +415,62 @@ out_mapping:
 	return err;
 }
 
+static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
+{
+	int err;
+
+	err = mv88e6xxx_g1_irq_setup_common(chip);
+	if (err)
+		return err;
+
+	err = request_threaded_irq(chip->irq, NULL,
+				   mv88e6xxx_g1_irq_thread_fn,
+				   IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+				   dev_name(chip->dev), chip);
+	if (err)
+		mv88e6xxx_g1_irq_free_common(chip);
+
+	return err;
+}
+
+static void mv88e6xxx_irq_poll(struct kthread_work *work)
+{
+	struct mv88e6xxx_chip *chip = container_of(work,
+						   struct mv88e6xxx_chip,
+						   irq_poll_work.work);
+	mv88e6xxx_g1_irq_thread_work(chip);
+
+	kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+				   msecs_to_jiffies(100));
+}
+
+static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
+{
+	int err;
+
+	err = mv88e6xxx_g1_irq_setup_common(chip);
+	if (err)
+		return err;
+
+	kthread_init_delayed_work(&chip->irq_poll_work,
+				  mv88e6xxx_irq_poll);
+
+	chip->kworker = kthread_create_worker(0, dev_name(chip->dev));
+	if (IS_ERR(chip->kworker))
+		return PTR_ERR(chip->kworker);
+
+	kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work,
+				   msecs_to_jiffies(100));
+
+	return 0;
+}
+
+static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
+{
+	kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
+	kthread_destroy_worker(chip->kworker);
+}
+
 int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
 {
 	int i;
@@ -4034,33 +4094,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 		goto out;
 	}
 
-	if (chip->irq > 0) {
-		/* Has to be performed before the MDIO bus is created,
-		 * because the PHYs will link there interrupts to these
-		 * interrupt controllers
-		 */
-		mutex_lock(&chip->reg_lock);
+	/* Has to be performed before the MDIO bus is created, because
+	 * the PHYs will link there interrupts to these interrupt
+	 * controllers
+	 */
+	mutex_lock(&chip->reg_lock);
+	if (chip->irq > 0)
 		err = mv88e6xxx_g1_irq_setup(chip);
-		mutex_unlock(&chip->reg_lock);
+	else
+		err = mv88e6xxx_irq_poll_setup(chip);
+	mutex_unlock(&chip->reg_lock);
 
+	if (err)
+		goto out;
+
+	if (chip->info->g2_irqs > 0) {
+		err = mv88e6xxx_g2_irq_setup(chip);
 		if (err)
-			goto out;
-
-		if (chip->info->g2_irqs > 0) {
-			err = mv88e6xxx_g2_irq_setup(chip);
-			if (err)
-				goto out_g1_irq;
-		}
-
-		err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
-		if (err)
-			goto out_g2_irq;
-
-		err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
-		if (err)
-			goto out_g1_atu_prob_irq;
+			goto out_g1_irq;
 	}
 
+	err = mv88e6xxx_g1_atu_prob_irq_setup(chip);
+	if (err)
+		goto out_g2_irq;
+
+	err = mv88e6xxx_g1_vtu_prob_irq_setup(chip);
+	if (err)
+		goto out_g1_atu_prob_irq;
+
 	err = mv88e6xxx_mdios_register(chip, np);
 	if (err)
 		goto out_g1_vtu_prob_irq;
@@ -4074,20 +4135,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
 	mv88e6xxx_mdios_unregister(chip);
 out_g1_vtu_prob_irq:
-	if (chip->irq > 0)
-		mv88e6xxx_g1_vtu_prob_irq_free(chip);
+	mv88e6xxx_g1_vtu_prob_irq_free(chip);
 out_g1_atu_prob_irq:
-	if (chip->irq > 0)
-		mv88e6xxx_g1_atu_prob_irq_free(chip);
+	mv88e6xxx_g1_atu_prob_irq_free(chip);
 out_g2_irq:
-	if (chip->info->g2_irqs > 0 && chip->irq > 0)
+	if (chip->info->g2_irqs > 0)
 		mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
-	if (chip->irq > 0) {
-		mutex_lock(&chip->reg_lock);
+	mutex_lock(&chip->reg_lock);
+	if (chip->irq > 0)
 		mv88e6xxx_g1_irq_free(chip);
-		mutex_unlock(&chip->reg_lock);
-	}
+	else
+		mv88e6xxx_irq_poll_free(chip);
+	mutex_unlock(&chip->reg_lock);
 out:
 	return err;
 }
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 97d7915f32c7..d6a1391dc268 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -15,6 +15,7 @@
 #include <linux/if_vlan.h>
 #include <linux/irq.h>
 #include <linux/gpio/consumer.h>
+#include <linux/kthread.h>
 #include <linux/phy.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
@@ -245,6 +246,8 @@ struct mv88e6xxx_chip {
 	int watchdog_irq;
 	int atu_prob_irq;
 	int vtu_prob_irq;
+	struct kthread_worker *kworker;
+	struct kthread_delayed_work irq_poll_work;
 
 	/* GPIO resources */
 	u8 gpio_data[2];

From f8c193ca1f7d3b69c031970815d14e09de396ee8 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 22 Feb 2018 22:58:33 +0100
Subject: [PATCH 0314/1678] arm: mvebu: 370-rd: Enable PHY interrupt handling

The Ethernet switch has an embedded interrupt controller. Interrupts
from the embedded PHYs are part of this interrupt controller.
Explicitly list the MDIO bus the embedded PHYs are on, and wire up the
interrupts.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/boot/dts/armada-370-rd.dts | 32 +++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts
index 8b2fa9a49967..c28afb242393 100644
--- a/arch/arm/boot/dts/armada-370-rd.dts
+++ b/arch/arm/boot/dts/armada-370-rd.dts
@@ -56,6 +56,7 @@
 
 /dts-v1/;
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
 #include <dt-bindings/gpio/gpio.h>
 #include "armada-370.dtsi"
 
@@ -243,6 +244,8 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 		reg = <0x10>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
 
 		ports {
 			#address-cells = <1>;
@@ -278,6 +281,35 @@
 				};
 			};
 		};
+
+		mdio {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			switchphy0: switchphy@0 {
+				reg = <0>;
+				interrupt-parent = <&switch>;
+				interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
+			};
+
+			switchphy1: switchphy@1 {
+				reg = <1>;
+				interrupt-parent = <&switch>;
+				interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+			};
+
+			switchphy2: switchphy@2 {
+				reg = <2>;
+				interrupt-parent = <&switch>;
+				interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+			};
+
+			switchphy3: switchphy@3 {
+				reg = <3>;
+				interrupt-parent = <&switch>;
+				interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+			};
+		};
 	};
 };
 

From 8f611fb046126eeee4a768a6ad50956f2aea9194 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 16 Jan 2018 12:48:09 +0000
Subject: [PATCH 0315/1678] ixgbe: remove redundant initialization of 'pool'

Variable pool is being assigned zero and then in the following for-loop
is it being set to zero again. Remove the redundant first assignment.

Cleans up clang warning:
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c:61:2: warning: Value stored
to 'pool' is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 4242f0213e46..ed4cbe94c355 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
 		return false;
 
 	/* start at VMDq register offset for SR-IOV enabled setups */
-	pool = 0;
 	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
 	for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
 		/* If we are greater than indices move to next pool */

From b89c7695b14f58686ac89d5add861f1f6d3fd4da Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 23 Feb 2018 14:15:31 +0100
Subject: [PATCH 0316/1678] mlxsw: spectrum_kvdl: use div_u64() for 64-bit
 division

Calculating the number of entries now uses 64-bit arithmetic that
causes a link error on 32-bit architectures:

drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.o: In function `mlxsw_sp_kvdl_init':
spectrum_kvdl.c:(.text+0x51c): undefined reference to `__aeabi_uldivmod'

We could probably use a 32-bit division here as before, but since this is
not in a performance critical path, div_u64() seems cleaner here.

Fixes: 887839e6960d ("mlxsw: spectrum_kvdl: Add support for dynamic partition set")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index d27fa57ad3c3..6fd701db90c9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -278,7 +278,7 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
 		resource_size = info->end_index - info->start_index + 1;
 	}
 
-	nr_entries = resource_size / info->alloc_size;
+	nr_entries = div_u64(resource_size, info->alloc_size);
 	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
 	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
 	if (!part)

From ed2da6270ed9ea37d39e9ab597fc5b3b4215ae2a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 23 Feb 2018 14:15:32 +0100
Subject: [PATCH 0317/1678] mlxsw: spectrum_kvdl: avoid uninitialized variable
 warning

gcc warns that 'resource_id' is not initialized if we don't come though
any of the three 'case' statements before:

drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c: In function 'mlxsw_sp_kvdl_part_init':
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c:275:8: error: 'resource_id' may be used uninitialized in this function [-Werror=maybe-uninitialized]

In the current code, that won't happen, but it's more robust to explicitly
handle this by returning a failure from mlxsw_sp_kvdl_part_init.

Fixes: 887839e6960d ("mlxsw: spectrum_kvdl: Add support for dynamic partition set")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 6fd701db90c9..059eb3214328 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -270,6 +270,8 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
 	case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
 		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	err = devlink_resource_size_get(devlink, resource_id, &resource_size);

From 9baeb5eb1f83b8c64a80ee9926204909e275b5cc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Feb 2018 14:00:56 +0000
Subject: [PATCH 0318/1678] sfc: falcon: remove duplicated bit-wise or of
 LOOPBACK_SGMII

Bit pattern LOOPBACK_SGMII is being bit-wise or'd twice; remove the
redundant 2nd LOOPBACK_SGMII

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/falcon/enum.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
index 30a1136fc909..4824fcf5c3d4 100644
--- a/drivers/net/ethernet/sfc/falcon/enum.h
+++ b/drivers/net/ethernet/sfc/falcon/enum.h
@@ -81,7 +81,6 @@ enum ef4_loopback_mode {
 			    (1 << LOOPBACK_XAUI) |		\
 			    (1 << LOOPBACK_GMII) |		\
 			    (1 << LOOPBACK_SGMII) |		\
-			    (1 << LOOPBACK_SGMII) |		\
 			    (1 << LOOPBACK_XGBR) |		\
 			    (1 << LOOPBACK_XFI) |		\
 			    (1 << LOOPBACK_XAUI_FAR) |		\

From 6a7b75f7c16d85f782ae86c71f0f31f72a61602f Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Fri, 23 Feb 2018 12:16:37 -0500
Subject: [PATCH 0319/1678] tools: tc-testing: Fix indentation

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index b3754b9aa302..ab28373dccd7 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -347,9 +347,9 @@ def check_default_settings(args, remaining, pm):
     global NAMES
 
     if args.path != None:
-         NAMES['TC'] = args.path
+        NAMES['TC'] = args.path
     if args.device != None:
-         NAMES['DEV2'] = args.device
+        NAMES['DEV2'] = args.device
     if not os.path.isfile(NAMES['TC']):
         print("The specified tc path " + NAMES['TC'] + " does not exist.")
         exit(1)

From f9b63a1c91d4d2b4fcac5c786cdd7de1242c162b Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Fri, 23 Feb 2018 12:16:38 -0500
Subject: [PATCH 0320/1678] tools: tc-testing: better error reporting

Do a better job with error handling - in pre- and post-suite,
in pre- and post-case.  Show a traceback for errors.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 82 +++++++++++++++++++----
 1 file changed, 69 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index ab28373dccd7..6c220bc26d9f 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -15,6 +15,7 @@ import importlib
 import json
 import subprocess
 import time
+import traceback
 from collections import OrderedDict
 from string import Template
 
@@ -23,6 +24,13 @@ from tdc_helper import *
 
 import TdcPlugin
 
+
+class PluginMgrTestFail(Exception):
+    def __init__(self, stage, output, message):
+        self.stage = stage
+        self.output = output
+        self.message = message
+
 class PluginMgr:
     def __init__(self, argparser):
         super().__init__()
@@ -135,7 +143,7 @@ def exec_cmd(args, pm, stage, command):
     return proc, foutput
 
 
-def prepare_env(args, pm, stage, prefix, cmdlist):
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
     """
     Execute the setup/teardown commands for a test case.
     Optionally terminate test execution if the command fails.
@@ -164,7 +172,9 @@ def prepare_env(args, pm, stage, prefix, cmdlist):
             print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
             print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
             print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
-            raise Exception('"{}" did not complete successfully'.format(prefix))
+            raise PluginMgrTestFail(
+                stage, output,
+                '"{}" did not complete successfully'.format(prefix))
 
 def run_one_test(pm, args, index, tidx):
     result = True
@@ -194,8 +204,11 @@ def run_one_test(pm, args, index, tidx):
         match_pattern = re.compile(
             str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
         (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
-        match_index = re.findall(match_pattern, procout)
-        if len(match_index) != int(tidx["matchCount"]):
+        if procout:
+            match_index = re.findall(match_pattern, procout)
+            if len(match_index) != int(tidx["matchCount"]):
+                result = False
+        elif int(tidx["matchCount"]) != 0:
             result = False
 
     if not result:
@@ -204,9 +217,12 @@ def run_one_test(pm, args, index, tidx):
     tap += tresult
 
     if result == False:
-        tap += procout
+        if procout:
+            tap += procout
+        else:
+            tap += 'No output!\n'
 
-    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'])
+    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
     pm.call_post_case()
 
     index += 1
@@ -227,30 +243,70 @@ def test_runner(pm, args, filtered_tests):
     index = 1
     tap = str(index) + ".." + str(tcount) + "\n"
     badtest = None
+    stage = None
+    emergency_exit = False
+    emergency_exit_message = ''
 
-    pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+    try:
+        pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+    except Exception as ee:
+        ex_type, ex, ex_tb = sys.exc_info()
+        print('Exception {} {} (caught in pre_suite).'.
+              format(ex_type, ex))
+        # when the extra print statements are uncommented,
+        # the traceback does not appear between them
+        # (it appears way earlier in the tdc.py output)
+        # so don't bother ...
+        # print('--------------------(')
+        # print('traceback')
+        traceback.print_tb(ex_tb)
+        # print('--------------------)')
+        emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+        emergency_exit = True
+        stage = 'pre-SUITE'
 
+    if emergency_exit:
+        pm.call_post_suite(index)
+        return emergency_exit_message
     if args.verbose > 1:
-        print('Run tests here')
+        print('give test rig 2 seconds to stabilize')
+    time.sleep(2)
     for tidx in testlist:
         if "flower" in tidx["category"] and args.device == None:
+            if args.verbose > 1:
+                print('Not executing test {} {} because DEV2 not defined'.
+                      format(tidx['id'], tidx['name']))
             continue
         try:
             badtest = tidx  # in case it goes bad
             tap += run_one_test(pm, args, index, tidx)
-        except Exception as ee:
-            print('Exception {} (caught in test_runner, running test {} {} {})'.
-                  format(ee, index, tidx['id'], tidx['name']))
+        except PluginMgrTestFail as pmtf:
+            ex_type, ex, ex_tb = sys.exc_info()
+            stage = pmtf.stage
+            message = pmtf.message
+            output = pmtf.output
+            print(message)
+            print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+                  format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+            print('---------------')
+            print('traceback')
+            traceback.print_tb(ex_tb)
+            print('---------------')
+            if stage == 'teardown':
+                print('accumulated output for this test:')
+                if pmtf.output:
+                    print(pmtf.output)
+            print('---------------')
             break
         index += 1
 
     # if we failed in setup or teardown,
-    # fill in the remaining tests with not ok
+    # fill in the remaining tests with ok-skipped
     count = index
     tap += 'about to flush the tap output if tests need to be skipped\n'
     if tcount + 1 != index:
         for tidx in testlist[index - 1:]:
-            msg = 'skipped - previous setup or teardown failed'
+            msg = 'skipped - previous {} failed'.format(stage)
             tap += 'ok {} - {} # {} {} {}\n'.format(
                 count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
             count += 1

From ed820f47bc5f9336840a43b3542c0281812bec03 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Feb 2018 17:16:57 +0000
Subject: [PATCH 0321/1678] xen-netback: make function xenvif_rx_skb static

The function xenvif_rx_skb is local to the source and does not need
to be in global scope, so make it static.

Cleans up sparse warning:
drivers/net/xen-netback/rx.c:422:6: warning: symbol 'xenvif_rx_skb'
was not declared. Should it be static?

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netback/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b1cf7c6f407a..ef5887037b22 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
 	BUG();
 }
 
-void xenvif_rx_skb(struct xenvif_queue *queue)
+static void xenvif_rx_skb(struct xenvif_queue *queue)
 {
 	struct xenvif_pkt_state pkt;
 

From 7e81c55ee94338e4fe5bdd87e5fb15dbf040156f Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Tue, 27 Feb 2018 17:56:10 +0100
Subject: [PATCH 0322/1678] batman-adv: Start new development cycle

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f7ba3f96d8f3..69bfedfad174 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -25,7 +25,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.0"
+#define BATADV_SOURCE_VERSION "2018.1"
 #endif
 
 /* B.A.T.M.A.N. parameters */

From 6b1aea8cf2c8618146edaf6b35775ab55f7cafe5 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 1 Jan 2018 00:00:00 +0100
Subject: [PATCH 0323/1678] batman-adv: Update copyright years for 2018

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h     | 2 +-
 include/uapi/linux/batman_adv.h        | 2 +-
 net/batman-adv/Kconfig                 | 2 +-
 net/batman-adv/Makefile                | 2 +-
 net/batman-adv/bat_algo.c              | 2 +-
 net/batman-adv/bat_algo.h              | 2 +-
 net/batman-adv/bat_iv_ogm.c            | 2 +-
 net/batman-adv/bat_iv_ogm.h            | 2 +-
 net/batman-adv/bat_v.c                 | 2 +-
 net/batman-adv/bat_v.h                 | 2 +-
 net/batman-adv/bat_v_elp.c             | 2 +-
 net/batman-adv/bat_v_elp.h             | 2 +-
 net/batman-adv/bat_v_ogm.c             | 2 +-
 net/batman-adv/bat_v_ogm.h             | 2 +-
 net/batman-adv/bitarray.c              | 2 +-
 net/batman-adv/bitarray.h              | 2 +-
 net/batman-adv/bridge_loop_avoidance.c | 2 +-
 net/batman-adv/bridge_loop_avoidance.h | 2 +-
 net/batman-adv/debugfs.c               | 2 +-
 net/batman-adv/debugfs.h               | 2 +-
 net/batman-adv/distributed-arp-table.c | 2 +-
 net/batman-adv/distributed-arp-table.h | 2 +-
 net/batman-adv/fragmentation.c         | 2 +-
 net/batman-adv/fragmentation.h         | 2 +-
 net/batman-adv/gateway_client.c        | 2 +-
 net/batman-adv/gateway_client.h        | 2 +-
 net/batman-adv/gateway_common.c        | 2 +-
 net/batman-adv/gateway_common.h        | 2 +-
 net/batman-adv/hard-interface.c        | 2 +-
 net/batman-adv/hard-interface.h        | 2 +-
 net/batman-adv/hash.c                  | 2 +-
 net/batman-adv/hash.h                  | 2 +-
 net/batman-adv/icmp_socket.c           | 2 +-
 net/batman-adv/icmp_socket.h           | 2 +-
 net/batman-adv/log.c                   | 2 +-
 net/batman-adv/log.h                   | 2 +-
 net/batman-adv/main.c                  | 2 +-
 net/batman-adv/main.h                  | 2 +-
 net/batman-adv/multicast.c             | 2 +-
 net/batman-adv/multicast.h             | 2 +-
 net/batman-adv/netlink.c               | 2 +-
 net/batman-adv/netlink.h               | 2 +-
 net/batman-adv/network-coding.c        | 2 +-
 net/batman-adv/network-coding.h        | 2 +-
 net/batman-adv/originator.c            | 2 +-
 net/batman-adv/originator.h            | 2 +-
 net/batman-adv/routing.c               | 2 +-
 net/batman-adv/routing.h               | 2 +-
 net/batman-adv/send.c                  | 2 +-
 net/batman-adv/send.h                  | 2 +-
 net/batman-adv/soft-interface.c        | 2 +-
 net/batman-adv/soft-interface.h        | 2 +-
 net/batman-adv/sysfs.c                 | 2 +-
 net/batman-adv/sysfs.h                 | 2 +-
 net/batman-adv/tp_meter.c              | 2 +-
 net/batman-adv/tp_meter.h              | 2 +-
 net/batman-adv/translation-table.c     | 2 +-
 net/batman-adv/translation-table.h     | 2 +-
 net/batman-adv/tvlv.c                  | 2 +-
 net/batman-adv/tvlv.h                  | 2 +-
 net/batman-adv/types.h                 | 2 +-
 61 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 5cb360be2a11..daefd7283896 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index ae00c99cbed0..56ae28934070 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: MIT */
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c44f6515be5e..e4e2e02b7380 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 022f6e77307b..b97ba6fb8353 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 80c72c7d3cad..ea309ad06175 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 029221615ba3..534b790c3753 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
  *
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 79e326383726..e21aa147607b 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 9dc0dd5c83df..317cafd302cf 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 27e165ac9302..9c3a34b65b15 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index a17ab68bbce8..ec4a2a569750 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
  *
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index a83478c46597..28687493599f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 5e39d0588a48..e8c7b7fd290d 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index ba59b77c605d..2948b41b06d4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 6a4c14ccc3c6..ed36c5e79fde 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index bdc1ef06e05b..a296a4d851f5 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index ca9d0753dd6b..48f683289531 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index fad47853ad3c..8ff81346ff0c 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index b27571abcd2f..71f95a3e4d3f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 21d1189957a7..4229b01ac7b5 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 90a08d35c501..37b069698b04 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 9703c791ffc5..19b15de455ab 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 12897eb46268..e24aa9601c52 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 22dde42fd80e..d815acc13c35 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 138b22a1836a..944512e07782 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 37fe9a644f22..c294f6fd43e0 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 981f58421a32..f0b86fcb2493 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b3e156af2256..936c107f3199 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index afebd9c7edf4..80afb2793687 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 5f186bff284a..fd4a263dd6b7 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index de5e9a374ece..d1c0f6189301 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 04d964358c98..7b49e4001778 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 4ce1b6d3ad5c..9490a7ca2ba6 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index e91f29c7c638..7d5e9abb7a65 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 84cddd01eeab..958be22beda9 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index dc9fa37ddd14..52d8a4b848c0 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35e02b2b9e72..35f4f397ed57 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d31c8266e244..69c0d85bceb3 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 69bfedfad174..d5d65999207e 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cbdeb47ec3f6..6eaffe50335a 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3ac06337ab71..6b8594e23da3 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a823d3899bad..129af56b944d 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 0e7e57b69b54..571d9a5ae7aa 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index b48116bb24ef..c3578444f3cb 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index adaeafa4f71e..65c346812bc1 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 58a7d9274435..2a51a0cbb82a 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 8e543a3cdc6c..f3601ab0872e 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index b6891e8b741c..289df027ecdd 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index a1289bc5f115..db54c2d9b8bf 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 2a5ab6f1076d..4a35f5c2f52b 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1e8c79093623..64cce07b8fe6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 900c5ce21cd4..c95e2b2677fd 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 075c5b5b2ce1..daf87f07fadd 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index c1578fa0b952..f2eef43bd2ec 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index bbeee61221fa..c1e3fb69952d 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 8b576712d0c1..11520de96ccb 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
  *
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index c8b8f2cb2c2b..68e600974759 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
  *
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7550a9ccd695..0225616d5771 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 8d9e3abec2c8..01b6c8eafaf9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 5ffcb45ac6ff..a637458205d1 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index a74df33f446d..ef5867f49824 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index bb1578410e0c..4a3b8837e1b5 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *

From 60f4b645492592501868c4b7a361f3f972b73196 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Sun, 28 Jan 2018 03:53:37 -0800
Subject: [PATCH 0324/1678] ixgbe: Avoid to write the RETA table when
 unnecessary

If indir == 0 in the ixgbe_set_rxfh(), it is unnecessary
to write the HW. Because redirection table is not changed.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 221f15803480..89edb9fae6f0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -3059,6 +3059,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
 
 		for (i = 0; i < reta_entries; i++)
 			adapter->rss_indir_tbl[i] = indir[i];
+
+		ixgbe_store_reta(adapter);
 	}
 
 	/* Fill out the rss hash key */
@@ -3067,8 +3069,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
 		ixgbe_store_key(adapter);
 	}
 
-	ixgbe_store_reta(adapter);
-
 	return 0;
 }
 

From 6704a3abf4cf4181a1ee64f5db4969347b88ca1d Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 29 Jan 2018 15:57:48 -0800
Subject: [PATCH 0325/1678] ixgbe: prevent ptp_rx_hang from running when in
 FILTER_ALL mode

On hardware which supports timestamping all packets, the timestamps are
recorded in the packet buffer, and the driver no longer uses or reads
the registers. This makes the logic for checking and clearing Rx
timestamp hangs meaningless.

If we run the ixgbe_ptp_rx_hang() function in this case, then the driver
will continuously spam the log output with "Clearing Rx timestamp hang".
These messages are spurious, and confusing to end users.

The original code in commit a9763f3cb54c ("ixgbe: Update PTP to support
X550EM_x devices", 2015-12-03) did have a flag PTP_RX_TIMESTAMP_IN_REGISTER
which was intended to be used to avoid the Rx timestamp hang check,
however it did not actually check the flag before calling the function.

Do so now in order to stop the checks and prevent the spurious log
messages.

Fixes: a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x devices", 2015-12-03)
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0da5aa2c8aba..b032091022a8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7703,7 +7703,8 @@ static void ixgbe_service_task(struct work_struct *work)
 
 	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
 		ixgbe_ptp_overflow_check(adapter);
-		ixgbe_ptp_rx_hang(adapter);
+		if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+			ixgbe_ptp_rx_hang(adapter);
 		ixgbe_ptp_tx_hang(adapter);
 	}
 

From 9913db03d76bc27add1afea2c3c43e5705497529 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:12 -0800
Subject: [PATCH 0326/1678] ixgbevf: use page_address offset from page

Based on commit 3456fd53421e
("igb: Use page_address offset from page instead of masking virtual address")

Update the handling of page addresses so that we always refer to them using
a void pointer, and try to use the consistent name of va indicating we are
working with a virtual address.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 9b3d43d28106..604b026d1a63 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -795,7 +795,7 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 				struct sk_buff *skb)
 {
 	struct page *page = rx_buffer->page;
-	unsigned char *va = page_address(page) + rx_buffer->page_offset;
+	void *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = IXGBEVF_RX_BUFSZ;
 #else
@@ -831,7 +831,7 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 
 add_tail_frag:
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-			(unsigned long)va & ~PAGE_MASK, size, truesize);
+			va - page_address(page), size, truesize);
 
 	return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
 }
@@ -856,13 +856,12 @@ static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
 				      DMA_FROM_DEVICE);
 
 	if (likely(!skb)) {
-		void *page_addr = page_address(page) +
-				  rx_buffer->page_offset;
+		void *va = page_address(page) + rx_buffer->page_offset;
 
 		/* prefetch first cache line of first page */
-		prefetch(page_addr);
+		prefetch(va);
 #if L1_CACHE_BYTES < 128
-		prefetch(page_addr + L1_CACHE_BYTES);
+		prefetch(va + L1_CACHE_BYTES);
 #endif
 
 		/* allocate a skb to store the frags */

From bc04347f5b4ad22ec831131f8b97384e40edc354 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:17 -0800
Subject: [PATCH 0327/1678] ixgbevf: add ethtool private flag for legacy Rx

Introduce legacy-rx private flag that will allow switching between the
old and new (build_skb based) Rx code paths. The implementation is the
same as in commit e08912985b29
("igb: Add support for ethtool private flag to allow use of legacy Rx")

This provides a means of validating the legacy Rx path in the event that
we are forced to fall back.  At some point in the future when we are
convinced we don't need it anymore we might be able to drop the legacy-rx
flag.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ethtool.c | 48 ++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h |  2 +
 2 files changed, 50 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 4400e49090b4..e7623fed42da 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
 
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
 static int ixgbevf_get_link_ksettings(struct net_device *netdev,
 				      struct ethtool_link_ksettings *cmd)
 {
@@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
 		sizeof(drvinfo->version));
 	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
 		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
 }
 
 static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
 		return IXGBEVF_TEST_LEN;
 	case ETH_SS_STATS:
 		return IXGBEVF_STATS_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return IXGBEVF_PRIV_FLAGS_STR_LEN;
 	default:
 		return -EINVAL;
 	}
@@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, ixgbevf_priv_flags_strings,
+		       IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
 	}
 }
 
@@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 	return err;
 }
 
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+		priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+	return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags = adapter->flags;
+
+	flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+	if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+		flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+	if (flags != adapter->flags) {
+		adapter->flags = flags;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			ixgbevf_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
 static const struct ethtool_ops ixgbevf_ethtool_ops = {
 	.get_drvinfo		= ixgbevf_get_drvinfo,
 	.get_regs_len		= ixgbevf_get_regs_len,
@@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
 	.get_rxfh_key_size	= ixgbevf_get_rxfh_key_size,
 	.get_rxfh		= ixgbevf_get_rxfh,
 	.get_link_ksettings	= ixgbevf_get_link_ksettings,
+	.get_priv_flags		= ixgbevf_get_priv_flags,
+	.set_priv_flags		= ixgbevf_set_priv_flags,
 };
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f6952425c87d..62a366905766 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -331,6 +331,8 @@ struct ixgbevf_adapter {
 
 	u32 *rss_key;
 	u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+	u32 flags;
+#define IXGBEVF_FLAGS_LEGACY_RX		BIT(1)
 };
 
 enum ixbgevf_state_t {

From f15c5ba5b6cdf27c8e75f5a3b57c06c9ac4e515d Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:22 -0800
Subject: [PATCH 0328/1678] ixgbevf: add support for using order 1 pages to
 receive large frames

Based on commit 8649aaef4044
("igb: Add support for using order 1 pages to receive large frames")

Add support for using 3K buffers in order 1 page. We are reserving 1K for
now to have space available for future tail room and head room when we
enable build_skb support.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  | 52 ++++++++++++---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 64 ++++++++++++++-----
 2 files changed, 92 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 62a366905766..d4ee6b1719a9 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -89,17 +89,11 @@ struct ixgbevf_rx_queue_stats {
 };
 
 enum ixgbevf_ring_state_t {
+	__IXGBEVF_RX_3K_BUFFER,
 	__IXGBEVF_TX_DETECT_HANG,
 	__IXGBEVF_HANG_CHECK_ARMED,
 };
 
-#define check_for_tx_hang(ring) \
-	test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define set_check_for_tx_hang(ring) \
-	set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-#define clear_check_for_tx_hang(ring) \
-	clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
-
 struct ixgbevf_ring {
 	struct ixgbevf_ring *next;
 	struct net_device *netdev;
@@ -156,12 +150,20 @@ struct ixgbevf_ring {
 /* Supported Rx Buffer Sizes */
 #define IXGBEVF_RXBUFFER_256	256    /* Used for packet split */
 #define IXGBEVF_RXBUFFER_2048	2048
+#define IXGBEVF_RXBUFFER_3072	3072
 
 #define IXGBEVF_RX_HDR_SIZE	IXGBEVF_RXBUFFER_256
-#define IXGBEVF_RX_BUFSZ	IXGBEVF_RXBUFFER_2048
 
 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 
+#define IXGBEVF_SKB_PAD		(NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+	(SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB	IXGBEVF_RXBUFFER_2048
+#endif
+
 #define IXGBE_TX_FLAGS_CSUM		BIT(0)
 #define IXGBE_TX_FLAGS_VLAN		BIT(1)
 #define IXGBE_TX_FLAGS_TSO		BIT(2)
@@ -170,6 +172,40 @@ struct ixgbevf_ring {
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT	16
 
+#define ring_uses_large_buffer(ring) \
+	test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+	set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+	clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return IXGBEVF_RXBUFFER_3072;
+#endif
+	return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return 1;
+#endif
+	return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+	test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+	set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+	clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
 struct ixgbevf_ring_container {
 	struct ixgbevf_ring *ring;	/* pointer to linked list of rings */
 	unsigned int total_bytes;	/* total bytes processed this int */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 604b026d1a63..f8e1b1c5e8fa 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -565,21 +565,22 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 		return true;
 
 	/* alloc new page for storage */
-	page = dev_alloc_page();
+	page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
 	if (unlikely(!page)) {
 		rx_ring->rx_stats.alloc_rx_page_failed++;
 		return false;
 	}
 
 	/* map page for use */
-	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 ixgbevf_rx_pg_size(rx_ring),
 				 DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
 
 	/* if mapping failed free memory back to system since
 	 * there isn't much point in holding memory we can't use
 	 */
 	if (dma_mapping_error(rx_ring->dev, dma)) {
-		__free_page(page);
+		__free_pages(page, ixgbevf_rx_pg_order(rx_ring));
 
 		rx_ring->rx_stats.alloc_rx_page_failed++;
 		return false;
@@ -621,7 +622,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 		/* sync the buffer for use by the device */
 		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
 						 bi->page_offset,
-						 IXGBEVF_RX_BUFSZ,
+						 ixgbevf_rx_bufsz(rx_ring),
 						 DMA_FROM_DEVICE);
 
 		/* Refresh the desc even if pkt_addr didn't change
@@ -750,13 +751,16 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
 		return false;
 
 	/* flip page offset to other buffer */
-	rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
+	rx_buffer->page_offset ^= truesize;
 
 #else
 	/* move offset up to the next cache line */
 	rx_buffer->page_offset += truesize;
 
-	if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+#define IXGBEVF_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
+
+	if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
 		return false;
 
 #endif
@@ -797,7 +801,7 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 	struct page *page = rx_buffer->page;
 	void *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
-	unsigned int truesize = IXGBEVF_RX_BUFSZ;
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
 	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
 #endif
@@ -888,8 +892,8 @@ static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
 		 * any references we are holding to it
 		 */
 		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-				     PAGE_SIZE, DMA_FROM_DEVICE,
-				     IXGBEVF_RX_DMA_ATTR);
+				     ixgbevf_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
 		__page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
 	}
 
@@ -1586,7 +1590,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
 
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT	2
 
-static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+				     struct ixgbevf_ring *ring, int index)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 srrctl;
@@ -1594,7 +1599,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
 	srrctl = IXGBE_SRRCTL_DROP_EN;
 
 	srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
-	srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	if (ring_uses_large_buffer(ring))
+		srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	else
+		srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
 	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
 	IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
@@ -1766,7 +1774,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
 	ring->next_to_use = 0;
 	ring->next_to_alloc = 0;
 
-	ixgbevf_configure_srrctl(adapter, reg_idx);
+	ixgbevf_configure_srrctl(adapter, ring, reg_idx);
 
 	/* allow any size packet since we can handle overflow */
 	rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
@@ -1778,6 +1786,26 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
 	ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
 }
 
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+				      struct ixgbevf_ring *rx_ring)
+{
+	struct net_device *netdev = adapter->netdev;
+	unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+	/* set build_skb and buffer size flags */
+	clear_ring_uses_large_buffer(rx_ring);
+
+	if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+		return;
+
+#if (PAGE_SIZE < 8192)
+	if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+		return;
+
+	set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
 /**
  * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
  * @adapter: board private structure
@@ -1805,8 +1833,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
 	 * the Base and Length of the Rx Descriptor Ring
 	 */
-	for (i = 0; i < adapter->num_rx_queues; i++)
-		ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]);
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+		ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+		ixgbevf_configure_rx_ring(adapter, rx_ring);
+	}
 }
 
 static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
@@ -2135,13 +2167,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
 		dma_sync_single_range_for_cpu(rx_ring->dev,
 					      rx_buffer->dma,
 					      rx_buffer->page_offset,
-					      IXGBEVF_RX_BUFSZ,
+					      ixgbevf_rx_bufsz(rx_ring),
 					      DMA_FROM_DEVICE);
 
 		/* free resources associated with mapping */
 		dma_unmap_page_attrs(rx_ring->dev,
 				     rx_buffer->dma,
-				     PAGE_SIZE,
+				     ixgbevf_rx_pg_size(rx_ring),
 				     DMA_FROM_DEVICE,
 				     IXGBEVF_RX_DMA_ATTR);
 

From f2d00eca279fda3ddc9ff7cfffac58144c9b95c0 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:27 -0800
Subject: [PATCH 0329/1678] ixgbevf: setup queue counts

Add calls for netif_set_real_num_t/rx_queues() in ixgbevf_open().
Make sure that calls to ixgbevf_open() are rtnl protected and improve
the error handling when setting up multiple queues.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 46 ++++++++++++-------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index f8e1b1c5e8fa..cb9d00a38658 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3119,9 +3119,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
 		if (!err)
 			continue;
 		hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
-		break;
+		goto err_setup_tx;
 	}
 
+	return 0;
+err_setup_tx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ixgbevf_free_tx_resources(adapter->tx_ring[i]);
 	return err;
 }
 
@@ -3179,8 +3184,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
 		if (!err)
 			continue;
 		hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
-		break;
+		goto err_setup_rx;
 	}
+
+	return 0;
+err_setup_rx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ixgbevf_free_rx_resources(adapter->rx_ring[i]);
 	return err;
 }
 
@@ -3285,18 +3296,27 @@ int ixgbevf_open(struct net_device *netdev)
 	if (err)
 		goto err_req_irq;
 
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
 	ixgbevf_up_complete(adapter);
 
 	return 0;
 
+err_set_queues:
+	ixgbevf_free_irq(adapter);
 err_req_irq:
-	ixgbevf_down(adapter);
-err_setup_rx:
 	ixgbevf_free_all_rx_resources(adapter);
-err_setup_tx:
+err_setup_rx:
 	ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
 	ixgbevf_reset(adapter);
-
 err_setup_reset:
 
 	return err;
@@ -3948,17 +3968,11 @@ static int ixgbevf_resume(struct pci_dev *pdev)
 
 	rtnl_lock();
 	err = ixgbevf_init_interrupt_scheme(adapter);
-	rtnl_unlock();
-	if (err) {
-		dev_err(&pdev->dev, "Cannot initialize interrupts\n");
-		return err;
-	}
-
-	if (netif_running(netdev)) {
+	if (!err && netif_running(netdev))
 		err = ixgbevf_open(netdev);
-		if (err)
-			return err;
-	}
+	rtnl_unlock();
+	if (err)
+		return err;
 
 	netif_device_attach(netdev);
 

From 1ab37e12e365c139fc595d7dccddfd13e9ca34c8 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:33 -0800
Subject: [PATCH 0330/1678] ixgbevf: add support for padding packet

Following the logic from commit 2de6aa3a666e
("ixgbe: Add support for padding packet")

Add support for providing a buffer with headroom and tail room
to allow for shared info, NET_SKB_PAD, and NET_IP_ALIGN.  With this
combined with the DMA changes we can start using build_skb to build frames
around an incoming Rx buffer instead of having to memcpy the headers.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  | 11 +++++++
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 32 ++++++++++++++++---
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index d4ee6b1719a9..a5e9127a1156 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -90,6 +90,7 @@ struct ixgbevf_rx_queue_stats {
 
 enum ixgbevf_ring_state_t {
 	__IXGBEVF_RX_3K_BUFFER,
+	__IXGBEVF_RX_BUILD_SKB_ENABLED,
 	__IXGBEVF_TX_DETECT_HANG,
 	__IXGBEVF_HANG_CHECK_ARMED,
 };
@@ -179,11 +180,21 @@ struct ixgbevf_ring {
 #define clear_ring_uses_large_buffer(ring) \
 	clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
 
+#define ring_uses_build_skb(ring) \
+	test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+	set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+	clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
 static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
 {
 #if (PAGE_SIZE < 8192)
 	if (ring_uses_large_buffer(ring))
 		return IXGBEVF_RXBUFFER_3072;
+
+	if (ring_uses_build_skb(ring))
+		return IXGBEVF_MAX_FRAME_BUILD_SKB;
 #endif
 	return IXGBEVF_RXBUFFER_2048;
 }
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index cb9d00a38658..189d6af43b37 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -554,6 +554,11 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
 	return true;
 }
 
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
 static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 				      struct ixgbevf_rx_buffer *bi)
 {
@@ -588,7 +593,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 
 	bi->dma = dma;
 	bi->page = page;
-	bi->page_offset = 0;
+	bi->page_offset = ixgbevf_rx_offset(rx_ring);
 	bi->pagecnt_bias = 1;
 	rx_ring->rx_stats.alloc_rx_page++;
 
@@ -803,7 +808,9 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+				SKB_DATA_ALIGN(size);
 #endif
 	unsigned int pull_len;
 
@@ -1776,8 +1783,19 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
 
 	ixgbevf_configure_srrctl(adapter, ring, reg_idx);
 
-	/* allow any size packet since we can handle overflow */
-	rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+	/* RXDCTL.RLPML does not work on 82599 */
+	if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+			    IXGBE_RXDCTL_RLPML_EN);
+
+#if (PAGE_SIZE < 8192)
+		/* Limit the maximum frame size so we don't overrun the skb */
+		if (ring_uses_build_skb(ring) &&
+		    !ring_uses_large_buffer(ring))
+			rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+				  IXGBE_RXDCTL_RLPML_EN;
+#endif
+	}
 
 	rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
 	IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
@@ -1793,11 +1811,14 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
 	unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
 
 	/* set build_skb and buffer size flags */
+	clear_ring_build_skb_enabled(rx_ring);
 	clear_ring_uses_large_buffer(rx_ring);
 
 	if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
 		return;
 
+	set_ring_build_skb_enabled(rx_ring);
+
 #if (PAGE_SIZE < 8192)
 	if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
 		return;
@@ -3890,6 +3911,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 	/* must set new MTU before calling down or up */
 	netdev->mtu = new_mtu;
 
+	if (netif_running(netdev))
+		ixgbevf_reinit_locked(adapter);
+
 	return 0;
 }
 

From 5cc0f1c0dc56404a46e8bccd6c96e63cde257268 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:38 -0800
Subject: [PATCH 0331/1678] ixgbevf: make sure all frames fit minimum size
 requirements

Similar to commit a50c29dd09ed
("ixgbe: Make certain that all frames fit minimum size requirements")

Make sure that any packet we attempt to transmit will meet minimum
size requirements.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 30 +++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 189d6af43b37..a10b1bdc99e3 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3779,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
 	return __ixgbevf_maybe_stop_tx(tx_ring, size);
 }
 
-static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+				   struct ixgbevf_ring *tx_ring)
 {
-	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbevf_tx_buffer *first;
-	struct ixgbevf_ring *tx_ring;
 	int tso;
 	u32 tx_flags = 0;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
@@ -3798,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		return NETDEV_TX_OK;
 	}
 
-	tx_ring = adapter->tx_ring[skb->queue_mapping];
-
 	/* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
 	 *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
 	 *       + 2 desc gap to keep tail from touching head,
@@ -3852,6 +3849,29 @@ out_drop:
 	return NETDEV_TX_OK;
 }
 
+static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbevf_ring *tx_ring;
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* The minimum packet size for olinfo paylen is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb->len < 17) {
+		if (skb_padto(skb, 17))
+			return NETDEV_TX_OK;
+		skb->len = 17;
+	}
+
+	tx_ring = adapter->tx_ring[skb->queue_mapping];
+	return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
 /**
  * ixgbevf_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure

From 21c046e448616529a181a35445d9f6d60352e01f Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:43 -0800
Subject: [PATCH 0332/1678] ixgbevf: allocate the rings as part of q_vector

Make it so that all rings allocations are made as part of q_vector.
The advantage to this is that we can keep all of the memory related to
a single interrupt in one page.

The goal is to bring the logic of handling rings closer to ixgbe.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  |   7 +-
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 398 ++++++++----------
 2 files changed, 185 insertions(+), 220 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index a5e9127a1156..f65ca156af2d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -97,6 +97,7 @@ enum ixgbevf_ring_state_t {
 
 struct ixgbevf_ring {
 	struct ixgbevf_ring *next;
+	struct ixgbevf_q_vector *q_vector;	/* backpointer to q_vector */
 	struct net_device *netdev;
 	struct device *dev;
 	void *desc;			/* descriptor ring memory */
@@ -128,7 +129,7 @@ struct ixgbevf_ring {
 	 */
 	u16 reg_idx;
 	int queue_index; /* needed for multiqueue queue management */
-};
+} ____cacheline_internodealigned_in_smp;
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define IXGBEVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
@@ -241,7 +242,11 @@ struct ixgbevf_q_vector {
 	u16 itr; /* Interrupt throttle rate written to EITR */
 	struct napi_struct napi;
 	struct ixgbevf_ring_container rx, tx;
+	struct rcu_head rcu;    /* to avoid race with update stats on free */
 	char name[IFNAMSIZ + 9];
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define IXGBEVF_QV_STATE_IDLE		0
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index a10b1bdc99e3..6219ab2e3f52 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1270,85 +1270,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx,
-				     int r_idx)
-{
-	struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-	a->rx_ring[r_idx]->next = q_vector->rx.ring;
-	q_vector->rx.ring = a->rx_ring[r_idx];
-	q_vector->rx.count++;
-}
-
-static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx,
-				     int t_idx)
-{
-	struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx];
-
-	a->tx_ring[t_idx]->next = q_vector->tx.ring;
-	q_vector->tx.ring = a->tx_ring[t_idx];
-	q_vector->tx.count++;
-}
-
-/**
- * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors
- * @adapter: board private structure to initialize
- *
- * This function maps descriptor rings to the queue-specific vectors
- * we were allotted through the MSI-X enabling code.  Ideally, we'd have
- * one vector per ring/queue, but on a constrained vector budget, we
- * group the rings as "efficiently" as possible.  You would add new
- * mapping configurations in here.
- **/
-static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter)
-{
-	int q_vectors;
-	int v_start = 0;
-	int rxr_idx = 0, txr_idx = 0;
-	int rxr_remaining = adapter->num_rx_queues;
-	int txr_remaining = adapter->num_tx_queues;
-	int i, j;
-	int rqpv, tqpv;
-
-	q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-	/* The ideal configuration...
-	 * We have enough vectors to map one per queue.
-	 */
-	if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) {
-		for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
-			map_vector_to_rxq(adapter, v_start, rxr_idx);
-
-		for (; txr_idx < txr_remaining; v_start++, txr_idx++)
-			map_vector_to_txq(adapter, v_start, txr_idx);
-		return 0;
-	}
-
-	/* If we don't have enough vectors for a 1-to-1
-	 * mapping, we'll have to group them so there are
-	 * multiple queues per vector.
-	 */
-	/* Re-adjusting *qpv takes care of the remainder. */
-	for (i = v_start; i < q_vectors; i++) {
-		rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
-		for (j = 0; j < rqpv; j++) {
-			map_vector_to_rxq(adapter, i, rxr_idx);
-			rxr_idx++;
-			rxr_remaining--;
-		}
-	}
-	for (i = v_start; i < q_vectors; i++) {
-		tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
-		for (j = 0; j < tqpv; j++) {
-			map_vector_to_txq(adapter, i, txr_idx);
-			txr_idx++;
-			txr_remaining--;
-		}
-	}
-
-	return 0;
-}
-
 /**
  * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
@@ -1421,20 +1342,6 @@ free_queue_irqs:
 	return err;
 }
 
-static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter)
-{
-	int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
-
-	for (i = 0; i < q_vectors; i++) {
-		struct ixgbevf_q_vector *q_vector = adapter->q_vector[i];
-
-		q_vector->rx.ring = NULL;
-		q_vector->tx.ring = NULL;
-		q_vector->rx.count = 0;
-		q_vector->tx.count = 0;
-	}
-}
-
 /**
  * ixgbevf_request_irq - initialize interrupts
  * @adapter: board private structure
@@ -1474,8 +1381,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
 		free_irq(adapter->msix_entries[i].vector,
 			 adapter->q_vector[i]);
 	}
-
-	ixgbevf_reset_q_vectors(adapter);
 }
 
 /**
@@ -2456,63 +2361,6 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 	}
 }
 
-/**
- * ixgbevf_alloc_queues - Allocate memory for all rings
- * @adapter: board private structure to initialize
- *
- * We allocate one ring per queue at run-time since we don't know the
- * number of queues at compile-time.  The polling_netdev array is
- * intended for Multiqueue, but should work fine with a single queue.
- **/
-static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter)
-{
-	struct ixgbevf_ring *ring;
-	int rx = 0, tx = 0;
-
-	for (; tx < adapter->num_tx_queues; tx++) {
-		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-		if (!ring)
-			goto err_allocation;
-
-		ring->dev = &adapter->pdev->dev;
-		ring->netdev = adapter->netdev;
-		ring->count = adapter->tx_ring_count;
-		ring->queue_index = tx;
-		ring->reg_idx = tx;
-
-		adapter->tx_ring[tx] = ring;
-	}
-
-	for (; rx < adapter->num_rx_queues; rx++) {
-		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-		if (!ring)
-			goto err_allocation;
-
-		ring->dev = &adapter->pdev->dev;
-		ring->netdev = adapter->netdev;
-
-		ring->count = adapter->rx_ring_count;
-		ring->queue_index = rx;
-		ring->reg_idx = rx;
-
-		adapter->rx_ring[rx] = ring;
-	}
-
-	return 0;
-
-err_allocation:
-	while (tx) {
-		kfree(adapter->tx_ring[--tx]);
-		adapter->tx_ring[tx] = NULL;
-	}
-
-	while (rx) {
-		kfree(adapter->rx_ring[--rx]);
-		adapter->rx_ring[rx] = NULL;
-	}
-	return -ENOMEM;
-}
-
 /**
  * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
  * @adapter: board private structure to initialize
@@ -2522,8 +2370,6 @@ err_allocation:
  **/
 static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
 {
-	struct net_device *netdev = adapter->netdev;
-	int err;
 	int vector, v_budget;
 
 	/* It's easy to be greedy for MSI-X vectors, but it really
@@ -2536,9 +2382,6 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
 	v_budget = min_t(int, v_budget, num_online_cpus());
 	v_budget += NON_Q_VECTORS;
 
-	/* A failure in MSI-X entry allocation isn't fatal, but it does
-	 * mean we disable MSI-X capabilities of the adapter.
-	 */
 	adapter->msix_entries = kcalloc(v_budget,
 					sizeof(struct msix_entry), GFP_KERNEL);
 	if (!adapter->msix_entries)
@@ -2547,15 +2390,143 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
 	for (vector = 0; vector < v_budget; vector++)
 		adapter->msix_entries[vector].entry = vector;
 
-	err = ixgbevf_acquire_msix_vectors(adapter, v_budget);
-	if (err)
-		return err;
+	/* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+	 * does not support any other modes, so we will simply fail here. Note
+	 * that we clean up the msix_entries pointer else-where.
+	 */
+	return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
 
-	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
-	if (err)
-		return err;
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+			     struct ixgbevf_ring_container *head)
+{
+	ring->next = head->ring;
+	head->ring = ring;
+	head->count++;
+}
 
-	return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+				  int txr_count, int txr_idx,
+				  int rxr_count, int rxr_idx)
+{
+	struct ixgbevf_q_vector *q_vector;
+	struct ixgbevf_ring *ring;
+	int ring_count, size;
+
+	ring_count = txr_count + rxr_count;
+	size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+	/* allocate q_vector and rings */
+	q_vector = kzalloc(size, GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+	q_vector->v_idx = v_idx;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	while (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbevf_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+		ring->reg_idx = txr_idx;
+
+		/* assign ring to adapter */
+		 adapter->tx_ring[txr_idx] = ring;
+
+		/* update count and index */
+		txr_count--;
+		txr_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		ixgbevf_add_ring(ring, &q_vector->rx);
+
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+		ring->reg_idx = rxr_idx;
+
+		/* assign ring to adapter */
+		adapter->rx_ring[rxr_idx] = ring;
+
+		/* update count and index */
+		rxr_count--;
+		rxr_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
+{
+	struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+	struct ixgbevf_ring *ring;
+
+	ixgbevf_for_each_ring(ring, q_vector->tx)
+		adapter->tx_ring[ring->queue_index] = NULL;
+
+	ixgbevf_for_each_ring(ring, q_vector->rx)
+		adapter->rx_ring[ring->queue_index] = NULL;
+
+	adapter->q_vector[v_idx] = NULL;
+	netif_napi_del(&q_vector->napi);
+
+	/* ixgbevf_get_stats() might access the rings on this vector,
+	 * we must wait a grace period before freeing it.
+	 */
+	kfree_rcu(q_vector, rcu);
 }
 
 /**
@@ -2567,35 +2538,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
  **/
 static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 {
-	int q_idx, num_q_vectors;
-	struct ixgbevf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int err;
 
-	num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+		for (; rxr_remaining; v_idx++, q_vectors--) {
+			int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
 
-	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-		q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL);
-		if (!q_vector)
+			err = ixgbevf_alloc_q_vector(adapter, v_idx,
+						     0, 0, rqpv, rxr_idx);
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining -= rqpv;
+			rxr_idx += rqpv;
+		}
+	}
+
+	for (; q_vectors; v_idx++, q_vectors--) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
+
+		err = ixgbevf_alloc_q_vector(adapter, v_idx,
+					     tqpv, txr_idx,
+					     rqpv, rxr_idx);
+
+		if (err)
 			goto err_out;
-		q_vector->adapter = adapter;
-		q_vector->v_idx = q_idx;
-		netif_napi_add(adapter->netdev, &q_vector->napi,
-			       ixgbevf_poll, 64);
-		adapter->q_vector[q_idx] = q_vector;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		rxr_idx += rqpv;
+		txr_remaining -= tqpv;
+		txr_idx += tqpv;
 	}
 
 	return 0;
 
 err_out:
-	while (q_idx) {
-		q_idx--;
-		q_vector = adapter->q_vector[q_idx];
-#ifdef CONFIG_NET_RX_BUSY_POLL
-		napi_hash_del(&q_vector->napi);
-#endif
-		netif_napi_del(&q_vector->napi);
-		kfree(q_vector);
-		adapter->q_vector[q_idx] = NULL;
+	while (v_idx) {
+		v_idx--;
+		ixgbevf_free_q_vector(adapter, v_idx);
 	}
+
 	return -ENOMEM;
 }
 
@@ -2609,17 +2598,11 @@ err_out:
  **/
 static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
 {
-	int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 
-	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
-		struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx];
-
-		adapter->q_vector[q_idx] = NULL;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-		napi_hash_del(&q_vector->napi);
-#endif
-		netif_napi_del(&q_vector->napi);
-		kfree(q_vector);
+	while (q_vectors) {
+		q_vectors--;
+		ixgbevf_free_q_vector(adapter, q_vectors);
 	}
 }
 
@@ -2663,12 +2646,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
 		goto err_alloc_q_vectors;
 	}
 
-	err = ixgbevf_alloc_queues(adapter);
-	if (err) {
-		pr_err("Unable to allocate memory for queues\n");
-		goto err_alloc_queues;
-	}
-
 	hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
 	       (adapter->num_rx_queues > 1) ? "Enabled" :
 	       "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
@@ -2676,8 +2653,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
 	set_bit(__IXGBEVF_DOWN, &adapter->state);
 
 	return 0;
-err_alloc_queues:
-	ixgbevf_free_q_vectors(adapter);
 err_alloc_q_vectors:
 	ixgbevf_reset_interrupt_capability(adapter);
 err_set_interrupt:
@@ -2693,17 +2668,6 @@ err_set_interrupt:
  **/
 static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
 {
-	int i;
-
-	for (i = 0; i < adapter->num_tx_queues; i++) {
-		kfree(adapter->tx_ring[i]);
-		adapter->tx_ring[i] = NULL;
-	}
-	for (i = 0; i < adapter->num_rx_queues; i++) {
-		kfree(adapter->rx_ring[i]);
-		adapter->rx_ring[i] = NULL;
-	}
-
 	adapter->num_tx_queues = 0;
 	adapter->num_rx_queues = 0;
 
@@ -3307,12 +3271,6 @@ int ixgbevf_open(struct net_device *netdev)
 
 	ixgbevf_configure(adapter);
 
-	/* Map the Tx/Rx rings to the vectors we were allotted.
-	 * if request_irq will be called in this function map_rings
-	 * must be called *before* up_complete
-	 */
-	ixgbevf_map_rings_to_vectors(adapter);
-
 	err = ixgbevf_request_irq(adapter);
 	if (err)
 		goto err_req_irq;
@@ -4042,6 +4000,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 
 	stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
 
+	rcu_read_lock();
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		ring = adapter->rx_ring[i];
 		do {
@@ -4063,6 +4022,7 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 		stats->tx_bytes += bytes;
 		stats->tx_packets += packets;
 	}
+	rcu_read_unlock();
 }
 
 #define IXGBEVF_MAX_MAC_HDR_LEN		127

From 925f5690ff5d5a1d9ec027e938d37b539e3fd186 Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:49 -0800
Subject: [PATCH 0333/1678] ixgbevf: break out Rx buffer page management

Based on commit e014272672b9 ("igb: Break out Rx buffer page management")

Consolidate Rx code paths to reduce duplication when we expand them in
the future.

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 227 +++++++++---------
 1 file changed, 114 insertions(+), 113 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 6219ab2e3f52..faeb426c2f0f 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
 static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
 static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
 static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+				  struct ixgbevf_rx_buffer *old_buff);
 
 static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
 {
@@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+						const unsigned int size)
+{
+	struct ixgbevf_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	prefetchw(rx_buffer->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+				  struct ixgbevf_rx_buffer *rx_buffer)
+{
+	if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+		/* hand second half of page back to the ring */
+		ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* We are not reusing the buffer so unmap it and free
+		 * any references we are holding to it
+		 */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     ixgbevf_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IXGBEVF_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+}
+
 /**
  * ixgbevf_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
@@ -740,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
 	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
 }
 
-static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
-				      struct page *page,
-				      const unsigned int truesize)
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
 {
-	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
 
 	/* avoid re-using remote pages */
 	if (unlikely(ixgbevf_page_is_reserved(page)))
@@ -752,16 +797,9 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
 	/* if we are only owner of page we can reuse it */
-	if (unlikely(page_ref_count(page) != pagecnt_bias))
+	if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
 		return false;
-
-	/* flip page offset to other buffer */
-	rx_buffer->page_offset ^= truesize;
-
 #else
-	/* move offset up to the next cache line */
-	rx_buffer->page_offset += truesize;
-
 #define IXGBEVF_LAST_OFFSET \
 	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
 
@@ -774,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
 	 * the pagecnt_bias and page count so that we fully restock the
 	 * number of references the driver holds.
 	 */
-	if (unlikely(pagecnt_bias == 1)) {
+	if (unlikely(!pagecnt_bias)) {
 		page_ref_add(page, USHRT_MAX);
 		rx_buffer->pagecnt_bias = USHRT_MAX;
 	}
@@ -786,25 +824,16 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
  * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: buffer containing page to add
- * @rx_desc: descriptor containing length of buffer written by hardware
  * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
  *
  * This function will add the data contained in rx_buffer->page to the skb.
- * This is done either through a direct copy if the data in the buffer is
- * less than the skb header size, otherwise it will just attach the page as
- * a frag to the skb.
- *
- * The function will then update the page offset if necessary and return
- * true if the buffer can be reused by the adapter.
  **/
-static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 				struct ixgbevf_rx_buffer *rx_buffer,
-				u16 size,
-				union ixgbe_adv_rx_desc *rx_desc,
-				struct sk_buff *skb)
+				struct sk_buff *skb,
+				unsigned int size)
 {
-	struct page *page = rx_buffer->page;
-	void *va = page_address(page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
@@ -812,102 +841,64 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
 				SKB_DATA_ALIGN(size);
 #endif
-	unsigned int pull_len;
-
-	if (unlikely(skb_is_nonlinear(skb)))
-		goto add_tail_frag;
-
-	if (likely(size <= IXGBEVF_RX_HDR_SIZE)) {
-		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
-
-		/* page is not reserved, we can reuse buffer as is */
-		if (likely(!ixgbevf_page_is_reserved(page)))
-			return true;
-
-		/* this page cannot be reused so discard it */
-		return false;
-	}
-
-	/* we need the header to contain the greater of either ETH_HLEN or
-	 * 60 bytes if the skb->len is less than 60 for skb_pad.
-	 */
-	pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
-
-	/* align pull length to size of long to optimize memcpy performance */
-	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
-
-	/* update all of the pointers */
-	va += pull_len;
-	size -= pull_len;
-
-add_tail_frag:
-	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-			va - page_address(page), size, truesize);
-
-	return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
 }
 
-static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
-					       union ixgbe_adv_rx_desc *rx_desc,
-					       struct sk_buff *skb)
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+				      struct ixgbevf_rx_buffer *rx_buffer,
+				      union ixgbe_adv_rx_desc *rx_desc,
+				      unsigned int size)
 {
-	struct ixgbevf_rx_buffer *rx_buffer;
-	struct page *page;
-	u16 size = le16_to_cpu(rx_desc->wb.upper.length);
+	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+	unsigned int headlen;
+	struct sk_buff *skb;
 
-	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-	page = rx_buffer->page;
-	prefetchw(page);
-
-	/* we are reusing so sync this buffer for CPU use */
-	dma_sync_single_range_for_cpu(rx_ring->dev,
-				      rx_buffer->dma,
-				      rx_buffer->page_offset,
-				      size,
-				      DMA_FROM_DEVICE);
-
-	if (likely(!skb)) {
-		void *va = page_address(page) + rx_buffer->page_offset;
-
-		/* prefetch first cache line of first page */
-		prefetch(va);
+	/* prefetch first cache line of first page */
+	prefetch(va);
 #if L1_CACHE_BYTES < 128
-		prefetch(va + L1_CACHE_BYTES);
+	prefetch(va + L1_CACHE_BYTES);
 #endif
 
-		/* allocate a skb to store the frags */
-		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-						IXGBEVF_RX_HDR_SIZE);
-		if (unlikely(!skb)) {
-			rx_ring->rx_stats.alloc_rx_buff_failed++;
-			return NULL;
-		}
+	/* allocate a skb to store the frags */
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+	if (unlikely(!skb))
+		return NULL;
 
-		/* we will be copying header into skb->data in
-		 * pskb_may_pull so it is in our interest to prefetch
-		 * it now to avoid a possible cache miss
-		 */
-		prefetchw(skb->data);
-	}
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IXGBEVF_RX_HDR_SIZE)
+		headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
 
-	/* pull page into skb */
-	if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
-		/* hand second half of page back to the ring */
-		ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				(va + headlen) - page_address(rx_buffer->page),
+				size, truesize);
+#if (PAGE_SIZE < 8192)
+		rx_buffer->page_offset ^= truesize;
+#else
+		rx_buffer->page_offset += truesize;
+#endif
 	} else {
-		/* We are not reusing the buffer so unmap it and free
-		 * any references we are holding to it
-		 */
-		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-				     ixgbevf_rx_pg_size(rx_ring),
-				     DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
-		__page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
+		rx_buffer->pagecnt_bias++;
 	}
 
-	/* clear contents of buffer_info */
-	rx_buffer->dma = 0;
-	rx_buffer->page = NULL;
-
 	return skb;
 }
 
@@ -929,6 +920,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 
 	while (likely(total_rx_packets < budget)) {
 		union ixgbe_adv_rx_desc *rx_desc;
+		struct ixgbevf_rx_buffer *rx_buffer;
+		unsigned int size;
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
@@ -937,8 +930,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		}
 
 		rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
-
-		if (!rx_desc->wb.upper.length)
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
 			break;
 
 		/* This memory barrier is needed to keep us from reading
@@ -947,15 +940,23 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		 */
 		rmb();
 
+		rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
 		/* retrieve a buffer from the ring */
-		skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
+		if (skb)
+			ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else
+			skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+						    rx_desc, size);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
 			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			rx_buffer->pagecnt_bias++;
 			break;
 		}
 
+		ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
 		cleaned_count++;
 
 		/* fetch next buffer in frame if non-eop */

From 6d9c02171a8081f3746ea8cf7c67b0063a2272db Mon Sep 17 00:00:00 2001
From: Emil Tantilov <emil.s.tantilov@intel.com>
Date: Tue, 30 Jan 2018 16:51:54 -0800
Subject: [PATCH 0334/1678] ixgbevf: add build_skb support

Add support for build_skb() similar to:
commit 6f429223b31c ("ixgbe: Add support for build_skb")

Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index faeb426c2f0f..cb943187816f 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -910,6 +910,44 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
 	IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
 }
 
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+					 struct ixgbevf_rx_buffer *rx_buffer,
+					 union ixgbe_adv_rx_desc *rx_desc,
+					 unsigned int size)
+{
+	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+#endif
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	prefetch(va);
+#if L1_CACHE_BYTES < 128
+	prefetch(va + L1_CACHE_BYTES);
+#endif
+
+	/* build an skb to around the page buffer */
+	skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, IXGBEVF_SKB_PAD);
+	__skb_put(skb, size);
+
+	/* update buffer offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 				struct ixgbevf_ring *rx_ring,
 				int budget)
@@ -945,6 +983,9 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		/* retrieve a buffer from the ring */
 		if (skb)
 			ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else if (ring_uses_build_skb(rx_ring))
+			skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+						rx_desc, size);
 		else
 			skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
 						    rx_desc, size);

From 93a6a37c6982ac39206da2fa05dcf5868049a378 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 1 Feb 2018 18:35:39 +0000
Subject: [PATCH 0335/1678] ixgbevf: remove redundant initialization of
 variable 'dma'

Variable dma is initialized with a value that is never read, later
on it is re-assigned a new value, hence the initialization is redundant
and can be removed.

Cleans up clang warning:
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:584:13: warning: Value
stored to 'dma' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index cb943187816f..f37307131eb6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -609,7 +609,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
 				      struct ixgbevf_rx_buffer *bi)
 {
 	struct page *page = bi->page;
-	dma_addr_t dma = bi->dma;
+	dma_addr_t dma;
 
 	/* since we are recycling buffers we should seldom need to alloc */
 	if (likely(page))

From 3adc1c63e288c3f60b1bdee461a0b15447901bcc Mon Sep 17 00:00:00 2001
From: BTaskaya <batuhanosmantaskaya@gmail.com>
Date: Fri, 23 Feb 2018 22:57:35 +0300
Subject: [PATCH 0336/1678] tc: python3, string formattings

This patch converts old type string formattings to new type string
formattings for adapting Linux Traffic Control (tc) unit testing suite
python3.

Linux Traffic Control (tc) unit testing suite's code quality improved is improved with this patch.
According to python documentation;
"The built-in string class provides the ability to do complex variable substitutions and
value formatting via the format() method described in PEP 3101. "
but the project was using old type formattings and new type string formattings together,
this patch's main purpose is converting all old types to new types.

Following files changed:
 1. tools/testing/selftests/tc-testing/tdc.py
 2. tools/testing/selftests/tc-testing/tdc_batch.py

Following PEP rules applied:
 1. PEP8 - Code Styling
 2. PEP3101 - Advanced Code Formatting

 Signed-off-by: Batuhan Osman Taskaya <batuhanosmantaskaya@gmail.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py       | 2 +-
 tools/testing/selftests/tc-testing/tdc_batch.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 6c220bc26d9f..7b50775abaf6 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -445,7 +445,7 @@ def generate_case_ids(alltests):
     for c in alltests:
         if (c["id"] == ""):
             while True:
-                newid = str('%04x' % random.randrange(16**4))
+                newid = str('{:04x}'.format(random.randrange(16**4)))
                 if (does_id_exist(alltests, newid)):
                     continue
                 else:
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 707c6bfef689..52fa539dc662 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -49,13 +49,13 @@ index = 0
 for i in range(0x100):
     for j in range(0x100):
         for k in range(0x100):
-            mac = ("%02x:%02x:%02x" % (i, j, k))
+            mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
             src_mac = "e4:11:00:" + mac
             dst_mac = "e4:12:00:" + mac
-            cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s "
-                   "src_mac %s dst_mac %s action drop %s" %
+            cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+                   "src_mac {} dst_mac {} action drop {}".format
                    (device, prio, skip, src_mac, dst_mac, share_action))
-            file.write("%s\n" % cmd)
+            file.write("{}\n".format(cmd))
             index += 1
             if index >= number:
                 file.close()

From 4869a1476df5ef2d09fa52acc9cfcc21b47194c5 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Feb 2018 20:28:16 +0300
Subject: [PATCH 0337/1678] sh_eth: TSU_QTAG0/1 registers the same as
 TSU_QTAGM0/1

The TSU_QTAG0/1 registers found in the Gigabit Ether controllers actually
have the same long name  as the TSU_QTAGM0/1 registers in the early Ether
controllers:  Qtag Addition/Deletion Set Register (Port 0/1 to 1/0); thus
there's no need to make a difference in sh_eth_tsu_init() between those
controllers. Unfortunately, we can't just remove TSU_QTAG0/1 from the
register *enum* because that would break the ethtool register dump...

Fixes: b0ca2a21f769 ("sh_eth: Add support of SH7763 to sh_eth")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 15 ++++-----------
 drivers/net/ethernet/renesas/sh_eth.h |  4 ++--
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d7d5a6d15219..4502ff7bc19f 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -123,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
 	[TSU_FWSL0]	= 0x0030,
 	[TSU_FWSL1]	= 0x0034,
 	[TSU_FWSLC]	= 0x0038,
-	[TSU_QTAG0]	= 0x0040,
-	[TSU_QTAG1]	= 0x0044,
+	[TSU_QTAGM0]	= 0x0040,
+	[TSU_QTAGM1]	= 0x0044,
 	[TSU_FWSR]	= 0x0050,
 	[TSU_FWINMK]	= 0x0054,
 	[TSU_ADQT0]	= 0x0048,
@@ -2097,8 +2097,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
 		add_tsu_reg(TSU_FWSL0);
 		add_tsu_reg(TSU_FWSL1);
 		add_tsu_reg(TSU_FWSLC);
-		add_tsu_reg(TSU_QTAG0);
-		add_tsu_reg(TSU_QTAG1);
 		add_tsu_reg(TSU_QTAGM0);
 		add_tsu_reg(TSU_QTAGM1);
 		add_tsu_reg(TSU_FWSR);
@@ -2934,13 +2932,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
 	sh_eth_tsu_write(mdp, 0, TSU_FWSL0);
 	sh_eth_tsu_write(mdp, 0, TSU_FWSL1);
 	sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC);
-	if (sh_eth_is_gether(mdp)) {
-		sh_eth_tsu_write(mdp, 0, TSU_QTAG0);	/* Disable QTAG(0->1) */
-		sh_eth_tsu_write(mdp, 0, TSU_QTAG1);	/* Disable QTAG(1->0) */
-	} else {
-		sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);	/* Disable QTAG(0->1) */
-		sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);	/* Disable QTAG(1->0) */
-	}
+	sh_eth_tsu_write(mdp, 0, TSU_QTAGM0);	/* Disable QTAG(0->1) */
+	sh_eth_tsu_write(mdp, 0, TSU_QTAGM1);	/* Disable QTAG(1->0) */
 	sh_eth_tsu_write(mdp, 0, TSU_FWSR);	/* all interrupt status clear */
 	sh_eth_tsu_write(mdp, 0, TSU_FWINMK);	/* Disable all interrupt */
 	sh_eth_tsu_write(mdp, 0, TSU_TEN);	/* Disable all CAM entry */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index a6753ccba711..35bfeeb3fcdc 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -118,8 +118,8 @@ enum {
 	TSU_FWSL0,
 	TSU_FWSL1,
 	TSU_FWSLC,
-	TSU_QTAG0,
-	TSU_QTAG1,
+	TSU_QTAG0,			/* Same as TSU_QTAGM0 */
+	TSU_QTAG1,			/* Same as TSU_QTAGM1 */
 	TSU_QTAGM0,
 	TSU_QTAGM1,
 	TSU_FWSR,

From a94cf2a614f8bc5b2b33c708626ce695bf71e424 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Feb 2018 22:41:45 +0300
Subject: [PATCH 0338/1678] sh_eth: fix TSU init on SH7734/R8A7740

It appears that the single port Ether controllers having TSU (like SH7734/
R8A7740) need the same kind of treating in sh_eth_tsu_init() as R7S72100
currently has -- they also don't have the TSU registers related e.g. to
passing the frames between ports. Add the 'sh_eth_cpu_data::dual_port'
flag and use it as a new criterion for taking a "short path" in the TSU
init sequence in order to avoid writing to the non-existent registers...

Fixes: f0e81fecd4f8 ("net: sh_eth: Add support SH7734")
Fixes: 73a0d907301e ("net: sh_eth: add support R8A7740")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 6 +++++-
 drivers/net/ethernet/renesas/sh_eth.h | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 4502ff7bc19f..d3e1bc05ca9c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -752,6 +752,7 @@ static struct sh_eth_cpu_data sh7757_data = {
 	.rpadir		= 1,
 	.rpadir_value   = 2 << 16,
 	.rtrate		= 1,
+	.dual_port	= 1,
 };
 
 #define SH_GIGA_ETH_BASE	0xfee00000UL
@@ -830,6 +831,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
 	.no_trimd	= 1,
 	.no_ade		= 1,
 	.tsu		= 1,
+	.dual_port	= 1,
 };
 
 /* SH7734 */
@@ -900,6 +902,7 @@ static struct sh_eth_cpu_data sh7763_data = {
 	.tsu		= 1,
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
+	.dual_port	= 1,
 };
 
 static struct sh_eth_cpu_data sh7619_data = {
@@ -932,6 +935,7 @@ static struct sh_eth_cpu_data sh771x_data = {
 			  EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
 			  EESIPR_PREIP | EESIPR_CERFIP,
 	.tsu		= 1,
+	.dual_port	= 1,
 };
 
 static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
@@ -2915,7 +2919,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev,
 /* SuperH's TSU register init function */
 static void sh_eth_tsu_init(struct sh_eth_private *mdp)
 {
-	if (sh_eth_is_rz_fast_ether(mdp)) {
+	if (!mdp->cd->dual_port) {
 		sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
 		sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
 				 TSU_FWSLC);	/* Enable POST registers */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 35bfeeb3fcdc..5bbaf9e56e92 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -509,6 +509,7 @@ struct sh_eth_cpu_data {
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
 	unsigned magic:1;	/* EtherC has ECMR.MPDE and ECSR.MPD */
+	unsigned dual_port:1;	/* Dual EtherC/E-DMAC */
 };
 
 struct sh_eth_private {

From 0d12c6870dc4dc9067cb103c20cdb35c89f841e3 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sat, 24 Feb 2018 18:27:25 -0500
Subject: [PATCH 0339/1678] net/macsonic: Convert to nubus_driver

This resolves an old issue preventing any NuBus SONIC NICs from
working in a Mac with an on-board SONIC device.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/macsonic.c | 173 ++++++++++++++++--------
 1 file changed, 119 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index b922ab5cedea..c744912f55a9 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -60,8 +60,6 @@
 #include <asm/macints.h>
 #include <asm/mac_via.h>
 
-static char mac_sonic_string[] = "macsonic";
-
 #include "sonic.h"
 
 /* These should basically be bus-size and endian independent (since
@@ -410,7 +408,7 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 	return macsonic_init(dev);
 }
 
-static int mac_nubus_sonic_ethernet_addr(struct net_device *dev,
+static int mac_sonic_nubus_ethernet_addr(struct net_device *dev,
 					 unsigned long prom_addr, int id)
 {
 	int i;
@@ -449,70 +447,49 @@ static int macsonic_ident(struct nubus_rsrc *fres)
 	return -1;
 }
 
-static int mac_nubus_sonic_probe(struct net_device *dev)
+static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
+				       struct net_device *dev)
 {
-	static int slots;
-	struct nubus_rsrc *ndev = NULL;
 	struct sonic_local* lp = netdev_priv(dev);
 	unsigned long base_addr, prom_addr;
 	u16 sonic_dcr;
-	int id = -1;
 	int reg_offset, dma_bitmode;
 
-	/* Find the first SONIC that hasn't been initialized already */
-	for_each_func_rsrc(ndev) {
-		if (ndev->category != NUBUS_CAT_NETWORK ||
-		    ndev->type != NUBUS_TYPE_ETHERNET)
-			continue;
-
-		/* Have we seen it already? */
-		if (slots & (1<<ndev->board->slot))
-			continue;
-		slots |= 1<<ndev->board->slot;
-
-		/* Is it one of ours? */
-		if ((id = macsonic_ident(ndev)) != -1)
-			break;
-	}
-
-	if (ndev == NULL)
-		return -ENODEV;
-
 	switch (id) {
 	case MACSONIC_DUODOCK:
-		base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS;
-		prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE;
+		base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS;
+		prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE;
 		sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 |
 		            SONIC_DCR_TFT0;
 		reg_offset = 2;
 		dma_bitmode = SONIC_BITMODE32;
 		break;
 	case MACSONIC_APPLE:
-		base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-		prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+		base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+		prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
 		sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0;
 		reg_offset = 0;
 		dma_bitmode = SONIC_BITMODE32;
 		break;
 	case MACSONIC_APPLE16:
-		base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-		prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE;
+		base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+		prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE;
 		sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
 		            SONIC_DCR_PO1 | SONIC_DCR_BMS;
 		reg_offset = 0;
 		dma_bitmode = SONIC_BITMODE16;
 		break;
 	case MACSONIC_DAYNALINK:
-		base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS;
-		prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE;
+		base_addr = board->slot_addr + APPLE_SONIC_REGISTERS;
+		prom_addr = board->slot_addr + DAYNALINK_PROM_BASE;
 		sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 |
 		            SONIC_DCR_PO1 | SONIC_DCR_BMS;
 		reg_offset = 0;
 		dma_bitmode = SONIC_BITMODE16;
 		break;
 	case MACSONIC_DAYNA:
-		base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS;
-		prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR;
+		base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS;
+		prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR;
 		sonic_dcr = SONIC_DCR_BMS |
 		            SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1;
 		reg_offset = 0;
@@ -528,14 +505,14 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
 	dev->base_addr = base_addr;
 	lp->reg_offset = reg_offset;
 	lp->dma_bitmode = dma_bitmode;
-	dev->irq = SLOT2IRQ(ndev->board->slot);
+	dev->irq = SLOT2IRQ(board->slot);
 
 	if (!sonic_version_printed) {
 		printk(KERN_INFO "%s", version);
 		sonic_version_printed = 1;
 	}
 	printk(KERN_INFO "%s: %s in slot %X\n",
-	       dev_name(lp->device), ndev->board->name, ndev->board->slot);
+	       dev_name(lp->device), board->name, board->slot);
 	printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
 	       dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
 
@@ -557,14 +534,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev)
 	SONIC_WRITE(SONIC_ISR, 0x7fff);
 
 	/* Now look for the MAC address. */
-	if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0)
+	if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0)
 		return -ENODEV;
 
+	dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+		 dev->base_addr, dev->dev_addr, dev->irq);
+
 	/* Shared init code */
 	return macsonic_init(dev);
 }
 
-static int mac_sonic_probe(struct platform_device *pdev)
+static int mac_sonic_platform_probe(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct sonic_local *lp;
@@ -579,16 +559,10 @@ static int mac_sonic_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	platform_set_drvdata(pdev, dev);
 
-	/* This will catch fatal stuff like -ENOMEM as well as success */
 	err = mac_onboard_sonic_probe(dev);
-	if (err == 0)
-		goto found;
-	if (err != -ENODEV)
-		goto out;
-	err = mac_nubus_sonic_probe(dev);
 	if (err)
 		goto out;
-found:
+
 	err = register_netdev(dev);
 	if (err)
 		goto out;
@@ -610,7 +584,7 @@ MODULE_ALIAS("platform:macsonic");
 
 #include "sonic.c"
 
-static int mac_sonic_device_remove(struct platform_device *pdev)
+static int mac_sonic_platform_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = platform_get_drvdata(pdev);
 	struct sonic_local* lp = netdev_priv(dev);
@@ -623,12 +597,103 @@ static int mac_sonic_device_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct platform_driver mac_sonic_driver = {
-	.probe  = mac_sonic_probe,
-	.remove = mac_sonic_device_remove,
-	.driver	= {
-		.name	= mac_sonic_string,
+static struct platform_driver mac_sonic_platform_driver = {
+	.probe  = mac_sonic_platform_probe,
+	.remove = mac_sonic_platform_remove,
+	.driver = {
+		.name = "macsonic",
 	},
 };
 
-module_platform_driver(mac_sonic_driver);
+static int mac_sonic_nubus_probe(struct nubus_board *board)
+{
+	struct net_device *ndev;
+	struct sonic_local *lp;
+	struct nubus_rsrc *fres;
+	int id = -1;
+	int err;
+
+	/* The platform driver will handle a PDS or Comm Slot card (even if
+	 * it has a pseudoslot declaration ROM).
+	 */
+	if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+		return -ENODEV;
+
+	for_each_board_func_rsrc(board, fres) {
+		if (fres->category != NUBUS_CAT_NETWORK ||
+		    fres->type != NUBUS_TYPE_ETHERNET)
+			continue;
+
+		id = macsonic_ident(fres);
+		if (id != -1)
+			break;
+	}
+	if (!fres)
+		return -ENODEV;
+
+	ndev = alloc_etherdev(sizeof(struct sonic_local));
+	if (!ndev)
+		return -ENOMEM;
+
+	lp = netdev_priv(ndev);
+	lp->device = &board->dev;
+	SET_NETDEV_DEV(ndev, &board->dev);
+
+	err = mac_sonic_nubus_probe_board(board, id, ndev);
+	if (err)
+		goto out;
+
+	err = register_netdev(ndev);
+	if (err)
+		goto out;
+
+	nubus_set_drvdata(board, ndev);
+
+	return 0;
+
+out:
+	free_netdev(ndev);
+	return err;
+}
+
+static int mac_sonic_nubus_remove(struct nubus_board *board)
+{
+	struct net_device *ndev = nubus_get_drvdata(board);
+	struct sonic_local *lp = netdev_priv(ndev);
+
+	unregister_netdev(ndev);
+	dma_free_coherent(lp->device,
+			  SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+			  lp->descriptors, lp->descriptors_laddr);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static struct nubus_driver mac_sonic_nubus_driver = {
+	.probe  = mac_sonic_nubus_probe,
+	.remove = mac_sonic_nubus_remove,
+	.driver = {
+		.name = "macsonic-nubus",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int perr, nerr;
+
+static int __init mac_sonic_init(void)
+{
+	perr = platform_driver_register(&mac_sonic_platform_driver);
+	nerr = nubus_driver_register(&mac_sonic_nubus_driver);
+	return 0;
+}
+module_init(mac_sonic_init);
+
+static void __exit mac_sonic_exit(void)
+{
+	if (!perr)
+		platform_driver_unregister(&mac_sonic_platform_driver);
+	if (!nerr)
+		nubus_driver_unregister(&mac_sonic_nubus_driver);
+}
+module_exit(mac_sonic_exit);

From 3d16bada58b2bdd15a840be09c04f49b40641838 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sat, 24 Feb 2018 18:27:25 -0500
Subject: [PATCH 0340/1678] net/macsonic: Drop redundant MACH_IS_MAC test

The MACH_IS_MAC test is redundant here because the platform device
won't get registered unless MACH_IS_MAC.

Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/macsonic.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index c744912f55a9..f6745a893c82 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -311,9 +311,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 	int sr;
 	bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
 
-	if (!MACH_IS_MAC)
-		return -ENODEV;
-
 	printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
 
 	/* Bogus probing, on the models which may or may not have

From bbc2f23a8f32cb403136d20cbed5fd056c1f6a7b Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sat, 24 Feb 2018 18:27:25 -0500
Subject: [PATCH 0341/1678] net/sonic: Clean up and modernize log messages

Add missing printk severity levels by adopting pr_foo() calls for the
platform_driver and dev_foo() calls for the nubus_driver.
Avoid KERN_CONT usage as per advice from checkpatch.
Avoid #ifdef around printk calls.
Don't log driver probe messages after calling register_netdev().

Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Chris Zankel <chris@zankel.net>
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/jazzsonic.c | 13 +++----
 drivers/net/ethernet/natsemi/macsonic.c  | 47 ++++++++++--------------
 drivers/net/ethernet/natsemi/xtsonic.c   | 11 +++---
 3 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index d5b28884e21e..58495316d318 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -142,17 +142,14 @@ static int sonic_probe1(struct net_device *dev)
 		i++;
 
 	if (known_revisions[i] == 0xffff) {
-		printk("SONIC ethernet controller not found (0x%4x)\n",
-		       silicon_revision);
+		pr_info("SONIC ethernet controller not found (0x%4x)\n",
+			silicon_revision);
 		goto out;
 	}
 
 	if (sonic_debug  &&  version_printed++ == 0)
 		printk(version);
 
-	printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ",
-	       dev_name(lp->device), dev->base_addr);
-
 	/*
 	 * Put the sonic into software reset, then
 	 * retrieve and print the ethernet address.
@@ -245,12 +242,14 @@ static int jazz_sonic_probe(struct platform_device *pdev)
 	err = sonic_probe1(dev);
 	if (err)
 		goto out;
+
+	pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+		dev->base_addr, dev->dev_addr, dev->irq);
+
 	err = register_netdev(dev);
 	if (err)
 		goto out1;
 
-	printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
 	return 0;
 
 out1:
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index f6745a893c82..dc690f287a6d 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -311,8 +311,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 	int sr;
 	bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM;
 
-	printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. ");
-
 	/* Bogus probing, on the models which may or may not have
 	   Ethernet (BTW, the Ethernet *is* always at the same
 	   address, and nothing else lives there, at least if Apple's
@@ -322,13 +320,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 
 		card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS);
 		if (!card_present) {
-			printk("none.\n");
+			pr_info("Onboard/comm-slot SONIC not found\n");
 			return -ENODEV;
 		}
 	}
 
-	printk("yes\n");
-
 	/* Danger!  My arms are flailing wildly!  You *must* set lp->reg_offset
 	 * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */
 	dev->base_addr = ONBOARD_SONIC_REGISTERS;
@@ -341,14 +337,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 		printk(KERN_INFO "%s", version);
 		sonic_version_printed = 1;
 	}
-	printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n",
-	       dev_name(lp->device), dev->base_addr);
 
 	/* The PowerBook's SONIC is 16 bit always. */
 	if (macintosh_config->ident == MAC_MODEL_PB520) {
 		lp->reg_offset = 0;
 		lp->dma_bitmode = SONIC_BITMODE16;
-		sr = SONIC_READ(SONIC_SR);
 	} else if (commslot) {
 		/* Some of the comm-slot cards are 16 bit.  But some
 		   of them are not.  The 32-bit cards use offset 2 and
@@ -365,22 +358,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 		else {
 			lp->dma_bitmode = SONIC_BITMODE16;
 			lp->reg_offset = 0;
-			sr = SONIC_READ(SONIC_SR);
 		}
 	} else {
 		/* All onboard cards are at offset 2 with 32 bit DMA. */
 		lp->reg_offset = 2;
 		lp->dma_bitmode = SONIC_BITMODE32;
-		sr = SONIC_READ(SONIC_SR);
 	}
-	printk(KERN_INFO
-	       "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-	       dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-	printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-	       SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+	pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n",
+		SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16,
+		lp->reg_offset);
+
+	/* This is sometimes useful to find out how MacOS configured the card */
+	pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+		 SONIC_READ(SONIC_DCR) & 0xffff,
+		 SONIC_READ(SONIC_DCR2) & 0xffff);
 
 	/* Software reset, then initialize control registers. */
 	SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -401,6 +393,9 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 	/* Now look for the MAC address. */
 	mac_onboard_sonic_ethernet_addr(dev);
 
+	pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+		dev->base_addr, dev->dev_addr, dev->irq);
+
 	/* Shared init code */
 	return macsonic_init(dev);
 }
@@ -508,15 +503,15 @@ static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
 		printk(KERN_INFO "%s", version);
 		sonic_version_printed = 1;
 	}
-	printk(KERN_INFO "%s: %s in slot %X\n",
-	       dev_name(lp->device), board->name, board->slot);
-	printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n",
-	       dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset);
 
-#if 0 /* This is sometimes useful to find out how MacOS configured the card. */
-	printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device),
-	       SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff);
-#endif
+	dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
+		 board->name, SONIC_READ(SONIC_SR),
+		 lp->dma_bitmode ? 32 : 16, lp->reg_offset);
+
+	/* This is sometimes useful to find out how MacOS configured the card */
+	dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__,
+		SONIC_READ(SONIC_DCR) & 0xffff,
+		SONIC_READ(SONIC_DCR2) & 0xffff);
 
 	/* Software reset, then initialize control registers. */
 	SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
@@ -564,8 +559,6 @@ static int mac_sonic_platform_probe(struct platform_device *pdev)
 	if (err)
 		goto out;
 
-	printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq);
-
 	return 0;
 
 out:
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 1817deea98a4..d1659c75cce8 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -155,8 +155,8 @@ static int __init sonic_probe1(struct net_device *dev)
 		i++;
 
 	if (known_revisions[i] == 0xffff) {
-		printk("SONIC ethernet controller not found (0x%4x)\n",
-				silicon_revision);
+		pr_info("SONIC ethernet controller not found (0x%4x)\n",
+			silicon_revision);
 		return -ENODEV;
 	}
 
@@ -273,12 +273,13 @@ int xtsonic_probe(struct platform_device *pdev)
 
 	if ((err = sonic_probe1(dev)))
 		goto out;
+
+	pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
+		dev->base_addr, dev->dev_addr, dev->irq);
+
 	if ((err = register_netdev(dev)))
 		goto out1;
 
-	printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name,
-	       dev->base_addr, dev->dev_addr, dev->irq);
-
 	return 0;
 
 out1:

From 995b2a65232f9bd54d8e41a7669436a93698bf01 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sat, 24 Feb 2018 18:27:25 -0500
Subject: [PATCH 0342/1678] net/sonic: Replace custom debug logging with
 netif_* calls

Eliminate duplicated debug code by moving it into the core driver.
Don't log the only valid silicon revision number (it's in the source).

Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Chris Zankel <chris@zankel.net>
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/natsemi/jazzsonic.c | 19 +----
 drivers/net/ethernet/natsemi/macsonic.c  | 25 +-----
 drivers/net/ethernet/natsemi/sonic.c     | 99 ++++++++++++------------
 drivers/net/ethernet/natsemi/sonic.h     |  2 +
 drivers/net/ethernet/natsemi/xtsonic.c   | 19 +----
 5 files changed, 61 insertions(+), 103 deletions(-)

diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index 58495316d318..51fa82b429a3 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -60,14 +60,6 @@ do {									\
 	*((volatile unsigned int *)dev->base_addr+(reg)) = (val);		\
 } while (0)
 
-
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer depended.
@@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = {
 
 static int sonic_probe1(struct net_device *dev)
 {
-	static unsigned version_printed;
 	unsigned int silicon_revision;
 	unsigned int val;
 	struct sonic_local *lp = netdev_priv(dev);
@@ -133,9 +124,6 @@ static int sonic_probe1(struct net_device *dev)
 	 * the expected location.
 	 */
 	silicon_revision = SONIC_READ(SONIC_SR);
-	if (sonic_debug > 1)
-		printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
 	i = 0;
 	while (known_revisions[i] != 0xffff &&
 	       known_revisions[i] != silicon_revision)
@@ -147,9 +135,6 @@ static int sonic_probe1(struct net_device *dev)
 		goto out;
 	}
 
-	if (sonic_debug  &&  version_printed++ == 0)
-		printk(version);
-
 	/*
 	 * Put the sonic into software reset, then
 	 * retrieve and print the ethernet address.
@@ -246,6 +231,8 @@ static int jazz_sonic_probe(struct platform_device *pdev)
 	pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
 		dev->base_addr, dev->dev_addr, dev->irq);
 
+	sonic_msg_init(dev);
+
 	err = register_netdev(dev);
 	if (err)
 		goto out1;
@@ -261,8 +248,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Jazz SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)");
 MODULE_ALIAS("platform:jazzsonic");
 
 #include "sonic.c"
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index dc690f287a6d..0937fc2a928e 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -70,15 +70,6 @@
 #define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \
 	      + lp->reg_offset))
 
-/* use 0 for production, 1 for verification, >1 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
-static int sonic_version_printed;
-
 /* For onboard SONIC */
 #define ONBOARD_SONIC_REGISTERS	0x50F0A000
 #define ONBOARD_SONIC_PROM_BASE	0x50f08000
@@ -333,11 +324,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev)
 	else
 		dev->irq = IRQ_NUBUS_9;
 
-	if (!sonic_version_printed) {
-		printk(KERN_INFO "%s", version);
-		sonic_version_printed = 1;
-	}
-
 	/* The PowerBook's SONIC is 16 bit always. */
 	if (macintosh_config->ident == MAC_MODEL_PB520) {
 		lp->reg_offset = 0;
@@ -499,11 +485,6 @@ static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id,
 	lp->dma_bitmode = dma_bitmode;
 	dev->irq = SLOT2IRQ(board->slot);
 
-	if (!sonic_version_printed) {
-		printk(KERN_INFO "%s", version);
-		sonic_version_printed = 1;
-	}
-
 	dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n",
 		 board->name, SONIC_READ(SONIC_SR),
 		 lp->dma_bitmode ? 32 : 16, lp->reg_offset);
@@ -555,6 +536,8 @@ static int mac_sonic_platform_probe(struct platform_device *pdev)
 	if (err)
 		goto out;
 
+	sonic_msg_init(dev);
+
 	err = register_netdev(dev);
 	if (err)
 		goto out;
@@ -568,8 +551,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Macintosh SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)");
 MODULE_ALIAS("platform:macsonic");
 
 #include "sonic.c"
@@ -633,6 +614,8 @@ static int mac_sonic_nubus_probe(struct nubus_board *board)
 	if (err)
 		goto out;
 
+	sonic_msg_init(ndev);
+
 	err = register_netdev(ndev);
 	if (err)
 		goto out;
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 612c7a44b26c..7ed08486ae23 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -33,7 +33,21 @@
  * the NetBSD file "sys/arch/mac68k/dev/if_sn.c".
  */
 
+static unsigned int version_printed;
 
+static int sonic_debug = -1;
+module_param(sonic_debug, int, 0);
+MODULE_PARM_DESC(sonic_debug, "debug message level");
+
+static void sonic_msg_init(struct net_device *dev)
+{
+	struct sonic_local *lp = netdev_priv(dev);
+
+	lp->msg_enable = netif_msg_init(sonic_debug, 0);
+
+	if (version_printed++ == 0)
+		netif_dbg(lp, drv, dev, "%s", version);
+}
 
 /*
  * Open/initialize the SONIC controller.
@@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev)
 	struct sonic_local *lp = netdev_priv(dev);
 	int i;
 
-	if (sonic_debug > 2)
-		printk("sonic_open: initializing sonic driver.\n");
+	netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
 
 	for (i = 0; i < SONIC_NUM_RRS; i++) {
 		struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2);
@@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev)
 
 	netif_start_queue(dev);
 
-	if (sonic_debug > 2)
-		printk("sonic_open: Initialization done.\n");
+	netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__);
 
 	return 0;
 }
@@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev)
 	struct sonic_local *lp = netdev_priv(dev);
 	int i;
 
-	if (sonic_debug > 2)
-		printk("sonic_close\n");
+	netif_dbg(lp, ifdown, dev, "%s\n", __func__);
 
 	netif_stop_queue(dev);
 
@@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
 	int length;
 	int entry = lp->next_tx;
 
-	if (sonic_debug > 2)
-		printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev);
+	netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
 
 	length = skb->len;
 	if (length < ETH_ZLEN) {
@@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
 	lp->next_tx = (entry + 1) & SONIC_TDS_MASK;
 	if (lp->tx_skb[lp->next_tx] != NULL) {
 		/* The ring is full, the ISR has yet to process the next TD. */
-		if (sonic_debug > 3)
-			printk("%s: stopping queue\n", dev->name);
+		netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__);
 		netif_stop_queue(dev);
 		/* after this packet, wait for ISR to free up some TDAs */
 	} else netif_start_queue(dev);
 
-	if (sonic_debug > 2)
-		printk("sonic_send_packet: issuing Tx command\n");
+	netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__);
 
 	SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
 
@@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
 	do {
 		if (status & SONIC_INT_PKTRX) {
-			if (sonic_debug > 2)
-				printk("%s: packet rx\n", dev->name);
+			netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__);
 			sonic_rx(dev);	/* got packet(s) */
 			SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */
 		}
@@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 			 *   still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear)
 			 */
 
-			if (sonic_debug > 2)
-				printk("%s: tx done\n", dev->name);
+			netif_dbg(lp, intr, dev, "%s: tx done\n", __func__);
 
 			while (lp->tx_skb[entry] != NULL) {
 				if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0)
@@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 		 * check error conditions
 		 */
 		if (status & SONIC_INT_RFO) {
-			if (sonic_debug > 1)
-				printk("%s: rx fifo overrun\n", dev->name);
+			netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n",
+				  __func__);
 			lp->stats.rx_fifo_errors++;
 			SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */
 		}
 		if (status & SONIC_INT_RDE) {
-			if (sonic_debug > 1)
-				printk("%s: rx descriptors exhausted\n", dev->name);
+			netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n",
+				  __func__);
 			lp->stats.rx_dropped++;
 			SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */
 		}
 		if (status & SONIC_INT_RBAE) {
-			if (sonic_debug > 1)
-				printk("%s: rx buffer area exceeded\n", dev->name);
+			netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n",
+				  __func__);
 			lp->stats.rx_dropped++;
 			SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */
 		}
@@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id)
 
 		/* transmit error */
 		if (status & SONIC_INT_TXER) {
-			if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2))
-				printk(KERN_ERR "%s: tx fifo underrun\n", dev->name);
+			if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU)
+				netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n",
+					  __func__);
 			SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */
 		}
 
@@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev)
 			if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff;
 			SONIC_WRITE(SONIC_RWP, lp->cur_rwp);
 			if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) {
-				if (sonic_debug > 2)
-					printk("%s: rx buffer exhausted\n", dev->name);
+				netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n",
+					  __func__);
 				SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */
 			}
 		} else
@@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev)
 		    (netdev_mc_count(dev) > 15)) {
 			rcr |= SONIC_RCR_AMC;
 		} else {
-			if (sonic_debug > 2)
-				printk("sonic_multicast_list: mc_count %d\n",
-				       netdev_mc_count(dev));
+			netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__,
+				  netdev_mc_count(dev));
 			sonic_set_cam_enable(dev, 1);  /* always enable our own address */
 			i = 1;
 			netdev_for_each_mc_addr(ha, dev) {
@@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev)
 		}
 	}
 
-	if (sonic_debug > 2)
-		printk("sonic_multicast_list: setting RCR=%x\n", rcr);
+	netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr);
 
 	SONIC_WRITE(SONIC_RCR, rcr);
 }
@@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev)
 	/*
 	 * initialize the receive resource area
 	 */
-	if (sonic_debug > 2)
-		printk("sonic_init: initialize receive resource area\n");
+	netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n",
+		  __func__);
 
 	for (i = 0; i < SONIC_NUM_RRS; i++) {
 		u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff;
@@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev)
 	SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
 
 	/* load the resource pointers */
-	if (sonic_debug > 3)
-		printk("sonic_init: issuing RRRA command\n");
+	netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
 
 	SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA);
 	i = 0;
@@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev)
 			break;
 	}
 
-	if (sonic_debug > 2)
-		printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i);
+	netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__,
+		  SONIC_READ(SONIC_CMD), i);
 
 	/*
 	 * Initialize the receive descriptors so that they
 	 * become a circular linked list, ie. let the last
 	 * descriptor point to the first again.
 	 */
-	if (sonic_debug > 2)
-		printk("sonic_init: initialize receive descriptors\n");
+	netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n",
+		  __func__);
+
 	for (i=0; i<SONIC_NUM_RDS; i++) {
 		sonic_rda_put(dev, i, SONIC_RD_STATUS, 0);
 		sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0);
@@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev)
 	/*
 	 * initialize transmit descriptors
 	 */
-	if (sonic_debug > 2)
-		printk("sonic_init: initialize transmit descriptors\n");
+	netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n",
+		  __func__);
+
 	for (i = 0; i < SONIC_NUM_TDS; i++) {
 		sonic_tda_put(dev, i, SONIC_TD_STATUS, 0);
 		sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0);
@@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev)
 		if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD)
 			break;
 	}
-	if (sonic_debug > 2) {
-		printk("sonic_init: CMD=%x, ISR=%x\n, i=%d",
-		       SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
-	}
+	netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__,
+		  SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i);
 
 	/*
 	 * enable receiver, disable loopback
@@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev)
 	if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0)
 		printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd);
 
-	if (sonic_debug > 2)
-		printk("sonic_init: new status=%x\n",
-		       SONIC_READ(SONIC_CMD));
+	netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__,
+		  SONIC_READ(SONIC_CMD));
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h
index 421b1a283fed..2b27f7049acb 100644
--- a/drivers/net/ethernet/natsemi/sonic.h
+++ b/drivers/net/ethernet/natsemi/sonic.h
@@ -319,6 +319,7 @@ struct sonic_local {
 	unsigned int eol_rx;
 	unsigned int eol_tx;           /* last unacked transmit packet */
 	unsigned int next_tx;          /* next free TD */
+	int msg_enable;
 	struct device *device;         /* generic device */
 	struct net_device_stats stats;
 };
@@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev);
 static void sonic_multicast_list(struct net_device *dev);
 static int sonic_init(struct net_device *dev);
 static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_msg_init(struct net_device *dev);
 
 /* Internal inlines for reading/writing DMA buffers.  Note that bus
    size and endianness matter here, whereas they don't for registers,
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index d1659c75cce8..e1b886e87a76 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf);
 #define SONIC_WRITE(reg,val) \
 	*((volatile unsigned int *)dev->base_addr+reg) = val
 
-
-/* Use 0 for production, 1 for verification, and >2 for debug */
-#ifdef SONIC_DEBUG
-static unsigned int sonic_debug = SONIC_DEBUG;
-#else
-static unsigned int sonic_debug = 1;
-#endif
-
 /*
  * We cannot use station (ethernet) address prefixes to detect the
  * sonic controller since these are board manufacturer depended.
@@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
 
 static int __init sonic_probe1(struct net_device *dev)
 {
-	static unsigned version_printed = 0;
 	unsigned int silicon_revision;
 	struct sonic_local *lp = netdev_priv(dev);
 	unsigned int base_addr = dev->base_addr;
@@ -146,9 +137,6 @@ static int __init sonic_probe1(struct net_device *dev)
 	 * the expected location.
 	 */
 	silicon_revision = SONIC_READ(SONIC_SR);
-	if (sonic_debug > 1)
-		printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision);
-
 	i = 0;
 	while ((known_revisions[i] != 0xffff) &&
 			(known_revisions[i] != silicon_revision))
@@ -160,9 +148,6 @@ static int __init sonic_probe1(struct net_device *dev)
 		return -ENODEV;
 	}
 
-	if (sonic_debug  &&  version_printed++ == 0)
-		printk(version);
-
 	/*
 	 * Put the sonic into software reset, then retrieve ethernet address.
 	 * Note: we are assuming that the boot-loader has initialized the cam.
@@ -277,6 +262,8 @@ int xtsonic_probe(struct platform_device *pdev)
 	pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n",
 		dev->base_addr, dev->dev_addr, dev->irq);
 
+	sonic_msg_init(dev);
+
 	if ((err = register_netdev(dev)))
 		goto out1;
 
@@ -291,8 +278,6 @@ out:
 }
 
 MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver");
-module_param(sonic_debug, int, 0);
-MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)");
 
 #include "sonic.c"
 

From c3880bd159d431d06b687b0b5ab22e24e6ef0070 Mon Sep 17 00:00:00 2001
From: Mariusz Stachura <mariusz.stachura@intel.com>
Date: Tue, 14 Nov 2017 07:00:50 -0500
Subject: [PATCH 0343/1678] i40e: link_down_on_close private flag support

This patch introduces new ethtool private flag used for
forcing true link state. Function i40e_force_link_state that implements
this functionality was added, it sets phy_type = 0 in order to
work-around firmware's LESM. False positive error messages were
suppressed.

The ndo_open() should not succeed if there were issues with forcing link
state to be UP.

Added I40E_PHY_TYPES_BITMASK define with all phy types OR-ed together in
one bitmask.  Added after phy type definition, so it will be hard to
forget to include new phy types to the bitmask.

Signed-off-by: Mariusz Stachura <mariusz.stachura@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_adminq_cmd.h | 37 +++++++++
 .../net/ethernet/intel/i40e/i40e_ethtool.c    |  2 +
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 75 +++++++++++++++++++
 3 files changed, 114 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index a852775d3059..0dfc52772c45 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_DEFAULT			= 0xFF,
 };
 
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+				BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+				BIT_ULL(I40E_PHY_TYPE_XFI) | \
+				BIT_ULL(I40E_PHY_TYPE_SFI) | \
+				BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+				BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+				BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+				BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+				BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+				BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+
 #define I40E_LINK_SPEED_100MB_SHIFT	0x1
 #define I40E_LINK_SPEED_1000MB_SHIFT	0x2
 #define I40E_LINK_SPEED_10GB_SHIFT	0x3
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0dcbbda164c4..89807e32a898 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
 	I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
 	I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
 	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+	I40E_PRIV_FLAG("link-down-on-close",
+		       I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
 	I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
 	I40E_PRIV_FLAG("disable-source-pruning",
 		       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f6d37456f3b7..be9a1467a1a1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6546,6 +6546,75 @@ int i40e_up(struct i40e_vsi *vsi)
 	return err;
 }
 
+/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_aq_set_phy_config config = {0};
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status err;
+	u64 mask;
+
+	/* Get the current phy config */
+	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					   NULL);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"failed to get phy cap., ret =  %s last_status =  %s\n",
+			i40e_stat_str(hw, err),
+			i40e_aq_str(hw, hw->aq.asq_last_status));
+		return err;
+	}
+
+	/* If link needs to go up, but was not forced to go down,
+	 * no need for a flap
+	 */
+	if (is_up && abilities.phy_type != 0)
+		return I40E_SUCCESS;
+
+	/* To force link we need to set bits for all supported PHY types,
+	 * but there are now more than 32, so we need to split the bitmap
+	 * across two fields.
+	 */
+	mask = I40E_PHY_TYPES_BITMASK;
+	config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+	config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0;
+	/* Copy the old settings, except of phy_type */
+	config.abilities = abilities.abilities;
+	config.link_speed = abilities.link_speed;
+	config.eee_capability = abilities.eee_capability;
+	config.eeer = abilities.eeer_val;
+	config.low_power_ctrl = abilities.d3_lpan;
+	err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"set phy config ret =  %s last_status =  %s\n",
+			i40e_stat_str(&pf->hw, err),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return err;
+	}
+
+	/* Update the link info */
+	err = i40e_update_link_info(hw);
+	if (err) {
+		/* Wait a little bit (on 40G cards it sometimes takes a really
+		 * long time for link to come back from the atomic reset)
+		 * and try once more
+		 */
+		msleep(1000);
+		i40e_update_link_info(hw);
+	}
+
+	i40e_aq_set_link_restart_an(hw, true, NULL);
+
+	return I40E_SUCCESS;
+}
+
 /**
  * i40e_down - Shutdown the connection processing
  * @vsi: the VSI being stopped
@@ -6563,6 +6632,9 @@ void i40e_down(struct i40e_vsi *vsi)
 	}
 	i40e_vsi_disable_irq(vsi);
 	i40e_vsi_stop_rings(vsi);
+	if (vsi->type == I40E_VSI_MAIN &&
+	    vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED)
+		i40e_force_link_state(vsi->back, false);
 	i40e_napi_disable_all(vsi);
 
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -7524,6 +7596,9 @@ int i40e_open(struct net_device *netdev)
 
 	netif_carrier_off(netdev);
 
+	if (i40e_force_link_state(pf, true))
+		return -EAGAIN;
+
 	err = i40e_vsi_open(vsi);
 	if (err)
 		return err;

From 08009a760213cf6125af9453a51203f4ae108ba1 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 24 Feb 2018 21:20:33 +0300
Subject: [PATCH 0344/1678] net: make kmem caches as __ro_after_init

All kmem caches aren't reallocated once set up.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 2 +-
 net/core/skbuff.c        | 4 ++--
 net/ipv4/fib_trie.c      | 5 +++--
 net/ipv4/inetpeer.c      | 3 ++-
 net/ipv4/ipmr.c          | 3 ++-
 net/socket.c             | 2 +-
 6 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 27a55236ad64..690e78c6af45 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -362,7 +362,7 @@ static void dec_net_namespaces(struct ucounts *ucounts)
 	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
 }
 
-static struct kmem_cache *net_cachep;
+static struct kmem_cache *net_cachep __ro_after_init;
 static struct workqueue_struct *netns_wq;
 
 static struct net *net_alloc(void)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1a7485a2cdfa..96d36b81a3a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -77,8 +77,8 @@
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
 
-struct kmem_cache *skbuff_head_cache __read_mostly;
-static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+struct kmem_cache *skbuff_head_cache __ro_after_init;
+static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
 EXPORT_SYMBOL(sysctl_max_skb_frags);
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5530cd6fdbc7..62243a8abf92 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,6 +50,7 @@
 
 #define VERSION "0.409"
 
+#include <linux/cache.h>
 #include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
@@ -191,8 +192,8 @@ static size_t tnode_free_size;
  */
 static const int sync_pages = 128;
 
-static struct kmem_cache *fn_alias_kmem __read_mostly;
-static struct kmem_cache *trie_leaf_kmem __read_mostly;
+static struct kmem_cache *fn_alias_kmem __ro_after_init;
+static struct kmem_cache *trie_leaf_kmem __ro_after_init;
 
 static inline struct tnode *tn_info(struct key_vector *kv)
 {
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 914d56928578..1f04bd91fc2e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -6,6 +6,7 @@
  *  Authors:	Andrey V. Savochkin <saw@msu.ru>
  */
 
+#include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -51,7 +52,7 @@
  *		daddr: unchangeable
  */
 
-static struct kmem_cache *peer_cachep __read_mostly;
+static struct kmem_cache *peer_cachep __ro_after_init;
 
 void inet_peer_base_init(struct inet_peer_base *bp)
 {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7c7ac9d32e77..591d1fc80a1f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -28,6 +28,7 @@
 
 #include <linux/uaccess.h>
 #include <linux/types.h>
+#include <linux/cache.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
@@ -96,7 +97,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
  * In this case data path is free of exclusive locks at all.
  */
 
-static struct kmem_cache *mrt_cachep __read_mostly;
+static struct kmem_cache *mrt_cachep __ro_after_init;
 
 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static void ipmr_free_table(struct mr_table *mrt);
diff --git a/net/socket.c b/net/socket.c
index ab58e57c09ca..645d32b4872c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -233,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
 	return __put_user(klen, ulen);
 }
 
-static struct kmem_cache *sock_inode_cachep __read_mostly;
+static struct kmem_cache *sock_inode_cachep __ro_after_init;
 
 static struct inode *sock_alloc_inode(struct super_block *sb)
 {

From 8cd5fe62cc5a2aca1c698c28baa46ac6931ba11f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Jab=C5=82o=C5=84ski?=
 <pawel.jablonski@intel.com>
Date: Mon, 5 Feb 2018 13:03:36 -0800
Subject: [PATCH 0345/1678] i40evf: Fix double locking the same resource
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes the locking of adapter->mac_vlan_list_lock resource in
i40evf_add_filter(). The locking part is moved above i40evf_add_filter().
i40evf_add_filter(), called by i40evf_addr_sync(), was trying to lock the
resource again and double locking generated a kernel panic after bringing
an interface up.

Fixes: 8946b56354b7 ("i40evf: use __dev_[um]c_sync routines in
       .set_rx_mode")
Signed-off-by: Paweł Jabłoński <pawel.jablonski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 4955ce3ab6a2..4f5744d488aa 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -815,13 +815,11 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
 	if (!macaddr)
 		return NULL;
 
-	spin_lock_bh(&adapter->mac_vlan_list_lock);
-
 	f = i40evf_find_filter(adapter, macaddr);
 	if (!f) {
 		f = kzalloc(sizeof(*f), GFP_ATOMIC);
 		if (!f)
-			goto clearout;
+			return f;
 
 		ether_addr_copy(f->macaddr, macaddr);
 
@@ -832,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
 		f->remove = false;
 	}
 
-clearout:
-	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 	return f;
 }
 
@@ -868,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p)
 		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
 	}
 
+	f = i40evf_add_filter(adapter, addr->sa_data);
+
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
-	f = i40evf_add_filter(adapter, addr->sa_data);
 	if (f) {
 		ether_addr_copy(hw->mac.addr, addr->sa_data);
 		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
@@ -3040,7 +3037,12 @@ static int i40evf_open(struct net_device *netdev)
 	if (err)
 		goto err_req_irq;
 
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
 	i40evf_add_filter(adapter, adapter->hw.mac.addr);
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
 	i40evf_configure(adapter);
 
 	i40evf_up_complete(adapter);

From 04d4105174349dceccf9545a3e5e421c18f2cc56 Mon Sep 17 00:00:00 2001
From: Alan Brady <alan.brady@intel.com>
Date: Mon, 12 Feb 2018 09:16:59 -0500
Subject: [PATCH 0346/1678] i40e/i40evf: use SW variables for hang detection

The i40e_detect_recover_hung function uses the i40e_get_tx_pending
function to determine if there are packets stalled on the ring.
i40e_get_tx_pending calculates the pending packets using the head
writeback value and HW tail.  If the queue is stopped and we lose the
interrupt to update our next_to_clean then we a) won't get another
interrupt to clean because queue is stopped b) we won't catch the
problem with i40e_detect_recover_hung because the HW values look like
there's no packets waiting to be transmitted.  Using the SW values we
can catch the issue because next_to_clean will be out of sync with head
writeback.

This has the added benefit being less CPU intensive because we don't
need to reach into the hardware to get the values.

Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 16 +++++++++++-----
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  2 +-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 1ec9b1d8023d..97cfe944b568 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
 /**
  * i40e_get_tx_pending - how many tx descriptors not processed
  * @tx_ring: the ring of descriptors
+ * @in_sw: use SW variables
  *
  * Since there is no access to the ring head register
  * in XL710, we need to use our local copies
  **/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 {
 	u32 head, tail;
 
-	head = i40e_get_head(ring);
-	tail = readl(ring->tail);
+	if (!in_sw) {
+		head = i40e_get_head(ring);
+		tail = readl(ring->tail);
+	} else {
+		head = ring->next_to_clean;
+		tail = ring->next_to_use;
+	}
 
 	if (head != tail)
 		return (head < tail) ?
@@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
 			 */
 			smp_rmb();
 			tx_ring->tx_stats.prev_pkt_ctr =
-			    i40e_get_tx_pending(tx_ring) ? packets : -1;
+			    i40e_get_tx_pending(tx_ring, true) ? packets : -1;
 		}
 	}
 }
@@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		 * them to be written back in case we stay in NAPI.
 		 * In this mode on X722 we do not enable Interrupt.
 		 */
-		unsigned int j = i40e_get_tx_pending(tx_ring);
+		unsigned int j = i40e_get_tx_pending(tx_ring, false);
 
 		if (budget &&
 		    ((j / WB_STRIDE) == 0) && (j > 0) &&
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index f75a8fe68fcf..3c80ea784389 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -505,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring);
 void i40e_free_rx_resources(struct i40e_ring *rx_ring);
 int i40e_napi_poll(struct napi_struct *napi, int budget);
 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
-u32 i40e_get_tx_pending(struct i40e_ring *ring);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index eb8f3e327f6b..e088d23eb083 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
 			 */
 			smp_rmb();
 			tx_ring->tx_stats.prev_pkt_ctr =
-			  i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+			  i40evf_get_tx_pending(tx_ring, true) ? packets : -1;
 		}
 	}
 }

From e85c1b8234e65134ccf801938f98352a43263a9f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Feb 2018 19:26:05 +0000
Subject: [PATCH 0347/1678] i40evf: pass struct virtchnl_filter by reference
 rather than by value

Passing struct virtchnl_filter f by value requires a 272 byte copy
on x86_64, so instead pass it by reference is much more efficient. Also
adjust some lines that are over 80 chars.

Detected by CoverityScan, CID#1465285 ("Big parameter passed by value")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../ethernet/intel/i40evf/i40evf_virtchnl.c   | 32 +++++++++++--------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 6134b61e0938..3c76c817ca1a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1048,24 +1048,28 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter)
  * Print the cloud filter
  **/
 static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter,
-				      struct virtchnl_filter f)
+				      struct virtchnl_filter *f)
 {
-	switch (f.flow_type) {
+	switch (f->flow_type) {
 	case VIRTCHNL_TCP_V4_FLOW:
 		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
-			 &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
-			 ntohs(f.data.tcp_spec.vlan_id),
-			 &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0],
-			 ntohs(f.data.tcp_spec.dst_port),
-			 ntohs(f.data.tcp_spec.src_port));
+			 &f->data.tcp_spec.dst_mac,
+			 &f->data.tcp_spec.src_mac,
+			 ntohs(f->data.tcp_spec.vlan_id),
+			 &f->data.tcp_spec.dst_ip[0],
+			 &f->data.tcp_spec.src_ip[0],
+			 ntohs(f->data.tcp_spec.dst_port),
+			 ntohs(f->data.tcp_spec.src_port));
 		break;
 	case VIRTCHNL_TCP_V6_FLOW:
 		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
-			 &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac,
-			 ntohs(f.data.tcp_spec.vlan_id),
-			 &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip,
-			 ntohs(f.data.tcp_spec.dst_port),
-			 ntohs(f.data.tcp_spec.src_port));
+			 &f->data.tcp_spec.dst_mac,
+			 &f->data.tcp_spec.src_mac,
+			 ntohs(f->data.tcp_spec.vlan_id),
+			 &f->data.tcp_spec.dst_ip,
+			 &f->data.tcp_spec.src_ip,
+			 ntohs(f->data.tcp_spec.dst_port),
+			 ntohs(f->data.tcp_spec.src_port));
 		break;
 	}
 }
@@ -1303,7 +1307,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 						 i40evf_stat_str(&adapter->hw,
 								 v_retval));
 					i40evf_print_cloud_filter(adapter,
-								  cf->f);
+								  &cf->f);
 					list_del(&cf->list);
 					kfree(cf);
 					adapter->num_cloud_filters--;
@@ -1322,7 +1326,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
 						 i40evf_stat_str(&adapter->hw,
 								 v_retval));
 					i40evf_print_cloud_filter(adapter,
-								  cf->f);
+								  &cf->f);
 				}
 			}
 			}

From 46345b38e917bd2f27b3730adaa67e4160e1d94d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Feb 2018 19:50:52 +0000
Subject: [PATCH 0348/1678] i40e: check that pointer VSI is not null before
 dereferencing it

Function i40e_find_vsi_from_id can potentially return null, hence
VSI may be null, so defensively check it is non-null before
dereferencing it to check the seid.

Fixes: e284fc280473 ("i40e: Add and delete cloud filter")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Avinash Dayanand <avinash.dayanand@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 5cca083da93c..14bbd5c1db78 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3062,7 +3062,7 @@ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
 
 	for (i = 0; i < vf->num_tc ; i++) {
 		vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
-		if (vsi->seid == seid)
+		if (vsi && vsi->seid == seid)
 			return vsi;
 	}
 	return NULL;

From deb9a9ad3ec100fa7169d0ddfbd24b1520bf26a3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 19 Feb 2018 10:23:30 +0000
Subject: [PATCH 0349/1678] i40evf: remove redundant array comparisons to 0
 checks

The checks to see if key->dst.s6_addr and key->src.s6_addr are null
pointers are redundant because these are constant size arrays and
so the checks always return true.  Fix this by removing the redundant
checks.   Also replace filter->f with vf, allowing wide lines to be
condensed and to rejoin some split wide lines.

Detected by CoverityScan, CID#1465279 ("Array compared to 0")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 61 ++++++++-----------
 1 file changed, 25 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 4f5744d488aa..dae121877935 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2490,6 +2490,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 	u16 addr_type = 0;
 	u16 n_proto = 0;
 	int i = 0;
+	struct virtchnl_filter *vf = &filter->f;
 
 	if (f->dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -2537,7 +2538,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 			return -EINVAL;
 		if (n_proto == ETH_P_IPV6) {
 			/* specify flow type as TCP IPv6 */
-			filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW;
+			vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
 		}
 
 		if (key->ip_proto != IPPROTO_TCP) {
@@ -2582,9 +2583,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 			    is_multicast_ether_addr(key->dst)) {
 				/* set the mask if a valid dst_mac address */
 				for (i = 0; i < ETH_ALEN; i++)
-					filter->f.mask.tcp_spec.dst_mac[i] |=
-									0xff;
-				ether_addr_copy(filter->f.data.tcp_spec.dst_mac,
+					vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.dst_mac,
 						key->dst);
 			}
 
@@ -2593,9 +2593,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 			    is_multicast_ether_addr(key->src)) {
 				/* set the mask if a valid dst_mac address */
 				for (i = 0; i < ETH_ALEN; i++)
-					filter->f.mask.tcp_spec.src_mac[i] |=
-									0xff;
-				ether_addr_copy(filter->f.data.tcp_spec.src_mac,
+					vf->mask.tcp_spec.src_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.src_mac,
 						key->src);
 		}
 	}
@@ -2619,8 +2618,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 				return I40E_ERR_CONFIG;
 			}
 		}
-		filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
-		filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+		vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+		vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
@@ -2667,14 +2666,12 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 			return I40E_ERR_CONFIG;
 		}
 		if (key->dst) {
-			filter->f.mask.tcp_spec.dst_ip[0] |=
-							cpu_to_be32(0xffffffff);
-			filter->f.data.tcp_spec.dst_ip[0] = key->dst;
+			vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.dst_ip[0] = key->dst;
 		}
 		if (key->src) {
-			filter->f.mask.tcp_spec.src_ip[0] |=
-							cpu_to_be32(0xffffffff);
-			filter->f.data.tcp_spec.src_ip[0] = key->src;
+			vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.src_ip[0] = key->src;
 		}
 	}
 
@@ -2707,22 +2704,14 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
 			field_flags |= I40EVF_CLOUD_FIELD_IIP;
 
-		if (key->dst.s6_addr) {
-			for (i = 0; i < 4; i++)
-				filter->f.mask.tcp_spec.dst_ip[i] |=
-							cpu_to_be32(0xffffffff);
-			memcpy(&filter->f.data.tcp_spec.dst_ip,
-			       &key->dst.s6_addr32,
-			       sizeof(filter->f.data.tcp_spec.dst_ip));
-		}
-		if (key->src.s6_addr) {
-			for (i = 0; i < 4; i++)
-				filter->f.mask.tcp_spec.src_ip[i] |=
-							cpu_to_be32(0xffffffff);
-			memcpy(&filter->f.data.tcp_spec.src_ip,
-			       &key->src.s6_addr32,
-			       sizeof(filter->f.data.tcp_spec.src_ip));
-		}
+		for (i = 0; i < 4; i++)
+			vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+		memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+		       sizeof(vf->data.tcp_spec.dst_ip));
+		for (i = 0; i < 4; i++)
+			vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+		memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+		       sizeof(vf->data.tcp_spec.src_ip));
 	}
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
 		struct flow_dissector_key_ports *key =
@@ -2754,16 +2743,16 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 			}
 		}
 		if (key->dst) {
-			filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
-			filter->f.data.tcp_spec.dst_port = key->dst;
+			vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+			vf->data.tcp_spec.dst_port = key->dst;
 		}
 
 		if (key->src) {
-			filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
-			filter->f.data.tcp_spec.src_port = key->dst;
+			vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+			vf->data.tcp_spec.src_port = key->dst;
 		}
 	}
-	filter->f.field_flags = field_flags;
+	vf->field_flags = field_flags;
 
 	return 0;
 }

From 5dd3691c9828abd2e479cc9339d964d76e318c3b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 21 Feb 2018 15:53:53 +0300
Subject: [PATCH 0350/1678] i40e: remove some stray indenting

These two lines are indented too far.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 14bbd5c1db78..e23975c67417 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3146,8 +3146,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
 		dev_info(&pf->pdev->dev,
 			 "VF %d: Invalid input, can't apply cloud filter\n",
 			 vf->vf_id);
-			aq_ret = I40E_ERR_PARAM;
-			goto err;
+		aq_ret = I40E_ERR_PARAM;
+		goto err;
 	}
 
 	memset(&cfilter, 0, sizeof(cfilter));

From fe8d662aef26394388bfcd3b96ce123b6d33044b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Feb 2018 22:34:32 +0100
Subject: [PATCH 0351/1678] bpf: unify rlimit handling in selftests

Unify memlock handling into bpf_rlimit.h and replace all occurences
in BPF kselftests with it.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_rlimit.h      | 28 +++++++++++++++++++
 tools/testing/selftests/bpf/test_align.c      |  6 +---
 tools/testing/selftests/bpf/test_dev_cgroup.c |  6 +---
 tools/testing/selftests/bpf/test_lpm_map.c    | 14 ++--------
 tools/testing/selftests/bpf/test_lru_map.c    |  6 ++--
 tools/testing/selftests/bpf/test_maps.c       |  7 ++---
 tools/testing/selftests/bpf/test_progs.c      |  7 ++---
 tools/testing/selftests/bpf/test_tag.c        |  4 +--
 .../testing/selftests/bpf/test_tcpbpf_user.c  |  6 +---
 tools/testing/selftests/bpf/test_verifier.c   |  6 +---
 .../testing/selftests/bpf/test_verifier_log.c |  8 ++----
 11 files changed, 44 insertions(+), 54 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_rlimit.h

diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644
index 000000000000..9dac9b30f8ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rlimit.h
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static  __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+	struct rlimit rlim_old, rlim_new = {
+		.rlim_cur	= RLIM_INFINITY,
+		.rlim_max	= RLIM_INFINITY,
+	};
+
+	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+	/* For the sake of running the test cases, we temporarily
+	 * set rlimit to infinity in order for kernel to focus on
+	 * errors from actual test cases and not getting noise
+	 * from hitting memlock limits. The limit is on per-process
+	 * basis and not a global one, hence destructor not really
+	 * needed here.
+	 */
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+		perror("Unable to lift memlock rlimit");
+		/* Trying out lower limit, but expect potential test
+		 * case failures from this!
+		 */
+		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+	}
+}
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index ff8bd7e3e50c..6b1b302310fe 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -9,8 +9,6 @@
 #include <stddef.h>
 #include <stdbool.h>
 
-#include <sys/resource.h>
-
 #include <linux/unistd.h>
 #include <linux/filter.h>
 #include <linux/bpf_perf_event.h>
@@ -19,6 +17,7 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
 
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
@@ -702,9 +701,6 @@ static int do_test(unsigned int from, unsigned int to)
 int main(int argc, char **argv)
 {
 	unsigned int from = 0, to = ARRAY_SIZE(tests);
-	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-	setrlimit(RLIMIT_MEMLOCK, &rinf);
 
 	if (argc == 3) {
 		unsigned int l = atoi(argv[argc - 2]);
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index 3489cc283433..9c8b50bac7e0 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -11,13 +11,13 @@
 #include <errno.h>
 #include <assert.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
 
 #define DEV_CGROUP_PROG "./dev_cgroup.o"
 
@@ -25,15 +25,11 @@
 
 int main(int argc, char **argv)
 {
-	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	struct bpf_object *obj;
 	int error = EXIT_FAILURE;
 	int prog_fd, cgroup_fd;
 	__u32 prog_cnt;
 
-	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-		perror("Unable to lift memlock rlimit");
-
 	if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
 			  &obj, &prog_fd)) {
 		printf("Failed to load DEV_CGROUP program\n");
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 2be87e9ee28d..147e34cfceb7 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -22,10 +22,11 @@
 #include <unistd.h>
 #include <arpa/inet.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <bpf/bpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 struct tlpm_node {
 	struct tlpm_node *next;
@@ -736,17 +737,11 @@ static void test_lpm_multi_thread(void)
 
 int main(void)
 {
-	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
-	int i, ret;
+	int i;
 
 	/* we want predictable, pseudo random tests */
 	srand(0xf00ba1);
 
-	/* allow unlimited locked memory */
-	ret = setrlimit(RLIMIT_MEMLOCK, &limit);
-	if (ret < 0)
-		perror("Unable to lift memlock rlimit");
-
 	test_lpm_basic();
 	test_lpm_order();
 
@@ -755,11 +750,8 @@ int main(void)
 		test_lpm_map(i);
 
 	test_lpm_ipaddr();
-
 	test_lpm_delete();
-
 	test_lpm_get_next_key();
-
 	test_lpm_multi_thread();
 
 	printf("test_lpm: OK\n");
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 8c10c9180c1a..781c7de343be 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -16,10 +16,11 @@
 #include <time.h>
 
 #include <sys/wait.h>
-#include <sys/resource.h>
 
 #include <bpf/bpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 #define LOCAL_FREE_TARGET	(128)
 #define PERCPU_FREE_TARGET	(4)
@@ -613,7 +614,6 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
 
 int main(int argc, char **argv)
 {
-	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
 			     BPF_MAP_TYPE_LRU_PERCPU_HASH};
 	int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
@@ -621,8 +621,6 @@ int main(int argc, char **argv)
 
 	setbuf(stdout, NULL);
 
-	assert(!setrlimit(RLIMIT_MEMLOCK, &r));
-
 	nr_cpus = bpf_num_possible_cpus();
 	assert(nr_cpus != -1);
 	printf("nr_cpus:%d\n\n", nr_cpus);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9e03a4c356a4..1238733c5b33 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -17,13 +17,14 @@
 #include <stdlib.h>
 
 #include <sys/wait.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 
 static int map_flags;
 
@@ -1126,10 +1127,6 @@ static void run_all_tests(void)
 
 int main(void)
 {
-	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-	setrlimit(RLIMIT_MEMLOCK, &rinf);
-
 	map_flags = 0;
 	run_all_tests();
 
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b549308abd19..27ad5404389e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -26,7 +26,6 @@ typedef __u16 __sum16;
 
 #include <sys/ioctl.h>
 #include <sys/wait.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <fcntl.h>
 
@@ -34,9 +33,11 @@ typedef __u16 __sum16;
 #include <linux/err.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+
 #include "test_iptunnel_common.h"
 #include "bpf_util.h"
 #include "bpf_endian.h"
+#include "bpf_rlimit.h"
 
 static int error_cnt, pass_cnt;
 
@@ -965,10 +966,6 @@ out:
 
 int main(void)
 {
-	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
-	setrlimit(RLIMIT_MEMLOCK, &rinf);
-
 	test_pkt_access();
 	test_xdp();
 	test_l4lb_all();
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 8b201895c569..6272c784ca2a 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -12,7 +12,6 @@
 #include <assert.h>
 
 #include <sys/socket.h>
-#include <sys/resource.h>
 
 #include <linux/filter.h>
 #include <linux/bpf.h>
@@ -21,6 +20,7 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
 
 static struct bpf_insn prog[BPF_MAXINSNS];
 
@@ -184,11 +184,9 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
 
 int main(void)
 {
-	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
 	uint32_t tests = 0;
 	int i, fd_map;
 
-	setrlimit(RLIMIT_MEMLOCK, &rinf);
 	fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
 				sizeof(int), 1, BPF_F_NO_PREALLOC);
 	assert(fd_map > 0);
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 5d73db416460..84ab5163c828 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -12,13 +12,13 @@
 #include <linux/bpf.h>
 #include <sys/ioctl.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 #include "bpf_util.h"
+#include "bpf_rlimit.h"
 #include <linux/perf_event.h>
 #include "test_tcpbpf.h"
 
@@ -44,7 +44,6 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 
 int main(int argc, char **argv)
 {
-	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	const char *file = "test_tcpbpf_kern.o";
 	struct tcpbpf_globals g = {0};
 	int cg_fd, prog_fd, map_fd;
@@ -57,9 +56,6 @@ int main(int argc, char **argv)
 	int pid;
 	int rv;
 
-	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-		perror("Unable to lift memlock rlimit");
-
 	if (argc > 1 && strcmp(argv[1], "-d") == 0)
 		debug_flag = true;
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 2164d218322e..bd3a08c7cc15 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -24,7 +24,6 @@
 #include <limits.h>
 
 #include <sys/capability.h>
-#include <sys/resource.h>
 
 #include <linux/unistd.h>
 #include <linux/filter.h>
@@ -41,7 +40,7 @@
 #  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
 # endif
 #endif
-
+#include "bpf_rlimit.h"
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
@@ -11543,8 +11542,6 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 
 int main(int argc, char **argv)
 {
-	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-	struct rlimit rlim = { 1 << 20, 1 << 20 };
 	unsigned int from = 0, to = ARRAY_SIZE(tests);
 	bool unpriv = !is_admin();
 
@@ -11572,6 +11569,5 @@ int main(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
-	setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf);
 	return do_test(unpriv, from, to);
 }
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index e9626cf5607a..8d6918c3b4a2 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -4,7 +4,6 @@
 #include <string.h>
 #include <unistd.h>
 #include <sys/time.h>
-#include <sys/resource.h>
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
@@ -12,6 +11,8 @@
 
 #include <bpf/bpf.h>
 
+#include "bpf_rlimit.h"
+
 #define LOG_SIZE (1 << 20)
 
 #define err(str...)	printf("ERROR: " str)
@@ -133,16 +134,11 @@ static void test_log_bad(char *log, size_t log_len, int log_level)
 
 int main(int argc, char **argv)
 {
-	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
 	char full_log[LOG_SIZE];
 	char log[LOG_SIZE];
 	size_t want_len;
 	int i;
 
-	/* allow unlimited locked memory to have more consistent error code */
-	if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0)
-		perror("Unable to lift memlock rlimit");
-
 	memset(log, 1, LOG_SIZE);
 
 	/* Test incorrect attr */

From b33eb735836224e55cd8182aff9bbb7ddc17f38b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Feb 2018 22:34:33 +0100
Subject: [PATCH 0352/1678] bpf: add tail call tests to test_verifier

One of the downsides of the test_bpf module was that since being
in kernel space, it couldn't test-run tail calls. Now that the
test_verifier has the ability to perform run-time tests, populate
the prog array so we actually jump into other BPF programs and
can check all corner cases. Most useful in combination with JITs.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 117 ++++++++++++++++++--
 1 file changed, 110 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index bd3a08c7cc15..9eb05f3135ac 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2588,6 +2588,62 @@ static struct bpf_test tests[] = {
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 	},
+	{
+		"runtime/jit: tail_call within bounds, prog once",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog = { 1 },
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"runtime/jit: tail_call within bounds, prog loop",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog = { 1 },
+		.result = ACCEPT,
+		.retval = 41,
+	},
+	{
+		"runtime/jit: tail_call within bounds, no prog",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog = { 1 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"runtime/jit: tail_call out of bounds",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 256),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog = { 1 },
+		.result = ACCEPT,
+		.retval = 2,
+	},
 	{
 		"runtime/jit: pass negative index to tail_call",
 		.insns = {
@@ -2595,11 +2651,12 @@ static struct bpf_test tests[] = {
 			BPF_LD_MAP_FD(BPF_REG_2, 0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_tail_call),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
 			BPF_EXIT_INSN(),
 		},
 		.fixup_prog = { 1 },
 		.result = ACCEPT,
+		.retval = 2,
 	},
 	{
 		"runtime/jit: pass > 32bit index to tail_call",
@@ -2608,11 +2665,12 @@ static struct bpf_test tests[] = {
 			BPF_LD_MAP_FD(BPF_REG_2, 0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_tail_call),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
 			BPF_EXIT_INSN(),
 		},
 		.fixup_prog = { 2 },
 		.result = ACCEPT,
+		.retval = 42,
 	},
 	{
 		"stack pointer arithmetic",
@@ -11278,16 +11336,61 @@ static int create_map(uint32_t size_value, uint32_t max_elem)
 	return fd;
 }
 
+static int create_prog_dummy1(void)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 42),
+		BPF_EXIT_INSN(),
+	};
+
+	return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(int mfd, int idx)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_3, idx),
+		BPF_LD_MAP_FD(BPF_REG_2, mfd),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_tail_call),
+		BPF_MOV64_IMM(BPF_REG_0, 41),
+		BPF_EXIT_INSN(),
+	};
+
+	return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog,
+				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
 static int create_prog_array(void)
 {
-	int fd;
+	int p1key = 0, p2key = 1;
+	int mfd, p1fd, p2fd;
 
-	fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
-			    sizeof(int), 4, 0);
-	if (fd < 0)
+	mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+			     sizeof(int), 4, 0);
+	if (mfd < 0) {
 		printf("Failed to create prog array '%s'!\n", strerror(errno));
+		return -1;
+	}
 
-	return fd;
+	p1fd = create_prog_dummy1();
+	p2fd = create_prog_dummy2(mfd, p2key);
+	if (p1fd < 0 || p2fd < 0)
+		goto out;
+	if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+		goto out;
+	if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+		goto out;
+	close(p2fd);
+	close(p1fd);
+
+	return mfd;
+out:
+	close(p2fd);
+	close(p1fd);
+	close(mfd);
+	return -1;
 }
 
 static int create_map_in_map(void)

From f8c3d0dda4b09e05ad8781764cbe153815c1bf23 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 24 Feb 2018 21:21:38 +0300
Subject: [PATCH 0353/1678] xfrm: mark kmem_caches as __ro_after_init

Kmem caches aren't relocated once set up.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_input.c  | 3 ++-
 net/xfrm/xfrm_policy.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1472c0857975..44fc54dc013c 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/bottom_half.h>
+#include <linux/cache.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -31,7 +32,7 @@ struct xfrm_trans_cb {
 
 #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
 
-static struct kmem_cache *secpath_cachep __read_mostly;
+static struct kmem_cache *secpath_cachep __ro_after_init;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
 static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7a23078132cf..12bd415d349e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
 
-static struct kmem_cache *xfrm_dst_cache __read_mostly;
+static struct kmem_cache *xfrm_dst_cache __ro_after_init;
 static __read_mostly seqcount_t xfrm_policy_hash_generation;
 
 static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);

From 5211fcfb8110f77ff9f389e476563345817f61a5 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Mon, 26 Feb 2018 14:28:19 -0800
Subject: [PATCH 0354/1678] esp: check the NETIF_F_HW_ESP_TX_CSUM bit before
 segmenting

If I understand correctly, we should not be asking for a
checksum offload on an ipsec packet if the netdev isn't
advertising NETIF_F_HW_ESP_TX_CSUM.  In that case, we should
clear the NETIF_F_CSUM_MASK bits.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4_offload.c | 2 ++
 net/ipv6/esp6_offload.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index da5635fc52c2..7cf755ef9efb 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -138,6 +138,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
 	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
 	    (x->xso.dev != skb->dev))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+	else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+		esp_features = features & ~NETIF_F_CSUM_MASK;
 
 	xo->flags |= XFRM_GSO_SEGMENT;
 
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 3fd1ec775dc2..27f59b61f70f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
 	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
 	    (x->xso.dev != skb->dev))
 		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+	else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+		esp_features = features & ~NETIF_F_CSUM_MASK;
 
 	xo->flags |= XFRM_GSO_SEGMENT;
 

From 59cae5b9d6d39db2804ed5f0330eb5507d40c34c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 23 Feb 2018 10:06:04 +0100
Subject: [PATCH 0355/1678] mac80211: support AP 4-addr mode fast-rx

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3dc162ddc3a6..89dcf9f762ce 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3783,6 +3783,15 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 			!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
 			(sdata->vif.type != NL80211_IFTYPE_AP_VLAN ||
 			 !sdata->u.vlan.sta);
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+		    sdata->u.vlan.sta) {
+			fastrx.expected_ds_bits |=
+				cpu_to_le16(IEEE80211_FCTL_FROMDS);
+			fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+			fastrx.internal_forward = 0;
+		}
+
 		break;
 	default:
 		goto clear;

From 1d870162418a826905161d2276c912986d3b9d9a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 23 Feb 2018 10:06:05 +0100
Subject: [PATCH 0356/1678] mac80211: support fast-rx with incompatible PS
 capabilities when PS is disabled

When powersave is disabled for the interface, we can do fast-rx anyway.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
[fixed indentation on one line]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c |  1 +
 net/mac80211/rx.c  | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f4195a0f0279..fd68f6fb02d7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2685,6 +2685,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 
 	ieee80211_recalc_ps(local);
 	ieee80211_recalc_ps_vif(sdata);
+	ieee80211_check_fast_rx_iface(sdata);
 
 	return 0;
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 89dcf9f762ce..1d417960d376 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3750,12 +3750,7 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 		/* 4-addr is harder to deal with, later maybe */
 		if (sdata->u.mgd.use_4addr)
 			goto clear;
-		/* software powersave is a huge mess, avoid all of it */
-		if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
-			goto clear;
-		if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
-		    !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
-			goto clear;
+
 		if (sta->sta.tdls) {
 			fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
 			fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3767,6 +3762,16 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 			fastrx.expected_ds_bits =
 				cpu_to_le16(IEEE80211_FCTL_FROMDS);
 		}
+
+		if (!sdata->u.mgd.powersave)
+			break;
+
+		/* software powersave is a huge mess, avoid all of it */
+		if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
+			goto clear;
+		if (ieee80211_hw_check(&local->hw, SUPPORTS_PS) &&
+		    !ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
+			goto clear;
 		break;
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_AP:

From 9251a4736ee360d3850644ddaaa1a61dcec96238 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 23 Feb 2018 13:55:50 +0100
Subject: [PATCH 0357/1678] mac80211: support station 4-addr mode fast-rx

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1d417960d376..2783c5cd7de7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3747,10 +3747,6 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_STATION:
-		/* 4-addr is harder to deal with, later maybe */
-		if (sdata->u.mgd.use_4addr)
-			goto clear;
-
 		if (sta->sta.tdls) {
 			fastrx.da_offs = offsetof(struct ieee80211_hdr, addr1);
 			fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr2);
@@ -3763,6 +3759,13 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
 				cpu_to_le16(IEEE80211_FCTL_FROMDS);
 		}
 
+		if (sdata->u.mgd.use_4addr && !sta->sta.tdls) {
+			fastrx.expected_ds_bits |=
+				cpu_to_le16(IEEE80211_FCTL_TODS);
+			fastrx.da_offs = offsetof(struct ieee80211_hdr, addr3);
+			fastrx.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+		}
+
 		if (!sdata->u.mgd.powersave)
 			break;
 

From 21b7022f13fb038b3e204a892c7cc42749754f7f Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 19 Feb 2018 14:48:44 +0200
Subject: [PATCH 0358/1678] mac80211: agg-rx: Accept ADDBA request update if
 timeout did not change

As there is no support for updating an existing ADDBA session with
a peer, we decline the request (while keeping the session active).
However, in case that the timeout did not change, there is no need
to decline the request, so modify the code to reply with status success
in such a case (this is useful for interoperability with APs that send an
ADDBA request update without changing the timeout value).

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-rx.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 1f3188d03840..e83c19d4c292 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -298,13 +298,23 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 
 	if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
 		if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
+			struct tid_ampdu_rx *tid_rx;
+
 			ht_dbg_ratelimited(sta->sdata,
 					   "updated AddBA Req from %pM on tid %u\n",
 					   sta->sta.addr, tid);
 			/* We have no API to update the timeout value in the
-			 * driver so reject the timeout update.
+			 * driver so reject the timeout update if the timeout
+			 * changed. If if did not change, i.e., no real update,
+			 * just reply with success.
 			 */
-			status = WLAN_STATUS_REQUEST_DECLINED;
+			rcu_read_lock();
+			tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
+			if (tid_rx && tid_rx->timeout == timeout)
+				status = WLAN_STATUS_SUCCESS;
+			else
+				status = WLAN_STATUS_REQUEST_DECLINED;
+			rcu_read_unlock();
 			goto end;
 		}
 

From 84d0a394808621619a4b319c6eed16e3b389e214 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 5 Jan 2018 09:54:32 +0100
Subject: [PATCH 0359/1678] batman-adv: Fix indentation of batadv_seq_before

Also multiline macros should have their statements start on a tabstop. This
was detected by checkpatch.pl after commit a134f8de9f40 ("checkpatch:
improve the TABSTOP test to include declarations").

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/main.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d5d65999207e..057a28a9fe88 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -331,11 +331,13 @@ static inline bool batadv_has_timed_out(unsigned long timestamp,
  *
  * Return: true when x is a predecessor of y, false otherwise
  */
-#define batadv_seq_before(x, y) ({typeof(x)_d1 = (x); \
-				 typeof(y)_d2 = (y); \
-				 typeof(x)_dummy = (_d1 - _d2); \
-				 (void)(&_d1 == &_d2); \
-				 _dummy > batadv_smallest_signed_int(_dummy); })
+#define batadv_seq_before(x, y) ({ \
+	typeof(x)_d1 = (x); \
+	typeof(y)_d2 = (y); \
+	typeof(x)_dummy = (_d1 - _d2); \
+	(void)(&_d1 == &_d2); \
+	_dummy > batadv_smallest_signed_int(_dummy); \
+})
 
 /**
  * batadv_seq_after() - Checks if a sequence number x is a successor of y

From d7625f9f72dc148b4f25d9bc5014b710e1024b15 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 20 Feb 2018 12:08:10 +0100
Subject: [PATCH 0360/1678] batman-adv: Avoid relation operator comparison with
 bool

commit 785ea1144182 ("batman-adv: Distributed ARP Table - create DHT helper
functions") introduced a return check of batadv_compare_eth which uses a
boolean return value since commit 16af73458aca ("batman-adv: main,
batadv_compare_eth return bool"). A relational (<, >, <= or >=) operator
is not the right one for such a check.

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Acked-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/distributed-arp-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 19b15de455ab..4469dcc1558f 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -495,7 +495,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
 	 * the one with the lowest address
 	 */
 	if (tmp_max == max && max_orig_node &&
-	    batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
+	    batadv_compare_eth(candidate->orig, max_orig_node->orig))
 		goto out;
 
 	ret = true;

From a163dc22d515d17844435c8217ff66193d35b3fa Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Wed, 24 Jan 2018 12:21:37 +0100
Subject: [PATCH 0361/1678] batman-adv: always assume 2-byte packet alignment

NIC drivers generally try to ensure that the "network header" is aligned
to a 4-byte boundary. This is not always possible: When Ethernet frames are
encapsulated in other packets with 4-byte aligned headers, the inner
Ethernet header will have 4-byte alignment, and in consequence, the inner
network header is aligned to 2, but not to 4 bytes.

Most parts of batman-adv only care about 2-byte alignment; in particular,
no unaligned accesses occur in performance-critical paths that handle
actual payload data. This is not true for OGM handling: the seqno and crc
fields are accessed as 32-bit values. To avoid these unaligned accesses,
this patch reduces the expected packet alignment to 2 bytes for all of
batadv's packet types.

As no unaligned accesses existed on the performance-critical paths anyways,
this chance does have any (positive or negative) effect on performance, but
it still makes sense to avoid these accesses to prevent log noise when
examining other unaligned accesses in the kernel while batman-adv is
active.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index daefd7283896..894d8d2f713d 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -196,8 +196,6 @@ struct batadv_bla_claim_dst {
 	__be16 group;		/* group id */
 };
 
-#pragma pack()
-
 /**
  * struct batadv_ogm_packet - ogm (routing protocol) packet
  * @packet_type: batman-adv packet type, part of the general header
@@ -222,9 +220,6 @@ struct batadv_ogm_packet {
 	__u8   reserved;
 	__u8   tq;
 	__be16 tvlv_len;
-	/* __packed is not needed as the struct size is divisible by 4,
-	 * and the largest data type in this struct has a size of 4.
-	 */
 };
 
 #define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
@@ -249,9 +244,6 @@ struct batadv_ogm2_packet {
 	__u8   orig[ETH_ALEN];
 	__be16 tvlv_len;
 	__be32 throughput;
-	/* __packed is not needed as the struct size is divisible by 4,
-	 * and the largest data type in this struct has a size of 4.
-	 */
 };
 
 #define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
@@ -405,7 +397,6 @@ struct batadv_icmp_packet_rr {
  * misalignment of the payload after the ethernet header. It may also lead to
  * leakage of information when the padding it not initialized before sending.
  */
-#pragma pack(2)
 
 /**
  * struct batadv_unicast_packet - unicast packet for network payload
@@ -533,8 +524,6 @@ struct batadv_coded_packet {
 	__be16 coded_len;
 };
 
-#pragma pack()
-
 /**
  * struct batadv_unicast_tvlv_packet - generic unicast packet with tvlv payload
  * @packet_type: batman-adv packet type, part of the general header
@@ -641,4 +630,6 @@ struct batadv_tvlv_mcast_data {
 	__u8 reserved[3];
 };
 
+#pragma pack()
+
 #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */

From 24bba078eca099b5bd25e17e97b485f013589f8c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 27 Feb 2018 13:03:07 +0100
Subject: [PATCH 0362/1678] mac80211: support A-MSDU in fast-rx

Only works if the IV was stripped from packets. Create a smaller
variant of ieee80211_rx_h_amsdu, which bypasses checks already done
within the fast-rx context.

In order to do so, update cfg80211's ieee80211_data_to_8023_exthdr()
to take the offset between header and snap.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |   6 +-
 net/mac80211/rx.c      | 124 ++++++++++++++++++++++++-----------------
 net/wireless/util.c    |   5 +-
 3 files changed, 80 insertions(+), 55 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 56e905cd4b07..fc40843baed3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4410,10 +4410,12 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  *	of it being pushed into the SKB
  * @addr: the device MAC address
  * @iftype: the virtual interface type
+ * @data_offset: offset of payload after the 802.11 header
  * Return: 0 on success. Non-zero on error.
  */
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
-				  const u8 *addr, enum nl80211_iftype iftype);
+				  const u8 *addr, enum nl80211_iftype iftype,
+				  u8 data_offset);
 
 /**
  * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -4425,7 +4427,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 					 enum nl80211_iftype iftype)
 {
-	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
 }
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2783c5cd7de7..de7d10732fd5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2353,39 +2353,17 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 }
 
 static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
 {
 	struct net_device *dev = rx->sdata->dev;
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	__le16 fc = hdr->frame_control;
 	struct sk_buff_head frame_list;
-	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
 	struct ethhdr ethhdr;
 	const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
 
-	if (unlikely(!ieee80211_is_data(fc)))
-		return RX_CONTINUE;
-
-	if (unlikely(!ieee80211_is_data_present(fc)))
-		return RX_DROP_MONITOR;
-
-	if (!(status->rx_flags & IEEE80211_RX_AMSDU))
-		return RX_CONTINUE;
-
 	if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
-		switch (rx->sdata->vif.type) {
-		case NL80211_IFTYPE_AP_VLAN:
-			if (!rx->sdata->u.vlan.sta)
-				return RX_DROP_UNUSABLE;
-			break;
-		case NL80211_IFTYPE_STATION:
-			if (!rx->sdata->u.mgd.use_4addr)
-				return RX_DROP_UNUSABLE;
-			break;
-		default:
-			return RX_DROP_UNUSABLE;
-		}
 		check_da = NULL;
 		check_sa = NULL;
 	} else switch (rx->sdata->vif.type) {
@@ -2405,15 +2383,13 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 			break;
 	}
 
-	if (is_multicast_ether_addr(hdr->addr1))
-		return RX_DROP_UNUSABLE;
-
 	skb->dev = dev;
 	__skb_queue_head_init(&frame_list);
 
 	if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
 					  rx->sdata->vif.addr,
-					  rx->sdata->vif.type))
+					  rx->sdata->vif.type,
+					  data_offset))
 		return RX_DROP_UNUSABLE;
 
 	ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2435,6 +2411,44 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	return RX_QUEUED;
 }
 
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
+{
+	struct sk_buff *skb = rx->skb;
+	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	__le16 fc = hdr->frame_control;
+
+	if (!(status->rx_flags & IEEE80211_RX_AMSDU))
+		return RX_CONTINUE;
+
+	if (unlikely(!ieee80211_is_data(fc)))
+		return RX_CONTINUE;
+
+	if (unlikely(!ieee80211_is_data_present(fc)))
+		return RX_DROP_MONITOR;
+
+	if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+		switch (rx->sdata->vif.type) {
+		case NL80211_IFTYPE_AP_VLAN:
+			if (!rx->sdata->u.vlan.sta)
+				return RX_DROP_UNUSABLE;
+			break;
+		case NL80211_IFTYPE_STATION:
+			if (!rx->sdata->u.mgd.use_4addr)
+				return RX_DROP_UNUSABLE;
+			break;
+		default:
+			return RX_DROP_UNUSABLE;
+		}
+	}
+
+	if (is_multicast_ether_addr(hdr->addr1))
+		return RX_DROP_UNUSABLE;
+
+	return __ieee80211_rx_h_amsdu(rx, 0);
+}
+
 #ifdef CONFIG_MAC80211_MESH
 static ieee80211_rx_result
 ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
@@ -3898,7 +3912,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct sta_info *sta = rx->sta;
 	int orig_len = skb->len;
-	int snap_offs = ieee80211_hdrlen(hdr->frame_control);
+	int hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	int snap_offs = hdrlen;
 	struct {
 		u8 snap[sizeof(rfc1042_header)];
 		__be16 proto;
@@ -3929,10 +3944,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	    (status->flag & FAST_RX_CRYPT_FLAGS) != FAST_RX_CRYPT_FLAGS)
 		return false;
 
-	/* we don't deal with A-MSDU deaggregation here */
-	if (status->rx_flags & IEEE80211_RX_AMSDU)
-		return false;
-
 	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
 		return false;
 
@@ -3964,21 +3975,24 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 		snap_offs += IEEE80211_CCMP_HDR_LEN;
 	}
 
-	if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
-		goto drop;
-	payload = (void *)(skb->data + snap_offs);
+	if (!(status->rx_flags & IEEE80211_RX_AMSDU)) {
+		if (!pskb_may_pull(skb, snap_offs + sizeof(*payload)))
+			goto drop;
 
-	if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
-		return false;
+		payload = (void *)(skb->data + snap_offs);
 
-	/* Don't handle these here since they require special code.
-	 * Accept AARP and IPX even though they should come with a
-	 * bridge-tunnel header - but if we get them this way then
-	 * there's little point in discarding them.
-	 */
-	if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
-		     payload->proto == fast_rx->control_port_protocol))
-		return false;
+		if (!ether_addr_equal(payload->snap, fast_rx->rfc1042_hdr))
+			return false;
+
+		/* Don't handle these here since they require special code.
+		 * Accept AARP and IPX even though they should come with a
+		 * bridge-tunnel header - but if we get them this way then
+		 * there's little point in discarding them.
+		 */
+		if (unlikely(payload->proto == cpu_to_be16(ETH_P_TDLS) ||
+			     payload->proto == fast_rx->control_port_protocol))
+			return false;
+	}
 
 	/* after this point, don't punt to the slowpath! */
 
@@ -3992,12 +4006,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	}
 
 	/* statistics part of ieee80211_rx_h_sta_process() */
-	stats->last_rx = jiffies;
-	stats->last_rate = sta_stats_encode_rate(status);
-
-	stats->fragments++;
-	stats->packets++;
-
 	if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
 		stats->last_signal = status->signal;
 		if (!fast_rx->uses_rss)
@@ -4026,6 +4034,20 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
 	if (rx->key && !ieee80211_has_protected(hdr->frame_control))
 		goto drop;
 
+	if (status->rx_flags & IEEE80211_RX_AMSDU) {
+		if (__ieee80211_rx_h_amsdu(rx, snap_offs - hdrlen) !=
+		    RX_QUEUED)
+			goto drop;
+
+		return true;
+	}
+
+	stats->last_rx = jiffies;
+	stats->last_rate = sta_stats_encode_rate(status);
+
+	stats->fragments++;
+	stats->packets++;
+
 	/* do the header conversion - first grab the addresses */
 	ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
 	ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index c69160694b6c..d112e9a89364 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -420,7 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
 EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
 
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
-				  const u8 *addr, enum nl80211_iftype iftype)
+				  const u8 *addr, enum nl80211_iftype iftype,
+				  u8 data_offset)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct {
@@ -434,7 +435,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
 		return -1;
 
-	hdrlen = ieee80211_hdrlen(hdr->frame_control);
+	hdrlen = ieee80211_hdrlen(hdr->frame_control) + data_offset;
 	if (skb->len < hdrlen + 8)
 		return -1;
 

From f63b4c971f5fb1310f145785c3b2b77651ef129e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Fri, 10 Nov 2017 00:38:23 +0100
Subject: [PATCH 0363/1678] wl1251: Update wl->nvs_len after wl->nvs is valid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If kmemdup fails, then wl->nvs_len will contain invalid non-zero size.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ti/wl1251/main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 037defd10b91..412fbd413003 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -122,8 +122,7 @@ static int wl1251_fetch_nvs(struct wl1251 *wl)
 		goto out;
 	}
 
-	wl->nvs_len = fw->size;
-	wl->nvs = kmemdup(fw->data, wl->nvs_len, GFP_KERNEL);
+	wl->nvs = kmemdup(fw->data, fw->size, GFP_KERNEL);
 
 	if (!wl->nvs) {
 		wl1251_error("could not allocate memory for the nvs file");
@@ -131,6 +130,8 @@ static int wl1251_fetch_nvs(struct wl1251 *wl)
 		goto out;
 	}
 
+	wl->nvs_len = fw->size;
+
 	ret = 0;
 
 out:

From 562da3a39cb78ffaba44511ec39d8c245023a02f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Fri, 10 Nov 2017 00:38:24 +0100
Subject: [PATCH 0364/1678] wl1251: Generate random MAC address only if driver
 does not have valid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before this patch, driver generated random MAC address every time it was
initialized. After that random MAC address could be overwritten with fixed
one, if provided.

This patch changes order. First it tries to read fixed MAC address and if
it fails then driver generates random MAC address.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ti/wl1251/main.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 412fbd413003..be07243d590e 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -1491,7 +1491,24 @@ int wl1251_init_ieee80211(struct wl1251 *wl)
 	wl->hw->queues = 4;
 
 	if (wl->use_eeprom)
-		wl1251_read_eeprom_mac(wl);
+		ret = wl1251_read_eeprom_mac(wl);
+	else
+		ret = -EINVAL;
+
+	if (ret == 0 && !is_valid_ether_addr(wl->mac_addr))
+		ret = -EINVAL;
+
+	if (ret < 0) {
+		/*
+		 * In case our MAC address is not correctly set,
+		 * we use a random but Nokia MAC.
+		 */
+		static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
+		memcpy(wl->mac_addr, nokia_oui, 3);
+		get_random_bytes(wl->mac_addr + 3, 3);
+		wl1251_warning("MAC address in eeprom or nvs data is not valid");
+		wl1251_warning("Setting random MAC address: %pM", wl->mac_addr);
+	}
 
 	ret = wl1251_register_hw(wl);
 	if (ret)
@@ -1512,7 +1529,6 @@ struct ieee80211_hw *wl1251_alloc_hw(void)
 	struct ieee80211_hw *hw;
 	struct wl1251 *wl;
 	int i;
-	static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
 
 	hw = ieee80211_alloc_hw(sizeof(*wl), &wl1251_ops);
 	if (!hw) {
@@ -1562,13 +1578,6 @@ struct ieee80211_hw *wl1251_alloc_hw(void)
 	INIT_WORK(&wl->irq_work, wl1251_irq_work);
 	INIT_WORK(&wl->tx_work, wl1251_tx_work);
 
-	/*
-	 * In case our MAC address is not correctly set,
-	 * we use a random but Nokia MAC.
-	 */
-	memcpy(wl->mac_addr, nokia_oui, 3);
-	get_random_bytes(wl->mac_addr + 3, 3);
-
 	wl->state = WL1251_STATE_OFF;
 	mutex_init(&wl->mutex);
 	spin_lock_init(&wl->wl_lock);

From 4f507d588d08429126530988f657a7a7ca3e6180 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Fri, 10 Nov 2017 00:38:25 +0100
Subject: [PATCH 0365/1678] wl1251: Parse and use MAC address from supplied NVS
 data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch implements parsing MAC address from NVS data which are sent to
wl1251 chip. Calibration NVS data could contain valid MAC address and it
will be used instead of randomly generated one.

This patch also moves code for requesting NVS data from userspace to driver
initialization code to make sure that NVS data will be there at time when
permanent MAC address is needed.

Calibration NVS data for wl1251 are device specific. Every device with
wl1251 chip should have been calibrated in factory and needs to provide own
calibration data.

Default example file wl1251-nvs.bin, found in linux-firmware repository,
contains MAC address 00:00:20:07:03:09. So this MAC address is marked as
invalid as it is not real device specific address, just example one.

Format of calibration NVS data can be found at:
http://notaz.gp2x.de/misc/pnd/wl1251/nvs_map.txt

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ti/wl1251/main.c | 55 +++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index be07243d590e..5ac7965c7d53 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -203,13 +203,6 @@ static int wl1251_chip_wakeup(struct wl1251 *wl)
 			goto out;
 	}
 
-	if (wl->nvs == NULL && !wl->use_eeprom) {
-		/* No NVS from netlink, try to get it from the filesystem */
-		ret = wl1251_fetch_nvs(wl);
-		if (ret < 0)
-			goto out;
-	}
-
 out:
 	return ret;
 }
@@ -1447,6 +1440,46 @@ static int wl1251_read_eeprom_mac(struct wl1251 *wl)
 	return 0;
 }
 
+#define NVS_OFF_MAC_LEN 0x19
+#define NVS_OFF_MAC_ADDR_LO 0x1a
+#define NVS_OFF_MAC_ADDR_HI 0x1b
+#define NVS_OFF_MAC_DATA 0x1c
+
+static int wl1251_check_nvs_mac(struct wl1251 *wl)
+{
+	if (wl->nvs_len < 0x24)
+		return -ENODATA;
+
+	/* length is 2 and data address is 0x546c (ANDed with 0xfffe) */
+	if (wl->nvs[NVS_OFF_MAC_LEN] != 2 ||
+	    wl->nvs[NVS_OFF_MAC_ADDR_LO] != 0x6d ||
+	    wl->nvs[NVS_OFF_MAC_ADDR_HI] != 0x54)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int wl1251_read_nvs_mac(struct wl1251 *wl)
+{
+	u8 mac[ETH_ALEN];
+	int i, ret;
+
+	ret = wl1251_check_nvs_mac(wl);
+	if (ret)
+		return ret;
+
+	/* MAC is stored in reverse order */
+	for (i = 0; i < ETH_ALEN; i++)
+		mac[i] = wl->nvs[NVS_OFF_MAC_DATA + ETH_ALEN - i - 1];
+
+	/* 00:00:20:07:03:09 is in example file wl1251-nvs.bin, so invalid */
+	if (ether_addr_equal_unaligned(mac, "\x00\x00\x20\x07\x03\x09"))
+		return -EINVAL;
+
+	memcpy(wl->mac_addr, mac, ETH_ALEN);
+	return 0;
+}
+
 static int wl1251_register_hw(struct wl1251 *wl)
 {
 	int ret;
@@ -1490,10 +1523,16 @@ int wl1251_init_ieee80211(struct wl1251 *wl)
 
 	wl->hw->queues = 4;
 
+	if (wl->nvs == NULL && !wl->use_eeprom) {
+		ret = wl1251_fetch_nvs(wl);
+		if (ret < 0)
+			goto out;
+	}
+
 	if (wl->use_eeprom)
 		ret = wl1251_read_eeprom_mac(wl);
 	else
-		ret = -EINVAL;
+		ret = wl1251_read_nvs_mac(wl);
 
 	if (ret == 0 && !is_valid_ether_addr(wl->mac_addr))
 		ret = -EINVAL;

From 3142467fc15ba19a327dcedafcf913bc7832f6d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Fri, 10 Nov 2017 00:38:26 +0100
Subject: [PATCH 0366/1678] wl1251: Set generated MAC address back to NVS data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case there is no valid MAC address kernel generates random one. This
patch propagate this generated MAC address back to NVS data which will be
uploaded to wl1251 chip. So HW would have same MAC address as linux kernel
uses.

This should not change any functionality, but it is better to tell wl1251
correct mac address since beginning of chip usage.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ti/wl1251/main.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 5ac7965c7d53..bd8641ad953b 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -1480,6 +1480,21 @@ static int wl1251_read_nvs_mac(struct wl1251 *wl)
 	return 0;
 }
 
+static int wl1251_write_nvs_mac(struct wl1251 *wl)
+{
+	int i, ret;
+
+	ret = wl1251_check_nvs_mac(wl);
+	if (ret)
+		return ret;
+
+	/* MAC is stored in reverse order */
+	for (i = 0; i < ETH_ALEN; i++)
+		wl->nvs[NVS_OFF_MAC_DATA + i] = wl->mac_addr[ETH_ALEN - i - 1];
+
+	return 0;
+}
+
 static int wl1251_register_hw(struct wl1251 *wl)
 {
 	int ret;
@@ -1545,6 +1560,8 @@ int wl1251_init_ieee80211(struct wl1251 *wl)
 		static const u8 nokia_oui[3] = {0x00, 0x1f, 0xdf};
 		memcpy(wl->mac_addr, nokia_oui, 3);
 		get_random_bytes(wl->mac_addr + 3, 3);
+		if (!wl->use_eeprom)
+			wl1251_write_nvs_mac(wl);
 		wl1251_warning("MAC address in eeprom or nvs data is not valid");
 		wl1251_warning("Setting random MAC address: %pM", wl->mac_addr);
 	}

From 8100091d02487ff267af0d410ceb9eefebc8ea03 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sat, 27 Jan 2018 00:38:35 +0800
Subject: [PATCH 0367/1678] bcma: Replace mdelay with usleep_range in
 bcma_pmu_resources_init

After checking all possible call chains to bcma_pmu_resources_init() here,
my tool finds that this function is never called in atomic context,
namely never in an interrupt handler or holding a spinlock.
Thus mdelay can be replaced with usleep_range to avoid busy wait.

This is found by a static analysis tool named DCNS written by myself.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/driver_chipcommon_pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index f1eb4d3e1d57..f4161064365c 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -203,7 +203,7 @@ static void bcma_pmu_resources_init(struct bcma_drv_cc *cc)
 	 * Add some delay; allow resources to come up and settle.
 	 * Delay is required for SoC (early init).
 	 */
-	mdelay(2);
+	usleep_range(2000, 2500);
 }
 
 /* Disable to allow reading SPROM. Don't know the adventages of enabling it. */

From 7f897db37b76235d50151dfea7d543568d54535a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 26 Feb 2018 10:41:30 +0100
Subject: [PATCH 0368/1678] ipvlan: fix building with modular IPV6

We no longer depend on IPV6, but that now causes a link error with
CONFIG_IPV6=m and CONFIG_IPVLAN=y:

drivers/net/ipvlan/ipvlan_core.o: In function `ipvlan_queue_xmit':
ipvlan_core.c:(.text+0x1440): undefined reference to `ip6_route_output_flags'
drivers/net/ipvlan/ipvlan_core.o: In function `ipvlan_l3_rcv':
ipvlan_core.c:(.text+0x1818): undefined reference to `ip6_route_input_lookup'

This adds back the dependency on IPV6, with the option of building without
IPV6, but forcing IPVLAN to be a module when IPV6 is a module.

Fixes: 94333fac44d1 ("ipvlan: drop ipv6 dependency")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d88b78a17440..08b85215c2be 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,7 @@ config MACVTAP
 config IPVLAN
     tristate "IP-VLAN support"
     depends on INET
+    depends on IPV6 || !IPV6
     depends on NETFILTER
     select NET_L3_MASTER_DEV
     ---help---

From c80afa026a7f6eb01f5431760cd894526153d4a8 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 15:59:19 +0300
Subject: [PATCH 0369/1678] net: Convert /proc creating and destroying
 pernet_operations

These pernet_operations just create and destroy /proc entries,
and they can safely marked as async:

pppoe_net_ops
vlan_net_ops
canbcm_pernet_ops
kcm_net_ops
pfkey_net_ops
pppol2tp_net_ops
phonet_net_ops

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c | 1 +
 net/8021q/vlan.c        | 1 +
 net/can/bcm.c           | 1 +
 net/kcm/kcmproc.c       | 1 +
 net/key/af_key.c        | 1 +
 net/l2tp/l2tp_ppp.c     | 1 +
 net/phonet/pn_dev.c     | 1 +
 7 files changed, 7 insertions(+)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index bd89d1c559ce..c10e6181a2f0 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1161,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = {
 	.exit = pppoe_exit_net,
 	.id   = &pppoe_net_id,
 	.size = sizeof(struct pppoe_net),
+	.async = true,
 };
 
 static int __init pppoe_init(void)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..bd0ed39f65fb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = {
 	.exit = vlan_exit_net,
 	.id   = &vlan_net_id,
 	.size = sizeof(struct vlan_net),
+	.async = true,
 };
 
 static int __init vlan_proto_init(void)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e34fee3..26730d39e048 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net)
 static struct pernet_operations canbcm_pernet_ops __read_mostly = {
 	.init = canbcm_pernet_init,
 	.exit = canbcm_pernet_exit,
+	.async = true,
 };
 
 static int __init bcm_module_init(void)
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 9d5649e4e8b7..2c1c8b3e4452 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net)
 static struct pernet_operations kcm_net_ops = {
 	.init = kcm_proc_init_net,
 	.exit = kcm_proc_exit_net,
+	.async = true,
 };
 
 int __init kcm_proc_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7e2e7188e7f4..3ac08ab26207 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = {
 	.exit = pfkey_net_exit,
 	.id   = &pfkey_net_id,
 	.size = sizeof(struct netns_pfkey),
+	.async = true,
 };
 
 static void __exit ipsec_pfkey_exit(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 99a03c72db4f..0c4f49a6a0cb 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1770,6 +1770,7 @@ static struct pernet_operations pppol2tp_net_ops = {
 	.init = pppol2tp_init_net,
 	.exit = pppol2tp_exit_net,
 	.id   = &pppol2tp_net_id,
+	.async = true,
 };
 
 /*****************************************************************************
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 77787512fc32..9454e8393793 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = {
 	.exit = phonet_exit_net,
 	.id   = &phonet_net_id,
 	.size = sizeof(struct phonet_net),
+	.async = true,
 };
 
 /* Initialize Phonet devices list */

From 47d63a01797be8de142beeb0090704501701eafa Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 15:59:28 +0300
Subject: [PATCH 0370/1678] net: Convert hashlimit_net_ops and recent_net_ops

These pernet_operations just create and destroy /proc entries.
Also, new /proc entries also may come after new nf rules
are added, but this is not possible, when net isn't alive.
So, they are safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_hashlimit.c | 1 +
 net/netfilter/xt_recent.c    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 66f5aca62a08..db2fe0911740 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1345,6 +1345,7 @@ static struct pernet_operations hashlimit_net_ops = {
 	.exit	= hashlimit_net_exit,
 	.id	= &hashlimit_net_id,
 	.size	= sizeof(struct hashlimit_net),
+	.async	= true,
 };
 
 static int __init hashlimit_mt_init(void)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 6d232d18faff..19efdb757944 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = {
 	.exit	= recent_net_exit,
 	.id	= &recent_net_id,
 	.size	= sizeof(struct recent_net),
+	.async	= true,
 };
 
 static struct xt_match recent_mt_reg[] __read_mostly = {

From f0aad8e340eace8a13736277a0e8c040a879740c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 15:59:37 +0300
Subject: [PATCH 0371/1678] net: Convert synproxy_net_ops

These pernet_operations create and destroy /proc entries
and allocate extents to template ct, which depend on global
nf_ct_ext_types[] array. So, we are able to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_synproxy_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 92139a087260..64b875e452ca 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = {
 	.exit		= synproxy_net_exit,
 	.id		= &synproxy_net_id,
 	.size		= sizeof(struct synproxy_net),
+	.async		= true,
 };
 
 static int __init synproxy_core_init(void)

From 7300bd94e622a44277a72b82f848db4d961d02e6 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 15:59:47 +0300
Subject: [PATCH 0372/1678] net: Convert nfs_net_ops

These pernet_operations just create and destroy /proc entries
and net_generic()->cb_ident_idr IDR. So, we are able to mark
them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d893543cf3b..6c3083c992e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = {
 	.exit = nfs_net_exit,
 	.id   = &nfs_net_id,
 	.size = sizeof(struct nfs_net),
+	.async = true,
 };
 
 /*

From 02df428ca291f20285f04c25a7efd664a5d83551 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 15:59:56 +0300
Subject: [PATCH 0373/1678] net: Convert simple pernet_operations

These pernet_operations make pretty simple actions
like variable initialization on init, debug checks
on exit, and so on, and they obviously are able
to be executed in parallel with any others:

vrf_net_ops
lockd_net_ops
grace_net_ops
xfrm6_tunnel_net_ops
kcm_net_ops
tcf_net_ops

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c       | 1 +
 fs/lockd/svc.c          | 1 +
 fs/nfs_common/grace.c   | 1 +
 net/ipv6/xfrm6_tunnel.c | 1 +
 net/kcm/kcmsock.c       | 1 +
 net/sched/cls_api.c     | 1 +
 6 files changed, 6 insertions(+)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9ce0182223a0..e459e601c57f 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1434,6 +1434,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
 	.init = vrf_netns_init,
 	.id   = &vrf_net_id,
 	.size = sizeof(bool),
+	.async = true,
 };
 
 static int __init vrf_init_module(void)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9c36d614bf89..2dee4e03ff1c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = {
 	.exit = lockd_exit_net,
 	.id = &lockd_net_id,
 	.size = sizeof(struct lockd_net),
+	.async = true,
 };
 
 
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 5be08f02a76b..8c743a405df6 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = {
 	.exit = grace_exit_net,
 	.id   = &grace_net_id,
 	.size = sizeof(struct list_head),
+	.async = true,
 };
 
 static int __init
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f85f0d7480ac..a9673619e0e9 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
 	.exit	= xfrm6_tunnel_net_exit,
 	.id	= &xfrm6_tunnel_net_id,
 	.size	= sizeof(struct xfrm6_tunnel_net),
+	.async	= true,
 };
 
 static int __init xfrm6_tunnel_init(void)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 435594648dac..a6cd0712e063 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -2015,6 +2015,7 @@ static struct pernet_operations kcm_net_ops = {
 	.exit = kcm_exit_net,
 	.id   = &kcm_net_id,
 	.size = sizeof(struct kcm_net),
+	.async = true,
 };
 
 static int __init kcm_init(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9d1a8bbf8152..19f9f421d5b7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1618,6 +1618,7 @@ static struct pernet_operations tcf_net_ops = {
 	.exit = tcf_net_exit,
 	.id   = &tcf_net_id,
 	.size = sizeof(struct tcf_net),
+	.async = true,
 };
 
 static int __init tc_filter_init(void)

From 25354866e03d416ad0e9395ce46d38912ddcae87 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:00:12 +0300
Subject: [PATCH 0374/1678] net: Convert cma_pernet_operations

These pernet_operations just create and destroy IDR.
So, we mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/cma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e66963ca58bd..3ae32d1ddd27 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4549,6 +4549,7 @@ static struct pernet_operations cma_pernet_operations = {
 	.exit = cma_exit_net,
 	.id = &cma_pernet_id,
 	.size = sizeof(struct cma_pernet),
+	.async = true,
 };
 
 static int __init cma_init(void)

From 5fcc85843d94e40ac1633d18d9651f69d9f16770 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:00:22 +0300
Subject: [PATCH 0375/1678] net: Convert sysctl creating and destroying
 pernet_operations

These pernet_operations create and destroy sysctl tables,
and they are able to be executed in parallel with any others:

ip_vs_lblc_ops
ip_vs_lblcr_ops

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_lblc.c  | 1 +
 net/netfilter/ipvs/ip_vs_lblcr.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index d625179de485..6a340c94c4b8 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblc_ops = {
 	.init = __ip_vs_lblc_init,
 	.exit = __ip_vs_lblc_exit,
+	.async = true,
 };
 
 static int __init ip_vs_lblc_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 84c57b62a588..0627881128da 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblcr_ops = {
 	.init = __ip_vs_lblcr_init,
 	.exit = __ip_vs_lblcr_exit,
+	.async = true,
 };
 
 static int __init ip_vs_lblcr_init(void)

From 685ecfb19888963f61c6085c17c254dbf665e9da Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:00:31 +0300
Subject: [PATCH 0376/1678] net: Convert tc_action_net_init() and
 tc_action_net_exit() based pernet_operations

These pernet_operations are from net/sched directory, and they call only
tc_action_net_init() and tc_action_net_exit():

bpf_net_ops
connmark_net_ops
csum_net_ops
gact_net_ops
ife_net_ops
ipt_net_ops
xt_net_ops
mirred_net_ops
nat_net_ops
pedit_net_ops
police_net_ops
sample_net_ops
simp_net_ops
skbedit_net_ops
skbmod_net_ops
tunnel_key_net_ops
vlan_net_ops

1)tc_action_net_init() just allocates and initializes per-net memory.
2)There should not be in-flight packets at the time of tc_action_net_exit()
call, or another pernet_operations send packets to dying net (except
netlink). So, it seems they can be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_bpf.c        | 1 +
 net/sched/act_connmark.c   | 1 +
 net/sched/act_csum.c       | 1 +
 net/sched/act_gact.c       | 1 +
 net/sched/act_ife.c        | 1 +
 net/sched/act_ipt.c        | 2 ++
 net/sched/act_mirred.c     | 1 +
 net/sched/act_nat.c        | 1 +
 net/sched/act_pedit.c      | 1 +
 net/sched/act_police.c     | 1 +
 net/sched/act_sample.c     | 1 +
 net/sched/act_simple.c     | 1 +
 net/sched/act_skbedit.c    | 1 +
 net/sched/act_skbmod.c     | 1 +
 net/sched/act_tunnel_key.c | 1 +
 net/sched/act_vlan.c       | 1 +
 16 files changed, 17 insertions(+)

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index cb3c5d403c88..da72e0cf2b1f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -413,6 +413,7 @@ static struct pernet_operations bpf_net_ops = {
 	.exit_batch = bpf_exit_net,
 	.id   = &bpf_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init bpf_init_module(void)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880fa51fe..371e5e4ab3e2 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -222,6 +222,7 @@ static struct pernet_operations connmark_net_ops = {
 	.exit_batch = connmark_exit_net,
 	.id   = &connmark_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init connmark_init_module(void)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index d5c2e528d150..1fb1f1f6a555 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -677,6 +677,7 @@ static struct pernet_operations csum_net_ops = {
 	.exit_batch = csum_exit_net,
 	.id   = &csum_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_DESCRIPTION("Checksum updating actions");
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f072bcf33760..74563254e676 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -247,6 +247,7 @@ static struct pernet_operations gact_net_ops = {
 	.exit_batch = gact_exit_net,
 	.id   = &gact_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a5994cf0512b..555b1caeff72 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -870,6 +870,7 @@ static struct pernet_operations ife_net_ops = {
 	.exit_batch = ife_exit_net,
 	.id   = &ife_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init ife_init_module(void)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9784629090ad..10866717f88e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -349,6 +349,7 @@ static struct pernet_operations ipt_net_ops = {
 	.exit_batch = ipt_exit_net,
 	.id   = &ipt_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
@@ -399,6 +400,7 @@ static struct pernet_operations xt_net_ops = {
 	.exit_batch = xt_exit_net,
 	.id   = &xt_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015331ab..64c86579c3d9 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -353,6 +353,7 @@ static struct pernet_operations mirred_net_ops = {
 	.exit_batch = mirred_exit_net,
 	.id   = &mirred_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002)");
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b6c252..b1bc757f6491 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -323,6 +323,7 @@ static struct pernet_operations nat_net_ops = {
 	.exit_batch = nat_exit_net,
 	.id   = &nat_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_DESCRIPTION("Stateless NAT actions");
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 094303c27c5e..5e8cc8f63acd 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -465,6 +465,7 @@ static struct pernet_operations pedit_net_ops = {
 	.exit_batch = pedit_exit_net,
 	.id   = &pedit_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ff55bd6c7db0..51fe4fe343f7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -347,6 +347,7 @@ static struct pernet_operations police_net_ops = {
 	.exit_batch = police_exit_net,
 	.id   = &police_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init police_init_module(void)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9765145aaf40..238dfd27e995 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -248,6 +248,7 @@ static struct pernet_operations sample_net_ops = {
 	.exit_batch = sample_exit_net,
 	.id   = &sample_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init sample_init_module(void)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 8244e221fe4f..91816d73f3f3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -216,6 +216,7 @@ static struct pernet_operations simp_net_ops = {
 	.exit_batch = simp_exit_net,
 	.id   = &simp_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ddf69fc01bdf..7971510fe61b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -253,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = {
 	.exit_batch = skbedit_exit_net,
 	.id   = &skbedit_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a406f191cb84..febec75f4f7a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -278,6 +278,7 @@ static struct pernet_operations skbmod_net_ops = {
 	.exit_batch = skbmod_exit_net,
 	.id   = &skbmod_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 41ff9d0e5c62..9169b7e78ada 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -337,6 +337,7 @@ static struct pernet_operations tunnel_key_net_ops = {
 	.exit_batch = tunnel_key_exit_net,
 	.id   = &tunnel_key_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init tunnel_key_init_module(void)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 71411a255f04..c2ee7fd51cc9 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -313,6 +313,7 @@ static struct pernet_operations vlan_net_ops = {
 	.exit_batch = vlan_exit_net,
 	.id   = &vlan_net_id,
 	.size = sizeof(struct tc_action_net),
+	.async = true,
 };
 
 static int __init vlan_init_module(void)

From 6963ad69cee28d3b2011ee9ff7d4f0abe20e52b9 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:00:40 +0300
Subject: [PATCH 0377/1678] net: Convert bond_net_ops

These pernet_operations populate/depopulate /proc and /sys
entries. Exit method unregisters all net bond devices, and
it seems another pernet_operations are not interested in
foreign net bond list. So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c669554d70bb..4c19d23dd282 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = {
 	.exit = bond_net_exit,
 	.id   = &bond_net_id,
 	.size = sizeof(struct bond_net),
+	.async = true,
 };
 
 static int __init bonding_init(void)

From f60f33460a8cc690398f3e978851271c20364aa3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:00:48 +0300
Subject: [PATCH 0378/1678] net: Convert geneve_net_ops

These pernet_operations are similar to bond_net_ops. Exit method
unregisters all net geneve devices, and it looks like another
pernet_operations are not interested in foreign net geneve list.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89a9b93..516dd59249d7 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = {
 	.exit_batch = geneve_exit_batch_net,
 	.id   = &geneve_net_id,
 	.size = sizeof(struct geneve_net),
+	.async = true,
 };
 
 static int __init geneve_init_module(void)

From 9e7674519151646b1f36b2bf9d5fbdccba6711d2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:01:16 +0300
Subject: [PATCH 0379/1678] net: Convert gtp_net_ops

These pernet_operations are similar to bond_net_ops. Exit method
unregisters all net gtp devices, and it looks like another
pernet_operations are not interested in foreign net gtp list.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gtp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index f38e32a7ec9c..127edd23018f 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = {
 	.exit	= gtp_net_exit,
 	.id	= &gtp_net_id,
 	.size	= sizeof(struct gtp_net),
+	.async	= true,
 };
 
 static int __init gtp_init(void)

From cd59b28ce9491243a14947376332689de6ec568a Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:01:25 +0300
Subject: [PATCH 0380/1678] net: Convert ppp_net_ops

These pernet_operations are similar to bond_net_ops. Exit method
unregisters all net ppp devices, and it looks like another
pernet_operations are not interested in foreign net ppp list.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 255a5def56e9..a393c1dff7dc 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -971,6 +971,7 @@ static struct pernet_operations ppp_net_ops = {
 	.exit = ppp_exit_net,
 	.id   = &ppp_net_id,
 	.size = sizeof(struct ppp_net),
+	.async = true,
 };
 
 static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)

From ef74c07cf17958d90bfda34a42b54a132fbee57e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:01:34 +0300
Subject: [PATCH 0381/1678] net: Convert vxlan_net_ops

These pernet_operations are similar to bond_net_ops. Exit method
unregisters all net vlanx devices, and it looks like another
pernet_operations are not interested in foreign net vlanx list.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fab7a4db249e..aa5f034d6ad1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = {
 	.exit_batch = vxlan_exit_batch_net,
 	.id   = &vxlan_net_id,
 	.size = sizeof(struct vxlan_net),
+	.async = true,
 };
 
 static int __init vxlan_init_module(void)

From 3cec5fb3476e07833fea9a1e2d5f6c629078b4ae Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:01:43 +0300
Subject: [PATCH 0382/1678] net: Convert br_net_ops

These pernet_operations are similar to bond_net_ops. Exit method
unregisters all net bridge devices, and it looks like another
pernet_operations are not interested in foreign net bridge list.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 6bf06e756df2..7770481a6506 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net)
 
 static struct pernet_operations br_net_ops = {
 	.exit	= br_net_exit,
+	.async	= true,
 };
 
 static const struct stp_proto br_stp_proto = {

From 31502104b301c0dcacd5df8e92fba9dcf20dfccb Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:01:52 +0300
Subject: [PATCH 0383/1678] net: Convert ipgre_net_ops, ipgre_tap_net_ops,
 erspan_net_ops, vti_net_ops and ipip_net_ops

These pernet_operations are similar to bond_net_ops. Exit methods
unregisters all net ipgre/ipgre_tap/erspan/vti/ipip devices, and it
looks like another pernet_operations are not interested in foreign
net ipgre/ipgre_tap/erspan/vti/ipip list. Init method also does not
intersect with something pernet-specific. So, it's possible
to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 3 +++
 net/ipv4/ip_vti.c | 1 +
 net/ipv4/ipip.c   | 1 +
 3 files changed, 5 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45d97e9b2759..e496afa47709 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1044,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = {
 	.exit_batch = ipgre_exit_batch_net,
 	.id   = &ipgre_net_id,
 	.size = sizeof(struct ip_tunnel_net),
+	.async = true,
 };
 
 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1623,6 +1624,7 @@ static struct pernet_operations ipgre_tap_net_ops = {
 	.exit_batch = ipgre_tap_exit_batch_net,
 	.id   = &gre_tap_net_id,
 	.size = sizeof(struct ip_tunnel_net),
+	.async = true,
 };
 
 static int __net_init erspan_init_net(struct net *net)
@@ -1641,6 +1643,7 @@ static struct pernet_operations erspan_net_ops = {
 	.exit_batch = erspan_exit_batch_net,
 	.id   = &erspan_net_id,
 	.size = sizeof(struct ip_tunnel_net),
+	.async = true,
 };
 
 static int __init ipgre_init(void)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 51b1669334fe..b10bf563afd9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = {
 	.exit_batch = vti_exit_batch_net,
 	.id   = &vti_net_id,
 	.size = sizeof(struct ip_tunnel_net),
+	.async = true,
 };
 
 static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c891235b4966..9c5a4d164f09 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = {
 	.exit_batch = ipip_exit_batch_net,
 	.id   = &ipip_net_id,
 	.size = sizeof(struct ip_tunnel_net),
+	.async = true,
 };
 
 static int __init ipip_init(void)

From 5c155c50244a0c6e1a7778ae7b4d2753e5e1f617 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:03 +0300
Subject: [PATCH 0384/1678] net: Convert ip6gre_net_ops

These pernet_operations are similar to bond_net_ops. Exit method
unregisters all net ip6gre devices, and it looks like another
pernet_operations are not interested in foreign net ip6gre list
or net_generic()->tunnels_wc. Init method registers net device.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c353125546d..3026662a6413 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1517,6 +1517,7 @@ static struct pernet_operations ip6gre_net_ops = {
 	.exit_batch = ip6gre_exit_batch_net,
 	.id   = &ip6gre_net_id,
 	.size = sizeof(struct ip6gre_net),
+	.async = true,
 };
 
 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],

From 66997ba0834ac1b3f22873d349614405e48c69d7 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:11 +0300
Subject: [PATCH 0385/1678] net: Convert ip6_tnl_net_ops

These pernet_operations are similar to ip6gre_net_ops. Exit method
unregisters all net ip6_tnl tunnels, and it looks like another
pernet_operations are not interested in foreign net tunnels list.
So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4b15fe928278..869e2e6750f7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2250,6 +2250,7 @@ static struct pernet_operations ip6_tnl_net_ops = {
 	.exit_batch = ip6_tnl_exit_batch_net,
 	.id   = &ip6_tnl_net_id,
 	.size = sizeof(struct ip6_tnl_net),
+	.async = true,
 };
 
 /**

From 5ecc29550add41a0f077b07840501d7a3abfc9db Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:19 +0300
Subject: [PATCH 0386/1678] net: Convert vti6_net_ops

These pernet_operations are similar to ip6_tnl_net_ops. Exit method
unregisters all net vti6 tunnels, and it looks like another
pernet_operations are not interested in foreign net vti6 list.
Init method registers netdevice. So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_vti.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index fa3ae1cb50d3..c617ea17faa8 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = {
 	.exit_batch = vti6_exit_batch_net,
 	.id   = &vti6_net_id,
 	.size = sizeof(struct vti6_net),
+	.async = true,
 };
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {

From 989d9812b7cabbda2e82f47e52dbc48ed4e40ce5 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:27 +0300
Subject: [PATCH 0387/1678] net: Convert sit_net_ops

These pernet_operations are similar to ip6_tnl_net_ops. Exit method
unregisters all net sit devices, and it looks like another
pernet_operations are not interested in foreign net sit list.
Init method registers netdevice. So, it's possible to mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3a1775a62973..182db078f01e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1878,6 +1878,7 @@ static struct pernet_operations sit_net_ops = {
 	.exit_batch = sit_exit_batch_net,
 	.id   = &sit_net_id,
 	.size = sizeof(struct sit_net),
+	.async = true,
 };
 
 static void __exit sit_cleanup(void)

From f17c9bf07f9cde430e5c566a5383875ffba104cf Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:37 +0300
Subject: [PATCH 0388/1678] net: Convert cfg802154_pernet_ops

These pernet_operations have only exit method, which
moves devices from cfg802154_rdev_list to init_net.
This may occur in any time from nl802154_wpan_phy_netns(),
so we are nice with rtnl_lock() synchronization.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index cb7176cd4cd6..9104943c15ba 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg802154_pernet_ops = {
 	.exit = cfg802154_pernet_exit,
+	.async = true,
 };
 
 static int __init wpan_phy_class_init(void)

From 68eabe8b660c4210f8a6dc1dbc1ae1cac5543a68 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:48 +0300
Subject: [PATCH 0389/1678] net: Convert ipvlan_net_ops

These pernet_operations unregister ipvlan net hooks.
nf_unregister_net_hooks() removes hooks one-by-one,
and then frees the memory via rcu. This looks similar
to that happens, when a new hooks is added: allocation
of bigger memory region, copy of old content, and rcu
freeing the old memory. So, all of net code should be
well with this behavior. Also at the time of hook
unregistering, there are no packets, and foreign net
pernet_operations are not interested in others hooks.
So, we mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 67c91ceda979..d05b902c925b 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -1024,6 +1024,7 @@ static struct pernet_operations ipvlan_net_ops = {
 	.id = &ipvlan_netid,
 	.size = sizeof(struct ipvlan_netns),
 	.exit = ipvlan_ns_exit,
+	.async = true,
 };
 
 static int __init ipvlan_init_module(void)

From 7ca9e67febb1441baa0481fbd46745288338afe6 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:02:56 +0300
Subject: [PATCH 0390/1678] net: Convert brnf_net_ops

These pernet_operations only unregister nf hooks.
So, they are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter_hooks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 27f1d4f2114a..484f54150525 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -967,6 +967,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
 	.exit = brnf_exit_net,
 	.id   = &brnf_net_id,
 	.size = sizeof(struct brnf_net),
+	.async = true,
 };
 
 static struct notifier_block brnf_notifier __read_mostly = {

From f95978b7ad0972610b14c65f13f69b3093ff9101 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:03:05 +0300
Subject: [PATCH 0391/1678] net: Convert clusterip_net_ops

These pernet_operations register and unregister nf hooks,
and populate and destroy /proc entry. So, they are able
to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4b02ab39ebc5..08b3e48f44fc 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -840,6 +840,7 @@ static struct pernet_operations clusterip_net_ops = {
 	.exit = clusterip_net_exit,
 	.id   = &clusterip_net_id,
 	.size = sizeof(struct clusterip_net),
+	.async = true,
 };
 
 static int __init clusterip_tg_init(void)

From e5b2ae93b523304461b2e4ddb59be47f4d4ed1b0 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:03:15 +0300
Subject: [PATCH 0392/1678] net: Convert defrag4_net_ops

These pernet_operations only unregister nf hooks.
So, they are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_defrag_ipv4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a0d3ad60a411..57244b62a4fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net)
 
 static struct pernet_operations defrag4_net_ops = {
 	.exit = defrag4_net_exit,
+	.async = true,
 };
 
 static int __init nf_defrag_init(void)

From afd7b3eb13463693250e82d19f8605312bb5c04d Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:03:36 +0300
Subject: [PATCH 0393/1678] net: Convert ila_net_ops

These pernet_operations register and unregister nf hooks.
Also they populate and depopulate ila_net_id-pointed hash
table. The table is changed by hooks during skb processing
and via netlink request. It looks impossible for another
net pernet_operations to force the table reading or writing,
so, they are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ila/ila_xlat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 44c39c5f0638..e438699f000f 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = {
 	.exit = ila_exit_net,
 	.id   = &ila_net_id,
 	.size = sizeof(struct ila_net),
+	.async = true,
 };
 
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)

From 9532ce17f7d59d1129165949076491c06f89d1b0 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:03:45 +0300
Subject: [PATCH 0394/1678] net: Convert defrag6_net_ops

These pernet_operations only unregister nf hooks.
So, they are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index c87b48359e8f..32f98bc06900 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net)
 
 static struct pernet_operations defrag6_net_ops = {
 	.exit = defrag6_net_exit,
+	.async = true,
 };
 
 static int __init nf_defrag_init(void)

From 79a4fb084326638b5e71e58e2207abffdf3ec031 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:03:55 +0300
Subject: [PATCH 0395/1678] net: Convert selinux_net_ops

These pernet_operations only register and unregister nf hooks.
So, they are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/hooks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 8644d864e3c1..b4d7b6242a40 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net)
 static struct pernet_operations selinux_net_ops = {
 	.init = selinux_nf_register,
 	.exit = selinux_nf_unregister,
+	.async = true,
 };
 
 static int __init selinux_nf_ip_init(void)

From 3edbccf96d2de30e7b986ceae090becfc07d3573 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Feb 2018 16:04:02 +0300
Subject: [PATCH 0396/1678] net: Convert smack_net_ops

These pernet_operations only register and unregister nf hooks.
So, they are able to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/smack/smack_netfilter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index e36d17835d4f..3f29c03162ca 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net)
 static struct pernet_operations smack_net_ops = {
 	.init = smack_nf_register,
 	.exit = smack_nf_unregister,
+	.async = true,
 };
 
 static int __init smack_nf_ip_init(void)

From bd42cd022e44c68254cedf5e04cbcc781dcceed8 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 18 Jan 2018 17:54:28 -0600
Subject: [PATCH 0397/1678] ssb: return boolean instead of integer in
 ssb_dma_translation_special_bit

Return statements in functions returning bool should use
true/false instead of 1/0.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/ssb/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 65420a9f0e82..fdb89b6bfb40 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -1116,7 +1116,7 @@ static bool ssb_dma_translation_special_bit(struct ssb_device *dev)
 			chip_id == 43231 || chip_id == 43222);
 	}
 
-	return 0;
+	return false;
 }
 
 u32 ssb_dma_translation(struct ssb_device *dev)

From 6942bdc4bf577e6fedff03111d10763a07622734 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:32 +0800
Subject: [PATCH 0398/1678] rtlwifi: enable mac80211 fast-tx support

Enable the mac80211 fast-tx feature, since our driver already support
hw_flags required by fast-tx and is able to let mac80211 stack to transmit
packet through fast-xmit path.

Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index d6c03bd5cc65..e461eed32699 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -397,6 +397,7 @@ static void _rtl_init_mac80211(struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, MFP_CAPABLE);
 	ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS);
 	ieee80211_hw_set(hw, SUPPORTS_AMSDU_IN_AMPDU);
+	ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
 
 	/* swlps or hwlps has been set in diff chip in init_sw_vars */
 	if (rtlpriv->psc.swctrl_lps) {

From 1ca72c3047aa1146fe22b1e79be713ac90572827 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:33 +0800
Subject: [PATCH 0399/1678] rtlwifi: Add Support VHT to spec_ver

We are going to add 8822be, which is a VHT 2x2 wifi chip, so add VHT flag
to replace enumeration of chips.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c         | 6 ++++--
 drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 1 +
 drivers/net/wireless/realtek/rtlwifi/wifi.h         | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index e461eed32699..a2da057d3cdd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -244,6 +244,9 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw,
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
 
+	if (!(rtlpriv->cfg->spec_ver & RTL_SPEC_SUPPORT_VHT))
+		return;
+
 	if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE ||
 	    rtlhal->hw_type == HARDWARE_TYPE_RTL8822BE) {
 		u16 mcs_map;
@@ -887,8 +890,7 @@ static void _rtl_query_bandwidth_mode(struct ieee80211_hw *hw,
 
 	tcb_desc->packet_bw = HT_CHANNEL_WIDTH_20_40;
 
-	if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8812AE ||
-	    rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8821AE) {
+	if (rtlpriv->cfg->spec_ver & RTL_SPEC_SUPPORT_VHT) {
 		if (mac->opmode == NL80211_IFTYPE_AP ||
 		    mac->opmode == NL80211_IFTYPE_ADHOC ||
 		    mac->opmode == NL80211_IFTYPE_MESH_POINT) {
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index ab5d462b1a3a..9bb3d9dfce79 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -328,6 +328,7 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
 	.alt_fw_name = "rtlwifi/rtl8821aefw.bin",
 	.ops = &rtl8821ae_hal_ops,
 	.mod_params = &rtl8821ae_mod_params,
+	.spec_ver = RTL_SPEC_SUPPORT_VHT,
 	.maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
 	.maps[SYS_FUNC_EN] = REG_SYS_FUNC_EN,
 	.maps[SYS_CLK] = REG_SYS_CLKR,
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 46dcb7fef195..f6fe7ca120b8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -952,6 +952,7 @@ enum package_type {
 
 enum rtl_spec_ver {
 	RTL_SPEC_NEW_RATEID = BIT(0),	/* use ratr_table_mode_new */
+	RTL_SPEC_SUPPORT_VHT = BIT(1),	/* support VHT */
 };
 
 struct octet_string {

From 5f380ceff5f5c84039cd399120c5cb11f39376a8 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:34 +0800
Subject: [PATCH 0400/1678] rtlwifi: Use 6 bits as sequence number of TX report

In new design, SW_DEFINE[1:0] of tx desc are used by firmware, and the TX
report only contains SW_DEFINE[7:0]. To satisfy with all cases, driver uses
SW_DEFINE[7:2] as sequence number.
Besides, the format of tx report have been changed, so a new flag
RTL_SPEC_EXT_C2H is used to access report.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 22 +++++++++++++++++----
 drivers/net/wireless/realtek/rtlwifi/wifi.h |  8 ++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index a2da057d3cdd..92f99a303920 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1597,7 +1597,11 @@ static u16 rtl_get_tx_report_sn(struct ieee80211_hw *hw)
 	struct rtl_tx_report *tx_report = &rtlpriv->tx_report;
 	u16 sn;
 
-	sn = atomic_inc_return(&tx_report->sn) & 0x0FFF;
+	/* SW_DEFINE[11:8] are reserved (driver fills zeros)
+	 * SW_DEFINE[7:2] are used by driver
+	 * SW_DEFINE[1:0] are reserved for firmware (driver fills zeros)
+	 */
+	sn = (atomic_inc_return(&tx_report->sn) & 0x003F) << 2;
 
 	tx_report->last_sent_sn = sn;
 	tx_report->last_sent_time = jiffies;
@@ -1625,14 +1629,23 @@ void rtl_tx_report_handler(struct ieee80211_hw *hw, u8 *tmp_buf, u8 c2h_cmd_len)
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_tx_report *tx_report = &rtlpriv->tx_report;
 	u16 sn;
+	u8 st, retry;
 
-	sn = ((tmp_buf[7] & 0x0F) << 8) | tmp_buf[6];
+	if (rtlpriv->cfg->spec_ver & RTL_SPEC_EXT_C2H) {
+		sn = GET_TX_REPORT_SN_V2(tmp_buf);
+		st = GET_TX_REPORT_ST_V2(tmp_buf);
+		retry = GET_TX_REPORT_RETRY_V2(tmp_buf);
+	} else {
+		sn = GET_TX_REPORT_SN_V1(tmp_buf);
+		st = GET_TX_REPORT_ST_V1(tmp_buf);
+		retry = GET_TX_REPORT_RETRY_V1(tmp_buf);
+	}
 
 	tx_report->last_recv_sn = sn;
 
 	RT_TRACE(rtlpriv, COMP_TX_REPORT, DBG_DMESG,
 		 "Recv TX-Report st=0x%02X sn=0x%X retry=0x%X\n",
-		 tmp_buf[0], sn, tmp_buf[2]);
+		 st, sn, retry);
 }
 EXPORT_SYMBOL_GPL(rtl_tx_report_handler);
 
@@ -1646,7 +1659,8 @@ bool rtl_check_tx_report_acked(struct ieee80211_hw *hw)
 
 	if (time_before(tx_report->last_sent_time + 3 * HZ, jiffies)) {
 		RT_TRACE(rtlpriv, COMP_TX_REPORT, DBG_WARNING,
-			 "Check TX-Report timeout!!\n");
+			 "Check TX-Report timeout!! s_sn=0x%X r_sn=0x%X\n",
+			 tx_report->last_sent_sn, tx_report->last_recv_sn);
 		return true;	/* 3 sec. (timeout) seen as acked */
 	}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index f6fe7ca120b8..31ef9f72bef1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -154,6 +154,13 @@ enum rtl8192c_h2c_cmd {
 	MAX_H2CCMD
 };
 
+#define GET_TX_REPORT_SN_V1(c2h)	(c2h[6])
+#define GET_TX_REPORT_ST_V1(c2h)	(c2h[0] & 0xC0)
+#define GET_TX_REPORT_RETRY_V1(c2h)	(c2h[2] & 0x3F)
+#define GET_TX_REPORT_SN_V2(c2h)	(c2h[6])
+#define GET_TX_REPORT_ST_V2(c2h)	(c2h[7] & 0xC0)
+#define GET_TX_REPORT_RETRY_V2(c2h)	(c2h[8] & 0x3F)
+
 #define MAX_TX_COUNT			4
 #define MAX_REGULATION_NUM		4
 #define MAX_RF_PATH_NUM			4
@@ -953,6 +960,7 @@ enum package_type {
 enum rtl_spec_ver {
 	RTL_SPEC_NEW_RATEID = BIT(0),	/* use ratr_table_mode_new */
 	RTL_SPEC_SUPPORT_VHT = BIT(1),	/* support VHT */
+	RTL_SPEC_EXT_C2H = BIT(2),	/* extend FW C2H (e.g. TX REPORT) */
 };
 
 struct octet_string {

From 4a7093b914aaee8d6fd7da75789a075745bc2d29 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:35 +0800
Subject: [PATCH 0401/1678] rtlwifi: Extend tx_power_by_rate_offset size for
 newer IC

In older design, the TX power is grouped into rate section (smaller array
size), but new design groups them into rate (larger array size). Thus,
we extend the size for both cases, and add compile time assertion.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 2 ++
 drivers/net/wireless/realtek/rtlwifi/wifi.h | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 92f99a303920..c000c85a3ded 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -2646,6 +2646,8 @@ EXPORT_SYMBOL_GPL(rtl_global_var);
 
 static int __init rtl_core_module_init(void)
 {
+	BUILD_BUG_ON(TX_PWR_BY_RATE_NUM_RATE < TX_PWR_BY_RATE_NUM_SECTION);
+
 	if (rtl_rate_control_register())
 		pr_err("rtl: Unable to register rtl_rc, use default RC !!\n");
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 31ef9f72bef1..29c1289ad6b2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -174,6 +174,7 @@ enum rtl8192c_h2c_cmd {
 #define TX_PWR_BY_RATE_NUM_BAND		2
 #define TX_PWR_BY_RATE_NUM_RF		4
 #define TX_PWR_BY_RATE_NUM_SECTION	12
+#define TX_PWR_BY_RATE_NUM_RATE		84 /* >= TX_PWR_BY_RATE_NUM_SECTION */
 #define MAX_BASE_NUM_IN_PHY_REG_PG_24G  6
 #define MAX_BASE_NUM_IN_PHY_REG_PG_5G	5
 
@@ -1286,7 +1287,7 @@ struct rtl_phy {
 	u32 tx_power_by_rate_offset[TX_PWR_BY_RATE_NUM_BAND]
 				   [TX_PWR_BY_RATE_NUM_RF]
 				   [TX_PWR_BY_RATE_NUM_RF]
-				   [TX_PWR_BY_RATE_NUM_SECTION];
+				   [TX_PWR_BY_RATE_NUM_RATE];
 	u8 txpwr_by_rate_base_24g[TX_PWR_BY_RATE_NUM_RF]
 				 [TX_PWR_BY_RATE_NUM_RF]
 				 [MAX_BASE_NUM_IN_PHY_REG_PG_24G];

From 81b813ed2fde96fd28cee477281b724ea4a28dcc Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:36 +0800
Subject: [PATCH 0402/1678] rtlwifi: Add rate section and its related
 definition and comment

Add comments to make it to be easier to understand, and add compile time
assertions.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 3 +++
 drivers/net/wireless/realtek/rtlwifi/wifi.h | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index c000c85a3ded..6db3389e2ced 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -2647,6 +2647,9 @@ EXPORT_SYMBOL_GPL(rtl_global_var);
 static int __init rtl_core_module_init(void)
 {
 	BUILD_BUG_ON(TX_PWR_BY_RATE_NUM_RATE < TX_PWR_BY_RATE_NUM_SECTION);
+	BUILD_BUG_ON(MAX_RATE_SECTION_NUM != MAX_RATE_SECTION);
+	BUILD_BUG_ON(MAX_BASE_NUM_IN_PHY_REG_PG_24G != MAX_RATE_SECTION);
+	BUILD_BUG_ON(MAX_BASE_NUM_IN_PHY_REG_PG_5G != (MAX_RATE_SECTION - 1));
 
 	if (rtl_rate_control_register())
 		pr_err("rtl: Unable to register rtl_rc, use default RC !!\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 29c1289ad6b2..8dba4d9952be 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -164,7 +164,7 @@ enum rtl8192c_h2c_cmd {
 #define MAX_TX_COUNT			4
 #define MAX_REGULATION_NUM		4
 #define MAX_RF_PATH_NUM			4
-#define MAX_RATE_SECTION_NUM		6
+#define MAX_RATE_SECTION_NUM		6	/* = MAX_RATE_SECTION */
 #define MAX_2_4G_BANDWIDTH_NUM		4
 #define MAX_5G_BANDWIDTH_NUM		4
 #define	MAX_RF_PATH			4
@@ -175,8 +175,8 @@ enum rtl8192c_h2c_cmd {
 #define TX_PWR_BY_RATE_NUM_RF		4
 #define TX_PWR_BY_RATE_NUM_SECTION	12
 #define TX_PWR_BY_RATE_NUM_RATE		84 /* >= TX_PWR_BY_RATE_NUM_SECTION */
-#define MAX_BASE_NUM_IN_PHY_REG_PG_24G  6
-#define MAX_BASE_NUM_IN_PHY_REG_PG_5G	5
+#define MAX_BASE_NUM_IN_PHY_REG_PG_24G	6  /* MAX_RATE_SECTION */
+#define MAX_BASE_NUM_IN_PHY_REG_PG_5G	5  /* MAX_RATE_SECTION -1 */
 
 #define BUFDESC_SEG_NUM		1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */
 
@@ -272,6 +272,7 @@ enum rate_section {
 	HT_MCS8_MCS15,
 	VHT_1SSMCS0_1SSMCS9,
 	VHT_2SSMCS0_2SSMCS9,
+	MAX_RATE_SECTION,
 };
 
 enum intf_type {

From 50da5da1562b0bb54c537b0d218dcbbdd9843775 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:37 +0800
Subject: [PATCH 0403/1678] rtlwifi: Fix VHT NSS in RC

NSS is a argument of highest rate in RC, and it occupies bit 4-7 so use
ieee80211_rate_set_vht() to fill the values. Since it got correct rate
index, we don't need to check chips to assign NSS in set function anymore.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/rc.c | 55 +++++++++++++++++++----
 1 file changed, 46 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c
index d1cb7d405618..6c78c6dabbdf 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rc.c
@@ -42,6 +42,23 @@ static u8 _rtl_rc_get_highest_rix(struct rtl_priv *rtlpriv,
 	struct rtl_phy *rtlphy = &(rtlpriv->phy);
 	struct rtl_sta_info *sta_entry = NULL;
 	u16 wireless_mode = 0;
+	u8 nss;
+	struct ieee80211_tx_rate rate;
+
+	switch (get_rf_type(rtlphy)) {
+	case RF_4T4R:
+		nss = 4;
+		break;
+	case RF_3T3R:
+		nss = 3;
+		break;
+	case RF_2T2R:
+		nss = 2;
+		break;
+	default:
+		nss = 1;
+		break;
+	}
 
 	/*
 	 *this rate is no use for true rate, firmware
@@ -66,28 +83,51 @@ static u8 _rtl_rc_get_highest_rix(struct rtl_priv *rtlpriv,
 			} else if (wireless_mode == WIRELESS_MODE_G) {
 				return G_MODE_MAX_RIX;
 			} else if (wireless_mode == WIRELESS_MODE_N_24G) {
-				if (get_rf_type(rtlphy) != RF_2T2R)
+				if (nss == 1)
 					return N_MODE_MCS7_RIX;
 				else
 					return N_MODE_MCS15_RIX;
 			} else if (wireless_mode == WIRELESS_MODE_AC_24G) {
-				return AC_MODE_MCS9_RIX;
+				if (sta->bandwidth == IEEE80211_STA_RX_BW_20) {
+					ieee80211_rate_set_vht(&rate,
+							       AC_MODE_MCS8_RIX,
+							       nss);
+					goto out;
+				} else {
+					ieee80211_rate_set_vht(&rate,
+							       AC_MODE_MCS9_RIX,
+							       nss);
+					goto out;
+				}
 			}
 			return 0;
 		} else {
 			if (wireless_mode == WIRELESS_MODE_A) {
 				return A_MODE_MAX_RIX;
 			} else if (wireless_mode == WIRELESS_MODE_N_5G) {
-				if (get_rf_type(rtlphy) != RF_2T2R)
+				if (nss == 1)
 					return N_MODE_MCS7_RIX;
 				else
 					return N_MODE_MCS15_RIX;
 			} else if (wireless_mode == WIRELESS_MODE_AC_5G) {
-				return AC_MODE_MCS9_RIX;
+				if (sta->bandwidth == IEEE80211_STA_RX_BW_20) {
+					ieee80211_rate_set_vht(&rate,
+							       AC_MODE_MCS8_RIX,
+							       nss);
+					goto out;
+				} else {
+					ieee80211_rate_set_vht(&rate,
+							       AC_MODE_MCS9_RIX,
+							       nss);
+					goto out;
+				}
 			}
 			return 0;
 		}
 	}
+
+out:
+	return rate.idx;
 }
 
 static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
@@ -111,9 +151,6 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
 	}
 	rate->count = tries;
 	rate->idx = rix >= 0x00 ? rix : 0x00;
-	if (rtlpriv->rtlhal.hw_type == HARDWARE_TYPE_RTL8812AE &&
-	    wireless_mode == WIRELESS_MODE_AC_5G)
-		rate->idx += 0x10;/*2NSS for 8812AE*/
 
 	if (!not_data) {
 		if (txrc->short_preamble)
@@ -126,10 +163,10 @@ static void _rtl_rc_rate_set_series(struct rtl_priv *rtlpriv,
 			if (sta && sta->vht_cap.vht_supported)
 				rate->flags |= IEEE80211_TX_RC_80_MHZ_WIDTH;
 		} else {
-			if (mac->bw_40)
-				rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
 			if (mac->bw_80)
 				rate->flags |= IEEE80211_TX_RC_80_MHZ_WIDTH;
+			else if (mac->bw_40)
+				rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
 		}
 
 		if (sgi_20 || sgi_40 || sgi_80)

From ed979a1ed46ba98307ce624f114056b138fbe600 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:38 +0800
Subject: [PATCH 0404/1678] rtlwifi: add definition radio_mask for RF and
 maximum bandwidth

Add rf mask definition (BIT 0, BIT 1, BIT 2, ...) that is different from
rf path definition (0, 1, 2, ...), and then combinations of rf path are
possible.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/wifi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 8dba4d9952be..a789205169fb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -287,6 +287,13 @@ enum radio_path {
 	RF90_PATH_D = 3,
 };
 
+enum radio_mask {
+	RF_MASK_A = BIT(0),
+	RF_MASK_B = BIT(1),
+	RF_MASK_C = BIT(2),
+	RF_MASK_D = BIT(3),
+};
+
 enum regulation_txpwr_lmt {
 	TXPWR_LMT_FCC = 0,
 	TXPWR_LMT_MKK = 1,
@@ -580,6 +587,7 @@ enum ht_channel_width {
 	HT_CHANNEL_WIDTH_20 = 0,
 	HT_CHANNEL_WIDTH_20_40 = 1,
 	HT_CHANNEL_WIDTH_80 = 2,
+	HT_CHANNEL_WIDTH_MAX,
 };
 
 /* Ref: 802.11i sepc D10.0 7.3.2.25.1

From 2cdd634e7e364af7104901d9f81065cddfcb96de Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:39 +0800
Subject: [PATCH 0405/1678] rtlwifi: add efuse ops for other components

The new component phydm need to access efuse content, so we prepare ops
for reference.

Signed-off-by: Tsang-Shian Lin <thlin@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/efuse.c | 13 +++++++++++++
 drivers/net/wireless/realtek/rtlwifi/efuse.h |  2 +-
 drivers/net/wireless/realtek/rtlwifi/pci.c   |  1 +
 drivers/net/wireless/realtek/rtlwifi/wifi.h  |  7 +++++++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c b/drivers/net/wireless/realtek/rtlwifi/efuse.c
index 35b50be633f1..fd13d4ef53b8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.c
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c
@@ -50,6 +50,11 @@ static const struct efuse_map RTL8712_SDIO_EFUSE_TABLE[] = {
 	{11, 0, 0, 28}
 };
 
+static const struct rtl_efuse_ops efuse_ops = {
+	.efuse_onebyte_read = efuse_one_byte_read,
+	.efuse_logical_map_read = efuse_shadow_read,
+};
+
 static void efuse_shadow_read_1byte(struct ieee80211_hw *hw, u16 offset,
 				    u8 *value);
 static void efuse_shadow_read_2byte(struct ieee80211_hw *hw, u16 offset,
@@ -1364,3 +1369,11 @@ void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen)
 	*pfwlen = fwlen;
 }
 EXPORT_SYMBOL_GPL(rtl_fill_dummy);
+
+void rtl_efuse_ops_init(struct ieee80211_hw *hw)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+
+	rtlpriv->efuse.efuse_ops = &efuse_ops;
+}
+EXPORT_SYMBOL_GPL(rtl_efuse_ops_init);
diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h b/drivers/net/wireless/realtek/rtlwifi/efuse.h
index 952fdc288f0e..dfa31c13fc7a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/efuse.h
+++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h
@@ -116,5 +116,5 @@ void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen);
 void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer,
 		       u32 size);
 void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size);
-
+void rtl_efuse_ops_init(struct ieee80211_hw *hw);
 #endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 01ccf8884831..2437422625bf 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -2238,6 +2238,7 @@ int rtl_pci_probe(struct pci_dev *pdev,
 	rtlpriv->cfg = (struct rtl_hal_cfg *)(id->driver_data);
 	rtlpriv->intf_ops = &rtl_pci_ops;
 	rtlpriv->glb_var = &rtl_global_var;
+	rtl_efuse_ops_init(hw);
 
 	/* MEM map */
 	err = pci_request_regions(pdev, KBUILD_MODNAME);
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index a789205169fb..f13aa5c7c814 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -1813,6 +1813,7 @@ struct rtl_dm {
 #define	EFUSE_MAX_LOGICAL_SIZE			512
 
 struct rtl_efuse {
+	const struct rtl_efuse_ops *efuse_ops;
 	bool autoLoad_ok;
 	bool bootfromefuse;
 	u16 max_physical_size;
@@ -1918,6 +1919,12 @@ struct rtl_efuse {
 	u8 channel_plan;
 };
 
+struct rtl_efuse_ops {
+	int (*efuse_onebyte_read)(struct ieee80211_hw *hw, u16 addr, u8 *data);
+	void (*efuse_logical_map_read)(struct ieee80211_hw *hw, u8 type,
+				       u16 offset, u32 *value);
+};
+
 struct rtl_tx_report {
 	atomic_t sn;
 	u16 last_sent_sn;

From d7297a86fc86d1e373c2f26e3c1cfcf7b6129217 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:40 +0800
Subject: [PATCH 0406/1678] rtlwifi: btcoex: add routine to set default port id

Tell wifi and BT firmware the default port ID to set multiports' state
properly, but only 8822be needs this function currently.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.c  | 22 +++++++++++++++++++
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.h  |  2 ++
 drivers/net/wireless/realtek/rtlwifi/wifi.h   |  5 +++++
 3 files changed, 29 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 1404729441a2..823694cb4fdb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -1039,6 +1039,28 @@ static void halbtc_fill_h2c_cmd(void *bt_context, u8 element_id,
 					cmd_len, cmd_buf);
 }
 
+void halbtc_send_wifi_port_id_cmd(void *bt_context)
+{
+	struct btc_coexist *btcoexist = (struct btc_coexist *)bt_context;
+	struct rtl_priv *rtlpriv = btcoexist->adapter;
+	u8 cmd_buf[1] = {0};	/* port id [2:0] = 0 */
+
+	rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, H2C_BT_PORT_ID,
+					1, cmd_buf);
+}
+
+void halbtc_set_default_port_id_cmd(void *bt_context)
+{
+	struct btc_coexist *btcoexist = (struct btc_coexist *)bt_context;
+	struct rtl_priv *rtlpriv = btcoexist->adapter;
+	struct ieee80211_hw *hw = rtlpriv->mac80211.hw;
+
+	if (!rtlpriv->cfg->ops->set_default_port_id_cmd)
+		return;
+
+	rtlpriv->cfg->ops->set_default_port_id_cmd(hw);
+}
+
 static
 void halbtc_set_bt_reg(void *btc_context, u8 reg_type, u32 offset, u32 set_val)
 {
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index 8ed217656539..f5d8159a88eb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -691,6 +691,8 @@ void exhalbtc_lps_leave(struct btc_coexist *btcoexist);
 void exhalbtc_low_wifi_traffic_notify(struct btc_coexist *btcoexist);
 void exhalbtc_set_single_ant_path(struct btc_coexist *btcoexist,
 				  u8 single_ant_path);
+void halbtc_send_wifi_port_id_cmd(void *bt_context);
+void halbtc_set_default_port_id_cmd(void *bt_context);
 
 /* The following are used by wifi_only case */
 enum wifionly_chip_interface {
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index f13aa5c7c814..4f48b934ec01 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -154,6 +154,10 @@ enum rtl8192c_h2c_cmd {
 	MAX_H2CCMD
 };
 
+enum {
+	H2C_BT_PORT_ID = 0x71,
+};
+
 #define GET_TX_REPORT_SN_V1(c2h)	(c2h[6])
 #define GET_TX_REPORT_ST_V1(c2h)	(c2h[0] & 0xC0)
 #define GET_TX_REPORT_RETRY_V1(c2h)	(c2h[2] & 0x3F)
@@ -2257,6 +2261,7 @@ struct rtl_hal_ops {
 	void (*bt_coex_off_before_lps) (struct ieee80211_hw *hw);
 	void (*fill_h2c_cmd) (struct ieee80211_hw *hw, u8 element_id,
 			      u32 cmd_len, u8 *p_cmdbuffer);
+	void (*set_default_port_id_cmd)(struct ieee80211_hw *hw);
 	bool (*get_btc_status) (void);
 	bool (*is_fw_header)(struct rtlwifi_firmware_header *hdr);
 	u32 (*rx_command_packet)(struct ieee80211_hw *hw,

From 94907e8d135b61011d157097c92c9f28983d96ea Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 29 Jan 2018 11:26:41 +0800
Subject: [PATCH 0407/1678] rtlwifi: btcoex: Add 8822be btcoex supported files
 for wifi only

The wifi only btcoex is used to solo card (without BT), and it is also
useful to exclude the interference with BT to make debug easier.
There are only four ops for wifi only btcoex to initialze antenna and
switch the settings while band is changed.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../rtlwifi/btcoexist/halbtc8822bwifionly.c   | 55 +++++++++++++++++++
 .../rtlwifi/btcoexist/halbtc8822bwifionly.h   | 25 +++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c
 create mode 100644 drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c
new file mode 100644
index 000000000000..951b8c1e0153
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.c
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2016-2017  Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ *****************************************************************************/
+#include "halbt_precomp.h"
+
+void ex_hal8822b_wifi_only_hw_config(struct wifi_only_cfg *wifionlycfg)
+{
+	/*BB control*/
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0x4c, 0x01800000, 0x2);
+	/*SW control*/
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0xcb4, 0xff, 0x77);
+	/*antenna mux switch */
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0x974, 0x300, 0x3);
+
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0x1990, 0x300, 0x0);
+
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x80000, 0x0);
+	/*switch to WL side controller and gnt_wl gnt_bt debug signal */
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0x70, 0xff000000, 0x0e);
+	/*gnt_wl=1 , gnt_bt=0*/
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0x1704, 0xffffffff, 0x7700);
+	halwifionly_phy_set_bb_reg(wifionlycfg, 0x1700, 0xffffffff, 0xc00f0038);
+}
+
+void ex_hal8822b_wifi_only_scannotify(struct wifi_only_cfg *wifionlycfg,
+				      u8 is_5g)
+{
+	hal8822b_wifi_only_switch_antenna(wifionlycfg, is_5g);
+}
+
+void ex_hal8822b_wifi_only_switchbandnotify(struct wifi_only_cfg *wifionlycfg,
+					    u8 is_5g)
+{
+	hal8822b_wifi_only_switch_antenna(wifionlycfg, is_5g);
+}
+
+void hal8822b_wifi_only_switch_antenna(struct wifi_only_cfg *wifionlycfg,
+				       u8 is_5g)
+{
+	if (is_5g)
+		halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x300, 0x1);
+	else
+		halwifionly_phy_set_bb_reg(wifionlycfg, 0xcbc, 0x300, 0x2);
+}
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h
new file mode 100644
index 000000000000..6ec356542eea
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8822bwifionly.h
@@ -0,0 +1,25 @@
+/******************************************************************************
+ *
+ * Copyright(c) 2016-2017  Realtek Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ *****************************************************************************/
+#ifndef __INC_HAL8822BWIFIONLYHWCFG_H
+#define __INC_HAL8822BWIFIONLYHWCFG_H
+
+void ex_hal8822b_wifi_only_hw_config(struct wifi_only_cfg *wifionlycfg);
+void ex_hal8822b_wifi_only_scannotify(struct wifi_only_cfg *wifionlycfg,
+				      u8 is_5g);
+void ex_hal8822b_wifi_only_switchbandnotify(struct wifi_only_cfg *wifionlycfg,
+					    u8 is_5g);
+void hal8822b_wifi_only_switch_antenna(struct wifi_only_cfg *wifionlycfg,
+				       u8 is_5g);
+#endif

From 76d1f95983a0903eaff857ba1e4ec0ac9febf225 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 30 Jan 2018 18:25:37 +0000
Subject: [PATCH 0408/1678] wireless: zd1211rw: remove redundant assignment of
 pointer 'q'

Pointer q is initialized and then almost immediately afterwards being
re-assigned the same value. Remove the second redundant assignment.

Cleans up clang warning:
drivers/net/wireless/zydas/zd1211rw/zd_mac.c:503:23: warning: Value
stored to 'q' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/zydas/zd1211rw/zd_mac.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
index b785742bfd9e..b01b44a5d16e 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c
@@ -509,7 +509,6 @@ void zd_mac_tx_failed(struct urb *urb)
 	int found = 0;
 	int i, position = 0;
 
-	q = &mac->ack_wait_queue;
 	spin_lock_irqsave(&q->lock, flags);
 
 	skb_queue_walk(q, skb) {

From fb239c1209bb0f0b4830cc72507cc2f2d63fadbd Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Thu, 8 Feb 2018 16:57:12 -0800
Subject: [PATCH 0409/1678] rtlwifi: rtl8192cu: Remove variable self-assignment
 in rf.c

In _rtl92c_get_txpower_writeval_by_regulatory() the variable writeVal
is assigned to itself in an if ... else statement, apparently only to
document that the branch condition is handled and that a previously read
value should be returned unmodified. The self-assignment causes clang to
raise the following warning:

drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c:304:13:
  error: explicitly assigning value of variable of type 'u32'
    (aka 'unsigned int') to itself [-Werror,-Wself-assign]
  writeVal = writeVal;

Delete the branch with the self-assignment.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
index 9cff6bc4049c..cf551785eb08 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c
@@ -299,9 +299,6 @@ static void _rtl92c_get_txpower_writeval_by_regulatory(struct ieee80211_hw *hw,
 			writeVal = 0x00000000;
 		if (rtlpriv->dm.dynamic_txhighpower_lvl == TXHIGHPWRLEVEL_BT1)
 			writeVal = writeVal - 0x06060606;
-		else if (rtlpriv->dm.dynamic_txhighpower_lvl ==
-			 TXHIGHPWRLEVEL_BT2)
-			writeVal = writeVal;
 		*(p_outwriteval + rf) = writeVal;
 	}
 }

From dcdd54c2bcae086a5834d3f823914d7535843b17 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Feb 2018 14:24:41 +0100
Subject: [PATCH 0410/1678] rtlwifi: rtl8192cu: remove pointless memcpy

gcc-8 points out that source and destination of the memcpy() are
always the same pointer, so the effect of memcpy() is undefined
here (its arguments must not overlap):

drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c: In function '_rtl_rx_process':
drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c:430:2: error: 'memcpy' source argument is the same as destination [-Werror=restrict]

Most likely this is harmless, but it's easy to just remove the
line and get rid of the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
index ac4a82de40c7..9ab56827124e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
@@ -427,7 +427,6 @@ static void _rtl_rx_process(struct ieee80211_hw *hw, struct sk_buff *skb)
 		 (u32)hdr->addr1[0], (u32)hdr->addr1[1],
 		 (u32)hdr->addr1[2], (u32)hdr->addr1[3],
 		 (u32)hdr->addr1[4], (u32)hdr->addr1[5]);
-	memcpy(IEEE80211_SKB_RXCB(skb), rx_status, sizeof(*rx_status));
 	ieee80211_rx(hw, skb);
 }
 

From 1d5e3b90ab0160375d15dd8f322c1bfdb8a83e1e Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Sat, 10 Feb 2018 17:04:17 +0300
Subject: [PATCH 0411/1678] qtnfmac: fix releasing Tx/Rx data buffers

Add missing PCI unmap for Tx buffers and release all buffers explicitly.
Managed release using devm_add_action is not suitable for qtnfmac Tx/Rx
data buffers. The reason is in ordering and dependencies: buffers
should be released after transmission is stopped but before PCI
device resources and DMA allocations are released.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../wireless/quantenna/qtnfmac/pearl/pcie.c   | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
index 6f6190964320..be5813aa1486 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
@@ -478,10 +478,11 @@ static int alloc_rx_buffers(struct qtnf_pcie_bus_priv *priv)
 }
 
 /* all rx/tx activity should have ceased before calling this function */
-static void free_xfer_buffers(void *data)
+static void qtnf_free_xfer_buffers(struct qtnf_pcie_bus_priv *priv)
 {
-	struct qtnf_pcie_bus_priv *priv = (struct qtnf_pcie_bus_priv *)data;
+	struct qtnf_tx_bd *txbd;
 	struct qtnf_rx_bd *rxbd;
+	struct sk_buff *skb;
 	dma_addr_t paddr;
 	int i;
 
@@ -489,19 +490,26 @@ static void free_xfer_buffers(void *data)
 	for (i = 0; i < priv->rx_bd_num; i++) {
 		if (priv->rx_skb && priv->rx_skb[i]) {
 			rxbd = &priv->rx_bd_vbase[i];
+			skb = priv->rx_skb[i];
 			paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
 					      le32_to_cpu(rxbd->addr));
 			pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
 					 PCI_DMA_FROMDEVICE);
-
-			dev_kfree_skb_any(priv->rx_skb[i]);
+			dev_kfree_skb_any(skb);
+			priv->rx_skb[i] = NULL;
 		}
 	}
 
 	/* free tx buffers */
 	for (i = 0; i < priv->tx_bd_num; i++) {
 		if (priv->tx_skb && priv->tx_skb[i]) {
-			dev_kfree_skb_any(priv->tx_skb[i]);
+			txbd = &priv->tx_bd_vbase[i];
+			skb = priv->tx_skb[i];
+			paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
+					      le32_to_cpu(txbd->addr));
+			pci_unmap_single(priv->pdev, paddr, skb->len,
+					 PCI_DMA_TODEVICE);
+			dev_kfree_skb_any(skb);
 			priv->tx_skb[i] = NULL;
 		}
 	}
@@ -1321,12 +1329,6 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_base;
 	}
 
-	ret = devm_add_action(&pdev->dev, free_xfer_buffers, (void *)pcie_priv);
-	if (ret) {
-		pr_err("custom release callback init failed\n");
-		goto err_base;
-	}
-
 	ret = qtnf_pcie_init_xfer(pcie_priv);
 	if (ret) {
 		pr_err("PCIE xfer init failed\n");
@@ -1343,7 +1345,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 			       "qtnf_pcie_irq", (void *)bus);
 	if (ret) {
 		pr_err("failed to request pcie irq %d\n", pdev->irq);
-		goto err_base;
+		goto err_xfer;
 	}
 
 	tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
@@ -1387,6 +1389,9 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 err_bringup_fw:
 	netif_napi_del(&bus->mux_napi);
 
+err_xfer:
+	qtnf_free_xfer_buffers(pcie_priv);
+
 err_base:
 	flush_workqueue(pcie_priv->workqueue);
 	destroy_workqueue(pcie_priv->workqueue);
@@ -1416,6 +1421,7 @@ static void qtnf_pcie_remove(struct pci_dev *pdev)
 	destroy_workqueue(priv->workqueue);
 	tasklet_kill(&priv->reclaim_tq);
 
+	qtnf_free_xfer_buffers(priv);
 	qtnf_debugfs_remove(bus);
 
 	qtnf_pcie_free_shm_ipc(priv);

From a34d7bcb73f553f1ea65eeff744c66e77b167175 Mon Sep 17 00:00:00 2001
From: Sergei Maksimenko <smaksimenko@quantenna.com>
Date: Sat, 10 Feb 2018 17:04:18 +0300
Subject: [PATCH 0412/1678] qtnfmac: enable reloading of qtnfmac kernel modules

This patch enables rmmod/insmod for qtnfmac kernel modules:
- do not 'pin' pci device in order to disable it on module unload
- implement card reset procedure
- restore PCI bar addresses for restarted wireless card

Signed-off-by: Sergei Maksimenko <smaksimenko@quantenna.com>
Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/quantenna/qtnfmac/pearl/pcie.c   | 15 ++++++++++++++-
 .../wireless/quantenna/qtnfmac/pearl/pcie_ipc.h   |  1 +
 .../quantenna/qtnfmac/pearl/pcie_regs_pearl.h     |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
index be5813aa1486..7aa222286d8e 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
@@ -162,6 +162,17 @@ static void qtnf_deassert_intx(struct qtnf_pcie_bus_priv *priv)
 	qtnf_non_posted_write(cfg, reg);
 }
 
+static void qtnf_reset_card(struct qtnf_pcie_bus_priv *priv)
+{
+	const u32 data = QTN_PEARL_IPC_IRQ_WORD(QTN_PEARL_LHOST_EP_RESET);
+	void __iomem *reg = priv->sysctl_bar +
+			    QTN_PEARL_SYSCTL_LHOST_IRQ_OFFSET;
+
+	qtnf_non_posted_write(data, reg);
+	msleep(QTN_EP_RESET_WAIT_MS);
+	pci_restore_state(priv->pdev);
+}
+
 static void qtnf_ipc_gen_ep_int(void *arg)
 {
 	const struct qtnf_pcie_bus_priv *priv = arg;
@@ -1308,7 +1319,6 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_base;
 	}
 
-	pcim_pin_device(pdev);
 	pci_set_master(pdev);
 
 	ret = qtnf_pcie_init_irq(pcie_priv);
@@ -1323,6 +1333,8 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_base;
 	}
 
+	pci_save_state(pdev);
+
 	ret = qtnf_pcie_init_shm_ipc(pcie_priv);
 	if (ret < 0) {
 		pr_err("PCIE SHM IPC init failed\n");
@@ -1425,6 +1437,7 @@ static void qtnf_pcie_remove(struct pci_dev *pdev)
 	qtnf_debugfs_remove(bus);
 
 	qtnf_pcie_free_shm_ipc(priv);
+	qtnf_reset_card(priv);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
index c5a4e46d26ef..00bb21a1c47a 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
@@ -46,6 +46,7 @@
 /* state transition timeouts */
 #define QTN_FW_DL_TIMEOUT_MS	3000
 #define QTN_FW_QLINK_TIMEOUT_MS	30000
+#define QTN_EP_RESET_WAIT_MS	1000
 
 #define PCIE_HDP_INT_RX_BITS (0		\
 	| PCIE_HDP_INT_EP_TXDMA		\
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
index 5b48b425fa7f..0bfe285b6b48 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
@@ -351,5 +351,6 @@
 
 #define QTN_PEARL_IPC_IRQ_WORD(irq)	(BIT(irq) | BIT(irq + 16))
 #define QTN_PEARL_LHOST_IPC_IRQ		(6)
+#define QTN_PEARL_LHOST_EP_RESET	(7)
 
 #endif /* __PEARL_PCIE_H */

From c3b2f7ca4186cad3584915eba10d2085c3eed029 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Sat, 10 Feb 2018 17:04:19 +0300
Subject: [PATCH 0413/1678] qtnfmac: implement asynchronous firmware loading

In pci probe() function start firmware loading, protocol handshake
and driver core initialization, and not wait for completion.

Signed-off-by: Sergei Maksimenko <smaksimenko@quantenna.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/quantenna/qtnfmac/bus.h  |   3 +-
 .../wireless/quantenna/qtnfmac/pearl/pcie.c   | 387 +++++++++---------
 2 files changed, 186 insertions(+), 204 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/bus.h b/drivers/net/wireless/quantenna/qtnfmac/bus.h
index 56e5fed92a2a..0a1604683bab 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/bus.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/bus.h
@@ -59,8 +59,9 @@ struct qtnf_bus {
 	char fwname[32];
 	struct napi_struct mux_napi;
 	struct net_device mux_dev;
-	struct completion request_firmware_complete;
+	struct completion firmware_init_complete;
 	struct workqueue_struct *workqueue;
+	struct work_struct fw_work;
 	struct work_struct event_work;
 	struct mutex bus_lock; /* lock during command/event processing */
 	struct dentry *dbg_dir;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
index 7aa222286d8e..f117904d9120 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
@@ -127,7 +127,7 @@ static inline void qtnf_dis_txdone_irq(struct qtnf_pcie_bus_priv *priv)
 	spin_unlock_irqrestore(&priv->irq_lock, flags);
 }
 
-static int qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
+static void qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
 {
 	struct pci_dev *pdev = priv->pdev;
 
@@ -148,8 +148,6 @@ static int qtnf_pcie_init_irq(struct qtnf_pcie_bus_priv *priv)
 		pr_warn("legacy PCIE interrupts enabled\n");
 		pci_intx(pdev, 1);
 	}
-
-	return 0;
 }
 
 static void qtnf_deassert_intx(struct qtnf_pcie_bus_priv *priv)
@@ -956,6 +954,98 @@ static const struct qtnf_bus_ops qtnf_pcie_bus_ops = {
 	.data_rx_stop		= qtnf_pcie_data_rx_stop,
 };
 
+static int qtnf_dbg_mps_show(struct seq_file *s, void *data)
+{
+	struct qtnf_bus *bus = dev_get_drvdata(s->private);
+	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+	seq_printf(s, "%d\n", priv->mps);
+
+	return 0;
+}
+
+static int qtnf_dbg_msi_show(struct seq_file *s, void *data)
+{
+	struct qtnf_bus *bus = dev_get_drvdata(s->private);
+	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+	seq_printf(s, "%u\n", priv->msi_enabled);
+
+	return 0;
+}
+
+static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
+{
+	struct qtnf_bus *bus = dev_get_drvdata(s->private);
+	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+	u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
+	u32 status;
+
+	seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
+	seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
+	status = reg &  PCIE_HDP_INT_TX_BITS;
+	seq_printf(s, "pcie_irq_tx_status(%s)\n",
+		   (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
+	seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
+	status = reg &  PCIE_HDP_INT_RX_BITS;
+	seq_printf(s, "pcie_irq_rx_status(%s)\n",
+		   (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
+	seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
+	status = reg &  PCIE_HDP_INT_HHBM_UF;
+	seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
+		   (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
+
+	return 0;
+}
+
+static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
+{
+	struct qtnf_bus *bus = dev_get_drvdata(s->private);
+	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+	seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
+	seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
+	seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
+	seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
+
+	seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
+	seq_printf(s, "tx_bd_p_index(%u)\n",
+		   readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
+			& (priv->tx_bd_num - 1));
+	seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
+	seq_printf(s, "tx queue len(%u)\n",
+		   CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
+			    priv->tx_bd_num));
+
+	seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
+	seq_printf(s, "rx_bd_p_index(%u)\n",
+		   readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
+			& (priv->rx_bd_num - 1));
+	seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
+	seq_printf(s, "rx alloc queue len(%u)\n",
+		   CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
+			      priv->rx_bd_num));
+
+	return 0;
+}
+
+static int qtnf_dbg_shm_stats(struct seq_file *s, void *data)
+{
+	struct qtnf_bus *bus = dev_get_drvdata(s->private);
+	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+
+	seq_printf(s, "shm_ipc_ep_in.tx_packet_count(%zu)\n",
+		   priv->shm_ipc_ep_in.tx_packet_count);
+	seq_printf(s, "shm_ipc_ep_in.rx_packet_count(%zu)\n",
+		   priv->shm_ipc_ep_in.rx_packet_count);
+	seq_printf(s, "shm_ipc_ep_out.tx_packet_count(%zu)\n",
+		   priv->shm_ipc_ep_out.tx_timeout_count);
+	seq_printf(s, "shm_ipc_ep_out.rx_packet_count(%zu)\n",
+		   priv->shm_ipc_ep_out.rx_packet_count);
+
+	return 0;
+}
+
 static int qtnf_ep_fw_send(struct qtnf_pcie_bus_priv *priv, uint32_t size,
 			   int blk, const u8 *pblk, const u8 *fw)
 {
@@ -1071,81 +1161,94 @@ qtnf_ep_fw_load(struct qtnf_pcie_bus_priv *priv, const u8 *fw, u32 fw_size)
 	return 0;
 }
 
-static void qtnf_firmware_load(const struct firmware *fw, void *context)
-{
-	struct qtnf_pcie_bus_priv *priv = (void *)context;
-	struct pci_dev *pdev = priv->pdev;
-	struct qtnf_bus *bus = pci_get_drvdata(pdev);
-	int ret;
-
-	if (!fw) {
-		pr_err("failed to get firmware %s\n", bus->fwname);
-		goto fw_load_err;
-	}
-
-	ret = qtnf_ep_fw_load(priv, fw->data, fw->size);
-	if (ret) {
-		pr_err("FW upload error\n");
-		goto fw_load_err;
-	}
-
-	if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
-			    QTN_FW_DL_TIMEOUT_MS)) {
-		pr_err("FW bringup timed out\n");
-		goto fw_load_err;
-	}
-
-	bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
-	pr_info("firmware is up and running\n");
-
-fw_load_err:
-
-	if (fw)
-		release_firmware(fw);
-
-	complete(&bus->request_firmware_complete);
-}
-
-static int qtnf_bringup_fw(struct qtnf_bus *bus)
+static void qtnf_fw_work_handler(struct work_struct *work)
 {
+	struct qtnf_bus *bus = container_of(work, struct qtnf_bus, fw_work);
 	struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
 	struct pci_dev *pdev = priv->pdev;
+	const struct firmware *fw;
 	int ret;
 	u32 state = QTN_RC_FW_LOADRDY | QTN_RC_FW_QLINK;
 
-	if (flashboot)
+	if (flashboot) {
 		state |= QTN_RC_FW_FLASHBOOT;
+	} else {
+		ret = request_firmware(&fw, bus->fwname, &pdev->dev);
+		if (ret < 0) {
+			pr_err("failed to get firmware %s\n", bus->fwname);
+			goto fw_load_fail;
+		}
+	}
 
 	qtnf_set_state(&priv->bda->bda_rc_state, state);
 
 	if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_LOADRDY,
 			    QTN_FW_DL_TIMEOUT_MS)) {
 		pr_err("card is not ready\n");
-		return -ETIMEDOUT;
+		goto fw_load_fail;
 	}
 
 	qtnf_clear_state(&priv->bda->bda_ep_state, QTN_EP_FW_LOADRDY);
 
 	if (flashboot) {
-		pr_info("Booting FW from flash\n");
+		pr_info("booting firmware from flash\n");
+	} else {
+		pr_info("starting firmware upload: %s\n", bus->fwname);
 
-		if (!qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
-				     QTN_FW_DL_TIMEOUT_MS))
-			bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
-
-		return 0;
+		ret = qtnf_ep_fw_load(priv, fw->data, fw->size);
+		release_firmware(fw);
+		if (ret) {
+			pr_err("firmware upload error\n");
+			goto fw_load_fail;
+		}
 	}
 
-	pr_info("starting firmware upload: %s\n", bus->fwname);
+	if (qtnf_poll_state(&priv->bda->bda_ep_state, QTN_EP_FW_DONE,
+			    QTN_FW_DL_TIMEOUT_MS)) {
+		pr_err("firmware bringup timed out\n");
+		goto fw_load_fail;
+	}
 
-	ret = request_firmware_nowait(THIS_MODULE, 1, bus->fwname, &pdev->dev,
-				      GFP_KERNEL, priv, qtnf_firmware_load);
-	if (ret < 0)
-		pr_err("request_firmware_nowait error %d\n", ret);
-	else
-		ret = 1;
+	bus->fw_state = QTNF_FW_STATE_FW_DNLD_DONE;
+	pr_info("firmware is up and running\n");
 
-	return ret;
+	if (qtnf_poll_state(&priv->bda->bda_ep_state,
+			    QTN_EP_FW_QLINK_DONE, QTN_FW_QLINK_TIMEOUT_MS)) {
+		pr_err("firmware runtime failure\n");
+		goto fw_load_fail;
+	}
+
+	ret = qtnf_core_attach(bus);
+	if (ret) {
+		pr_err("failed to attach core\n");
+		goto fw_load_fail;
+	}
+
+	qtnf_debugfs_init(bus, DRV_NAME);
+	qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
+	qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
+	qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
+	qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
+	qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
+
+	goto fw_load_exit;
+
+fw_load_fail:
+	bus->fw_state = QTNF_FW_STATE_DEAD;
+
+fw_load_exit:
+	complete(&bus->firmware_init_complete);
+	put_device(&pdev->dev);
+}
+
+static void qtnf_bringup_fw_async(struct qtnf_bus *bus)
+{
+	struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
+	struct pci_dev *pdev = priv->pdev;
+
+	get_device(&pdev->dev);
+	INIT_WORK(&bus->fw_work, qtnf_fw_work_handler);
+	schedule_work(&bus->fw_work);
 }
 
 static void qtnf_reclaim_tasklet_fn(unsigned long data)
@@ -1156,98 +1259,6 @@ static void qtnf_reclaim_tasklet_fn(unsigned long data)
 	qtnf_en_txdone_irq(priv);
 }
 
-static int qtnf_dbg_mps_show(struct seq_file *s, void *data)
-{
-	struct qtnf_bus *bus = dev_get_drvdata(s->private);
-	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
-	seq_printf(s, "%d\n", priv->mps);
-
-	return 0;
-}
-
-static int qtnf_dbg_msi_show(struct seq_file *s, void *data)
-{
-	struct qtnf_bus *bus = dev_get_drvdata(s->private);
-	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
-	seq_printf(s, "%u\n", priv->msi_enabled);
-
-	return 0;
-}
-
-static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
-{
-	struct qtnf_bus *bus = dev_get_drvdata(s->private);
-	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-	u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
-	u32 status;
-
-	seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
-	seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
-	status = reg &  PCIE_HDP_INT_TX_BITS;
-	seq_printf(s, "pcie_irq_tx_status(%s)\n",
-		   (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
-	seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
-	status = reg &  PCIE_HDP_INT_RX_BITS;
-	seq_printf(s, "pcie_irq_rx_status(%s)\n",
-		   (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
-	seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
-	status = reg &  PCIE_HDP_INT_HHBM_UF;
-	seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
-		   (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
-
-	return 0;
-}
-
-static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
-{
-	struct qtnf_bus *bus = dev_get_drvdata(s->private);
-	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
-	seq_printf(s, "tx_full_count(%u)\n", priv->tx_full_count);
-	seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
-	seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
-	seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
-
-	seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
-	seq_printf(s, "tx_bd_p_index(%u)\n",
-		   readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
-			& (priv->tx_bd_num - 1));
-	seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
-	seq_printf(s, "tx queue len(%u)\n",
-		   CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
-			    priv->tx_bd_num));
-
-	seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
-	seq_printf(s, "rx_bd_p_index(%u)\n",
-		   readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
-			& (priv->rx_bd_num - 1));
-	seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
-	seq_printf(s, "rx alloc queue len(%u)\n",
-		   CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
-			      priv->rx_bd_num));
-
-	return 0;
-}
-
-static int qtnf_dbg_shm_stats(struct seq_file *s, void *data)
-{
-	struct qtnf_bus *bus = dev_get_drvdata(s->private);
-	struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
-
-	seq_printf(s, "shm_ipc_ep_in.tx_packet_count(%zu)\n",
-		   priv->shm_ipc_ep_in.tx_packet_count);
-	seq_printf(s, "shm_ipc_ep_in.rx_packet_count(%zu)\n",
-		   priv->shm_ipc_ep_in.rx_packet_count);
-	seq_printf(s, "shm_ipc_ep_out.tx_packet_count(%zu)\n",
-		   priv->shm_ipc_ep_out.tx_timeout_count);
-	seq_printf(s, "shm_ipc_ep_out.rx_packet_count(%zu)\n",
-		   priv->shm_ipc_ep_out.rx_packet_count);
-
-	return 0;
-}
-
 static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct qtnf_pcie_bus_priv *pcie_priv;
@@ -1256,10 +1267,8 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	bus = devm_kzalloc(&pdev->dev,
 			   sizeof(*bus) + sizeof(*pcie_priv), GFP_KERNEL);
-	if (!bus) {
-		ret = -ENOMEM;
-		goto err_init;
-	}
+	if (!bus)
+		return -ENOMEM;
 
 	pcie_priv = get_bus_priv(bus);
 
@@ -1270,7 +1279,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pcie_priv->pdev = pdev;
 
 	strcpy(bus->fwname, QTN_PCI_PEARL_FW_NAME);
-	init_completion(&bus->request_firmware_complete);
+	init_completion(&bus->firmware_init_complete);
 	mutex_init(&bus->bus_lock);
 	spin_lock_init(&pcie_priv->tx0_lock);
 	spin_lock_init(&pcie_priv->irq_lock);
@@ -1286,11 +1295,18 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pcie_priv->tx_reclaim_done = 0;
 	pcie_priv->tx_reclaim_req = 0;
 
+	tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
+		     (unsigned long)pcie_priv);
+
+	init_dummy_netdev(&bus->mux_dev);
+	netif_napi_add(&bus->mux_dev, &bus->mux_napi,
+		       qtnf_rx_poll, 10);
+
 	pcie_priv->workqueue = create_singlethread_workqueue("QTNF_PEARL_PCIE");
 	if (!pcie_priv->workqueue) {
 		pr_err("failed to alloc bus workqueue\n");
 		ret = -ENODEV;
-		goto err_priv;
+		goto err_init;
 	}
 
 	if (!pci_is_pcie(pdev)) {
@@ -1320,12 +1336,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	pci_set_master(pdev);
-
-	ret = qtnf_pcie_init_irq(pcie_priv);
-	if (ret < 0) {
-		pr_err("irq init failed\n");
-		goto err_base;
-	}
+	qtnf_pcie_init_irq(pcie_priv);
 
 	ret = qtnf_pcie_init_memory(pcie_priv);
 	if (ret < 0) {
@@ -1344,7 +1355,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	ret = qtnf_pcie_init_xfer(pcie_priv);
 	if (ret) {
 		pr_err("PCIE xfer init failed\n");
-		goto err_base;
+		goto err_ipc;
 	}
 
 	/* init default irq settings */
@@ -1360,58 +1371,25 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_xfer;
 	}
 
-	tasklet_init(&pcie_priv->reclaim_tq, qtnf_reclaim_tasklet_fn,
-		     (unsigned long)pcie_priv);
-	init_dummy_netdev(&bus->mux_dev);
-	netif_napi_add(&bus->mux_dev, &bus->mux_napi,
-		       qtnf_rx_poll, 10);
-
-	ret = qtnf_bringup_fw(bus);
-	if (ret < 0)
-		goto err_bringup_fw;
-	else if (ret)
-		wait_for_completion(&bus->request_firmware_complete);
-
-	if (bus->fw_state != QTNF_FW_STATE_FW_DNLD_DONE) {
-		pr_err("failed to start FW\n");
-		goto err_bringup_fw;
-	}
-
-	if (qtnf_poll_state(&pcie_priv->bda->bda_ep_state, QTN_EP_FW_QLINK_DONE,
-			    QTN_FW_QLINK_TIMEOUT_MS)) {
-		pr_err("FW runtime failure\n");
-		goto err_bringup_fw;
-	}
-
-	ret = qtnf_core_attach(bus);
-	if (ret) {
-		pr_err("failed to attach core\n");
-		goto err_bringup_fw;
-	}
-
-	qtnf_debugfs_init(bus, DRV_NAME);
-	qtnf_debugfs_add_entry(bus, "mps", qtnf_dbg_mps_show);
-	qtnf_debugfs_add_entry(bus, "msi_enabled", qtnf_dbg_msi_show);
-	qtnf_debugfs_add_entry(bus, "hdp_stats", qtnf_dbg_hdp_stats);
-	qtnf_debugfs_add_entry(bus, "irq_stats", qtnf_dbg_irq_stats);
-	qtnf_debugfs_add_entry(bus, "shm_stats", qtnf_dbg_shm_stats);
+	qtnf_bringup_fw_async(bus);
 
 	return 0;
 
-err_bringup_fw:
-	netif_napi_del(&bus->mux_napi);
-
 err_xfer:
 	qtnf_free_xfer_buffers(pcie_priv);
 
+err_ipc:
+	qtnf_pcie_free_shm_ipc(pcie_priv);
+
 err_base:
 	flush_workqueue(pcie_priv->workqueue);
 	destroy_workqueue(pcie_priv->workqueue);
-
-err_priv:
-	pci_set_drvdata(pdev, NULL);
+	netif_napi_del(&bus->mux_napi);
 
 err_init:
+	tasklet_kill(&pcie_priv->reclaim_tq);
+	pci_set_drvdata(pdev, NULL);
+
 	return ret;
 }
 
@@ -1424,11 +1402,14 @@ static void qtnf_pcie_remove(struct pci_dev *pdev)
 	if (!bus)
 		return;
 
+	wait_for_completion(&bus->firmware_init_complete);
+
+	if (bus->fw_state == QTNF_FW_STATE_ACTIVE)
+		qtnf_core_detach(bus);
+
 	priv = get_bus_priv(bus);
 
-	qtnf_core_detach(bus);
 	netif_napi_del(&bus->mux_napi);
-
 	flush_workqueue(priv->workqueue);
 	destroy_workqueue(priv->workqueue);
 	tasklet_kill(&priv->reclaim_tq);

From 864164683678e27c931b5909c72a001b1b943f36 Mon Sep 17 00:00:00 2001
From: Xinming Hu <huxm@marvell.com>
Date: Tue, 13 Feb 2018 14:10:15 +0800
Subject: [PATCH 0414/1678] mwifiex: set different mac address for interfaces
 with same bss type

Multiple interfaces with same bss type could affect each other if
they are sharing the same mac address. In this patch, different
mac address is assigned to new interface which have same bss type
with exist interfaces.

Signed-off-by: Xinming Hu <huxm@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/main.c | 24 ++++++++++++++++-----
 drivers/net/wireless/marvell/mwifiex/main.h | 13 +++++++++++
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 12e739950332..b6484582845a 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -943,13 +943,26 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
 			    struct net_device *dev)
 {
 	int ret;
-	u64 mac_addr;
+	u64 mac_addr, old_mac_addr;
 
-	if (priv->bss_type != MWIFIEX_BSS_TYPE_P2P)
-		goto done;
+	if (priv->bss_type == MWIFIEX_BSS_TYPE_ANY)
+		return -ENOTSUPP;
 
 	mac_addr = ether_addr_to_u64(priv->curr_addr);
-	mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+	old_mac_addr = mac_addr;
+
+	if (priv->bss_type == MWIFIEX_BSS_TYPE_P2P)
+		mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+
+	if (mwifiex_get_intf_num(priv->adapter, priv->bss_type) > 1) {
+		/* Set mac address based on bss_type/bss_num */
+		mac_addr ^= BIT_ULL(priv->bss_type + 8);
+		mac_addr += priv->bss_num;
+	}
+
+	if (mac_addr == old_mac_addr)
+		goto done;
+
 	u64_to_ether_addr(mac_addr, priv->curr_addr);
 
 	/* Send request to firmware */
@@ -957,13 +970,14 @@ int mwifiex_set_mac_address(struct mwifiex_private *priv,
 			       HostCmd_ACT_GEN_SET, 0, NULL, true);
 
 	if (ret) {
+		u64_to_ether_addr(old_mac_addr, priv->curr_addr);
 		mwifiex_dbg(priv->adapter, ERROR,
 			    "set mac address failed: ret=%d\n", ret);
 		return ret;
 	}
 
 done:
-	memcpy(dev->dev_addr, priv->curr_addr, ETH_ALEN);
+	ether_addr_copy(dev->dev_addr, priv->curr_addr);
 	return 0;
 }
 
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 6b5539b1f4d8..58e6ae5415e7 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -1280,6 +1280,19 @@ mwifiex_copy_rates(u8 *dest, u32 pos, u8 *src, int len)
 	return pos;
 }
 
+/* This function return interface number with the same bss_type.
+ */
+static inline u8
+mwifiex_get_intf_num(struct mwifiex_adapter *adapter, u8 bss_type)
+{
+	u8 i, num = 0;
+
+	for (i = 0; i < adapter->priv_num; i++)
+		if (adapter->priv[i] && adapter->priv[i]->bss_type == bss_type)
+			num++;
+	return num;
+}
+
 /*
  * This function returns the correct private structure pointer based
  * upon the BSS type and BSS number.

From da472385a29f1fddcac7cfa0499482704310bd16 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:18 +0100
Subject: [PATCH 0415/1678] brcmfmac: move brcmf_bus_preinit() call just after
 changing bus state

Moving the brcmf_bus_preinit() call allows the bus code to do some
required initialization before handling firmware control messages.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c | 3 ---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c   | 5 +++++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 9be0b051066a..70ef9835b647 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -365,9 +365,6 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 
 	/* Enable tx beamforming, errors can be ignored (not supported) */
 	(void)brcmf_fil_iovar_int_set(ifp, "txbf", 1);
-
-	/* do bus specific preinit here */
-	err = brcmf_bus_preinit(ifp->drvr->bus_if);
 done:
 	return err;
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 930e423f83a8..6dd6da7b9481 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -1013,6 +1013,11 @@ int brcmf_bus_started(struct device *dev)
 	/* signal bus ready */
 	brcmf_bus_change_state(bus_if, BRCMF_BUS_UP);
 
+	/* do bus specific preinit here */
+	ret = brcmf_bus_preinit(ifp->drvr->bus_if);
+	if (ret < 0)
+		goto fail;
+
 	/* Bus is ready, do any initialization */
 	ret = brcmf_c_preinit_dcmds(ifp);
 	if (ret < 0)

From 4b5adc736828dc25ca33e263ad8c0b9dcd3bf325 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:19 +0100
Subject: [PATCH 0416/1678] brcmfmac: move allocation of control rx buffer to
 brcmf_sdio_bus_preinit()

Allocate the control rx buffer needed for firmware control interface
during brcmf_sdio_bus_preinit(). This relies on common layer setting
struct brcmf_bus::maxctl during brcmf_attach(). By moving the allocation
we can move brcmf_attach() in subsequent change.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 26 +++++++++----------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 08686147b59d..9ea525e27388 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -1707,7 +1707,6 @@ brcmf_sdio_read_control(struct brcmf_sdio *bus, u8 *hdr, uint len, uint doff)
 	int sdret;
 
 	brcmf_dbg(TRACE, "Enter\n");
-
 	if (bus->rxblen)
 		buf = vzalloc(bus->rxblen);
 	if (!buf)
@@ -3411,6 +3410,18 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
 	u32 value;
 	int err;
 
+	/* maxctl provided by common layer */
+	if (WARN_ON(!bus_if->maxctl))
+		return -EINVAL;
+
+	/* Allocate control receive buffer */
+	bus_if->maxctl += bus->roundup;
+	value = roundup((bus_if->maxctl + SDPCM_HDRLEN), ALIGNMENT);
+	value += bus->head_align;
+	bus->rxbuf = kmalloc(value, GFP_ATOMIC);
+	if (bus->rxbuf)
+		bus->rxblen = value;
+
 	/* the commands below use the terms tx and rx from
 	 * a device perspective, ie. bus:txglom affects the
 	 * bus transfers from device to host.
@@ -4208,19 +4219,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 	bus->blocksize = bus->sdiodev->func2->cur_blksize;
 	bus->roundup = min(max_roundup, bus->blocksize);
 
-	/* Allocate buffers */
-	if (bus->sdiodev->bus_if->maxctl) {
-		bus->sdiodev->bus_if->maxctl += bus->roundup;
-		bus->rxblen =
-		    roundup((bus->sdiodev->bus_if->maxctl + SDPCM_HDRLEN),
-			    ALIGNMENT) + bus->head_align;
-		bus->rxbuf = kmalloc(bus->rxblen, GFP_ATOMIC);
-		if (!(bus->rxbuf)) {
-			brcmf_err("rxbuf allocation failed\n");
-			goto fail;
-		}
-	}
-
 	sdio_claim_host(bus->sdiodev->func1);
 
 	/* Disable F2 to clear any intermediate frame state on the dongle */

From 262f2b53f67936b59cc8dfc6f3899ab8905bf1ed Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:20 +0100
Subject: [PATCH 0417/1678] brcmfmac: call brcmf_attach() just before calling
 brcmf_bus_started()

Now we can move brcmf_attach() until after the firmware has been downloaded
to the device. Make the call just before brcmf_bus_started().

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/core.c        |  6 ++++
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 34 +++++++++----------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 6dd6da7b9481..e553d1abceb3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -1190,6 +1190,12 @@ void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state)
 	int ifidx;
 
 	brcmf_dbg(TRACE, "%d -> %d\n", bus->state, state);
+
+	if (!drvr) {
+		brcmf_dbg(INFO, "ignoring transition, bus not attached yet\n");
+		return;
+	}
+
 	bus->state = state;
 
 	if (state == BRCMF_BUS_UP) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 9ea525e27388..0f1e4528fe0d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4048,9 +4048,6 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	if (err)
 		goto fail;
 
-	if (!bus_if->drvr)
-		return;
-
 	/* try to download image and nvram to the dongle */
 	bus->alp_only = true;
 	err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4126,11 +4123,28 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 
 	sdio_release_host(sdiodev->func1);
 
+	/* Assign bus interface call back */
+	sdiodev->bus_if->dev = sdiodev->dev;
+	sdiodev->bus_if->ops = &brcmf_sdio_bus_ops;
+	sdiodev->bus_if->chip = bus->ci->chip;
+	sdiodev->bus_if->chiprev = bus->ci->chiprev;
+
+	/* Attach to the common layer, reserve hdr space */
+	err = brcmf_attach(sdiodev->dev, sdiodev->settings);
+	if (err != 0) {
+		brcmf_err("brcmf_attach failed\n");
+		goto fail;
+	}
+
+	brcmf_sdio_debugfs_create(bus);
+
 	err = brcmf_bus_started(dev);
 	if (err != 0) {
 		brcmf_err("dongle is not responding\n");
 		goto fail;
 	}
+
+	/* ready */
 	return;
 
 release:
@@ -4199,22 +4213,9 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 	bus->dpc_triggered = false;
 	bus->dpc_running = false;
 
-	/* Assign bus interface call back */
-	bus->sdiodev->bus_if->dev = bus->sdiodev->dev;
-	bus->sdiodev->bus_if->ops = &brcmf_sdio_bus_ops;
-	bus->sdiodev->bus_if->chip = bus->ci->chip;
-	bus->sdiodev->bus_if->chiprev = bus->ci->chiprev;
-
 	/* default sdio bus header length for tx packet */
 	bus->tx_hdrlen = SDPCM_HWHDR_LEN + SDPCM_SWHDR_LEN;
 
-	/* Attach to the common layer, reserve hdr space */
-	ret = brcmf_attach(bus->sdiodev->dev, bus->sdiodev->settings);
-	if (ret != 0) {
-		brcmf_err("brcmf_attach failed\n");
-		goto fail;
-	}
-
 	/* Query the F2 block size, set roundup accordingly */
 	bus->blocksize = bus->sdiodev->func2->cur_blksize;
 	bus->roundup = min(max_roundup, bus->blocksize);
@@ -4239,7 +4240,6 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 	/* SR state */
 	bus->sr_enabled = false;
 
-	brcmf_sdio_debugfs_create(bus);
 	brcmf_dbg(INFO, "completed!!\n");
 
 	ret = brcmf_fw_map_chip_to_name(bus->ci->chip, bus->ci->chiprev,

From a7f4a80c0070b673d4a4ce94b99979ea6d0c6296 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:21 +0100
Subject: [PATCH 0418/1678] brcmfmac: usb: call brcmf_usb_up() during
 brcmf_bus_preinit()

By calling brcmf_usb_up() during brcmf_bus_preinit() it does not need
to be called in brcmf_usb_bus_setup().

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index b27170c12482..d22cd1662da6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1146,8 +1146,9 @@ static int brcmf_usb_get_fwname(struct device *dev, u32 chip, u32 chiprev,
 }
 
 static const struct brcmf_bus_ops brcmf_usb_bus_ops = {
-	.txdata = brcmf_usb_tx,
+	.preinit = brcmf_usb_up,
 	.stop = brcmf_usb_down,
+	.txdata = brcmf_usb_tx,
 	.txctl = brcmf_usb_tx_ctlpkt,
 	.rxctl = brcmf_usb_rx_ctlpkt,
 	.wowl_config = brcmf_usb_wowl_config,
@@ -1165,10 +1166,6 @@ static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo)
 		return ret;
 	}
 
-	ret = brcmf_usb_up(devinfo->dev);
-	if (ret)
-		goto fail;
-
 	ret = brcmf_bus_started(devinfo->dev);
 	if (ret)
 		goto fail;

From 0542503c4c164c65cd1567b0f2b3f887af6c81eb Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:22 +0100
Subject: [PATCH 0419/1678] brcmfmac: move brcmf_attach() function in core.c

Moving the function in preparation of subsequent patch.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/core.c        | 98 +++++++++----------
 1 file changed, 49 insertions(+), 49 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index e553d1abceb3..9eab7a94747e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -914,55 +914,6 @@ static int brcmf_inet6addr_changed(struct notifier_block *nb,
 }
 #endif
 
-int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
-{
-	struct brcmf_pub *drvr = NULL;
-	int ret = 0;
-	int i;
-
-	brcmf_dbg(TRACE, "Enter\n");
-
-	/* Allocate primary brcmf_info */
-	drvr = kzalloc(sizeof(struct brcmf_pub), GFP_ATOMIC);
-	if (!drvr)
-		return -ENOMEM;
-
-	for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
-		drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
-
-	mutex_init(&drvr->proto_block);
-
-	/* Link to bus module */
-	drvr->hdrlen = 0;
-	drvr->bus_if = dev_get_drvdata(dev);
-	drvr->bus_if->drvr = drvr;
-	drvr->settings = settings;
-
-	/* attach debug facilities */
-	brcmf_debug_attach(drvr);
-
-	/* Attach and link in the protocol */
-	ret = brcmf_proto_attach(drvr);
-	if (ret != 0) {
-		brcmf_err("brcmf_prot_attach failed\n");
-		goto fail;
-	}
-
-	/* Attach to events important for core code */
-	brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
-			    brcmf_psm_watchdog_notify);
-
-	/* attach firmware event handler */
-	brcmf_fweh_attach(drvr);
-
-	return ret;
-
-fail:
-	brcmf_detach(dev);
-
-	return ret;
-}
-
 static int brcmf_revinfo_read(struct seq_file *s, void *data)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(s->private);
@@ -1093,6 +1044,55 @@ fail:
 	return ret;
 }
 
+int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
+{
+	struct brcmf_pub *drvr = NULL;
+	int ret = 0;
+	int i;
+
+	brcmf_dbg(TRACE, "Enter\n");
+
+	/* Allocate primary brcmf_info */
+	drvr = kzalloc(sizeof(*drvr), GFP_ATOMIC);
+	if (!drvr)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
+		drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
+
+	mutex_init(&drvr->proto_block);
+
+	/* Link to bus module */
+	drvr->hdrlen = 0;
+	drvr->bus_if = dev_get_drvdata(dev);
+	drvr->bus_if->drvr = drvr;
+	drvr->settings = settings;
+
+	/* attach debug facilities */
+	brcmf_debug_attach(drvr);
+
+	/* Attach and link in the protocol */
+	ret = brcmf_proto_attach(drvr);
+	if (ret != 0) {
+		brcmf_err("brcmf_prot_attach failed\n");
+		goto fail;
+	}
+
+	/* Attach to events important for core code */
+	brcmf_fweh_register(drvr, BRCMF_E_PSM_WATCHDOG,
+			    brcmf_psm_watchdog_notify);
+
+	/* attach firmware event handler */
+	brcmf_fweh_attach(drvr);
+
+	return ret;
+
+fail:
+	brcmf_detach(dev);
+
+	return ret;
+}
+
 void brcmf_bus_add_txhdrlen(struct device *dev, uint len)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);

From de2a3027f6f15e2f6558dc4d178282ccc1f054db Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:23 +0100
Subject: [PATCH 0420/1678] brcmfmac: remove brcmf_bus_started() from bus api

No longer needed to call this in bus layer so make it static and call
it in the last phase of brcmf_attach() instead.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/bus.h         |  1 -
 .../broadcom/brcm80211/brcmfmac/core.c        | 14 ++++++----
 .../broadcom/brcm80211/brcmfmac/pcie.c        | 20 +------------
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 10 ++-----
 .../broadcom/brcm80211/brcmfmac/usb.c         | 28 +++----------------
 5 files changed, 16 insertions(+), 57 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 0b76a615708e..0b90a63bdeb1 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -253,7 +253,6 @@ void brcmf_dev_reset(struct device *dev);
 /* Configure the "global" bus state used by upper layers */
 void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state);
 
-int brcmf_bus_started(struct device *dev);
 s32 brcmf_iovar_data_set(struct device *dev, char *name, void *data, u32 len);
 void brcmf_bus_add_txhdrlen(struct device *dev, uint len);
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 9eab7a94747e..19048526b4af 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -944,11 +944,10 @@ static int brcmf_revinfo_read(struct seq_file *s, void *data)
 	return 0;
 }
 
-int brcmf_bus_started(struct device *dev)
+static int brcmf_bus_started(struct brcmf_pub *drvr)
 {
 	int ret = -1;
-	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_pub *drvr = bus_if->drvr;
+	struct brcmf_bus *bus_if = drvr->bus_if;
 	struct brcmf_if *ifp;
 	struct brcmf_if *p2p_ifp;
 
@@ -965,7 +964,7 @@ int brcmf_bus_started(struct device *dev)
 	brcmf_bus_change_state(bus_if, BRCMF_BUS_UP);
 
 	/* do bus specific preinit here */
-	ret = brcmf_bus_preinit(ifp->drvr->bus_if);
+	ret = brcmf_bus_preinit(bus_if);
 	if (ret < 0)
 		goto fail;
 
@@ -1085,7 +1084,12 @@ int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
 	/* attach firmware event handler */
 	brcmf_fweh_attach(drvr);
 
-	return ret;
+	ret = brcmf_bus_started(drvr);
+	if (ret != 0) {
+		brcmf_err("dongle is not responding: err=%d\n", ret);
+		goto fail;
+	}
+	return 0;
 
 fail:
 	brcmf_detach(dev);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 8752707557bf..a7d827ce1684 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1581,24 +1581,6 @@ static void brcmf_pcie_release_resource(struct brcmf_pciedev_info *devinfo)
 }
 
 
-static int brcmf_pcie_attach_bus(struct brcmf_pciedev_info *devinfo)
-{
-	int ret;
-
-	/* Attach to the common driver interface */
-	ret = brcmf_attach(&devinfo->pdev->dev, devinfo->settings);
-	if (ret) {
-		brcmf_err("brcmf_attach failed\n");
-	} else {
-		ret = brcmf_bus_started(&devinfo->pdev->dev);
-		if (ret)
-			brcmf_err("dongle is not responding\n");
-	}
-
-	return ret;
-}
-
-
 static u32 brcmf_pcie_buscore_prep_addr(const struct pci_dev *pdev, u32 addr)
 {
 	u32 ret_addr;
@@ -1735,7 +1717,7 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
 	init_waitqueue_head(&devinfo->mbdata_resp_wait);
 
 	brcmf_pcie_intr_enable(devinfo);
-	if (brcmf_pcie_attach_bus(devinfo) == 0)
+	if (brcmf_attach(&devinfo->pdev->dev, devinfo->settings) == 0)
 		return;
 
 	brcmf_pcie_bus_console_read(devinfo);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 0f1e4528fe0d..aa8233620bb9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -3422,6 +3422,8 @@ static int brcmf_sdio_bus_preinit(struct device *dev)
 	if (bus->rxbuf)
 		bus->rxblen = value;
 
+	brcmf_sdio_debugfs_create(bus);
+
 	/* the commands below use the terms tx and rx from
 	 * a device perspective, ie. bus:txglom affects the
 	 * bus transfers from device to host.
@@ -4136,14 +4138,6 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 		goto fail;
 	}
 
-	brcmf_sdio_debugfs_create(bus);
-
-	err = brcmf_bus_started(dev);
-	if (err != 0) {
-		brcmf_err("dongle is not responding\n");
-		goto fail;
-	}
-
 	/* ready */
 	return;
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index d22cd1662da6..41642dda40fd 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1155,27 +1155,6 @@ static const struct brcmf_bus_ops brcmf_usb_bus_ops = {
 	.get_fwname = brcmf_usb_get_fwname,
 };
 
-static int brcmf_usb_bus_setup(struct brcmf_usbdev_info *devinfo)
-{
-	int ret;
-
-	/* Attach to the common driver interface */
-	ret = brcmf_attach(devinfo->dev, devinfo->settings);
-	if (ret) {
-		brcmf_err("brcmf_attach failed\n");
-		return ret;
-	}
-
-	ret = brcmf_bus_started(devinfo->dev);
-	if (ret)
-		goto fail;
-
-	return 0;
-fail:
-	brcmf_detach(devinfo->dev);
-	return ret;
-}
-
 static void brcmf_usb_probe_phase2(struct device *dev, int ret,
 				   const struct firmware *fw,
 				   void *nvram, u32 nvlen)
@@ -1203,7 +1182,8 @@ static void brcmf_usb_probe_phase2(struct device *dev, int ret,
 	if (ret)
 		goto error;
 
-	ret = brcmf_usb_bus_setup(devinfo);
+	/* Attach to the common driver interface */
+	ret = brcmf_attach(devinfo->dev, devinfo->settings);
 	if (ret)
 		goto error;
 
@@ -1253,7 +1233,7 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
 	}
 
 	if (!brcmf_usb_dlneeded(devinfo)) {
-		ret = brcmf_usb_bus_setup(devinfo);
+		ret = brcmf_attach(devinfo->dev, devinfo->settings);
 		if (ret)
 			goto fail;
 		/* we are done */
@@ -1456,7 +1436,7 @@ static int brcmf_usb_resume(struct usb_interface *intf)
 
 	brcmf_dbg(USB, "Enter\n");
 	if (!devinfo->wowl_enabled)
-		return brcmf_usb_bus_setup(devinfo);
+		return brcmf_attach(devinfo->dev, devinfo->settings);
 
 	devinfo->bus_pub.state = BRCMFMAC_USB_STATE_UP;
 	brcmf_usb_rx_fill_all(devinfo);

From d678296bfb9a630d0000222fc21f4ed0d0d65332 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:24 +0100
Subject: [PATCH 0421/1678] brcmfmac: change log level for some low-level sdio
 functions

Reducing the number of trace level messages in sdio code giving
them sdio log level instead.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index aa8233620bb9..b94ef3cd781d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -1706,7 +1706,7 @@ brcmf_sdio_read_control(struct brcmf_sdio *bus, u8 *hdr, uint len, uint doff)
 	u8 *buf = NULL, *rbuf;
 	int sdret;
 
-	brcmf_dbg(TRACE, "Enter\n");
+	brcmf_dbg(SDIO, "Enter\n");
 	if (bus->rxblen)
 		buf = vzalloc(bus->rxblen);
 	if (!buf)
@@ -1809,7 +1809,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes)
 	struct brcmf_sdio_hdrinfo *rd = &bus->cur_read, rd_new;
 	u8 head_read = 0;
 
-	brcmf_dbg(TRACE, "Enter\n");
+	brcmf_dbg(SDIO, "Enter\n");
 
 	/* Not finished unless we encounter no more frames indication */
 	bus->rxpending = true;
@@ -2344,7 +2344,7 @@ static int brcmf_sdio_tx_ctrlframe(struct brcmf_sdio *bus, u8 *frame, u16 len)
 	struct brcmf_sdio_hdrinfo hd_info = {0};
 	int ret;
 
-	brcmf_dbg(TRACE, "Enter\n");
+	brcmf_dbg(SDIO, "Enter\n");
 
 	/* Back the pointer to make room for bus header */
 	frame -= bus->tx_hdrlen;
@@ -2520,7 +2520,7 @@ static void brcmf_sdio_dpc(struct brcmf_sdio *bus)
 	uint framecnt;			/* Temporary counter of tx/rx frames */
 	int err = 0;
 
-	brcmf_dbg(TRACE, "Enter\n");
+	brcmf_dbg(SDIO, "Enter\n");
 
 	sdio_claim_host(bus->sdiodev->func1);
 
@@ -2605,7 +2605,7 @@ static void brcmf_sdio_dpc(struct brcmf_sdio *bus)
 
 	/* Would be active due to wake-wlan in gSPI */
 	if (intstatus & I_CHIPACTIVE) {
-		brcmf_dbg(INFO, "Dongle reports CHIPACTIVE\n");
+		brcmf_dbg(SDIO, "Dongle reports CHIPACTIVE\n");
 		intstatus &= ~I_CHIPACTIVE;
 	}
 

From 2d6edad4b2da1991f74e7b02053eeb4a043b887f Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 20 Feb 2018 00:14:25 +0100
Subject: [PATCH 0422/1678] brcmfmac: remove duplicate pointer variable from
 brcmf_sdio_firmware_callback()

In brcmf_sdio_firmware_callback() two pointer variables were used
pointing to the same construct. Get rid of sdiodev variable.

Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 37 +++++++++----------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index b94ef3cd781d..4a6459a429ec 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4039,9 +4039,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 					 void *nvram, u32 nvram_len)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-	struct brcmf_sdio *bus = sdiodev->bus;
-	struct brcmf_sdio_dev *sdiod = bus->sdiodev;
+	struct brcmf_sdio_dev *sdiod = bus_if->bus_priv.sdio;
+	struct brcmf_sdio *bus = sdiod->bus;
 	struct brcmf_core *core = bus->sdio_core;
 	u8 saveclk;
 
@@ -4061,7 +4060,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	bus->sdcnt.tickcnt = 0;
 	brcmf_sdio_wd_timer(bus, true);
 
-	sdio_claim_host(sdiodev->func1);
+	sdio_claim_host(sdiod->func1);
 
 	/* Make sure backplane clock is on, needed to generate F2 interrupt */
 	brcmf_sdio_clkctl(bus, CLK_AVAIL, false);
@@ -4069,9 +4068,9 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 		goto release;
 
 	/* Force clocks on backplane to be sure F2 interrupt propagates */
-	saveclk = brcmf_sdiod_readb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR, &err);
+	saveclk = brcmf_sdiod_readb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR, &err);
 	if (!err) {
-		brcmf_sdiod_writeb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+		brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR,
 				   (saveclk | SBSDIO_FORCE_HT), &err);
 	}
 	if (err) {
@@ -4083,7 +4082,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	brcmf_sdiod_writel(sdiod, core->base + SD_REG(tosbmailboxdata),
 			   SDPCM_PROT_VERSION << SMB_DATA_VERSION_SHIFT, NULL);
 
-	err = sdio_enable_func(sdiodev->func2);
+	err = sdio_enable_func(sdiod->func2);
 
 	brcmf_dbg(INFO, "enable F2: err=%d\n", err);
 
@@ -4095,10 +4094,10 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 				   bus->hostintmask, NULL);
 
 
-		brcmf_sdiod_writeb(sdiodev, SBSDIO_WATERMARK, 8, &err);
+		brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK, 8, &err);
 	} else {
 		/* Disable F2 again */
-		sdio_disable_func(sdiodev->func2);
+		sdio_disable_func(sdiod->func2);
 		goto release;
 	}
 
@@ -4106,7 +4105,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 		brcmf_sdio_sr_init(bus);
 	} else {
 		/* Restore previous clock setting */
-		brcmf_sdiod_writeb(sdiodev, SBSDIO_FUNC1_CHIPCLKCSR,
+		brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_CHIPCLKCSR,
 				   saveclk, &err);
 	}
 
@@ -4114,7 +4113,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 		/* Allow full data communication using DPC from now on. */
 		brcmf_sdiod_change_state(bus->sdiodev, BRCMF_SDIOD_DATA);
 
-		err = brcmf_sdiod_intr_register(sdiodev);
+		err = brcmf_sdiod_intr_register(sdiod);
 		if (err != 0)
 			brcmf_err("intr register failed:%d\n", err);
 	}
@@ -4123,16 +4122,16 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	if (err != 0)
 		brcmf_sdio_clkctl(bus, CLK_NONE, false);
 
-	sdio_release_host(sdiodev->func1);
+	sdio_release_host(sdiod->func1);
 
 	/* Assign bus interface call back */
-	sdiodev->bus_if->dev = sdiodev->dev;
-	sdiodev->bus_if->ops = &brcmf_sdio_bus_ops;
-	sdiodev->bus_if->chip = bus->ci->chip;
-	sdiodev->bus_if->chiprev = bus->ci->chiprev;
+	sdiod->bus_if->dev = sdiod->dev;
+	sdiod->bus_if->ops = &brcmf_sdio_bus_ops;
+	sdiod->bus_if->chip = bus->ci->chip;
+	sdiod->bus_if->chiprev = bus->ci->chiprev;
 
 	/* Attach to the common layer, reserve hdr space */
-	err = brcmf_attach(sdiodev->dev, sdiodev->settings);
+	err = brcmf_attach(sdiod->dev, sdiod->settings);
 	if (err != 0) {
 		brcmf_err("brcmf_attach failed\n");
 		goto fail;
@@ -4142,10 +4141,10 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	return;
 
 release:
-	sdio_release_host(sdiodev->func1);
+	sdio_release_host(sdiod->func1);
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s, err=%d\n", dev_name(dev), err);
-	device_release_driver(&sdiodev->func2->dev);
+	device_release_driver(&sdiod->func2->dev);
 	device_release_driver(dev);
 }
 

From 64d1519edc959f5b8f86a66a51c40971c215e4ec Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 19 Feb 2018 13:30:45 +0100
Subject: [PATCH 0423/1678] brcmfmac: reject too long PSK

nl80211 already allows specifying 48 bytes, but brcmfmac
only supports 32. Reject keys that are too long.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 15fa00d79fc6..74a83020c073 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -5124,6 +5124,9 @@ static int brcmf_cfg80211_set_pmk(struct wiphy *wiphy, struct net_device *dev,
 	if (WARN_ON(ifp->vif->profile.use_fwsup != BRCMF_PROFILE_FWSUP_1X))
 		return -EINVAL;
 
+	if (conf->pmk_len > BRCMF_WSEC_MAX_PSK_LEN)
+		return -ERANGE;
+
 	return brcmf_set_pmk(ifp, conf->pmk, conf->pmk_len);
 }
 

From 47c8a3956a60b2f47389420b1814ad07e4306cea Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Feb 2018 13:36:22 +0000
Subject: [PATCH 0424/1678] brcmsmac: remove duplicated bit-wise or of
 IEEE80211_CHAN_NO_IR

Bit pattern IEEE80211_CHAN_NO_IR is being bit-wise or'd twice;
remove the redundant 2nd IEEE80211_CHAN_NO_IR

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
index 3a03287fa912..db783e94f929 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/channel.c
@@ -652,7 +652,6 @@ static void brcms_reg_apply_radar_flags(struct wiphy *wiphy)
 		 */
 		if (!(ch->flags & IEEE80211_CHAN_DISABLED))
 			ch->flags |= IEEE80211_CHAN_RADAR |
-				     IEEE80211_CHAN_NO_IR |
 				     IEEE80211_CHAN_NO_IR;
 	}
 }

From e28adf9ae63c257c630057ad93491b6eb2ad7211 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 27 Feb 2018 09:57:24 +0800
Subject: [PATCH 0425/1678] rtlwifi: btcoex: fix argument typo of if-statement
 found by Coccinelle

This was detected with static analysis using Coccinelle:

./drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c:1107:5-18:
duplicated argument to && or ||

Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
index fd3b1fb35dff..05beb16f0a0a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
@@ -1104,7 +1104,7 @@ static void halbtc8723b1ant_ps_tdma(struct btc_coexist *btcoexist,
 	}
 
 	if ((type == 1) || (type == 2) || (type == 9) || (type == 11) ||
-	    (type == 101) || (type == 102) || (type == 109) || (type == 101)) {
+	    (type == 101) || (type == 102) || (type == 109) || (type == 111)) {
 		if (!coex_sta->force_lps_on) {
 			/* Native power save TDMA, only for A2DP-only case
 			 * 1/2/9/11 while wifi noisy threshold > 30

From 120d75ecf043044554abbba8507f6d22e4715beb Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Mon, 26 Feb 2018 11:24:01 -0600
Subject: [PATCH 0426/1678] dpaa_eth: fix SG mapping

An issue in the code mapping the skb fragments into
scatter-gather frames was evidentiated by netperf
TCP_SENDFILE tests. The size was set wrong for all
fragments but the first, affecting the transmission
of any skb with more than one fragment.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa/dpaa_eth.c    | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index a998c36c5e61..ff110e65cee4 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -1916,8 +1916,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 		goto csum_failed;
 	}
 
+	/* SGT[0] is used by the linear part */
 	sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
-	qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+	frag_len = skb_headlen(skb);
+	qm_sg_entry_set_len(&sgt[0], frag_len);
 	sgt[0].bpid = FSL_DPAA_BPID_INV;
 	sgt[0].offset = 0;
 	addr = dma_map_single(dev, skb->data,
@@ -1930,9 +1932,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	qm_sg_entry_set64(&sgt[0], addr);
 
 	/* populate the rest of SGT entries */
-	frag = &skb_shinfo(skb)->frags[0];
-	frag_len = frag->size;
-	for (i = 1; i <= nr_frags; i++, frag++) {
+	for (i = 0; i < nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+		frag_len = frag->size;
 		WARN_ON(!skb_frag_page(frag));
 		addr = skb_frag_dma_map(dev, frag, 0,
 					frag_len, dma_dir);
@@ -1942,15 +1944,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 			goto sg_map_failed;
 		}
 
-		qm_sg_entry_set_len(&sgt[i], frag_len);
-		sgt[i].bpid = FSL_DPAA_BPID_INV;
-		sgt[i].offset = 0;
+		qm_sg_entry_set_len(&sgt[i + 1], frag_len);
+		sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
+		sgt[i + 1].offset = 0;
 
 		/* keep the offset in the address */
-		qm_sg_entry_set64(&sgt[i], addr);
-		frag_len = frag->size;
+		qm_sg_entry_set64(&sgt[i + 1], addr);
 	}
-	qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+	/* Set the final bit in the last used entry of the SGT */
+	qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
 
 	qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
 

From 81084b354782b56267b94415420b47e5aedd01cb Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Mon, 26 Feb 2018 11:24:02 -0600
Subject: [PATCH 0427/1678] dpaa_eth: make sure all Rx errors are counted

Simplify the code and avoid some Rx errors not being
accounted.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index ff110e65cee4..bdf57feecfd7 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2204,14 +2204,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
 	if (dpaa_eth_napi_schedule(percpu_priv, portal))
 		return qman_cb_dqrr_stop;
 
-	if (dpaa_eth_refill_bpools(priv))
-		/* Unable to refill the buffer pool due to insufficient
-		 * system memory. Just release the frame back into the pool,
-		 * otherwise we'll soon end up with an empty buffer pool.
-		 */
-		dpaa_fd_release(net_dev, &dq->fd);
-	else
-		dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+	dpaa_eth_refill_bpools(priv);
+	dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
 
 	return qman_cb_dqrr_consume;
 }

From 056a01ba9453258a56993e8645b7b922e55be81c Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Mon, 26 Feb 2018 11:24:03 -0600
Subject: [PATCH 0428/1678] dpaa_eth: refactor frag count checking

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa/dpaa_eth.c    | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index bdf57feecfd7..9bd0ff03f389 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2055,19 +2055,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 	/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
 	 * make sure we don't feed FMan with more fragments than it supports.
 	 */
-	if (nonlinear &&
-	    likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
+	if (unlikely(nonlinear &&
+		     (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
+		/* If the egress skb contains more fragments than we support
+		 * we have no choice but to linearize it ourselves.
+		 */
+		if (__skb_linearize(skb))
+			goto enomem;
+
+		nonlinear = skb_is_nonlinear(skb);
+	}
+
+	if (nonlinear) {
 		/* Just create a S/G fd based on the skb */
 		err = skb_to_sg_fd(priv, skb, &fd);
 		percpu_priv->tx_frag_skbuffs++;
 	} else {
-		/* If the egress skb contains more fragments than we support
-		 * we have no choice but to linearize it ourselves.
-		 */
-		if (unlikely(nonlinear) && __skb_linearize(skb))
-			goto enomem;
-
-		/* Finally, create a contig FD from this skb */
+		/* Create a contig FD from this skb */
 		err = skb_to_contig_fd(priv, skb, &fd, &offset);
 	}
 	if (unlikely(err < 0))

From c893238e5d9b279be4c73d7fdf0dc8986a6c118f Mon Sep 17 00:00:00 2001
From: Radu Bulie <radu-andrei.bulie@nxp.com>
Date: Mon, 26 Feb 2018 11:24:04 -0600
Subject: [PATCH 0429/1678] dpaa_eth: Add allmulti option

This patch adds allmulticast option for memac, dtsec
and 10GEC controllers.

Signed-off-by: Radu Bulie <radu-andrei.bulie@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa/dpaa_eth.c    | 10 ++++++
 .../net/ethernet/freescale/fman/fman_dtsec.c  | 19 +++++++++++
 .../net/ethernet/freescale/fman/fman_dtsec.h  |  1 +
 .../net/ethernet/freescale/fman/fman_memac.c  | 32 ++++++++++++++++--
 .../net/ethernet/freescale/fman/fman_memac.h  |  1 +
 .../net/ethernet/freescale/fman/fman_tgec.c   | 33 +++++++++++++++++--
 .../net/ethernet/freescale/fman/fman_tgec.h   |  1 +
 drivers/net/ethernet/freescale/fman/mac.c     |  3 ++
 drivers/net/ethernet/freescale/fman/mac.h     |  2 ++
 9 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 9bd0ff03f389..159dc2df878d 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev)
 				  err);
 	}
 
+	if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
+		priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
+		err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
+						  priv->mac_dev->allmulti);
+		if (err < 0)
+			netif_err(priv, drv, net_dev,
+				  "mac_dev->set_allmulti() = %d\n",
+				  err);
+	}
+
 	err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
 	if (err < 0)
 		netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index ea43b4974149..9a581faaa742 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 	return 0;
 }
 
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable)
+{
+	u32 tmp;
+	struct dtsec_regs __iomem *regs = dtsec->regs;
+
+	if (!is_init_done(dtsec->dtsec_drv_param))
+		return -EINVAL;
+
+	tmp = ioread32be(&regs->rctrl);
+	if (enable)
+		tmp |= RCTRL_MPROM;
+	else
+		tmp &= ~RCTRL_MPROM;
+
+	iowrite32be(tmp, &regs->rctrl);
+
+	return 0;
+}
+
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr)
 {
 	struct dtsec_regs __iomem *regs = dtsec->regs;
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index c4467c072058..1a689adf5a22 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec,
 int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr);
 int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version);
+int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable);
 
 #endif /* __DTSEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index c0296880feba..446a97b792e3 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -350,6 +350,7 @@ struct fman_mac {
 	struct fman_rev_info fm_rev_info;
 	bool basex_if;
 	struct phy_device *pcsphy;
+	bool allmulti_enabled;
 };
 
 static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
@@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 	return 0;
 }
 
+int memac_set_allmulti(struct fman_mac *memac, bool enable)
+{
+	u32 entry;
+	struct memac_regs __iomem *regs = memac->regs;
+
+	if (!is_init_done(memac->memac_drv_param))
+		return -EINVAL;
+
+	if (enable) {
+		for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+			iowrite32be(entry | HASH_CTRL_MCAST_EN,
+				    &regs->hashtable_ctrl);
+	} else {
+		for (entry = 0; entry < HASH_TABLE_SIZE; entry++)
+			iowrite32be(entry & ~HASH_CTRL_MCAST_EN,
+				    &regs->hashtable_ctrl);
+	}
+
+	memac->allmulti_enabled = enable;
+
+	return 0;
+}
+
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 {
 	struct memac_regs __iomem *regs = memac->regs;
@@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr)
 			break;
 		}
 	}
-	if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
-		iowrite32be(hash & ~HASH_CTRL_MCAST_EN, &regs->hashtable_ctrl);
+
+	if (!memac->allmulti_enabled) {
+		if (list_empty(&memac->multicast_addr_hash->lsts[hash]))
+			iowrite32be(hash & ~HASH_CTRL_MCAST_EN,
+				    &regs->hashtable_ctrl);
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index c4a66469a907..b5a50338ed9a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac,
 			enum fman_mac_exceptions exception, bool enable);
 int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
 int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr);
+int memac_set_allmulti(struct fman_mac *memac, bool enable);
 
 #endif /* __MEMAC_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 4b0f3a50b293..284735d4ebe9 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -217,6 +217,7 @@ struct fman_mac {
 	struct tgec_cfg *cfg;
 	void *fm;
 	struct fman_rev_info fm_rev_info;
+	bool allmulti_enabled;
 };
 
 static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
@@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 	return 0;
 }
 
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable)
+{
+	u32 entry;
+	struct tgec_regs __iomem *regs = tgec->regs;
+
+	if (!is_init_done(tgec->cfg))
+		return -EINVAL;
+
+	if (enable) {
+		for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+			iowrite32be(entry | TGEC_HASH_MCAST_EN,
+				    &regs->hashtable_ctrl);
+	} else {
+		for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++)
+			iowrite32be(entry & ~TGEC_HASH_MCAST_EN,
+				    &regs->hashtable_ctrl);
+	}
+
+	tgec->allmulti_enabled = enable;
+
+	return 0;
+}
+
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 {
 	struct tgec_regs __iomem *regs = tgec->regs;
@@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr)
 			break;
 		}
 	}
-	if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
-		iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
-			    &regs->hashtable_ctrl);
+
+	if (!tgec->allmulti_enabled) {
+		if (list_empty(&tgec->multicast_addr_hash->lsts[hash]))
+			iowrite32be((hash & ~TGEC_HASH_MCAST_EN),
+				    &regs->hashtable_ctrl);
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index 514bba9f47ce..cbbd3b422a98 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec,
 int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr);
 int tgec_get_version(struct fman_mac *tgec, u32 *mac_version);
+int tgec_set_allmulti(struct fman_mac *tgec, bool enable);
 
 #endif /* __TGEC_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 88c0a0636b44..4829dcd9e077 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev)
 	mac_dev->set_tx_pause		= dtsec_set_tx_pause_frames;
 	mac_dev->set_rx_pause		= dtsec_accept_rx_pause_frames;
 	mac_dev->set_exception		= dtsec_set_exception;
+	mac_dev->set_allmulti		= dtsec_set_allmulti;
 	mac_dev->set_multi		= set_multi;
 	mac_dev->start			= start;
 	mac_dev->stop			= stop;
@@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev)
 	mac_dev->set_tx_pause		= tgec_set_tx_pause_frames;
 	mac_dev->set_rx_pause		= tgec_accept_rx_pause_frames;
 	mac_dev->set_exception		= tgec_set_exception;
+	mac_dev->set_allmulti		= tgec_set_allmulti;
 	mac_dev->set_multi		= set_multi;
 	mac_dev->start			= start;
 	mac_dev->stop			= stop;
@@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev)
 	mac_dev->set_tx_pause		= memac_set_tx_pause_frames;
 	mac_dev->set_rx_pause		= memac_accept_rx_pause_frames;
 	mac_dev->set_exception		= memac_set_exception;
+	mac_dev->set_allmulti		= memac_set_allmulti;
 	mac_dev->set_multi		= set_multi;
 	mac_dev->start			= start;
 	mac_dev->stop			= stop;
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index eefb3357e304..b520cec120ee 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -59,6 +59,7 @@ struct mac_device {
 	bool rx_pause_active;
 	bool tx_pause_active;
 	bool promisc;
+	bool allmulti;
 
 	int (*init)(struct mac_device *mac_dev);
 	int (*start)(struct mac_device *mac_dev);
@@ -66,6 +67,7 @@ struct mac_device {
 	void (*adjust_link)(struct mac_device *mac_dev);
 	int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
 	int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+	int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
 	int (*set_multi)(struct net_device *net_dev,
 			 struct mac_device *mac_dev);
 	int (*set_rx_pause)(struct fman_mac *mac_dev, bool en);

From cf0d37aecc06801d4847fb36740da4a5690d9d45 Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Mon, 5 Feb 2018 09:41:34 +0530
Subject: [PATCH 0430/1678] ath10k: fix log message for hif power on failure

HIF power-on failure is applicable to each underlying
bus type. Fix log message for hif power on failure.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index f3ec13b80b20..830b7fe466f3 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -2439,7 +2440,7 @@ static int ath10k_core_probe_fw(struct ath10k *ar)
 
 	ret = ath10k_hif_power_up(ar);
 	if (ret) {
-		ath10k_err(ar, "could not start pci hif (%d)\n", ret);
+		ath10k_err(ar, "could not power on hif bus (%d)\n", ret);
 		return ret;
 	}
 

From a52b839752aab7063c38c1cb7b8e63efc54e3c30 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 26 Feb 2018 09:53:15 -0800
Subject: [PATCH 0431/1678] selftests: Add fib-onlink-tests.sh to TEST_PROGS

Fixes: 153e1b84f477 ("selftests: Add FIB onlink tests")
Reported-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index d7c30d366935..229a038966e3 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa

From fc8b92635f79cfc4dd3015384bebafada0b08f19 Mon Sep 17 00:00:00 2001
From: Ryan Hsu <ryanhsu@codeaurora.org>
Date: Thu, 8 Feb 2018 16:20:31 -0800
Subject: [PATCH 0432/1678] ath10k: update the IRAM bank number for QCA9377

Preparation for a new QCA9377 firmware release. The new firmware release
requires more IRAM banks, hence update that on ath10k.

The IRAM banks promotion won't break any backwards compatibility, as those IRAM
banks were not getting used in previous firmware releases.

Signed-off-by: Ryan Hsu <ryanhsu@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 1b266cd0c2ec..808f3d67ba90 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -2221,7 +2221,7 @@ static int ath10k_pci_get_num_banks(struct ath10k *ar)
 		}
 		break;
 	case QCA9377_1_0_DEVICE_ID:
-		return 4;
+		return 9;
 	}
 
 	ath10k_warn(ar, "unknown number of banks, assuming 1\n");

From 2571c081cb43c156f63b735dfb952119775324f4 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 1 Feb 2018 18:03:27 +0000
Subject: [PATCH 0433/1678] ath5k: remove duplicated re-assignment to pointer
 'tq'

Pointer tq is initialized with &ah->ah_txq[queue] and then a few
lines later is re-assigned the same value, hence this duplicate
assignment is redundant and can be removed.

Cleans up clang warning:
drivers/net/wireless/ath/ath5k/qcu.c:326:25: warning: Value stored
to 'tq' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath5k/qcu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath5k/qcu.c b/drivers/net/wireless/ath/ath5k/qcu.c
index beda11ce34a7..147947f632f7 100644
--- a/drivers/net/wireless/ath/ath5k/qcu.c
+++ b/drivers/net/wireless/ath/ath5k/qcu.c
@@ -327,8 +327,6 @@ ath5k_hw_reset_tx_queue(struct ath5k_hw *ah, unsigned int queue)
 
 	AR5K_ASSERT_ENTRY(queue, ah->ah_capabilities.cap_queues.q_tx_num);
 
-	tq = &ah->ah_txq[queue];
-
 	/* Skip if queue inactive or if we are on AR5210
 	 * that doesn't have QCU/DCU */
 	if ((ah->ah_version == AR5K_AR5210) ||

From 91f1ee65d999a36241cb43bc820b1b59050bc79e Mon Sep 17 00:00:00 2001
From: Wojciech Dubowik <Wojciech.Dubowik@neratec.com>
Date: Tue, 20 Feb 2018 15:41:43 +0100
Subject: [PATCH 0434/1678] ath9k: Fix airtime calculation for quarter/half
 channels

The bitrate value for airtime calculation is specified for
full rates. We need to divide it for 5 and 10MHz channels to
get correct result.

Signed-off-by: Wojciech Dubowik <Wojciech.Dubowik@neratec.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/hw.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index cd0f023ccf77..3017078c6d40 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -184,7 +184,8 @@ u16 ath9k_hw_computetxtime(struct ath_hw *ah,
 		break;
 	case WLAN_RC_PHY_OFDM:
 		if (ah->curchan && IS_CHAN_QUARTER_RATE(ah->curchan)) {
-			bitsPerSymbol =	(kbps * OFDM_SYMBOL_TIME_QUARTER) / 1000;
+			bitsPerSymbol =
+				((kbps >> 2) * OFDM_SYMBOL_TIME_QUARTER) / 1000;
 			numBits = OFDM_PLCP_BITS + (frameLen << 3);
 			numSymbols = DIV_ROUND_UP(numBits, bitsPerSymbol);
 			txTime = OFDM_SIFS_TIME_QUARTER
@@ -192,7 +193,8 @@ u16 ath9k_hw_computetxtime(struct ath_hw *ah,
 				+ (numSymbols * OFDM_SYMBOL_TIME_QUARTER);
 		} else if (ah->curchan &&
 			   IS_CHAN_HALF_RATE(ah->curchan)) {
-			bitsPerSymbol =	(kbps * OFDM_SYMBOL_TIME_HALF) / 1000;
+			bitsPerSymbol =
+				((kbps >> 1) * OFDM_SYMBOL_TIME_HALF) / 1000;
 			numBits = OFDM_PLCP_BITS + (frameLen << 3);
 			numSymbols = DIV_ROUND_UP(numBits, bitsPerSymbol);
 			txTime = OFDM_SIFS_TIME_HALF +

From 52c528ffaf1d4697e35c433a6a2ff81c469c967a Mon Sep 17 00:00:00 2001
From: Wojciech Dubowik <Wojciech.Dubowik@neratec.com>
Date: Tue, 20 Feb 2018 15:42:00 +0100
Subject: [PATCH 0435/1678] ath9k: Fix ack SIFS time for quarter/half channels

Ack timing generation has to be adapted for 5/10 MHz channels.
Do it by properly initializing ack shift field in TXSIFS
register. Ack shift assumes channel width of 2.5 Mhz so
value zero means 2.5 MHz, 1 is 5 MHz and so on.

Signed-off-by: Wojciech Dubowik <Wojciech.Dubowik@neratec.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/hw.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index 3017078c6d40..6b37036b2d36 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -1038,7 +1038,7 @@ void ath9k_hw_init_global_settings(struct ath_hw *ah)
 	int acktimeout, ctstimeout, ack_offset = 0;
 	int slottime;
 	int sifstime;
-	int rx_lat = 0, tx_lat = 0, eifs = 0;
+	int rx_lat = 0, tx_lat = 0, eifs = 0, ack_shift = 0;
 	u32 reg;
 
 	ath_dbg(ath9k_hw_common(ah), RESET, "ah->misc_mode 0x%x\n",
@@ -1070,6 +1070,7 @@ void ath9k_hw_init_global_settings(struct ath_hw *ah)
 
 		sifstime = 32;
 		ack_offset = 16;
+		ack_shift = 3;
 		slottime = 13;
 	} else if (IS_CHAN_QUARTER_RATE(chan)) {
 		eifs = 340;
@@ -1080,6 +1081,7 @@ void ath9k_hw_init_global_settings(struct ath_hw *ah)
 
 		sifstime = 64;
 		ack_offset = 32;
+		ack_shift = 1;
 		slottime = 21;
 	} else {
 		if (AR_SREV_9287(ah) && AR_SREV_9287_13_OR_LATER(ah)) {
@@ -1136,6 +1138,10 @@ void ath9k_hw_init_global_settings(struct ath_hw *ah)
 		SM(tx_lat, AR_USEC_TX_LAT),
 		AR_USEC_TX_LAT | AR_USEC_RX_LAT | AR_USEC_USEC);
 
+	if (IS_CHAN_HALF_RATE(chan) || IS_CHAN_QUARTER_RATE(chan))
+		REG_RMW(ah, AR_TXSIFS,
+			sifstime | SM(ack_shift, AR_TXSIFS_ACK_SHIFT),
+			(AR_TXSIFS_TIME | AR_TXSIFS_ACK_SHIFT));
 }
 EXPORT_SYMBOL(ath9k_hw_init_global_settings);
 

From 6c6aa15fdea52aa4a19cc0b5478884af591c9351 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 24 Feb 2018 16:53:23 +0100
Subject: [PATCH 0436/1678] r8169: improve interrupt handling

This patch improves few aspects of interrupt handling:
- update to current interrupt allocation API
  (use pci_alloc_irq_vectors() instead of deprecated pci_enable_msi())
- this implicitly will allocate a MSI-X interrupt if available
- get rid of flag RTL_FEATURE_MSI
- remove some dead code, intentionally disabling (unreliable) MSI
  being partially available on old PCI chips.

The patch works fine on a RTL8168evl (chip version 34) and on a
RTL8169SB (chip version 04).

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 48 ++++++++++++----------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 96db3283eecf..cc51286ee51f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -736,8 +736,7 @@ struct ring_info {
 };
 
 enum features {
-	RTL_FEATURE_MSI		= (1 << 0),
-	RTL_FEATURE_GMII	= (1 << 1),
+	RTL_FEATURE_GMII	= (1 << 0),
 };
 
 struct rtl8169_counters {
@@ -7847,7 +7846,7 @@ static int rtl8169_close(struct net_device *dev)
 
 	cancel_work_sync(&tp->wk.work);
 
-	free_irq(pdev->irq, dev);
+	pci_free_irq(pdev, 0, dev);
 
 	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
 			  tp->RxPhyAddr);
@@ -7903,9 +7902,8 @@ static int rtl_open(struct net_device *dev)
 
 	rtl_request_firmware(tp);
 
-	retval = request_irq(pdev->irq, rtl8169_interrupt,
-			     (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
-			     dev->name, dev);
+	retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev,
+				 dev->name);
 	if (retval < 0)
 		goto err_release_fw_2;
 
@@ -8253,7 +8251,7 @@ static const struct rtl_cfg_info {
 		.region		= 2,
 		.align		= 8,
 		.event_slow	= SYSErr | LinkChg | RxOverflow,
-		.features	= RTL_FEATURE_GMII | RTL_FEATURE_MSI,
+		.features	= RTL_FEATURE_GMII,
 		.coalesce_info	= rtl_coalesce_info_8168_8136,
 		.default_ver	= RTL_GIGA_MAC_VER_11,
 	},
@@ -8263,32 +8261,26 @@ static const struct rtl_cfg_info {
 		.align		= 8,
 		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver |
 				  PCSTimeout,
-		.features	= RTL_FEATURE_MSI,
 		.coalesce_info	= rtl_coalesce_info_8168_8136,
 		.default_ver	= RTL_GIGA_MAC_VER_13,
 	}
 };
 
-/* Cfg9346_Unlock assumed. */
-static unsigned rtl_try_msi(struct rtl8169_private *tp,
-			    const struct rtl_cfg_info *cfg)
+static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
-	unsigned msi = 0;
-	u8 cfg2;
+	unsigned int flags;
 
-	cfg2 = RTL_R8(Config2) & ~MSIEnable;
-	if (cfg->features & RTL_FEATURE_MSI) {
-		if (pci_enable_msi(tp->pci_dev)) {
-			netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n");
-		} else {
-			cfg2 |= MSIEnable;
-			msi = RTL_FEATURE_MSI;
-		}
+	if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
+		RTL_W8(Cfg9346, Cfg9346_Unlock);
+		RTL_W8(Config2, RTL_R8(Config2) & ~MSIEnable);
+		RTL_W8(Cfg9346, Cfg9346_Lock);
+		flags = PCI_IRQ_LEGACY;
+	} else {
+		flags = PCI_IRQ_ALL_TYPES;
 	}
-	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
-		RTL_W8(Config2, cfg2);
-	return msi;
+
+	return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags);
 }
 
 DECLARE_RTL_COND(rtl_link_list_ready_cond)
@@ -8497,9 +8489,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	chipset = tp->mac_version;
 	tp->txd_version = rtl_chip_infos[chipset].txd_version;
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
-	tp->features |= rtl_try_msi(tp, cfg);
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	rc = rtl_alloc_irq(tp);
+	if (rc < 0) {
+		netif_err(tp, probe, dev, "Can't allocate interrupt\n");
+		return rc;
+	}
 
 	/* override BIOS settings, use userspace tools to enable WOL */
 	__rtl8169_set_wol(tp, 0);

From 9bfd05e35ac3519c26ffa0bfb1ab8933c8f00c74 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Thu, 15 Feb 2018 12:08:28 +0100
Subject: [PATCH 0437/1678] wcn36xx: Fix warning due to duplicate
 scan_completed notification

The wcn36xx_cancel_hw_scan method stops the hw scan and notify the
scan completion via ieee80211_scan_completed.
However, on scan offload cancellation, firmware sends a scan complete
indication, triggering a new call to ieee80211_scan_completed.
This leads to kernel warn since the scan has already been completed.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/main.c | 9 +++------
 drivers/net/wireless/ath/wcn36xx/smd.c  | 2 ++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index ab5be6d2c691..fcc98d4f9f9e 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -666,16 +666,13 @@ static void wcn36xx_cancel_hw_scan(struct ieee80211_hw *hw,
 {
 	struct wcn36xx *wcn = hw->priv;
 
-	if (!wcn36xx_smd_stop_hw_scan(wcn)) {
-		struct cfg80211_scan_info scan_info = { .aborted = true };
-
-		ieee80211_scan_completed(wcn->hw, &scan_info);
-	}
-
 	mutex_lock(&wcn->scan_lock);
 	wcn->scan_aborted = true;
 	mutex_unlock(&wcn->scan_lock);
 
+	/* ieee80211_scan_completed will be called on FW scan indication */
+	wcn36xx_smd_stop_hw_scan(wcn);
+
 	cancel_work_sync(&wcn->scan_work);
 }
 
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 2a4871ca9c72..7cc29285e052 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2138,6 +2138,8 @@ static int wcn36xx_smd_hw_scan_ind(struct wcn36xx *wcn, void *buf, size_t len)
 	case WCN36XX_HAL_SCAN_IND_COMPLETED:
 		mutex_lock(&wcn->scan_lock);
 		wcn->scan_req = NULL;
+		if (wcn->scan_aborted)
+			scan_info.aborted = true;
 		mutex_unlock(&wcn->scan_lock);
 		ieee80211_scan_completed(wcn->hw, &scan_info);
 		break;

From 9f38f28624555af82a2909c9716688367d7297b1 Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:11 +0200
Subject: [PATCH 0438/1678] wil6210: add wil6210_vif structure for per-VIF data

For supporting multiple virtual interfaces in the future,
introduce a wil6210_vif structure which will hold per-VIF
data. Change the module initialization so wil6210_vif will
be part of net_device structure, and wireless_dev will be
embedded inside the wil6210_vif structure. This will allow
us to find the appropriate wil6210_vif structure when we
only have access to wireless_dev or net_device.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 57 +++++++------
 drivers/net/wireless/ath/wil6210/netdev.c   | 95 +++++++++++++--------
 drivers/net/wireless/ath/wil6210/wil6210.h  | 22 ++++-
 3 files changed, 108 insertions(+), 66 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index b799a5384abb..eeed85cf7b10 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -418,6 +419,7 @@ wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	struct net_device *ndev = wil_to_ndev(wil);
+	struct wil6210_vif *vif;
 	struct wireless_dev *p2p_wdev;
 
 	wil_dbg_misc(wil, "add_iface\n");
@@ -432,10 +434,11 @@ wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
 		return ERR_PTR(-EINVAL);
 	}
 
-	p2p_wdev = kzalloc(sizeof(*p2p_wdev), GFP_KERNEL);
-	if (!p2p_wdev)
+	vif = kzalloc(sizeof(*vif), GFP_KERNEL);
+	if (!vif)
 		return ERR_PTR(-ENOMEM);
 
+	p2p_wdev = &vif->wdev;
 	p2p_wdev->iftype = type;
 	p2p_wdev->wiphy = wiphy;
 	/* use our primary ethernet address */
@@ -1893,51 +1896,52 @@ static void wil_wiphy_init(struct wiphy *wiphy)
 #endif
 }
 
-struct wireless_dev *wil_cfg80211_init(struct device *dev)
+struct wil6210_priv *wil_cfg80211_init(struct device *dev)
 {
-	int rc = 0;
-	struct wireless_dev *wdev;
+	struct wiphy *wiphy;
+	struct wil6210_priv *wil;
+	struct ieee80211_channel *ch;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
-	wdev = kzalloc(sizeof(*wdev), GFP_KERNEL);
-	if (!wdev)
+	/* Note: the wireless_dev structure is no longer allocated here.
+	 * Instead, it is allocated as part of the net_device structure
+	 * for main interface and each VIF.
+	 */
+	wiphy = wiphy_new(&wil_cfg80211_ops, sizeof(struct wil6210_priv));
+	if (!wiphy)
 		return ERR_PTR(-ENOMEM);
 
-	wdev->wiphy = wiphy_new(&wil_cfg80211_ops,
-				sizeof(struct wil6210_priv));
-	if (!wdev->wiphy) {
-		rc = -ENOMEM;
-		goto out;
-	}
+	set_wiphy_dev(wiphy, dev);
+	wil_wiphy_init(wiphy);
 
-	set_wiphy_dev(wdev->wiphy, dev);
-	wil_wiphy_init(wdev->wiphy);
+	wil = wiphy_to_wil(wiphy);
+	wil->wiphy = wiphy;
 
-	return wdev;
+	/* default monitor channel */
+	ch = wiphy->bands[NL80211_BAND_60GHZ]->channels;
+	cfg80211_chandef_create(&wil->monitor_chandef, ch, NL80211_CHAN_NO_HT);
 
-out:
-	kfree(wdev);
-
-	return ERR_PTR(rc);
+	return wil;
 }
 
-void wil_wdev_free(struct wil6210_priv *wil)
+void wil_cfg80211_deinit(struct wil6210_priv *wil)
 {
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wiphy *wiphy = wil_to_wiphy(wil);
 
 	dev_dbg(wil_to_dev(wil), "%s()\n", __func__);
 
-	if (!wdev)
+	if (!wiphy)
 		return;
 
-	wiphy_free(wdev->wiphy);
-	kfree(wdev);
+	wiphy_free(wiphy);
+	/* do not access wil6210_priv after returning from here */
 }
 
 void wil_p2p_wdev_free(struct wil6210_priv *wil)
 {
 	struct wireless_dev *p2p_wdev;
+	struct wil6210_vif *vif;
 
 	mutex_lock(&wil->p2p_wdev_mutex);
 	p2p_wdev = wil->p2p_wdev;
@@ -1946,7 +1950,8 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil)
 	mutex_unlock(&wil->p2p_wdev_mutex);
 	if (p2p_wdev) {
 		cfg80211_unregister_wdev(p2p_wdev);
-		kfree(p2p_wdev);
+		vif = wdev_to_vif(p2p_wdev);
+		kfree(vif);
 	}
 }
 
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index 7ba4e0af8f57..648f63682f59 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -121,43 +122,30 @@ static void wil_dev_setup(struct net_device *dev)
 	dev->tx_queue_len = WIL_TX_Q_LEN_DEFAULT;
 }
 
-void *wil_if_alloc(struct device *dev)
+struct wil6210_vif *
+wil_vif_alloc(struct wil6210_priv *wil, const char *name,
+	      unsigned char name_assign_type, enum nl80211_iftype iftype,
+	      u8 mid)
 {
 	struct net_device *ndev;
 	struct wireless_dev *wdev;
-	struct wil6210_priv *wil;
-	struct ieee80211_channel *ch;
-	int rc = 0;
+	struct wil6210_vif *vif;
 
-	wdev = wil_cfg80211_init(dev);
-	if (IS_ERR(wdev)) {
-		dev_err(dev, "wil_cfg80211_init failed\n");
-		return wdev;
-	}
-
-	wil = wdev_to_wil(wdev);
-	wil->wdev = wdev;
-	wil->radio_wdev = wdev;
-
-	wil_dbg_misc(wil, "if_alloc\n");
-
-	rc = wil_priv_init(wil);
-	if (rc) {
-		dev_err(dev, "wil_priv_init failed\n");
-		goto out_wdev;
-	}
-
-	wdev->iftype = NL80211_IFTYPE_STATION; /* TODO */
-	/* default monitor channel */
-	ch = wdev->wiphy->bands[NL80211_BAND_60GHZ]->channels;
-	cfg80211_chandef_create(&wil->monitor_chandef, ch, NL80211_CHAN_NO_HT);
-
-	ndev = alloc_netdev(0, "wlan%d", NET_NAME_UNKNOWN, wil_dev_setup);
+	ndev = alloc_netdev(sizeof(*vif), name, name_assign_type,
+			    wil_dev_setup);
 	if (!ndev) {
-		dev_err(dev, "alloc_netdev_mqs failed\n");
-		rc = -ENOMEM;
-		goto out_priv;
+		dev_err(wil_to_dev(wil), "alloc_netdev failed\n");
+		return ERR_PTR(-ENOMEM);
 	}
+	if (mid == 0)
+		wil->ndev = ndev;
+	vif = ndev_to_vif(ndev);
+	vif->wil = wil;
+	vif->mid = mid;
+
+	wdev = &vif->wdev;
+	wdev->wiphy = wil->wiphy;
+	wdev->iftype = iftype;
 
 	ndev->netdev_ops = &wil_netdev_ops;
 	wil_set_ethtoolops(ndev);
@@ -170,14 +158,49 @@ void *wil_if_alloc(struct device *dev)
 	ndev->features |= ndev->hw_features;
 	SET_NETDEV_DEV(ndev, wiphy_dev(wdev->wiphy));
 	wdev->netdev = ndev;
+	return vif;
+}
+
+void *wil_if_alloc(struct device *dev)
+{
+	struct wireless_dev *wdev;
+	struct wil6210_priv *wil;
+	struct wil6210_vif *vif;
+	int rc = 0;
+
+	wil = wil_cfg80211_init(dev);
+	if (IS_ERR(wil)) {
+		dev_err(dev, "wil_cfg80211_init failed\n");
+		return wil;
+	}
+
+	rc = wil_priv_init(wil);
+	if (rc) {
+		dev_err(dev, "wil_priv_init failed\n");
+		goto out_cfg;
+	}
+
+	wil_dbg_misc(wil, "if_alloc\n");
+
+	vif = wil_vif_alloc(wil, "wlan%d", NET_NAME_UNKNOWN,
+			    NL80211_IFTYPE_STATION, 0);
+	if (IS_ERR(vif)) {
+		dev_err(dev, "wil_vif_alloc failed\n");
+		rc = -ENOMEM;
+		goto out_priv;
+	}
+
+	wdev = &vif->wdev;
+	wil->wdev = wdev;
+	wil->radio_wdev = wdev;
 
 	return wil;
 
- out_priv:
+out_priv:
 	wil_priv_deinit(wil);
 
- out_wdev:
-	wil_wdev_free(wil);
+out_cfg:
+	wil_cfg80211_deinit(wil);
 
 	return ERR_PTR(rc);
 }
@@ -196,13 +219,13 @@ void wil_if_free(struct wil6210_priv *wil)
 	wil_to_ndev(wil) = NULL;
 	free_netdev(ndev);
 
-	wil_wdev_free(wil);
+	wil_cfg80211_deinit(wil);
 }
 
 int wil_if_add(struct wil6210_priv *wil)
 {
 	struct wireless_dev *wdev = wil_to_wdev(wil);
-	struct wiphy *wiphy = wdev->wiphy;
+	struct wiphy *wiphy = wil->wiphy;
 	struct net_device *ndev = wil_to_ndev(wil);
 	int rc;
 
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 0df2aada6659..b33aa7ddf24e 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -669,10 +669,18 @@ extern struct blink_on_off_time led_blink_time[WIL_LED_TIME_LAST];
 extern u8 led_id;
 extern u8 led_polarity;
 
+struct wil6210_vif {
+	struct wireless_dev wdev;
+	struct wil6210_priv *wil;
+	u8 mid;
+};
+
 struct wil6210_priv {
 	struct pci_dev *pdev;
 	u32 bar_size;
+	struct wiphy *wiphy;
 	struct wireless_dev *wdev;
+	struct net_device *ndev;
 	void __iomem *csr;
 	DECLARE_BITMAP(status, wil_status_last);
 	u8 fw_version[ETHTOOL_FWVERS_LEN];
@@ -798,13 +806,15 @@ struct wil6210_priv {
 	u32 iccm_base;
 };
 
-#define wil_to_wiphy(i) (i->wdev->wiphy)
+#define wil_to_wiphy(i) (i->wiphy)
 #define wil_to_dev(i) (wiphy_dev(wil_to_wiphy(i)))
 #define wiphy_to_wil(w) (struct wil6210_priv *)(wiphy_priv(w))
 #define wil_to_wdev(i) (i->wdev)
 #define wdev_to_wil(w) (struct wil6210_priv *)(wdev_priv(w))
-#define wil_to_ndev(i) (wil_to_wdev(i)->netdev)
+#define wil_to_ndev(i) (i->ndev)
 #define ndev_to_wil(n) (wdev_to_wil(n->ieee80211_ptr))
+#define ndev_to_vif(n) (struct wil6210_vif *)(netdev_priv(n))
+#define wdev_to_vif(w) (container_of(w, struct wil6210_vif, wdev))
 
 __printf(2, 3)
 void wil_dbg_trace(struct wil6210_priv *wil, const char *fmt, ...);
@@ -900,6 +910,10 @@ void wil_memcpy_fromio_32(void *dst, const volatile void __iomem *src,
 void wil_memcpy_toio_32(volatile void __iomem *dst, const void *src,
 			size_t count);
 
+struct wil6210_vif *
+wil_vif_alloc(struct wil6210_priv *wil, const char *name,
+	      unsigned char name_assign_type, enum nl80211_iftype iftype,
+	      u8 mid);
 void *wil_if_alloc(struct device *dev);
 void wil_if_free(struct wil6210_priv *wil);
 int wil_if_add(struct wil6210_priv *wil);
@@ -1010,8 +1024,8 @@ static inline void wil6210_debugfs_remove(struct wil6210_priv *wil) {}
 int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
 		       struct station_info *sinfo);
 
-struct wireless_dev *wil_cfg80211_init(struct device *dev);
-void wil_wdev_free(struct wil6210_priv *wil);
+struct wil6210_priv *wil_cfg80211_init(struct device *dev);
+void wil_cfg80211_deinit(struct wil6210_priv *wil);
 void wil_p2p_wdev_free(struct wil6210_priv *wil);
 
 int wmi_set_mac_address(struct wil6210_priv *wil, void *addr);

From 7bfe9e22e487b0cb14bc3bd03e6e987d9789756b Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:12 +0200
Subject: [PATCH 0439/1678] wil6210: support concurrency record in FW file

New FW which supports multiple virtual interfaces, reports
its allowed interface combinations using a special comment
record in the FW file. The format of the interface combinations
is similar to the kernel wiphy->iface_combinations.
When parsing FW file during module initialization, also parse
and validate the concurrency record, and initialize
wiphy->n_iface_combinations and wiphy->iface_combinations
accordingly.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 69 +++++++++++++++++++++
 drivers/net/wireless/ath/wil6210/fw.h       | 38 +++++++++++-
 drivers/net/wireless/ath/wil6210/fw_inc.c   | 52 ++++++++++++++--
 drivers/net/wireless/ath/wil6210/wil6210.h  |  4 ++
 4 files changed, 158 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index eeed85cf7b10..c959f152091f 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -20,6 +20,7 @@
 #include <net/netlink.h>
 #include "wil6210.h"
 #include "wmi.h"
+#include "fw.h"
 
 #define WIL_MAX_ROC_DURATION_MS 5000
 
@@ -1896,6 +1897,71 @@ static void wil_wiphy_init(struct wiphy *wiphy)
 #endif
 }
 
+int wil_cfg80211_iface_combinations_from_fw(
+	struct wil6210_priv *wil, const struct wil_fw_record_concurrency *conc)
+{
+	struct wiphy *wiphy = wil_to_wiphy(wil);
+	u32 total_limits = 0;
+	u16 n_combos;
+	const struct wil_fw_concurrency_combo *combo;
+	const struct wil_fw_concurrency_limit *limit;
+	struct ieee80211_iface_combination *iface_combinations;
+	struct ieee80211_iface_limit *iface_limit;
+	int i, j;
+
+	if (wiphy->iface_combinations) {
+		wil_dbg_misc(wil, "iface_combinations already set, skipping\n");
+		return 0;
+	}
+
+	combo = conc->combos;
+	n_combos = le16_to_cpu(conc->n_combos);
+	for (i = 0; i < n_combos; i++) {
+		total_limits += combo->n_limits;
+		limit = combo->limits + combo->n_limits;
+		combo = (struct wil_fw_concurrency_combo *)limit;
+	}
+
+	iface_combinations =
+		kzalloc(n_combos * sizeof(struct ieee80211_iface_combination) +
+			total_limits * sizeof(struct ieee80211_iface_limit),
+			GFP_KERNEL);
+	if (!iface_combinations)
+		return -ENOMEM;
+	iface_limit = (struct ieee80211_iface_limit *)(iface_combinations +
+						       n_combos);
+	combo = conc->combos;
+	for (i = 0; i < n_combos; i++) {
+		iface_combinations[i].max_interfaces = combo->max_interfaces;
+		iface_combinations[i].num_different_channels =
+			combo->n_diff_channels;
+		iface_combinations[i].beacon_int_infra_match =
+			combo->same_bi;
+		iface_combinations[i].n_limits = combo->n_limits;
+		wil_dbg_misc(wil,
+			     "iface_combination %d: max_if %d, num_ch %d, bi_match %d\n",
+			     i, iface_combinations[i].max_interfaces,
+			     iface_combinations[i].num_different_channels,
+			     iface_combinations[i].beacon_int_infra_match);
+		limit = combo->limits;
+		for (j = 0; j < combo->n_limits; j++) {
+			iface_limit[j].max = le16_to_cpu(limit[j].max);
+			iface_limit[j].types = le16_to_cpu(limit[j].types);
+			wil_dbg_misc(wil,
+				     "limit %d: max %d types 0x%x\n", j,
+				     iface_limit[j].max, iface_limit[j].types);
+		}
+		iface_combinations[i].limits = iface_limit;
+		iface_limit += combo->n_limits;
+		limit += combo->n_limits;
+		combo = (struct wil_fw_concurrency_combo *)limit;
+	}
+
+	wiphy->n_iface_combinations = n_combos;
+	wiphy->iface_combinations = iface_combinations;
+	return 0;
+}
+
 struct wil6210_priv *wil_cfg80211_init(struct device *dev)
 {
 	struct wiphy *wiphy;
@@ -1934,6 +2000,9 @@ void wil_cfg80211_deinit(struct wil6210_priv *wil)
 	if (!wiphy)
 		return;
 
+	kfree(wiphy->iface_combinations);
+	wiphy->iface_combinations = NULL;
+
 	wiphy_free(wiphy);
 	/* do not access wil6210_priv after returning from here */
 }
diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h
index 2c7b24f61587..3e7a28045cab 100644
--- a/drivers/net/wireless/ath/wil6210/fw.h
+++ b/drivers/net/wireless/ath/wil6210/fw.h
@@ -14,6 +14,8 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
+#ifndef __WIL_FW_H__
+#define __WIL_FW_H__
 
 #define WIL_FW_SIGNATURE (0x36323130) /* '0126' */
 #define WIL_FW_FMT_VERSION (1) /* format version driver supports */
@@ -71,7 +73,39 @@ struct wil_fw_record_capabilities { /* type == wil_fw_type_comment */
 	struct wil_fw_record_comment_hdr hdr;
 	/* capabilities (variable size), see enum wmi_fw_capability */
 	u8 capabilities[0];
-};
+} __packed;
+
+/* FW VIF concurrency encoded inside a comment record
+ * Format is similar to wiphy->iface_combinations
+ */
+#define WIL_FW_CONCURRENCY_MAGIC (0xfedccdef)
+#define WIL_FW_CONCURRENCY_REC_VER	1
+struct wil_fw_concurrency_limit {
+	__le16 max; /* maximum number of interfaces of these types */
+	__le16 types; /* interface types (bit mask of enum nl80211_iftype) */
+} __packed;
+
+struct wil_fw_concurrency_combo {
+	u8 n_limits; /* number of wil_fw_concurrency_limit entries */
+	u8 max_interfaces; /* max number of concurrent interfaces allowed */
+	u8 n_diff_channels; /* total number of different channels allowed */
+	u8 same_bi; /* for APs, 1 if all APs must have same BI */
+	/* keep last - concurrency limits, variable size by n_limits */
+	struct wil_fw_concurrency_limit limits[0];
+} __packed;
+
+struct wil_fw_record_concurrency { /* type == wil_fw_type_comment */
+	/* identifies concurrency record */
+	__le32 magic;
+	/* structure version, currently always 1 */
+	u8 version;
+	/* maximum number of supported MIDs _in addition_ to MID 0 */
+	u8 n_mids;
+	/* number of concurrency combinations that follow */
+	__le16 n_combos;
+	/* keep last - combinations, variable size by n_combos */
+	struct wil_fw_concurrency_combo combos[0];
+} __packed;
 
 /* brd file info encoded inside a comment record */
 #define WIL_BRD_FILE_MAGIC (0xabcddcbb)
@@ -175,3 +209,5 @@ struct wil_fw_record_gateway_data4 { /* type == wil_fw_type_gateway_data4 */
 	__le32 command;
 	struct wil_fw_data_gw4 data[0]; /* total size [data_size], see above */
 } __packed;
+
+#endif /* __WIL_FW_H__ */
diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c
index 914c0106e94b..718161b829c2 100644
--- a/drivers/net/wireless/ath/wil6210/fw_inc.c
+++ b/drivers/net/wireless/ath/wil6210/fw_inc.c
@@ -136,8 +136,8 @@ fw_handle_capabilities(struct wil6210_priv *wil, const void *data,
 	size_t capa_size;
 
 	if (size < sizeof(*rec)) {
-		wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1,
-				data, size, true);
+		wil_err_fw(wil, "capabilities record too short: %zu\n", size);
+		/* let the FW load anyway */
 		return 0;
 	}
 
@@ -158,8 +158,7 @@ fw_handle_brd_file(struct wil6210_priv *wil, const void *data,
 	const struct wil_fw_record_brd_file *rec = data;
 
 	if (size < sizeof(*rec)) {
-		wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1,
-				data, size, true);
+		wil_err_fw(wil, "brd_file record too short: %zu\n", size);
 		return 0;
 	}
 
@@ -172,6 +171,44 @@ fw_handle_brd_file(struct wil6210_priv *wil, const void *data,
 	return 0;
 }
 
+static int
+fw_handle_concurrency(struct wil6210_priv *wil, const void *data,
+		      size_t size)
+{
+	const struct wil_fw_record_concurrency *rec = data;
+	const struct wil_fw_concurrency_combo *combo;
+	const struct wil_fw_concurrency_limit *limit;
+	size_t remain, lsize;
+	int i, n_combos;
+
+	if (size < sizeof(*rec)) {
+		wil_err_fw(wil, "concurrency record too short: %zu\n", size);
+		/* continue, let the FW load anyway */
+		return 0;
+	}
+
+	n_combos = le16_to_cpu(rec->n_combos);
+	remain = size - offsetof(struct wil_fw_record_concurrency, combos);
+	combo = rec->combos;
+	for (i = 0; i < n_combos; i++) {
+		if (remain < sizeof(*combo))
+			goto out_short;
+		remain -= sizeof(*combo);
+		limit = combo->limits;
+		lsize = combo->n_limits * sizeof(*limit);
+		if (remain < lsize)
+			goto out_short;
+		remain -= lsize;
+		limit += combo->n_limits;
+		combo = (struct wil_fw_concurrency_combo *)limit;
+	}
+
+	return wil_cfg80211_iface_combinations_from_fw(wil, rec);
+out_short:
+	wil_err_fw(wil, "concurrency record truncated\n");
+	return 0;
+}
+
 static int
 fw_handle_comment(struct wil6210_priv *wil, const void *data,
 		  size_t size)
@@ -194,6 +231,13 @@ fw_handle_comment(struct wil6210_priv *wil, const void *data,
 		wil_dbg_fw(wil, "magic is WIL_BRD_FILE_MAGIC\n");
 		rc = fw_handle_brd_file(wil, data, size);
 		break;
+	case WIL_FW_CONCURRENCY_MAGIC:
+		wil_dbg_fw(wil, "magic is WIL_FW_CONCURRENCY_MAGIC\n");
+		rc = fw_handle_concurrency(wil, data, size);
+		break;
+	default:
+		wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1,
+				data, size, true);
 	}
 
 	return rc;
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index b33aa7ddf24e..81cb27af64e4 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include "wmi.h"
 #include "wil_platform.h"
+#include "fw.h"
 
 extern bool no_fw_recovery;
 extern unsigned int mtu_max;
@@ -1012,6 +1013,9 @@ int wmi_stop_discovery(struct wil6210_priv *wil);
 int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 			 struct cfg80211_mgmt_tx_params *params,
 			 u64 *cookie);
+int wil_cfg80211_iface_combinations_from_fw(
+	struct wil6210_priv *wil,
+	const struct wil_fw_record_concurrency *conc);
 
 #if defined(CONFIG_WIL6210_DEBUGFS)
 int wil6210_debugfs_init(struct wil6210_priv *wil);

From e00243fab84b4efd5a250d1c47a4ddcca4c666ce Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:13 +0200
Subject: [PATCH 0440/1678] wil6210: infrastructure for multiple virtual
 interfaces

Simple infrastructure changes for supporting multiple
virtual interfaces (multiple VIFs).
It is still not possible to add new VIFs so the only VIF
belongs to the main interface.
Main changes:
1. Add MAC ID(mid) argument to wmi_send and wmi_call to
allow invoking WMI commands on different VIFs.
2. Similarly, in WMI event handler look at the mid reported
by FW and extract VIF structure (currently only for main
interface). All WMI event handlers operate on wil6210_vif
structure so they know on which VIF they were called.
3. Trivial changes to use wil6210_vif structure and MID
throughout the code.
4. Various changes to logging to report MID.

More complete multiple VIFs support will be added gradually
in next patches.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c   | 341 ++++++++++--------
 drivers/net/wireless/ath/wil6210/debug.c      |   9 +-
 drivers/net/wireless/ath/wil6210/debugfs.c    |  62 ++--
 drivers/net/wireless/ath/wil6210/ethtool.c    |   4 +-
 drivers/net/wireless/ath/wil6210/interrupt.c  |   6 +-
 drivers/net/wireless/ath/wil6210/main.c       | 207 +++++------
 drivers/net/wireless/ath/wil6210/netdev.c     | 102 +++++-
 drivers/net/wireless/ath/wil6210/p2p.c        | 145 ++++----
 drivers/net/wireless/ath/wil6210/pcie_bus.c   |   4 +-
 drivers/net/wireless/ath/wil6210/pm.c         |   9 +-
 drivers/net/wireless/ath/wil6210/pmc.c        |   8 +-
 drivers/net/wireless/ath/wil6210/rx_reorder.c |  13 +-
 drivers/net/wireless/ath/wil6210/txrx.c       | 106 +++---
 drivers/net/wireless/ath/wil6210/wil6210.h    | 150 ++++----
 drivers/net/wireless/ath/wil6210/wmi.c        | 326 ++++++++++-------
 15 files changed, 877 insertions(+), 615 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index c959f152091f..2fd4af8d94dc 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -260,9 +260,10 @@ int wil_iftype_nl2wmi(enum nl80211_iftype type)
 	return -EOPNOTSUPP;
 }
 
-int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
+int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid,
 		       struct station_info *sinfo)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_notify_req_cmd cmd = {
 		.cid = cid,
 		.interval_usec = 0,
@@ -274,17 +275,17 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
 	struct wil_net_stats *stats = &wil->sta[cid].stats;
 	int rc;
 
-	rc = wmi_call(wil, WMI_NOTIFY_REQ_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_NOTIFY_REQ_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_NOTIFY_REQ_DONE_EVENTID, &reply, sizeof(reply), 20);
 	if (rc)
 		return rc;
 
-	wil_dbg_wmi(wil, "Link status for CID %d: {\n"
+	wil_dbg_wmi(wil, "Link status for CID %d MID %d: {\n"
 		    "  MCS %d TSF 0x%016llx\n"
 		    "  BF status 0x%08x RSSI %d SQI %d%%\n"
 		    "  Tx Tpt %d goodput %d Rx goodput %d\n"
 		    "  Sectors(rx:tx) my %d:%d peer %d:%d\n""}\n",
-		    cid, le16_to_cpu(reply.evt.bf_mcs),
+		    cid, vif->mid, le16_to_cpu(reply.evt.bf_mcs),
 		    le64_to_cpu(reply.evt.tsf), reply.evt.status,
 		    reply.evt.rssi,
 		    reply.evt.sqi,
@@ -333,30 +334,34 @@ static int wil_cfg80211_get_station(struct wiphy *wiphy,
 				    struct net_device *ndev,
 				    const u8 *mac, struct station_info *sinfo)
 {
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	int rc;
 
-	int cid = wil_find_cid(wil, mac);
+	int cid = wil_find_cid(wil, vif->mid, mac);
 
-	wil_dbg_misc(wil, "get_station: %pM CID %d\n", mac, cid);
+	wil_dbg_misc(wil, "get_station: %pM CID %d MID %d\n", mac, cid,
+		     vif->mid);
 	if (cid < 0)
 		return cid;
 
-	rc = wil_cid_fill_sinfo(wil, cid, sinfo);
+	rc = wil_cid_fill_sinfo(vif, cid, sinfo);
 
 	return rc;
 }
 
 /*
- * Find @idx-th active STA for station dump.
+ * Find @idx-th active STA for specific MID for station dump.
  */
-static int wil_find_cid_by_idx(struct wil6210_priv *wil, int idx)
+static int wil_find_cid_by_idx(struct wil6210_priv *wil, u8 mid, int idx)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(wil->sta); i++) {
 		if (wil->sta[i].status == wil_sta_unused)
 			continue;
+		if (wil->sta[i].mid != mid)
+			continue;
 		if (idx == 0)
 			return i;
 		idx--;
@@ -369,17 +374,19 @@ static int wil_cfg80211_dump_station(struct wiphy *wiphy,
 				     struct net_device *dev, int idx,
 				     u8 *mac, struct station_info *sinfo)
 {
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	int rc;
-	int cid = wil_find_cid_by_idx(wil, idx);
+	int cid = wil_find_cid_by_idx(wil, vif->mid, idx);
 
 	if (cid < 0)
 		return -ENOENT;
 
 	ether_addr_copy(mac, wil->sta[cid].addr);
-	wil_dbg_misc(wil, "dump_station: %pM CID %d\n", mac, cid);
+	wil_dbg_misc(wil, "dump_station: %pM CID %d MID %d\n", mac, cid,
+		     vif->mid);
 
-	rc = wil_cid_fill_sinfo(wil, cid, sinfo);
+	rc = wil_cid_fill_sinfo(vif, cid, sinfo);
 
 	return rc;
 }
@@ -390,7 +397,7 @@ static int wil_cfg80211_start_p2p_device(struct wiphy *wiphy,
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 
 	wil_dbg_misc(wil, "start_p2p_device: entered\n");
-	wil->p2p.p2p_dev_started = 1;
+	wil->p2p_dev_started = 1;
 	return 0;
 }
 
@@ -398,16 +405,15 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy,
 					 struct wireless_dev *wdev)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	struct wil_p2p_info *p2p = &wil->p2p;
 
-	if (!p2p->p2p_dev_started)
+	if (!wil->p2p_dev_started)
 		return;
 
 	wil_dbg_misc(wil, "stop_p2p_device: entered\n");
 	mutex_lock(&wil->mutex);
 	mutex_lock(&wil->p2p_wdev_mutex);
 	wil_p2p_stop_radio_operations(wil);
-	p2p->p2p_dev_started = 0;
+	wil->p2p_dev_started = 0;
 	mutex_unlock(&wil->p2p_wdev_mutex);
 	mutex_unlock(&wil->mutex);
 }
@@ -419,7 +425,7 @@ wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
 		       struct vif_params *params)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct wil6210_vif *vif;
 	struct wireless_dev *p2p_wdev;
 
@@ -474,12 +480,13 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 				     struct vif_params *params)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
+	struct wireless_dev *wdev = vif_to_wdev(vif);
 	int rc;
 
 	wil_dbg_misc(wil, "change_iface: type=%d\n", type);
 
-	if (netif_running(wil_to_ndev(wil)) && !wil_is_recovery_blocked(wil)) {
+	if (netif_running(ndev) && !wil_is_recovery_blocked(wil)) {
 		wil_dbg_misc(wil, "interface is up. resetting...\n");
 		mutex_lock(&wil->mutex);
 		__wil_down(wil);
@@ -514,6 +521,7 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	struct wireless_dev *wdev = request->wdev;
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 	struct {
 		struct wmi_start_scan_cmd cmd;
 		u16 chnl[4];
@@ -542,7 +550,7 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 	mutex_lock(&wil->mutex);
 
 	mutex_lock(&wil->p2p_wdev_mutex);
-	if (wil->scan_request || wil->p2p.discovery_started) {
+	if (vif->scan_request || vif->p2p.discovery_started) {
 		wil_err(wil, "Already scanning\n");
 		mutex_unlock(&wil->p2p_wdev_mutex);
 		rc = -EAGAIN;
@@ -551,25 +559,28 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 	mutex_unlock(&wil->p2p_wdev_mutex);
 
 	if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE) {
-		if (!wil->p2p.p2p_dev_started) {
+		if (!wil->p2p_dev_started) {
 			wil_err(wil, "P2P search requested on stopped P2P device\n");
 			rc = -EIO;
 			goto out;
 		}
 		/* social scan on P2P_DEVICE is handled as p2p search */
 		if (wil_p2p_is_social_scan(request)) {
-			wil->scan_request = request;
-			wil->radio_wdev = wdev;
-			rc = wil_p2p_search(wil, request);
+			vif->scan_request = request;
+			if (vif->mid == 0)
+				wil->radio_wdev = wdev;
+			rc = wil_p2p_search(vif, request);
 			if (rc) {
-				wil->radio_wdev = wil_to_wdev(wil);
-				wil->scan_request = NULL;
+				if (vif->mid == 0)
+					wil->radio_wdev =
+						wil->main_ndev->ieee80211_ptr;
+				vif->scan_request = NULL;
 			}
 			goto out;
 		}
 	}
 
-	(void)wil_p2p_stop_discovery(wil);
+	(void)wil_p2p_stop_discovery(vif);
 
 	wil_dbg_misc(wil, "Start scan_request 0x%p\n", request);
 	wil_dbg_misc(wil, "SSID count: %d", request->n_ssids);
@@ -582,18 +593,18 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 	}
 
 	if (request->n_ssids)
-		rc = wmi_set_ssid(wil, request->ssids[0].ssid_len,
+		rc = wmi_set_ssid(vif, request->ssids[0].ssid_len,
 				  request->ssids[0].ssid);
 	else
-		rc = wmi_set_ssid(wil, 0, NULL);
+		rc = wmi_set_ssid(vif, 0, NULL);
 
 	if (rc) {
 		wil_err(wil, "set SSID for scan request failed: %d\n", rc);
 		goto out;
 	}
 
-	wil->scan_request = request;
-	mod_timer(&wil->scan_timer, jiffies + WIL6210_SCAN_TO);
+	vif->scan_request = request;
+	mod_timer(&vif->scan_timer, jiffies + WIL6210_SCAN_TO);
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd.scan_type = WMI_ACTIVE_SCAN;
@@ -620,7 +631,8 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 	else
 		wil_dbg_misc(wil, "Scan has no IE's\n");
 
-	rc = wmi_set_ie(wil, WMI_FRAME_PROBE_REQ, request->ie_len, request->ie);
+	rc = wmi_set_ie(vif, WMI_FRAME_PROBE_REQ,
+			request->ie_len, request->ie);
 	if (rc)
 		goto out_restore;
 
@@ -629,15 +641,18 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 		wil_dbg_misc(wil, "active scan with discovery_mode=1\n");
 	}
 
-	wil->radio_wdev = wdev;
-	rc = wmi_send(wil, WMI_START_SCAN_CMDID, &cmd, sizeof(cmd.cmd) +
-			cmd.cmd.num_channels * sizeof(cmd.cmd.channel_list[0]));
+	if (vif->mid == 0)
+		wil->radio_wdev = wdev;
+	rc = wmi_send(wil, WMI_START_SCAN_CMDID, vif->mid,
+		      &cmd, sizeof(cmd.cmd) +
+		      cmd.cmd.num_channels * sizeof(cmd.cmd.channel_list[0]));
 
 out_restore:
 	if (rc) {
-		del_timer_sync(&wil->scan_timer);
-		wil->radio_wdev = wil_to_wdev(wil);
-		wil->scan_request = NULL;
+		del_timer_sync(&vif->scan_timer);
+		if (vif->mid == 0)
+			wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
+		vif->scan_request = NULL;
 	}
 out:
 	mutex_unlock(&wil->mutex);
@@ -648,24 +663,25 @@ static void wil_cfg80211_abort_scan(struct wiphy *wiphy,
 				    struct wireless_dev *wdev)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 
 	wil_dbg_misc(wil, "wdev=0x%p iftype=%d\n", wdev, wdev->iftype);
 
 	mutex_lock(&wil->mutex);
 	mutex_lock(&wil->p2p_wdev_mutex);
 
-	if (!wil->scan_request)
+	if (!vif->scan_request)
 		goto out;
 
-	if (wdev != wil->scan_request->wdev) {
+	if (wdev != vif->scan_request->wdev) {
 		wil_dbg_misc(wil, "abort scan was called on the wrong iface\n");
 		goto out;
 	}
 
-	if (wil->radio_wdev == wil->p2p_wdev)
+	if (wdev == wil->p2p_wdev && wil->radio_wdev == wil->p2p_wdev)
 		wil_p2p_stop_radio_operations(wil);
 	else
-		wil_abort_scan(wil, true);
+		wil_abort_scan(vif, true);
 
 out:
 	mutex_unlock(&wil->p2p_wdev_mutex);
@@ -719,6 +735,7 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 				struct cfg80211_connect_params *sme)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct cfg80211_bss *bss;
 	struct wmi_connect_cmd conn;
 	const u8 *ssid_eid;
@@ -727,7 +744,7 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 	int rc = 0;
 	enum ieee80211_bss_type bss_type = IEEE80211_BSS_TYPE_ESS;
 
-	wil_dbg_misc(wil, "connect\n");
+	wil_dbg_misc(wil, "connect, mid=%d\n", vif->mid);
 	wil_print_connect_params(wil, sme);
 
 	if (test_bit(wil_status_fwconnecting, wil->status) ||
@@ -762,18 +779,18 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 		rc = -ENOENT;
 		goto out;
 	}
-	wil->privacy = sme->privacy;
-	wil->pbss = sme->pbss;
+	vif->privacy = sme->privacy;
+	vif->pbss = sme->pbss;
 
-	if (wil->privacy) {
+	if (vif->privacy) {
 		/* For secure assoc, remove old keys */
-		rc = wmi_del_cipher_key(wil, 0, bss->bssid,
+		rc = wmi_del_cipher_key(vif, 0, bss->bssid,
 					WMI_KEY_USE_PAIRWISE);
 		if (rc) {
 			wil_err(wil, "WMI_DELETE_CIPHER_KEY_CMD(PTK) failed\n");
 			goto out;
 		}
-		rc = wmi_del_cipher_key(wil, 0, bss->bssid,
+		rc = wmi_del_cipher_key(vif, 0, bss->bssid,
 					WMI_KEY_USE_RX_GROUP);
 		if (rc) {
 			wil_err(wil, "WMI_DELETE_CIPHER_KEY_CMD(GTK) failed\n");
@@ -785,7 +802,7 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 	 * elements. Send it also in case it's empty, to erase previously set
 	 * ies in FW.
 	 */
-	rc = wmi_set_ie(wil, WMI_FRAME_ASSOC_REQ, sme->ie_len, sme->ie);
+	rc = wmi_set_ie(vif, WMI_FRAME_ASSOC_REQ, sme->ie_len, sme->ie);
 	if (rc)
 		goto out;
 
@@ -803,7 +820,7 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 			bss->capability);
 		goto out;
 	}
-	if (wil->privacy) {
+	if (vif->privacy) {
 		if (rsn_eid) { /* regular secure connection */
 			conn.dot11_auth_mode = WMI_AUTH11_SHARED;
 			conn.auth_mode = WMI_AUTH_WPA2_PSK;
@@ -837,13 +854,13 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 
 	set_bit(wil_status_fwconnecting, wil->status);
 
-	rc = wmi_send(wil, WMI_CONNECT_CMDID, &conn, sizeof(conn));
+	rc = wmi_send(wil, WMI_CONNECT_CMDID, vif->mid, &conn, sizeof(conn));
 	if (rc == 0) {
 		netif_carrier_on(ndev);
 		wil6210_bus_request(wil, WIL_MAX_BUS_REQUEST_KBPS);
-		wil->bss = bss;
+		vif->bss = bss;
 		/* Connect can take lots of time */
-		mod_timer(&wil->connect_timer,
+		mod_timer(&vif->connect_timer,
 			  jiffies + msecs_to_jiffies(5000));
 	} else {
 		clear_bit(wil_status_fwconnecting, wil->status);
@@ -861,8 +878,10 @@ static int wil_cfg80211_disconnect(struct wiphy *wiphy,
 {
 	int rc;
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 
-	wil_dbg_misc(wil, "disconnect: reason=%d\n", reason_code);
+	wil_dbg_misc(wil, "disconnect: reason=%d, mid=%d\n",
+		     reason_code, vif->mid);
 
 	if (!(test_bit(wil_status_fwconnecting, wil->status) ||
 	      test_bit(wil_status_fwconnected, wil->status))) {
@@ -870,8 +889,8 @@ static int wil_cfg80211_disconnect(struct wiphy *wiphy,
 		return 0;
 	}
 
-	wil->locally_generated_disc = true;
-	rc = wmi_call(wil, WMI_DISCONNECT_CMDID, NULL, 0,
+	vif->locally_generated_disc = true;
+	rc = wmi_call(wil, WMI_DISCONNECT_CMDID, vif->mid, NULL, 0,
 		      WMI_DISCONNECT_EVENTID, NULL, 0,
 		      WIL6210_DISCONNECT_TO_MS);
 	if (rc)
@@ -907,6 +926,7 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	const u8 *buf = params->buf;
 	size_t len = params->len, total;
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 	int rc;
 	bool tx_status = false;
 	struct ieee80211_mgmt *mgmt_frame = (void *)buf;
@@ -923,7 +943,7 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	 * different from currently "listened" channel and fail if it is.
 	 */
 
-	wil_dbg_misc(wil, "mgmt_tx\n");
+	wil_dbg_misc(wil, "mgmt_tx mid %d\n", vif->mid);
 	wil_hex_dump_misc("mgmt tx frame ", DUMP_PREFIX_OFFSET, 16, 1, buf,
 			  len, true);
 
@@ -944,7 +964,7 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	cmd->len = cpu_to_le16(len);
 	memcpy(cmd->payload, buf, len);
 
-	rc = wmi_call(wil, WMI_SW_TX_REQ_CMDID, cmd, total,
+	rc = wmi_call(wil, WMI_SW_TX_REQ_CMDID, vif->mid, cmd, total,
 		      WMI_SW_TX_COMPLETE_EVENTID, &evt, sizeof(evt), 2000);
 	if (rc == 0)
 		tx_status = !evt.evt.status;
@@ -966,10 +986,10 @@ static int wil_cfg80211_set_channel(struct wiphy *wiphy,
 	return 0;
 }
 
-static enum wmi_key_usage wil_detect_key_usage(struct wil6210_priv *wil,
+static enum wmi_key_usage wil_detect_key_usage(struct wireless_dev *wdev,
 					       bool pairwise)
 {
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wil6210_priv *wil = wdev_to_wil(wdev);
 	enum wmi_key_usage rc;
 
 	if (pairwise) {
@@ -997,7 +1017,7 @@ static enum wmi_key_usage wil_detect_key_usage(struct wil6210_priv *wil,
 }
 
 static struct wil_sta_info *
-wil_find_sta_by_key_usage(struct wil6210_priv *wil,
+wil_find_sta_by_key_usage(struct wil6210_priv *wil, u8 mid,
 			  enum wmi_key_usage key_usage, const u8 *mac_addr)
 {
 	int cid = -EINVAL;
@@ -1007,9 +1027,9 @@ wil_find_sta_by_key_usage(struct wil6210_priv *wil,
 
 	/* supplicant provides Rx group key in STA mode with NULL MAC address */
 	if (mac_addr)
-		cid = wil_find_cid(wil, mac_addr);
+		cid = wil_find_cid(wil, mid, mac_addr);
 	else if (key_usage == WMI_KEY_USE_RX_GROUP)
-		cid = wil_find_cid_by_idx(wil, 0);
+		cid = wil_find_cid_by_idx(wil, mid, 0);
 	if (cid < 0) {
 		wil_err(wil, "No CID for %pM %s\n", mac_addr,
 			key_usage_str[key_usage]);
@@ -1086,9 +1106,12 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy,
 				struct key_params *params)
 {
 	int rc;
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise);
-	struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage,
+	struct wireless_dev *wdev = vif_to_wdev(vif);
+	enum wmi_key_usage key_usage = wil_detect_key_usage(wdev, pairwise);
+	struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, vif->mid,
+							    key_usage,
 							    mac_addr);
 
 	if (!params) {
@@ -1118,7 +1141,7 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
-	rc = wmi_add_cipher_key(wil, key_index, mac_addr, params->key_len,
+	rc = wmi_add_cipher_key(vif, key_index, mac_addr, params->key_len,
 				params->key, key_usage);
 	if (!rc)
 		wil_set_crypto_rx(key_index, key_usage, cs, params);
@@ -1131,9 +1154,12 @@ static int wil_cfg80211_del_key(struct wiphy *wiphy,
 				u8 key_index, bool pairwise,
 				const u8 *mac_addr)
 {
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise);
-	struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage,
+	struct wireless_dev *wdev = vif_to_wdev(vif);
+	enum wmi_key_usage key_usage = wil_detect_key_usage(wdev, pairwise);
+	struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, vif->mid,
+							    key_usage,
 							    mac_addr);
 
 	wil_dbg_misc(wil, "del_key: %pM %s[%d]\n", mac_addr,
@@ -1146,7 +1172,7 @@ static int wil_cfg80211_del_key(struct wiphy *wiphy,
 	if (!IS_ERR_OR_NULL(cs))
 		wil_del_rx_key(key_index, key_usage, cs);
 
-	return wmi_del_cipher_key(wil, key_index, mac_addr, key_usage);
+	return wmi_del_cipher_key(vif, key_index, mac_addr, key_usage);
 }
 
 /* Need to be present or wiphy_new() will WARN */
@@ -1183,10 +1209,11 @@ static int wil_cancel_remain_on_channel(struct wiphy *wiphy,
 					u64 cookie)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 
 	wil_dbg_misc(wil, "cancel_remain_on_channel\n");
 
-	return wil_p2p_cancel_listen(wil, cookie);
+	return wil_p2p_cancel_listen(vif, cookie);
 }
 
 /**
@@ -1279,11 +1306,10 @@ static void wil_print_bcon_data(struct cfg80211_beacon_data *b)
 }
 
 /* internal functions for device reset and starting AP */
-static int _wil_cfg80211_set_ies(struct wiphy *wiphy,
+static int _wil_cfg80211_set_ies(struct wil6210_vif *vif,
 				 struct cfg80211_beacon_data *bcon)
 {
 	int rc;
-	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	u16 len = 0, proberesp_len = 0;
 	u8 *ies = NULL, *proberesp = NULL;
 
@@ -1304,20 +1330,21 @@ static int _wil_cfg80211_set_ies(struct wiphy *wiphy,
 	if (rc)
 		goto out;
 
-	rc = wmi_set_ie(wil, WMI_FRAME_PROBE_RESP, len, ies);
+	rc = wmi_set_ie(vif, WMI_FRAME_PROBE_RESP, len, ies);
 	if (rc)
 		goto out;
 
 	if (bcon->assocresp_ies)
-		rc = wmi_set_ie(wil, WMI_FRAME_ASSOC_RESP,
+		rc = wmi_set_ie(vif, WMI_FRAME_ASSOC_RESP,
 				bcon->assocresp_ies_len, bcon->assocresp_ies);
 	else
-		rc = wmi_set_ie(wil, WMI_FRAME_ASSOC_RESP, len, ies);
+		rc = wmi_set_ie(vif, WMI_FRAME_ASSOC_RESP, len, ies);
 #if 0 /* to use beacon IE's, remove this #if 0 */
 	if (rc)
 		goto out;
 
-	rc = wmi_set_ie(wil, WMI_FRAME_BEACON, bcon->tail_len, bcon->tail);
+	rc = wmi_set_ie(vif, WMI_FRAME_BEACON,
+			bcon->tail_len, bcon->tail);
 #endif
 out:
 	kfree(ies);
@@ -1332,6 +1359,7 @@ static int _wil_cfg80211_start_ap(struct wiphy *wiphy,
 				  u8 hidden_ssid, u32 pbss)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	int rc;
 	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 	u8 wmi_nettype = wil_iftype_nl2wmi(wdev->iftype);
@@ -1340,7 +1368,7 @@ static int _wil_cfg80211_start_ap(struct wiphy *wiphy,
 	if (pbss)
 		wmi_nettype = WMI_NETTYPE_P2P;
 
-	wil_dbg_misc(wil, "start_ap: is_go=%d\n", is_go);
+	wil_dbg_misc(wil, "start_ap: mid=%d, is_go=%d\n", vif->mid, is_go);
 	if (is_go && !pbss) {
 		wil_err(wil, "P2P GO must be in PBSS\n");
 		return -ENOTSUPP;
@@ -1355,34 +1383,34 @@ static int _wil_cfg80211_start_ap(struct wiphy *wiphy,
 	if (rc)
 		goto out;
 
-	rc = wmi_set_ssid(wil, ssid_len, ssid);
+	rc = wmi_set_ssid(vif, ssid_len, ssid);
 	if (rc)
 		goto out;
 
-	rc = _wil_cfg80211_set_ies(wiphy, bcon);
+	rc = _wil_cfg80211_set_ies(vif, bcon);
 	if (rc)
 		goto out;
 
-	wil->privacy = privacy;
-	wil->channel = chan;
-	wil->hidden_ssid = hidden_ssid;
-	wil->pbss = pbss;
+	vif->privacy = privacy;
+	vif->channel = chan;
+	vif->hidden_ssid = hidden_ssid;
+	vif->pbss = pbss;
 
 	netif_carrier_on(ndev);
 	wil6210_bus_request(wil, WIL_MAX_BUS_REQUEST_KBPS);
 
-	rc = wmi_pcp_start(wil, bi, wmi_nettype, chan, hidden_ssid, is_go);
+	rc = wmi_pcp_start(vif, bi, wmi_nettype, chan, hidden_ssid, is_go);
 	if (rc)
 		goto err_pcp_start;
 
-	rc = wil_bcast_init(wil);
+	rc = wil_bcast_init(vif);
 	if (rc)
 		goto err_bcast;
 
 	goto out; /* success */
 
 err_bcast:
-	wmi_pcp_stop(wil);
+	wmi_pcp_stop(vif);
 err_pcp_start:
 	netif_carrier_off(ndev);
 	wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
@@ -1396,10 +1424,11 @@ static int wil_cfg80211_change_beacon(struct wiphy *wiphy,
 				      struct cfg80211_beacon_data *bcon)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	int rc;
 	u32 privacy = 0;
 
-	wil_dbg_misc(wil, "change_beacon\n");
+	wil_dbg_misc(wil, "change_beacon, mid=%d\n", vif->mid);
 	wil_print_bcon_data(bcon);
 
 	if (bcon->tail &&
@@ -1408,20 +1437,20 @@ static int wil_cfg80211_change_beacon(struct wiphy *wiphy,
 		privacy = 1;
 
 	/* in case privacy has changed, need to restart the AP */
-	if (wil->privacy != privacy) {
+	if (vif->privacy != privacy) {
 		struct wireless_dev *wdev = ndev->ieee80211_ptr;
 
 		wil_dbg_misc(wil, "privacy changed %d=>%d. Restarting AP\n",
-			     wil->privacy, privacy);
+			     vif->privacy, privacy);
 
 		rc = _wil_cfg80211_start_ap(wiphy, ndev, wdev->ssid,
 					    wdev->ssid_len, privacy,
 					    wdev->beacon_interval,
-					    wil->channel, bcon,
-					    wil->hidden_ssid,
-					    wil->pbss);
+					    vif->channel, bcon,
+					    vif->hidden_ssid,
+					    vif->pbss);
 	} else {
-		rc = _wil_cfg80211_set_ies(wiphy, bcon);
+		rc = _wil_cfg80211_set_ies(vif, bcon);
 	}
 
 	return rc;
@@ -1488,6 +1517,7 @@ static int wil_cfg80211_stop_ap(struct wiphy *wiphy,
 				struct net_device *ndev)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 
 	wil_dbg_misc(wil, "stop_ap\n");
 
@@ -1499,7 +1529,7 @@ static int wil_cfg80211_stop_ap(struct wiphy *wiphy,
 
 	mutex_lock(&wil->mutex);
 
-	wmi_pcp_stop(wil);
+	wmi_pcp_stop(vif);
 
 	__wil_down(wil);
 
@@ -1513,9 +1543,11 @@ static int wil_cfg80211_add_station(struct wiphy *wiphy,
 				    const u8 *mac,
 				    struct station_parameters *params)
 {
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 
-	wil_dbg_misc(wil, "add station %pM aid %d\n", mac, params->aid);
+	wil_dbg_misc(wil, "add station %pM aid %d mid %d\n",
+		     mac, params->aid, vif->mid);
 
 	if (!disable_ap_sme) {
 		wil_err(wil, "not supported with AP SME enabled\n");
@@ -1527,20 +1559,21 @@ static int wil_cfg80211_add_station(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
-	return wmi_new_sta(wil, mac, params->aid);
+	return wmi_new_sta(vif, mac, params->aid);
 }
 
 static int wil_cfg80211_del_station(struct wiphy *wiphy,
 				    struct net_device *dev,
 				    struct station_del_parameters *params)
 {
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 
-	wil_dbg_misc(wil, "del_station: %pM, reason=%d\n", params->mac,
-		     params->reason_code);
+	wil_dbg_misc(wil, "del_station: %pM, reason=%d mid=%d\n",
+		     params->mac, params->reason_code, vif->mid);
 
 	mutex_lock(&wil->mutex);
-	wil6210_disconnect(wil, params->mac, params->reason_code, false);
+	wil6210_disconnect(vif, params->mac, params->reason_code, false);
 	mutex_unlock(&wil->mutex);
 
 	return 0;
@@ -1551,13 +1584,15 @@ static int wil_cfg80211_change_station(struct wiphy *wiphy,
 				       const u8 *mac,
 				       struct station_parameters *params)
 {
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	int authorize;
 	int cid, i;
 	struct vring_tx_data *txdata = NULL;
 
-	wil_dbg_misc(wil, "change station %pM mask 0x%x set 0x%x\n", mac,
-		     params->sta_flags_mask, params->sta_flags_set);
+	wil_dbg_misc(wil, "change station %pM mask 0x%x set 0x%x mid %d\n",
+		     mac, params->sta_flags_mask, params->sta_flags_set,
+		     vif->mid);
 
 	if (!disable_ap_sme) {
 		wil_dbg_misc(wil, "not supported with AP SME enabled\n");
@@ -1567,7 +1602,7 @@ static int wil_cfg80211_change_station(struct wiphy *wiphy,
 	if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
 		return 0;
 
-	cid = wil_find_cid(wil, mac);
+	cid = wil_find_cid(wil, vif->mid, mac);
 	if (cid < 0) {
 		wil_err(wil, "station not found\n");
 		return -ENOLINK;
@@ -1594,9 +1629,10 @@ static int wil_cfg80211_change_station(struct wiphy *wiphy,
 
 /* probe_client handling */
 static void wil_probe_client_handle(struct wil6210_priv *wil,
+				    struct wil6210_vif *vif,
 				    struct wil_probe_client_req *req)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = vif_to_ndev(vif);
 	struct wil_sta_info *sta = &wil->sta[req->cid];
 	/* assume STA is alive if it is still connected,
 	 * else FW will disconnect it
@@ -1607,51 +1643,53 @@ static void wil_probe_client_handle(struct wil6210_priv *wil,
 			      0, false, GFP_KERNEL);
 }
 
-static struct list_head *next_probe_client(struct wil6210_priv *wil)
+static struct list_head *next_probe_client(struct wil6210_vif *vif)
 {
 	struct list_head *ret = NULL;
 
-	mutex_lock(&wil->probe_client_mutex);
+	mutex_lock(&vif->probe_client_mutex);
 
-	if (!list_empty(&wil->probe_client_pending)) {
-		ret = wil->probe_client_pending.next;
+	if (!list_empty(&vif->probe_client_pending)) {
+		ret = vif->probe_client_pending.next;
 		list_del(ret);
 	}
 
-	mutex_unlock(&wil->probe_client_mutex);
+	mutex_unlock(&vif->probe_client_mutex);
 
 	return ret;
 }
 
 void wil_probe_client_worker(struct work_struct *work)
 {
-	struct wil6210_priv *wil = container_of(work, struct wil6210_priv,
-						probe_client_worker);
+	struct wil6210_vif *vif = container_of(work, struct wil6210_vif,
+					       probe_client_worker);
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wil_probe_client_req *req;
 	struct list_head *lh;
 
-	while ((lh = next_probe_client(wil)) != NULL) {
+	while ((lh = next_probe_client(vif)) != NULL) {
 		req = list_entry(lh, struct wil_probe_client_req, list);
 
-		wil_probe_client_handle(wil, req);
+		wil_probe_client_handle(wil, vif, req);
 		kfree(req);
 	}
 }
 
-void wil_probe_client_flush(struct wil6210_priv *wil)
+void wil_probe_client_flush(struct wil6210_vif *vif)
 {
 	struct wil_probe_client_req *req, *t;
+	struct wil6210_priv *wil = vif_to_wil(vif);
 
 	wil_dbg_misc(wil, "probe_client_flush\n");
 
-	mutex_lock(&wil->probe_client_mutex);
+	mutex_lock(&vif->probe_client_mutex);
 
-	list_for_each_entry_safe(req, t, &wil->probe_client_pending, list) {
+	list_for_each_entry_safe(req, t, &vif->probe_client_pending, list) {
 		list_del(&req->list);
 		kfree(req);
 	}
 
-	mutex_unlock(&wil->probe_client_mutex);
+	mutex_unlock(&vif->probe_client_mutex);
 }
 
 static int wil_cfg80211_probe_client(struct wiphy *wiphy,
@@ -1659,10 +1697,12 @@ static int wil_cfg80211_probe_client(struct wiphy *wiphy,
 				     const u8 *peer, u64 *cookie)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	struct wil_probe_client_req *req;
-	int cid = wil_find_cid(wil, peer);
+	int cid = wil_find_cid(wil, vif->mid, peer);
 
-	wil_dbg_misc(wil, "probe_client: %pM => CID %d\n", peer, cid);
+	wil_dbg_misc(wil, "probe_client: %pM => CID %d MID %d\n",
+		     peer, cid, vif->mid);
 
 	if (cid < 0)
 		return -ENOLINK;
@@ -1674,12 +1714,12 @@ static int wil_cfg80211_probe_client(struct wiphy *wiphy,
 	req->cid = cid;
 	req->cookie = cid;
 
-	mutex_lock(&wil->probe_client_mutex);
-	list_add_tail(&req->list, &wil->probe_client_pending);
-	mutex_unlock(&wil->probe_client_mutex);
+	mutex_lock(&vif->probe_client_mutex);
+	list_add_tail(&req->list, &vif->probe_client_pending);
+	mutex_unlock(&vif->probe_client_mutex);
 
 	*cookie = req->cookie;
-	queue_work(wil->wq_service, &wil->probe_client_worker);
+	queue_work(wil->wq_service, &vif->probe_client_worker);
 	return 0;
 }
 
@@ -1688,11 +1728,12 @@ static int wil_cfg80211_change_bss(struct wiphy *wiphy,
 				   struct bss_parameters *params)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 
 	if (params->ap_isolate >= 0) {
-		wil_dbg_misc(wil, "change_bss: ap_isolate %d => %d\n",
-			     wil->ap_isolate, params->ap_isolate);
-		wil->ap_isolate = params->ap_isolate;
+		wil_dbg_misc(wil, "change_bss: ap_isolate MID %d, %d => %d\n",
+			     vif->mid, vif->ap_isolate, params->ap_isolate);
+		vif->ap_isolate = params->ap_isolate;
 	}
 
 	return 0;
@@ -1738,7 +1779,7 @@ static int wil_cfg80211_suspend(struct wiphy *wiphy,
 	mutex_lock(&wil->mutex);
 	mutex_lock(&wil->p2p_wdev_mutex);
 	wil_p2p_stop_radio_operations(wil);
-	wil_abort_scan(wil, true);
+	wil_abort_scan(ndev_to_vif(wil->main_ndev), true);
 	mutex_unlock(&wil->p2p_wdev_mutex);
 	mutex_unlock(&wil->mutex);
 
@@ -1761,8 +1802,12 @@ wil_cfg80211_sched_scan_start(struct wiphy *wiphy,
 			      struct cfg80211_sched_scan_request *request)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	int i, rc;
 
+	if (vif->mid != 0)
+		return -EOPNOTSUPP;
+
 	wil_dbg_misc(wil,
 		     "sched scan start: n_ssids %d, ie_len %zu, flags 0x%x\n",
 		     request->n_ssids, request->ie_len, request->flags);
@@ -1796,7 +1841,8 @@ wil_cfg80211_sched_scan_start(struct wiphy *wiphy,
 			     i, sp->interval, sp->iterations);
 	}
 
-	rc = wmi_set_ie(wil, WMI_FRAME_PROBE_REQ, request->ie_len, request->ie);
+	rc = wmi_set_ie(vif, WMI_FRAME_PROBE_REQ,
+			request->ie_len, request->ie);
 	if (rc)
 		return rc;
 	return wmi_start_sched_scan(wil, request);
@@ -1807,8 +1853,12 @@ wil_cfg80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev,
 			     u64 reqid)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = ndev_to_vif(dev);
 	int rc;
 
+	if (vif->mid != 0)
+		return -EOPNOTSUPP;
+
 	rc = wmi_stop_sched_scan(wil);
 	/* device would return error if it thinks PNO is already stopped.
 	 * ignore the return code so user space and driver gets back in-sync
@@ -2015,11 +2065,11 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil)
 	mutex_lock(&wil->p2p_wdev_mutex);
 	p2p_wdev = wil->p2p_wdev;
 	wil->p2p_wdev = NULL;
-	wil->radio_wdev = wil_to_wdev(wil);
+	wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
 	mutex_unlock(&wil->p2p_wdev_mutex);
 	if (p2p_wdev) {
 		cfg80211_unregister_wdev(p2p_wdev);
-		vif = wdev_to_vif(p2p_wdev);
+		vif = wdev_to_vif(wil, p2p_wdev);
 		kfree(vif);
 	}
 }
@@ -2045,6 +2095,7 @@ static int wil_rf_sector_get_cfg(struct wiphy *wiphy,
 				 const void *data, int data_len)
 {
 	struct wil6210_priv *wil = wdev_to_wil(wdev);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 	int rc;
 	struct nlattr *tb[QCA_ATTR_DMG_RF_SECTOR_MAX + 1];
 	u16 sector_index;
@@ -2101,8 +2152,8 @@ static int wil_rf_sector_get_cfg(struct wiphy *wiphy,
 	cmd.sector_type = sector_type;
 	cmd.rf_modules_vec = rf_modules_vec & 0xFF;
 	memset(&reply, 0, sizeof(reply));
-	rc = wmi_call(wil, WMI_GET_RF_SECTOR_PARAMS_CMDID, &cmd, sizeof(cmd),
-		      WMI_GET_RF_SECTOR_PARAMS_DONE_EVENTID,
+	rc = wmi_call(wil, WMI_GET_RF_SECTOR_PARAMS_CMDID, vif->mid,
+		      &cmd, sizeof(cmd), WMI_GET_RF_SECTOR_PARAMS_DONE_EVENTID,
 		      &reply, sizeof(reply),
 		      500);
 	if (rc)
@@ -2164,6 +2215,7 @@ static int wil_rf_sector_set_cfg(struct wiphy *wiphy,
 				 const void *data, int data_len)
 {
 	struct wil6210_priv *wil = wdev_to_wil(wdev);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 	int rc, tmp;
 	struct nlattr *tb[QCA_ATTR_DMG_RF_SECTOR_MAX + 1];
 	struct nlattr *tb2[QCA_ATTR_DMG_RF_SECTOR_CFG_MAX + 1];
@@ -2258,8 +2310,8 @@ static int wil_rf_sector_set_cfg(struct wiphy *wiphy,
 
 	cmd.rf_modules_vec = rf_modules_vec & 0xFF;
 	memset(&reply, 0, sizeof(reply));
-	rc = wmi_call(wil, WMI_SET_RF_SECTOR_PARAMS_CMDID, &cmd, sizeof(cmd),
-		      WMI_SET_RF_SECTOR_PARAMS_DONE_EVENTID,
+	rc = wmi_call(wil, WMI_SET_RF_SECTOR_PARAMS_CMDID, vif->mid,
+		      &cmd, sizeof(cmd), WMI_SET_RF_SECTOR_PARAMS_DONE_EVENTID,
 		      &reply, sizeof(reply),
 		      500);
 	if (rc)
@@ -2272,6 +2324,7 @@ static int wil_rf_sector_get_selected(struct wiphy *wiphy,
 				      const void *data, int data_len)
 {
 	struct wil6210_priv *wil = wdev_to_wil(wdev);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 	int rc;
 	struct nlattr *tb[QCA_ATTR_DMG_RF_SECTOR_MAX + 1];
 	u8 sector_type, mac_addr[ETH_ALEN];
@@ -2305,7 +2358,7 @@ static int wil_rf_sector_get_selected(struct wiphy *wiphy,
 
 	if (tb[QCA_ATTR_MAC_ADDR]) {
 		ether_addr_copy(mac_addr, nla_data(tb[QCA_ATTR_MAC_ADDR]));
-		cid = wil_find_cid(wil, mac_addr);
+		cid = wil_find_cid(wil, vif->mid, mac_addr);
 		if (cid < 0) {
 			wil_err(wil, "invalid MAC address %pM\n", mac_addr);
 			return -ENOENT;
@@ -2321,7 +2374,7 @@ static int wil_rf_sector_get_selected(struct wiphy *wiphy,
 	cmd.cid = (u8)cid;
 	cmd.sector_type = sector_type;
 	memset(&reply, 0, sizeof(reply));
-	rc = wmi_call(wil, WMI_GET_SELECTED_RF_SECTOR_INDEX_CMDID,
+	rc = wmi_call(wil, WMI_GET_SELECTED_RF_SECTOR_INDEX_CMDID, vif->mid,
 		      &cmd, sizeof(cmd),
 		      WMI_GET_SELECTED_RF_SECTOR_INDEX_DONE_EVENTID,
 		      &reply, sizeof(reply),
@@ -2354,7 +2407,7 @@ nla_put_failure:
 }
 
 static int wil_rf_sector_wmi_set_selected(struct wil6210_priv *wil,
-					  u16 sector_index,
+					  u8 mid, u16 sector_index,
 					  u8 sector_type, u8 cid)
 {
 	struct wmi_set_selected_rf_sector_index_cmd cmd;
@@ -2369,7 +2422,7 @@ static int wil_rf_sector_wmi_set_selected(struct wil6210_priv *wil,
 	cmd.sector_type = sector_type;
 	cmd.cid = (u8)cid;
 	memset(&reply, 0, sizeof(reply));
-	rc = wmi_call(wil, WMI_SET_SELECTED_RF_SECTOR_INDEX_CMDID,
+	rc = wmi_call(wil, WMI_SET_SELECTED_RF_SECTOR_INDEX_CMDID, mid,
 		      &cmd, sizeof(cmd),
 		      WMI_SET_SELECTED_RF_SECTOR_INDEX_DONE_EVENTID,
 		      &reply, sizeof(reply),
@@ -2384,6 +2437,7 @@ static int wil_rf_sector_set_selected(struct wiphy *wiphy,
 				      const void *data, int data_len)
 {
 	struct wil6210_priv *wil = wdev_to_wil(wdev);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
 	int rc;
 	struct nlattr *tb[QCA_ATTR_DMG_RF_SECTOR_MAX + 1];
 	u16 sector_index;
@@ -2423,7 +2477,7 @@ static int wil_rf_sector_set_selected(struct wiphy *wiphy,
 	if (tb[QCA_ATTR_MAC_ADDR]) {
 		ether_addr_copy(mac_addr, nla_data(tb[QCA_ATTR_MAC_ADDR]));
 		if (!is_broadcast_ether_addr(mac_addr)) {
-			cid = wil_find_cid(wil, mac_addr);
+			cid = wil_find_cid(wil, vif->mid, mac_addr);
 			if (cid < 0) {
 				wil_err(wil, "invalid MAC address %pM\n",
 					mac_addr);
@@ -2445,17 +2499,20 @@ static int wil_rf_sector_set_selected(struct wiphy *wiphy,
 	}
 
 	if (cid >= 0) {
-		rc = wil_rf_sector_wmi_set_selected(wil, sector_index,
+		rc = wil_rf_sector_wmi_set_selected(wil, vif->mid, sector_index,
 						    sector_type, cid);
 	} else {
 		/* unlock all cids */
 		rc = wil_rf_sector_wmi_set_selected(
-			wil, WMI_INVALID_RF_SECTOR_INDEX, sector_type,
-			WIL_CID_ALL);
+			wil, vif->mid, WMI_INVALID_RF_SECTOR_INDEX,
+			sector_type, WIL_CID_ALL);
 		if (rc == -EINVAL) {
 			for (i = 0; i < WIL6210_MAX_CID; i++) {
+				if (wil->sta[i].mid != vif->mid)
+					continue;
 				rc = wil_rf_sector_wmi_set_selected(
-					wil, WMI_INVALID_RF_SECTOR_INDEX,
+					wil, vif->mid,
+					WMI_INVALID_RF_SECTOR_INDEX,
 					sector_type, i);
 				/* the FW will silently ignore and return
 				 * success for unused cid, so abort the loop
diff --git a/drivers/net/wireless/ath/wil6210/debug.c b/drivers/net/wireless/ath/wil6210/debug.c
index 217a4591bde4..a9befb971cc4 100644
--- a/drivers/net/wireless/ath/wil6210/debug.c
+++ b/drivers/net/wireless/ath/wil6210/debug.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2013,2016 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -25,7 +26,7 @@ void __wil_err(struct wil6210_priv *wil, const char *fmt, ...)
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	netdev_err(wil_to_ndev(wil), "%pV", &vaf);
+	netdev_err(wil->main_ndev, "%pV", &vaf);
 	trace_wil6210_log_err(&vaf);
 	va_end(args);
 }
@@ -41,7 +42,7 @@ void __wil_err_ratelimited(struct wil6210_priv *wil, const char *fmt, ...)
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	netdev_err(wil_to_ndev(wil), "%pV", &vaf);
+	netdev_err(wil->main_ndev, "%pV", &vaf);
 	trace_wil6210_log_err(&vaf);
 	va_end(args);
 }
@@ -57,7 +58,7 @@ void wil_dbg_ratelimited(const struct wil6210_priv *wil, const char *fmt, ...)
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	netdev_dbg(wil_to_ndev(wil), "%pV", &vaf);
+	netdev_dbg(wil->main_ndev, "%pV", &vaf);
 	trace_wil6210_log_dbg(&vaf);
 	va_end(args);
 }
@@ -70,7 +71,7 @@ void __wil_info(struct wil6210_priv *wil, const char *fmt, ...)
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	netdev_info(wil_to_ndev(wil), "%pV", &vaf);
+	netdev_info(wil->main_ndev, "%pV", &vaf);
 	trace_wil6210_log_info(&vaf);
 	va_end(args);
 }
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index 4a4888246e8c..636da5e32ae1 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -621,7 +622,7 @@ static ssize_t wil_write_file_reset(struct file *file, const char __user *buf,
 				    size_t len, loff_t *ppos)
 {
 	struct wil6210_priv *wil = file->private_data;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 
 	/**
 	 * BUG:
@@ -716,27 +717,44 @@ static ssize_t wil_write_back(struct file *file, const char __user *buf,
 	if (rc < 2)
 		return -EINVAL;
 
-	if (0 == strcmp(cmd, "add")) {
-		if (rc < 3) {
-			wil_err(wil, "BACK: add require at least 2 params\n");
+	if ((strcmp(cmd, "add") == 0) ||
+	    (strcmp(cmd, "del_tx") == 0)) {
+		struct vring_tx_data *txdata;
+
+		if (p1 < 0 || p1 >= WIL6210_MAX_TX_RINGS) {
+			wil_err(wil, "BACK: invalid ring id %d\n", p1);
 			return -EINVAL;
 		}
-		if (rc < 4)
-			p3 = 0;
-		wmi_addba(wil, p1, p2, p3);
-	} else if (0 == strcmp(cmd, "del_tx")) {
-		if (rc < 3)
-			p2 = WLAN_REASON_QSTA_LEAVE_QBSS;
-		wmi_delba_tx(wil, p1, p2);
-	} else if (0 == strcmp(cmd, "del_rx")) {
+		txdata = &wil->vring_tx_data[p1];
+		if (strcmp(cmd, "add") == 0) {
+			if (rc < 3) {
+				wil_err(wil, "BACK: add require at least 2 params\n");
+				return -EINVAL;
+			}
+			if (rc < 4)
+				p3 = 0;
+			wmi_addba(wil, txdata->mid, p1, p2, p3);
+		} else {
+			if (rc < 3)
+				p2 = WLAN_REASON_QSTA_LEAVE_QBSS;
+			wmi_delba_tx(wil, txdata->mid, p1, p2);
+		}
+	} else if (strcmp(cmd, "del_rx") == 0) {
+		struct wil_sta_info *sta;
+
 		if (rc < 3) {
 			wil_err(wil,
 				"BACK: del_rx require at least 2 params\n");
 			return -EINVAL;
 		}
+		if (p1 < 0 || p1 >= WIL6210_MAX_CID) {
+			wil_err(wil, "BACK: invalid CID %d\n", p1);
+			return -EINVAL;
+		}
 		if (rc < 4)
 			p3 = WLAN_REASON_QSTA_LEAVE_QBSS;
-		wmi_delba_rx(wil, mk_cidxtid(p1, p2), p3);
+		sta = &wil->sta[p1];
+		wmi_delba_rx(wil, sta->mid, mk_cidxtid(p1, p2), p3);
 	} else {
 		wil_err(wil, "BACK: Unrecognized command \"%s\"\n", cmd);
 		return -EINVAL;
@@ -855,7 +873,7 @@ static ssize_t wil_write_file_txmgmt(struct file *file, const char __user *buf,
 {
 	struct wil6210_priv *wil = file->private_data;
 	struct wiphy *wiphy = wil_to_wiphy(wil);
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wireless_dev *wdev = wil->main_ndev->ieee80211_ptr;
 	struct cfg80211_mgmt_tx_params params;
 	int rc;
 	void *frame;
@@ -890,6 +908,7 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf,
 				  size_t len, loff_t *ppos)
 {
 	struct wil6210_priv *wil = file->private_data;
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_cmd_hdr *wmi;
 	void *cmd;
 	int cmdlen = len - sizeof(struct wmi_cmd_hdr);
@@ -912,7 +931,7 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf,
 	cmd = (cmdlen > 0) ? &wmi[1] : NULL;
 	cmdid = le16_to_cpu(wmi->command_id);
 
-	rc1 = wmi_send(wil, cmdid, cmd, cmdlen);
+	rc1 = wmi_send(wil, cmdid, vif->mid, cmd, cmdlen);
 	kfree(wmi);
 
 	wil_info(wil, "0x%04x[%d] -> %d\n", cmdid, cmdlen, rc1);
@@ -1050,6 +1069,7 @@ static int wil_bf_debugfs_show(struct seq_file *s, void *data)
 	int rc;
 	int i;
 	struct wil6210_priv *wil = s->private;
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_notify_req_cmd cmd = {
 		.interval_usec = 0,
 	};
@@ -1062,7 +1082,8 @@ static int wil_bf_debugfs_show(struct seq_file *s, void *data)
 		u32 status;
 
 		cmd.cid = i;
-		rc = wmi_call(wil, WMI_NOTIFY_REQ_CMDID, &cmd, sizeof(cmd),
+		rc = wmi_call(wil, WMI_NOTIFY_REQ_CMDID, vif->mid,
+			      &cmd, sizeof(cmd),
 			      WMI_NOTIFY_REQ_DONE_EVENTID, &reply,
 			      sizeof(reply), 20);
 		/* if reply is all-0, ignore this CID */
@@ -1155,7 +1176,7 @@ static const struct file_operations fops_temp = {
 static int wil_freq_debugfs_show(struct seq_file *s, void *data)
 {
 	struct wil6210_priv *wil = s->private;
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wireless_dev *wdev = wil->main_ndev->ieee80211_ptr;
 	u16 freq = wdev->chandef.chan ? wdev->chandef.chan->center_freq : 0;
 
 	seq_printf(s, "Freq = %d\n", freq);
@@ -1180,6 +1201,7 @@ static int wil_link_debugfs_show(struct seq_file *s, void *data)
 {
 	struct wil6210_priv *wil = s->private;
 	struct station_info sinfo;
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int i, rc;
 
 	for (i = 0; i < ARRAY_SIZE(wil->sta); i++) {
@@ -1200,7 +1222,7 @@ static int wil_link_debugfs_show(struct seq_file *s, void *data)
 		seq_printf(s, "[%d] %pM %s\n", i, p->addr, status);
 
 		if (p->status == wil_sta_connected) {
-			rc = wil_cid_fill_sinfo(wil, i, &sinfo);
+			rc = wil_cid_fill_sinfo(vif, i, &sinfo);
 			if (rc)
 				return rc;
 
@@ -1229,7 +1251,7 @@ static const struct file_operations fops_link = {
 static int wil_info_debugfs_show(struct seq_file *s, void *data)
 {
 	struct wil6210_priv *wil = s->private;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	int is_ac = power_supply_is_system_supplied();
 	int rx = atomic_xchg(&wil->isr_count_rx, 0);
 	int tx = atomic_xchg(&wil->isr_count_tx, 0);
@@ -1773,11 +1795,9 @@ static void wil6210_debugfs_init_isr(struct wil6210_priv *wil,
 
 /* fields in struct wil6210_priv */
 static const struct dbg_off dbg_wil_off[] = {
-	WIL_FIELD(privacy,	0444,		doff_u32),
 	WIL_FIELD(status[0],	0644,	doff_ulong),
 	WIL_FIELD(hw_version,	0444,	doff_x32),
 	WIL_FIELD(recovery_count, 0444,	doff_u32),
-	WIL_FIELD(ap_isolate,	0444,	doff_u32),
 	WIL_FIELD(discovery_mode, 0644,	doff_u8),
 	WIL_FIELD(chip_revision, 0444,	doff_u8),
 	WIL_FIELD(abft_len, 0644,		doff_u8),
diff --git a/drivers/net/wireless/ath/wil6210/ethtool.c b/drivers/net/wireless/ath/wil6210/ethtool.c
index 66200f616a37..e7ff41e623d2 100644
--- a/drivers/net/wireless/ath/wil6210/ethtool.c
+++ b/drivers/net/wireless/ath/wil6210/ethtool.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014,2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -74,12 +75,13 @@ static int wil_ethtoolops_set_coalesce(struct net_device *ndev,
 				       struct ethtool_coalesce *cp)
 {
 	struct wil6210_priv *wil = ndev_to_wil(ndev);
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 	int ret;
 
 	wil_dbg_misc(wil, "ethtoolops_set_coalesce: rx %d usec, tx %d usec\n",
 		     cp->rx_coalesce_usecs, cp->tx_coalesce_usecs);
 
-	if (wil->wdev->iftype == NL80211_IFTYPE_MONITOR) {
+	if (wdev->iftype == NL80211_IFTYPE_MONITOR) {
 		wil_dbg_misc(wil, "No IRQ coalescing in monitor mode\n");
 		return -EINVAL;
 	}
diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index 1835187ea075..ef8f9a769833 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c
@@ -188,12 +188,14 @@ void wil_unmask_irq(struct wil6210_priv *wil)
 
 void wil_configure_interrupt_moderation(struct wil6210_priv *wil)
 {
+	struct wireless_dev *wdev = wil->main_ndev->ieee80211_ptr;
+
 	wil_dbg_irq(wil, "configure_interrupt_moderation\n");
 
 	/* disable interrupt moderation for monitor
 	 * to get better timestamp precision
 	 */
-	if (wil->wdev->iftype == NL80211_IFTYPE_MONITOR)
+	if (wdev->iftype == NL80211_IFTYPE_MONITOR)
 		return;
 
 	/* Disable and clear tx counter before (re)configuration */
@@ -340,7 +342,7 @@ static irqreturn_t wil6210_irq_tx(int irq, void *cookie)
 
 static void wil_notify_fw_error(struct wil6210_priv *wil)
 {
-	struct device *dev = &wil_to_ndev(wil)->dev;
+	struct device *dev = &wil->main_ndev->dev;
 	char *envp[3] = {
 		[0] = "SOURCE=wil6210",
 		[1] = "EVENT=FW_ERROR",
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 04c8651274d9..b151f1c7ce85 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -160,24 +160,25 @@ void wil_memcpy_toio_32(volatile void __iomem *dst, const void *src,
 	}
 }
 
-static void wil_disconnect_cid(struct wil6210_priv *wil, int cid,
+static void wil_disconnect_cid(struct wil6210_vif *vif, int cid,
 			       u16 reason_code, bool from_event)
 __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 {
 	uint i;
-	struct net_device *ndev = wil_to_ndev(wil);
-	struct wireless_dev *wdev = wil->wdev;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct net_device *ndev = vif_to_ndev(vif);
+	struct wireless_dev *wdev = vif_to_wdev(vif);
 	struct wil_sta_info *sta = &wil->sta[cid];
 
 	might_sleep();
-	wil_dbg_misc(wil, "disconnect_cid: CID %d, status %d\n",
-		     cid, sta->status);
+	wil_dbg_misc(wil, "disconnect_cid: CID %d, MID %d, status %d\n",
+		     cid, sta->mid, sta->status);
 	/* inform upper/lower layers */
 	if (sta->status != wil_sta_unused) {
 		if (!from_event) {
 			bool del_sta = (wdev->iftype == NL80211_IFTYPE_AP) ?
 						disable_ap_sme : false;
-			wmi_disconnect_sta(wil, sta->addr, reason_code,
+			wmi_disconnect_sta(vif, sta->addr, reason_code,
 					   true, del_sta);
 		}
 
@@ -191,6 +192,7 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 			break;
 		}
 		sta->status = wil_sta_unused;
+		sta->mid = U8_MAX;
 	}
 	/* reorder buffers */
 	for (i = 0; i < WIL_STA_TID_NUM; i++) {
@@ -216,28 +218,33 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 	memset(&sta->stats, 0, sizeof(sta->stats));
 }
 
-static bool wil_is_connected(struct wil6210_priv *wil)
+static bool wil_vif_is_connected(struct wil6210_priv *wil, u8 mid)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(wil->sta); i++) {
-		if (wil->sta[i].status == wil_sta_connected)
+		if (wil->sta[i].mid == mid &&
+		    wil->sta[i].status == wil_sta_connected)
 			return true;
 	}
 
 	return false;
 }
 
-static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid,
+static void _wil6210_disconnect(struct wil6210_vif *vif, const u8 *bssid,
 				u16 reason_code, bool from_event)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int cid = -ENOENT;
-	struct net_device *ndev = wil_to_ndev(wil);
-	struct wireless_dev *wdev = wil->wdev;
+	struct net_device *ndev;
+	struct wireless_dev *wdev;
 
-	if (unlikely(!ndev))
+	if (unlikely(!vif))
 		return;
 
+	ndev = vif_to_ndev(vif);
+	wdev = vif_to_wdev(vif);
+
 	might_sleep();
 	wil_info(wil, "bssid=%pM, reason=%d, ev%s\n", bssid,
 		 reason_code, from_event ? "+" : "-");
@@ -254,22 +261,22 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid,
 	 */
 	if (bssid && !is_broadcast_ether_addr(bssid) &&
 	    !ether_addr_equal_unaligned(ndev->dev_addr, bssid)) {
-		cid = wil_find_cid(wil, bssid);
+		cid = wil_find_cid(wil, vif->mid, bssid);
 		wil_dbg_misc(wil, "Disconnect %pM, CID=%d, reason=%d\n",
 			     bssid, cid, reason_code);
 		if (cid >= 0) /* disconnect 1 peer */
-			wil_disconnect_cid(wil, cid, reason_code, from_event);
+			wil_disconnect_cid(vif, cid, reason_code, from_event);
 	} else { /* all */
 		wil_dbg_misc(wil, "Disconnect all\n");
 		for (cid = 0; cid < WIL6210_MAX_CID; cid++)
-			wil_disconnect_cid(wil, cid, reason_code, from_event);
+			wil_disconnect_cid(vif, cid, reason_code, from_event);
 	}
 
 	/* link state */
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
-		wil_bcast_fini(wil);
+		wil_bcast_fini(vif);
 		wil_update_net_queues_bh(wil, NULL, true);
 		netif_carrier_off(ndev);
 		wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
@@ -278,20 +285,20 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid,
 			clear_bit(wil_status_fwconnected, wil->status);
 			cfg80211_disconnected(ndev, reason_code,
 					      NULL, 0,
-					      wil->locally_generated_disc,
+					      vif->locally_generated_disc,
 					      GFP_KERNEL);
-			wil->locally_generated_disc = false;
+			vif->locally_generated_disc = false;
 		} else if (test_bit(wil_status_fwconnecting, wil->status)) {
 			cfg80211_connect_result(ndev, bssid, NULL, 0, NULL, 0,
 						WLAN_STATUS_UNSPECIFIED_FAILURE,
 						GFP_KERNEL);
-			wil->bss = NULL;
+			vif->bss = NULL;
 		}
 		clear_bit(wil_status_fwconnecting, wil->status);
 		break;
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
-		if (!wil_is_connected(wil)) {
+		if (!wil_vif_is_connected(wil, vif->mid)) {
 			wil_update_net_queues_bh(wil, NULL, true);
 			clear_bit(wil_status_fwconnected, wil->status);
 		} else {
@@ -303,11 +310,12 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid,
 	}
 }
 
-static void wil_disconnect_worker(struct work_struct *work)
+void wil_disconnect_worker(struct work_struct *work)
 {
-	struct wil6210_priv *wil = container_of(work,
-			struct wil6210_priv, disconnect_worker);
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct wil6210_vif *vif = container_of(work,
+			struct wil6210_vif, disconnect_worker);
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct net_device *ndev = vif_to_ndev(vif);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -322,7 +330,7 @@ static void wil_disconnect_worker(struct work_struct *work)
 		/* already disconnected */
 		return;
 
-	rc = wmi_call(wil, WMI_DISCONNECT_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_DISCONNECT_CMDID, vif->mid, NULL, 0,
 		      WMI_DISCONNECT_EVENTID, &reply, sizeof(reply),
 		      WIL6210_DISCONNECT_TO_MS);
 	if (rc) {
@@ -337,30 +345,6 @@ static void wil_disconnect_worker(struct work_struct *work)
 	clear_bit(wil_status_fwconnecting, wil->status);
 }
 
-static void wil_connect_timer_fn(struct timer_list *t)
-{
-	struct wil6210_priv *wil = from_timer(wil, t, connect_timer);
-	bool q;
-
-	wil_err(wil, "Connect timeout detected, disconnect station\n");
-
-	/* reschedule to thread context - disconnect won't
-	 * run from atomic context.
-	 * queue on wmi_wq to prevent race with connect event.
-	 */
-	q = queue_work(wil->wmi_wq, &wil->disconnect_worker);
-	wil_dbg_wmi(wil, "queue_work of disconnect_worker -> %d\n", q);
-}
-
-static void wil_scan_timer_fn(struct timer_list *t)
-{
-	struct wil6210_priv *wil = from_timer(wil, t, scan_timer);
-
-	clear_bit(wil_status_fwready, wil->status);
-	wil_err(wil, "Scan timeout detected, start fw error recovery\n");
-	wil_fw_error_recovery(wil);
-}
-
 static int wil_wait_for_recovery(struct wil6210_priv *wil)
 {
 	if (wait_event_interruptible(wil->wq, wil->recovery_state !=
@@ -394,12 +378,12 @@ static void wil_fw_error_worker(struct work_struct *work)
 {
 	struct wil6210_priv *wil = container_of(work, struct wil6210_priv,
 						fw_error_worker);
-	struct wireless_dev *wdev = wil->wdev;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 
 	wil_dbg_misc(wil, "fw error worker\n");
 
-	if (!(ndev->flags & IFF_UP)) {
+	if (!ndev || !(ndev->flags & IFF_UP)) {
 		wil_info(wil, "No recovery - interface is down\n");
 		return;
 	}
@@ -429,6 +413,10 @@ static void wil_fw_error_worker(struct work_struct *work)
 		return;
 
 	mutex_lock(&wil->mutex);
+	/* Needs adaptation for multiple VIFs
+	 * need to go over all VIFs and consider the appropriate
+	 * recovery.
+	 */
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
@@ -461,8 +449,9 @@ static int wil_find_free_vring(struct wil6210_priv *wil)
 	return -EINVAL;
 }
 
-int wil_tx_init(struct wil6210_priv *wil, int cid)
+int wil_tx_init(struct wil6210_vif *vif, int cid)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc = -EINVAL, ringid;
 
 	if (cid < 0) {
@@ -475,21 +464,22 @@ int wil_tx_init(struct wil6210_priv *wil, int cid)
 		goto out;
 	}
 
-	wil_dbg_wmi(wil, "Configure for connection CID %d vring %d\n",
-		    cid, ringid);
+	wil_dbg_wmi(wil, "Configure for connection CID %d MID %d vring %d\n",
+		    cid, vif->mid, ringid);
 
-	rc = wil_vring_init_tx(wil, ringid, 1 << tx_ring_order, cid, 0);
+	rc = wil_vring_init_tx(vif, ringid, 1 << tx_ring_order, cid, 0);
 	if (rc)
-		wil_err(wil, "wil_vring_init_tx for CID %d vring %d failed\n",
-			cid, ringid);
+		wil_err(wil, "init TX for CID %d MID %d vring %d failed\n",
+			cid, vif->mid, ringid);
 
 out:
 	return rc;
 }
 
-int wil_bcast_init(struct wil6210_priv *wil)
+int wil_bcast_init(struct wil6210_vif *vif)
 {
-	int ri = wil->bcast_vring, rc;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	int ri = vif->bcast_vring, rc;
 
 	if ((ri >= 0) && wil->vring_tx[ri].va)
 		return 0;
@@ -498,22 +488,23 @@ int wil_bcast_init(struct wil6210_priv *wil)
 	if (ri < 0)
 		return ri;
 
-	wil->bcast_vring = ri;
-	rc = wil_vring_init_bcast(wil, ri, 1 << bcast_ring_order);
+	vif->bcast_vring = ri;
+	rc = wil_vring_init_bcast(vif, ri, 1 << bcast_ring_order);
 	if (rc)
-		wil->bcast_vring = -1;
+		vif->bcast_vring = -1;
 
 	return rc;
 }
 
-void wil_bcast_fini(struct wil6210_priv *wil)
+void wil_bcast_fini(struct wil6210_vif *vif)
 {
-	int ri = wil->bcast_vring;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	int ri = vif->bcast_vring;
 
 	if (ri < 0)
 		return;
 
-	wil->bcast_vring = -1;
+	vif->bcast_vring = -1;
 	wil_vring_fini_tx(wil, ri);
 }
 
@@ -524,15 +515,16 @@ int wil_priv_init(struct wil6210_priv *wil)
 	wil_dbg_misc(wil, "priv_init\n");
 
 	memset(wil->sta, 0, sizeof(wil->sta));
-	for (i = 0; i < WIL6210_MAX_CID; i++)
+	for (i = 0; i < WIL6210_MAX_CID; i++) {
 		spin_lock_init(&wil->sta[i].tid_rx_lock);
+		wil->sta[i].mid = U8_MAX;
+	}
 
 	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++)
 		spin_lock_init(&wil->vring_tx_data[i].lock);
 
 	mutex_init(&wil->mutex);
 	mutex_init(&wil->wmi_mutex);
-	mutex_init(&wil->probe_client_mutex);
 	mutex_init(&wil->p2p_wdev_mutex);
 	mutex_init(&wil->halp.lock);
 
@@ -540,22 +532,12 @@ int wil_priv_init(struct wil6210_priv *wil)
 	init_completion(&wil->wmi_call);
 	init_completion(&wil->halp.comp);
 
-	wil->bcast_vring = -1;
-	timer_setup(&wil->connect_timer, wil_connect_timer_fn, 0);
-	timer_setup(&wil->scan_timer, wil_scan_timer_fn, 0);
-	timer_setup(&wil->p2p.discovery_timer, wil_p2p_discovery_timer_fn, 0);
-
-	INIT_WORK(&wil->disconnect_worker, wil_disconnect_worker);
 	INIT_WORK(&wil->wmi_event_worker, wmi_event_worker);
 	INIT_WORK(&wil->fw_error_worker, wil_fw_error_worker);
-	INIT_WORK(&wil->probe_client_worker, wil_probe_client_worker);
-	INIT_WORK(&wil->p2p.delayed_listen_work, wil_p2p_delayed_listen_work);
 
 	INIT_LIST_HEAD(&wil->pending_wmi_ev);
-	INIT_LIST_HEAD(&wil->probe_client_pending);
 	spin_lock_init(&wil->wmi_ev_lock);
 	spin_lock_init(&wil->net_queue_lock);
-	wil->net_queue_stopped = 1;
 	init_waitqueue_head(&wil->wq);
 
 	wil->wmi_wq = create_singlethread_workqueue(WIL_NAME "_wmi");
@@ -582,6 +564,8 @@ int wil_priv_init(struct wil6210_priv *wil)
 	memset(&wil->suspend_stats, 0, sizeof(wil->suspend_stats));
 	wil->vring_idle_trsh = 16;
 
+	wil->reply_mid = U8_MAX;
+
 	return 0;
 
 out_wmi_wq:
@@ -600,7 +584,7 @@ void wil6210_bus_request(struct wil6210_priv *wil, u32 kbps)
 
 /**
  * wil6210_disconnect - disconnect one connection
- * @wil: driver context
+ * @vif: virtual interface context
  * @bssid: peer to disconnect, NULL to disconnect all
  * @reason_code: Reason code for the Disassociation frame
  * @from_event: whether is invoked from FW event handler
@@ -608,13 +592,15 @@ void wil6210_bus_request(struct wil6210_priv *wil, u32 kbps)
  * Disconnect and release associated resources. If invoked not from the
  * FW event handler, issue WMI command(s) to trigger MAC disconnect.
  */
-void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid,
+void wil6210_disconnect(struct wil6210_vif *vif, const u8 *bssid,
 			u16 reason_code, bool from_event)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
+
 	wil_dbg_misc(wil, "disconnect\n");
 
-	del_timer_sync(&wil->connect_timer);
-	_wil6210_disconnect(wil, bssid, reason_code, from_event);
+	del_timer_sync(&vif->connect_timer);
+	_wil6210_disconnect(vif, bssid, reason_code, from_event);
 }
 
 void wil_priv_deinit(struct wil6210_priv *wil)
@@ -622,18 +608,8 @@ void wil_priv_deinit(struct wil6210_priv *wil)
 	wil_dbg_misc(wil, "priv_deinit\n");
 
 	wil_set_recovery_state(wil, fw_recovery_idle);
-	del_timer_sync(&wil->scan_timer);
-	del_timer_sync(&wil->p2p.discovery_timer);
-	cancel_work_sync(&wil->disconnect_worker);
 	cancel_work_sync(&wil->fw_error_worker);
-	cancel_work_sync(&wil->p2p.discovery_expired_work);
-	cancel_work_sync(&wil->p2p.delayed_listen_work);
-	mutex_lock(&wil->mutex);
-	wil6210_disconnect(wil, NULL, WLAN_REASON_DEAUTH_LEAVING, false);
-	mutex_unlock(&wil->mutex);
 	wmi_event_flush(wil);
-	wil_probe_client_flush(wil);
-	cancel_work_sync(&wil->probe_client_worker);
 	destroy_workqueue(wil->wq_service);
 	destroy_workqueue(wil->wmi_wq);
 }
@@ -943,7 +919,7 @@ void wil_mbox_ring_le2cpus(struct wil6210_mbox_ring *r)
 
 static int wil_get_bl_info(struct wil6210_priv *wil)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct wiphy *wiphy = wil_to_wiphy(wil);
 	union {
 		struct bl_dedicated_registers_v0 bl0;
@@ -1035,7 +1011,7 @@ static void wil_bl_crash_info(struct wil6210_priv *wil, bool is_err)
 
 static int wil_get_otp_info(struct wil6210_priv *wil)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct wiphy *wiphy = wil_to_wiphy(wil);
 	u8 mac[8];
 
@@ -1069,8 +1045,9 @@ static int wil_wait_for_fw_ready(struct wil6210_priv *wil)
 	return 0;
 }
 
-void wil_abort_scan(struct wil6210_priv *wil, bool sync)
+void wil_abort_scan(struct wil6210_vif *vif, bool sync)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct cfg80211_scan_info info = {
 		.aborted = true,
@@ -1078,22 +1055,22 @@ void wil_abort_scan(struct wil6210_priv *wil, bool sync)
 
 	lockdep_assert_held(&wil->p2p_wdev_mutex);
 
-	if (!wil->scan_request)
+	if (!vif->scan_request)
 		return;
 
-	wil_dbg_misc(wil, "Abort scan_request 0x%p\n", wil->scan_request);
-	del_timer_sync(&wil->scan_timer);
+	wil_dbg_misc(wil, "Abort scan_request 0x%p\n", vif->scan_request);
+	del_timer_sync(&vif->scan_timer);
 	mutex_unlock(&wil->p2p_wdev_mutex);
-	rc = wmi_abort_scan(wil);
+	rc = wmi_abort_scan(vif);
 	if (!rc && sync)
-		wait_event_interruptible_timeout(wil->wq, !wil->scan_request,
+		wait_event_interruptible_timeout(wil->wq, !vif->scan_request,
 						 msecs_to_jiffies(
 						 WAIT_FOR_SCAN_ABORT_MS));
 
 	mutex_lock(&wil->p2p_wdev_mutex);
-	if (wil->scan_request) {
-		cfg80211_scan_done(wil->scan_request, &info);
-		wil->scan_request = NULL;
+	if (vif->scan_request) {
+		cfg80211_scan_done(vif->scan_request, &info);
+		vif->scan_request = NULL;
 	}
 }
 
@@ -1148,6 +1125,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 	int rc;
 	unsigned long status_flags = BIT(wil_status_resetting);
 	int no_flash;
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 
 	wil_dbg_misc(wil, "reset\n");
 
@@ -1158,7 +1136,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 		static const u8 mac[ETH_ALEN] = {
 			0x00, 0xde, 0xad, 0x12, 0x34, 0x56,
 		};
-		struct net_device *ndev = wil_to_ndev(wil);
+		struct net_device *ndev = wil->main_ndev;
 
 		ether_addr_copy(ndev->perm_addr, mac);
 		ether_addr_copy(ndev->dev_addr, ndev->perm_addr);
@@ -1196,15 +1174,15 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 		goto out;
 	}
 
-	cancel_work_sync(&wil->disconnect_worker);
-	wil6210_disconnect(wil, NULL, WLAN_REASON_DEAUTH_LEAVING, false);
-	wil_bcast_fini(wil);
+	cancel_work_sync(&vif->disconnect_worker);
+	wil6210_disconnect(vif, NULL, WLAN_REASON_DEAUTH_LEAVING, false);
+	wil_bcast_fini(vif);
 
 	/* Disable device led before reset*/
 	wmi_led_cfg(wil, false);
 
 	mutex_lock(&wil->p2p_wdev_mutex);
-	wil_abort_scan(wil, false);
+	wil_abort_scan(vif, false);
 	mutex_unlock(&wil->p2p_wdev_mutex);
 
 	/* prevent NAPI from being scheduled and prevent wmi commands */
@@ -1276,7 +1254,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 	}
 
 	/* init after reset */
-	wil->ap_isolate = 0;
+	vif->ap_isolate = 0;
 	reinit_completion(&wil->wmi_ready);
 	reinit_completion(&wil->wmi_call);
 	reinit_completion(&wil->halp.comp);
@@ -1337,8 +1315,8 @@ void wil_fw_error_recovery(struct wil6210_priv *wil)
 
 int __wil_up(struct wil6210_priv *wil)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
-	struct wireless_dev *wdev = wil->wdev;
+	struct net_device *ndev = wil->main_ndev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 	int rc;
 
 	WARN_ON(!mutex_is_locked(&wil->mutex));
@@ -1422,7 +1400,7 @@ int __wil_down(struct wil6210_priv *wil)
 
 	mutex_lock(&wil->p2p_wdev_mutex);
 	wil_p2p_stop_radio_operations(wil);
-	wil_abort_scan(wil, false);
+	wil_abort_scan(ndev_to_vif(wil->main_ndev), false);
 	mutex_unlock(&wil->p2p_wdev_mutex);
 
 	return wil_reset(wil, false);
@@ -1442,13 +1420,14 @@ int wil_down(struct wil6210_priv *wil)
 	return rc;
 }
 
-int wil_find_cid(struct wil6210_priv *wil, const u8 *mac)
+int wil_find_cid(struct wil6210_priv *wil, u8 mid, const u8 *mac)
 {
 	int i;
 	int rc = -ENOENT;
 
 	for (i = 0; i < ARRAY_SIZE(wil->sta); i++) {
-		if ((wil->sta[i].status != wil_sta_unused) &&
+		if (wil->sta[i].mid == mid &&
+		    wil->sta[i].status != wil_sta_unused &&
 		    ether_addr_equal(wil->sta[i].addr, mac)) {
 			rc = i;
 			break;
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index 648f63682f59..95570b8f1f6d 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -122,6 +122,85 @@ static void wil_dev_setup(struct net_device *dev)
 	dev->tx_queue_len = WIL_TX_Q_LEN_DEFAULT;
 }
 
+static void wil_vif_deinit(struct wil6210_vif *vif)
+{
+	del_timer_sync(&vif->scan_timer);
+	del_timer_sync(&vif->p2p.discovery_timer);
+	cancel_work_sync(&vif->disconnect_worker);
+	cancel_work_sync(&vif->p2p.discovery_expired_work);
+	cancel_work_sync(&vif->p2p.delayed_listen_work);
+	wil_probe_client_flush(vif);
+	cancel_work_sync(&vif->probe_client_worker);
+}
+
+void wil_vif_free(struct wil6210_vif *vif)
+{
+	struct net_device *ndev = vif_to_ndev(vif);
+
+	wil_vif_deinit(vif);
+	free_netdev(ndev);
+}
+
+static void wil_ndev_destructor(struct net_device *ndev)
+{
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
+
+	wil_vif_deinit(vif);
+}
+
+static void wil_connect_timer_fn(struct timer_list *t)
+{
+	struct wil6210_vif *vif = from_timer(vif, t, connect_timer);
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	bool q;
+
+	wil_err(wil, "Connect timeout detected, disconnect station\n");
+
+	/* reschedule to thread context - disconnect won't
+	 * run from atomic context.
+	 * queue on wmi_wq to prevent race with connect event.
+	 */
+	q = queue_work(wil->wmi_wq, &vif->disconnect_worker);
+	wil_dbg_wmi(wil, "queue_work of disconnect_worker -> %d\n", q);
+}
+
+static void wil_scan_timer_fn(struct timer_list *t)
+{
+	struct wil6210_vif *vif = from_timer(vif, t, scan_timer);
+	struct wil6210_priv *wil = vif_to_wil(vif);
+
+	clear_bit(wil_status_fwready, wil->status);
+	wil_err(wil, "Scan timeout detected, start fw error recovery\n");
+	wil_fw_error_recovery(wil);
+}
+
+static void wil_p2p_discovery_timer_fn(struct timer_list *t)
+{
+	struct wil6210_vif *vif = from_timer(vif, t, p2p.discovery_timer);
+	struct wil6210_priv *wil = vif_to_wil(vif);
+
+	wil_dbg_misc(wil, "p2p_discovery_timer_fn\n");
+
+	schedule_work(&vif->p2p.discovery_expired_work);
+}
+
+static void wil_vif_init(struct wil6210_vif *vif)
+{
+	vif->bcast_vring = -1;
+
+	mutex_init(&vif->probe_client_mutex);
+
+	timer_setup(&vif->connect_timer, wil_connect_timer_fn, 0);
+	timer_setup(&vif->scan_timer, wil_scan_timer_fn, 0);
+	timer_setup(&vif->p2p.discovery_timer, wil_p2p_discovery_timer_fn, 0);
+
+	INIT_WORK(&vif->probe_client_worker, wil_probe_client_worker);
+	INIT_WORK(&vif->disconnect_worker, wil_disconnect_worker);
+	INIT_WORK(&vif->p2p.delayed_listen_work, wil_p2p_delayed_listen_work);
+
+	INIT_LIST_HEAD(&vif->probe_client_pending);
+}
+
 struct wil6210_vif *
 wil_vif_alloc(struct wil6210_priv *wil, const char *name,
 	      unsigned char name_assign_type, enum nl80211_iftype iftype,
@@ -138,10 +217,12 @@ wil_vif_alloc(struct wil6210_priv *wil, const char *name,
 		return ERR_PTR(-ENOMEM);
 	}
 	if (mid == 0)
-		wil->ndev = ndev;
+		wil->main_ndev = ndev;
 	vif = ndev_to_vif(ndev);
+	vif->ndev = ndev;
 	vif->wil = wil;
 	vif->mid = mid;
+	wil_vif_init(vif);
 
 	wdev = &vif->wdev;
 	wdev->wiphy = wil->wiphy;
@@ -163,7 +244,6 @@ wil_vif_alloc(struct wil6210_priv *wil, const char *name,
 
 void *wil_if_alloc(struct device *dev)
 {
-	struct wireless_dev *wdev;
 	struct wil6210_priv *wil;
 	struct wil6210_vif *vif;
 	int rc = 0;
@@ -190,9 +270,7 @@ void *wil_if_alloc(struct device *dev)
 		goto out_priv;
 	}
 
-	wdev = &vif->wdev;
-	wil->wdev = wdev;
-	wil->radio_wdev = wdev;
+	wil->radio_wdev = vif_to_wdev(vif);
 
 	return wil;
 
@@ -207,7 +285,7 @@ out_cfg:
 
 void wil_if_free(struct wil6210_priv *wil)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 
 	wil_dbg_misc(wil, "if_free\n");
 
@@ -216,7 +294,8 @@ void wil_if_free(struct wil6210_priv *wil)
 
 	wil_priv_deinit(wil);
 
-	wil_to_ndev(wil) = NULL;
+	wil->main_ndev = NULL;
+	wil_ndev_destructor(ndev);
 	free_netdev(ndev);
 
 	wil_cfg80211_deinit(wil);
@@ -224,9 +303,8 @@ void wil_if_free(struct wil6210_priv *wil)
 
 int wil_if_add(struct wil6210_priv *wil)
 {
-	struct wireless_dev *wdev = wil_to_wdev(wil);
 	struct wiphy *wiphy = wil->wiphy;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	int rc;
 
 	wil_dbg_misc(wil, "entered");
@@ -255,14 +333,14 @@ int wil_if_add(struct wil6210_priv *wil)
 	return 0;
 
 out_wiphy:
-	wiphy_unregister(wdev->wiphy);
+	wiphy_unregister(wiphy);
 	return rc;
 }
 
 void wil_if_remove(struct wil6210_priv *wil)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct net_device *ndev = wil->main_ndev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 
 	wil_dbg_misc(wil, "if_remove\n");
 
diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c
index 7dbee2c3e482..8b34f18590d9 100644
--- a/drivers/net/wireless/ath/wil6210/p2p.c
+++ b/drivers/net/wireless/ath/wil6210/p2p.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -22,27 +23,28 @@
 #define P2P_SEARCH_DURATION_MS 500
 #define P2P_DEFAULT_BI 100
 
-static int wil_p2p_start_listen(struct wil6210_priv *wil)
+static int wil_p2p_start_listen(struct wil6210_vif *vif)
 {
-	struct wil_p2p_info *p2p = &wil->p2p;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct wil_p2p_info *p2p = &vif->p2p;
 	u8 channel = p2p->listen_chan.hw_value;
 	int rc;
 
 	lockdep_assert_held(&wil->mutex);
 
-	rc = wmi_p2p_cfg(wil, channel, P2P_DEFAULT_BI);
+	rc = wmi_p2p_cfg(vif, channel, P2P_DEFAULT_BI);
 	if (rc) {
 		wil_err(wil, "wmi_p2p_cfg failed\n");
 		goto out;
 	}
 
-	rc = wmi_set_ssid(wil, strlen(P2P_WILDCARD_SSID), P2P_WILDCARD_SSID);
+	rc = wmi_set_ssid(vif, strlen(P2P_WILDCARD_SSID), P2P_WILDCARD_SSID);
 	if (rc) {
 		wil_err(wil, "wmi_set_ssid failed\n");
 		goto out_stop;
 	}
 
-	rc = wmi_start_listen(wil);
+	rc = wmi_start_listen(vif);
 	if (rc) {
 		wil_err(wil, "wmi_start_listen failed\n");
 		goto out_stop;
@@ -53,7 +55,7 @@ static int wil_p2p_start_listen(struct wil6210_priv *wil)
 		  jiffies + msecs_to_jiffies(p2p->listen_duration));
 out_stop:
 	if (rc)
-		wmi_stop_discovery(wil);
+		wmi_stop_discovery(vif);
 
 out:
 	return rc;
@@ -65,20 +67,12 @@ bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request)
 	       (request->channels[0]->hw_value == P2P_DMG_SOCIAL_CHANNEL);
 }
 
-void wil_p2p_discovery_timer_fn(struct timer_list *t)
-{
-	struct wil6210_priv *wil = from_timer(wil, t, p2p.discovery_timer);
-
-	wil_dbg_misc(wil, "p2p_discovery_timer_fn\n");
-
-	schedule_work(&wil->p2p.discovery_expired_work);
-}
-
-int wil_p2p_search(struct wil6210_priv *wil,
+int wil_p2p_search(struct wil6210_vif *vif,
 		   struct cfg80211_scan_request *request)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
-	struct wil_p2p_info *p2p = &wil->p2p;
+	struct wil_p2p_info *p2p = &vif->p2p;
 
 	wil_dbg_misc(wil, "p2p_search: channel %d\n", P2P_DMG_SOCIAL_CHANNEL);
 
@@ -90,20 +84,20 @@ int wil_p2p_search(struct wil6210_priv *wil,
 		goto out;
 	}
 
-	rc = wmi_p2p_cfg(wil, P2P_DMG_SOCIAL_CHANNEL, P2P_DEFAULT_BI);
+	rc = wmi_p2p_cfg(vif, P2P_DMG_SOCIAL_CHANNEL, P2P_DEFAULT_BI);
 	if (rc) {
 		wil_err(wil, "wmi_p2p_cfg failed\n");
 		goto out;
 	}
 
-	rc = wmi_set_ssid(wil, strlen(P2P_WILDCARD_SSID), P2P_WILDCARD_SSID);
+	rc = wmi_set_ssid(vif, strlen(P2P_WILDCARD_SSID), P2P_WILDCARD_SSID);
 	if (rc) {
 		wil_err(wil, "wmi_set_ssid failed\n");
 		goto out_stop;
 	}
 
 	/* Set application IE to probe request and probe response */
-	rc = wmi_set_ie(wil, WMI_FRAME_PROBE_REQ,
+	rc = wmi_set_ie(vif, WMI_FRAME_PROBE_REQ,
 			request->ie_len, request->ie);
 	if (rc) {
 		wil_err(wil, "wmi_set_ie(WMI_FRAME_PROBE_REQ) failed\n");
@@ -113,14 +107,14 @@ int wil_p2p_search(struct wil6210_priv *wil,
 	/* supplicant doesn't provide Probe Response IEs. As a workaround -
 	 * re-use Probe Request IEs
 	 */
-	rc = wmi_set_ie(wil, WMI_FRAME_PROBE_RESP,
+	rc = wmi_set_ie(vif, WMI_FRAME_PROBE_RESP,
 			request->ie_len, request->ie);
 	if (rc) {
 		wil_err(wil, "wmi_set_ie(WMI_FRAME_PROBE_RESP) failed\n");
 		goto out_stop;
 	}
 
-	rc = wmi_start_search(wil);
+	rc = wmi_start_search(vif);
 	if (rc) {
 		wil_err(wil, "wmi_start_search failed\n");
 		goto out_stop;
@@ -133,7 +127,7 @@ int wil_p2p_search(struct wil6210_priv *wil,
 
 out_stop:
 	if (rc)
-		wmi_stop_discovery(wil);
+		wmi_stop_discovery(vif);
 
 out:
 	return rc;
@@ -143,7 +137,8 @@ int wil_p2p_listen(struct wil6210_priv *wil, struct wireless_dev *wdev,
 		   unsigned int duration, struct ieee80211_channel *chan,
 		   u64 *cookie)
 {
-	struct wil_p2p_info *p2p = &wil->p2p;
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
+	struct wil_p2p_info *p2p = &vif->p2p;
 	int rc;
 
 	if (!chan)
@@ -164,7 +159,7 @@ int wil_p2p_listen(struct wil6210_priv *wil, struct wireless_dev *wdev,
 	p2p->listen_duration = duration;
 
 	mutex_lock(&wil->p2p_wdev_mutex);
-	if (wil->scan_request) {
+	if (vif->scan_request) {
 		wil_dbg_misc(wil, "Delaying p2p listen until scan done\n");
 		p2p->pending_listen_wdev = wdev;
 		p2p->discovery_started = 1;
@@ -174,12 +169,13 @@ int wil_p2p_listen(struct wil6210_priv *wil, struct wireless_dev *wdev,
 	}
 	mutex_unlock(&wil->p2p_wdev_mutex);
 
-	rc = wil_p2p_start_listen(wil);
+	rc = wil_p2p_start_listen(vif);
 	if (rc)
 		goto out;
 
 	p2p->discovery_started = 1;
-	wil->radio_wdev = wdev;
+	if (vif->mid == 0)
+		wil->radio_wdev = wdev;
 
 	cfg80211_ready_on_channel(wdev, *cookie, chan, duration,
 				  GFP_KERNEL);
@@ -189,9 +185,9 @@ out:
 	return rc;
 }
 
-u8 wil_p2p_stop_discovery(struct wil6210_priv *wil)
+u8 wil_p2p_stop_discovery(struct wil6210_vif *vif)
 {
-	struct wil_p2p_info *p2p = &wil->p2p;
+	struct wil_p2p_info *p2p = &vif->p2p;
 	u8 started = p2p->discovery_started;
 
 	if (p2p->discovery_started) {
@@ -200,7 +196,7 @@ u8 wil_p2p_stop_discovery(struct wil6210_priv *wil)
 			p2p->pending_listen_wdev = NULL;
 		} else {
 			del_timer_sync(&p2p->discovery_timer);
-			wmi_stop_discovery(wil);
+			wmi_stop_discovery(vif);
 		}
 		p2p->discovery_started = 0;
 	}
@@ -208,9 +204,10 @@ u8 wil_p2p_stop_discovery(struct wil6210_priv *wil)
 	return started;
 }
 
-int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie)
+int wil_p2p_cancel_listen(struct wil6210_vif *vif, u64 cookie)
 {
-	struct wil_p2p_info *p2p = &wil->p2p;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct wil_p2p_info *p2p = &vif->p2p;
 	u8 started;
 
 	mutex_lock(&wil->mutex);
@@ -222,7 +219,7 @@ int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie)
 		return -ENOENT;
 	}
 
-	started = wil_p2p_stop_discovery(wil);
+	started = wil_p2p_stop_discovery(vif);
 
 	mutex_unlock(&wil->mutex);
 
@@ -232,11 +229,12 @@ int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie)
 	}
 
 	mutex_lock(&wil->p2p_wdev_mutex);
-	cfg80211_remain_on_channel_expired(wil->radio_wdev,
+	cfg80211_remain_on_channel_expired(vif_to_radio_wdev(wil, vif),
 					   p2p->cookie,
 					   &p2p->listen_chan,
 					   GFP_KERNEL);
-	wil->radio_wdev = wil->wdev;
+	if (vif->mid == 0)
+		wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
 	mutex_unlock(&wil->p2p_wdev_mutex);
 	return 0;
 }
@@ -245,40 +243,43 @@ void wil_p2p_listen_expired(struct work_struct *work)
 {
 	struct wil_p2p_info *p2p = container_of(work,
 			struct wil_p2p_info, discovery_expired_work);
-	struct wil6210_priv *wil = container_of(p2p,
-			struct wil6210_priv, p2p);
+	struct wil6210_vif *vif = container_of(p2p,
+			struct wil6210_vif, p2p);
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	u8 started;
 
 	wil_dbg_misc(wil, "p2p_listen_expired\n");
 
 	mutex_lock(&wil->mutex);
-	started = wil_p2p_stop_discovery(wil);
+	started = wil_p2p_stop_discovery(vif);
 	mutex_unlock(&wil->mutex);
 
-	if (started) {
-		mutex_lock(&wil->p2p_wdev_mutex);
-		cfg80211_remain_on_channel_expired(wil->radio_wdev,
-						   p2p->cookie,
-						   &p2p->listen_chan,
-						   GFP_KERNEL);
-		wil->radio_wdev = wil->wdev;
-		mutex_unlock(&wil->p2p_wdev_mutex);
-	}
+	if (!started)
+		return;
 
+	mutex_lock(&wil->p2p_wdev_mutex);
+	cfg80211_remain_on_channel_expired(vif_to_radio_wdev(wil, vif),
+					   p2p->cookie,
+					   &p2p->listen_chan,
+					   GFP_KERNEL);
+	if (vif->mid == 0)
+		wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
+	mutex_unlock(&wil->p2p_wdev_mutex);
 }
 
 void wil_p2p_search_expired(struct work_struct *work)
 {
 	struct wil_p2p_info *p2p = container_of(work,
 			struct wil_p2p_info, discovery_expired_work);
-	struct wil6210_priv *wil = container_of(p2p,
-			struct wil6210_priv, p2p);
+	struct wil6210_vif *vif = container_of(p2p,
+			struct wil6210_vif, p2p);
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	u8 started;
 
 	wil_dbg_misc(wil, "p2p_search_expired\n");
 
 	mutex_lock(&wil->mutex);
-	started = wil_p2p_stop_discovery(wil);
+	started = wil_p2p_stop_discovery(vif);
 	mutex_unlock(&wil->mutex);
 
 	if (started) {
@@ -287,10 +288,12 @@ void wil_p2p_search_expired(struct work_struct *work)
 		};
 
 		mutex_lock(&wil->p2p_wdev_mutex);
-		if (wil->scan_request) {
-			cfg80211_scan_done(wil->scan_request, &info);
-			wil->scan_request = NULL;
-			wil->radio_wdev = wil->wdev;
+		if (vif->scan_request) {
+			cfg80211_scan_done(vif->scan_request, &info);
+			vif->scan_request = NULL;
+			if (vif->mid == 0)
+				wil->radio_wdev =
+					wil->main_ndev->ieee80211_ptr;
 		}
 		mutex_unlock(&wil->p2p_wdev_mutex);
 	}
@@ -300,8 +303,9 @@ void wil_p2p_delayed_listen_work(struct work_struct *work)
 {
 	struct wil_p2p_info *p2p = container_of(work,
 			struct wil_p2p_info, delayed_listen_work);
-	struct wil6210_priv *wil = container_of(p2p,
-			struct wil6210_priv, p2p);
+	struct wil6210_vif *vif = container_of(p2p,
+			struct wil6210_vif, p2p);
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 
 	mutex_lock(&wil->mutex);
@@ -311,14 +315,14 @@ void wil_p2p_delayed_listen_work(struct work_struct *work)
 		goto out;
 
 	mutex_lock(&wil->p2p_wdev_mutex);
-	if (wil->scan_request) {
+	if (vif->scan_request) {
 		/* another scan started, wait again... */
 		mutex_unlock(&wil->p2p_wdev_mutex);
 		goto out;
 	}
 	mutex_unlock(&wil->p2p_wdev_mutex);
 
-	rc = wil_p2p_start_listen(wil);
+	rc = wil_p2p_start_listen(vif);
 
 	mutex_lock(&wil->p2p_wdev_mutex);
 	if (rc) {
@@ -326,12 +330,14 @@ void wil_p2p_delayed_listen_work(struct work_struct *work)
 						   p2p->cookie,
 						   &p2p->listen_chan,
 						   GFP_KERNEL);
-		wil->radio_wdev = wil->wdev;
+		if (vif->mid == 0)
+			wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
 	} else {
 		cfg80211_ready_on_channel(p2p->pending_listen_wdev, p2p->cookie,
 					  &p2p->listen_chan,
 					  p2p->listen_duration, GFP_KERNEL);
-		wil->radio_wdev = p2p->pending_listen_wdev;
+		if (vif->mid == 0)
+			wil->radio_wdev = p2p->pending_listen_wdev;
 	}
 	p2p->pending_listen_wdev = NULL;
 	mutex_unlock(&wil->p2p_wdev_mutex);
@@ -342,7 +348,8 @@ out:
 
 void wil_p2p_stop_radio_operations(struct wil6210_priv *wil)
 {
-	struct wil_p2p_info *p2p = &wil->p2p;
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
+	struct wil_p2p_info *p2p = &vif->p2p;
 	struct cfg80211_scan_info info = {
 		.aborted = true,
 	};
@@ -355,21 +362,21 @@ void wil_p2p_stop_radio_operations(struct wil6210_priv *wil)
 
 	if (!p2p->discovery_started) {
 		/* Regular scan on the p2p device */
-		if (wil->scan_request &&
-		    wil->scan_request->wdev == wil->p2p_wdev)
-			wil_abort_scan(wil, true);
+		if (vif->scan_request &&
+		    vif->scan_request->wdev == wil->p2p_wdev)
+			wil_abort_scan(vif, true);
 		goto out;
 	}
 
 	/* Search or listen on p2p device */
 	mutex_unlock(&wil->p2p_wdev_mutex);
-	wil_p2p_stop_discovery(wil);
+	wil_p2p_stop_discovery(vif);
 	mutex_lock(&wil->p2p_wdev_mutex);
 
-	if (wil->scan_request) {
+	if (vif->scan_request) {
 		/* search */
-		cfg80211_scan_done(wil->scan_request, &info);
-		wil->scan_request = NULL;
+		cfg80211_scan_done(vif->scan_request, &info);
+		vif->scan_request = NULL;
 	} else {
 		/* listen */
 		cfg80211_remain_on_channel_expired(wil->radio_wdev,
@@ -379,5 +386,5 @@ void wil_p2p_stop_radio_operations(struct wil6210_priv *wil)
 	}
 
 out:
-	wil->radio_wdev = wil->wdev;
+	wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
 }
diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index 809092a49192..a61ffd9dce0f 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c
@@ -425,7 +425,7 @@ static int wil6210_suspend(struct device *dev, bool is_runtime)
 	int rc = 0;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct wil6210_priv *wil = pci_get_drvdata(pdev);
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	bool keep_radio_on = ndev->flags & IFF_UP &&
 			     wil->keep_radio_on_during_sleep;
 
@@ -457,7 +457,7 @@ static int wil6210_resume(struct device *dev, bool is_runtime)
 	int rc = 0;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct wil6210_priv *wil = pci_get_drvdata(pdev);
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	bool keep_radio_on = ndev->flags & IFF_UP &&
 			     wil->keep_radio_on_during_sleep;
 
diff --git a/drivers/net/wireless/ath/wil6210/pm.c b/drivers/net/wireless/ath/wil6210/pm.c
index 0a96518a566f..12f6d78bcf63 100644
--- a/drivers/net/wireless/ath/wil6210/pm.c
+++ b/drivers/net/wireless/ath/wil6210/pm.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014,2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -23,8 +24,8 @@
 int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime)
 {
 	int rc = 0;
-	struct wireless_dev *wdev = wil->wdev;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 	bool wmi_only = test_bit(WMI_FW_CAPABILITY_WMI_ONLY,
 				 wil->fw_capabilities);
 
@@ -258,7 +259,7 @@ reject_suspend:
 static int wil_suspend_radio_off(struct wil6210_priv *wil)
 {
 	int rc = 0;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 
 	wil_dbg_pm(wil, "suspend radio off\n");
 
@@ -306,7 +307,7 @@ out:
 static int wil_resume_radio_off(struct wil6210_priv *wil)
 {
 	int rc = 0;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 
 	wil_dbg_pm(wil, "Enabling PCIe IRQ\n");
 	wil_enable_irq(wil);
diff --git a/drivers/net/wireless/ath/wil6210/pmc.c b/drivers/net/wireless/ath/wil6210/pmc.c
index 4ea27b0bd278..c49f7988369e 100644
--- a/drivers/net/wireless/ath/wil6210/pmc.c
+++ b/drivers/net/wireless/ath/wil6210/pmc.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2015,2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -53,6 +54,7 @@ void wil_pmc_alloc(struct wil6210_priv *wil,
 	u32 i;
 	struct pmc_ctx *pmc = &wil->pmc;
 	struct device *dev = wil_to_dev(wil);
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_pmc_cmd pmc_cmd = {0};
 	int last_cmd_err = -ENOMEM;
 
@@ -186,6 +188,7 @@ void wil_pmc_alloc(struct wil6210_priv *wil,
 	wil_dbg_misc(wil, "pmc_alloc: send WMI_PMC_CMD with ALLOCATE op\n");
 	pmc->last_cmd_status = wmi_send(wil,
 					WMI_PMC_CMDID,
+					vif->mid,
 					&pmc_cmd,
 					sizeof(pmc_cmd));
 	if (pmc->last_cmd_status) {
@@ -236,6 +239,7 @@ void wil_pmc_free(struct wil6210_priv *wil, int send_pmc_cmd)
 {
 	struct pmc_ctx *pmc = &wil->pmc;
 	struct device *dev = wil_to_dev(wil);
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_pmc_cmd pmc_cmd = {0};
 
 	mutex_lock(&pmc->lock);
@@ -254,8 +258,8 @@ void wil_pmc_free(struct wil6210_priv *wil, int send_pmc_cmd)
 		wil_dbg_misc(wil, "send WMI_PMC_CMD with RELEASE op\n");
 		pmc_cmd.op = WMI_PMC_RELEASE;
 		pmc->last_cmd_status =
-				wmi_send(wil, WMI_PMC_CMDID, &pmc_cmd,
-					 sizeof(pmc_cmd));
+				wmi_send(wil, WMI_PMC_CMDID, vif->mid,
+					 &pmc_cmd, sizeof(pmc_cmd));
 		if (pmc->last_cmd_status) {
 			wil_err(wil,
 				"WMI_PMC_CMD with RELEASE op failed, status %d",
diff --git a/drivers/net/wireless/ath/wil6210/rx_reorder.c b/drivers/net/wireless/ath/wil6210/rx_reorder.c
index a43cffcf1bbf..c2140b3d9496 100644
--- a/drivers/net/wireless/ath/wil6210/rx_reorder.c
+++ b/drivers/net/wireless/ath/wil6210/rx_reorder.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -44,7 +45,7 @@ static void wil_release_reorder_frame(struct wil6210_priv *wil,
 				      struct wil_tid_ampdu_rx *r,
 				      int index)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct sk_buff *skb = r->reorder_buf[index];
 
 	if (!skb)
@@ -93,7 +94,7 @@ static void wil_reorder_release(struct wil6210_priv *wil,
 void wil_rx_reorder(struct wil6210_priv *wil, struct sk_buff *skb)
 __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct vring_rx_desc *d = wil_skb_rxdesc(skb);
 	int tid = wil_rxdesc_tid(d);
 	int cid = wil_rxdesc_cid(d);
@@ -292,8 +293,8 @@ static u16 wil_agg_size(struct wil6210_priv *wil, u16 req_agg_wsize)
 }
 
 /* Block Ack - Rx side (recipient) */
-int wil_addba_rx_request(struct wil6210_priv *wil, u8 cidxtid,
-			 u8 dialog_token, __le16 ba_param_set,
+int wil_addba_rx_request(struct wil6210_priv *wil, u8 mid,
+			 u8 cidxtid, u8 dialog_token, __le16 ba_param_set,
 			 __le16 ba_timeout, __le16 ba_seq_ctrl)
 __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 {
@@ -354,7 +355,7 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 		}
 	}
 
-	rc = wmi_addba_rx_resp(wil, cid, tid, dialog_token, status,
+	rc = wmi_addba_rx_resp(wil, mid, cid, tid, dialog_token, status,
 			       agg_amsdu, agg_wsize, agg_timeout);
 	if (rc || (status != WLAN_STATUS_SUCCESS)) {
 		wil_err(wil, "do not apply ba, rc(%d), status(%d)\n", rc,
@@ -393,7 +394,7 @@ int wil_addba_tx_request(struct wil6210_priv *wil, u8 ringid, u16 wsize)
 		goto out;
 	}
 	txdata->addba_in_progress = true;
-	rc = wmi_addba(wil, ringid, agg_wsize, agg_timeout);
+	rc = wmi_addba(wil, txdata->mid, ringid, agg_wsize, agg_timeout);
 	if (rc) {
 		wil_err(wil, "wmi_addba failed, rc (%d)", rc);
 		txdata->addba_in_progress = false;
diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index 16b8a4e5201f..a53238647b95 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -474,7 +475,7 @@ static struct sk_buff *wil_vring_reap_rx(struct wil6210_priv *wil,
 					 struct vring *vring)
 {
 	struct device *dev = wil_to_dev(wil);
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	volatile struct vring_rx_desc *_d;
 	struct vring_rx_desc *d;
 	struct sk_buff *skb;
@@ -624,7 +625,7 @@ again:
  */
 static int wil_rx_refill(struct wil6210_priv *wil, int count)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct vring *v = &wil->vring_rx;
 	u32 next_tail;
 	int rc = 0;
@@ -713,8 +714,9 @@ static int wil_rx_crypto_check(struct wil6210_priv *wil, struct sk_buff *skb)
 void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev)
 {
 	gro_result_t rc = GRO_NORMAL;
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct wil6210_priv *wil = ndev_to_wil(ndev);
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wireless_dev *wdev = vif_to_wdev(vif);
 	unsigned int len = skb->len;
 	struct vring_rx_desc *d = wil_skb_rxdesc(skb);
 	int cid = wil_rxdesc_cid(d); /* always 0..7, no need to check */
@@ -751,14 +753,15 @@ void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev)
 		goto stats;
 	}
 
-	if (wdev->iftype == NL80211_IFTYPE_AP && !wil->ap_isolate) {
+	if (wdev->iftype == NL80211_IFTYPE_AP && !vif->ap_isolate) {
 		if (mcast) {
 			/* send multicast frames both to higher layers in
 			 * local net stack and back to the wireless medium
 			 */
 			xmit_skb = skb_copy(skb, GFP_ATOMIC);
 		} else {
-			int xmit_cid = wil_find_cid(wil, eth->h_dest);
+			int xmit_cid = wil_find_cid(wil, vif->mid,
+						    eth->h_dest);
 
 			if (xmit_cid >= 0) {
 				/* The destination station is associated to
@@ -815,7 +818,8 @@ stats:
  */
 void wil_rx_handle(struct wil6210_priv *wil, int *quota)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
 	struct vring *v = &wil->vring_rx;
 	struct sk_buff *skb;
 
@@ -827,7 +831,8 @@ void wil_rx_handle(struct wil6210_priv *wil, int *quota)
 	while ((*quota > 0) && (NULL != (skb = wil_vring_reap_rx(wil, v)))) {
 		(*quota)--;
 
-		if (wil->wdev->iftype == NL80211_IFTYPE_MONITOR) {
+		/* monitor is currently supported on main interface only */
+		if (wdev->iftype == NL80211_IFTYPE_MONITOR) {
 			skb->dev = ndev;
 			skb_reset_mac_header(skb);
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -911,12 +916,14 @@ static inline void wil_tx_data_init(struct vring_tx_data *txdata)
 	txdata->agg_timeout = 0;
 	txdata->agg_amsdu = 0;
 	txdata->addba_in_progress = false;
+	txdata->mid = U8_MAX;
 	spin_unlock_bh(&txdata->lock);
 }
 
-int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size,
+int wil_vring_init_tx(struct wil6210_vif *vif, int id, int size,
 		      int cid, int tid)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct wmi_vring_cfg_cmd cmd = {
 		.action = cpu_to_le32(WMI_VRING_CMD_ADD),
@@ -966,9 +973,9 @@ int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size,
 
 	cmd.vring_cfg.tx_sw_ring.ring_mem_base = cpu_to_le64(vring->pa);
 
-	if (!wil->privacy)
+	if (!vif->privacy)
 		txdata->dot1x_open = true;
-	rc = wmi_call(wil, WMI_VRING_CFG_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_VRING_CFG_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_VRING_CFG_DONE_EVENTID, &reply, sizeof(reply), 100);
 	if (rc)
 		goto out_free;
@@ -982,6 +989,7 @@ int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size,
 
 	spin_lock_bh(&txdata->lock);
 	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
+	txdata->mid = vif->mid;
 	txdata->enabled = 1;
 	spin_unlock_bh(&txdata->lock);
 
@@ -1003,8 +1011,9 @@ int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size,
 	return rc;
 }
 
-int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size)
+int wil_vring_init_bcast(struct wil6210_vif *vif, int id, int size)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct wmi_bcast_vring_cfg_cmd cmd = {
 		.action = cpu_to_le32(WMI_VRING_CMD_ADD),
@@ -1046,9 +1055,10 @@ int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size)
 
 	cmd.vring_cfg.tx_sw_ring.ring_mem_base = cpu_to_le64(vring->pa);
 
-	if (!wil->privacy)
+	if (!vif->privacy)
 		txdata->dot1x_open = true;
-	rc = wmi_call(wil, WMI_BCAST_VRING_CFG_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_BCAST_VRING_CFG_CMDID, vif->mid,
+		      &cmd, sizeof(cmd),
 		      WMI_VRING_CFG_DONE_EVENTID, &reply, sizeof(reply), 100);
 	if (rc)
 		goto out_free;
@@ -1062,6 +1072,7 @@ int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size)
 
 	spin_lock_bh(&txdata->lock);
 	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
+	txdata->mid = vif->mid;
 	txdata->enabled = 1;
 	spin_unlock_bh(&txdata->lock);
 
@@ -1091,6 +1102,7 @@ void wil_vring_fini_tx(struct wil6210_priv *wil, int id)
 
 	spin_lock_bh(&txdata->lock);
 	txdata->dot1x_open = false;
+	txdata->mid = U8_MAX;
 	txdata->enabled = 0; /* no Tx can be in progress or start anew */
 	spin_unlock_bh(&txdata->lock);
 	/* napi_synchronize waits for completion of the current NAPI but will
@@ -1108,11 +1120,12 @@ void wil_vring_fini_tx(struct wil6210_priv *wil, int id)
 }
 
 static struct vring *wil_find_tx_ucast(struct wil6210_priv *wil,
+				       struct wil6210_vif *vif,
 				       struct sk_buff *skb)
 {
 	int i;
 	struct ethhdr *eth = (void *)skb->data;
-	int cid = wil_find_cid(wil, eth->h_dest);
+	int cid = wil_find_cid(wil, vif->mid, eth->h_dest);
 
 	if (cid < 0)
 		return NULL;
@@ -1142,10 +1155,11 @@ static struct vring *wil_find_tx_ucast(struct wil6210_priv *wil,
 	return NULL;
 }
 
-static int wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
-			struct sk_buff *skb);
+static int wil_tx_vring(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			struct vring *vring, struct sk_buff *skb);
 
 static struct vring *wil_find_tx_vring_sta(struct wil6210_priv *wil,
+					   struct wil6210_vif *vif,
 					   struct sk_buff *skb)
 {
 	struct vring *v;
@@ -1160,7 +1174,7 @@ static struct vring *wil_find_tx_vring_sta(struct wil6210_priv *wil,
 	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
 		v = &wil->vring_tx[i];
 		txdata = &wil->vring_tx_data[i];
-		if (!v->va || !txdata->enabled)
+		if (!v->va || !txdata->enabled || txdata->mid != vif->mid)
 			continue;
 
 		cid = wil->vring2cid_tid[i][0];
@@ -1193,11 +1207,12 @@ static struct vring *wil_find_tx_vring_sta(struct wil6210_priv *wil,
  *  - for PBSS
  */
 static struct vring *wil_find_tx_bcast_1(struct wil6210_priv *wil,
+					 struct wil6210_vif *vif,
 					 struct sk_buff *skb)
 {
 	struct vring *v;
 	struct vring_tx_data *txdata;
-	int i = wil->bcast_vring;
+	int i = vif->bcast_vring;
 
 	if (i < 0)
 		return NULL;
@@ -1222,6 +1237,7 @@ static void wil_set_da_for_vring(struct wil6210_priv *wil,
 }
 
 static struct vring *wil_find_tx_bcast_2(struct wil6210_priv *wil,
+					 struct wil6210_vif *vif,
 					 struct sk_buff *skb)
 {
 	struct vring *v, *v2;
@@ -1230,13 +1246,13 @@ static struct vring *wil_find_tx_bcast_2(struct wil6210_priv *wil,
 	u8 cid;
 	struct ethhdr *eth = (void *)skb->data;
 	char *src = eth->h_source;
-	struct vring_tx_data *txdata;
+	struct vring_tx_data *txdata, *txdata2;
 
 	/* find 1-st vring eligible for data */
 	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
 		v = &wil->vring_tx[i];
 		txdata = &wil->vring_tx_data[i];
-		if (!v->va || !txdata->enabled)
+		if (!v->va || !txdata->enabled || txdata->mid != vif->mid)
 			continue;
 
 		cid = wil->vring2cid_tid[i][0];
@@ -1264,7 +1280,8 @@ found:
 	/* find other active vrings and duplicate skb for each */
 	for (i++; i < WIL6210_MAX_TX_RINGS; i++) {
 		v2 = &wil->vring_tx[i];
-		if (!v2->va)
+		txdata2 = &wil->vring_tx_data[i];
+		if (!v2->va || txdata2->mid != vif->mid)
 			continue;
 		cid = wil->vring2cid_tid[i][0];
 		if (cid >= WIL6210_MAX_CID) /* skip BCAST */
@@ -1280,7 +1297,7 @@ found:
 		if (skb2) {
 			wil_dbg_txrx(wil, "BCAST DUP -> ring %d\n", i);
 			wil_set_da_for_vring(wil, skb2, i);
-			wil_tx_vring(wil, v2, skb2);
+			wil_tx_vring(wil, vif, v2, skb2);
 		} else {
 			wil_err(wil, "skb_copy failed\n");
 		}
@@ -1417,8 +1434,8 @@ static inline void wil_set_tx_desc_last_tso(volatile struct vring_tx_desc *d)
 		  DMA_CFG_DESC_TX_0_SEGMENT_BUF_DETAILS_POS;
 }
 
-static int __wil_tx_vring_tso(struct wil6210_priv *wil, struct vring *vring,
-			      struct sk_buff *skb)
+static int __wil_tx_vring_tso(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			      struct vring *vring, struct sk_buff *skb)
 {
 	struct device *dev = wil_to_dev(wil);
 
@@ -1710,8 +1727,8 @@ err_exit:
 	return rc;
 }
 
-static int __wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
-			  struct sk_buff *skb)
+static int __wil_tx_vring(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			  struct vring *vring, struct sk_buff *skb)
 {
 	struct device *dev = wil_to_dev(wil);
 	struct vring_tx_desc dd, *d = &dd;
@@ -1725,7 +1742,7 @@ static int __wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
 	uint i = swhead;
 	dma_addr_t pa;
 	int used;
-	bool mcast = (vring_index == wil->bcast_vring);
+	bool mcast = (vring_index == vif->bcast_vring);
 	uint len = skb_headlen(skb);
 
 	wil_dbg_txrx(wil, "tx_vring: %d bytes to vring %d\n", skb->len,
@@ -1860,8 +1877,8 @@ static int __wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
 	return -EINVAL;
 }
 
-static int wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
-			struct sk_buff *skb)
+static int wil_tx_vring(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			struct vring *vring, struct sk_buff *skb)
 {
 	int vring_index = vring - wil->vring_tx;
 	struct vring_tx_data *txdata = &wil->vring_tx_data[vring_index];
@@ -1879,7 +1896,7 @@ static int wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
 	}
 
 	rc = (skb_is_gso(skb) ? __wil_tx_vring_tso : __wil_tx_vring)
-	     (wil, vring, skb);
+	     (wil, vif, vring, skb);
 
 	spin_unlock(&txdata->lock);
 
@@ -1923,7 +1940,7 @@ static inline void __wil_update_net_queues(struct wil6210_priv *wil,
 	if (check_stop) {
 		if (!vring || unlikely(wil_vring_avail_low(vring))) {
 			/* not enough room in the vring */
-			netif_tx_stop_all_queues(wil_to_ndev(wil));
+			netif_tx_stop_all_queues(wil->main_ndev);
 			wil->net_queue_stopped = true;
 			wil_dbg_txrx(wil, "netif_tx_stop called\n");
 		}
@@ -1953,7 +1970,7 @@ static inline void __wil_update_net_queues(struct wil6210_priv *wil,
 	if (!vring || wil_vring_avail_high(vring)) {
 		/* enough room in the vring */
 		wil_dbg_txrx(wil, "calling netif_tx_wake\n");
-		netif_tx_wake_all_queues(wil_to_ndev(wil));
+		netif_tx_wake_all_queues(wil->main_ndev);
 		wil->net_queue_stopped = false;
 	}
 }
@@ -1976,7 +1993,8 @@ void wil_update_net_queues_bh(struct wil6210_priv *wil, struct vring *vring,
 
 netdev_tx_t wil_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
-	struct wil6210_priv *wil = ndev_to_wil(ndev);
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct ethhdr *eth = (void *)skb->data;
 	bool bcast = is_multicast_ether_addr(eth->h_dest);
 	struct vring *vring;
@@ -1995,40 +2013,40 @@ netdev_tx_t wil_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		wil_dbg_ratelimited(wil, "FW not connected, packet dropped\n");
 		goto drop;
 	}
-	if (unlikely(wil->wdev->iftype == NL80211_IFTYPE_MONITOR)) {
+	if (unlikely(vif->wdev.iftype == NL80211_IFTYPE_MONITOR)) {
 		wil_err(wil, "Xmit in monitor mode not supported\n");
 		goto drop;
 	}
 	pr_once_fw = false;
 
 	/* find vring */
-	if (wil->wdev->iftype == NL80211_IFTYPE_STATION && !wil->pbss) {
+	if (vif->wdev.iftype == NL80211_IFTYPE_STATION && !vif->pbss) {
 		/* in STA mode (ESS), all to same VRING (to AP) */
-		vring = wil_find_tx_vring_sta(wil, skb);
+		vring = wil_find_tx_vring_sta(wil, vif, skb);
 	} else if (bcast) {
-		if (wil->pbss)
+		if (vif->pbss)
 			/* in pbss, no bcast VRING - duplicate skb in
 			 * all stations VRINGs
 			 */
-			vring = wil_find_tx_bcast_2(wil, skb);
-		else if (wil->wdev->iftype == NL80211_IFTYPE_AP)
+			vring = wil_find_tx_bcast_2(wil, vif, skb);
+		else if (vif->wdev.iftype == NL80211_IFTYPE_AP)
 			/* AP has a dedicated bcast VRING */
-			vring = wil_find_tx_bcast_1(wil, skb);
+			vring = wil_find_tx_bcast_1(wil, vif, skb);
 		else
 			/* unexpected combination, fallback to duplicating
 			 * the skb in all stations VRINGs
 			 */
-			vring = wil_find_tx_bcast_2(wil, skb);
+			vring = wil_find_tx_bcast_2(wil, vif, skb);
 	} else {
 		/* unicast, find specific VRING by dest. address */
-		vring = wil_find_tx_ucast(wil, skb);
+		vring = wil_find_tx_ucast(wil, vif, skb);
 	}
 	if (unlikely(!vring)) {
 		wil_dbg_txrx(wil, "No Tx VRING found for %pM\n", eth->h_dest);
 		goto drop;
 	}
 	/* set up vring entry */
-	rc = wil_tx_vring(wil, vring, skb);
+	rc = wil_tx_vring(wil, vif, vring, skb);
 
 	switch (rc) {
 	case 0:
@@ -2074,7 +2092,7 @@ static inline void wil_consume_skb(struct sk_buff *skb, bool acked)
  */
 int wil_tx_complete(struct wil6210_priv *wil, int ringid)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
 	struct device *dev = wil_to_dev(wil);
 	struct vring *vring = &wil->vring_tx[ringid];
 	struct vring_tx_data *txdata = &wil->vring_tx_data[ringid];
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 81cb27af64e4..fa8df41b045f 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -464,6 +464,7 @@ struct vring_tx_data {
 	u16 agg_timeout;
 	u8 agg_amsdu;
 	bool addba_in_progress; /* if set, agg_xxx is for request in progress */
+	u8 mid;
 	spinlock_t lock;
 };
 
@@ -542,7 +543,6 @@ struct wil_tid_crypto_rx {
 struct wil_p2p_info {
 	struct ieee80211_channel listen_chan;
 	u8 discovery_started;
-	u8 p2p_dev_started;
 	u64 cookie;
 	struct wireless_dev *pending_listen_wdev;
 	unsigned int listen_duration;
@@ -585,6 +585,7 @@ struct wil_net_stats {
  */
 struct wil_sta_info {
 	u8 addr[ETH_ALEN];
+	u8 mid;
 	enum wil_sta_status status;
 	struct wil_net_stats stats;
 	/* Rx BACK */
@@ -672,16 +673,34 @@ extern u8 led_polarity;
 
 struct wil6210_vif {
 	struct wireless_dev wdev;
+	struct net_device *ndev;
 	struct wil6210_priv *wil;
 	u8 mid;
+	u32 privacy; /* secure connection? */
+	u16 channel; /* relevant in AP mode */
+	u8 hidden_ssid; /* relevant in AP mode */
+	u32 ap_isolate; /* no intra-BSS communication */
+	bool pbss;
+	int bcast_vring;
+	struct cfg80211_bss *bss; /* connected bss, relevant in STA mode */
+	int locally_generated_disc; /* relevant in STA mode */
+	struct timer_list connect_timer;
+	struct work_struct disconnect_worker;
+	/* scan */
+	struct cfg80211_scan_request *scan_request;
+	struct timer_list scan_timer; /* detect scan timeout */
+	struct wil_p2p_info p2p;
+	/* keep alive */
+	struct list_head probe_client_pending;
+	struct mutex probe_client_mutex; /* protect @probe_client_pending */
+	struct work_struct probe_client_worker;
 };
 
 struct wil6210_priv {
 	struct pci_dev *pdev;
 	u32 bar_size;
 	struct wiphy *wiphy;
-	struct wireless_dev *wdev;
-	struct net_device *ndev;
+	struct net_device *main_ndev;
 	void __iomem *csr;
 	DECLARE_BITMAP(status, wil_status_last);
 	u8 fw_version[ETHTOOL_FWVERS_LEN];
@@ -703,13 +722,7 @@ struct wil6210_priv {
 	/* profile */
 	struct cfg80211_chan_def monitor_chandef;
 	u32 monitor_flags;
-	u32 privacy; /* secure connection? */
-	u8 hidden_ssid; /* relevant in AP mode */
-	u16 channel; /* relevant in AP mode */
 	int sinfo_gen;
-	u32 ap_isolate; /* no intra-BSS communication */
-	struct cfg80211_bss *bss; /* connected bss, relevant in STA mode */
-	int locally_generated_disc; /* relevant in STA mode */
 	/* interrupt moderation */
 	u32 tx_max_burst_duration;
 	u32 tx_interframe_timeout;
@@ -724,15 +737,13 @@ struct wil6210_priv {
 	struct completion wmi_call;
 	u16 wmi_seq;
 	u16 reply_id; /**< wait for this WMI event */
+	u8 reply_mid;
 	void *reply_buf;
 	u16 reply_size;
 	struct workqueue_struct *wmi_wq; /* for deferred calls */
 	struct work_struct wmi_event_worker;
 	struct workqueue_struct *wq_service;
-	struct work_struct disconnect_worker;
 	struct work_struct fw_error_worker;	/* for FW error recovery */
-	struct timer_list connect_timer;
-	struct timer_list scan_timer; /* detect scan timeout */
 	struct list_head pending_wmi_ev;
 	/*
 	 * protect pending_wmi_ev
@@ -744,10 +755,6 @@ struct wil6210_priv {
 	int net_queue_stopped; /* netif_tx_stop_all_queues invoked */
 	struct napi_struct napi_rx;
 	struct napi_struct napi_tx;
-	/* keep alive */
-	struct list_head probe_client_pending;
-	struct mutex probe_client_mutex; /* protect @probe_client_pending */
-	struct work_struct probe_client_worker;
 	/* DMA related */
 	struct vring vring_rx;
 	unsigned int rx_buf_len;
@@ -755,11 +762,8 @@ struct wil6210_priv {
 	struct vring_tx_data vring_tx_data[WIL6210_MAX_TX_RINGS];
 	u8 vring2cid_tid[WIL6210_MAX_TX_RINGS][2]; /* [0] - CID, [1] - TID */
 	struct wil_sta_info sta[WIL6210_MAX_CID];
-	int bcast_vring;
 	u32 vring_idle_trsh; /* HW fetches up to 16 descriptors at once  */
 	u32 dma_addr_size; /* indicates dma addr size */
-	/* scan */
-	struct cfg80211_scan_request *scan_request;
 
 	struct mutex mutex; /* for wil6210_priv access in wil_{up|down} */
 	/* statistics */
@@ -779,9 +783,7 @@ struct wil6210_priv {
 
 	struct pmc_ctx pmc;
 
-	bool pbss;
-
-	struct wil_p2p_info p2p;
+	u8 p2p_dev_started;
 
 	/* P2P_DEVICE vif */
 	struct wireless_dev *p2p_wdev;
@@ -810,12 +812,32 @@ struct wil6210_priv {
 #define wil_to_wiphy(i) (i->wiphy)
 #define wil_to_dev(i) (wiphy_dev(wil_to_wiphy(i)))
 #define wiphy_to_wil(w) (struct wil6210_priv *)(wiphy_priv(w))
-#define wil_to_wdev(i) (i->wdev)
 #define wdev_to_wil(w) (struct wil6210_priv *)(wdev_priv(w))
-#define wil_to_ndev(i) (i->ndev)
 #define ndev_to_wil(n) (wdev_to_wil(n->ieee80211_ptr))
 #define ndev_to_vif(n) (struct wil6210_vif *)(netdev_priv(n))
-#define wdev_to_vif(w) (container_of(w, struct wil6210_vif, wdev))
+#define vif_to_wil(v) (v->wil)
+#define vif_to_ndev(v) (v->ndev)
+#define vif_to_wdev(v) (&v->wdev)
+
+static inline struct wil6210_vif *wdev_to_vif(struct wil6210_priv *wil,
+					      struct wireless_dev *wdev)
+{
+	/* main interface is shared with P2P device */
+	if (wdev == wil->p2p_wdev)
+		return ndev_to_vif(wil->main_ndev);
+	else
+		return container_of(wdev, struct wil6210_vif, wdev);
+}
+
+static inline struct wireless_dev *
+vif_to_radio_wdev(struct wil6210_priv *wil, struct wil6210_vif *vif)
+{
+	/* main interface is shared with P2P device */
+	if (vif->mid)
+		return vif_to_wdev(vif);
+	else
+		return wil->radio_wdev;
+}
 
 __printf(2, 3)
 void wil_dbg_trace(struct wil6210_priv *wil, const char *fmt, ...);
@@ -828,7 +850,7 @@ void __wil_info(struct wil6210_priv *wil, const char *fmt, ...);
 __printf(2, 3)
 void wil_dbg_ratelimited(const struct wil6210_priv *wil, const char *fmt, ...);
 #define wil_dbg(wil, fmt, arg...) do { \
-	netdev_dbg(wil_to_ndev(wil), fmt, ##arg); \
+	netdev_dbg(wil->main_ndev, fmt, ##arg); \
 	wil_dbg_trace(wil, fmt, ##arg); \
 } while (0)
 
@@ -933,7 +955,7 @@ int wil_down(struct wil6210_priv *wil);
 int __wil_down(struct wil6210_priv *wil);
 void wil_refresh_fw_capabilities(struct wil6210_priv *wil);
 void wil_mbox_ring_le2cpus(struct wil6210_mbox_ring *r);
-int wil_find_cid(struct wil6210_priv *wil, const u8 *mac);
+int wil_find_cid(struct wil6210_priv *wil, u8 mid, const u8 *mac);
 void wil_set_ethtoolops(struct net_device *ndev);
 
 struct fw_map *wil_find_fw_mapping(const char *section);
@@ -942,40 +964,42 @@ void __iomem *wmi_buffer(struct wil6210_priv *wil, __le32 ptr);
 void __iomem *wmi_addr(struct wil6210_priv *wil, u32 ptr);
 int wmi_read_hdr(struct wil6210_priv *wil, __le32 ptr,
 		 struct wil6210_mbox_hdr *hdr);
-int wmi_send(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len);
+int wmi_send(struct wil6210_priv *wil, u16 cmdid, u8 mid, void *buf, u16 len);
 void wmi_recv_cmd(struct wil6210_priv *wil);
-int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len,
+int wmi_call(struct wil6210_priv *wil, u16 cmdid, u8 mid, void *buf, u16 len,
 	     u16 reply_id, void *reply, u8 reply_size, int to_msec);
 void wmi_event_worker(struct work_struct *work);
 void wmi_event_flush(struct wil6210_priv *wil);
-int wmi_set_ssid(struct wil6210_priv *wil, u8 ssid_len, const void *ssid);
-int wmi_get_ssid(struct wil6210_priv *wil, u8 *ssid_len, void *ssid);
+int wmi_set_ssid(struct wil6210_vif *vif, u8 ssid_len, const void *ssid);
+int wmi_get_ssid(struct wil6210_vif *vif, u8 *ssid_len, void *ssid);
 int wmi_set_channel(struct wil6210_priv *wil, int channel);
 int wmi_get_channel(struct wil6210_priv *wil, int *channel);
-int wmi_del_cipher_key(struct wil6210_priv *wil, u8 key_index,
+int wmi_del_cipher_key(struct wil6210_vif *vif, u8 key_index,
 		       const void *mac_addr, int key_usage);
-int wmi_add_cipher_key(struct wil6210_priv *wil, u8 key_index,
+int wmi_add_cipher_key(struct wil6210_vif *vif, u8 key_index,
 		       const void *mac_addr, int key_len, const void *key,
 		       int key_usage);
 int wmi_echo(struct wil6210_priv *wil);
-int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie);
+int wmi_set_ie(struct wil6210_vif *vif, u8 type, u16 ie_len, const void *ie);
 int wmi_rx_chain_add(struct wil6210_priv *wil, struct vring *vring);
 int wmi_rxon(struct wil6210_priv *wil, bool on);
 int wmi_get_temperature(struct wil6210_priv *wil, u32 *t_m, u32 *t_r);
-int wmi_disconnect_sta(struct wil6210_priv *wil, const u8 *mac,
+int wmi_disconnect_sta(struct wil6210_vif *vif, const u8 *mac,
 		       u16 reason, bool full_disconnect, bool del_sta);
-int wmi_addba(struct wil6210_priv *wil, u8 ringid, u8 size, u16 timeout);
-int wmi_delba_tx(struct wil6210_priv *wil, u8 ringid, u16 reason);
-int wmi_delba_rx(struct wil6210_priv *wil, u8 cidxtid, u16 reason);
-int wmi_addba_rx_resp(struct wil6210_priv *wil, u8 cid, u8 tid, u8 token,
+int wmi_addba(struct wil6210_priv *wil, u8 mid,
+	      u8 ringid, u8 size, u16 timeout);
+int wmi_delba_tx(struct wil6210_priv *wil, u8 mid, u8 ringid, u16 reason);
+int wmi_delba_rx(struct wil6210_priv *wil, u8 mid, u8 cidxtid, u16 reason);
+int wmi_addba_rx_resp(struct wil6210_priv *wil,
+		      u8 mid, u8 cid, u8 tid, u8 token,
 		      u16 status, bool amsdu, u16 agg_wsize, u16 timeout);
 int wmi_ps_dev_profile_cfg(struct wil6210_priv *wil,
 			   enum wmi_ps_profile_type ps_profile);
 int wmi_set_mgmt_retry(struct wil6210_priv *wil, u8 retry_short);
 int wmi_get_mgmt_retry(struct wil6210_priv *wil, u8 *retry_short);
-int wmi_new_sta(struct wil6210_priv *wil, const u8 *mac, u8 aid);
-int wil_addba_rx_request(struct wil6210_priv *wil, u8 cidxtid,
-			 u8 dialog_token, __le16 ba_param_set,
+int wmi_new_sta(struct wil6210_vif *vif, const u8 *mac, u8 aid);
+int wil_addba_rx_request(struct wil6210_priv *wil, u8 mid,
+			 u8 cidxtid, u8 dialog_token, __le16 ba_param_set,
 			 __le16 ba_timeout, __le16 ba_seq_ctrl);
 int wil_addba_tx_request(struct wil6210_priv *wil, u8 ringid, u16 wsize);
 
@@ -991,24 +1015,23 @@ void wil6210_mask_halp(struct wil6210_priv *wil);
 
 /* P2P */
 bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request);
-void wil_p2p_discovery_timer_fn(struct timer_list *t);
-int wil_p2p_search(struct wil6210_priv *wil,
+int wil_p2p_search(struct wil6210_vif *vif,
 		   struct cfg80211_scan_request *request);
 int wil_p2p_listen(struct wil6210_priv *wil, struct wireless_dev *wdev,
 		   unsigned int duration, struct ieee80211_channel *chan,
 		   u64 *cookie);
-u8 wil_p2p_stop_discovery(struct wil6210_priv *wil);
-int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie);
+u8 wil_p2p_stop_discovery(struct wil6210_vif *vif);
+int wil_p2p_cancel_listen(struct wil6210_vif *vif, u64 cookie);
 void wil_p2p_listen_expired(struct work_struct *work);
 void wil_p2p_search_expired(struct work_struct *work);
 void wil_p2p_stop_radio_operations(struct wil6210_priv *wil);
 void wil_p2p_delayed_listen_work(struct work_struct *work);
 
 /* WMI for P2P */
-int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi);
-int wmi_start_listen(struct wil6210_priv *wil);
-int wmi_start_search(struct wil6210_priv *wil);
-int wmi_stop_discovery(struct wil6210_priv *wil);
+int wmi_p2p_cfg(struct wil6210_vif *vif, int channel, int bi);
+int wmi_start_listen(struct wil6210_vif *vif);
+int wmi_start_search(struct wil6210_vif *vif);
+int wmi_stop_discovery(struct wil6210_vif *vif);
 
 int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 			 struct cfg80211_mgmt_tx_params *params,
@@ -1025,7 +1048,7 @@ static inline int wil6210_debugfs_init(struct wil6210_priv *wil) { return 0; }
 static inline void wil6210_debugfs_remove(struct wil6210_priv *wil) {}
 #endif
 
-int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
+int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid,
 		       struct station_info *sinfo);
 
 struct wil6210_priv *wil_cfg80211_init(struct device *dev);
@@ -1033,29 +1056,30 @@ void wil_cfg80211_deinit(struct wil6210_priv *wil);
 void wil_p2p_wdev_free(struct wil6210_priv *wil);
 
 int wmi_set_mac_address(struct wil6210_priv *wil, void *addr);
-int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype,
-		  u8 chan, u8 hidden_ssid, u8 is_go);
-int wmi_pcp_stop(struct wil6210_priv *wil);
+int wmi_pcp_start(struct wil6210_vif *vif, int bi, u8 wmi_nettype, u8 chan,
+		  u8 hidden_ssid, u8 is_go);
+int wmi_pcp_stop(struct wil6210_vif *vif);
 int wmi_led_cfg(struct wil6210_priv *wil, bool enable);
-int wmi_abort_scan(struct wil6210_priv *wil);
-void wil_abort_scan(struct wil6210_priv *wil, bool sync);
+int wmi_abort_scan(struct wil6210_vif *vif);
+void wil_abort_scan(struct wil6210_vif *vif, bool sync);
 void wil6210_bus_request(struct wil6210_priv *wil, u32 kbps);
-void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid,
+void wil6210_disconnect(struct wil6210_vif *vif, const u8 *bssid,
 			u16 reason_code, bool from_event);
-void wil_probe_client_flush(struct wil6210_priv *wil);
+void wil_probe_client_flush(struct wil6210_vif *vif);
 void wil_probe_client_worker(struct work_struct *work);
+void wil_disconnect_worker(struct work_struct *work);
 
 int wil_rx_init(struct wil6210_priv *wil, u16 size);
 void wil_rx_fini(struct wil6210_priv *wil);
 
 /* TX API */
-int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size,
+int wil_vring_init_tx(struct wil6210_vif *vif, int id, int size,
 		      int cid, int tid);
 void wil_vring_fini_tx(struct wil6210_priv *wil, int id);
-int wil_tx_init(struct wil6210_priv *wil, int cid);
-int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size);
-int wil_bcast_init(struct wil6210_priv *wil);
-void wil_bcast_fini(struct wil6210_priv *wil);
+int wil_tx_init(struct wil6210_vif *vif, int cid);
+int wil_vring_init_bcast(struct wil6210_vif *vif, int id, int size);
+int wil_bcast_init(struct wil6210_vif *vif);
+void wil_bcast_fini(struct wil6210_vif *vif);
 
 void wil_update_net_queues(struct wil6210_priv *wil, struct vring *vring,
 			   bool should_stop);
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index b31e2514f8c2..044d69850353 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -516,7 +516,8 @@ static const char *eventid2name(u16 eventid)
 	}
 }
 
-static int __wmi_send(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len)
+static int __wmi_send(struct wil6210_priv *wil, u16 cmdid, u8 mid,
+		      void *buf, u16 len)
 {
 	struct {
 		struct wil6210_mbox_hdr hdr;
@@ -528,7 +529,7 @@ static int __wmi_send(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len)
 			.len = cpu_to_le16(sizeof(cmd.wmi) + len),
 		},
 		.wmi = {
-			.mid = 0,
+			.mid = mid,
 			.command_id = cpu_to_le16(cmdid),
 		},
 	};
@@ -612,8 +613,8 @@ static int __wmi_send(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len)
 	}
 	cmd.hdr.seq = cpu_to_le16(++wil->wmi_seq);
 	/* set command */
-	wil_dbg_wmi(wil, "sending %s (0x%04x) [%d]\n",
-		    cmdid2name(cmdid), cmdid, len);
+	wil_dbg_wmi(wil, "sending %s (0x%04x) [%d] mid %d\n",
+		    cmdid2name(cmdid), cmdid, len, mid);
 	wil_hex_dump_wmi("Cmd ", DUMP_PREFIX_OFFSET, 16, 1, &cmd,
 			 sizeof(cmd), true);
 	wil_hex_dump_wmi("cmd ", DUMP_PREFIX_OFFSET, 16, 1, buf,
@@ -637,21 +638,22 @@ out:
 	return rc;
 }
 
-int wmi_send(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len)
+int wmi_send(struct wil6210_priv *wil, u16 cmdid, u8 mid, void *buf, u16 len)
 {
 	int rc;
 
 	mutex_lock(&wil->wmi_mutex);
-	rc = __wmi_send(wil, cmdid, buf, len);
+	rc = __wmi_send(wil, cmdid, mid, buf, len);
 	mutex_unlock(&wil->wmi_mutex);
 
 	return rc;
 }
 
 /*=== Event handlers ===*/
-static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_ready(struct wil6210_vif *vif, int id, void *d, int len)
 {
-	struct wireless_dev *wdev = wil->wdev;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct wiphy *wiphy = wil_to_wiphy(wil);
 	struct wmi_ready_event *evt = d;
 
 	wil->n_mids = evt->numof_additional_mids;
@@ -660,8 +662,7 @@ static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len)
 		 wil->fw_version, le32_to_cpu(evt->sw_version),
 		 evt->mac, wil->n_mids);
 	/* ignore MAC address, we already have it from the boot loader */
-	strlcpy(wdev->wiphy->fw_version, wil->fw_version,
-		sizeof(wdev->wiphy->fw_version));
+	strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version));
 
 	if (len > offsetof(struct wmi_ready_event, rfc_read_calib_result)) {
 		wil_dbg_wmi(wil, "rfc calibration result %d\n",
@@ -674,8 +675,9 @@ static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len)
 	complete(&wil->wmi_ready);
 }
 
-static void wmi_evt_rx_mgmt(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_rx_mgmt(struct wil6210_vif *vif, int id, void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_rx_mgmt_packet_event *data = d;
 	struct wiphy *wiphy = wil_to_wiphy(wil);
 	struct ieee80211_mgmt *rx_mgmt_frame =
@@ -754,13 +756,13 @@ static void wmi_evt_rx_mgmt(struct wil6210_priv *wil, int id, void *d, int len)
 		}
 	} else {
 		mutex_lock(&wil->p2p_wdev_mutex);
-		cfg80211_rx_mgmt(wil->radio_wdev, freq, signal,
+		cfg80211_rx_mgmt(vif_to_radio_wdev(wil, vif), freq, signal,
 				 (void *)rx_mgmt_frame, d_len, 0);
 		mutex_unlock(&wil->p2p_wdev_mutex);
 	}
 }
 
-static void wmi_evt_tx_mgmt(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_tx_mgmt(struct wil6210_vif *vif, int id, void *d, int len)
 {
 	struct wmi_tx_mgmt_packet_event *data = d;
 	struct ieee80211_mgmt *mgmt_frame =
@@ -771,11 +773,13 @@ static void wmi_evt_tx_mgmt(struct wil6210_priv *wil, int id, void *d, int len)
 			 flen, true);
 }
 
-static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id,
+static void wmi_evt_scan_complete(struct wil6210_vif *vif, int id,
 				  void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
+
 	mutex_lock(&wil->p2p_wdev_mutex);
-	if (wil->scan_request) {
+	if (vif->scan_request) {
 		struct wmi_scan_complete_event *data = d;
 		int status = le32_to_cpu(data->status);
 		struct cfg80211_scan_info info = {
@@ -785,15 +789,16 @@ static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id,
 
 		wil_dbg_wmi(wil, "SCAN_COMPLETE(0x%08x)\n", status);
 		wil_dbg_misc(wil, "Complete scan_request 0x%p aborted %d\n",
-			     wil->scan_request, info.aborted);
-		del_timer_sync(&wil->scan_timer);
-		cfg80211_scan_done(wil->scan_request, &info);
-		wil->radio_wdev = wil->wdev;
-		wil->scan_request = NULL;
+			     vif->scan_request, info.aborted);
+		del_timer_sync(&vif->scan_timer);
+		cfg80211_scan_done(vif->scan_request, &info);
+		if (vif->mid == 0)
+			wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
+		vif->scan_request = NULL;
 		wake_up_interruptible(&wil->wq);
-		if (wil->p2p.pending_listen_wdev) {
+		if (vif->p2p.pending_listen_wdev) {
 			wil_dbg_misc(wil, "Scheduling delayed listen\n");
-			schedule_work(&wil->p2p.delayed_listen_work);
+			schedule_work(&vif->p2p.delayed_listen_work);
 		}
 	} else {
 		wil_err(wil, "SCAN_COMPLETE while not scanning\n");
@@ -801,10 +806,11 @@ static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id,
 	mutex_unlock(&wil->p2p_wdev_mutex);
 }
 
-static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
-	struct wireless_dev *wdev = wil->wdev;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct net_device *ndev = vif_to_ndev(vif);
+	struct wireless_dev *wdev = vif_to_wdev(vif);
 	struct wmi_connect_event *evt = d;
 	int ch; /* channel number */
 	struct station_info sinfo;
@@ -874,7 +880,7 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
 			mutex_unlock(&wil->mutex);
 			return;
 		}
-		del_timer_sync(&wil->connect_timer);
+		del_timer_sync(&vif->connect_timer);
 	} else if ((wdev->iftype == NL80211_IFTYPE_AP) ||
 		   (wdev->iftype == NL80211_IFTYPE_P2P_GO)) {
 		if (wil->sta[evt->cid].status != wil_sta_unused) {
@@ -886,13 +892,14 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
 	}
 
 	ether_addr_copy(wil->sta[evt->cid].addr, evt->bssid);
+	wil->sta[evt->cid].mid = vif->mid;
 	wil->sta[evt->cid].status = wil_sta_conn_pending;
 
-	rc = wil_tx_init(wil, evt->cid);
+	rc = wil_tx_init(vif, evt->cid);
 	if (rc) {
 		wil_err(wil, "config tx vring failed for CID %d, rc (%d)\n",
 			evt->cid, rc);
-		wmi_disconnect_sta(wil, wil->sta[evt->cid].addr,
+		wmi_disconnect_sta(vif, wil->sta[evt->cid].addr,
 				   WLAN_REASON_UNSPECIFIED, false, false);
 	} else {
 		wil_info(wil, "successful connection to CID %d\n", evt->cid);
@@ -912,14 +919,14 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
 		} else {
 			struct wiphy *wiphy = wil_to_wiphy(wil);
 
-			cfg80211_ref_bss(wiphy, wil->bss);
-			cfg80211_connect_bss(ndev, evt->bssid, wil->bss,
+			cfg80211_ref_bss(wiphy, vif->bss);
+			cfg80211_connect_bss(ndev, evt->bssid, vif->bss,
 					     assoc_req_ie, assoc_req_ielen,
 					     assoc_resp_ie, assoc_resp_ielen,
 					     WLAN_STATUS_SUCCESS, GFP_KERNEL,
 					     NL80211_TIMEOUT_UNSPECIFIED);
 		}
-		wil->bss = NULL;
+		vif->bss = NULL;
 	} else if ((wdev->iftype == NL80211_IFTYPE_AP) ||
 		   (wdev->iftype == NL80211_IFTYPE_P2P_GO)) {
 		if (rc) {
@@ -951,15 +958,18 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
 	wil_update_net_queues_bh(wil, NULL, false);
 
 out:
-	if (rc)
+	if (rc) {
 		wil->sta[evt->cid].status = wil_sta_unused;
+		wil->sta[evt->cid].mid = U8_MAX;
+	}
 	clear_bit(wil_status_fwconnecting, wil->status);
 	mutex_unlock(&wil->mutex);
 }
 
-static void wmi_evt_disconnect(struct wil6210_priv *wil, int id,
+static void wmi_evt_disconnect(struct wil6210_vif *vif, int id,
 			       void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_disconnect_event *evt = d;
 	u16 reason_code = le16_to_cpu(evt->protocol_reason_status);
 
@@ -976,7 +986,7 @@ static void wmi_evt_disconnect(struct wil6210_priv *wil, int id,
 	}
 
 	mutex_lock(&wil->mutex);
-	wil6210_disconnect(wil, evt->bssid, reason_code, true);
+	wil6210_disconnect(vif, evt->bssid, reason_code, true);
 	mutex_unlock(&wil->mutex);
 }
 
@@ -984,10 +994,10 @@ static void wmi_evt_disconnect(struct wil6210_priv *wil, int id,
  * Firmware reports EAPOL frame using WME event.
  * Reconstruct Ethernet frame and deliver it via normal Rx
  */
-static void wmi_evt_eapol_rx(struct wil6210_priv *wil, int id,
-			     void *d, int len)
+static void wmi_evt_eapol_rx(struct wil6210_vif *vif, int id, void *d, int len)
 {
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct net_device *ndev = vif_to_ndev(vif);
 	struct wmi_eapol_rx_event *evt = d;
 	u16 eapol_len = le16_to_cpu(evt->eapol_len);
 	int sz = eapol_len + ETH_HLEN;
@@ -996,10 +1006,10 @@ static void wmi_evt_eapol_rx(struct wil6210_priv *wil, int id,
 	int cid;
 	struct wil_net_stats *stats = NULL;
 
-	wil_dbg_wmi(wil, "EAPOL len %d from %pM\n", eapol_len,
-		    evt->src_mac);
+	wil_dbg_wmi(wil, "EAPOL len %d from %pM MID %d\n", eapol_len,
+		    evt->src_mac, vif->mid);
 
-	cid = wil_find_cid(wil, evt->src_mac);
+	cid = wil_find_cid(wil, vif->mid, evt->src_mac);
 	if (cid >= 0)
 		stats = &wil->sta[cid].stats;
 
@@ -1034,13 +1044,14 @@ static void wmi_evt_eapol_rx(struct wil6210_priv *wil, int id,
 	}
 }
 
-static void wmi_evt_vring_en(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_vring_en(struct wil6210_vif *vif, int id, void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_vring_en_event *evt = d;
 	u8 vri = evt->vring_index;
-	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wireless_dev *wdev = vif_to_wdev(vif);
 
-	wil_dbg_wmi(wil, "Enable vring %d\n", vri);
+	wil_dbg_wmi(wil, "Enable vring %d MID %d\n", vri, vif->mid);
 
 	if (vri >= ARRAY_SIZE(wil->vring_tx)) {
 		wil_err(wil, "Enable for invalid vring %d\n", vri);
@@ -1052,15 +1063,16 @@ static void wmi_evt_vring_en(struct wil6210_priv *wil, int id, void *d, int len)
 		 * wil_cfg80211_change_station()
 		 */
 		wil->vring_tx_data[vri].dot1x_open = true;
-	if (vri == wil->bcast_vring) /* no BA for bcast */
+	if (vri == vif->bcast_vring) /* no BA for bcast */
 		return;
 	if (agg_wsize >= 0)
 		wil_addba_tx_request(wil, vri, agg_wsize);
 }
 
-static void wmi_evt_ba_status(struct wil6210_priv *wil, int id, void *d,
-			      int len)
+static void wmi_evt_ba_status(struct wil6210_vif *vif, int id,
+			      void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_ba_status_event *evt = d;
 	struct vring_tx_data *txdata;
 
@@ -1089,19 +1101,21 @@ static void wmi_evt_ba_status(struct wil6210_priv *wil, int id, void *d,
 	txdata->addba_in_progress = false;
 }
 
-static void wmi_evt_addba_rx_req(struct wil6210_priv *wil, int id, void *d,
-				 int len)
+static void wmi_evt_addba_rx_req(struct wil6210_vif *vif, int id,
+				 void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_rcp_addba_req_event *evt = d;
 
-	wil_addba_rx_request(wil, evt->cidxtid, evt->dialog_token,
+	wil_addba_rx_request(wil, vif->mid, evt->cidxtid, evt->dialog_token,
 			     evt->ba_param_set, evt->ba_timeout,
 			     evt->ba_seq_ctrl);
 }
 
-static void wmi_evt_delba(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_delba(struct wil6210_vif *vif, int id, void *d, int len)
 __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_delba_event *evt = d;
 	u8 cid, tid;
 	u16 reason = __le16_to_cpu(evt->reason);
@@ -1110,8 +1124,8 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 
 	might_sleep();
 	parse_cidxtid(evt->cidxtid, &cid, &tid);
-	wil_dbg_wmi(wil, "DELBA CID %d TID %d from %s reason %d\n",
-		    cid, tid,
+	wil_dbg_wmi(wil, "DELBA MID %d CID %d TID %d from %s reason %d\n",
+		    vif->mid, cid, tid,
 		    evt->from_initiator ? "originator" : "recipient",
 		    reason);
 	if (!evt->from_initiator) {
@@ -1148,8 +1162,9 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 }
 
 static void
-wmi_evt_sched_scan_result(struct wil6210_priv *wil, int id, void *d, int len)
+wmi_evt_sched_scan_result(struct wil6210_vif *vif, int id, void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_sched_scan_result_event *data = d;
 	struct wiphy *wiphy = wil_to_wiphy(wil);
 	struct ieee80211_mgmt *rx_mgmt_frame =
@@ -1220,15 +1235,17 @@ wmi_evt_sched_scan_result(struct wil6210_priv *wil, int id, void *d, int len)
  * Some events are ignored for purpose; and need not be interpreted as
  * "unhandled events"
  */
-static void wmi_evt_ignore(struct wil6210_priv *wil, int id, void *d, int len)
+static void wmi_evt_ignore(struct wil6210_vif *vif, int id, void *d, int len)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
+
 	wil_dbg_wmi(wil, "Ignore event 0x%04x len %d\n", id, len);
 }
 
 static const struct {
 	int eventid;
-	void (*handler)(struct wil6210_priv *wil, int eventid,
-			void *data, int data_len);
+	void (*handler)(struct wil6210_vif *vif,
+			int eventid, void *data, int data_len);
 } wmi_evt_handlers[] = {
 	{WMI_READY_EVENTID,		wmi_evt_ready},
 	{WMI_FW_READY_EVENTID,			wmi_evt_ignore},
@@ -1325,6 +1342,7 @@ void wmi_recv_cmd(struct wil6210_priv *wil)
 		    (len >= sizeof(struct wmi_cmd_hdr))) {
 			struct wmi_cmd_hdr *wmi = &evt->event.wmi;
 			u16 id = le16_to_cpu(wmi->command_id);
+			u8 mid = wmi->mid;
 			u32 tstamp = le32_to_cpu(wmi->fw_timestamp);
 			if (test_bit(wil_status_resuming, wil->status)) {
 				if (id == WMI_TRAFFIC_RESUME_EVENTID)
@@ -1336,7 +1354,8 @@ void wmi_recv_cmd(struct wil6210_priv *wil)
 						id);
 			}
 			spin_lock_irqsave(&wil->wmi_ev_lock, flags);
-			if (wil->reply_id && wil->reply_id == id) {
+			if (wil->reply_id && wil->reply_id == id &&
+			    wil->reply_mid == mid) {
 				if (wil->reply_buf) {
 					memcpy(wil->reply_buf, wmi,
 					       min(len, wil->reply_size));
@@ -1384,7 +1403,7 @@ void wmi_recv_cmd(struct wil6210_priv *wil)
 		    n - num_immed_reply, num_immed_reply);
 }
 
-int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len,
+int wmi_call(struct wil6210_priv *wil, u16 cmdid, u8 mid, void *buf, u16 len,
 	     u16 reply_id, void *reply, u8 reply_size, int to_msec)
 {
 	int rc;
@@ -1394,12 +1413,13 @@ int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len,
 
 	spin_lock(&wil->wmi_ev_lock);
 	wil->reply_id = reply_id;
+	wil->reply_mid = mid;
 	wil->reply_buf = reply;
 	wil->reply_size = reply_size;
 	reinit_completion(&wil->wmi_call);
 	spin_unlock(&wil->wmi_ev_lock);
 
-	rc = __wmi_send(wil, cmdid, buf, len);
+	rc = __wmi_send(wil, cmdid, mid, buf, len);
 	if (rc)
 		goto out;
 
@@ -1419,6 +1439,7 @@ int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len,
 out:
 	spin_lock(&wil->wmi_ev_lock);
 	wil->reply_id = 0;
+	wil->reply_mid = U8_MAX;
 	wil->reply_buf = NULL;
 	wil->reply_size = 0;
 	spin_unlock(&wil->wmi_ev_lock);
@@ -1430,27 +1451,31 @@ out:
 
 int wmi_echo(struct wil6210_priv *wil)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_echo_cmd cmd = {
 		.value = cpu_to_le32(0x12345678),
 	};
 
-	return wmi_call(wil, WMI_ECHO_CMDID, &cmd, sizeof(cmd),
+	return wmi_call(wil, WMI_ECHO_CMDID, vif->mid, &cmd, sizeof(cmd),
 			WMI_ECHO_RSP_EVENTID, NULL, 0, 50);
 }
 
 int wmi_set_mac_address(struct wil6210_priv *wil, void *addr)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_set_mac_address_cmd cmd;
 
 	ether_addr_copy(cmd.mac, addr);
 
 	wil_dbg_wmi(wil, "Set MAC %pM\n", addr);
 
-	return wmi_send(wil, WMI_SET_MAC_ADDRESS_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_SET_MAC_ADDRESS_CMDID, vif->mid,
+			&cmd, sizeof(cmd));
 }
 
 int wmi_led_cfg(struct wil6210_priv *wil, bool enable)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc = 0;
 	struct wmi_led_cfg_cmd cmd = {
 		.led_mode = enable,
@@ -1487,7 +1512,7 @@ int wmi_led_cfg(struct wil6210_priv *wil, bool enable)
 		    "%s led %d\n",
 		    enable ? "enabling" : "disabling", led_id);
 
-	rc = wmi_call(wil, WMI_LED_CFG_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_LED_CFG_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_LED_CFG_DONE_EVENTID, &reply, sizeof(reply),
 		      100);
 	if (rc)
@@ -1503,9 +1528,10 @@ out:
 	return rc;
 }
 
-int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype,
-		  u8 chan, u8 hidden_ssid, u8 is_go)
+int wmi_pcp_start(struct wil6210_vif *vif,
+		  int bi, u8 wmi_nettype, u8 chan, u8 hidden_ssid, u8 is_go)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 
 	struct wmi_pcp_start_cmd cmd = {
@@ -1524,7 +1550,7 @@ int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype,
 		struct wmi_pcp_started_event evt;
 	} __packed reply;
 
-	if (!wil->privacy)
+	if (!vif->privacy)
 		cmd.disable_sec = 1;
 
 	if ((cmd.pcp_max_assoc_sta > WIL6210_MAX_CID) ||
@@ -1546,7 +1572,7 @@ int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype,
 	 * Processing time may be huge, in case of secure AP it takes about
 	 * 3500ms for FW to start AP
 	 */
-	rc = wmi_call(wil, WMI_PCP_START_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_PCP_START_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_PCP_STARTED_EVENTID, &reply, sizeof(reply), 5000);
 	if (rc)
 		return rc;
@@ -1561,20 +1587,22 @@ int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype,
 	return rc;
 }
 
-int wmi_pcp_stop(struct wil6210_priv *wil)
+int wmi_pcp_stop(struct wil6210_vif *vif)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 
 	rc = wmi_led_cfg(wil, false);
 	if (rc)
 		return rc;
 
-	return wmi_call(wil, WMI_PCP_STOP_CMDID, NULL, 0,
+	return wmi_call(wil, WMI_PCP_STOP_CMDID, vif->mid, NULL, 0,
 			WMI_PCP_STOPPED_EVENTID, NULL, 0, 20);
 }
 
-int wmi_set_ssid(struct wil6210_priv *wil, u8 ssid_len, const void *ssid)
+int wmi_set_ssid(struct wil6210_vif *vif, u8 ssid_len, const void *ssid)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_set_ssid_cmd cmd = {
 		.ssid_len = cpu_to_le32(ssid_len),
 	};
@@ -1584,11 +1612,12 @@ int wmi_set_ssid(struct wil6210_priv *wil, u8 ssid_len, const void *ssid)
 
 	memcpy(cmd.ssid, ssid, ssid_len);
 
-	return wmi_send(wil, WMI_SET_SSID_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_SET_SSID_CMDID, vif->mid, &cmd, sizeof(cmd));
 }
 
-int wmi_get_ssid(struct wil6210_priv *wil, u8 *ssid_len, void *ssid)
+int wmi_get_ssid(struct wil6210_vif *vif, u8 *ssid_len, void *ssid)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -1596,8 +1625,8 @@ int wmi_get_ssid(struct wil6210_priv *wil, u8 *ssid_len, void *ssid)
 	} __packed reply;
 	int len; /* reply.cmd.ssid_len in CPU order */
 
-	rc = wmi_call(wil, WMI_GET_SSID_CMDID, NULL, 0, WMI_GET_SSID_EVENTID,
-		      &reply, sizeof(reply), 20);
+	rc = wmi_call(wil, WMI_GET_SSID_CMDID, vif->mid, NULL, 0,
+		      WMI_GET_SSID_EVENTID, &reply, sizeof(reply), 20);
 	if (rc)
 		return rc;
 
@@ -1613,22 +1642,25 @@ int wmi_get_ssid(struct wil6210_priv *wil, u8 *ssid_len, void *ssid)
 
 int wmi_set_channel(struct wil6210_priv *wil, int channel)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	struct wmi_set_pcp_channel_cmd cmd = {
 		.channel = channel - 1,
 	};
 
-	return wmi_send(wil, WMI_SET_PCP_CHANNEL_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_SET_PCP_CHANNEL_CMDID, vif->mid,
+			&cmd, sizeof(cmd));
 }
 
 int wmi_get_channel(struct wil6210_priv *wil, int *channel)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
 		struct wmi_set_pcp_channel_cmd cmd;
 	} __packed reply;
 
-	rc = wmi_call(wil, WMI_GET_PCP_CHANNEL_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_GET_PCP_CHANNEL_CMDID, vif->mid, NULL, 0,
 		      WMI_GET_PCP_CHANNEL_EVENTID, &reply, sizeof(reply), 20);
 	if (rc)
 		return rc;
@@ -1641,8 +1673,9 @@ int wmi_get_channel(struct wil6210_priv *wil, int *channel)
 	return 0;
 }
 
-int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi)
+int wmi_p2p_cfg(struct wil6210_vif *vif, int channel, int bi)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct wmi_p2p_cfg_cmd cmd = {
 		.discovery_mode = WMI_DISCOVERY_MODE_PEER2PEER,
@@ -1656,7 +1689,7 @@ int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi)
 
 	wil_dbg_wmi(wil, "sending WMI_P2P_CFG_CMDID\n");
 
-	rc = wmi_call(wil, WMI_P2P_CFG_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_P2P_CFG_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_P2P_CFG_DONE_EVENTID, &reply, sizeof(reply), 300);
 	if (!rc && reply.evt.status != WMI_FW_STATUS_SUCCESS) {
 		wil_err(wil, "P2P_CFG failed. status %d\n", reply.evt.status);
@@ -1666,8 +1699,9 @@ int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi)
 	return rc;
 }
 
-int wmi_start_listen(struct wil6210_priv *wil)
+int wmi_start_listen(struct wil6210_vif *vif)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -1676,7 +1710,7 @@ int wmi_start_listen(struct wil6210_priv *wil)
 
 	wil_dbg_wmi(wil, "sending WMI_START_LISTEN_CMDID\n");
 
-	rc = wmi_call(wil, WMI_START_LISTEN_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_START_LISTEN_CMDID, vif->mid, NULL, 0,
 		      WMI_LISTEN_STARTED_EVENTID, &reply, sizeof(reply), 300);
 	if (!rc && reply.evt.status != WMI_FW_STATUS_SUCCESS) {
 		wil_err(wil, "device failed to start listen. status %d\n",
@@ -1687,8 +1721,9 @@ int wmi_start_listen(struct wil6210_priv *wil)
 	return rc;
 }
 
-int wmi_start_search(struct wil6210_priv *wil)
+int wmi_start_search(struct wil6210_vif *vif)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -1697,7 +1732,7 @@ int wmi_start_search(struct wil6210_priv *wil)
 
 	wil_dbg_wmi(wil, "sending WMI_START_SEARCH_CMDID\n");
 
-	rc = wmi_call(wil, WMI_START_SEARCH_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_START_SEARCH_CMDID, vif->mid, NULL, 0,
 		      WMI_SEARCH_STARTED_EVENTID, &reply, sizeof(reply), 300);
 	if (!rc && reply.evt.status != WMI_FW_STATUS_SUCCESS) {
 		wil_err(wil, "device failed to start search. status %d\n",
@@ -1708,13 +1743,14 @@ int wmi_start_search(struct wil6210_priv *wil)
 	return rc;
 }
 
-int wmi_stop_discovery(struct wil6210_priv *wil)
+int wmi_stop_discovery(struct wil6210_vif *vif)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 
 	wil_dbg_wmi(wil, "sending WMI_DISCOVERY_STOP_CMDID\n");
 
-	rc = wmi_call(wil, WMI_DISCOVERY_STOP_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_DISCOVERY_STOP_CMDID, vif->mid, NULL, 0,
 		      WMI_DISCOVERY_STOPPED_EVENTID, NULL, 0, 100);
 
 	if (rc)
@@ -1723,9 +1759,10 @@ int wmi_stop_discovery(struct wil6210_priv *wil)
 	return rc;
 }
 
-int wmi_del_cipher_key(struct wil6210_priv *wil, u8 key_index,
+int wmi_del_cipher_key(struct wil6210_vif *vif, u8 key_index,
 		       const void *mac_addr, int key_usage)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_delete_cipher_key_cmd cmd = {
 		.key_index = key_index,
 	};
@@ -1733,13 +1770,15 @@ int wmi_del_cipher_key(struct wil6210_priv *wil, u8 key_index,
 	if (mac_addr)
 		memcpy(cmd.mac, mac_addr, WMI_MAC_LEN);
 
-	return wmi_send(wil, WMI_DELETE_CIPHER_KEY_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_DELETE_CIPHER_KEY_CMDID, vif->mid,
+			&cmd, sizeof(cmd));
 }
 
-int wmi_add_cipher_key(struct wil6210_priv *wil, u8 key_index,
+int wmi_add_cipher_key(struct wil6210_vif *vif, u8 key_index,
 		       const void *mac_addr, int key_len, const void *key,
 		       int key_usage)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wmi_add_cipher_key_cmd cmd = {
 		.key_index = key_index,
 		.key_usage = key_usage,
@@ -1753,11 +1792,13 @@ int wmi_add_cipher_key(struct wil6210_priv *wil, u8 key_index,
 	if (mac_addr)
 		memcpy(cmd.mac, mac_addr, WMI_MAC_LEN);
 
-	return wmi_send(wil, WMI_ADD_CIPHER_KEY_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_ADD_CIPHER_KEY_CMDID, vif->mid,
+			&cmd, sizeof(cmd));
 }
 
-int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie)
+int wmi_set_ie(struct wil6210_vif *vif, u8 type, u16 ie_len, const void *ie)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	static const char *const names[] = {
 		[WMI_FRAME_BEACON]	= "BEACON",
 		[WMI_FRAME_PROBE_REQ]	= "PROBE_REQ",
@@ -1786,7 +1827,7 @@ int wmi_set_ie(struct wil6210_priv *wil, u8 type, u16 ie_len, const void *ie)
 	/* BUG: FW API define ieLen as u8. Will fix FW */
 	cmd->ie_len = cpu_to_le16(ie_len);
 	memcpy(cmd->ie_info, ie, ie_len);
-	rc = wmi_send(wil, WMI_SET_APPIE_CMDID, cmd, len);
+	rc = wmi_send(wil, WMI_SET_APPIE_CMDID, vif->mid, cmd, len);
 	kfree(cmd);
 out:
 	if (rc) {
@@ -1808,6 +1849,7 @@ out:
  */
 int wmi_rxon(struct wil6210_priv *wil, bool on)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -1817,13 +1859,13 @@ int wmi_rxon(struct wil6210_priv *wil, bool on)
 	wil_info(wil, "(%s)\n", on ? "on" : "off");
 
 	if (on) {
-		rc = wmi_call(wil, WMI_START_LISTEN_CMDID, NULL, 0,
+		rc = wmi_call(wil, WMI_START_LISTEN_CMDID, vif->mid, NULL, 0,
 			      WMI_LISTEN_STARTED_EVENTID,
 			      &reply, sizeof(reply), 100);
 		if ((rc == 0) && (reply.evt.status != WMI_FW_STATUS_SUCCESS))
 			rc = -EINVAL;
 	} else {
-		rc = wmi_call(wil, WMI_DISCOVERY_STOP_CMDID, NULL, 0,
+		rc = wmi_call(wil, WMI_DISCOVERY_STOP_CMDID, vif->mid, NULL, 0,
 			      WMI_DISCOVERY_STOPPED_EVENTID, NULL, 0, 20);
 	}
 
@@ -1832,8 +1874,9 @@ int wmi_rxon(struct wil6210_priv *wil, bool on)
 
 int wmi_rx_chain_add(struct wil6210_priv *wil, struct vring *vring)
 {
-	struct wireless_dev *wdev = wil->wdev;
-	struct net_device *ndev = wil_to_ndev(wil);
+	struct net_device *ndev = wil->main_ndev;
+	struct wireless_dev *wdev = ndev->ieee80211_ptr;
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct wmi_cfg_rx_chain_cmd cmd = {
 		.action = WMI_RX_CHAIN_ADD,
 		.rx_sw_ring = {
@@ -1877,7 +1920,7 @@ int wmi_rx_chain_add(struct wil6210_priv *wil, struct vring *vring)
 				L2_802_3_OFFLOAD_CTRL_SNAP_KEEP_MSK;
 
 	/* typical time for secure PCP is 840ms */
-	rc = wmi_call(wil, WMI_CFG_RX_CHAIN_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_CFG_RX_CHAIN_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_CFG_RX_CHAIN_DONE_EVENTID, &evt, sizeof(evt), 2000);
 	if (rc)
 		return rc;
@@ -1895,6 +1938,7 @@ int wmi_rx_chain_add(struct wil6210_priv *wil, struct vring *vring)
 
 int wmi_get_temperature(struct wil6210_priv *wil, u32 *t_bb, u32 *t_rf)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct wmi_temp_sense_cmd cmd = {
 		.measure_baseband_en = cpu_to_le32(!!t_bb),
@@ -1906,7 +1950,7 @@ int wmi_get_temperature(struct wil6210_priv *wil, u32 *t_bb, u32 *t_rf)
 		struct wmi_temp_sense_done_event evt;
 	} __packed reply;
 
-	rc = wmi_call(wil, WMI_TEMP_SENSE_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_TEMP_SENSE_CMDID, vif->mid, &cmd, sizeof(cmd),
 		      WMI_TEMP_SENSE_DONE_EVENTID, &reply, sizeof(reply), 100);
 	if (rc)
 		return rc;
@@ -1919,9 +1963,10 @@ int wmi_get_temperature(struct wil6210_priv *wil, u32 *t_bb, u32 *t_rf)
 	return 0;
 }
 
-int wmi_disconnect_sta(struct wil6210_priv *wil, const u8 *mac,
+int wmi_disconnect_sta(struct wil6210_vif *vif, const u8 *mac,
 		       u16 reason, bool full_disconnect, bool del_sta)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	u16 reason_code;
 	struct wmi_disconnect_sta_cmd disc_sta_cmd = {
@@ -1937,16 +1982,17 @@ int wmi_disconnect_sta(struct wil6210_priv *wil, const u8 *mac,
 
 	wil_dbg_wmi(wil, "disconnect_sta: (%pM, reason %d)\n", mac, reason);
 
-	wil->locally_generated_disc = true;
+	vif->locally_generated_disc = true;
 	if (del_sta) {
 		ether_addr_copy(del_sta_cmd.dst_mac, mac);
-		rc = wmi_call(wil, WMI_DEL_STA_CMDID, &del_sta_cmd,
+		rc = wmi_call(wil, WMI_DEL_STA_CMDID, vif->mid, &del_sta_cmd,
 			      sizeof(del_sta_cmd), WMI_DISCONNECT_EVENTID,
 			      &reply, sizeof(reply), 1000);
 	} else {
 		ether_addr_copy(disc_sta_cmd.dst_mac, mac);
-		rc = wmi_call(wil, WMI_DISCONNECT_STA_CMDID, &disc_sta_cmd,
-			      sizeof(disc_sta_cmd), WMI_DISCONNECT_EVENTID,
+		rc = wmi_call(wil, WMI_DISCONNECT_STA_CMDID, vif->mid,
+			      &disc_sta_cmd, sizeof(disc_sta_cmd),
+			      WMI_DISCONNECT_EVENTID,
 			      &reply, sizeof(reply), 1000);
 	}
 	/* failure to disconnect in reasonable time treated as FW error */
@@ -1967,12 +2013,13 @@ int wmi_disconnect_sta(struct wil6210_priv *wil, const u8 *mac,
 			    reply.evt.disconnect_reason);
 
 		wil->sinfo_gen++;
-		wil6210_disconnect(wil, reply.evt.bssid, reason_code, true);
+		wil6210_disconnect(vif, reply.evt.bssid, reason_code, true);
 	}
 	return 0;
 }
 
-int wmi_addba(struct wil6210_priv *wil, u8 ringid, u8 size, u16 timeout)
+int wmi_addba(struct wil6210_priv *wil, u8 mid,
+	      u8 ringid, u8 size, u16 timeout)
 {
 	struct wmi_vring_ba_en_cmd cmd = {
 		.ringid = ringid,
@@ -1984,10 +2031,10 @@ int wmi_addba(struct wil6210_priv *wil, u8 ringid, u8 size, u16 timeout)
 	wil_dbg_wmi(wil, "addba: (ring %d size %d timeout %d)\n", ringid, size,
 		    timeout);
 
-	return wmi_send(wil, WMI_VRING_BA_EN_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_VRING_BA_EN_CMDID, mid, &cmd, sizeof(cmd));
 }
 
-int wmi_delba_tx(struct wil6210_priv *wil, u8 ringid, u16 reason)
+int wmi_delba_tx(struct wil6210_priv *wil, u8 mid, u8 ringid, u16 reason)
 {
 	struct wmi_vring_ba_dis_cmd cmd = {
 		.ringid = ringid,
@@ -1996,10 +2043,10 @@ int wmi_delba_tx(struct wil6210_priv *wil, u8 ringid, u16 reason)
 
 	wil_dbg_wmi(wil, "delba_tx: (ring %d reason %d)\n", ringid, reason);
 
-	return wmi_send(wil, WMI_VRING_BA_DIS_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_VRING_BA_DIS_CMDID, mid, &cmd, sizeof(cmd));
 }
 
-int wmi_delba_rx(struct wil6210_priv *wil, u8 cidxtid, u16 reason)
+int wmi_delba_rx(struct wil6210_priv *wil, u8 mid, u8 cidxtid, u16 reason)
 {
 	struct wmi_rcp_delba_cmd cmd = {
 		.cidxtid = cidxtid,
@@ -2009,10 +2056,11 @@ int wmi_delba_rx(struct wil6210_priv *wil, u8 cidxtid, u16 reason)
 	wil_dbg_wmi(wil, "delba_rx: (CID %d TID %d reason %d)\n", cidxtid & 0xf,
 		    (cidxtid >> 4) & 0xf, reason);
 
-	return wmi_send(wil, WMI_RCP_DELBA_CMDID, &cmd, sizeof(cmd));
+	return wmi_send(wil, WMI_RCP_DELBA_CMDID, mid, &cmd, sizeof(cmd));
 }
 
-int wmi_addba_rx_resp(struct wil6210_priv *wil, u8 cid, u8 tid, u8 token,
+int wmi_addba_rx_resp(struct wil6210_priv *wil,
+		      u8 mid, u8 cid, u8 tid, u8 token,
 		      u16 status, bool amsdu, u16 agg_wsize, u16 timeout)
 {
 	int rc;
@@ -2035,10 +2083,11 @@ int wmi_addba_rx_resp(struct wil6210_priv *wil, u8 cid, u8 tid, u8 token,
 	} __packed reply;
 
 	wil_dbg_wmi(wil,
-		    "ADDBA response for CID %d TID %d size %d timeout %d status %d AMSDU%s\n",
-		    cid, tid, agg_wsize, timeout, status, amsdu ? "+" : "-");
+		    "ADDBA response for MID %d CID %d TID %d size %d timeout %d status %d AMSDU%s\n",
+		    mid, cid, tid, agg_wsize,
+		    timeout, status, amsdu ? "+" : "-");
 
-	rc = wmi_call(wil, WMI_RCP_ADDBA_RESP_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_RCP_ADDBA_RESP_CMDID, mid, &cmd, sizeof(cmd),
 		      WMI_RCP_ADDBA_RESP_SENT_EVENTID, &reply, sizeof(reply),
 		      100);
 	if (rc)
@@ -2056,6 +2105,7 @@ int wmi_addba_rx_resp(struct wil6210_priv *wil, u8 cid, u8 tid, u8 token,
 int wmi_ps_dev_profile_cfg(struct wil6210_priv *wil,
 			   enum wmi_ps_profile_type ps_profile)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct wmi_ps_dev_profile_cfg_cmd cmd = {
 		.ps_profile = ps_profile,
@@ -2070,7 +2120,8 @@ int wmi_ps_dev_profile_cfg(struct wil6210_priv *wil,
 
 	reply.evt.status = cpu_to_le32(WMI_PS_CFG_CMD_STATUS_ERROR);
 
-	rc = wmi_call(wil, WMI_PS_DEV_PROFILE_CFG_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_PS_DEV_PROFILE_CFG_CMDID, vif->mid,
+		      &cmd, sizeof(cmd),
 		      WMI_PS_DEV_PROFILE_CFG_EVENTID, &reply, sizeof(reply),
 		      100);
 	if (rc)
@@ -2089,6 +2140,7 @@ int wmi_ps_dev_profile_cfg(struct wil6210_priv *wil,
 
 int wmi_set_mgmt_retry(struct wil6210_priv *wil, u8 retry_short)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct wmi_set_mgmt_retry_limit_cmd cmd = {
 		.mgmt_retry_limit = retry_short,
@@ -2105,7 +2157,8 @@ int wmi_set_mgmt_retry(struct wil6210_priv *wil, u8 retry_short)
 
 	reply.evt.status = WMI_FW_STATUS_FAILURE;
 
-	rc = wmi_call(wil, WMI_SET_MGMT_RETRY_LIMIT_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_SET_MGMT_RETRY_LIMIT_CMDID, vif->mid,
+		      &cmd, sizeof(cmd),
 		      WMI_SET_MGMT_RETRY_LIMIT_EVENTID, &reply, sizeof(reply),
 		      100);
 	if (rc)
@@ -2122,6 +2175,7 @@ int wmi_set_mgmt_retry(struct wil6210_priv *wil, u8 retry_short)
 
 int wmi_get_mgmt_retry(struct wil6210_priv *wil, u8 *retry_short)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -2134,7 +2188,7 @@ int wmi_get_mgmt_retry(struct wil6210_priv *wil, u8 *retry_short)
 		return -ENOTSUPP;
 
 	reply.evt.mgmt_retry_limit = 0;
-	rc = wmi_call(wil, WMI_GET_MGMT_RETRY_LIMIT_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_GET_MGMT_RETRY_LIMIT_CMDID, vif->mid, NULL, 0,
 		      WMI_GET_MGMT_RETRY_LIMIT_EVENTID, &reply, sizeof(reply),
 		      100);
 	if (rc)
@@ -2146,21 +2200,23 @@ int wmi_get_mgmt_retry(struct wil6210_priv *wil, u8 *retry_short)
 	return 0;
 }
 
-int wmi_abort_scan(struct wil6210_priv *wil)
+int wmi_abort_scan(struct wil6210_vif *vif)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 
 	wil_dbg_wmi(wil, "sending WMI_ABORT_SCAN_CMDID\n");
 
-	rc = wmi_send(wil, WMI_ABORT_SCAN_CMDID, NULL, 0);
+	rc = wmi_send(wil, WMI_ABORT_SCAN_CMDID, vif->mid, NULL, 0);
 	if (rc)
 		wil_err(wil, "Failed to abort scan (%d)\n", rc);
 
 	return rc;
 }
 
-int wmi_new_sta(struct wil6210_priv *wil, const u8 *mac, u8 aid)
+int wmi_new_sta(struct wil6210_vif *vif, const u8 *mac, u8 aid)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	int rc;
 	struct wmi_new_sta_cmd cmd = {
 		.aid = aid,
@@ -2170,7 +2226,7 @@ int wmi_new_sta(struct wil6210_priv *wil, const u8 *mac, u8 aid)
 
 	ether_addr_copy(cmd.dst_mac, mac);
 
-	rc = wmi_send(wil, WMI_NEW_STA_CMDID, &cmd, sizeof(cmd));
+	rc = wmi_send(wil, WMI_NEW_STA_CMDID, vif->mid, &cmd, sizeof(cmd));
 	if (rc)
 		wil_err(wil, "Failed to send new sta (%d)\n", rc);
 
@@ -2206,6 +2262,7 @@ static const char *suspend_status2name(u8 status)
 
 int wmi_suspend(struct wil6210_priv *wil)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct wmi_traffic_suspend_cmd cmd = {
 		.wakeup_trigger = wil->wakeup_trigger,
@@ -2221,7 +2278,8 @@ int wmi_suspend(struct wil6210_priv *wil)
 
 	reply.evt.status = WMI_TRAFFIC_SUSPEND_REJECTED_LINK_NOT_IDLE;
 
-	rc = wmi_call(wil, WMI_TRAFFIC_SUSPEND_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_TRAFFIC_SUSPEND_CMDID, vif->mid,
+		      &cmd, sizeof(cmd),
 		      WMI_TRAFFIC_SUSPEND_EVENTID, &reply, sizeof(reply),
 		      suspend_to);
 	if (rc) {
@@ -2289,6 +2347,7 @@ static void resume_triggers2string(u32 triggers, char *string, int str_size)
 
 int wmi_resume(struct wil6210_priv *wil)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	char string[100];
 	struct {
@@ -2299,7 +2358,7 @@ int wmi_resume(struct wil6210_priv *wil)
 	reply.evt.status = WMI_TRAFFIC_RESUME_FAILED;
 	reply.evt.resume_triggers = WMI_RESUME_TRIGGER_UNKNOWN;
 
-	rc = wmi_call(wil, WMI_TRAFFIC_RESUME_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_TRAFFIC_RESUME_CMDID, vif->mid, NULL, 0,
 		      WMI_TRAFFIC_RESUME_EVENTID, &reply, sizeof(reply),
 		      WIL_WAIT_FOR_SUSPEND_RESUME_COMP);
 	if (rc)
@@ -2313,14 +2372,14 @@ int wmi_resume(struct wil6210_priv *wil)
 	return reply.evt.status;
 }
 
-static bool wmi_evt_call_handler(struct wil6210_priv *wil, int id,
+static bool wmi_evt_call_handler(struct wil6210_vif *vif, int id,
 				 void *d, int len)
 {
 	uint i;
 
 	for (i = 0; i < ARRAY_SIZE(wmi_evt_handlers); i++) {
 		if (wmi_evt_handlers[i].eventid == id) {
-			wmi_evt_handlers[i].handler(wil, id, d, len);
+			wmi_evt_handlers[i].handler(vif, id, d, len);
 			return true;
 		}
 	}
@@ -2332,19 +2391,25 @@ static void wmi_event_handle(struct wil6210_priv *wil,
 			     struct wil6210_mbox_hdr *hdr)
 {
 	u16 len = le16_to_cpu(hdr->len);
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 
 	if ((hdr->type == WIL_MBOX_HDR_TYPE_WMI) &&
 	    (len >= sizeof(struct wmi_cmd_hdr))) {
 		struct wmi_cmd_hdr *wmi = (void *)(&hdr[1]);
 		void *evt_data = (void *)(&wmi[1]);
 		u16 id = le16_to_cpu(wmi->command_id);
+		u8 mid = wmi->mid;
+
+		wil_dbg_wmi(wil, "Handle %s (0x%04x) (reply_id 0x%04x,%d)\n",
+			    eventid2name(id), id, wil->reply_id,
+			    wil->reply_mid);
 
-		wil_dbg_wmi(wil, "Handle %s (0x%04x) (reply_id 0x%04x)\n",
-			    eventid2name(id), id, wil->reply_id);
 		/* check if someone waits for this event */
-		if (wil->reply_id && wil->reply_id == id) {
+		if (wil->reply_id && wil->reply_id == id &&
+		    wil->reply_mid == mid) {
 			WARN_ON(wil->reply_buf);
-			wmi_evt_call_handler(wil, id, evt_data,
+
+			wmi_evt_call_handler(vif, id, evt_data,
 					     len - sizeof(*wmi));
 			wil_dbg_wmi(wil, "event_handle: Complete WMI 0x%04x\n",
 				    id);
@@ -2353,7 +2418,7 @@ static void wmi_event_handle(struct wil6210_priv *wil,
 		}
 		/* unsolicited event */
 		/* search for handler */
-		if (!wmi_evt_call_handler(wil, id, evt_data,
+		if (!wmi_evt_call_handler(vif, id, evt_data,
 					  len - sizeof(*wmi))) {
 			wil_info(wil, "Unhandled event 0x%04x\n", id);
 		}
@@ -2523,6 +2588,7 @@ wmi_sched_scan_set_plans(struct wil6210_priv *wil,
 int wmi_start_sched_scan(struct wil6210_priv *wil,
 			 struct cfg80211_sched_scan_request *request)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct wmi_start_sched_scan_cmd cmd = {
 		.min_rssi_threshold = S8_MIN,
@@ -2549,7 +2615,8 @@ int wmi_start_sched_scan(struct wil6210_priv *wil,
 
 	reply.evt.result = WMI_PNO_REJECT;
 
-	rc = wmi_call(wil, WMI_START_SCHED_SCAN_CMDID, &cmd, sizeof(cmd),
+	rc = wmi_call(wil, WMI_START_SCHED_SCAN_CMDID, vif->mid,
+		      &cmd, sizeof(cmd),
 		      WMI_START_SCHED_SCAN_EVENTID, &reply, sizeof(reply),
 		      WIL_WMI_CALL_GENERAL_TO_MS);
 	if (rc)
@@ -2566,6 +2633,7 @@ int wmi_start_sched_scan(struct wil6210_priv *wil,
 
 int wmi_stop_sched_scan(struct wil6210_priv *wil)
 {
+	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int rc;
 	struct {
 		struct wmi_cmd_hdr wmi;
@@ -2577,7 +2645,7 @@ int wmi_stop_sched_scan(struct wil6210_priv *wil)
 
 	reply.evt.result = WMI_PNO_REJECT;
 
-	rc = wmi_call(wil, WMI_STOP_SCHED_SCAN_CMDID, NULL, 0,
+	rc = wmi_call(wil, WMI_STOP_SCHED_SCAN_CMDID, vif->mid, NULL, 0,
 		      WMI_STOP_SCHED_SCAN_EVENTID, &reply, sizeof(reply),
 		      WIL_WMI_CALL_GENERAL_TO_MS);
 	if (rc)

From 4aebd3bdbd8a26ebcd2398289e2379472d17825f Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:14 +0200
Subject: [PATCH 0441/1678] wil6210: add support for adding and removing
 virtual interfaces

Add generic support in cfg80211 operations add_virtual_intf
and del_virtual_intf for adding/removing VIFs of any
interface type, and fix change_virtual_intf to allow changing
the interface type of a VIF. Previously these operations
only worked for the P2P_DEVICE interface which is not a real
VIF(it is management-only and shares radio with the main
interface).
Currently the interface combination is validated, the VIF is
added/removed in the firmware and the appropriate net/wireless
device is also added/removed.
Added minimal support for proper interface up/down and module
unload but most operations still work only on the main interface.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 209 +++++++++++++++++---
 drivers/net/wireless/ath/wil6210/main.c     |  34 ++++
 drivers/net/wireless/ath/wil6210/netdev.c   | 163 +++++++++++++--
 drivers/net/wireless/ath/wil6210/pcie_bus.c |  39 +++-
 drivers/net/wireless/ath/wil6210/wil6210.h  |  21 +-
 drivers/net/wireless/ath/wil6210/wmi.c      | 111 ++++++++++-
 6 files changed, 518 insertions(+), 59 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 2fd4af8d94dc..ce20ee47a258 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -18,6 +18,7 @@
 #include <linux/etherdevice.h>
 #include <linux/moduleparam.h>
 #include <net/netlink.h>
+#include <net/cfg80211.h>
 #include "wil6210.h"
 #include "wmi.h"
 #include "fw.h"
@@ -418,6 +419,53 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy,
 	mutex_unlock(&wil->mutex);
 }
 
+static int wil_cfg80211_validate_add_iface(struct wil6210_priv *wil,
+					   enum nl80211_iftype new_type)
+{
+	int i;
+	struct wireless_dev *wdev;
+	struct iface_combination_params params = {
+		.num_different_channels = 1,
+	};
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		if (wil->vifs[i]) {
+			wdev = vif_to_wdev(wil->vifs[i]);
+			params.iftype_num[wdev->iftype]++;
+		}
+	}
+	params.iftype_num[new_type]++;
+	return cfg80211_check_combinations(wil->wiphy, &params);
+}
+
+static int wil_cfg80211_validate_change_iface(struct wil6210_priv *wil,
+					      struct wil6210_vif *vif,
+					      enum nl80211_iftype new_type)
+{
+	int i, ret = 0;
+	struct wireless_dev *wdev;
+	struct iface_combination_params params = {
+		.num_different_channels = 1,
+	};
+	bool check_combos = false;
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		struct wil6210_vif *vif_pos = wil->vifs[i];
+
+		if (vif_pos && vif != vif_pos) {
+			wdev = vif_to_wdev(vif_pos);
+			params.iftype_num[wdev->iftype]++;
+			check_combos = true;
+		}
+	}
+
+	if (check_combos) {
+		params.iftype_num[new_type]++;
+		ret = cfg80211_check_combinations(wil->wiphy, &params);
+	}
+	return ret;
+}
+
 static struct wireless_dev *
 wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
 		       unsigned char name_assign_type,
@@ -425,53 +473,136 @@ wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
 		       struct vif_params *params)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	struct net_device *ndev = wil->main_ndev;
+	struct net_device *ndev_main = wil->main_ndev, *ndev;
 	struct wil6210_vif *vif;
-	struct wireless_dev *p2p_wdev;
+	struct wireless_dev *p2p_wdev, *wdev;
+	int rc;
 
-	wil_dbg_misc(wil, "add_iface\n");
+	wil_dbg_misc(wil, "add_iface, type %d\n", type);
 
-	if (type != NL80211_IFTYPE_P2P_DEVICE) {
-		wil_err(wil, "unsupported iftype %d\n", type);
+	/* P2P device is not a real virtual interface, it is a management-only
+	 * interface that shares the main interface.
+	 * Skip concurrency checks here.
+	 */
+	if (type == NL80211_IFTYPE_P2P_DEVICE) {
+		if (wil->p2p_wdev) {
+			wil_err(wil, "P2P_DEVICE interface already created\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		vif = kzalloc(sizeof(*vif), GFP_KERNEL);
+		if (!vif)
+			return ERR_PTR(-ENOMEM);
+
+		p2p_wdev = vif_to_wdev(vif);
+		p2p_wdev->iftype = type;
+		p2p_wdev->wiphy = wiphy;
+		/* use our primary ethernet address */
+		ether_addr_copy(p2p_wdev->address, ndev_main->perm_addr);
+
+		wil->p2p_wdev = p2p_wdev;
+
+		return p2p_wdev;
+	}
+
+	if (!wil->wiphy->n_iface_combinations) {
+		wil_err(wil, "virtual interfaces not supported\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (wil->p2p_wdev) {
-		wil_err(wil, "P2P_DEVICE interface already created\n");
-		return ERR_PTR(-EINVAL);
+	rc = wil_cfg80211_validate_add_iface(wil, type);
+	if (rc) {
+		wil_err(wil, "iface validation failed, err=%d\n", rc);
+		return ERR_PTR(rc);
 	}
 
-	vif = kzalloc(sizeof(*vif), GFP_KERNEL);
-	if (!vif)
-		return ERR_PTR(-ENOMEM);
+	vif = wil_vif_alloc(wil, name, name_assign_type, type);
+	if (IS_ERR(vif))
+		return ERR_CAST(vif);
 
-	p2p_wdev = &vif->wdev;
-	p2p_wdev->iftype = type;
-	p2p_wdev->wiphy = wiphy;
-	/* use our primary ethernet address */
-	ether_addr_copy(p2p_wdev->address, ndev->perm_addr);
+	ndev = vif_to_ndev(vif);
+	ether_addr_copy(ndev->perm_addr, ndev_main->perm_addr);
+	if (is_valid_ether_addr(params->macaddr)) {
+		ether_addr_copy(ndev->dev_addr, params->macaddr);
+	} else {
+		ether_addr_copy(ndev->dev_addr, ndev_main->perm_addr);
+		ndev->dev_addr[0] = (ndev->dev_addr[0] ^ (1 << vif->mid)) |
+			0x2; /* locally administered */
+	}
+	wdev = vif_to_wdev(vif);
+	ether_addr_copy(wdev->address, ndev->dev_addr);
 
-	wil->p2p_wdev = p2p_wdev;
+	rc = wil_vif_add(wil, vif);
+	if (rc)
+		goto out;
 
-	return p2p_wdev;
+	wil_info(wil, "added VIF, mid %d iftype %d MAC %pM\n",
+		 vif->mid, type, wdev->address);
+	return wdev;
+out:
+	wil_vif_free(vif);
+	return ERR_PTR(rc);
+}
+
+int wil_vif_prepare_stop(struct wil6210_priv *wil, struct wil6210_vif *vif)
+{
+	struct wireless_dev *wdev = vif_to_wdev(vif);
+	struct net_device *ndev;
+	int rc;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP)
+		return 0;
+
+	ndev = vif_to_ndev(vif);
+	if (netif_carrier_ok(ndev)) {
+		rc = wmi_pcp_stop(vif);
+		if (rc) {
+			wil_info(wil, "failed to stop AP, status %d\n",
+				 rc);
+			/* continue */
+		}
+		netif_carrier_off(ndev);
+	}
+
+	return 0;
 }
 
 static int wil_cfg80211_del_iface(struct wiphy *wiphy,
 				  struct wireless_dev *wdev)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+	struct wil6210_vif *vif = wdev_to_vif(wil, wdev);
+	int rc;
 
 	wil_dbg_misc(wil, "del_iface\n");
 
-	if (wdev != wil->p2p_wdev) {
-		wil_err(wil, "delete of incorrect interface 0x%p\n", wdev);
+	if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+		if (wdev != wil->p2p_wdev) {
+			wil_err(wil, "delete of incorrect interface 0x%p\n",
+				wdev);
+			return -EINVAL;
+		}
+
+		wil_cfg80211_stop_p2p_device(wiphy, wdev);
+		wil_p2p_wdev_free(wil);
+		return 0;
+	}
+
+	if (vif->mid == 0) {
+		wil_err(wil, "cannot remove the main interface\n");
 		return -EINVAL;
 	}
 
-	wil_cfg80211_stop_p2p_device(wiphy, wdev);
-	wil_p2p_wdev_free(wil);
+	rc = wil_vif_prepare_stop(wil, vif);
+	if (rc)
+		goto out;
 
-	return 0;
+	wil_info(wil, "deleted VIF, mid %d iftype %d MAC %pM\n",
+		 vif->mid, wdev->iftype, wdev->address);
+
+	wil_vif_remove(wil, vif->mid);
+out:
+	return rc;
 }
 
 static int wil_cfg80211_change_iface(struct wiphy *wiphy,
@@ -486,7 +617,19 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 
 	wil_dbg_misc(wil, "change_iface: type=%d\n", type);
 
-	if (netif_running(ndev) && !wil_is_recovery_blocked(wil)) {
+	if (wiphy->n_iface_combinations) {
+		rc = wil_cfg80211_validate_change_iface(wil, vif, type);
+		if (rc) {
+			wil_err(wil, "iface validation failed, err=%d\n", rc);
+			return rc;
+		}
+	}
+
+	/* do not reset FW when there are active VIFs,
+	 * because it can cause significant disruption
+	 */
+	if (!wil_has_other_up_ifaces(wil, ndev) &&
+	    netif_running(ndev) && !wil_is_recovery_blocked(wil)) {
 		wil_dbg_misc(wil, "interface is up. resetting...\n");
 		mutex_lock(&wil->mutex);
 		__wil_down(wil);
@@ -511,8 +654,17 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 		return -EOPNOTSUPP;
 	}
 
-	wdev->iftype = type;
+	if (vif->mid != 0 && wil_has_up_ifaces(wil)) {
+		wil_vif_prepare_stop(wil, vif);
+		rc = wmi_port_delete(wil, vif->mid);
+		if (rc)
+			return rc;
+		rc = wmi_port_allocate(wil, vif->mid, ndev->dev_addr, type);
+		if (rc)
+			return rc;
+	}
 
+	wdev->iftype = type;
 	return 0;
 }
 
@@ -2007,6 +2159,13 @@ int wil_cfg80211_iface_combinations_from_fw(
 		combo = (struct wil_fw_concurrency_combo *)limit;
 	}
 
+	wil_dbg_misc(wil, "multiple VIFs supported, n_mids %d\n", conc->n_mids);
+	wil->max_vifs = conc->n_mids + 1; /* including main interface */
+	if (wil->max_vifs > WIL_MAX_VIFS) {
+		wil_info(wil, "limited number of VIFs supported(%d, FW %d)\n",
+			 WIL_MAX_VIFS, wil->max_vifs);
+		wil->max_vifs = WIL_MAX_VIFS;
+	}
 	wiphy->n_iface_combinations = n_combos;
 	wiphy->iface_combinations = iface_combinations;
 	return 0;
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index b151f1c7ce85..2926bd1ae713 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -565,6 +565,7 @@ int wil_priv_init(struct wil6210_priv *wil)
 	wil->vring_idle_trsh = 16;
 
 	wil->reply_mid = U8_MAX;
+	wil->max_vifs = 1;
 
 	return 0;
 
@@ -1115,6 +1116,33 @@ static void wil_pre_fw_config(struct wil6210_priv *wil)
 	}
 }
 
+static int wil_restore_vifs(struct wil6210_priv *wil)
+{
+	struct wil6210_vif *vif;
+	struct net_device *ndev;
+	struct wireless_dev *wdev;
+	int i, rc;
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		vif = wil->vifs[i];
+		if (!vif)
+			continue;
+		if (vif->mid) {
+			ndev = vif_to_ndev(vif);
+			wdev = vif_to_wdev(vif);
+			rc = wmi_port_allocate(wil, vif->mid, ndev->dev_addr,
+					       wdev->iftype);
+			if (rc) {
+				wil_err(wil, "fail to restore VIF %d type %d, rc %d\n",
+					i, wdev->iftype, rc);
+				return rc;
+			}
+		}
+	}
+
+	return 0;
+}
+
 /*
  * We reset all the structures, and we reset the UMAC.
  * After calling this routine, you're expected to reload
@@ -1277,6 +1305,12 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 			return rc;
 		}
 
+		rc = wil_restore_vifs(wil);
+		if (rc) {
+			wil_err(wil, "failed to restore vifs, rc %d\n", rc);
+			return rc;
+		}
+
 		wil_collect_fw_info(wil);
 
 		if (wil->ps_profile != WMI_PS_PROFILE_TYPE_DEFAULT)
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index 95570b8f1f6d..e23a80c235cc 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -16,13 +16,38 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
 #include "wil6210.h"
 #include "txrx.h"
 
+bool wil_has_other_up_ifaces(struct wil6210_priv *wil,
+			     struct net_device *ndev)
+{
+	int i;
+	struct wil6210_vif *vif;
+	struct net_device *ndev_i;
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		vif = wil->vifs[i];
+		if (vif) {
+			ndev_i = vif_to_ndev(vif);
+			if (ndev_i != ndev && ndev_i->flags & IFF_UP)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+bool wil_has_up_ifaces(struct wil6210_priv *wil)
+{
+	return wil_has_other_up_ifaces(wil, NULL);
+}
+
 static int wil_open(struct net_device *ndev)
 {
 	struct wil6210_priv *wil = ndev_to_wil(ndev);
-	int rc;
+	int rc = 0;
 
 	wil_dbg_misc(wil, "open\n");
 
@@ -32,13 +57,16 @@ static int wil_open(struct net_device *ndev)
 		return -EINVAL;
 	}
 
-	rc = wil_pm_runtime_get(wil);
-	if (rc < 0)
-		return rc;
+	if (!wil_has_other_up_ifaces(wil, ndev)) {
+		wil_dbg_misc(wil, "open, first iface\n");
+		rc = wil_pm_runtime_get(wil);
+		if (rc < 0)
+			return rc;
 
-	rc = wil_up(wil);
-	if (rc)
-		wil_pm_runtime_put(wil);
+		rc = wil_up(wil);
+		if (rc)
+			wil_pm_runtime_put(wil);
+	}
 
 	return rc;
 }
@@ -46,13 +74,16 @@ static int wil_open(struct net_device *ndev)
 static int wil_stop(struct net_device *ndev)
 {
 	struct wil6210_priv *wil = ndev_to_wil(ndev);
-	int rc;
+	int rc = 0;
 
 	wil_dbg_misc(wil, "stop\n");
 
-	rc = wil_down(wil);
-	if (!rc)
-		wil_pm_runtime_put(wil);
+	if (!wil_has_other_up_ifaces(wil, ndev)) {
+		wil_dbg_misc(wil, "stop, last iface\n");
+		rc = wil_down(wil);
+		if (!rc)
+			wil_pm_runtime_put(wil);
+	}
 
 	return rc;
 }
@@ -201,14 +232,32 @@ static void wil_vif_init(struct wil6210_vif *vif)
 	INIT_LIST_HEAD(&vif->probe_client_pending);
 }
 
+static u8 wil_vif_find_free_mid(struct wil6210_priv *wil)
+{
+	u8 i;
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		if (!wil->vifs[i])
+			return i;
+	}
+
+	return U8_MAX;
+}
+
 struct wil6210_vif *
 wil_vif_alloc(struct wil6210_priv *wil, const char *name,
-	      unsigned char name_assign_type, enum nl80211_iftype iftype,
-	      u8 mid)
+	      unsigned char name_assign_type, enum nl80211_iftype iftype)
 {
 	struct net_device *ndev;
 	struct wireless_dev *wdev;
 	struct wil6210_vif *vif;
+	u8 mid;
+
+	mid = wil_vif_find_free_mid(wil);
+	if (mid == U8_MAX) {
+		wil_err(wil, "no available virtual interface\n");
+		return ERR_PTR(-EINVAL);
+	}
 
 	ndev = alloc_netdev(sizeof(*vif), name, name_assign_type,
 			    wil_dev_setup);
@@ -216,8 +265,13 @@ wil_vif_alloc(struct wil6210_priv *wil, const char *name,
 		dev_err(wil_to_dev(wil), "alloc_netdev failed\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	if (mid == 0)
+	if (mid == 0) {
 		wil->main_ndev = ndev;
+	} else {
+		ndev->priv_destructor = wil_ndev_destructor;
+		ndev->needs_free_netdev = true;
+	}
+
 	vif = ndev_to_vif(ndev);
 	vif->ndev = ndev;
 	vif->wil = wil;
@@ -263,7 +317,7 @@ void *wil_if_alloc(struct device *dev)
 	wil_dbg_misc(wil, "if_alloc\n");
 
 	vif = wil_vif_alloc(wil, "wlan%d", NET_NAME_UNKNOWN,
-			    NL80211_IFTYPE_STATION, 0);
+			    NL80211_IFTYPE_STATION);
 	if (IS_ERR(vif)) {
 		dev_err(dev, "wil_vif_alloc failed\n");
 		rc = -ENOMEM;
@@ -301,10 +355,43 @@ void wil_if_free(struct wil6210_priv *wil)
 	wil_cfg80211_deinit(wil);
 }
 
+int wil_vif_add(struct wil6210_priv *wil, struct wil6210_vif *vif)
+{
+	struct net_device *ndev = vif_to_ndev(vif);
+	struct wireless_dev *wdev = vif_to_wdev(vif);
+	bool any_active = wil_has_up_ifaces(wil);
+	int rc;
+
+	ASSERT_RTNL();
+
+	if (wil->vifs[vif->mid]) {
+		dev_err(&ndev->dev, "VIF with mid %d already in use\n",
+			vif->mid);
+		return -EEXIST;
+	}
+	if (any_active && vif->mid != 0) {
+		rc = wmi_port_allocate(wil, vif->mid, ndev->dev_addr,
+				       wdev->iftype);
+		if (rc)
+			return rc;
+	}
+	rc = register_netdevice(ndev);
+	if (rc < 0) {
+		dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc);
+		if (any_active && vif->mid != 0)
+			wmi_port_delete(wil, vif->mid);
+		return rc;
+	}
+
+	wil->vifs[vif->mid] = vif;
+	return 0;
+}
+
 int wil_if_add(struct wil6210_priv *wil)
 {
 	struct wiphy *wiphy = wil->wiphy;
 	struct net_device *ndev = wil->main_ndev;
+	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	int rc;
 
 	wil_dbg_misc(wil, "entered");
@@ -324,11 +411,11 @@ int wil_if_add(struct wil6210_priv *wil)
 
 	wil_update_net_queues_bh(wil, NULL, true);
 
-	rc = register_netdev(ndev);
-	if (rc < 0) {
-		dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc);
+	rtnl_lock();
+	rc = wil_vif_add(wil, vif);
+	rtnl_unlock();
+	if (rc < 0)
 		goto out_wiphy;
-	}
 
 	return 0;
 
@@ -337,6 +424,40 @@ out_wiphy:
 	return rc;
 }
 
+void wil_vif_remove(struct wil6210_priv *wil, u8 mid)
+{
+	struct wil6210_vif *vif;
+	struct net_device *ndev;
+	bool any_active = wil_has_up_ifaces(wil);
+
+	ASSERT_RTNL();
+	if (mid >= wil->max_vifs) {
+		wil_err(wil, "invalid MID: %d\n", mid);
+		return;
+	}
+
+	vif = wil->vifs[mid];
+	if (!vif) {
+		wil_err(wil, "MID %d not registered\n", mid);
+		return;
+	}
+
+	ndev = vif_to_ndev(vif);
+	/* during unregister_netdevice cfg80211_leave may perform operations
+	 * such as stop AP, disconnect, so we only clear the VIF afterwards
+	 */
+	unregister_netdevice(ndev);
+
+	if (any_active && vif->mid != 0)
+		wmi_port_delete(wil, vif->mid);
+
+	wil->vifs[mid] = NULL;
+	/* for VIFs, ndev will be freed by destructor after RTNL is unlocked.
+	 * the main interface will be freed in wil_if_free, we need to keep it
+	 * a bit longer so logging macros will work.
+	 */
+}
+
 void wil_if_remove(struct wil6210_priv *wil)
 {
 	struct net_device *ndev = wil->main_ndev;
@@ -344,6 +465,8 @@ void wil_if_remove(struct wil6210_priv *wil)
 
 	wil_dbg_misc(wil, "if_remove\n");
 
-	unregister_netdev(ndev);
+	rtnl_lock();
+	wil_vif_remove(wil, 0);
+	rtnl_unlock();
 	wiphy_unregister(wdev->wiphy);
 }
diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index a61ffd9dce0f..1fd76148d6ff 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c
@@ -137,6 +137,20 @@ void wil_enable_irq(struct wil6210_priv *wil)
 	enable_irq(wil->pdev->irq);
 }
 
+static void wil_remove_all_additional_vifs(struct wil6210_priv *wil)
+{
+	struct wil6210_vif *vif;
+	int i;
+
+	for (i = 1; i < wil->max_vifs; i++) {
+		vif = wil->vifs[i];
+		if (vif) {
+			wil_vif_prepare_stop(wil, vif);
+			wil_vif_remove(wil, vif->mid);
+		}
+	}
+}
+
 /* Bus ops */
 static int wil_if_pcie_enable(struct wil6210_priv *wil)
 {
@@ -148,10 +162,8 @@ static int wil_if_pcie_enable(struct wil6210_priv *wil)
 	 */
 	int msi_only = pdev->msi_enabled;
 	bool _use_msi = use_msi;
-	bool wmi_only = test_bit(WMI_FW_CAPABILITY_WMI_ONLY,
-				 wil->fw_capabilities);
 
-	wil_dbg_misc(wil, "if_pcie_enable, wmi_only %d\n", wmi_only);
+	wil_dbg_misc(wil, "if_pcie_enable\n");
 
 	pci_set_master(pdev);
 
@@ -172,11 +184,9 @@ static int wil_if_pcie_enable(struct wil6210_priv *wil)
 	if (rc)
 		goto stop_master;
 
-	/* need reset here to obtain MAC or in case of WMI-only FW, full reset
-	 * and fw loading takes place
-	 */
+	/* need reset here to obtain MAC */
 	mutex_lock(&wil->mutex);
-	rc = wil_reset(wil, wmi_only);
+	rc = wil_reset(wil, false);
 	mutex_unlock(&wil->mutex);
 	if (rc)
 		goto release_irq;
@@ -356,6 +366,18 @@ static int wil_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto bus_disable;
 	}
 
+	/* in case of WMI-only FW, perform full reset and FW loading */
+	if (test_bit(WMI_FW_CAPABILITY_WMI_ONLY, wil->fw_capabilities)) {
+		wil_dbg_misc(wil, "Loading WMI only FW\n");
+		mutex_lock(&wil->mutex);
+		rc = wil_reset(wil, true);
+		mutex_unlock(&wil->mutex);
+		if (rc) {
+			wil_err(wil, "failed to load WMI only FW\n");
+			goto if_remove;
+		}
+	}
+
 	if (IS_ENABLED(CONFIG_PM))
 		wil->pm_notify.notifier_call = wil6210_pm_notify;
 
@@ -372,6 +394,8 @@ static int wil_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
+if_remove:
+	wil_if_remove(wil);
 bus_disable:
 	wil_if_pcie_disable(wil);
 err_iounmap:
@@ -402,6 +426,7 @@ static void wil_pcie_remove(struct pci_dev *pdev)
 	wil6210_debugfs_remove(wil);
 	rtnl_lock();
 	wil_p2p_wdev_free(wil);
+	wil_remove_all_additional_vifs(wil);
 	rtnl_unlock();
 	wil_if_remove(wil);
 	wil_if_pcie_disable(wil);
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index fa8df41b045f..8ab4b5af20fb 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -50,6 +50,11 @@ extern bool disable_ap_sme;
 #define WIL_DEFAULT_BUS_REQUEST_KBPS 128000 /* ~1Gbps */
 #define WIL_MAX_BUS_REQUEST_KBPS 800000 /* ~6.1Gbps */
 
+/* maximum number of virtual interfaces the driver supports
+ * (including the main interface)
+ */
+#define WIL_MAX_VIFS 4
+
 /**
  * extract bits [@b0:@b1] (inclusive) from the value @x
  * it should be @b0 <= @b1, or result is incorrect
@@ -714,11 +719,12 @@ struct wil6210_priv {
 	DECLARE_BITMAP(hw_capa, hw_capa_last);
 	DECLARE_BITMAP(fw_capabilities, WMI_FW_CAPABILITY_MAX);
 	DECLARE_BITMAP(platform_capa, WIL_PLATFORM_CAPA_MAX);
-	u8 n_mids; /* number of additional MIDs as reported by FW */
 	u32 recovery_count; /* num of FW recovery attempts in a short time */
 	u32 recovery_state; /* FW recovery state machine */
 	unsigned long last_fw_recovery; /* jiffies of last fw recovery */
 	wait_queue_head_t wq; /* for all wait_event() use */
+	u8 max_vifs; /* maximum number of interfaces, including main */
+	struct wil6210_vif *vifs[WIL_MAX_VIFS];
 	/* profile */
 	struct cfg80211_chan_def monitor_chandef;
 	u32 monitor_flags;
@@ -935,11 +941,16 @@ void wil_memcpy_toio_32(volatile void __iomem *dst, const void *src,
 
 struct wil6210_vif *
 wil_vif_alloc(struct wil6210_priv *wil, const char *name,
-	      unsigned char name_assign_type, enum nl80211_iftype iftype,
-	      u8 mid);
+	      unsigned char name_assign_type, enum nl80211_iftype iftype);
+void wil_vif_free(struct wil6210_vif *vif);
 void *wil_if_alloc(struct device *dev);
+bool wil_has_other_up_ifaces(struct wil6210_priv *wil,
+			     struct net_device *ndev);
+bool wil_has_up_ifaces(struct wil6210_priv *wil);
 void wil_if_free(struct wil6210_priv *wil);
+int wil_vif_add(struct wil6210_priv *wil, struct wil6210_vif *vif);
 int wil_if_add(struct wil6210_priv *wil);
+void wil_vif_remove(struct wil6210_priv *wil, u8 mid);
 void wil_if_remove(struct wil6210_priv *wil);
 int wil_priv_init(struct wil6210_priv *wil);
 void wil_priv_deinit(struct wil6210_priv *wil);
@@ -998,6 +1009,9 @@ int wmi_ps_dev_profile_cfg(struct wil6210_priv *wil,
 int wmi_set_mgmt_retry(struct wil6210_priv *wil, u8 retry_short);
 int wmi_get_mgmt_retry(struct wil6210_priv *wil, u8 *retry_short);
 int wmi_new_sta(struct wil6210_vif *vif, const u8 *mac, u8 aid);
+int wmi_port_allocate(struct wil6210_priv *wil, u8 mid,
+		      const u8 *mac, enum nl80211_iftype iftype);
+int wmi_port_delete(struct wil6210_priv *wil, u8 mid);
 int wil_addba_rx_request(struct wil6210_priv *wil, u8 mid,
 			 u8 cidxtid, u8 dialog_token, __le16 ba_param_set,
 			 __le16 ba_timeout, __le16 ba_seq_ctrl);
@@ -1039,6 +1053,7 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 int wil_cfg80211_iface_combinations_from_fw(
 	struct wil6210_priv *wil,
 	const struct wil_fw_record_concurrency *conc);
+int wil_vif_prepare_stop(struct wil6210_priv *wil, struct wil6210_vif *vif);
 
 #if defined(CONFIG_WIL6210_DEBUGFS)
 int wil6210_debugfs_init(struct wil6210_priv *wil);
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 044d69850353..23c28bf07f28 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -656,11 +656,14 @@ static void wmi_evt_ready(struct wil6210_vif *vif, int id, void *d, int len)
 	struct wiphy *wiphy = wil_to_wiphy(wil);
 	struct wmi_ready_event *evt = d;
 
-	wil->n_mids = evt->numof_additional_mids;
-
 	wil_info(wil, "FW ver. %s(SW %d); MAC %pM; %d MID's\n",
 		 wil->fw_version, le32_to_cpu(evt->sw_version),
-		 evt->mac, wil->n_mids);
+		 evt->mac, evt->numof_additional_mids);
+	if (evt->numof_additional_mids + 1 < wil->max_vifs) {
+		wil_err(wil, "FW does not support enough MIDs (need %d)",
+			wil->max_vifs - 1);
+		return; /* FW load will fail after timeout */
+	}
 	/* ignore MAC address, we already have it from the boot loader */
 	strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version));
 
@@ -2372,6 +2375,92 @@ int wmi_resume(struct wil6210_priv *wil)
 	return reply.evt.status;
 }
 
+int wmi_port_allocate(struct wil6210_priv *wil, u8 mid,
+		      const u8 *mac, enum nl80211_iftype iftype)
+{
+	int rc;
+	struct wmi_port_allocate_cmd cmd = {
+		.mid = mid,
+	};
+	struct {
+		struct wmi_cmd_hdr wmi;
+		struct wmi_port_allocated_event evt;
+	} __packed reply;
+
+	wil_dbg_misc(wil, "port allocate, mid %d iftype %d, mac %pM\n",
+		     mid, iftype, mac);
+
+	ether_addr_copy(cmd.mac, mac);
+	switch (iftype) {
+	case NL80211_IFTYPE_STATION:
+		cmd.port_role = WMI_PORT_STA;
+		break;
+	case NL80211_IFTYPE_AP:
+		cmd.port_role = WMI_PORT_AP;
+		break;
+	case NL80211_IFTYPE_P2P_CLIENT:
+		cmd.port_role = WMI_PORT_P2P_CLIENT;
+		break;
+	case NL80211_IFTYPE_P2P_GO:
+		cmd.port_role = WMI_PORT_P2P_GO;
+		break;
+	/* what about monitor??? */
+	default:
+		wil_err(wil, "unsupported iftype: %d\n", iftype);
+		return -EINVAL;
+	}
+
+	reply.evt.status = WMI_FW_STATUS_FAILURE;
+
+	rc = wmi_call(wil, WMI_PORT_ALLOCATE_CMDID, mid,
+		      &cmd, sizeof(cmd),
+		      WMI_PORT_ALLOCATED_EVENTID, &reply,
+		      sizeof(reply), 300);
+	if (rc) {
+		wil_err(wil, "failed to allocate port, status %d\n", rc);
+		return rc;
+	}
+	if (reply.evt.status != WMI_FW_STATUS_SUCCESS) {
+		wil_err(wil, "WMI_PORT_ALLOCATE returned status %d\n",
+			reply.evt.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int wmi_port_delete(struct wil6210_priv *wil, u8 mid)
+{
+	int rc;
+	struct wmi_port_delete_cmd cmd = {
+		.mid = mid,
+	};
+	struct {
+		struct wmi_cmd_hdr wmi;
+		struct wmi_port_deleted_event evt;
+	} __packed reply;
+
+	wil_dbg_misc(wil, "port delete, mid %d\n", mid);
+
+	reply.evt.status = WMI_FW_STATUS_FAILURE;
+
+	rc = wmi_call(wil, WMI_PORT_DELETE_CMDID, mid,
+		      &cmd, sizeof(cmd),
+		      WMI_PORT_DELETED_EVENTID, &reply,
+		      sizeof(reply), 2000);
+	if (rc) {
+		wil_err(wil, "failed to delete port, status %d\n", rc);
+		return rc;
+	}
+	if (reply.evt.status != WMI_FW_STATUS_SUCCESS) {
+		wil_err(wil, "WMI_PORT_DELETE returned status %d\n",
+			reply.evt.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static bool wmi_evt_call_handler(struct wil6210_vif *vif, int id,
 				 void *d, int len)
 {
@@ -2391,7 +2480,7 @@ static void wmi_event_handle(struct wil6210_priv *wil,
 			     struct wil6210_mbox_hdr *hdr)
 {
 	u16 len = le16_to_cpu(hdr->len);
-	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
+	struct wil6210_vif *vif;
 
 	if ((hdr->type == WIL_MBOX_HDR_TYPE_WMI) &&
 	    (len >= sizeof(struct wmi_cmd_hdr))) {
@@ -2404,6 +2493,20 @@ static void wmi_event_handle(struct wil6210_priv *wil,
 			    eventid2name(id), id, wil->reply_id,
 			    wil->reply_mid);
 
+		if (mid == MID_BROADCAST)
+			mid = 0;
+		if (mid >= wil->max_vifs) {
+			wil_dbg_wmi(wil, "invalid mid %d, event skipped\n",
+				    mid);
+			return;
+		}
+		vif = wil->vifs[mid];
+		if (!vif) {
+			wil_dbg_wmi(wil, "event for empty VIF(%d), skipped\n",
+				    mid);
+			return;
+		}
+
 		/* check if someone waits for this event */
 		if (wil->reply_id && wil->reply_id == id &&
 		    wil->reply_mid == mid) {

From 3ada9314b4ea06e656ebb8f5806ff97596a3d548 Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:15 +0200
Subject: [PATCH 0442/1678] wil6210: multiple VIFs support for start/stop AP

Add support for multiple VIFs in the cfg80211 operations start_ap,
stop_ap and change_beacon. This change allows starting multiple APs
using virtual interfaces.
The data path and most other operations are still working only
on the main interface.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 49 ++++++++++++++-------
 drivers/net/wireless/ath/wil6210/main.c     | 14 +++++-
 drivers/net/wireless/ath/wil6210/netdev.c   | 22 ++++-----
 drivers/net/wireless/ath/wil6210/pcie_bus.c |  2 +-
 drivers/net/wireless/ath/wil6210/wil6210.h  |  9 ++--
 drivers/net/wireless/ath/wil6210/wmi.c      |  8 ++++
 6 files changed, 71 insertions(+), 33 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index ce20ee47a258..a3ad3f42c693 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -544,8 +544,9 @@ out:
 	return ERR_PTR(rc);
 }
 
-int wil_vif_prepare_stop(struct wil6210_priv *wil, struct wil6210_vif *vif)
+int wil_vif_prepare_stop(struct wil6210_vif *vif)
 {
+	struct wil6210_priv *wil = vif_to_wil(vif);
 	struct wireless_dev *wdev = vif_to_wdev(vif);
 	struct net_device *ndev;
 	int rc;
@@ -561,6 +562,7 @@ int wil_vif_prepare_stop(struct wil6210_priv *wil, struct wil6210_vif *vif)
 				 rc);
 			/* continue */
 		}
+		wil_bcast_fini(vif);
 		netif_carrier_off(ndev);
 	}
 
@@ -593,7 +595,7 @@ static int wil_cfg80211_del_iface(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
-	rc = wil_vif_prepare_stop(wil, vif);
+	rc = wil_vif_prepare_stop(vif);
 	if (rc)
 		goto out;
 
@@ -614,6 +616,7 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 	struct wil6210_vif *vif = ndev_to_vif(ndev);
 	struct wireless_dev *wdev = vif_to_wdev(vif);
 	int rc;
+	bool fw_reset = false;
 
 	wil_dbg_misc(wil, "change_iface: type=%d\n", type);
 
@@ -628,7 +631,7 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 	/* do not reset FW when there are active VIFs,
 	 * because it can cause significant disruption
 	 */
-	if (!wil_has_other_up_ifaces(wil, ndev) &&
+	if (!wil_has_other_active_ifaces(wil, ndev, true, false) &&
 	    netif_running(ndev) && !wil_is_recovery_blocked(wil)) {
 		wil_dbg_misc(wil, "interface is up. resetting...\n");
 		mutex_lock(&wil->mutex);
@@ -638,6 +641,7 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 
 		if (rc)
 			return rc;
+		fw_reset = true;
 	}
 
 	switch (type) {
@@ -654,8 +658,9 @@ static int wil_cfg80211_change_iface(struct wiphy *wiphy,
 		return -EOPNOTSUPP;
 	}
 
-	if (vif->mid != 0 && wil_has_up_ifaces(wil)) {
-		wil_vif_prepare_stop(wil, vif);
+	if (vif->mid != 0 && wil_has_active_ifaces(wil, true, false)) {
+		if (!fw_reset)
+			wil_vif_prepare_stop(vif);
 		rc = wmi_port_delete(wil, vif->mid);
 		if (rc)
 			return rc;
@@ -1530,10 +1535,12 @@ static int _wil_cfg80211_start_ap(struct wiphy *wiphy,
 
 	mutex_lock(&wil->mutex);
 
-	__wil_down(wil);
-	rc = __wil_up(wil);
-	if (rc)
-		goto out;
+	if (!wil_has_other_active_ifaces(wil, ndev, true, false)) {
+		__wil_down(wil);
+		rc = __wil_up(wil);
+		if (rc)
+			goto out;
+	}
 
 	rc = wmi_set_ssid(vif, ssid_len, ssid);
 	if (rc)
@@ -1549,7 +1556,8 @@ static int _wil_cfg80211_start_ap(struct wiphy *wiphy,
 	vif->pbss = pbss;
 
 	netif_carrier_on(ndev);
-	wil6210_bus_request(wil, WIL_MAX_BUS_REQUEST_KBPS);
+	if (!wil_has_other_active_ifaces(wil, ndev, false, true))
+		wil6210_bus_request(wil, WIL_MAX_BUS_REQUEST_KBPS);
 
 	rc = wmi_pcp_start(vif, bi, wmi_nettype, chan, hidden_ssid, is_go);
 	if (rc)
@@ -1565,7 +1573,8 @@ err_bcast:
 	wmi_pcp_stop(vif);
 err_pcp_start:
 	netif_carrier_off(ndev);
-	wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
+	if (!wil_has_other_active_ifaces(wil, ndev, false, true))
+		wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
 out:
 	mutex_unlock(&wil->mutex);
 	return rc;
@@ -1670,20 +1679,26 @@ static int wil_cfg80211_stop_ap(struct wiphy *wiphy,
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	struct wil6210_vif *vif = ndev_to_vif(ndev);
+	bool last;
 
-	wil_dbg_misc(wil, "stop_ap\n");
+	wil_dbg_misc(wil, "stop_ap, mid=%d\n", vif->mid);
 
 	netif_carrier_off(ndev);
-	wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
-	wil_set_recovery_state(wil, fw_recovery_idle);
-
-	set_bit(wil_status_resetting, wil->status);
+	last = !wil_has_other_active_ifaces(wil, ndev, false, true);
+	if (last) {
+		wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
+		wil_set_recovery_state(wil, fw_recovery_idle);
+		set_bit(wil_status_resetting, wil->status);
+	}
 
 	mutex_lock(&wil->mutex);
 
 	wmi_pcp_stop(vif);
 
-	__wil_down(wil);
+	if (last)
+		__wil_down(wil);
+	else
+		wil_bcast_fini(vif);
 
 	mutex_unlock(&wil->mutex);
 
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 2926bd1ae713..5aeaf9b23b80 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -508,6 +508,18 @@ void wil_bcast_fini(struct wil6210_vif *vif)
 	wil_vring_fini_tx(wil, ri);
 }
 
+void wil_bcast_fini_all(struct wil6210_priv *wil)
+{
+	int i;
+	struct wil6210_vif *vif;
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		vif = wil->vifs[i];
+		if (vif)
+			wil_bcast_fini(vif);
+	}
+}
+
 int wil_priv_init(struct wil6210_priv *wil)
 {
 	uint i;
@@ -1204,7 +1216,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 
 	cancel_work_sync(&vif->disconnect_worker);
 	wil6210_disconnect(vif, NULL, WLAN_REASON_DEAUTH_LEAVING, false);
-	wil_bcast_fini(vif);
+	wil_bcast_fini_all(wil);
 
 	/* Disable device led before reset*/
 	wmi_led_cfg(wil, false);
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index e23a80c235cc..87956c0f7c79 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -20,8 +20,8 @@
 #include "wil6210.h"
 #include "txrx.h"
 
-bool wil_has_other_up_ifaces(struct wil6210_priv *wil,
-			     struct net_device *ndev)
+bool wil_has_other_active_ifaces(struct wil6210_priv *wil,
+				 struct net_device *ndev, bool up, bool ok)
 {
 	int i;
 	struct wil6210_vif *vif;
@@ -31,17 +31,19 @@ bool wil_has_other_up_ifaces(struct wil6210_priv *wil,
 		vif = wil->vifs[i];
 		if (vif) {
 			ndev_i = vif_to_ndev(vif);
-			if (ndev_i != ndev && ndev_i->flags & IFF_UP)
-				return true;
+			if (ndev_i != ndev)
+				if ((up && (ndev_i->flags & IFF_UP)) ||
+				    (ok && netif_carrier_ok(ndev_i)))
+					return true;
 		}
 	}
 
 	return false;
 }
 
-bool wil_has_up_ifaces(struct wil6210_priv *wil)
+bool wil_has_active_ifaces(struct wil6210_priv *wil, bool up, bool ok)
 {
-	return wil_has_other_up_ifaces(wil, NULL);
+	return wil_has_other_active_ifaces(wil, NULL, up, ok);
 }
 
 static int wil_open(struct net_device *ndev)
@@ -57,7 +59,7 @@ static int wil_open(struct net_device *ndev)
 		return -EINVAL;
 	}
 
-	if (!wil_has_other_up_ifaces(wil, ndev)) {
+	if (!wil_has_other_active_ifaces(wil, ndev, true, false)) {
 		wil_dbg_misc(wil, "open, first iface\n");
 		rc = wil_pm_runtime_get(wil);
 		if (rc < 0)
@@ -78,7 +80,7 @@ static int wil_stop(struct net_device *ndev)
 
 	wil_dbg_misc(wil, "stop\n");
 
-	if (!wil_has_other_up_ifaces(wil, ndev)) {
+	if (!wil_has_other_active_ifaces(wil, ndev, true, false)) {
 		wil_dbg_misc(wil, "stop, last iface\n");
 		rc = wil_down(wil);
 		if (!rc)
@@ -359,7 +361,7 @@ int wil_vif_add(struct wil6210_priv *wil, struct wil6210_vif *vif)
 {
 	struct net_device *ndev = vif_to_ndev(vif);
 	struct wireless_dev *wdev = vif_to_wdev(vif);
-	bool any_active = wil_has_up_ifaces(wil);
+	bool any_active = wil_has_active_ifaces(wil, true, false);
 	int rc;
 
 	ASSERT_RTNL();
@@ -428,7 +430,7 @@ void wil_vif_remove(struct wil6210_priv *wil, u8 mid)
 {
 	struct wil6210_vif *vif;
 	struct net_device *ndev;
-	bool any_active = wil_has_up_ifaces(wil);
+	bool any_active = wil_has_active_ifaces(wil, true, false);
 
 	ASSERT_RTNL();
 	if (mid >= wil->max_vifs) {
diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index 1fd76148d6ff..7d3ff3f318b7 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c
@@ -145,7 +145,7 @@ static void wil_remove_all_additional_vifs(struct wil6210_priv *wil)
 	for (i = 1; i < wil->max_vifs; i++) {
 		vif = wil->vifs[i];
 		if (vif) {
-			wil_vif_prepare_stop(wil, vif);
+			wil_vif_prepare_stop(vif);
 			wil_vif_remove(wil, vif->mid);
 		}
 	}
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 8ab4b5af20fb..6e46a23fed52 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -944,9 +944,9 @@ wil_vif_alloc(struct wil6210_priv *wil, const char *name,
 	      unsigned char name_assign_type, enum nl80211_iftype iftype);
 void wil_vif_free(struct wil6210_vif *vif);
 void *wil_if_alloc(struct device *dev);
-bool wil_has_other_up_ifaces(struct wil6210_priv *wil,
-			     struct net_device *ndev);
-bool wil_has_up_ifaces(struct wil6210_priv *wil);
+bool wil_has_other_active_ifaces(struct wil6210_priv *wil,
+				 struct net_device *ndev, bool up, bool ok);
+bool wil_has_active_ifaces(struct wil6210_priv *wil, bool up, bool ok);
 void wil_if_free(struct wil6210_priv *wil);
 int wil_vif_add(struct wil6210_priv *wil, struct wil6210_vif *vif);
 int wil_if_add(struct wil6210_priv *wil);
@@ -1053,7 +1053,7 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 int wil_cfg80211_iface_combinations_from_fw(
 	struct wil6210_priv *wil,
 	const struct wil_fw_record_concurrency *conc);
-int wil_vif_prepare_stop(struct wil6210_priv *wil, struct wil6210_vif *vif);
+int wil_vif_prepare_stop(struct wil6210_vif *vif);
 
 #if defined(CONFIG_WIL6210_DEBUGFS)
 int wil6210_debugfs_init(struct wil6210_priv *wil);
@@ -1095,6 +1095,7 @@ int wil_tx_init(struct wil6210_vif *vif, int cid);
 int wil_vring_init_bcast(struct wil6210_vif *vif, int id, int size);
 int wil_bcast_init(struct wil6210_vif *vif);
 void wil_bcast_fini(struct wil6210_vif *vif);
+void wil_bcast_fini_all(struct wil6210_priv *wil);
 
 void wil_update_net_queues(struct wil6210_priv *wil, struct vring *vring,
 			   bool should_stop);
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 23c28bf07f28..762ade3840e5 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -341,6 +341,10 @@ static const char *cmdid2name(u16 cmdid)
 		return "WMI_GET_PCP_CHANNEL_CMD";
 	case WMI_P2P_CFG_CMDID:
 		return "WMI_P2P_CFG_CMD";
+	case WMI_PORT_ALLOCATE_CMDID:
+		return "WMI_PORT_ALLOCATE_CMD";
+	case WMI_PORT_DELETE_CMDID:
+		return "WMI_PORT_DELETE_CMD";
 	case WMI_START_LISTEN_CMDID:
 		return "WMI_START_LISTEN_CMD";
 	case WMI_START_SEARCH_CMDID:
@@ -479,6 +483,10 @@ static const char *eventid2name(u16 eventid)
 		return "WMI_GET_PCP_CHANNEL_EVENT";
 	case WMI_P2P_CFG_DONE_EVENTID:
 		return "WMI_P2P_CFG_DONE_EVENT";
+	case WMI_PORT_ALLOCATED_EVENTID:
+		return "WMI_PORT_ALLOCATED_EVENT";
+	case WMI_PORT_DELETED_EVENTID:
+		return "WMI_PORT_DELETED_EVENT";
 	case WMI_LISTEN_STARTED_EVENTID:
 		return "WMI_LISTEN_STARTED_EVENT";
 	case WMI_SEARCH_STARTED_EVENTID:

From 404bbb3cca3c269ef392017053a8d4cb83e0cc77 Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:16 +0200
Subject: [PATCH 0443/1678] wil6210: rename p2p_wdev_mutex to vif_mutex

As more support is added for multiple VIFs, there is a need
to protect the wil6210_vif structure from access while it is
deleted (mainly from del_virtual_intf operation).
Instead of adding another mutex, use p2p_wdev_mutex and rename
it to vif_mutex to reflect the added role. Its existing roles
are similar so it extends nicely (for example it protects the
scan_request member which is also a member of wil6210_vif).

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c | 22 ++++++-------
 drivers/net/wireless/ath/wil6210/main.c     | 16 +++++-----
 drivers/net/wireless/ath/wil6210/p2p.c      | 34 ++++++++++-----------
 drivers/net/wireless/ath/wil6210/wil6210.h  |  2 +-
 drivers/net/wireless/ath/wil6210/wmi.c      |  8 ++---
 5 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index a3ad3f42c693..4afb38f8f6f2 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -412,10 +412,10 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy,
 
 	wil_dbg_misc(wil, "stop_p2p_device: entered\n");
 	mutex_lock(&wil->mutex);
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	wil_p2p_stop_radio_operations(wil);
 	wil->p2p_dev_started = 0;
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	mutex_unlock(&wil->mutex);
 }
 
@@ -706,14 +706,14 @@ static int wil_cfg80211_scan(struct wiphy *wiphy,
 
 	mutex_lock(&wil->mutex);
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	if (vif->scan_request || vif->p2p.discovery_started) {
 		wil_err(wil, "Already scanning\n");
-		mutex_unlock(&wil->p2p_wdev_mutex);
+		mutex_unlock(&wil->vif_mutex);
 		rc = -EAGAIN;
 		goto out;
 	}
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 
 	if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE) {
 		if (!wil->p2p_dev_started) {
@@ -825,7 +825,7 @@ static void wil_cfg80211_abort_scan(struct wiphy *wiphy,
 	wil_dbg_misc(wil, "wdev=0x%p iftype=%d\n", wdev, wdev->iftype);
 
 	mutex_lock(&wil->mutex);
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 
 	if (!vif->scan_request)
 		goto out;
@@ -841,7 +841,7 @@ static void wil_cfg80211_abort_scan(struct wiphy *wiphy,
 		wil_abort_scan(vif, true);
 
 out:
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	mutex_unlock(&wil->mutex);
 }
 
@@ -1944,10 +1944,10 @@ static int wil_cfg80211_suspend(struct wiphy *wiphy,
 	wil_dbg_pm(wil, "suspending\n");
 
 	mutex_lock(&wil->mutex);
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	wil_p2p_stop_radio_operations(wil);
 	wil_abort_scan(ndev_to_vif(wil->main_ndev), true);
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	mutex_unlock(&wil->mutex);
 
 out:
@@ -2236,11 +2236,11 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil)
 	struct wireless_dev *p2p_wdev;
 	struct wil6210_vif *vif;
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	p2p_wdev = wil->p2p_wdev;
 	wil->p2p_wdev = NULL;
 	wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	if (p2p_wdev) {
 		cfg80211_unregister_wdev(p2p_wdev);
 		vif = wdev_to_vif(wil, p2p_wdev);
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 5aeaf9b23b80..9a2a8663b66a 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -536,8 +536,8 @@ int wil_priv_init(struct wil6210_priv *wil)
 		spin_lock_init(&wil->vring_tx_data[i].lock);
 
 	mutex_init(&wil->mutex);
+	mutex_init(&wil->vif_mutex);
 	mutex_init(&wil->wmi_mutex);
-	mutex_init(&wil->p2p_wdev_mutex);
 	mutex_init(&wil->halp.lock);
 
 	init_completion(&wil->wmi_ready);
@@ -1066,21 +1066,21 @@ void wil_abort_scan(struct wil6210_vif *vif, bool sync)
 		.aborted = true,
 	};
 
-	lockdep_assert_held(&wil->p2p_wdev_mutex);
+	lockdep_assert_held(&wil->vif_mutex);
 
 	if (!vif->scan_request)
 		return;
 
 	wil_dbg_misc(wil, "Abort scan_request 0x%p\n", vif->scan_request);
 	del_timer_sync(&vif->scan_timer);
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	rc = wmi_abort_scan(vif);
 	if (!rc && sync)
 		wait_event_interruptible_timeout(wil->wq, !vif->scan_request,
 						 msecs_to_jiffies(
 						 WAIT_FOR_SCAN_ABORT_MS));
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	if (vif->scan_request) {
 		cfg80211_scan_done(vif->scan_request, &info);
 		vif->scan_request = NULL;
@@ -1221,9 +1221,9 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 	/* Disable device led before reset*/
 	wmi_led_cfg(wil, false);
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	wil_abort_scan(vif, false);
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 
 	/* prevent NAPI from being scheduled and prevent wmi commands */
 	mutex_lock(&wil->wmi_mutex);
@@ -1444,10 +1444,10 @@ int __wil_down(struct wil6210_priv *wil)
 	}
 	wil_enable_irq(wil);
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	wil_p2p_stop_radio_operations(wil);
 	wil_abort_scan(ndev_to_vif(wil->main_ndev), false);
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 
 	return wil_reset(wil, false);
 }
diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c
index 8b34f18590d9..db087ea58ddf 100644
--- a/drivers/net/wireless/ath/wil6210/p2p.c
+++ b/drivers/net/wireless/ath/wil6210/p2p.c
@@ -158,16 +158,16 @@ int wil_p2p_listen(struct wil6210_priv *wil, struct wireless_dev *wdev,
 	*cookie = ++p2p->cookie;
 	p2p->listen_duration = duration;
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	if (vif->scan_request) {
 		wil_dbg_misc(wil, "Delaying p2p listen until scan done\n");
 		p2p->pending_listen_wdev = wdev;
 		p2p->discovery_started = 1;
 		rc = 0;
-		mutex_unlock(&wil->p2p_wdev_mutex);
+		mutex_unlock(&wil->vif_mutex);
 		goto out;
 	}
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 
 	rc = wil_p2p_start_listen(vif);
 	if (rc)
@@ -228,14 +228,14 @@ int wil_p2p_cancel_listen(struct wil6210_vif *vif, u64 cookie)
 		return -ENOENT;
 	}
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	cfg80211_remain_on_channel_expired(vif_to_radio_wdev(wil, vif),
 					   p2p->cookie,
 					   &p2p->listen_chan,
 					   GFP_KERNEL);
 	if (vif->mid == 0)
 		wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	return 0;
 }
 
@@ -257,14 +257,14 @@ void wil_p2p_listen_expired(struct work_struct *work)
 	if (!started)
 		return;
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	cfg80211_remain_on_channel_expired(vif_to_radio_wdev(wil, vif),
 					   p2p->cookie,
 					   &p2p->listen_chan,
 					   GFP_KERNEL);
 	if (vif->mid == 0)
 		wil->radio_wdev = wil->main_ndev->ieee80211_ptr;
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 }
 
 void wil_p2p_search_expired(struct work_struct *work)
@@ -287,7 +287,7 @@ void wil_p2p_search_expired(struct work_struct *work)
 			.aborted = false,
 		};
 
-		mutex_lock(&wil->p2p_wdev_mutex);
+		mutex_lock(&wil->vif_mutex);
 		if (vif->scan_request) {
 			cfg80211_scan_done(vif->scan_request, &info);
 			vif->scan_request = NULL;
@@ -295,7 +295,7 @@ void wil_p2p_search_expired(struct work_struct *work)
 				wil->radio_wdev =
 					wil->main_ndev->ieee80211_ptr;
 		}
-		mutex_unlock(&wil->p2p_wdev_mutex);
+		mutex_unlock(&wil->vif_mutex);
 	}
 }
 
@@ -314,17 +314,17 @@ void wil_p2p_delayed_listen_work(struct work_struct *work)
 	if (!p2p->discovery_started || !p2p->pending_listen_wdev)
 		goto out;
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	if (vif->scan_request) {
 		/* another scan started, wait again... */
-		mutex_unlock(&wil->p2p_wdev_mutex);
+		mutex_unlock(&wil->vif_mutex);
 		goto out;
 	}
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 
 	rc = wil_p2p_start_listen(vif);
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	if (rc) {
 		cfg80211_remain_on_channel_expired(p2p->pending_listen_wdev,
 						   p2p->cookie,
@@ -340,7 +340,7 @@ void wil_p2p_delayed_listen_work(struct work_struct *work)
 			wil->radio_wdev = p2p->pending_listen_wdev;
 	}
 	p2p->pending_listen_wdev = NULL;
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 
 out:
 	mutex_unlock(&wil->mutex);
@@ -355,7 +355,7 @@ void wil_p2p_stop_radio_operations(struct wil6210_priv *wil)
 	};
 
 	lockdep_assert_held(&wil->mutex);
-	lockdep_assert_held(&wil->p2p_wdev_mutex);
+	lockdep_assert_held(&wil->vif_mutex);
 
 	if (wil->radio_wdev != wil->p2p_wdev)
 		goto out;
@@ -369,9 +369,9 @@ void wil_p2p_stop_radio_operations(struct wil6210_priv *wil)
 	}
 
 	/* Search or listen on p2p device */
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 	wil_p2p_stop_discovery(vif);
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 
 	if (vif->scan_request) {
 		/* search */
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 6e46a23fed52..1da549acc1ef 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -725,6 +725,7 @@ struct wil6210_priv {
 	wait_queue_head_t wq; /* for all wait_event() use */
 	u8 max_vifs; /* maximum number of interfaces, including main */
 	struct wil6210_vif *vifs[WIL_MAX_VIFS];
+	struct mutex vif_mutex; /* protects access to VIF entries */
 	/* profile */
 	struct cfg80211_chan_def monitor_chandef;
 	u32 monitor_flags;
@@ -793,7 +794,6 @@ struct wil6210_priv {
 
 	/* P2P_DEVICE vif */
 	struct wireless_dev *p2p_wdev;
-	struct mutex p2p_wdev_mutex; /* protect @p2p_wdev and @scan_request */
 	struct wireless_dev *radio_wdev;
 
 	/* High Access Latency Policy voting */
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 762ade3840e5..befeeb267de2 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -766,10 +766,10 @@ static void wmi_evt_rx_mgmt(struct wil6210_vif *vif, int id, void *d, int len)
 			wil_err(wil, "cfg80211_inform_bss_frame() failed\n");
 		}
 	} else {
-		mutex_lock(&wil->p2p_wdev_mutex);
+		mutex_lock(&wil->vif_mutex);
 		cfg80211_rx_mgmt(vif_to_radio_wdev(wil, vif), freq, signal,
 				 (void *)rx_mgmt_frame, d_len, 0);
-		mutex_unlock(&wil->p2p_wdev_mutex);
+		mutex_unlock(&wil->vif_mutex);
 	}
 }
 
@@ -789,7 +789,7 @@ static void wmi_evt_scan_complete(struct wil6210_vif *vif, int id,
 {
 	struct wil6210_priv *wil = vif_to_wil(vif);
 
-	mutex_lock(&wil->p2p_wdev_mutex);
+	mutex_lock(&wil->vif_mutex);
 	if (vif->scan_request) {
 		struct wmi_scan_complete_event *data = d;
 		int status = le32_to_cpu(data->status);
@@ -814,7 +814,7 @@ static void wmi_evt_scan_complete(struct wil6210_vif *vif, int id,
 	} else {
 		wil_err(wil, "SCAN_COMPLETE while not scanning\n");
 	}
-	mutex_unlock(&wil->p2p_wdev_mutex);
+	mutex_unlock(&wil->vif_mutex);
 }
 
 static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)

From 5bd6098252104f1007882805e9d7c36924ff6a81 Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:17 +0200
Subject: [PATCH 0444/1678] wil6210: multiple VIFs support for connections and
 data path

Track the connection status per-VIF.
The data path code is also updated to support multiple VIFs.
This includes RX and TX VRING management, NAPI poll loops,
RX reordering and related code.
Power management code used to check if the main interface
is up or based on connection state of the main interface,
adapt this code to take all VIFs into account.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c   |  32 ++---
 drivers/net/wireless/ath/wil6210/debugfs.c    |  20 ++-
 drivers/net/wireless/ath/wil6210/interrupt.c  |   2 +-
 drivers/net/wireless/ath/wil6210/main.c       |  78 ++++++++---
 drivers/net/wireless/ath/wil6210/netdev.c     |  46 +++++-
 drivers/net/wireless/ath/wil6210/pcie_bus.c   |  18 ++-
 drivers/net/wireless/ath/wil6210/pm.c         | 131 +++++++++++++-----
 drivers/net/wireless/ath/wil6210/rx_reorder.c |  36 +++--
 drivers/net/wireless/ath/wil6210/txrx.c       |  83 +++++++----
 drivers/net/wireless/ath/wil6210/txrx.h       |  22 ++-
 drivers/net/wireless/ath/wil6210/wil6210.h    |  25 ++--
 drivers/net/wireless/ath/wil6210/wmi.c        |   9 +-
 12 files changed, 349 insertions(+), 153 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 4afb38f8f6f2..cdbb393863f3 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -319,7 +319,7 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid,
 	sinfo->tx_packets = stats->tx_packets;
 	sinfo->tx_failed = stats->tx_errors;
 
-	if (test_bit(wil_status_fwconnected, wil->status)) {
+	if (test_bit(wil_vif_fwconnected, vif->status)) {
 		sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 		if (test_bit(WMI_FW_CAPABILITY_RSSI_REPORTING,
 			     wil->fw_capabilities))
@@ -490,11 +490,10 @@ wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
 			return ERR_PTR(-EINVAL);
 		}
 
-		vif = kzalloc(sizeof(*vif), GFP_KERNEL);
-		if (!vif)
+		p2p_wdev = kzalloc(sizeof(*p2p_wdev), GFP_KERNEL);
+		if (!p2p_wdev)
 			return ERR_PTR(-ENOMEM);
 
-		p2p_wdev = vif_to_wdev(vif);
 		p2p_wdev->iftype = type;
 		p2p_wdev->wiphy = wiphy;
 		/* use our primary ethernet address */
@@ -904,8 +903,8 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 	wil_dbg_misc(wil, "connect, mid=%d\n", vif->mid);
 	wil_print_connect_params(wil, sme);
 
-	if (test_bit(wil_status_fwconnecting, wil->status) ||
-	    test_bit(wil_status_fwconnected, wil->status))
+	if (test_bit(wil_vif_fwconnecting, vif->status) ||
+	    test_bit(wil_vif_fwconnected, vif->status))
 		return -EALREADY;
 
 	if (sme->ie_len > WMI_MAX_IE_LEN) {
@@ -1009,18 +1008,19 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
 	ether_addr_copy(conn.bssid, bss->bssid);
 	ether_addr_copy(conn.dst_mac, bss->bssid);
 
-	set_bit(wil_status_fwconnecting, wil->status);
+	set_bit(wil_vif_fwconnecting, vif->status);
 
 	rc = wmi_send(wil, WMI_CONNECT_CMDID, vif->mid, &conn, sizeof(conn));
 	if (rc == 0) {
 		netif_carrier_on(ndev);
-		wil6210_bus_request(wil, WIL_MAX_BUS_REQUEST_KBPS);
+		if (!wil_has_other_active_ifaces(wil, ndev, false, true))
+			wil6210_bus_request(wil, WIL_MAX_BUS_REQUEST_KBPS);
 		vif->bss = bss;
 		/* Connect can take lots of time */
 		mod_timer(&vif->connect_timer,
 			  jiffies + msecs_to_jiffies(5000));
 	} else {
-		clear_bit(wil_status_fwconnecting, wil->status);
+		clear_bit(wil_vif_fwconnecting, vif->status);
 	}
 
  out:
@@ -1040,8 +1040,8 @@ static int wil_cfg80211_disconnect(struct wiphy *wiphy,
 	wil_dbg_misc(wil, "disconnect: reason=%d, mid=%d\n",
 		     reason_code, vif->mid);
 
-	if (!(test_bit(wil_status_fwconnecting, wil->status) ||
-	      test_bit(wil_status_fwconnected, wil->status))) {
+	if (!(test_bit(wil_vif_fwconnecting, vif->status) ||
+	      test_bit(wil_vif_fwconnected, vif->status))) {
 		wil_err(wil, "Disconnect was called while disconnected\n");
 		return 0;
 	}
@@ -1946,7 +1946,7 @@ static int wil_cfg80211_suspend(struct wiphy *wiphy,
 	mutex_lock(&wil->mutex);
 	mutex_lock(&wil->vif_mutex);
 	wil_p2p_stop_radio_operations(wil);
-	wil_abort_scan(ndev_to_vif(wil->main_ndev), true);
+	wil_abort_scan_all_vifs(wil, true);
 	mutex_unlock(&wil->vif_mutex);
 	mutex_unlock(&wil->mutex);
 
@@ -2234,7 +2234,6 @@ void wil_cfg80211_deinit(struct wil6210_priv *wil)
 void wil_p2p_wdev_free(struct wil6210_priv *wil)
 {
 	struct wireless_dev *p2p_wdev;
-	struct wil6210_vif *vif;
 
 	mutex_lock(&wil->vif_mutex);
 	p2p_wdev = wil->p2p_wdev;
@@ -2243,8 +2242,7 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil)
 	mutex_unlock(&wil->vif_mutex);
 	if (p2p_wdev) {
 		cfg80211_unregister_wdev(p2p_wdev);
-		vif = wdev_to_vif(wil, p2p_wdev);
-		kfree(vif);
+		kfree(p2p_wdev);
 	}
 }
 
@@ -2538,7 +2536,7 @@ static int wil_rf_sector_get_selected(struct wiphy *wiphy,
 			return -ENOENT;
 		}
 	} else {
-		if (test_bit(wil_status_fwconnected, wil->status)) {
+		if (test_bit(wil_vif_fwconnected, vif->status)) {
 			wil_err(wil, "must specify MAC address when connected\n");
 			return -EINVAL;
 		}
@@ -2665,7 +2663,7 @@ static int wil_rf_sector_set_selected(struct wiphy *wiphy,
 			cid = -1;
 		}
 	} else {
-		if (test_bit(wil_status_fwconnected, wil->status)) {
+		if (test_bit(wil_vif_fwconnected, vif->status)) {
 			wil_err(wil, "must specify MAC address when connected\n");
 			return -EINVAL;
 		}
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index 636da5e32ae1..93a99a1c8c92 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -1201,12 +1201,13 @@ static int wil_link_debugfs_show(struct seq_file *s, void *data)
 {
 	struct wil6210_priv *wil = s->private;
 	struct station_info sinfo;
-	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
 	int i, rc;
 
 	for (i = 0; i < ARRAY_SIZE(wil->sta); i++) {
 		struct wil_sta_info *p = &wil->sta[i];
 		char *status = "unknown";
+		struct wil6210_vif *vif;
+		u8 mid;
 
 		switch (p->status) {
 		case wil_sta_unused:
@@ -1219,9 +1220,15 @@ static int wil_link_debugfs_show(struct seq_file *s, void *data)
 			status = "connected";
 			break;
 		}
-		seq_printf(s, "[%d] %pM %s\n", i, p->addr, status);
+		mid = (p->status != wil_sta_unused) ? p->mid : U8_MAX;
+		seq_printf(s, "[%d][MID %d] %pM %s\n",
+			   i, mid, p->addr, status);
 
-		if (p->status == wil_sta_connected) {
+		if (p->status != wil_sta_connected)
+			continue;
+
+		vif = (mid < wil->max_vifs) ? wil->vifs[mid] : NULL;
+		if (vif) {
 			rc = wil_cid_fill_sinfo(vif, i, &sinfo);
 			if (rc)
 				return rc;
@@ -1229,6 +1236,8 @@ static int wil_link_debugfs_show(struct seq_file *s, void *data)
 			seq_printf(s, "  Tx_mcs = %d\n", sinfo.txrate.mcs);
 			seq_printf(s, "  Rx_mcs = %d\n", sinfo.rxrate.mcs);
 			seq_printf(s, "  SQ     = %d\n", sinfo.signal);
+		} else {
+			seq_puts(s, "  INVALID MID\n");
 		}
 	}
 
@@ -1420,6 +1429,7 @@ __acquires(&p->tid_rx_lock) __releases(&p->tid_rx_lock)
 		struct wil_sta_info *p = &wil->sta[i];
 		char *status = "unknown";
 		u8 aid = 0;
+		u8 mid;
 
 		switch (p->status) {
 		case wil_sta_unused:
@@ -1433,7 +1443,9 @@ __acquires(&p->tid_rx_lock) __releases(&p->tid_rx_lock)
 			aid = p->aid;
 			break;
 		}
-		seq_printf(s, "[%d] %pM %s AID %d\n", i, p->addr, status, aid);
+		mid = (p->status != wil_sta_unused) ? p->mid : U8_MAX;
+		seq_printf(s, "[%d] %pM %s MID %d AID %d\n", i, p->addr, status,
+			   mid, aid);
 
 		if (p->status == wil_sta_connected) {
 			spin_lock_bh(&p->tid_rx_lock);
diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index ef8f9a769833..84e9840c1752 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c
@@ -127,7 +127,7 @@ void wil6210_unmask_irq_tx(struct wil6210_priv *wil)
 
 void wil6210_unmask_irq_rx(struct wil6210_priv *wil)
 {
-	bool unmask_rx_htrsh = test_bit(wil_status_fwconnected, wil->status);
+	bool unmask_rx_htrsh = atomic_read(&wil->connected_vifs) > 0;
 
 	wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, IMC),
 	      unmask_rx_htrsh ? WIL6210_IMC_RX : WIL6210_IMC_RX_NO_RX_HTRSH);
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 9a2a8663b66a..a4b413e8d55a 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -175,6 +175,15 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 		     cid, sta->mid, sta->status);
 	/* inform upper/lower layers */
 	if (sta->status != wil_sta_unused) {
+		if (vif->mid != sta->mid) {
+			wil_err(wil, "STA MID mismatch with VIF MID(%d)\n",
+				vif->mid);
+			/* let FW override sta->mid but be more strict with
+			 * user space requests
+			 */
+			if (!from_event)
+				return;
+		}
 		if (!from_event) {
 			bool del_sta = (wdev->iftype == NL80211_IFTYPE_AP) ?
 						disable_ap_sme : false;
@@ -277,32 +286,35 @@ static void _wil6210_disconnect(struct wil6210_vif *vif, const u8 *bssid,
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
 		wil_bcast_fini(vif);
-		wil_update_net_queues_bh(wil, NULL, true);
+		wil_update_net_queues_bh(wil, vif, NULL, true);
 		netif_carrier_off(ndev);
-		wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
+		if (!wil_has_other_active_ifaces(wil, ndev, false, true))
+			wil6210_bus_request(wil, WIL_DEFAULT_BUS_REQUEST_KBPS);
 
-		if (test_bit(wil_status_fwconnected, wil->status)) {
-			clear_bit(wil_status_fwconnected, wil->status);
+		if (test_and_clear_bit(wil_vif_fwconnected, vif->status)) {
+			atomic_dec(&wil->connected_vifs);
 			cfg80211_disconnected(ndev, reason_code,
 					      NULL, 0,
 					      vif->locally_generated_disc,
 					      GFP_KERNEL);
 			vif->locally_generated_disc = false;
-		} else if (test_bit(wil_status_fwconnecting, wil->status)) {
+		} else if (test_bit(wil_vif_fwconnecting, vif->status)) {
 			cfg80211_connect_result(ndev, bssid, NULL, 0, NULL, 0,
 						WLAN_STATUS_UNSPECIFIED_FAILURE,
 						GFP_KERNEL);
 			vif->bss = NULL;
 		}
-		clear_bit(wil_status_fwconnecting, wil->status);
+		clear_bit(wil_vif_fwconnecting, vif->status);
 		break;
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
 		if (!wil_vif_is_connected(wil, vif->mid)) {
-			wil_update_net_queues_bh(wil, NULL, true);
-			clear_bit(wil_status_fwconnected, wil->status);
+			wil_update_net_queues_bh(wil, vif, NULL, true);
+			if (test_and_clear_bit(wil_vif_fwconnected,
+					       vif->status))
+				atomic_dec(&wil->connected_vifs);
 		} else {
-			wil_update_net_queues_bh(wil, NULL, false);
+			wil_update_net_queues_bh(wil, vif, NULL, false);
 		}
 		break;
 	default:
@@ -322,11 +334,11 @@ void wil_disconnect_worker(struct work_struct *work)
 		struct wmi_disconnect_event evt;
 	} __packed reply;
 
-	if (test_bit(wil_status_fwconnected, wil->status))
+	if (test_bit(wil_vif_fwconnected, vif->status))
 		/* connect succeeded after all */
 		return;
 
-	if (!test_bit(wil_status_fwconnecting, wil->status))
+	if (!test_bit(wil_vif_fwconnecting, vif->status))
 		/* already disconnected */
 		return;
 
@@ -338,11 +350,11 @@ void wil_disconnect_worker(struct work_struct *work)
 		return;
 	}
 
-	wil_update_net_queues_bh(wil, NULL, true);
+	wil_update_net_queues_bh(wil, vif, NULL, true);
 	netif_carrier_off(ndev);
 	cfg80211_connect_result(ndev, NULL, NULL, 0, NULL, 0,
 				WLAN_STATUS_UNSPECIFIED_FAILURE, GFP_KERNEL);
-	clear_bit(wil_status_fwconnecting, wil->status);
+	clear_bit(wil_vif_fwconnecting, vif->status);
 }
 
 static int wil_wait_for_recovery(struct wil6210_priv *wil)
@@ -1087,6 +1099,20 @@ void wil_abort_scan(struct wil6210_vif *vif, bool sync)
 	}
 }
 
+void wil_abort_scan_all_vifs(struct wil6210_priv *wil, bool sync)
+{
+	int i;
+
+	lockdep_assert_held(&wil->vif_mutex);
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		struct wil6210_vif *vif = wil->vifs[i];
+
+		if (vif)
+			wil_abort_scan(vif, sync);
+	}
+}
+
 int wil_ps_update(struct wil6210_priv *wil, enum wmi_ps_profile_type ps_profile)
 {
 	int rc;
@@ -1139,6 +1165,7 @@ static int wil_restore_vifs(struct wil6210_priv *wil)
 		vif = wil->vifs[i];
 		if (!vif)
 			continue;
+		vif->ap_isolate = 0;
 		if (vif->mid) {
 			ndev = vif_to_ndev(vif);
 			wdev = vif_to_wdev(vif);
@@ -1162,10 +1189,10 @@ static int wil_restore_vifs(struct wil6210_priv *wil)
  */
 int wil_reset(struct wil6210_priv *wil, bool load_fw)
 {
-	int rc;
+	int rc, i;
 	unsigned long status_flags = BIT(wil_status_resetting);
 	int no_flash;
-	struct wil6210_vif *vif = ndev_to_vif(wil->main_ndev);
+	struct wil6210_vif *vif;
 
 	wil_dbg_misc(wil, "reset\n");
 
@@ -1214,17 +1241,23 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 		goto out;
 	}
 
-	cancel_work_sync(&vif->disconnect_worker);
-	wil6210_disconnect(vif, NULL, WLAN_REASON_DEAUTH_LEAVING, false);
+	mutex_lock(&wil->vif_mutex);
+	wil_abort_scan_all_vifs(wil, false);
+	mutex_unlock(&wil->vif_mutex);
+
+	for (i = 0; i < wil->max_vifs; i++) {
+		vif = wil->vifs[i];
+		if (vif) {
+			cancel_work_sync(&vif->disconnect_worker);
+			wil6210_disconnect(vif, NULL,
+					   WLAN_REASON_DEAUTH_LEAVING, false);
+		}
+	}
 	wil_bcast_fini_all(wil);
 
 	/* Disable device led before reset*/
 	wmi_led_cfg(wil, false);
 
-	mutex_lock(&wil->vif_mutex);
-	wil_abort_scan(vif, false);
-	mutex_unlock(&wil->vif_mutex);
-
 	/* prevent NAPI from being scheduled and prevent wmi commands */
 	mutex_lock(&wil->wmi_mutex);
 	if (test_bit(wil_status_suspending, wil->status))
@@ -1294,7 +1327,6 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
 	}
 
 	/* init after reset */
-	vif->ap_isolate = 0;
 	reinit_completion(&wil->wmi_ready);
 	reinit_completion(&wil->wmi_call);
 	reinit_completion(&wil->halp.comp);
@@ -1446,7 +1478,7 @@ int __wil_down(struct wil6210_priv *wil)
 
 	mutex_lock(&wil->vif_mutex);
 	wil_p2p_stop_radio_operations(wil);
-	wil_abort_scan(ndev_to_vif(wil->main_ndev), false);
+	wil_abort_scan_all_vifs(wil, false);
 	mutex_unlock(&wil->vif_mutex);
 
 	return wil_reset(wil, false);
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index 87956c0f7c79..05e9408e7ea3 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -43,6 +43,7 @@ bool wil_has_other_active_ifaces(struct wil6210_priv *wil,
 
 bool wil_has_active_ifaces(struct wil6210_priv *wil, bool up, bool ok)
 {
+	/* use NULL ndev argument to check all interfaces */
 	return wil_has_other_active_ifaces(wil, NULL, up, ok);
 }
 
@@ -130,11 +131,19 @@ static int wil6210_netdev_poll_tx(struct napi_struct *napi, int budget)
 	for (i = 0; i < WIL6210_MAX_TX_RINGS; i++) {
 		struct vring *vring = &wil->vring_tx[i];
 		struct vring_tx_data *txdata = &wil->vring_tx_data[i];
+		struct wil6210_vif *vif;
 
-		if (!vring->va || !txdata->enabled)
+		if (!vring->va || !txdata->enabled ||
+		    txdata->mid >= wil->max_vifs)
 			continue;
 
-		tx_done += wil_tx_complete(wil, i);
+		vif = wil->vifs[txdata->mid];
+		if (unlikely(!vif)) {
+			wil_dbg_txrx(wil, "Invalid MID %d\n", txdata->mid);
+			continue;
+		}
+
+		tx_done += wil_tx_complete(vif, i);
 	}
 
 	if (tx_done < budget) {
@@ -232,6 +241,8 @@ static void wil_vif_init(struct wil6210_vif *vif)
 	INIT_WORK(&vif->p2p.delayed_listen_work, wil_p2p_delayed_listen_work);
 
 	INIT_LIST_HEAD(&vif->probe_client_pending);
+
+	vif->net_queue_stopped = 1;
 }
 
 static u8 wil_vif_find_free_mid(struct wil6210_priv *wil)
@@ -406,12 +417,14 @@ int wil_if_add(struct wil6210_priv *wil)
 		return rc;
 	}
 
-	netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx,
+	init_dummy_netdev(&wil->napi_ndev);
+	netif_napi_add(&wil->napi_ndev, &wil->napi_rx, wil6210_netdev_poll_rx,
 		       WIL6210_NAPI_BUDGET);
-	netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx,
+	netif_tx_napi_add(&wil->napi_ndev,
+			  &wil->napi_tx, wil6210_netdev_poll_tx,
 			  WIL6210_NAPI_BUDGET);
 
-	wil_update_net_queues_bh(wil, NULL, true);
+	wil_update_net_queues_bh(wil, vif, NULL, true);
 
 	rtnl_lock();
 	rc = wil_vif_add(wil, vif);
@@ -450,10 +463,29 @@ void wil_vif_remove(struct wil6210_priv *wil, u8 mid)
 	 */
 	unregister_netdevice(ndev);
 
+	mutex_lock(&wil->mutex);
+	wil6210_disconnect(vif, NULL, WLAN_REASON_DEAUTH_LEAVING, false);
+	mutex_unlock(&wil->mutex);
+
 	if (any_active && vif->mid != 0)
 		wmi_port_delete(wil, vif->mid);
 
+	/* make sure no one is accessing the VIF before removing */
+	mutex_lock(&wil->vif_mutex);
 	wil->vifs[mid] = NULL;
+	/* ensure NAPI code will see the NULL VIF */
+	wmb();
+	if (test_bit(wil_status_napi_en, wil->status)) {
+		napi_synchronize(&wil->napi_rx);
+		napi_synchronize(&wil->napi_tx);
+	}
+	mutex_unlock(&wil->vif_mutex);
+
+	flush_work(&wil->wmi_event_worker);
+	del_timer_sync(&vif->connect_timer);
+	cancel_work_sync(&vif->disconnect_worker);
+	wil_probe_client_flush(vif);
+	cancel_work_sync(&vif->probe_client_worker);
 	/* for VIFs, ndev will be freed by destructor after RTNL is unlocked.
 	 * the main interface will be freed in wil_if_free, we need to keep it
 	 * a bit longer so logging macros will work.
@@ -470,5 +502,9 @@ void wil_if_remove(struct wil6210_priv *wil)
 	rtnl_lock();
 	wil_vif_remove(wil, 0);
 	rtnl_unlock();
+
+	netif_napi_del(&wil->napi_tx);
+	netif_napi_del(&wil->napi_rx);
+
 	wiphy_unregister(wdev->wiphy);
 }
diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index 7d3ff3f318b7..19cbc6add637 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c
@@ -450,12 +450,15 @@ static int wil6210_suspend(struct device *dev, bool is_runtime)
 	int rc = 0;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct wil6210_priv *wil = pci_get_drvdata(pdev);
-	struct net_device *ndev = wil->main_ndev;
-	bool keep_radio_on = ndev->flags & IFF_UP &&
-			     wil->keep_radio_on_during_sleep;
+	bool keep_radio_on, active_ifaces;
 
 	wil_dbg_pm(wil, "suspend: %s\n", is_runtime ? "runtime" : "system");
 
+	mutex_lock(&wil->vif_mutex);
+	active_ifaces = wil_has_active_ifaces(wil, true, false);
+	mutex_unlock(&wil->vif_mutex);
+	keep_radio_on = active_ifaces && wil->keep_radio_on_during_sleep;
+
 	rc = wil_can_suspend(wil, is_runtime);
 	if (rc)
 		goto out;
@@ -482,12 +485,15 @@ static int wil6210_resume(struct device *dev, bool is_runtime)
 	int rc = 0;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct wil6210_priv *wil = pci_get_drvdata(pdev);
-	struct net_device *ndev = wil->main_ndev;
-	bool keep_radio_on = ndev->flags & IFF_UP &&
-			     wil->keep_radio_on_during_sleep;
+	bool keep_radio_on, active_ifaces;
 
 	wil_dbg_pm(wil, "resume: %s\n", is_runtime ? "runtime" : "system");
 
+	mutex_lock(&wil->vif_mutex);
+	active_ifaces = wil_has_active_ifaces(wil, true, false);
+	mutex_unlock(&wil->vif_mutex);
+	keep_radio_on = active_ifaces && wil->keep_radio_on_during_sleep;
+
 	/* In case radio stays on, platform device will control
 	 * PCIe master
 	 */
diff --git a/drivers/net/wireless/ath/wil6210/pm.c b/drivers/net/wireless/ath/wil6210/pm.c
index 12f6d78bcf63..ba81fb3ac96f 100644
--- a/drivers/net/wireless/ath/wil6210/pm.c
+++ b/drivers/net/wireless/ath/wil6210/pm.c
@@ -21,13 +21,72 @@
 
 #define WIL6210_AUTOSUSPEND_DELAY_MS (1000)
 
+static void wil_pm_wake_connected_net_queues(struct wil6210_priv *wil)
+{
+	int i;
+
+	mutex_lock(&wil->vif_mutex);
+	for (i = 0; i < wil->max_vifs; i++) {
+		struct wil6210_vif *vif = wil->vifs[i];
+
+		if (vif && test_bit(wil_vif_fwconnected, vif->status))
+			wil_update_net_queues_bh(wil, vif, NULL, false);
+	}
+	mutex_unlock(&wil->vif_mutex);
+}
+
+static void wil_pm_stop_all_net_queues(struct wil6210_priv *wil)
+{
+	int i;
+
+	mutex_lock(&wil->vif_mutex);
+	for (i = 0; i < wil->max_vifs; i++) {
+		struct wil6210_vif *vif = wil->vifs[i];
+
+		if (vif)
+			wil_update_net_queues_bh(wil, vif, NULL, true);
+	}
+	mutex_unlock(&wil->vif_mutex);
+}
+
+static bool
+wil_can_suspend_vif(struct wil6210_priv *wil, struct wil6210_vif *vif,
+		    bool is_runtime)
+{
+	struct wireless_dev *wdev = vif_to_wdev(vif);
+
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_MONITOR:
+		wil_dbg_pm(wil, "Sniffer\n");
+		return false;
+
+	/* for STA-like interface, don't runtime suspend */
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+		if (test_bit(wil_vif_fwconnecting, vif->status)) {
+			wil_dbg_pm(wil, "Delay suspend when connecting\n");
+			return false;
+		}
+		if (is_runtime) {
+			wil_dbg_pm(wil, "STA-like interface\n");
+			return false;
+		}
+		break;
+	/* AP-like interface - can't suspend */
+	default:
+		wil_dbg_pm(wil, "AP-like interface\n");
+		return false;
+	}
+
+	return true;
+}
+
 int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime)
 {
-	int rc = 0;
-	struct net_device *ndev = wil->main_ndev;
-	struct wireless_dev *wdev = ndev->ieee80211_ptr;
+	int rc = 0, i;
 	bool wmi_only = test_bit(WMI_FW_CAPABILITY_WMI_ONLY,
 				 wil->fw_capabilities);
+	bool active_ifaces;
 
 	wil_dbg_pm(wil, "can_suspend: %s\n", is_runtime ? "runtime" : "system");
 
@@ -41,7 +100,12 @@ int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime)
 		rc = -EBUSY;
 		goto out;
 	}
-	if (!(ndev->flags & IFF_UP)) {
+
+	mutex_lock(&wil->vif_mutex);
+	active_ifaces = wil_has_active_ifaces(wil, true, false);
+	mutex_unlock(&wil->vif_mutex);
+
+	if (!active_ifaces) {
 		/* can always sleep when down */
 		wil_dbg_pm(wil, "Interface is down\n");
 		goto out;
@@ -58,32 +122,19 @@ int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime)
 	}
 
 	/* interface is running */
-	switch (wdev->iftype) {
-	case NL80211_IFTYPE_MONITOR:
-		wil_dbg_pm(wil, "Sniffer\n");
-		rc = -EBUSY;
-		goto out;
-	/* for STA-like interface, don't runtime suspend */
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_P2P_CLIENT:
-		if (test_bit(wil_status_fwconnecting, wil->status)) {
-			wil_dbg_pm(wil, "Delay suspend when connecting\n");
+	mutex_lock(&wil->vif_mutex);
+	for (i = 0; i < wil->max_vifs; i++) {
+		struct wil6210_vif *vif = wil->vifs[i];
+
+		if (!vif)
+			continue;
+		if (!wil_can_suspend_vif(wil, vif, is_runtime)) {
 			rc = -EBUSY;
+			mutex_unlock(&wil->vif_mutex);
 			goto out;
 		}
-		/* Runtime pm not supported in case the interface is up */
-		if (is_runtime) {
-			wil_dbg_pm(wil, "STA-like interface\n");
-			rc = -EBUSY;
-			goto out;
-		}
-		break;
-	/* AP-like interface - can't suspend */
-	default:
-		wil_dbg_pm(wil, "AP-like interface\n");
-		rc = -EBUSY;
-		break;
 	}
+	mutex_unlock(&wil->vif_mutex);
 
 out:
 	wil_dbg_pm(wil, "can_suspend: %s => %s (%d)\n",
@@ -128,8 +179,7 @@ static int wil_resume_keep_radio_on(struct wil6210_priv *wil)
 	}
 
 	/* Wake all queues */
-	if (test_bit(wil_status_fwconnected, wil->status))
-		wil_update_net_queues_bh(wil, NULL, false);
+	wil_pm_wake_connected_net_queues(wil);
 
 out:
 	if (rc)
@@ -153,7 +203,7 @@ static int wil_suspend_keep_radio_on(struct wil6210_priv *wil)
 		wil->suspend_stats.rejected_by_host++;
 		return -EBUSY;
 	}
-	wil_update_net_queues_bh(wil, NULL, true);
+	wil_pm_stop_all_net_queues(wil);
 
 	if (!wil_is_tx_idle(wil)) {
 		wil_dbg_pm(wil, "Pending TX data, reject suspend\n");
@@ -244,22 +294,20 @@ resume_after_fail:
 	/* if resume succeeded, reject the suspend */
 	if (!rc) {
 		rc = -EBUSY;
-		if (test_bit(wil_status_fwconnected, wil->status))
-			wil_update_net_queues_bh(wil, NULL, false);
+		wil_pm_wake_connected_net_queues(wil);
 	}
 	return rc;
 
 reject_suspend:
 	clear_bit(wil_status_suspending, wil->status);
-	if (test_bit(wil_status_fwconnected, wil->status))
-		wil_update_net_queues_bh(wil, NULL, false);
+	wil_pm_wake_connected_net_queues(wil);
 	return -EBUSY;
 }
 
 static int wil_suspend_radio_off(struct wil6210_priv *wil)
 {
 	int rc = 0;
-	struct net_device *ndev = wil->main_ndev;
+	bool active_ifaces;
 
 	wil_dbg_pm(wil, "suspend radio off\n");
 
@@ -273,7 +321,11 @@ static int wil_suspend_radio_off(struct wil6210_priv *wil)
 	}
 
 	/* if netif up, hardware is alive, shut it down */
-	if (ndev->flags & IFF_UP) {
+	mutex_lock(&wil->vif_mutex);
+	active_ifaces = wil_has_active_ifaces(wil, true, false);
+	mutex_unlock(&wil->vif_mutex);
+
+	if (active_ifaces) {
 		rc = wil_down(wil);
 		if (rc) {
 			wil_err(wil, "wil_down : %d\n", rc);
@@ -307,16 +359,19 @@ out:
 static int wil_resume_radio_off(struct wil6210_priv *wil)
 {
 	int rc = 0;
-	struct net_device *ndev = wil->main_ndev;
+	bool active_ifaces;
 
 	wil_dbg_pm(wil, "Enabling PCIe IRQ\n");
 	wil_enable_irq(wil);
-	/* if netif up, bring hardware up
+	/* if any netif up, bring hardware up
 	 * During open(), IFF_UP set after actual device method
 	 * invocation. This prevent recursive call to wil_up()
 	 * wil_status_suspended will be cleared in wil_reset
 	 */
-	if (ndev->flags & IFF_UP)
+	mutex_lock(&wil->vif_mutex);
+	active_ifaces = wil_has_active_ifaces(wil, true, false);
+	mutex_unlock(&wil->vif_mutex);
+	if (active_ifaces)
 		rc = wil_up(wil);
 	else
 		clear_bit(wil_status_suspended, wil->status);
diff --git a/drivers/net/wireless/ath/wil6210/rx_reorder.c b/drivers/net/wireless/ath/wil6210/rx_reorder.c
index c2140b3d9496..14dcb0698dee 100644
--- a/drivers/net/wireless/ath/wil6210/rx_reorder.c
+++ b/drivers/net/wireless/ath/wil6210/rx_reorder.c
@@ -41,11 +41,10 @@ static inline int reorder_index(struct wil_tid_ampdu_rx *r, u16 seq)
 	return seq_sub(seq, r->ssn) % r->buf_size;
 }
 
-static void wil_release_reorder_frame(struct wil6210_priv *wil,
+static void wil_release_reorder_frame(struct net_device *ndev,
 				      struct wil_tid_ampdu_rx *r,
 				      int index)
 {
-	struct net_device *ndev = wil->main_ndev;
 	struct sk_buff *skb = r->reorder_buf[index];
 
 	if (!skb)
@@ -60,7 +59,7 @@ no_frame:
 	r->head_seq_num = seq_inc(r->head_seq_num);
 }
 
-static void wil_release_reorder_frames(struct wil6210_priv *wil,
+static void wil_release_reorder_frames(struct net_device *ndev,
 				       struct wil_tid_ampdu_rx *r,
 				       u16 hseq)
 {
@@ -74,18 +73,18 @@ static void wil_release_reorder_frames(struct wil6210_priv *wil,
 	 */
 	while (seq_less(r->head_seq_num, hseq) && r->stored_mpdu_num) {
 		index = reorder_index(r, r->head_seq_num);
-		wil_release_reorder_frame(wil, r, index);
+		wil_release_reorder_frame(ndev, r, index);
 	}
 	r->head_seq_num = hseq;
 }
 
-static void wil_reorder_release(struct wil6210_priv *wil,
+static void wil_reorder_release(struct net_device *ndev,
 				struct wil_tid_ampdu_rx *r)
 {
 	int index = reorder_index(r, r->head_seq_num);
 
 	while (r->reorder_buf[index]) {
-		wil_release_reorder_frame(wil, r, index);
+		wil_release_reorder_frame(ndev, r, index);
 		index = reorder_index(r, r->head_seq_num);
 	}
 }
@@ -94,7 +93,8 @@ static void wil_reorder_release(struct wil6210_priv *wil,
 void wil_rx_reorder(struct wil6210_priv *wil, struct sk_buff *skb)
 __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 {
-	struct net_device *ndev = wil->main_ndev;
+	struct wil6210_vif *vif;
+	struct net_device *ndev;
 	struct vring_rx_desc *d = wil_skb_rxdesc(skb);
 	int tid = wil_rxdesc_tid(d);
 	int cid = wil_rxdesc_cid(d);
@@ -109,6 +109,14 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 	wil_dbg_txrx(wil, "MID %d CID %d TID %d Seq 0x%03x mcast %01x\n",
 		     mid, cid, tid, seq, mcast);
 
+	vif = wil->vifs[mid];
+	if (unlikely(!vif)) {
+		wil_dbg_txrx(wil, "invalid VIF, mid %d\n", mid);
+		dev_kfree_skb(skb);
+		return;
+	}
+	ndev = vif_to_ndev(vif);
+
 	if (unlikely(mcast)) {
 		wil_netif_rx_any(skb, ndev);
 		return;
@@ -169,7 +177,7 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 	if (!seq_less(seq, r->head_seq_num + r->buf_size)) {
 		hseq = seq_inc(seq_sub(seq, r->buf_size));
 		/* release stored frames up to new head to stack */
-		wil_release_reorder_frames(wil, r, hseq);
+		wil_release_reorder_frames(ndev, r, hseq);
 	}
 
 	/* Now the new frame is always in the range of the reordering buffer */
@@ -200,16 +208,18 @@ __acquires(&sta->tid_rx_lock) __releases(&sta->tid_rx_lock)
 	r->reorder_buf[index] = skb;
 	r->reorder_time[index] = jiffies;
 	r->stored_mpdu_num++;
-	wil_reorder_release(wil, r);
+	wil_reorder_release(ndev, r);
 
 out:
 	spin_unlock(&sta->tid_rx_lock);
 }
 
 /* process BAR frame, called in NAPI context */
-void wil_rx_bar(struct wil6210_priv *wil, u8 cid, u8 tid, u16 seq)
+void wil_rx_bar(struct wil6210_priv *wil, struct wil6210_vif *vif,
+		u8 cid, u8 tid, u16 seq)
 {
 	struct wil_sta_info *sta = &wil->sta[cid];
+	struct net_device *ndev = vif_to_ndev(vif);
 	struct wil_tid_ampdu_rx *r;
 
 	spin_lock(&sta->tid_rx_lock);
@@ -224,9 +234,9 @@ void wil_rx_bar(struct wil6210_priv *wil, u8 cid, u8 tid, u16 seq)
 			seq, r->head_seq_num);
 		goto out;
 	}
-	wil_dbg_txrx(wil, "BAR: CID %d TID %d Seq 0x%03x head 0x%03x\n",
-		     cid, tid, seq, r->head_seq_num);
-	wil_release_reorder_frames(wil, r, seq);
+	wil_dbg_txrx(wil, "BAR: CID %d MID %d TID %d Seq 0x%03x head 0x%03x\n",
+		     cid, vif->mid, tid, seq, r->head_seq_num);
+	wil_release_reorder_frames(ndev, r, seq);
 
 out:
 	spin_unlock(&sta->tid_rx_lock);
diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index a53238647b95..b60b9fcaaebd 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -475,7 +475,8 @@ static struct sk_buff *wil_vring_reap_rx(struct wil6210_priv *wil,
 					 struct vring *vring)
 {
 	struct device *dev = wil_to_dev(wil);
-	struct net_device *ndev = wil->main_ndev;
+	struct wil6210_vif *vif;
+	struct net_device *ndev;
 	volatile struct vring_rx_desc *_d;
 	struct vring_rx_desc *d;
 	struct sk_buff *skb;
@@ -484,7 +485,7 @@ static struct sk_buff *wil_vring_reap_rx(struct wil6210_priv *wil,
 	unsigned int sz = wil->rx_buf_len + ETH_HLEN + snaplen;
 	u16 dmalen;
 	u8 ftype;
-	int cid;
+	int cid, mid;
 	int i;
 	struct wil_net_stats *stats;
 
@@ -521,6 +522,16 @@ again:
 			  (const void *)d, sizeof(*d), false);
 
 	cid = wil_rxdesc_cid(d);
+	mid = wil_rxdesc_mid(d);
+	vif = wil->vifs[mid];
+
+	if (unlikely(!vif)) {
+		wil_dbg_txrx(wil, "skipped RX descriptor with invalid mid %d",
+			     mid);
+		kfree_skb(skb);
+		goto again;
+	}
+	ndev = vif_to_ndev(vif);
 	stats = &wil->sta[cid].stats;
 
 	if (unlikely(dmalen > sz)) {
@@ -554,7 +565,6 @@ again:
 	ftype = wil_rxdesc_ftype(d) << 2;
 	if (unlikely(ftype != IEEE80211_FTYPE_DATA)) {
 		u8 fc1 = wil_rxdesc_fc1(d);
-		int mid = wil_rxdesc_mid(d);
 		int tid = wil_rxdesc_tid(d);
 		u16 seq = wil_rxdesc_seq(d);
 
@@ -566,7 +576,7 @@ again:
 			wil_dbg_txrx(wil,
 				     "BAR: MID %d CID %d TID %d Seq 0x%03x\n",
 				     mid, cid, tid, seq);
-			wil_rx_bar(wil, cid, tid, seq);
+			wil_rx_bar(wil, vif, cid, tid, seq);
 		} else {
 			/* print again all info. One can enable only this
 			 * without overhead for printing every Rx frame
@@ -622,6 +632,11 @@ again:
 /**
  * allocate and fill up to @count buffers in rx ring
  * buffers posted at @swtail
+ * Note: we have a single RX queue for servicing all VIFs, but we
+ * allocate skbs with headroom according to main interface only. This
+ * means it will not work with monitor interface together with other VIFs.
+ * Currently we only support monitor interface on its own without other VIFs,
+ * and we will need to fix this code once we add support.
  */
 static int wil_rx_refill(struct wil6210_priv *wil, int count)
 {
@@ -789,8 +804,8 @@ void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev)
 	}
 
 	if (skb) { /* deliver to local stack */
-
 		skb->protocol = eth_type_trans(skb, ndev);
+		skb->dev = ndev;
 		rc = napi_gro_receive(&wil->napi_rx, skb);
 		wil_dbg_txrx(wil, "Rx complete %d bytes => %s\n",
 			     len, gro_res_str[rc]);
@@ -1905,6 +1920,7 @@ static int wil_tx_vring(struct wil6210_priv *wil, struct wil6210_vif *vif,
 
 /**
  * Check status of tx vrings and stop/wake net queues if needed
+ * It will start/stop net queues of a specific VIF net_device.
  *
  * This function does one of two checks:
  * In case check_stop is true, will check if net queues need to be stopped. If
@@ -1920,28 +1936,32 @@ static int wil_tx_vring(struct wil6210_priv *wil, struct wil6210_vif *vif,
  * availability and modified vring has high descriptor availability.
  */
 static inline void __wil_update_net_queues(struct wil6210_priv *wil,
+					   struct wil6210_vif *vif,
 					   struct vring *vring,
 					   bool check_stop)
 {
 	int i;
 
-	if (vring)
-		wil_dbg_txrx(wil, "vring %d, check_stop=%d, stopped=%d",
-			     (int)(vring - wil->vring_tx), check_stop,
-			     wil->net_queue_stopped);
-	else
-		wil_dbg_txrx(wil, "check_stop=%d, stopped=%d",
-			     check_stop, wil->net_queue_stopped);
+	if (unlikely(!vif))
+		return;
 
-	if (check_stop == wil->net_queue_stopped)
+	if (vring)
+		wil_dbg_txrx(wil, "vring %d, mid %d, check_stop=%d, stopped=%d",
+			     (int)(vring - wil->vring_tx), vif->mid, check_stop,
+			     vif->net_queue_stopped);
+	else
+		wil_dbg_txrx(wil, "check_stop=%d, mid=%d, stopped=%d",
+			     check_stop, vif->mid, vif->net_queue_stopped);
+
+	if (check_stop == vif->net_queue_stopped)
 		/* net queues already in desired state */
 		return;
 
 	if (check_stop) {
 		if (!vring || unlikely(wil_vring_avail_low(vring))) {
 			/* not enough room in the vring */
-			netif_tx_stop_all_queues(wil->main_ndev);
-			wil->net_queue_stopped = true;
+			netif_tx_stop_all_queues(vif_to_ndev(vif));
+			vif->net_queue_stopped = true;
 			wil_dbg_txrx(wil, "netif_tx_stop called\n");
 		}
 		return;
@@ -1957,7 +1977,8 @@ static inline void __wil_update_net_queues(struct wil6210_priv *wil,
 		struct vring *cur_vring = &wil->vring_tx[i];
 		struct vring_tx_data *txdata = &wil->vring_tx_data[i];
 
-		if (!cur_vring->va || !txdata->enabled || cur_vring == vring)
+		if (txdata->mid != vif->mid || !cur_vring->va ||
+		    !txdata->enabled || cur_vring == vring)
 			continue;
 
 		if (wil_vring_avail_low(cur_vring)) {
@@ -1970,24 +1991,24 @@ static inline void __wil_update_net_queues(struct wil6210_priv *wil,
 	if (!vring || wil_vring_avail_high(vring)) {
 		/* enough room in the vring */
 		wil_dbg_txrx(wil, "calling netif_tx_wake\n");
-		netif_tx_wake_all_queues(wil->main_ndev);
-		wil->net_queue_stopped = false;
+		netif_tx_wake_all_queues(vif_to_ndev(vif));
+		vif->net_queue_stopped = false;
 	}
 }
 
-void wil_update_net_queues(struct wil6210_priv *wil, struct vring *vring,
-			   bool check_stop)
+void wil_update_net_queues(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			   struct vring *vring, bool check_stop)
 {
 	spin_lock(&wil->net_queue_lock);
-	__wil_update_net_queues(wil, vring, check_stop);
+	__wil_update_net_queues(wil, vif, vring, check_stop);
 	spin_unlock(&wil->net_queue_lock);
 }
 
-void wil_update_net_queues_bh(struct wil6210_priv *wil, struct vring *vring,
-			      bool check_stop)
+void wil_update_net_queues_bh(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			      struct vring *vring, bool check_stop)
 {
 	spin_lock_bh(&wil->net_queue_lock);
-	__wil_update_net_queues(wil, vring, check_stop);
+	__wil_update_net_queues(wil, vif, vring, check_stop);
 	spin_unlock_bh(&wil->net_queue_lock);
 }
 
@@ -2009,8 +2030,9 @@ netdev_tx_t wil_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		}
 		goto drop;
 	}
-	if (unlikely(!test_bit(wil_status_fwconnected, wil->status))) {
-		wil_dbg_ratelimited(wil, "FW not connected, packet dropped\n");
+	if (unlikely(!test_bit(wil_vif_fwconnected, vif->status))) {
+		wil_dbg_ratelimited(wil,
+				    "VIF not connected, packet dropped\n");
 		goto drop;
 	}
 	if (unlikely(vif->wdev.iftype == NL80211_IFTYPE_MONITOR)) {
@@ -2051,7 +2073,7 @@ netdev_tx_t wil_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	switch (rc) {
 	case 0:
 		/* shall we stop net queues? */
-		wil_update_net_queues_bh(wil, vring, true);
+		wil_update_net_queues_bh(wil, vif, vring, true);
 		/* statistics will be updated on the tx_complete */
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
@@ -2090,9 +2112,10 @@ static inline void wil_consume_skb(struct sk_buff *skb, bool acked)
  *
  * Safe to call from IRQ
  */
-int wil_tx_complete(struct wil6210_priv *wil, int ringid)
+int wil_tx_complete(struct wil6210_vif *vif, int ringid)
 {
-	struct net_device *ndev = wil->main_ndev;
+	struct wil6210_priv *wil = vif_to_wil(vif);
+	struct net_device *ndev = vif_to_ndev(vif);
 	struct device *dev = wil_to_dev(wil);
 	struct vring *vring = &wil->vring_tx[ringid];
 	struct vring_tx_data *txdata = &wil->vring_tx_data[ringid];
@@ -2202,7 +2225,7 @@ int wil_tx_complete(struct wil6210_priv *wil, int ringid)
 
 	/* shall we wake net queues? */
 	if (done)
-		wil_update_net_queues(wil, vring, false);
+		wil_update_net_queues(wil, vif, vring, false);
 
 	return done;
 }
diff --git a/drivers/net/wireless/ath/wil6210/txrx.h b/drivers/net/wireless/ath/wil6210/txrx.h
index fcdffaa8251b..5f07717acc2c 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.h
+++ b/drivers/net/wireless/ath/wil6210/txrx.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2012-2016 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -63,7 +64,9 @@ static inline void wil_desc_addr_set(struct vring_dma_addr *addr,
  * [dword 1]
  * bit  0.. 3 : pkt_mode:4
  * bit      4 : pkt_mode_en:1
- * bit  5..14 : reserved0:10
+ * bit      5 : mac_id_en:1
+ * bit   6..7 : mac_id:2
+ * bit  8..14 : reserved0:7
  * bit     15 : ack_policy_en:1
  * bit 16..19 : dst_index:4
  * bit     20 : dst_index_en:1
@@ -132,6 +135,14 @@ struct vring_tx_mac {
 #define MAC_CFG_DESC_TX_1_PKT_MODE_EN_LEN 1
 #define MAC_CFG_DESC_TX_1_PKT_MODE_EN_MSK 0x10
 
+#define MAC_CFG_DESC_TX_1_MAC_ID_EN_POS 5
+#define MAC_CFG_DESC_TX_1_MAC_ID_EN_LEN 1
+#define MAC_CFG_DESC_TX_1_MAC_ID_EN_MSK 0x20
+
+#define MAC_CFG_DESC_TX_1_MAC_ID_POS 6
+#define MAC_CFG_DESC_TX_1_MAC_ID_LEN 2
+#define MAC_CFG_DESC_TX_1_MAC_ID_MSK 0xc0
+
 #define MAC_CFG_DESC_TX_1_ACK_POLICY_EN_POS 15
 #define MAC_CFG_DESC_TX_1_ACK_POLICY_EN_LEN 1
 #define MAC_CFG_DESC_TX_1_ACK_POLICY_EN_MSK 0x8000
@@ -304,7 +315,7 @@ enum {
  * bit  0.. 3 : tid:4 The QoS (b3-0) TID Field
  * bit  4.. 6 : cid:3 The Source index that  was found during parsing the TA.
  *		This field is used to define the source of the packet
- * bit      7 : reserved:1
+ * bit      7 : MAC_id_valid:1, 1 if MAC virtual number is valid.
  * bit  8.. 9 : mid:2 The MAC virtual number
  * bit 10..11 : frame_type:2 : The FC (b3-2) - MPDU Type
  *		(management, data, control and extension)
@@ -395,6 +406,7 @@ struct vring_rx_mac {
 #define RX_DMA_D0_CMD_DMA_EOP	BIT(8)
 #define RX_DMA_D0_CMD_DMA_RT	BIT(9)  /* always 1 */
 #define RX_DMA_D0_CMD_DMA_IT	BIT(10) /* interrupt */
+#define RX_MAC_D0_MAC_ID_VALID	BIT(7)
 
 /* Error field */
 #define RX_DMA_ERROR_FCS	BIT(0)
@@ -451,7 +463,8 @@ static inline int wil_rxdesc_cid(struct vring_rx_desc *d)
 
 static inline int wil_rxdesc_mid(struct vring_rx_desc *d)
 {
-	return WIL_GET_BITS(d->mac.d0, 8, 9);
+	return (d->mac.d0 & RX_MAC_D0_MAC_ID_VALID) ?
+		WIL_GET_BITS(d->mac.d0, 8, 9) : 0;
 }
 
 static inline int wil_rxdesc_ftype(struct vring_rx_desc *d)
@@ -517,7 +530,8 @@ static inline struct vring_rx_desc *wil_skb_rxdesc(struct sk_buff *skb)
 
 void wil_netif_rx_any(struct sk_buff *skb, struct net_device *ndev);
 void wil_rx_reorder(struct wil6210_priv *wil, struct sk_buff *skb);
-void wil_rx_bar(struct wil6210_priv *wil, u8 cid, u8 tid, u16 seq);
+void wil_rx_bar(struct wil6210_priv *wil, struct wil6210_vif *vif,
+		u8 cid, u8 tid, u16 seq);
 struct wil_tid_ampdu_rx *wil_tid_ampdu_rx_alloc(struct wil6210_priv *wil,
 						int size, u16 ssn);
 void wil_tid_ampdu_rx_free(struct wil6210_priv *wil,
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 1da549acc1ef..f9c5155025bc 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -475,8 +475,6 @@ struct vring_tx_data {
 
 enum { /* for wil6210_priv.status */
 	wil_status_fwready = 0, /* FW operational */
-	wil_status_fwconnecting,
-	wil_status_fwconnected,
 	wil_status_dontscan,
 	wil_status_mbox_ready, /* MBOX structures ready */
 	wil_status_irqen, /* interrupts enabled - for debug */
@@ -676,11 +674,18 @@ extern struct blink_on_off_time led_blink_time[WIL_LED_TIME_LAST];
 extern u8 led_id;
 extern u8 led_polarity;
 
+enum wil6210_vif_status {
+	wil_vif_fwconnecting,
+	wil_vif_fwconnected,
+	wil_vif_status_last /* keep last */
+};
+
 struct wil6210_vif {
 	struct wireless_dev wdev;
 	struct net_device *ndev;
 	struct wil6210_priv *wil;
 	u8 mid;
+	DECLARE_BITMAP(status, wil_vif_status_last);
 	u32 privacy; /* secure connection? */
 	u16 channel; /* relevant in AP mode */
 	u8 hidden_ssid; /* relevant in AP mode */
@@ -699,6 +704,7 @@ struct wil6210_vif {
 	struct list_head probe_client_pending;
 	struct mutex probe_client_mutex; /* protect @probe_client_pending */
 	struct work_struct probe_client_worker;
+	int net_queue_stopped; /* netif_tx_stop_all_queues invoked */
 };
 
 struct wil6210_priv {
@@ -726,6 +732,7 @@ struct wil6210_priv {
 	u8 max_vifs; /* maximum number of interfaces, including main */
 	struct wil6210_vif *vifs[WIL_MAX_VIFS];
 	struct mutex vif_mutex; /* protects access to VIF entries */
+	atomic_t connected_vifs;
 	/* profile */
 	struct cfg80211_chan_def monitor_chandef;
 	u32 monitor_flags;
@@ -759,9 +766,10 @@ struct wil6210_priv {
 	 */
 	spinlock_t wmi_ev_lock;
 	spinlock_t net_queue_lock; /* guarding stop/wake netif queue */
-	int net_queue_stopped; /* netif_tx_stop_all_queues invoked */
 	struct napi_struct napi_rx;
 	struct napi_struct napi_tx;
+	struct net_device napi_ndev; /* dummy net_device serving all VIFs */
+
 	/* DMA related */
 	struct vring vring_rx;
 	unsigned int rx_buf_len;
@@ -1077,6 +1085,7 @@ int wmi_pcp_stop(struct wil6210_vif *vif);
 int wmi_led_cfg(struct wil6210_priv *wil, bool enable);
 int wmi_abort_scan(struct wil6210_vif *vif);
 void wil_abort_scan(struct wil6210_vif *vif, bool sync);
+void wil_abort_scan_all_vifs(struct wil6210_priv *wil, bool sync);
 void wil6210_bus_request(struct wil6210_priv *wil, u32 kbps);
 void wil6210_disconnect(struct wil6210_vif *vif, const u8 *bssid,
 			u16 reason_code, bool from_event);
@@ -1097,12 +1106,12 @@ int wil_bcast_init(struct wil6210_vif *vif);
 void wil_bcast_fini(struct wil6210_vif *vif);
 void wil_bcast_fini_all(struct wil6210_priv *wil);
 
-void wil_update_net_queues(struct wil6210_priv *wil, struct vring *vring,
-			   bool should_stop);
-void wil_update_net_queues_bh(struct wil6210_priv *wil, struct vring *vring,
-			      bool check_stop);
+void wil_update_net_queues(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			   struct vring *vring, bool should_stop);
+void wil_update_net_queues_bh(struct wil6210_priv *wil, struct wil6210_vif *vif,
+			      struct vring *vring, bool check_stop);
 netdev_tx_t wil_start_xmit(struct sk_buff *skb, struct net_device *ndev);
-int wil_tx_complete(struct wil6210_priv *wil, int ringid);
+int wil_tx_complete(struct wil6210_vif *vif, int ringid);
 void wil6210_unmask_irq_tx(struct wil6210_priv *wil);
 
 /* RX API */
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index befeeb267de2..a3dda9a97c1f 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -886,7 +886,7 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)
 
 	if ((wdev->iftype == NL80211_IFTYPE_STATION) ||
 	    (wdev->iftype == NL80211_IFTYPE_P2P_CLIENT)) {
-		if (!test_bit(wil_status_fwconnecting, wil->status)) {
+		if (!test_bit(wil_vif_fwconnecting, vif->status)) {
 			wil_err(wil, "Not in connecting state\n");
 			mutex_unlock(&wil->mutex);
 			return;
@@ -965,15 +965,16 @@ static void wmi_evt_connect(struct wil6210_vif *vif, int id, void *d, int len)
 
 	wil->sta[evt->cid].status = wil_sta_connected;
 	wil->sta[evt->cid].aid = evt->aid;
-	set_bit(wil_status_fwconnected, wil->status);
-	wil_update_net_queues_bh(wil, NULL, false);
+	if (!test_and_set_bit(wil_vif_fwconnected, vif->status))
+		atomic_inc(&wil->connected_vifs);
+	wil_update_net_queues_bh(wil, vif, NULL, false);
 
 out:
 	if (rc) {
 		wil->sta[evt->cid].status = wil_sta_unused;
 		wil->sta[evt->cid].mid = U8_MAX;
 	}
-	clear_bit(wil_status_fwconnecting, wil->status);
+	clear_bit(wil_vif_fwconnecting, vif->status);
 	mutex_unlock(&wil->mutex);
 }
 

From 3a3b745f1eaeb5c9f876bcc7849f6e35fbc87397 Mon Sep 17 00:00:00 2001
From: Lior David <liord@codeaurora.org>
Date: Mon, 26 Feb 2018 20:12:18 +0200
Subject: [PATCH 0445/1678] wil6210: add debugfs 'mids' file

Added a new debugfs file 'mids' to print the list of
virtual interfaces by MAC ID (MID). Allows mapping
between the internal MID used by FW and the actual
network interface used by the VIF. This is needed by
debugging tools.

Signed-off-by: Lior David <liord@codeaurora.org>
Signed-off-by: Maya Erez <merez@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wil6210/debugfs.c | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index 93a99a1c8c92..8c90b3111f0b 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -1495,6 +1495,42 @@ static const struct file_operations fops_sta = {
 	.llseek		= seq_lseek,
 };
 
+static int wil_mids_debugfs_show(struct seq_file *s, void *data)
+{
+	struct wil6210_priv *wil = s->private;
+	struct wil6210_vif *vif;
+	struct net_device *ndev;
+	int i;
+
+	mutex_lock(&wil->vif_mutex);
+	for (i = 0; i < wil->max_vifs; i++) {
+		vif = wil->vifs[i];
+
+		if (vif) {
+			ndev = vif_to_ndev(vif);
+			seq_printf(s, "[%d] %pM %s\n", i, ndev->dev_addr,
+				   ndev->name);
+		} else {
+			seq_printf(s, "[%d] unused\n", i);
+		}
+	}
+	mutex_unlock(&wil->vif_mutex);
+
+	return 0;
+}
+
+static int wil_mids_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wil_mids_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations fops_mids = {
+	.open		= wil_mids_seq_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
 static ssize_t wil_read_file_led_cfg(struct file *file, char __user *user_buf,
 				     size_t count, loff_t *ppos)
 {
@@ -1749,6 +1785,7 @@ static const struct {
 	{"mbox",	0444,		&fops_mbox},
 	{"vrings",	0444,		&fops_vring},
 	{"stations", 0444,		&fops_sta},
+	{"mids",	0444,		&fops_mids},
 	{"desc",	0444,		&fops_txdesc},
 	{"bf",		0444,		&fops_bf},
 	{"mem_val",	0644,		&fops_memread},

From 71d22d58b6e507147a9008ce2fd690cf311c97f9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Feb 2018 22:13:52 +0100
Subject: [PATCH 0446/1678] bpf, x64: remove bpf_flush_icache

Unlike other archs flush_icache_range() is a noop on x64, therefore
remove the JIT's bpf_flush_icache() altogether since not needed.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/x86/net/bpf_jit_comp.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index cbf94d44f3d5..eb661fff94d7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,10 +11,10 @@
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
+#include <linux/bpf.h>
+
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
-#include <linux/bpf.h>
 
 /*
  * assembly code in arch/x86/net/bpf_jit.S
@@ -103,16 +103,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JLE 0x7E
 #define X86_JG  0x7F
 
-static void bpf_flush_icache(void *start, void *end)
-{
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	smp_wmb();
-	flush_icache_range((unsigned long)start, (unsigned long)end);
-	set_fs(old_fs);
-}
-
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
@@ -1266,7 +1256,6 @@ skip_init_addrs:
 		bpf_jit_dump(prog->len, proglen, pass + 1, image);
 
 	if (image) {
-		bpf_flush_icache(header, image + proglen);
 		if (!prog->is_func || extra_pass) {
 			bpf_jit_binary_lock_ro(header);
 		} else {

From 67490e34ba2b1c02fb554a8059cd6ce12b47ccfb Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 27 Feb 2018 09:52:42 -0800
Subject: [PATCH 0447/1678] selftests/net: revert the zerocopy Rx path for
 PF_RDS

In preparation for optimized reception of zerocopy completion,
revert the Rx side changes introduced by Commit dfb8434b0a94
("selftests/net: add zerocopy support for PF_RDS test case")

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/msg_zerocopy.c | 67 ----------------------
 1 file changed, 67 deletions(-)

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 5cc2a53bb71c..eff9cf2ff04d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -344,26 +344,6 @@ static int do_setup_tx(int domain, int type, int protocol)
 	return fd;
 }
 
-static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
-				       uint32_t *ckbuf, size_t nbytes)
-{
-	int ncookies, i;
-
-	if (serr->ee_errno != 0)
-		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
-	ncookies = serr->ee_data;
-	if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
-		error(1, 0, "Returned %d cookies, max expected %d\n",
-		      ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
-	if (nbytes != ncookies * sizeof(uint32_t))
-		error(1, 0, "Expected %d cookies, got %ld\n",
-		      ncookies, nbytes/sizeof(uint32_t));
-	for (i = 0; i < ncookies; i++)
-		if (cfg_verbose >= 2)
-			fprintf(stderr, "%d\n", ckbuf[i]);
-	return ncookies;
-}
-
 static bool do_recv_completion(int fd)
 {
 	struct sock_extended_err *serr;
@@ -372,17 +352,10 @@ static bool do_recv_completion(int fd)
 	uint32_t hi, lo, range;
 	int ret, zerocopy;
 	char control[100];
-	uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
-	struct iovec iov;
 
 	msg.msg_control = control;
 	msg.msg_controllen = sizeof(control);
 
-	iov.iov_base = ckbuf;
-	iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-
 	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
 	if (ret == -1 && errno == EAGAIN)
 		return false;
@@ -402,10 +375,6 @@ static bool do_recv_completion(int fd)
 
 	serr = (void *) CMSG_DATA(cm);
 
-	if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
-		completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
-		return true;
-	}
 	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
 		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
 	if (serr->ee_errno != 0)
@@ -631,40 +600,6 @@ static void do_flush_datagram(int fd, int type)
 	bytes += cfg_payload_len;
 }
 
-
-static void do_recvmsg(int fd)
-{
-	int ret, off = 0;
-	char *buf;
-	struct iovec iov;
-	struct msghdr msg;
-	struct sockaddr_storage din;
-
-	buf = calloc(cfg_payload_len, sizeof(char));
-	iov.iov_base = buf;
-	iov.iov_len = cfg_payload_len;
-
-	memset(&msg, 0, sizeof(msg));
-	msg.msg_name = &din;
-	msg.msg_namelen = sizeof(din);
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-
-	ret = recvmsg(fd, &msg, MSG_TRUNC);
-
-	if (ret == -1)
-		error(1, errno, "recv");
-	if (ret != cfg_payload_len)
-		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
-
-	if (memcmp(buf + off, payload, ret))
-		error(1, 0, "recv: data mismatch");
-
-	free(buf);
-	packets++;
-	bytes += cfg_payload_len;
-}
-
 static void do_rx(int domain, int type, int protocol)
 {
 	uint64_t tstop;
@@ -676,8 +611,6 @@ static void do_rx(int domain, int type, int protocol)
 	do {
 		if (type == SOCK_STREAM)
 			do_flush_tcp(fd);
-		else if (domain == PF_RDS)
-			do_recvmsg(fd);
 		else
 			do_flush_datagram(fd, type);
 

From 401910db4cd425899832a093539222b6174f92a2 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 27 Feb 2018 09:52:43 -0800
Subject: [PATCH 0448/1678] rds: deliver zerocopy completion notification with
 data

This commit is an optimization over commit 01883eda72bd
("rds: support for zcopy completion notification") for PF_RDS sockets.

RDS applications are predominantly request-response transactions, so
it is more efficient to reduce the number of system calls and have
zerocopy completion notification delivered as ancillary data on the
POLLIN channel.

Cookies are passed up as ancillary data (at level SOL_RDS) in a
struct rds_zcopy_cookies when the returned value of recvmsg() is
greater than, or equal to, 0. A max of RDS_MAX_ZCOOKIES may be passed
with each message.

This commit removes support for zerocopy completion notification on
MSG_ERRQUEUE for PF_RDS sockets.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/errqueue.h |  2 --
 include/uapi/linux/rds.h      |  7 +++++++
 net/rds/af_rds.c              |  7 +++++--
 net/rds/message.c             | 38 +++++++++++++++--------------------
 net/rds/rds.h                 |  2 ++
 net/rds/recv.c                | 31 +++++++++++++++++++++++++++-
 6 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 28812eda4209..dc64cfaf13da 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,13 +20,11 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
 #define SO_EE_ORIGIN_ZEROCOPY	5
-#define SO_EE_ORIGIN_ZCOOKIE	6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED	1
-#define	SO_EE_ORIGIN_MAX_ZCOOKIES	8
 
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 12e3bca32cad..a66b213de3d7 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -104,6 +104,7 @@
 #define RDS_CMSG_MASKED_ATOMIC_CSWP	9
 #define RDS_CMSG_RXPATH_LATENCY		11
 #define	RDS_CMSG_ZCOPY_COOKIE		12
+#define	RDS_CMSG_ZCOPY_COMPLETION	13
 
 #define RDS_INFO_FIRST			10000
 #define RDS_INFO_COUNTERS		10000
@@ -317,6 +318,12 @@ struct rds_rdma_notify {
 #define RDS_RDMA_DROPPED	3
 #define RDS_RDMA_OTHER_ERROR	4
 
+#define	RDS_MAX_ZCOOKIES	8
+struct rds_zcopy_cookies {
+	__u32 num;
+	__u32 cookies[RDS_MAX_ZCOOKIES];
+};
+
 /*
  * Common set of flags for all RDMA related structs
  */
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index a937f18896ae..f7126108a811 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,6 +77,7 @@ static int rds_release(struct socket *sock)
 	rds_send_drop_to(rs, NULL);
 	rds_rdma_drop_keys(rs);
 	rds_notify_queue_get(rs, NULL);
+	__skb_queue_purge(&rs->rs_zcookie_queue);
 
 	spin_lock_bh(&rds_sock_lock);
 	list_del_init(&rs->rs_item);
@@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
  *  -	to signal that a previously congested destination may have become
  *	uncongested
  *  -	A notification has been queued to the socket (this can be a congestion
- *	update, or a RDMA completion).
+ *	update, or a RDMA completion, or a MSG_ZEROCOPY completion).
  *
  * EPOLLOUT is asserted if there is room on the send queue. This does not mean
  * however, that the next sendmsg() call will succeed. If the application tries
@@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 		spin_unlock(&rs->rs_lock);
 	}
 	if (!list_empty(&rs->rs_recv_queue) ||
-	    !list_empty(&rs->rs_notify_queue))
+	    !list_empty(&rs->rs_notify_queue) ||
+	    !skb_queue_empty(&rs->rs_zcookie_queue))
 		mask |= (EPOLLIN | EPOLLRDNORM);
 	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
 		mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	INIT_LIST_HEAD(&rs->rs_recv_queue);
 	INIT_LIST_HEAD(&rs->rs_notify_queue);
 	INIT_LIST_HEAD(&rs->rs_cong_list);
+	skb_queue_head_init(&rs->rs_zcookie_queue);
 	spin_lock_init(&rs->rs_rdma_lock);
 	rs->rs_rdma_keys = RB_ROOT;
 	rs->rs_rx_traces = 0;
diff --git a/net/rds/message.c b/net/rds/message.c
index 651834513481..116cf87ccb89 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -58,32 +58,26 @@ EXPORT_SYMBOL_GPL(rds_message_addref);
 
 static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
 {
-	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
-	int ncookies;
-	u32 *ptr;
+	struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+	int ncookies = ck->num;
 
-	if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE)
+	if (ncookies == RDS_MAX_ZCOOKIES)
 		return false;
-	ncookies = serr->ee.ee_data;
-	if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES)
-		return false;
-	ptr = skb_put(skb, sizeof(u32));
-	*ptr = cookie;
-	serr->ee.ee_data = ++ncookies;
+	ck->cookies[ncookies] = cookie;
+	ck->num =  ++ncookies;
 	return true;
 }
 
 static void rds_rm_zerocopy_callback(struct rds_sock *rs,
 				     struct rds_znotifier *znotif)
 {
-	struct sock *sk = rds_rs_to_sk(rs);
 	struct sk_buff *skb, *tail;
-	struct sock_exterr_skb *serr;
 	unsigned long flags;
 	struct sk_buff_head *q;
 	u32 cookie = znotif->z_cookie;
+	struct rds_zcopy_cookies *ck;
 
-	q = &sk->sk_error_queue;
+	q = &rs->rs_zcookie_queue;
 	spin_lock_irqsave(&q->lock, flags);
 	tail = skb_peek_tail(q);
 
@@ -91,22 +85,19 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
 		spin_unlock_irqrestore(&q->lock, flags);
 		mm_unaccount_pinned_pages(&znotif->z_mmp);
 		consume_skb(rds_skb_from_znotifier(znotif));
-		sk->sk_error_report(sk);
+		/* caller invokes rds_wake_sk_sleep() */
 		return;
 	}
 
 	skb = rds_skb_from_znotifier(znotif);
-	serr = SKB_EXT_ERR(skb);
-	memset(&serr->ee, 0, sizeof(serr->ee));
-	serr->ee.ee_errno = 0;
-	serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE;
-	serr->ee.ee_info = 0;
+	ck = (struct rds_zcopy_cookies *)skb->cb;
+	memset(ck, 0, sizeof(*ck));
 	WARN_ON(!skb_zcookie_add(skb, cookie));
 
 	__skb_queue_tail(q, skb);
 
 	spin_unlock_irqrestore(&q->lock, flags);
-	sk->sk_error_report(sk);
+	/* caller invokes rds_wake_sk_sleep() */
 
 	mm_unaccount_pinned_pages(&znotif->z_mmp);
 }
@@ -129,6 +120,7 @@ static void rds_message_purge(struct rds_message *rm)
 		if (rm->data.op_mmp_znotifier) {
 			zcopy = true;
 			rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier);
+			rds_wake_sk_sleep(rs);
 			rm->data.op_mmp_znotifier = NULL;
 		}
 		sock_put(rds_rs_to_sk(rs));
@@ -362,10 +354,12 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
 		int total_copied = 0;
 		struct sk_buff *skb;
 
-		skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
-				GFP_KERNEL);
+		skb = alloc_skb(0, GFP_KERNEL);
 		if (!skb)
 			return -ENOMEM;
+		BUILD_BUG_ON(sizeof(skb->cb) <
+			     max_t(int, sizeof(struct rds_znotifier),
+				   sizeof(struct rds_zcopy_cookies)));
 		rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
 		if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
 					    length)) {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 31cd38852050..33b16353d8f3 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -603,6 +603,8 @@ struct rds_sock {
 	/* Socket receive path trace points*/
 	u8			rs_rx_traces;
 	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+	struct sk_buff_head	rs_zcookie_queue;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b080961464df..d50747725221 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -577,6 +577,32 @@ out:
 	return ret;
 }
 
+static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
+{
+	struct sk_buff *skb;
+	struct sk_buff_head *q = &rs->rs_zcookie_queue;
+	struct rds_zcopy_cookies *done;
+
+	if (!msg->msg_control)
+		return false;
+
+	if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) ||
+	    msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
+		return false;
+
+	skb = skb_dequeue(q);
+	if (!skb)
+		return false;
+	done = (struct rds_zcopy_cookies *)skb->cb;
+	if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
+		     done)) {
+		skb_queue_head(q, skb);
+		return false;
+	}
+	consume_skb(skb);
+	return true;
+}
+
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		int msg_flags)
 {
@@ -611,7 +637,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 		if (!rds_next_incoming(rs, &inc)) {
 			if (nonblock) {
-				ret = -EAGAIN;
+				bool reaped = rds_recvmsg_zcookie(rs, msg);
+
+				ret = reaped ?  0 : -EAGAIN;
 				break;
 			}
 
@@ -660,6 +688,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 			ret = -EFAULT;
 			goto out;
 		}
+		rds_recvmsg_zcookie(rs, msg);
 
 		rds_stats_inc(s_recv_delivered);
 

From 6f3899e602b0f4ec3d62ff385bb805e7109defa4 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 27 Feb 2018 09:52:44 -0800
Subject: [PATCH 0449/1678] selftests/net: reap zerocopy completions passed up
 as ancillary data.

PF_RDS sockets pass up cookies for zerocopy completion as ancillary
data. Update msg_zerocopy to reap this information.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/msg_zerocopy.c | 65 +++++++++++++++++++---
 1 file changed, 57 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index eff9cf2ff04d..406cc70c571d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -344,7 +344,53 @@ static int do_setup_tx(int domain, int type, int protocol)
 	return fd;
 }
 
-static bool do_recv_completion(int fd)
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+	int i;
+
+	if (ck->num > RDS_MAX_ZCOOKIES)
+		error(1, 0, "Returned %d cookies, max expected %d\n",
+		      ck->num, RDS_MAX_ZCOOKIES);
+	for (i = 0; i < ck->num; i++)
+		if (cfg_verbose >= 2)
+			fprintf(stderr, "%d\n", ck->cookies[i]);
+	return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+	char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+	struct rds_zcopy_cookies *ck;
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	bool ret = false;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+
+	if (recvmsg(fd, &msg, MSG_DONTWAIT))
+		return ret;
+
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, errno, "recvmsg notification: truncated");
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_RDS &&
+		    cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+			ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+			completions += do_process_zerocopy_cookies(ck);
+			ret = true;
+			break;
+		}
+		error(0, 0, "ignoring cmsg at level %d type %d\n",
+			    cmsg->cmsg_level, cmsg->cmsg_type);
+	}
+	return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
 {
 	struct sock_extended_err *serr;
 	struct msghdr msg = {};
@@ -353,6 +399,9 @@ static bool do_recv_completion(int fd)
 	int ret, zerocopy;
 	char control[100];
 
+	if (domain == PF_RDS)
+		return do_recvmsg_completion(fd);
+
 	msg.msg_control = control;
 	msg.msg_controllen = sizeof(control);
 
@@ -409,20 +458,20 @@ static bool do_recv_completion(int fd)
 }
 
 /* Read all outstanding messages on the errqueue */
-static void do_recv_completions(int fd)
+static void do_recv_completions(int fd, int domain)
 {
-	while (do_recv_completion(fd)) {}
+	while (do_recv_completion(fd, domain)) {}
 }
 
 /* Wait for all remaining completions on the errqueue */
-static void do_recv_remaining_completions(int fd)
+static void do_recv_remaining_completions(int fd, int domain)
 {
 	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
 
 	while (completions < expected_completions &&
 	       gettimeofday_ms() < tstop) {
-		if (do_poll(fd, POLLERR))
-			do_recv_completions(fd);
+		if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+			do_recv_completions(fd, domain);
 	}
 
 	if (completions < expected_completions)
@@ -503,13 +552,13 @@ static void do_tx(int domain, int type, int protocol)
 
 		while (!do_poll(fd, POLLOUT)) {
 			if (cfg_zerocopy)
-				do_recv_completions(fd);
+				do_recv_completions(fd, domain);
 		}
 
 	} while (gettimeofday_ms() < tstop);
 
 	if (cfg_zerocopy)
-		do_recv_remaining_completions(fd);
+		do_recv_remaining_completions(fd, domain);
 
 	if (close(fd))
 		error(1, errno, "close");

From 15d2ee42a3087089e73ad52fd8c1b37ab496b87c Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 26 Feb 2018 22:47:06 +0100
Subject: [PATCH 0450/1678] net: stmmac: ensure that the MSS desc is the last
 desc to set the own bit

A dma_wmb() is used to guarantee the ordering, with respect to
other writes, to cache coherent DMA memory.

There is a dma_wmb() in prepare_tx_desc()/prepare_tso_tx_desc() which
ensures that TDES0/1/2 is written before TDES3 (which contains the own
bit), for First Desc.

However, in the rare case that MSS changes, there will be a MSS
context descriptor in front of the regular DMA descriptors:

<MSS desc> <- DMA Next Descriptor
<First Desc>
<desc n>
<Last Desc>

Thus, for this special case, we need a dma_wmb()
after prepare_tso_tx_desc()/before writing the own bit to the MSS desc,
so that we flush the write to TDES3 for First Desc,
in order to ensure that the MSS descriptor is the last descriptor to
set the own bit.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c8d86d77e03d..3b5e7b06e796 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2983,8 +2983,15 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 			tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len));
 
 	/* If context desc is used to change MSS */
-	if (mss_desc)
+	if (mss_desc) {
+		/* Make sure that first descriptor has been completely
+		 * written, including its own bit. This is because MSS is
+		 * actually before first descriptor, so we need to make
+		 * sure that MSS's own bit is the last thing written.
+		 */
+		dma_wmb();
 		priv->hw->desc->set_tx_owner(mss_desc);
+	}
 
 	/* The own bit must be the latest setting done when prepare the
 	 * descriptor and then barrier is needed to make sure that

From 95eb930a40a0973f9b984d87a4986bb81f897ede Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 26 Feb 2018 22:47:07 +0100
Subject: [PATCH 0451/1678] net: stmmac: use correct barrier between coherent
 memory and MMIO

The last memory barrier in stmmac_xmit()/stmmac_tso_xmit() is placed
between a coherent memory write and a MMIO write:

The own bit is written in First Desc (TSO: MSS desc or First Desc).
<barrier>
The DMA engine is started by a write to the tx desc tail pointer/
enable dma transmission register, i.e. a MMIO write.

This barrier cannot be a simple dma_wmb(), since a dma_wmb() is only
used to guarantee the ordering, with respect to other writes,
to cache coherent DMA memory.

To guarantee that the cache coherent memory writes have completed
before we attempt to write to the cache incoherent MMIO region,
we need to use the more heavyweight barrier wmb().

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 3b5e7b06e796..6dd04f237b2a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2997,7 +2997,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * descriptor and then barrier is needed to make sure that
 	 * all is coherent before granting the DMA engine.
 	 */
-	dma_wmb();
+	wmb();
 
 	if (netif_msg_pktdata(priv)) {
 		pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
@@ -3221,7 +3221,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * descriptor and then barrier is needed to make sure that
 		 * all is coherent before granting the DMA engine.
 		 */
-		dma_wmb();
+		wmb();
 	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);

From a6b25da5e7ba212af5826a662e6a035a79bffabd Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 26 Feb 2018 22:47:08 +0100
Subject: [PATCH 0452/1678] net: stmmac: ensure that the device has released
 ownership before reading data

According to Documentation/memory-barriers.txt, we need to use a
dma_rmb() after reading the status/own bit, to ensure that all
descriptor fields are read after reading the own bit.

This way, we ensure that the DMA engine is done with the DMA
descriptor before we read the other descriptor fields, e.g. reading
the tx hardware timestamp (if PTP is enabled).

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6dd04f237b2a..a9856a8bf8ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1844,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 		if (unlikely(status & tx_dma_own))
 			break;
 
+		/* Make sure descriptor fields are read after reading
+		 * the own bit.
+		 */
+		dma_rmb();
+
 		/* Just consider the last segment and ...*/
 		if (likely(!(status & tx_not_ls))) {
 			/* ... verify the status error condition */

From 1e88f6e01bc094c9fc7021cc1944d14a63257703 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 26 Feb 2018 22:47:09 +0100
Subject: [PATCH 0453/1678] net: stmmac: make dwmac4_release_tx_desc() clear
 all descriptor fields

Make dwmac4_release_tx_desc() clear all descriptor fields, not just
TDES2 and TDES3.

I'm suspecting that TDES0 and TDES1 wasn't cleared because the DMA
engine uses them to store the tx hardware timestamp (if PTP is enabled).

However, stmmac_tx_clean() calls stmmac_get_tx_hwtstamp(), which reads
and saves the timestamp, before it calls release_tx_desc(), so this
is not an issue.

stmmac_xmit() and stmmac_tso_xmit() both always overwrite TDES0,
however, stmmac_tso_xmit() sometimes sets TDES1, and since neither
stmmac_xmit() nor stmmac_tso_xmit() explicitly clears TDES1, both
functions might reuse a DMA descriptor with old TDES1 data.

I haven't observed any misbehavior even though TDES1 sometimes
point to an old skb, however, explicitly clearing both TDES0 and TDES1
in dwmac4_release_tx_desc() minimizes the chances of undefined behavior.

Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index c728ffa095de..2a6521d33e43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
 
 static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
 {
+	p->des0 = 0;
+	p->des1 = 0;
 	p->des2 = 0;
 	p->des3 = 0;
 }

From ecba616e041e64840d14e294b089ca355614b7fb Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 26 Feb 2018 18:10:55 -0600
Subject: [PATCH 0454/1678] ibmvnic: Fix TX descriptor tracking again

Sorry, the previous change introduced a race condition between
transmit completion processing and tracking TX descriptors. If a
completion is received before the number of descriptors is logged,
the number of descriptors will be add but not removed. After enough
times, this could halt the transmit queue forever.

Log the number of descriptors used by a transmit before sending.
I stress tested the fix on two different systems running over the
weekend without any issues.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5a86a916492c..32ee202de13a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1482,6 +1482,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if ((*hdrs >> 7) & 1) {
 		build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
 		tx_crq.v1.n_crq_elem = num_entries;
+		tx_buff->num_entries = num_entries;
 		tx_buff->indir_arr[0] = tx_crq;
 		tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
 						    sizeof(tx_buff->indir_arr),
@@ -1500,6 +1501,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 					       (u64)tx_buff->indir_dma,
 					       (u64)num_entries);
 	} else {
+		tx_buff->num_entries = num_entries;
 		lpar_rc = send_subcrq(adapter, handle_array[queue_num],
 				      &tx_crq);
 	}
@@ -1536,7 +1538,6 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		netif_stop_subqueue(netdev, queue_num);
 	}
 
-	tx_buff->num_entries = num_entries;
 	tx_packets++;
 	tx_bytes += skb->len;
 	txq->trans_start = jiffies;

From 53cc7721fdf12e649994cfb7d8f562acb0e4510b Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 26 Feb 2018 18:10:56 -0600
Subject: [PATCH 0455/1678] ibmvnic: Allocate statistics buffers during probe

Currently, buffers holding individual queue statistics are allocated
when the device is opened. If an ibmvnic interface is hotplugged or
initialized but never opened, an attempt to get statistics with
ethtool will result in a kernel panic.

Since the driver allocates a constant number, the maximum supported
queues, of buffers, these can be allocated during device probe and
freed when the device is hot-unplugged or the module is removed.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 32ee202de13a..1a0f67b60003 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -845,8 +845,6 @@ static void release_resources(struct ibmvnic_adapter *adapter)
 	release_tx_pools(adapter);
 	release_rx_pools(adapter);
 
-	release_stats_token(adapter);
-	release_stats_buffers(adapter);
 	release_error_buffers(adapter);
 	release_napi(adapter);
 	release_login_rsp_buffer(adapter);
@@ -974,14 +972,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
 	if (rc)
 		return rc;
 
-	rc = init_stats_buffers(adapter);
-	if (rc)
-		return rc;
-
-	rc = init_stats_token(adapter);
-	if (rc)
-		return rc;
-
 	adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
 	if (!adapter->vpd)
 		return -ENOMEM;
@@ -4431,6 +4421,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 		release_crq_queue(adapter);
 	}
 
+	rc = init_stats_buffers(adapter);
+	if (rc)
+		return rc;
+
+	rc = init_stats_token(adapter);
+	if (rc)
+		return rc;
+
 	return rc;
 }
 
@@ -4538,6 +4536,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
 	release_sub_crqs(adapter, 1);
 	release_crq_queue(adapter);
 
+	release_stats_token(adapter);
+	release_stats_buffers(adapter);
+
 	adapter->state = VNIC_REMOVED;
 
 	mutex_unlock(&adapter->reset_lock);

From 637f81d164a5347ee3325f106f815c7969c032b5 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 26 Feb 2018 18:10:57 -0600
Subject: [PATCH 0456/1678] ibmvnic: Harden TX/RX pool cleaning

If the driver releases resources after a failed reset or some other
error, the driver might attempt to clean up and free memory that
isn't there anymore. Include some additional checks that RX/TX queues
along with their associated structures are still there before cleaning.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 1a0f67b60003..b907c0a607e6 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1081,6 +1081,7 @@ static int ibmvnic_open(struct net_device *netdev)
 static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_rx_pool *rx_pool;
+	struct ibmvnic_rx_buff *rx_buff;
 	u64 rx_entries;
 	int rx_scrqs;
 	int i, j;
@@ -1094,14 +1095,15 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 	/* Free any remaining skbs in the rx buffer pools */
 	for (i = 0; i < rx_scrqs; i++) {
 		rx_pool = &adapter->rx_pool[i];
-		if (!rx_pool)
+		if (!rx_pool || !rx_pool->rx_buff)
 			continue;
 
 		netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i);
 		for (j = 0; j < rx_entries; j++) {
-			if (rx_pool->rx_buff[j].skb) {
-				dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
-				rx_pool->rx_buff[j].skb = NULL;
+			rx_buff = &rx_pool->rx_buff[j];
+			if (rx_buff && rx_buff->skb) {
+				dev_kfree_skb_any(rx_buff->skb);
+				rx_buff->skb = NULL;
 			}
 		}
 	}
@@ -1110,6 +1112,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_tx_pool *tx_pool;
+	struct ibmvnic_tx_buff *tx_buff;
 	u64 tx_entries;
 	int tx_scrqs;
 	int i, j;
@@ -1123,14 +1126,15 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 	/* Free any remaining skbs in the tx buffer pools */
 	for (i = 0; i < tx_scrqs; i++) {
 		tx_pool = &adapter->tx_pool[i];
-		if (!tx_pool)
+		if (!tx_pool && !tx_pool->tx_buff)
 			continue;
 
 		netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
 		for (j = 0; j < tx_entries; j++) {
-			if (tx_pool->tx_buff[j].skb) {
-				dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
-				tx_pool->tx_buff[j].skb = NULL;
+			tx_buff = &tx_pool->tx_buff[j];
+			if (tx_buff && tx_buff->skb) {
+				dev_kfree_skb_any(tx_buff->skb);
+				tx_buff->skb = NULL;
 			}
 		}
 	}

From 0aecb13ce3abe1372c7306c7d1b86ff748296abb Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 26 Feb 2018 18:10:58 -0600
Subject: [PATCH 0457/1678] ibmvnic: Report queue stops and restarts as debug
 output

It's not necessary to report each time a queue is stopped and restarted
as an informational message. Change that to be a debug message so that
it can be observed if needed but not printed by default.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b907c0a607e6..53b2c91fa786 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1528,7 +1528,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	if (atomic_add_return(num_entries, &tx_scrq->used)
 					>= adapter->req_tx_entries_per_subcrq) {
-		netdev_info(netdev, "Stopping queue %d\n", queue_num);
+		netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
 		netif_stop_subqueue(netdev, queue_num);
 	}
 
@@ -2541,8 +2541,8 @@ restart_loop:
 		    __netif_subqueue_stopped(adapter->netdev,
 					     scrq->pool_index)) {
 			netif_wake_subqueue(adapter->netdev, scrq->pool_index);
-			netdev_info(adapter->netdev, "Started queue %d\n",
-				    scrq->pool_index);
+			netdev_dbg(adapter->netdev, "Started queue %d\n",
+				   scrq->pool_index);
 		}
 	}
 

From 20a8ab744ff799ccedd35aba0d3139782f341bed Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 26 Feb 2018 18:10:59 -0600
Subject: [PATCH 0458/1678] ibmvnic: Do not attempt to login if RX or TX queues
 are not allocated

If a device reset fails for some reason, TX and RX queue resources
could be released. If a user attempts to open the device in this scenario,
it may result in a kernel panic as the driver tries to access this
memory. To fix this, include a check before device login that TX/RX
queues are still there before enabling the device. In addition, return a
value that can be checked in case of any errors to avoid waiting for a
completion that will never come.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 53b2c91fa786..765407179fdd 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data);
 static void send_map_query(struct ibmvnic_adapter *adapter);
 static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8);
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
-static void send_login(struct ibmvnic_adapter *adapter);
+static int send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
 static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
@@ -809,8 +809,11 @@ static int ibmvnic_login(struct net_device *netdev)
 		}
 
 		reinit_completion(&adapter->init_done);
-		send_login(adapter);
-		if (!wait_for_completion_timeout(&adapter->init_done,
+		rc = send_login(adapter);
+		if (rc) {
+			dev_err(dev, "Unable to attempt device login\n");
+			return rc;
+		} else if (!wait_for_completion_timeout(&adapter->init_done,
 						 timeout)) {
 			dev_err(dev, "Login timeout\n");
 			return -1;
@@ -3074,7 +3077,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
 	strncpy(&vlcd->name, adapter->netdev->name, len);
 }
 
-static void send_login(struct ibmvnic_adapter *adapter)
+static int send_login(struct ibmvnic_adapter *adapter)
 {
 	struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
 	struct ibmvnic_login_buffer *login_buffer;
@@ -3090,6 +3093,12 @@ static void send_login(struct ibmvnic_adapter *adapter)
 	struct vnic_login_client_data *vlcd;
 	int i;
 
+	if (!adapter->tx_scrq || !adapter->rx_scrq) {
+		netdev_err(adapter->netdev,
+			   "RX or TX queues are not allocated, device login failed\n");
+		return -1;
+	}
+
 	release_login_rsp_buffer(adapter);
 	client_data_len = vnic_client_data_len(adapter);
 
@@ -3187,7 +3196,7 @@ static void send_login(struct ibmvnic_adapter *adapter)
 	crq.login.len = cpu_to_be32(buffer_size);
 	ibmvnic_send_crq(adapter, &crq);
 
-	return;
+	return 0;
 
 buf_rsp_map_failed:
 	kfree(login_rsp_buffer);
@@ -3196,7 +3205,7 @@ buf_rsp_alloc_failed:
 buf_map_failed:
 	kfree(login_buffer);
 buf_alloc_failed:
-	return;
+	return -1;
 }
 
 static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,

From 7e58a6c6627c8083a21351ef6bbc60ad0eaae1de Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:35 +0100
Subject: [PATCH 0459/1678] mlxsw: spectrum_ipip: Extract
 mlxsw_sp_l3addr_is_zero

Extract the logic for determining whether a given IPv4/IPv6 address is
all-zeroes from mlxsw_sp_ipip_tunnel_complete to a separate function.
Make that function public within the module.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c  | 16 +++++++++++-----
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h  |  6 ++++--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index a1c4b1e63f8d..0378cccc8182 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -122,6 +122,13 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
 	return (union mlxsw_sp_l3addr) {0};
 }
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
+{
+	union mlxsw_sp_l3addr naddr = {0};
+
+	return !memcmp(&addr, &naddr, sizeof(naddr));
+}
+
 static int
 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 				  struct mlxsw_sp_ipip_entry *ipip_entry)
@@ -215,15 +222,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
 {
 	union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
 	union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
-	union mlxsw_sp_l3addr naddr = {0};
 
 	/* Tunnels with unset local or remote address are valid in Linux and
 	 * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
 	 * (NBMA) tunnels. In principle these can be offloaded, but the driver
 	 * currently doesn't support this. So punt.
 	 */
-	return memcmp(&saddr, &naddr, sizeof(naddr)) &&
-	       memcmp(&daddr, &naddr, sizeof(naddr));
+	return !mlxsw_sp_l3addr_is_zero(saddr) &&
+	       !mlxsw_sp_l3addr_is_zero(daddr);
 }
 
 static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index a4ff5737eccc..888f19000209 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,8 @@ union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
 			   const struct net_device *ol_dev);
 
+bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr);
+
 enum mlxsw_sp_ipip_type {
 	MLXSW_SP_IPIP_TYPE_GRE4,
 	MLXSW_SP_IPIP_TYPE_MAX,

From 8897207c89156a5d05dc5bf1a80dede21e768f0a Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:36 +0100
Subject: [PATCH 0460/1678] mlxsw: spectrum_ipip: Support decoding IPv6 tunnel
 addresses

To support mirroring to ip6gretap, the SPAN module needs to be able to
decode IPv6 addresses specified at that tunnel.

Extend mlxsw_sp_ipip_netdev_saddr() and mlxsw_sp_ipip_netdev_daddr() to
support IPv6 addresses. To that end, add and publish a support function
mlxsw_sp_ipip_netdev_parms6().

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_ipip.c   | 29 +++++++++++++++++--
 .../ethernet/mellanox/mlxsw/spectrum_ipip.h   |  2 ++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 0378cccc8182..98d896c14b87 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -33,6 +33,7 @@
  */
 
 #include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum_ipip.h"
 
@@ -44,6 +45,14 @@ mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev)
 	return tun->parms;
 }
 
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev)
+{
+	struct ip6_tnl *tun = netdev_priv(ol_dev);
+
+	return tun->parms;
+}
+
 static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms)
 {
 	return !!(parms.i_flags & TUNNEL_KEY);
@@ -72,24 +81,38 @@ mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms)
 	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr };
 }
 
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms)
+{
+	return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr };
+}
+
 static union mlxsw_sp_l3addr
 mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms)
 {
 	return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr };
 }
 
+static union mlxsw_sp_l3addr
+mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms)
+{
+	return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr };
+}
+
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
 			   const struct net_device *ol_dev)
 {
 	struct ip_tunnel_parm parms4;
+	struct __ip6_tnl_parm parms6;
 
 	switch (proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
 		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
 		return mlxsw_sp_ipip_parms4_saddr(parms4);
 	case MLXSW_SP_L3_PROTO_IPV6:
-		break;
+		parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+		return mlxsw_sp_ipip_parms6_saddr(parms6);
 	}
 
 	WARN_ON(1);
@@ -109,13 +132,15 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
 			   const struct net_device *ol_dev)
 {
 	struct ip_tunnel_parm parms4;
+	struct __ip6_tnl_parm parms6;
 
 	switch (proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
 		parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
 		return mlxsw_sp_ipip_parms4_daddr(parms4);
 	case MLXSW_SP_L3_PROTO_IPV6:
-		break;
+		parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev);
+		return mlxsw_sp_ipip_parms6_daddr(parms6);
 	}
 
 	WARN_ON(1);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 888f19000209..6909d867bb59 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -41,6 +41,8 @@
 
 struct ip_tunnel_parm
 mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev);
+struct __ip6_tnl_parm
+mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev);
 
 union mlxsw_sp_l3addr
 mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,

From d1b2a6c4bed99fc7e8a11e7abcff19293d1974f5 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:37 +0100
Subject: [PATCH 0461/1678] net: GRE: Add is_gretap_dev, is_ip6gretap_dev

Determining whether a device is a GRE device is easily done by
inspecting struct net_device.type. However, for the tap variants, the
type is just ARPHRD_ETHER.

Therefore introduce two predicate functions that use netdev_ops to tell
the tap devices.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gre.h  | 3 +++
 net/ipv4/ip_gre.c  | 6 ++++++
 net/ipv6/ip6_gre.c | 6 ++++++
 3 files changed, 15 insertions(+)

diff --git a/include/net/gre.h b/include/net/gre.h
index f90585decbce..797142eee9cd 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 		     bool *csum_err, __be16 proto, int nhs);
 
+bool is_gretap_dev(const struct net_device *dev);
+bool is_ip6gretap_dev(const struct net_device *dev);
+
 static inline int gre_calc_hlen(__be16 o_flags)
 {
 	int addend = 4;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e496afa47709..0fe1d69b5df4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1323,6 +1323,12 @@ static void ipgre_tap_setup(struct net_device *dev)
 	ip_tunnel_setup(dev, gre_tap_net_id);
 }
 
+bool is_gretap_dev(const struct net_device *dev)
+{
+	return dev->netdev_ops == &gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_gretap_dev);
+
 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[],
 			 struct netlink_ext_ack *extack)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3026662a6413..4f150a394387 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1785,6 +1785,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
 	netif_keep_dst(dev);
 }
 
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+	return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
 				       struct ip_tunnel_encap *ipencap)
 {

From b0066da52ea53bae2b4ceed3f47d488df27dab66 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:38 +0100
Subject: [PATCH 0462/1678] ip_tunnel: Rename & publish init_tunnel_flow

Initializing struct flowi4 is useful for drivers that need to emulate
routing decisions made by a tunnel interface. Publish the
function (appropriately renamed) so that the drivers in question don't
need to cut'n'paste it around.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h | 16 ++++++++++++++++
 net/ipv4/ip_tunnel.c     | 40 ++++++++++++----------------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1f16773cfd76..cbe5addb9293 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id)
 
 #ifdef CONFIG_INET
 
+static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
+				       int proto,
+				       __be32 daddr, __be32 saddr,
+				       __be32 key, __u8 tos, int oif,
+				       __u32 mark)
+{
+	memset(fl4, 0, sizeof(*fl4));
+	fl4->flowi4_oif = oif;
+	fl4->daddr = daddr;
+	fl4->saddr = saddr;
+	fl4->flowi4_tos = tos;
+	fl4->flowi4_proto = proto;
+	fl4->fl4_gre_key = key;
+	fl4->flowi4_mark = mark;
+}
+
 int ip_tunnel_init(struct net_device *dev);
 void ip_tunnel_uninit(struct net_device *dev);
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d786a8441bce..b2117d89bc83 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -290,22 +290,6 @@ failed:
 	return ERR_PTR(err);
 }
 
-static inline void init_tunnel_flow(struct flowi4 *fl4,
-				    int proto,
-				    __be32 daddr, __be32 saddr,
-				    __be32 key, __u8 tos, int oif,
-				    __u32 mark)
-{
-	memset(fl4, 0, sizeof(*fl4));
-	fl4->flowi4_oif = oif;
-	fl4->daddr = daddr;
-	fl4->saddr = saddr;
-	fl4->flowi4_tos = tos;
-	fl4->flowi4_proto = proto;
-	fl4->fl4_gre_key = key;
-	fl4->flowi4_mark = mark;
-}
-
 static int ip_tunnel_bind_dev(struct net_device *dev)
 {
 	struct net_device *tdev = NULL;
@@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 		struct flowi4 fl4;
 		struct rtable *rt;
 
-		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
-				 iph->saddr, tunnel->parms.o_key,
-				 RT_TOS(iph->tos), tunnel->parms.link,
-				 tunnel->fwmark);
+		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
+				    iph->saddr, tunnel->parms.o_key,
+				    RT_TOS(iph->tos), tunnel->parms.link,
+				    tunnel->fwmark);
 		rt = ip_route_output_key(tunnel->net, &fl4);
 
 		if (!IS_ERR(rt)) {
@@ -581,8 +565,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 		else if (skb->protocol == htons(ETH_P_IPV6))
 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
 	}
-	init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
-			 RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
+	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+			    RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
 	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 		goto tx_error;
 	rt = ip_route_output_key(tunnel->net, &fl4);
@@ -711,14 +695,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	if (tunnel->fwmark) {
-		init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-				 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-				 tunnel->fwmark);
+		ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+				    tunnel->parms.o_key, RT_TOS(tos),
+				    tunnel->parms.link, tunnel->fwmark);
 	}
 	else {
-		init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
-				 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-				 skb->mark);
+		ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
+				    tunnel->parms.o_key, RT_TOS(tos),
+				    tunnel->parms.link, skb->mark);
 	}
 
 	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)

From 0d6cd3fcbc0222234c7f91c7f3e57d34fc009714 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:39 +0100
Subject: [PATCH 0463/1678] mlxsw: reg: Add SPAN encapsulation to MPAT register

MPAT Register is used to query and configure the Switch Port Analyzer
Table. To configure Port Analyzer to encapsulate mirrored packets,
additional fields need to be specified for the MPAT register.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 141 +++++++++++++++++++++-
 1 file changed, 139 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 0e08be41c8e0..2aaccbac3ed1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1,11 +1,11 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/reg.h
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
- * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -6772,6 +6772,101 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1);
  */
 MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1);
 
+enum mlxsw_reg_mpat_span_type {
+	/* Local SPAN Ethernet.
+	 * The original packet is not encapsulated.
+	 */
+	MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0,
+
+	/* Encapsulated Remote SPAN Ethernet L3 GRE.
+	 * The packet is encapsulated with GRE header.
+	 */
+	MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3,
+};
+
+/* reg_mpat_span_type
+ * SPAN type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4);
+
+/* Remote SPAN - Ethernet VLAN
+ * - - - - - - - - - - - - - -
+ */
+
+/* reg_mpat_eth_rspan_vid
+ * Encapsulation header VLAN ID.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12);
+
+/* Encapsulated Remote SPAN - Ethernet L2
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_version {
+	MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15,
+};
+
+/* reg_mpat_eth_rspan_version
+ * RSPAN mirror header version.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4);
+
+/* reg_mpat_eth_rspan_mac
+ * Destination MAC address.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6);
+
+/* reg_mpat_eth_rspan_tp
+ * Tag Packet. Indicates whether the mirroring header should be VLAN tagged.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1);
+
+/* Encapsulated Remote SPAN - Ethernet L3
+ * - - - - - - - - - - - - - - - - - - -
+ */
+
+enum mlxsw_reg_mpat_eth_rspan_protocol {
+	MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4,
+	MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6,
+};
+
+/* reg_mpat_eth_rspan_protocol
+ * SPAN encapsulation protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4);
+
+/* reg_mpat_eth_rspan_ttl
+ * Encapsulation header Time-to-Live/HopLimit.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8);
+
+/* reg_mpat_eth_rspan_smac
+ * Source MAC address
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6);
+
+/* reg_mpat_eth_rspan_dip*
+ * Destination IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16);
+
+/* reg_mpat_eth_rspan_sip*
+ * Source IP address. The IP version is configured by protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
+MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
+
 static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
 				       u16 system_port, bool e)
 {
@@ -6783,6 +6878,48 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
 	mlxsw_reg_mpat_be_set(payload, 1);
 }
 
+static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
+{
+	mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload,
+				 enum mlxsw_reg_mpat_eth_rspan_version version,
+				 const char *mac,
+				 bool tp)
+{
+	mlxsw_reg_mpat_eth_rspan_version_set(payload, version);
+	mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac);
+	mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl,
+				      const char *smac,
+				      u32 sip, u32 dip)
+{
+	mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+	mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+	mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+				    MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4);
+	mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip);
+	mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip);
+}
+
+static inline void
+mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl,
+				      const char *smac,
+				      struct in6_addr sip, struct in6_addr dip)
+{
+	mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl);
+	mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac);
+	mlxsw_reg_mpat_eth_rspan_protocol_set(payload,
+				    MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6);
+	mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip);
+	mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip);
+}
+
 /* MPAR - Monitoring Port Analyzer Register
  * ----------------------------------------
  * MPAR register is used to query and configure the port analyzer port mirroring

From 1da93eb466dc6f8a92e0316fd983ee8a3338db4c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:40 +0100
Subject: [PATCH 0464/1678] mlxsw: reg: Extend mlxsw_reg_mpat_pack()

To support encapsulated SPAN, extend mlxsw_reg_mpat_pack() with a field
to set the SPAN type.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h           | 4 +++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 2aaccbac3ed1..cb5f77f09f8e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -6868,7 +6868,8 @@ MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32);
 MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16);
 
 static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
-				       u16 system_port, bool e)
+				       u16 system_port, bool e,
+				       enum mlxsw_reg_mpat_span_type span_type)
 {
 	MLXSW_REG_ZERO(mpat, payload);
 	mlxsw_reg_mpat_pa_id_set(payload, pa_id);
@@ -6876,6 +6877,7 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
 	mlxsw_reg_mpat_e_set(payload, e);
 	mlxsw_reg_mpat_qos_set(payload, 1);
 	mlxsw_reg_mpat_be_set(payload, 1);
+	mlxsw_reg_mpat_span_type_set(payload, span_type);
 }
 
 static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index c3bec37d71ed..1433d4890b88 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -93,7 +93,8 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 		return NULL;
 
 	/* create a new port analayzer entry for local_port */
-	mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true);
+	mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true,
+			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 	if (err)
 		return NULL;
@@ -111,7 +112,8 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 	char mpat_pl[MLXSW_REG_MPAT_LEN];
 	int pa_id = span_entry->id;
 
-	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false);
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false,
+			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 }
 

From 98977089d8eef337608272699f4197c631d447b8 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:41 +0100
Subject: [PATCH 0465/1678] mlxsw: span: Remove span_entry by span_id

Instead of removing span_entry by the port number, allow removing by
SPAN id. That simplifies some code right here, and for mirroring to soft
netdevices, avoids problems with netdevice pointer invalidation and
reuse.

Rename mlxsw_sp_span_entry_find() to mlxsw_sp_span_entry_find_by_port()
and keep it--follow-up patches will make use of it.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlxsw/core_acl_flex_actions.c    |  6 ++--
 .../mellanox/mlxsw/core_acl_flex_actions.h    |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum.c    |  5 ++--
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  2 +-
 .../mlxsw/spectrum_acl_flex_actions.c         | 27 ++++-------------
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 29 ++++++++++++++-----
 .../ethernet/mellanox/mlxsw/spectrum_span.h   |  7 +++--
 7 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index b698fb481b2e..d1c2d85f396d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
@@ -838,7 +838,6 @@ struct mlxsw_afa_mirror {
 	struct mlxsw_afa_resource resource;
 	int span_id;
 	u8 local_in_port;
-	u8 local_out_port;
 	bool ingress;
 };
 
@@ -848,7 +847,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block,
 {
 	block->afa->ops->mirror_del(block->afa->ops_priv,
 				    mirror->local_in_port,
-				    mirror->local_out_port,
+				    mirror->span_id,
 				    mirror->ingress);
 	kfree(mirror);
 }
@@ -882,7 +881,6 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
 		goto err_mirror_add;
 
 	mirror->ingress = ingress;
-	mirror->local_out_port = local_out_port;
 	mirror->local_in_port = local_in_port;
 	mirror->resource.destructor = mlxsw_afa_mirror_destructor;
 	mlxsw_afa_resource_add(block, &mirror->resource);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 43132293475c..4e991ca6dce5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -50,7 +50,7 @@ struct mlxsw_afa_ops {
 	void (*counter_index_put)(void *priv, unsigned int counter_index);
 	int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
 			  bool ingress, int *p_span_id);
-	void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port,
+	void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
 			   bool ingress);
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bfde93910f82..10bb4891ec31 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1273,11 +1273,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 	to_port = netdev_priv(to_dev);
 
-	mirror->to_local_port = to_port->local_port;
 	mirror->ingress = ingress;
 	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
 	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
-					true);
+					true, &mirror->span_id);
 }
 
 static void
@@ -1288,7 +1287,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	span_type = mirror->ingress ?
 			MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port,
+	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
 				 span_type, true);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 675e03a892ed..2673310f92da 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -124,7 +124,7 @@ enum mlxsw_sp_port_mall_action_type {
 };
 
 struct mlxsw_sp_port_mall_mirror_tc_entry {
-	u8 to_local_port;
+	int span_id;
 	bool ingress;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index f7e61cecc42b..b450d3e4a905 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
  *
@@ -130,36 +130,19 @@ mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
 			bool ingress, int *p_span_id)
 {
 	struct mlxsw_sp_port *in_port, *out_port;
-	struct mlxsw_sp_span_entry *span_entry;
 	struct mlxsw_sp *mlxsw_sp = priv;
 	enum mlxsw_sp_span_type type;
-	int err;
 
 	type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
 	out_port = mlxsw_sp->ports[local_out_port];
 	in_port = mlxsw_sp->ports[local_in_port];
 
-	err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false);
-	if (err)
-		return err;
-
-	span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port);
-	if (!span_entry) {
-		err = -ENOENT;
-		goto err_span_entry_find;
-	}
-
-	*p_span_id = span_entry->id;
-	return 0;
-
-err_span_entry_find:
-	mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
-	return err;
+	return mlxsw_sp_span_mirror_add(in_port, out_port, type,
+					false, p_span_id);
 }
 
 static void
-mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
-			bool ingress)
+mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_port *in_port;
@@ -168,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port,
 	type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
 	in_port = mlxsw_sp->ports[local_in_port];
 
-	mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false);
+	mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
 }
 
 static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 1433d4890b88..9e596b064582 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -118,7 +118,7 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 }
 
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
 	int i;
 
@@ -131,13 +131,27 @@ mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	return NULL;
 }
 
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+	int i;
+
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+		if (curr->ref_count && curr->id == span_id)
+			return curr;
+	}
+	return NULL;
+}
+
 static struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
 {
 	struct mlxsw_sp_span_entry *span_entry;
 
-	span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
-					      port->local_port);
+	span_entry = mlxsw_sp_span_entry_find_by_port(port->mlxsw_sp,
+						      port->local_port);
 	if (span_entry) {
 		/* Already exists, just take a reference */
 		span_entry->ref_count++;
@@ -316,7 +330,8 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 			     struct mlxsw_sp_port *to,
-			     enum mlxsw_sp_span_type type, bool bind)
+			     enum mlxsw_sp_span_type type, bool bind,
+			     int *p_span_id)
 {
 	struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
 	struct mlxsw_sp_span_entry *span_entry;
@@ -333,6 +348,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 	if (err)
 		goto err_port_bind;
 
+	*p_span_id = span_entry->id;
 	return 0;
 
 err_port_bind:
@@ -340,13 +356,12 @@ err_port_bind:
 	return err;
 }
 
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
 			      enum mlxsw_sp_span_type type, bool bind)
 {
 	struct mlxsw_sp_span_entry *span_entry;
 
-	span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
-					      destination_port);
+	span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
 	if (!span_entry) {
 		netdev_err(from->dev, "no span entry found\n");
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 069050e385ff..02dedf2fcd3f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -62,11 +62,12 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 			     struct mlxsw_sp_port *to,
-			     enum mlxsw_sp_span_type type, bool bind);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port,
+			     enum mlxsw_sp_span_type type,
+			     bool bind, int *p_span_id);
+void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
 			      enum mlxsw_sp_span_type type, bool bind);
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, u8 local_port);
 
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
 

From 3546b03ffcdb0a70b0d7302d7e139f096a613e9f Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:42 +0100
Subject: [PATCH 0466/1678] mlxsw: spectrum_span: Initialize span_entry.id
 eagerly

It is known statically ahead of time which SPAN entry will have which
ID. Just initialize it eagerly in mlxsw_sp_span_init(), don't wait until
the entry is actually created. This simplifies some code in
mlxsw_sp_span_entry_create()

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_span.c    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 9e596b064582..5c87e6dfc5a1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -51,8 +51,12 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 	if (!mlxsw_sp->span.entries)
 		return -ENOMEM;
 
-	for (i = 0; i < mlxsw_sp->span.entries_count; i++)
-		INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list);
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+
+		INIT_LIST_HEAD(&curr->bound_ports_list);
+		curr->id = i;
+	}
 
 	return 0;
 }
@@ -72,34 +76,30 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 static struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 {
+	struct mlxsw_sp_span_entry *span_entry = NULL;
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	struct mlxsw_sp_span_entry *span_entry;
 	char mpat_pl[MLXSW_REG_MPAT_LEN];
 	u8 local_port = port->local_port;
-	int index;
 	int i;
 	int err;
 
 	/* find a free entry to use */
-	index = -1;
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
 		if (!mlxsw_sp->span.entries[i].ref_count) {
-			index = i;
 			span_entry = &mlxsw_sp->span.entries[i];
 			break;
 		}
 	}
-	if (index < 0)
+	if (!span_entry)
 		return NULL;
 
 	/* create a new port analayzer entry for local_port */
-	mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true,
+	mlxsw_reg_mpat_pack(mpat_pl, span_entry->id, local_port, true,
 			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 	if (err)
 		return NULL;
 
-	span_entry->id = index;
 	span_entry->ref_count = 1;
 	span_entry->local_port = local_port;
 	return span_entry;

From 7b2ef81fd2bd4d01fc4890fb22c4dcfe76dd8c77 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:43 +0100
Subject: [PATCH 0467/1678] mlxsw: spectrum_span: Extract
 mlxsw_sp_span_entry_{de, }configure()

Configuring the hardware for encapsulated SPAN involves more code than
the simple mirroring case. Extract the related code to a separate
function to separate it from the rest of SPAN entry creation. Extract
deconfigure as well for symmetry, even though disablement is the same
regardless of SPAN type.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 43 +++++++++++++------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 5c87e6dfc5a1..442771908600 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -73,15 +73,40 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 	kfree(mlxsw_sp->span.entries);
 }
 
+static int
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_span_entry *span_entry,
+			      u8 local_port)
+{
+	char mpat_pl[MLXSW_REG_MPAT_LEN];
+	int pa_id = span_entry->id;
+
+	/* Create a new port analayzer entry for local_port. */
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_span_entry *span_entry)
+{
+	u8 local_port = span_entry->local_port;
+	char mpat_pl[MLXSW_REG_MPAT_LEN];
+	int pa_id = span_entry->id;
+
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false,
+			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
 static struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 {
 	struct mlxsw_sp_span_entry *span_entry = NULL;
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char mpat_pl[MLXSW_REG_MPAT_LEN];
 	u8 local_port = port->local_port;
 	int i;
-	int err;
 
 	/* find a free entry to use */
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
@@ -93,11 +118,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 	if (!span_entry)
 		return NULL;
 
-	/* create a new port analayzer entry for local_port */
-	mlxsw_reg_mpat_pack(mpat_pl, span_entry->id, local_port, true,
-			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
-	if (err)
+	if (mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, local_port))
 		return NULL;
 
 	span_entry->ref_count = 1;
@@ -108,13 +129,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_span_entry *span_entry)
 {
-	u8 local_port = span_entry->local_port;
-	char mpat_pl[MLXSW_REG_MPAT_LEN];
-	int pa_id = span_entry->id;
-
-	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false,
-			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+	mlxsw_sp_span_entry_deconfigure(mlxsw_sp, span_entry);
 }
 
 struct mlxsw_sp_span_entry *

From 079c9f393b8d467995516c4716557373edefaa89 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:44 +0100
Subject: [PATCH 0468/1678] mlxsw: spectrum: Keep mirror netdev in
 mlxsw_sp_span_entry

Currently the only mirror action supported by mlxsw is mirror to another
mlxsw physical port. Correspondingly, span_entry, which tracks each
mlxsw mirror in the system, currently holds a u8 number of the
destination port.

To extend this system to mirror to gretap and ip6gretap netdevices, have
struct mlxsw_sp_span_entry actually hold the destination netdevice
itself.

This change then trickles down in obvious manner to SPAN module API and
mirror-related interfaces in struct mlxsw_afa_ops.

To prevent use of invalid pointer, NETDEV_UNREGISTER needs to be hooked
and the corresponding SPAN entry invalidated.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlxsw/core_acl_flex_actions.c    | 13 +++----
 .../mellanox/mlxsw/core_acl_flex_actions.h    |  7 +++-
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 11 ++++--
 .../ethernet/mellanox/mlxsw/spectrum_acl.c    |  2 +-
 .../mlxsw/spectrum_acl_flex_actions.c         |  8 ++--
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 39 ++++++++++++-------
 .../ethernet/mellanox/mlxsw/spectrum_span.h   | 10 +++--
 7 files changed, 56 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index d1c2d85f396d..ba338428ffd1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -863,9 +863,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block,
 }
 
 static struct mlxsw_afa_mirror *
-mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
-			u8 local_in_port, u8 local_out_port,
-			bool ingress)
+mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port,
+			const struct net_device *out_dev, bool ingress)
 {
 	struct mlxsw_afa_mirror *mirror;
 	int err;
@@ -875,7 +874,7 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block,
 		return ERR_PTR(-ENOMEM);
 
 	err = block->afa->ops->mirror_add(block->afa->ops_priv,
-					  local_in_port, local_out_port,
+					  local_in_port, out_dev,
 					  ingress, &mirror->span_id);
 	if (err)
 		goto err_mirror_add;
@@ -907,13 +906,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block,
 }
 
 int
-mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-			      u8 local_in_port, u8 local_out_port, bool ingress)
+mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port,
+			      const struct net_device *out_dev, bool ingress)
 {
 	struct mlxsw_afa_mirror *mirror;
 	int err;
 
-	mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port,
+	mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev,
 					 ingress);
 	if (IS_ERR(mirror))
 		return PTR_ERR(mirror);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index 4e991ca6dce5..6dd601703c99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -36,6 +36,7 @@
 #define _MLXSW_CORE_ACL_FLEX_ACTIONS_H
 
 #include <linux/types.h>
+#include <linux/netdevice.h>
 
 struct mlxsw_afa;
 struct mlxsw_afa_block;
@@ -48,7 +49,8 @@ struct mlxsw_afa_ops {
 	void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index);
 	int (*counter_index_get)(void *priv, unsigned int *p_counter_index);
 	void (*counter_index_put)(void *priv, unsigned int counter_index);
-	int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port,
+	int (*mirror_add)(void *priv, u8 local_in_port,
+			  const struct net_device *out_dev,
 			  bool ingress, int *p_span_id);
 	void (*mirror_del)(void *priv, u8 local_in_port, int span_id,
 			   bool ingress);
@@ -70,7 +72,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
 int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
 					    u16 trap_id);
 int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block,
-				  u8 local_in_port, u8 local_out_port,
+				  u8 local_in_port,
+				  const struct net_device *out_dev,
 				  bool ingress);
 int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
 			       u8 local_port, bool in_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 10bb4891ec31..86991db3478c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1258,7 +1258,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 				      bool ingress)
 {
 	enum mlxsw_sp_span_type span_type;
-	struct mlxsw_sp_port *to_port;
 	struct net_device *to_dev;
 
 	to_dev = tcf_mirred_dev(a);
@@ -1271,11 +1270,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 		netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
 		return -EOPNOTSUPP;
 	}
-	to_port = netdev_priv(to_dev);
 
 	mirror->ingress = ingress;
 	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type,
+	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
 					true, &mirror->span_id);
 }
 
@@ -4638,10 +4636,17 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
 				    unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct mlxsw_sp_span_entry *span_entry;
 	struct mlxsw_sp *mlxsw_sp;
 	int err = 0;
 
 	mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+	if (event == NETDEV_UNREGISTER) {
+		span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev);
+		if (span_entry)
+			mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
+	}
+
 	if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
 						       event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 0897a5435cc2..50f5eacdfa24 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -590,7 +590,7 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 
 	return mlxsw_afa_block_append_mirror(rulei->act_block,
 					     in_port->local_port,
-					     out_port->local_port,
+					     out_dev,
 					     binding->ingress);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index b450d3e4a905..510ce48d87f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -126,18 +126,18 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index)
 }
 
 static int
-mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port,
+mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
+			const struct net_device *out_dev,
 			bool ingress, int *p_span_id)
 {
-	struct mlxsw_sp_port *in_port, *out_port;
+	struct mlxsw_sp_port *in_port;
 	struct mlxsw_sp *mlxsw_sp = priv;
 	enum mlxsw_sp_span_type type;
 
 	type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	out_port = mlxsw_sp->ports[local_out_port];
 	in_port = mlxsw_sp->ports[local_in_port];
 
-	return mlxsw_sp_span_mirror_add(in_port, out_port, type,
+	return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
 					false, p_span_id);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 442771908600..9819cdcf9e5c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -91,7 +91,8 @@ static void
 mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_span_entry *span_entry)
 {
-	u8 local_port = span_entry->local_port;
+	struct mlxsw_sp_port *to_port = netdev_priv(span_entry->to_dev);
+	u8 local_port = to_port->local_port;
 	char mpat_pl[MLXSW_REG_MPAT_LEN];
 	int pa_id = span_entry->id;
 
@@ -101,11 +102,12 @@ mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp *mlxsw_sp,
 }
 
 static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
+mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
+			   const struct net_device *to_dev)
 {
+	struct mlxsw_sp_port *to_port = netdev_priv(to_dev);
 	struct mlxsw_sp_span_entry *span_entry = NULL;
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	u8 local_port = port->local_port;
+	u8 local_port = to_port->local_port;
 	int i;
 
 	/* find a free entry to use */
@@ -122,30 +124,39 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 		return NULL;
 
 	span_entry->ref_count = 1;
-	span_entry->local_port = local_port;
+	span_entry->to_dev = to_dev;
 	return span_entry;
 }
 
 static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_span_entry *span_entry)
 {
-	mlxsw_sp_span_entry_deconfigure(mlxsw_sp, span_entry);
+	if (span_entry->to_dev)
+		mlxsw_sp_span_entry_deconfigure(mlxsw_sp, span_entry);
 }
 
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+				 const struct net_device *to_dev)
 {
 	int i;
 
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
 
-		if (curr->ref_count && curr->local_port == local_port)
+		if (curr->ref_count && curr->to_dev == to_dev)
 			return curr;
 	}
 	return NULL;
 }
 
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_span_entry *span_entry)
+{
+	mlxsw_sp_span_entry_deconfigure(mlxsw_sp, span_entry);
+	span_entry->to_dev = NULL;
+}
+
 static struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
 {
@@ -161,19 +172,19 @@ mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
 }
 
 static struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
+			const struct net_device *to_dev)
 {
 	struct mlxsw_sp_span_entry *span_entry;
 
-	span_entry = mlxsw_sp_span_entry_find_by_port(port->mlxsw_sp,
-						      port->local_port);
+	span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
 	if (span_entry) {
 		/* Already exists, just take a reference */
 		span_entry->ref_count++;
 		return span_entry;
 	}
 
-	return mlxsw_sp_span_entry_create(port);
+	return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev);
 }
 
 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
@@ -344,7 +355,7 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
 }
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-			     struct mlxsw_sp_port *to,
+			     const struct net_device *to_dev,
 			     enum mlxsw_sp_span_type type, bool bind,
 			     int *p_span_id)
 {
@@ -352,7 +363,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 	struct mlxsw_sp_span_entry *span_entry;
 	int err;
 
-	span_entry = mlxsw_sp_span_entry_get(to);
+	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev);
 	if (!span_entry)
 		return -ENOENT;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 02dedf2fcd3f..44b307c59d0e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -51,7 +51,7 @@ struct mlxsw_sp_span_inspected_port {
 };
 
 struct mlxsw_sp_span_entry {
-	u8 local_port;
+	const struct net_device *to_dev;
 	struct list_head bound_ports_list;
 	int ref_count;
 	int id;
@@ -61,13 +61,17 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-			     struct mlxsw_sp_port *to,
+			     const struct net_device *to_dev,
 			     enum mlxsw_sp_span_type type,
 			     bool bind, int *p_span_id);
 void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
 			      enum mlxsw_sp_span_type type, bool bind);
 struct mlxsw_sp_span_entry *
-mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, u8 local_port);
+mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
+				 const struct net_device *to_dev);
+
+void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_span_entry *span_entry);
 
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
 

From 169b5d95c15e01e08e1665bd8ceaff9bf8331c33 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:45 +0100
Subject: [PATCH 0469/1678] mlxsw: spectrum_span: Generalize SPAN support

To support mirroring to different device types, the functions that
partake in configuring the port analyzer need to be extended to admit
non-trivial SPAN types.

Create a structure where all details of SPAN configuration are kept,
struct mlxsw_sp_span_parms. Also create struct mlxsw_sp_span_entry_ops
to keep per-SPAN-type operations.

Instantiate the latter once for MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH, and
once for a suite of NOP callbacks used for invalidated SPAN entry. Put
the formet as a sole member of a new array mlxsw_sp_span_entry_types,
where all known SPAN types are kept. Introduce a new function,
mlxsw_sp_span_entry_ops(), to look up the right ops suite given a
netdevice.

Change mlxsw_sp_span_mirror_add() to use both parms and ops structures.
Change mlxsw_sp_span_entry_get() and mlxsw_sp_span_entry_create() to
take these as arguments. Modify mlxsw_sp_span_entry_configure() and
mlxsw_sp_span_entry_deconfigure() to dispatch to ops.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    |   5 -
 .../ethernet/mellanox/mlxsw/spectrum_acl.c    |   3 -
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 155 +++++++++++++++---
 .../ethernet/mellanox/mlxsw/spectrum_span.h   |  17 ++
 4 files changed, 147 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 86991db3478c..b070cad8dba7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1266,11 +1266,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
 		return -EINVAL;
 	}
 
-	if (!mlxsw_sp_port_dev_check(to_dev)) {
-		netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port");
-		return -EOPNOTSUPP;
-	}
-
 	mirror->ingress = ingress;
 	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
 	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 50f5eacdfa24..9d7197b9abb1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -581,9 +581,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 	binding = list_first_entry(&block->binding_list,
 				   struct mlxsw_sp_acl_block_binding, list);
 	in_port = binding->mlxsw_sp_port;
-	if (!mlxsw_sp_port_dev_check(out_dev))
-		return -EINVAL;
-
 	out_port = netdev_priv(out_dev);
 	if (out_port->mlxsw_sp != mlxsw_sp)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 9819cdcf9e5c..c0e0e9af2da7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -74,40 +74,120 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 }
 
 static int
-mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_span_entry *span_entry,
-			      u8 local_port)
+mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev,
+			       struct mlxsw_sp_span_parms *sparmsp)
 {
+	sparmsp->dest_port = netdev_priv(to_dev);
+	return 0;
+}
+
+static int
+mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
+				   struct mlxsw_sp_span_parms sparms)
+{
+	struct mlxsw_sp_port *dest_port = sparms.dest_port;
+	struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+	u8 local_port = dest_port->local_port;
 	char mpat_pl[MLXSW_REG_MPAT_LEN];
 	int pa_id = span_entry->id;
 
 	/* Create a new port analayzer entry for local_port. */
 	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
 			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 }
 
 static void
-mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp *mlxsw_sp,
-				struct mlxsw_sp_span_entry *span_entry)
+mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
+				       enum mlxsw_reg_mpat_span_type span_type)
 {
-	struct mlxsw_sp_port *to_port = netdev_priv(span_entry->to_dev);
-	u8 local_port = to_port->local_port;
+	struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port;
+	struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+	u8 local_port = dest_port->local_port;
 	char mpat_pl[MLXSW_REG_MPAT_LEN];
 	int pa_id = span_entry->id;
 
-	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false,
-			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 }
 
+static void
+mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+	mlxsw_sp_span_entry_deconfigure_common(span_entry,
+					    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
+	.can_handle = mlxsw_sp_port_dev_check,
+	.parms = mlxsw_sp_span_entry_phys_parms,
+	.configure = mlxsw_sp_span_entry_phys_configure,
+	.deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
+};
+
+static const
+struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
+	&mlxsw_sp_span_entry_ops_phys,
+};
+
+static int
+mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
+			      struct mlxsw_sp_span_parms *sparmsp)
+{
+	sparmsp->dest_port = NULL;
+	return 0;
+}
+
+static int
+mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry,
+				  struct mlxsw_sp_span_parms sparms)
+{
+	return 0;
+}
+
+static void
+mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
+	.parms = mlxsw_sp_span_entry_nop_parms,
+	.configure = mlxsw_sp_span_entry_nop_configure,
+	.deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
+};
+
+static void
+mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_span_entry *span_entry,
+			      struct mlxsw_sp_span_parms sparms)
+{
+	if (sparms.dest_port) {
+		if (span_entry->ops->configure(span_entry, sparms)) {
+			netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+				   sparms.dest_port->dev->name);
+			sparms.dest_port = NULL;
+		}
+	}
+
+	span_entry->parms = sparms;
+}
+
+static void
+mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+	if (span_entry->parms.dest_port)
+		span_entry->ops->deconfigure(span_entry);
+}
+
 static struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
-			   const struct net_device *to_dev)
+			   const struct net_device *to_dev,
+			   const struct mlxsw_sp_span_entry_ops *ops,
+			   struct mlxsw_sp_span_parms sparms)
 {
-	struct mlxsw_sp_port *to_port = netdev_priv(to_dev);
 	struct mlxsw_sp_span_entry *span_entry = NULL;
-	u8 local_port = to_port->local_port;
 	int i;
 
 	/* find a free entry to use */
@@ -120,19 +200,17 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
 	if (!span_entry)
 		return NULL;
 
-	if (mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, local_port))
-		return NULL;
-
+	span_entry->ops = ops;
 	span_entry->ref_count = 1;
 	span_entry->to_dev = to_dev;
+	mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
+
 	return span_entry;
 }
 
-static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp,
-					struct mlxsw_sp_span_entry *span_entry)
+static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry)
 {
-	if (span_entry->to_dev)
-		mlxsw_sp_span_entry_deconfigure(mlxsw_sp, span_entry);
+	mlxsw_sp_span_entry_deconfigure(span_entry);
 }
 
 struct mlxsw_sp_span_entry *
@@ -153,8 +231,8 @@ mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
 void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_span_entry *span_entry)
 {
-	mlxsw_sp_span_entry_deconfigure(mlxsw_sp, span_entry);
-	span_entry->to_dev = NULL;
+	mlxsw_sp_span_entry_deconfigure(span_entry);
+	span_entry->ops = &mlxsw_sp_span_entry_ops_nop;
 }
 
 static struct mlxsw_sp_span_entry *
@@ -173,7 +251,9 @@ mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
 
 static struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
-			const struct net_device *to_dev)
+			const struct net_device *to_dev,
+			const struct mlxsw_sp_span_entry_ops *ops,
+			struct mlxsw_sp_span_parms sparms)
 {
 	struct mlxsw_sp_span_entry *span_entry;
 
@@ -184,7 +264,7 @@ mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
 		return span_entry;
 	}
 
-	return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev);
+	return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms);
 }
 
 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
@@ -192,7 +272,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
 {
 	WARN_ON(!span_entry->ref_count);
 	if (--span_entry->ref_count == 0)
-		mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
+		mlxsw_sp_span_entry_destroy(span_entry);
 	return 0;
 }
 
@@ -354,16 +434,41 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
 	kfree(inspected_port);
 }
 
+static const struct mlxsw_sp_span_entry_ops *
+mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
+			const struct net_device *to_dev)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i)
+		if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev))
+			return mlxsw_sp_span_entry_types[i];
+
+	return NULL;
+}
+
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 			     const struct net_device *to_dev,
 			     enum mlxsw_sp_span_type type, bool bind,
 			     int *p_span_id)
 {
 	struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
+	const struct mlxsw_sp_span_entry_ops *ops;
+	struct mlxsw_sp_span_parms sparms = {0};
 	struct mlxsw_sp_span_entry *span_entry;
 	int err;
 
-	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev);
+	ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+	if (!ops) {
+		netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
+		return -EOPNOTSUPP;
+	}
+
+	err = ops->parms(to_dev, &sparms);
+	if (err)
+		return err;
+
+	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
 	if (!span_entry)
 		return -ENOENT;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 44b307c59d0e..9390b05a7919 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -50,13 +50,30 @@ struct mlxsw_sp_span_inspected_port {
 	u8 local_port;
 };
 
+struct mlxsw_sp_span_parms {
+	struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+};
+
+struct mlxsw_sp_span_entry_ops;
+
 struct mlxsw_sp_span_entry {
 	const struct net_device *to_dev;
+	const struct mlxsw_sp_span_entry_ops *ops;
+	struct mlxsw_sp_span_parms parms;
 	struct list_head bound_ports_list;
 	int ref_count;
 	int id;
 };
 
+struct mlxsw_sp_span_entry_ops {
+	bool (*can_handle)(const struct net_device *to_dev);
+	int (*parms)(const struct net_device *to_dev,
+		     struct mlxsw_sp_span_parms *sparmsp);
+	int (*configure)(struct mlxsw_sp_span_entry *span_entry,
+			 struct mlxsw_sp_span_parms sparms);
+	void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
+};
+
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
 

From 803335acbe3371f1c0e9dd02f318b16f5abc22f4 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:46 +0100
Subject: [PATCH 0470/1678] mlxsw: Handle config changes pertinent to SPAN

For some netdevices, for which mlxsw offloads mirroring, may have a
complex relationship between the declared intent and low-level
device configuration.

Trying to accurately track which changes might influence offloading
decisions is finicky and error-prone. Instead, this patch introduces a
function mlxsw_sp_span_entry_respin, which re-queries the configuration
anew and, if different, removes the existing offloads and installs new
ones.

Call this function strategically at event handlers that might influence
the mirroring configuration.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 29 +++++++++++--------
 .../ethernet/mellanox/mlxsw/spectrum_router.c |  7 +++++
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 24 +++++++++++++++
 .../ethernet/mellanox/mlxsw/spectrum_span.h   |  1 +
 4 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b070cad8dba7..8d2d140d7910 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1,6 +1,6 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum.c
- * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
  * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
@@ -3667,14 +3667,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_afa_init;
 	}
 
+	err = mlxsw_sp_span_init(mlxsw_sp);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
+		goto err_span_init;
+	}
+
+	/* Initialize router after SPAN is initialized, so that the FIB and
+	 * neighbor event handlers can issue SPAN respin.
+	 */
 	err = mlxsw_sp_router_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
 		goto err_router_init;
 	}
 
-	/* Initialize netdevice notifier after router is initialized, so that
-	 * the event handler can use router structures.
+	/* Initialize netdevice notifier after router and SPAN is initialized,
+	 * so that the event handler can use router structures and call SPAN
+	 * respin.
 	 */
 	mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
 	err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
@@ -3683,12 +3693,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_netdev_notifier;
 	}
 
-	err = mlxsw_sp_span_init(mlxsw_sp);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
-		goto err_span_init;
-	}
-
 	err = mlxsw_sp_acl_init(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n");
@@ -3714,12 +3718,12 @@ err_ports_create:
 err_dpipe_init:
 	mlxsw_sp_acl_fini(mlxsw_sp);
 err_acl_init:
-	mlxsw_sp_span_fini(mlxsw_sp);
-err_span_init:
 	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
 	mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+	mlxsw_sp_span_fini(mlxsw_sp);
+err_span_init:
 	mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
 	mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3745,9 +3749,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 	mlxsw_sp_ports_remove(mlxsw_sp);
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
 	mlxsw_sp_acl_fini(mlxsw_sp);
-	mlxsw_sp_span_fini(mlxsw_sp);
 	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 	mlxsw_sp_router_fini(mlxsw_sp);
+	mlxsw_sp_span_fini(mlxsw_sp);
 	mlxsw_sp_afa_fini(mlxsw_sp);
 	mlxsw_sp_counter_pool_fini(mlxsw_sp);
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4641,6 +4645,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
 		if (span_entry)
 			mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry);
 	}
+	mlxsw_sp_span_respin(mlxsw_sp);
 
 	if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
 		err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 05146970c19c..69f16c605b9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -70,6 +70,7 @@
 #include "spectrum_mr.h"
 #include "spectrum_mr_tcam.h"
 #include "spectrum_router.h"
+#include "spectrum_span.h"
 
 struct mlxsw_sp_fib;
 struct mlxsw_sp_vr;
@@ -2330,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
 	read_unlock_bh(&n->lock);
 
 	rtnl_lock();
+	mlxsw_sp_span_respin(mlxsw_sp);
+
 	entry_connected = nud_state & NUD_VALID && !dead;
 	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
 	if (!entry_connected && !neigh_entry)
@@ -5589,6 +5592,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
 
 	/* Protect internal structures from changes */
 	rtnl_lock();
+	mlxsw_sp_span_respin(mlxsw_sp);
+
 	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
@@ -5631,6 +5636,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 	int err;
 
 	rtnl_lock();
+	mlxsw_sp_span_respin(mlxsw_sp);
+
 	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
 	case FIB_EVENT_ENTRY_ADD:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index c0e0e9af2da7..71102f156a97 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -502,3 +502,27 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
 		   span_entry->id);
 	mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
 }
+
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
+{
+	int i;
+	int err;
+
+	ASSERT_RTNL();
+	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+		struct mlxsw_sp_span_parms sparms = {0};
+
+		if (!curr->ref_count)
+			continue;
+
+		err = curr->ops->parms(curr->to_dev, &sparms);
+		if (err)
+			continue;
+
+		if (memcmp(&sparms, &curr->parms, sizeof(sparms))) {
+			mlxsw_sp_span_entry_deconfigure(curr);
+			mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 9390b05a7919..b6dcd7d7277c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -76,6 +76,7 @@ struct mlxsw_sp_span_entry_ops {
 
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
 
 int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 			     const struct net_device *to_dev,

From 52a6444cda7d1b6fc6f6ff84e2d23cdb71c84102 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:47 +0100
Subject: [PATCH 0471/1678] mlxsw: Move a mirroring check to
 mlxsw_sp_span_entry_create

The check for whether a mirror port (which is a mlxsw front panel port)
belongs to the same mlxsw instance as the mirrored port, is currently
only done in spectrum_acl, even though it's applicable for the matchall
case as well. Thus move it to mlxsw_sp_span_entry_create().

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c  | 4 ----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 6 +++++-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 9d7197b9abb1..21ed27ae51e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -572,7 +572,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 				  struct net_device *out_dev)
 {
 	struct mlxsw_sp_acl_block_binding *binding;
-	struct mlxsw_sp_port *out_port;
 	struct mlxsw_sp_port *in_port;
 
 	if (!list_is_singular(&block->binding_list))
@@ -581,9 +580,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 	binding = list_first_entry(&block->binding_list,
 				   struct mlxsw_sp_acl_block_binding, list);
 	in_port = binding->mlxsw_sp_port;
-	out_port = netdev_priv(out_dev);
-	if (out_port->mlxsw_sp != mlxsw_sp)
-		return -EINVAL;
 
 	return mlxsw_afa_block_append_mirror(rulei->act_block,
 					     in_port->local_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 71102f156a97..57df57c7a405 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -164,7 +164,11 @@ mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_span_parms sparms)
 {
 	if (sparms.dest_port) {
-		if (span_entry->ops->configure(span_entry, sparms)) {
+		if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+			netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+				   sparms.dest_port->dev->name);
+			sparms.dest_port = NULL;
+		} else if (span_entry->ops->configure(span_entry, sparms)) {
 			netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
 				   sparms.dest_port->dev->name);
 			sparms.dest_port = NULL;

From 27cf76fe60ec6fbe2ba8844261b90a6aecbc42f0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:48 +0100
Subject: [PATCH 0472/1678] mlxsw: spectrum_span: Support mirror to gretap

When a user requests mirror from a mlxsw physical port (possibly based
on an ACL match) to a gretap netdevice, the driver needs to resolve the
request to a particular physical port that the mirrored packets will
egress through, and a suite of configuration keys (importantly, IP and
MAC addresses). That means calling into routing and neighbor kernel code
to simulate the decisions made by the system for packets passing through
a gretap netdevice.

Add a new instance of mlxsw_sp_span_entry_ops to support this.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig   |   2 +
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 167 +++++++++++++++++-
 .../ethernet/mellanox/mlxsw/spectrum_span.h   |   8 +
 3 files changed, 175 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index d56eea310509..830c3e28505e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -78,6 +78,8 @@ config MLXSW_SPECTRUM
 	depends on IPV6 || IPV6=n
 	select PARMAN
 	select MLXFW
+	depends on NET_IPGRE
+	depends on !(MLXSW_CORE=y && NET_IPGRE=m)
 	default m
 	---help---
 	  This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 57df57c7a405..d5fda4f13c31 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -1,6 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c
  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Petr Machata <petrm@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,9 +33,12 @@
  */
 
 #include <linux/list.h>
+#include <net/arp.h>
+#include <net/gre.h>
 
 #include "spectrum.h"
 #include "spectrum_span.h"
+#include "spectrum_ipip.h"
 
 int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
@@ -127,17 +131,176 @@ struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
 	.deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
 };
 
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+			    __be32 *saddrp, __be32 *daddrp)
+{
+	struct ip_tunnel *tun = netdev_priv(to_dev);
+	struct net_device *dev = NULL;
+	struct ip_tunnel_parm parms;
+	struct rtable *rt = NULL;
+	struct flowi4 fl4;
+
+	/* We assume "dev" stays valid after rt is put. */
+	ASSERT_RTNL();
+
+	parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+	ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+			    0, 0, parms.link, tun->fwmark);
+
+	rt = ip_route_output_key(tun->net, &fl4);
+	if (IS_ERR(rt))
+		return NULL;
+
+	if (rt->rt_type != RTN_UNICAST)
+		goto out;
+
+	dev = rt->dst.dev;
+	*saddrp = fl4.saddr;
+	*daddrp = rt->rt_gateway;
+
+out:
+	ip_rt_put(rt);
+	return dev;
+}
+
+static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
+			      const void *pkey,
+			      struct net_device *l3edev,
+			      unsigned char dmac[ETH_ALEN])
+{
+	struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev);
+	int err = 0;
+
+	if (!neigh) {
+		neigh = neigh_create(tbl, pkey, l3edev);
+		if (IS_ERR(neigh))
+			return PTR_ERR(neigh);
+	}
+
+	neigh_event_send(neigh, NULL);
+
+	read_lock_bh(&neigh->lock);
+	if ((neigh->nud_state & NUD_VALID) && !neigh->dead)
+		memcpy(dmac, neigh->ha, ETH_ALEN);
+	else
+		err = -ENOENT;
+	read_unlock_bh(&neigh->lock);
+
+	neigh_release(neigh);
+	return err;
+}
+
+static int
+mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
+{
+	sparmsp->dest_port = NULL;
+	return 0;
+}
+
+static int
+mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
+					union mlxsw_sp_l3addr saddr,
+					union mlxsw_sp_l3addr daddr,
+					union mlxsw_sp_l3addr gw,
+					__u8 ttl,
+					struct neigh_table *tbl,
+					struct mlxsw_sp_span_parms *sparmsp)
+{
+	unsigned char dmac[ETH_ALEN];
+
+	if (mlxsw_sp_l3addr_is_zero(gw))
+		gw = daddr;
+
+	if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) ||
+	    mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac))
+		return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+	sparmsp->dest_port = netdev_priv(l3edev);
+	sparmsp->ttl = ttl;
+	memcpy(sparmsp->dmac, dmac, ETH_ALEN);
+	memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN);
+	sparmsp->saddr = saddr;
+	sparmsp->daddr = daddr;
+	return 0;
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
+				  struct mlxsw_sp_span_parms *sparmsp)
+{
+	struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev);
+	union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr };
+	union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr };
+	bool inherit_tos = tparm.iph.tos & 0x1;
+	bool inherit_ttl = !tparm.iph.ttl;
+	union mlxsw_sp_l3addr gw = daddr;
+	struct net_device *l3edev;
+
+	if (!(to_dev->flags & IFF_UP) ||
+	    /* Reject tunnels with GRE keys, checksums, etc. */
+	    tparm.i_flags || tparm.o_flags ||
+	    /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+	    inherit_ttl || !inherit_tos ||
+	    /* A destination address may not be "any". */
+	    mlxsw_sp_l3addr_is_zero(daddr))
+		return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+	l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4);
+	return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+						       tparm.iph.ttl,
+						       &arp_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry,
+				      struct mlxsw_sp_span_parms sparms)
+{
+	struct mlxsw_sp_port *dest_port = sparms.dest_port;
+	struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+	u8 local_port = dest_port->local_port;
+	char mpat_pl[MLXSW_REG_MPAT_LEN];
+	int pa_id = span_entry->id;
+
+	/* Create a new port analayzer entry for local_port. */
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+			    MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+	mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+				    MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+				    sparms.dmac, false);
+	mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl,
+					      sparms.ttl, sparms.smac,
+					      be32_to_cpu(sparms.saddr.addr4),
+					      be32_to_cpu(sparms.daddr.addr4));
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+	mlxsw_sp_span_entry_deconfigure_common(span_entry,
+					MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
+	.can_handle = is_gretap_dev,
+	.parms = mlxsw_sp_span_entry_gretap4_parms,
+	.configure = mlxsw_sp_span_entry_gretap4_configure,
+	.deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
+};
+
 static const
 struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
 	&mlxsw_sp_span_entry_ops_phys,
+	&mlxsw_sp_span_entry_ops_gretap4,
 };
 
 static int
 mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev,
 			      struct mlxsw_sp_span_parms *sparmsp)
 {
-	sparmsp->dest_port = NULL;
-	return 0;
+	return mlxsw_sp_span_entry_unoffloadable(sparmsp);
 }
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index b6dcd7d7277c..948aceb512c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -35,6 +35,9 @@
 #define _MLXSW_SPECTRUM_SPAN_H
 
 #include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "spectrum_router.h"
 
 struct mlxsw_sp;
 struct mlxsw_sp_port;
@@ -52,6 +55,11 @@ struct mlxsw_sp_span_inspected_port {
 
 struct mlxsw_sp_span_parms {
 	struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
+	unsigned int ttl;
+	unsigned char dmac[ETH_ALEN];
+	unsigned char smac[ETH_ALEN];
+	union mlxsw_sp_l3addr daddr;
+	union mlxsw_sp_l3addr saddr;
 };
 
 struct mlxsw_sp_span_entry_ops;

From 8f08a528de5eaded034c5480588722e7dc167540 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 27 Feb 2018 14:53:49 +0100
Subject: [PATCH 0473/1678] mlxsw: spectrum_span: Support mirror to ip6gretap

Similarly to mirror-to-gretap, this enables mirroring to IPv6 gretap
netdevice.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig   |   2 +
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 101 ++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 830c3e28505e..93d97b4676eb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -80,6 +80,8 @@ config MLXSW_SPECTRUM
 	select MLXFW
 	depends on NET_IPGRE
 	depends on !(MLXSW_CORE=y && NET_IPGRE=m)
+	depends on IPV6_GRE
+	depends on !(MLXSW_CORE=y && IPV6_GRE=m)
 	default m
 	---help---
 	  This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index d5fda4f13c31..f537e1de11d9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -35,6 +35,8 @@
 #include <linux/list.h>
 #include <net/arp.h>
 #include <net/gre.h>
+#include <net/ndisc.h>
+#include <net/ip6_tunnel.h>
 
 #include "spectrum.h"
 #include "spectrum_span.h"
@@ -290,10 +292,109 @@ static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
 	.deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
 };
 
+static struct net_device *
+mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
+			    struct in6_addr *saddrp,
+			    struct in6_addr *daddrp)
+{
+	struct ip6_tnl *t = netdev_priv(to_dev);
+	struct flowi6 fl6 = t->fl.u.ip6;
+	struct net_device *dev = NULL;
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
+
+	/* We assume "dev" stays valid after dst is released. */
+	ASSERT_RTNL();
+
+	fl6.flowi6_mark = t->parms.fwmark;
+	if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr))
+		return NULL;
+
+	dst = ip6_route_output(t->net, NULL, &fl6);
+	if (!dst || dst->error)
+		goto out;
+
+	rt6 = container_of(dst, struct rt6_info, dst);
+
+	dev = dst->dev;
+	*saddrp = fl6.saddr;
+	*daddrp = rt6->rt6i_gateway;
+
+out:
+	dst_release(dst);
+	return dev;
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev,
+				  struct mlxsw_sp_span_parms *sparmsp)
+{
+	struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev);
+	bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS;
+	union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr };
+	union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr };
+	bool inherit_ttl = !tparm.hop_limit;
+	union mlxsw_sp_l3addr gw = daddr;
+	struct net_device *l3edev;
+
+	if (!(to_dev->flags & IFF_UP) ||
+	    /* Reject tunnels with GRE keys, checksums, etc. */
+	    tparm.i_flags || tparm.o_flags ||
+	    /* Require a fixed TTL and a TOS copied from the mirrored packet. */
+	    inherit_ttl || !inherit_tos ||
+	    /* A destination address may not be "any". */
+	    mlxsw_sp_l3addr_is_zero(daddr))
+		return mlxsw_sp_span_entry_unoffloadable(sparmsp);
+
+	l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6);
+	return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw,
+						       tparm.hop_limit,
+						       &nd_tbl, sparmsp);
+}
+
+static int
+mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry,
+				      struct mlxsw_sp_span_parms sparms)
+{
+	struct mlxsw_sp_port *dest_port = sparms.dest_port;
+	struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp;
+	u8 local_port = dest_port->local_port;
+	char mpat_pl[MLXSW_REG_MPAT_LEN];
+	int pa_id = span_entry->id;
+
+	/* Create a new port analayzer entry for local_port. */
+	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
+			    MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+	mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl,
+				    MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER,
+				    sparms.dmac, false);
+	mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac,
+					      sparms.saddr.addr6,
+					      sparms.daddr.addr6);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
+}
+
+static void
+mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
+{
+	mlxsw_sp_span_entry_deconfigure_common(span_entry,
+					MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3);
+}
+
+static const
+struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
+	.can_handle = is_ip6gretap_dev,
+	.parms = mlxsw_sp_span_entry_gretap6_parms,
+	.configure = mlxsw_sp_span_entry_gretap6_configure,
+	.deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
+};
+
 static const
 struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
 	&mlxsw_sp_span_entry_ops_phys,
 	&mlxsw_sp_span_entry_ops_gretap4,
+	&mlxsw_sp_span_entry_ops_gretap6,
 };
 
 static int

From f26d0d2543cb34680393d9993eea4150a153c3fa Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 27 Feb 2018 16:17:19 +0200
Subject: [PATCH 0474/1678] net/mlx4_en: Add physical RX/TX bytes/packets
 counters

Add physical RX/TX packets/bytes counters into ethtool output to monitor
all traffic that was received and transmitted on the port. These
counters are available only for none Virtual Function.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx4/en_ethtool.c   | 14 +++++++
 .../net/ethernet/mellanox/mlx4/en_netdev.c    |  4 ++
 drivers/net/ethernet/mellanox/mlx4/en_port.c  | 38 ++++++++++++-------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h  |  1 +
 .../net/ethernet/mellanox/mlx4/mlx4_stats.h   | 10 ++++-
 5 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ebc1f566a4d9..9a7a2f05ab35 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
 	"rx_xdp_drop",
 	"rx_xdp_tx",
 	"rx_xdp_tx_full",
+
+	/* phy statistics */
+	"rx_packets_phy", "rx_bytes_phy",
+	"tx_packets_phy", "tx_bytes_phy",
 };
 
 static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 		if (bitmap_iterator_test(&it))
 			data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
 
+	for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it))
+		if (bitmap_iterator_test(&it))
+			data[index++] = ((unsigned long *)&priv->phy_stats)[i];
+
 	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
 		data[index++] = priv->tx_ring[TX][i]->packets;
 		data[index++] = priv->tx_ring[TX][i]->bytes;
@@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
 				strcpy(data + (index++) * ETH_GSTRING_LEN,
 				       main_strings[strings]);
 
+		for (i = 0; i < NUM_PHY_STATS; i++, strings++,
+		     bitmap_iterator_inc(&it))
+			if (bitmap_iterator_test(&it))
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+				       main_strings[strings]);
+
 		for (i = 0; i < priv->tx_ring_num[TX]; i++) {
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"tx%d_packets", i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 8fc51bc29003..b62d2c3f976a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
 	bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
 	last_i += NUM_XDP_STATS;
+
+	if (!mlx4_is_slave(dev))
+		bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS);
+	last_i += NUM_PHY_STATS;
 }
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 1fa4849a6f56..0158b88bea5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 		priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
 	}
 
-	if (mlx4_is_master(mdev->dev)) {
-		stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
-						   &mlx4_en_stats->RTOT_prio_1,
-						   NUM_PRIORITIES);
-		stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
-						   &mlx4_en_stats->TTOT_prio_1,
-						   NUM_PRIORITIES);
-		stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
-						 &mlx4_en_stats->ROCT_prio_1,
-						 NUM_PRIORITIES);
-		stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
-						 &mlx4_en_stats->TOCT_prio_1,
-						 NUM_PRIORITIES);
+	if (!mlx4_is_slave(mdev->dev)) {
+		struct mlx4_en_phy_stats *p_stats = &priv->phy_stats;
+
+		p_stats->rx_packets_phy =
+			en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
+				       &mlx4_en_stats->RTOT_prio_1,
+				       NUM_PRIORITIES);
+		p_stats->tx_packets_phy =
+			en_stats_adder(&mlx4_en_stats->TTOT_prio_0,
+				       &mlx4_en_stats->TTOT_prio_1,
+				       NUM_PRIORITIES);
+		p_stats->rx_bytes_phy =
+			en_stats_adder(&mlx4_en_stats->ROCT_prio_0,
+				       &mlx4_en_stats->ROCT_prio_1,
+				       NUM_PRIORITIES);
+		p_stats->tx_bytes_phy =
+			en_stats_adder(&mlx4_en_stats->TOCT_prio_0,
+				       &mlx4_en_stats->TOCT_prio_1,
+				       NUM_PRIORITIES);
+		if (mlx4_is_master(mdev->dev)) {
+			stats->rx_packets = p_stats->rx_packets_phy;
+			stats->tx_packets = p_stats->tx_packets_phy;
+			stats->rx_bytes = p_stats->rx_bytes_phy;
+			stats->tx_bytes = p_stats->tx_bytes_phy;
+		}
 	}
 
 	/* net device stats */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f470ae37d937..f7c81133594f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -608,6 +608,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_flow_stats_tx tx_flowstats;
 	struct mlx4_en_port_stats port_stats;
 	struct mlx4_en_xdp_stats xdp_stats;
+	struct mlx4_en_phy_stats phy_stats;
 	struct mlx4_en_stats_bitmap stats_bitmap;
 	struct list_head mc_list;
 	struct list_head curr_list;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index aab28eb27a30..86b6051da8ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats {
 #define NUM_XDP_STATS		3
 };
 
+struct mlx4_en_phy_stats {
+	unsigned long rx_packets_phy;
+	unsigned long rx_bytes_phy;
+	unsigned long tx_packets_phy;
+	unsigned long tx_bytes_phy;
+#define NUM_PHY_STATS		4
+};
+
 #define NUM_MAIN_STATS	21
 
 #define MLX4_NUM_PRIORITIES	8
@@ -116,7 +124,7 @@ enum {
 
 #define NUM_ALL_STATS	(NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
 			 NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
-			 NUM_XDP_STATS)
+			 NUM_XDP_STATS + NUM_PHY_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
 				  sizeof(((struct net_device_stats *)0)->n))

From 4f32e1c4a9bbb30028406c582419d0b4c8aa5af5 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 27 Feb 2018 16:17:20 +0200
Subject: [PATCH 0475/1678] net/mlx4_en: Remove unnecessary warn print in reset
 config

In mlx4_en_reset_config, there was a redundant warn print that was left
from previous versions of this function. No warn is needed anymore.

This warn can be confusing when RX-FCS is changed:
Turn OFF RX-FCS:
  mlx4_en: eth1: Changing device configuration rx filter(0) rx vlan(1)
Turn ON RX-FCS:
  mlx4_en: eth1: Changing device configuration rx filter(0) rx vlan(1)

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index b62d2c3f976a..e0adac4a9a19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3634,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev,
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n",
-		ts_config.rx_filter,
-		!!(features & NETIF_F_HW_VLAN_CTAG_RX));
-
 	mlx4_en_safe_replace_resources(priv, tmp);
 
 	if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) {

From 1cb8b1216c427ddbec51b8c3e77459dd44f85bcc Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 27 Feb 2018 16:17:21 +0200
Subject: [PATCH 0476/1678] net/mlx4_en: Combine checks of end-cases in RX
 completion function

Combine two end-cases in the same if statement with a single return value.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b4d144e67514..1e8f21b7fed8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -662,12 +662,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	int polled = 0;
 	int index;
 
-	if (unlikely(!priv->port_up))
+	if (unlikely(!priv->port_up || budget <= 0))
 		return 0;
 
-	if (unlikely(budget <= 0))
-		return polled;
-
 	ring = priv->rx_ring[cq_ring];
 
 	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */

From a970d8dba5dab199218d13080dcad4e22cbaf8b5 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 27 Feb 2018 16:17:22 +0200
Subject: [PATCH 0477/1678] net/mlx4_en: RX csum, pre-define enabled protocols
 for IP status masking

Pre-define a mask for IP status of a completion, that tests the
MLX4_CQE_STATUS_IPV6 only in case CONFIG_IPV6 is enabled.
Use it for IP status testing upon completion, instead of separating
the datapath into two flows.
This takes common code structures (such as closing parenthesis)
back to their original place, and makes code more readable.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 1e8f21b7fed8..c2c6bd7578fd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -649,6 +649,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
 	return get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6)
+#else
+#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
+#endif
+
 int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -835,12 +841,7 @@ xdp_drop_no_cnt:
 				ring->csum_ok++;
 			} else {
 				if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
-				      (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
-#if IS_ENABLED(CONFIG_IPV6)
-								 MLX4_CQE_STATUS_IPV6))))
-#else
-								 0))))
-#endif
+				      (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY))))
 					goto csum_none;
 				if (check_csum(cqe, skb, va, dev->features))
 					goto csum_none;

From 985324a16efb5a1b002c057ff457e28fb3f7480e Mon Sep 17 00:00:00 2001
From: Denis 'GNUtoo' Carikli <GNUtoo@no-log.org>
Date: Thu, 8 Feb 2018 17:25:42 +0100
Subject: [PATCH 0478/1678] bcma: add HP Stream Notebook

In this laptop we have the following PCI device:
02:00.0 Network controller [0280]: Broadcom Limited BCM43142 802.11b/g/n [14e4:4365] (rev 01)
	Subsystem: Hewlett-Packard Company BCM43142 802.11b/g/n [103c:804a]
	[...]
	Region 0: Memory at 91000000 (64-bit, non-prefetchable) [size=32K]
	[...]

With this patch, we can now see its WiFi chip:
  bcma: bus0: Found chip with id 43142, rev 0x01 and package 0x08
  bcma: bus0: Core 0 found: ChipCommon (manuf 0x4BF, id 0x800, rev 0x28, class 0x0)
  bcma: bus0: Core 1 found: IEEE 802.11 (manuf 0x4BF, id 0x812, rev 0x21, class 0x0)
  bcma: bus0: Core 2 found: PCIe (manuf 0x4BF, id 0x820, rev 0x16, class 0x0)
  bcma: bus0: Core 3 found: UNKNOWN (manuf 0x43B, id 0x368, rev 0x00, class 0x0)
  bcma: bus0: Found rev 15 PMU (capabilities 0x518C5E0F)
  bcma: bus0: SPROM offset 0x840
  bcma: bus0: Found SPROM revision 10
  bcma: bus0: Workarounds unknown or not needed for device 0xA886
  bcma: bus0: Bus registered

But it not yet supported by brcmsmac so it won't work for now:
  brcmsmac bcma0:1: brcms_b_attach wl0: vendor 0x14e4 device 0x4365
  brcmsmac: unknown device id 4365

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@no-log.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/host_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 925842996986..63410ecfe640 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -297,6 +297,7 @@ static const struct pci_device_id bcma_pci_bridge_tbl[] = {
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0016) },
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_DELL, 0x0018) },
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_FOXCONN, 0xe092) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_BROADCOM, 0x4365, PCI_VENDOR_ID_HP, 0x804a) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a0) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43a9) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 0x43aa) },

From 882164a4a928bcaa53280940436ca476e6b1db8e Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Fri, 12 Jan 2018 14:25:59 +0000
Subject: [PATCH 0479/1678] ssb: Prevent build of PCI host features in module

Attempting to build ssb.ko with CONFIG_SSB_DRIVER_PCICORE=y results in
a build error due to use of symbols not exported from vmlinux:

ERROR: "pcibios_enable_device" [drivers/ssb/ssb.ko] undefined!
ERROR: "register_pci_controller" [drivers/ssb/ssb.ko] undefined!
make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1

To prevent this, don't allow the host mode feature to be built if
CONFIG_SSB=m

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/ssb/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index b3f5cae98ea6..9371651d8017 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -117,7 +117,7 @@ config SSB_SERIAL
 
 config SSB_DRIVER_PCICORE_POSSIBLE
 	bool
-	depends on SSB_PCIHOST
+	depends on SSB_PCIHOST && SSB = y
 	default y
 
 config SSB_DRIVER_PCICORE

From 09e93f28aa8d37a07d0c2ad742d085a21d845cda Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 1 Feb 2018 01:27:36 +0100
Subject: [PATCH 0480/1678] mt76x2: remove warnings in mt76x2_mac_write_txwi()

Fix following sparse warnings in mt76x2_mac_write_txwi:
- drivers/net/wireless/mediatek/mt76/mt76x2_mac.c:201:26: warning:
  incorrect type in assignment (different base types)
- drivers/net/wireless/mediatek/mt76/mt76x2_mac.c:201:26: expected
  restricted __le32 [usertype] iv
- drivers/net/wireless/mediatek/mt76/mt76x2_mac.c:201:26: got unsigned
  int [unsigned] [usertype] <noident>
- drivers/net/wireless/mediatek/mt76/mt76x2_mac.c:202:27: warning:
  incorrect type in assignment (different base types)
- drivers/net/wireless/mediatek/mt76/mt76x2_mac.c:202:27: expected
  restricted __le32 [usertype] eiv
- drivers/net/wireless/mediatek/mt76/mt76x2_mac.c:202:27: got unsigned
  int [unsigned] [usertype] <noident>

Fixes: 23405236460b ("mt76: fix transmission of encrypted management frames")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt76x2_mac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
index 7ea3d841918e..d18315652583 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_mac.c
@@ -198,8 +198,8 @@ void mt76x2_mac_write_txwi(struct mt76x2_dev *dev, struct mt76x2_txwi *txwi,
 		ccmp_pn[5] = pn >> 24;
 		ccmp_pn[6] = pn >> 32;
 		ccmp_pn[7] = pn >> 40;
-		txwi->iv = *((u32 *) &ccmp_pn[0]);
-		txwi->eiv = *((u32 *) &ccmp_pn[1]);
+		txwi->iv = *((__le32 *)&ccmp_pn[0]);
+		txwi->eiv = *((__le32 *)&ccmp_pn[1]);
 	}
 
 	spin_lock_bh(&dev->mt76.lock);

From e96826bde3db15d37ebb7905c24a071d457e9132 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 8 Feb 2018 23:08:08 +0100
Subject: [PATCH 0481/1678] mt7601u: move mt7601u_set_macaddr in mac related
 code

Remove static qualifier from mt7601u_set_macaddr routine and move it
in mac related code in order to be used to properly support vif with
different mac address respect to the default one

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/mediatek/mt7601u/eeprom.c    | 24 ++-----------------
 drivers/net/wireless/mediatek/mt7601u/mac.c   | 16 +++++++++++++
 drivers/net/wireless/mediatek/mt7601u/mac.h   |  1 +
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
index da6faea092d6..05d729be0c65 100644
--- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
@@ -19,6 +19,7 @@
 #include <asm/unaligned.h>
 #include "mt7601u.h"
 #include "eeprom.h"
+#include "mac.h"
 
 static bool
 field_valid(u8 val)
@@ -134,27 +135,6 @@ mt7601u_set_chip_cap(struct mt7601u_dev *dev, u8 *eeprom)
 			"Error: device has more than 1 RX/TX stream!\n");
 }
 
-static int
-mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *eeprom)
-{
-	const void *src = eeprom + MT_EE_MAC_ADDR;
-
-	ether_addr_copy(dev->macaddr, src);
-
-	if (!is_valid_ether_addr(dev->macaddr)) {
-		eth_random_addr(dev->macaddr);
-		dev_info(dev->dev,
-			 "Invalid MAC address, using random address %pM\n",
-			 dev->macaddr);
-	}
-
-	mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
-	mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
-		FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
-
-	return 0;
-}
-
 static void mt7601u_set_channel_target_power(struct mt7601u_dev *dev,
 					     u8 *eeprom, u8 max_pwr)
 {
@@ -400,7 +380,7 @@ mt7601u_eeprom_init(struct mt7601u_dev *dev)
 	dev_info(dev->dev, "EEPROM ver:%02hhx fae:%02hhx\n",
 		 eeprom[MT_EE_VERSION_EE], eeprom[MT_EE_VERSION_FAE]);
 
-	mt7601u_set_macaddr(dev, eeprom);
+	mt7601u_set_macaddr(dev, eeprom + MT_EE_MAC_ADDR);
 	mt7601u_set_chip_cap(dev, eeprom);
 	mt7601u_set_channel_power(dev, eeprom);
 	mt7601u_set_country_reg(dev, eeprom);
diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c
index d6dc59bb00df..4d3077941138 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mac.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mac.c
@@ -16,6 +16,22 @@
 #include "trace.h"
 #include <linux/etherdevice.h>
 
+void mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *addr)
+{
+	ether_addr_copy(dev->macaddr, addr);
+
+	if (!is_valid_ether_addr(dev->macaddr)) {
+		eth_random_addr(dev->macaddr);
+		dev_info(dev->dev,
+			 "Invalid MAC address, using random address %pM\n",
+			 dev->macaddr);
+	}
+
+	mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
+	mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
+		FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
+}
+
 static void
 mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate)
 {
diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.h b/drivers/net/wireless/mediatek/mt7601u/mac.h
index 2c22d63c63a2..b7aa24656d0e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mac.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mac.h
@@ -174,5 +174,6 @@ u16 mt76_mac_tx_rate_val(struct mt7601u_dev *dev,
 struct mt76_tx_status
 mt7601u_mac_fetch_tx_status(struct mt7601u_dev *dev);
 void mt76_send_tx_status(struct mt7601u_dev *dev, struct mt76_tx_status *stat);
+void mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *addr);
 
 #endif

From 032a552e8dc9d2147e34157bb3b0b23e8f418e43 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 8 Feb 2018 23:08:09 +0100
Subject: [PATCH 0482/1678] mt7601u: set device mac address in
 mt7601u_add_interface()

If mac80211 adds a vif with a different mac address respect to
the eeprom one, the device will not be able to connect to the ap
since the hw address has not been updated.
Fix the issue updating hw mac address in mt7601u_add_interface routine

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1516935
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt7601u/main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c
index 43ebd460ba86..3c9ea40d9584 100644
--- a/drivers/net/wireless/mediatek/mt7601u/main.c
+++ b/drivers/net/wireless/mediatek/mt7601u/main.c
@@ -64,6 +64,9 @@ static int mt7601u_add_interface(struct ieee80211_hw *hw,
 	 */
 	mvif->idx = idx;
 
+	if (!ether_addr_equal(dev->macaddr, vif->addr))
+		mt7601u_set_macaddr(dev, vif->addr);
+
 	if (dev->wcid_mask[wcid / BITS_PER_LONG] & BIT(wcid % BITS_PER_LONG))
 		return -ENOSPC;
 	dev->wcid_mask[wcid / BITS_PER_LONG] |= BIT(wcid % BITS_PER_LONG);

From 7972326a26b5bf8dc2adac575c4e03ee7e9d193a Mon Sep 17 00:00:00 2001
From: Sudhir Sreedharan <ssreedharan@mvista.com>
Date: Thu, 15 Feb 2018 12:52:45 +0530
Subject: [PATCH 0483/1678] rtl8187: Fix NULL pointer dereference in
 priv->conf_mutex

This can be reproduced by bind/unbind the driver multiple times
in AM3517 board.

Analysis revealed that rtl8187_start() was invoked before probe
finishes(ie. before the mutex is initialized).

 INFO: trying to register non-static key.
 the code is fine but needs lockdep annotation.
 turning off the locking correctness validator.
 CPU: 0 PID: 821 Comm: wpa_supplicant Not tainted 4.9.80-dirty #250
 Hardware name: Generic AM3517 (Flattened Device Tree)
 [<c010e0d8>] (unwind_backtrace) from [<c010beac>] (show_stack+0x10/0x14)
 [<c010beac>] (show_stack) from [<c017401c>] (register_lock_class+0x4f4/0x55c)
 [<c017401c>] (register_lock_class) from [<c0176fe0>] (__lock_acquire+0x74/0x1938)
 [<c0176fe0>] (__lock_acquire) from [<c0178cfc>] (lock_acquire+0xfc/0x23c)
 [<c0178cfc>] (lock_acquire) from [<c08aa2f8>] (mutex_lock_nested+0x50/0x3b0)
 [<c08aa2f8>] (mutex_lock_nested) from [<c05f5bf8>] (rtl8187_start+0x2c/0xd54)
 [<c05f5bf8>] (rtl8187_start) from [<c082dea0>] (drv_start+0xa8/0x320)
 [<c082dea0>] (drv_start) from [<c084d1d4>] (ieee80211_do_open+0x2bc/0x8e4)
 [<c084d1d4>] (ieee80211_do_open) from [<c069be94>] (__dev_open+0xb8/0x120)
 [<c069be94>] (__dev_open) from [<c069c11c>] (__dev_change_flags+0x88/0x14c)
 [<c069c11c>] (__dev_change_flags) from [<c069c1f8>] (dev_change_flags+0x18/0x48)
 [<c069c1f8>] (dev_change_flags) from [<c0710b08>] (devinet_ioctl+0x738/0x840)
 [<c0710b08>] (devinet_ioctl) from [<c067925c>] (sock_ioctl+0x164/0x2f4)
 [<c067925c>] (sock_ioctl) from [<c02883f8>] (do_vfs_ioctl+0x8c/0x9d0)
 [<c02883f8>] (do_vfs_ioctl) from [<c0288da8>] (SyS_ioctl+0x6c/0x7c)
 [<c0288da8>] (SyS_ioctl) from [<c0107760>] (ret_fast_syscall+0x0/0x1c)
 Unable to handle kernel NULL pointer dereference at virtual address 00000000
 pgd = cd1ec000
 [00000000] *pgd=8d1de831, *pte=00000000, *ppte=00000000
 Internal error: Oops: 817 [#1] PREEMPT ARM
 Modules linked in:
 CPU: 0 PID: 821 Comm: wpa_supplicant Not tainted 4.9.80-dirty #250
 Hardware name: Generic AM3517 (Flattened Device Tree)
 task: ce73eec0 task.stack: cd1ea000
 PC is at mutex_lock_nested+0xe8/0x3b0
 LR is at mutex_lock_nested+0xd0/0x3b0

Cc: stable@vger.kernel.org
Signed-off-by: Sudhir Sreedharan <ssreedharan@mvista.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
index 121b94f09714..9a1d15b3ce45 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
@@ -1450,6 +1450,7 @@ static int rtl8187_probe(struct usb_interface *intf,
 		goto err_free_dev;
 	}
 	mutex_init(&priv->io_mutex);
+	mutex_init(&priv->conf_mutex);
 
 	SET_IEEE80211_DEV(dev, &intf->dev);
 	usb_set_intfdata(intf, dev);
@@ -1625,7 +1626,6 @@ static int rtl8187_probe(struct usb_interface *intf,
 		printk(KERN_ERR "rtl8187: Cannot register device\n");
 		goto err_free_dmabuf;
 	}
-	mutex_init(&priv->conf_mutex);
 	skb_queue_head_init(&priv->b_tx_status.queue);
 
 	wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n",

From fee05843801c37e527dbe2c5eeb3fb3b15bc9919 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 16 Feb 2018 23:30:01 +0100
Subject: [PATCH 0484/1678] mt7601u: make write with mask access atomic

Introduce __mt7601u_rr and __mt7601u_vendor_single_wr routines in order
to make mt7601u_rmw and mt7601u_rmc atomic. This patch does not fix a
reported issue but makes the usb access more robust to concurrent
operations on the same register since it is theoretically possible that
read and write accesses of mt7601u_rmw/mt7601u_rmc can be interleaved with
a different write operation on the same register.
Moreover using __mt7601u_rr and __mt7601u_vendor_single_wr in
mt7601u_rmw/mt7601u_rmc allows to grab vendor_req_mutex mutex once
instead of twice

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Acked-by: Jakub Kicinski <kubakici@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/mediatek/mt7601u/mt7601u.h   |  3 +-
 drivers/net/wireless/mediatek/mt7601u/usb.c   | 56 +++++++++++++------
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
index c7ec40475a5f..9233744451a9 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
@@ -147,7 +147,8 @@ enum {
  * @rx_lock:		protects @rx_q.
  * @con_mon_lock:	protects @ap_bssid, @bcn_*, @avg_rssi.
  * @mutex:		ensures exclusive access from mac80211 callbacks.
- * @vendor_req_mutex:	protects @vend_buf, ensures atomicity of split writes.
+ * @vendor_req_mutex:	protects @vend_buf, ensures atomicity of read/write
+ *			accesses
  * @reg_atomic_mutex:	ensures atomicity of indirect register accesses
  *			(accesses to RF and BBP).
  * @hw_atomic_mutex:	ensures exclusive access to HW during critical
diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
index b9e4f6793138..d8b7863f7926 100644
--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
+++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
@@ -129,15 +129,14 @@ void mt7601u_vendor_reset(struct mt7601u_dev *dev)
 			       MT_VEND_DEV_MODE_RESET, 0, NULL, 0);
 }
 
-u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
+/* should be called with vendor_req_mutex held */
+static u32 __mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
 {
 	int ret;
 	u32 val = ~0;
 
 	WARN_ONCE(offset > USHRT_MAX, "read high off:%08x", offset);
 
-	mutex_lock(&dev->vendor_req_mutex);
-
 	ret = mt7601u_vendor_request(dev, MT_VEND_MULTI_READ, USB_DIR_IN,
 				     0, offset, dev->vend_buf, MT_VEND_BUF);
 	if (ret == MT_VEND_BUF)
@@ -146,25 +145,41 @@ u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
 		dev_err(dev->dev, "Error: wrong size read:%d off:%08x\n",
 			ret, offset);
 
-	mutex_unlock(&dev->vendor_req_mutex);
-
 	trace_reg_read(dev, offset, val);
 	return val;
 }
 
+u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset)
+{
+	u32 ret;
+
+	mutex_lock(&dev->vendor_req_mutex);
+	ret = __mt7601u_rr(dev, offset);
+	mutex_unlock(&dev->vendor_req_mutex);
+
+	return ret;
+}
+
+/* should be called with vendor_req_mutex held */
+static int __mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
+				      const u16 offset, const u32 val)
+{
+	int ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
+					 val & 0xffff, offset, NULL, 0);
+	if (!ret)
+		ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
+					     val >> 16, offset + 2, NULL, 0);
+	trace_reg_write(dev, offset, val);
+	return ret;
+}
+
 int mt7601u_vendor_single_wr(struct mt7601u_dev *dev, const u8 req,
 			     const u16 offset, const u32 val)
 {
 	int ret;
 
 	mutex_lock(&dev->vendor_req_mutex);
-
-	ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
-				     val & 0xffff, offset, NULL, 0);
-	if (!ret)
-		ret = mt7601u_vendor_request(dev, req, USB_DIR_OUT,
-					     val >> 16, offset + 2, NULL, 0);
-
+	ret = __mt7601u_vendor_single_wr(dev, req, offset, val);
 	mutex_unlock(&dev->vendor_req_mutex);
 
 	return ret;
@@ -175,23 +190,30 @@ void mt7601u_wr(struct mt7601u_dev *dev, u32 offset, u32 val)
 	WARN_ONCE(offset > USHRT_MAX, "write high off:%08x", offset);
 
 	mt7601u_vendor_single_wr(dev, MT_VEND_WRITE, offset, val);
-	trace_reg_write(dev, offset, val);
 }
 
 u32 mt7601u_rmw(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val)
 {
-	val |= mt7601u_rr(dev, offset) & ~mask;
-	mt7601u_wr(dev, offset, val);
+	mutex_lock(&dev->vendor_req_mutex);
+	val |= __mt7601u_rr(dev, offset) & ~mask;
+	__mt7601u_vendor_single_wr(dev, MT_VEND_WRITE, offset, val);
+	mutex_unlock(&dev->vendor_req_mutex);
+
 	return val;
 }
 
 u32 mt7601u_rmc(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val)
 {
-	u32 reg = mt7601u_rr(dev, offset);
+	u32 reg;
 
+	mutex_lock(&dev->vendor_req_mutex);
+	reg = __mt7601u_rr(dev, offset);
 	val |= reg & ~mask;
 	if (reg != val)
-		mt7601u_wr(dev, offset, val);
+		__mt7601u_vendor_single_wr(dev, MT_VEND_WRITE,
+					   offset, val);
+	mutex_unlock(&dev->vendor_req_mutex);
+
 	return val;
 }
 

From 363656eb5e574b6dab513a10b7166f08783fa3e3 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Tue, 16 Jan 2018 11:20:51 -0800
Subject: [PATCH 0485/1678] fm10k: fix function doxygen comments

Several function header comments had incorrect function parameter
definitions. Recent versions of the upstream kernel have started to warn
about these issues. Fix up the comments which do not match in order to
resolve these new warnings.

While fixing these, update the copyright year also.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_common.c |  5 +++--
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 10 +++++-----
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c    | 11 ++++++-----
 drivers/net/ethernet/intel/fm10k/fm10k_pf.c     |  4 ++--
 drivers/net/ethernet/intel/fm10k/fm10k_tlv.c    |  7 ++++---
 5 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 736a9f087bc9..c58a5377a287 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
  *  fm10k_read_hw_stats_32b - Reads value of 32-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing a 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of the register and returns the delta
  *  between the base and the current value.
@@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
  *  fm10k_read_hw_stats_48b - Reads value of 48-bit registers
  *  @hw: pointer to the hardware structure
  *  @addr: address of register containing the lower 32-bit value
+ *  @stat: pointer to structure holding hw stat information
  *
  *  Function reads the content of 2 registers, combined to represent a 48-bit
  *  statistical value. Extra processing is required to handle overflowing.
@@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
 
 /**
  *  fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
- *  @hw: pointer to the hardware structure
  *  @q: pointer to the ring of hardware statistics queue
  *  @idx: index pointing to the start of the ring iteration
  *  @count: number of queues to iterate over
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index a38ae5c54da3..75c99aed3c41 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports,
 
 /**
  * fm10k_udp_tunnel_add
- * @netdev: network interface device structure
+ * @dev: network interface device structure
  * @ti: Tunnel endpoint information
  *
  * This function is called when a new UDP tunnel port has been added.
@@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev,
 
 /**
  * fm10k_udp_tunnel_del
- * @netdev: network interface device structure
- * @ti: Tunnel endpoint information
+ * @dev: network interface device structure
+ * @ti: Tunnel end point information
  *
  * This function is called when a new UDP tunnel port is deleted. The freed
  * port will be removed from the list, then we reprogram the offloaded port
@@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
  * @glort: the target glort for this update
  * @addr: the address to update
  * @vid: the vid to update
- * @sync: whether to add or remove
+ * @set: whether to add or remove
  *
  * This function queues up a MAC request for sending to the switch manager.
  * A separate thread monitors the queue and sends updates to the switch
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index a434fecfdfeb..d7aad4cb73a7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface)
 
 /**
  * fm10k_service_timer - Timer Call-back
- * @data: pointer to interface cast into an unsigned long
+ * @t: pointer to timer data
  **/
 static void fm10k_service_timer(struct timer_list *t)
 {
@@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_flush_tx - flush queues on host not ready
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 {
@@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
 
 /**
  * fm10k_watchdog_subtask - check and bring link up
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  **/
 static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 {
@@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
 
 /**
  * fm10k_check_hang_subtask - check for hung queues and dropped interrupts
- * @interface - pointer to the device interface structure
+ * @interface: pointer to the device interface structure
  *
  * This function serves two purposes.  First it strobes the interrupt lines
  * in order to make certain interrupts are occurring.  Secondly it sets the
@@ -1995,6 +1995,7 @@ skip_tx_dma_drain:
 /**
  * fm10k_sw_init - Initialize general software structures
  * @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
  *
  * fm10k_sw_init initializes the interface private data structure.
  * Fields are initialized based on PCI device information and
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index d6406fc31ffb..bee192fe2ffb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2017 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
 
 /**
  * fm10k_iov_select_vid - Select correct default VLAN ID
- * @hw: Pointer to hardware structure
+ * @vf_info: pointer to VF information structure
  * @vid: VLAN ID to correct
  *
  * Will report an error if the VLAN ID is out of range. For VID = 0, it will
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index f8e87bf086b9..9d0d31da426b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,5 +1,5 @@
 /* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2018 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
  *  @msg: Pointer to message block
  *  @attr_id: Attribute ID
  *  @mac_addr: MAC address to be stored
+ *  @vlan: VLAN to be stored
  *
  *  This function will reorder a MAC address to be CPU endian and store it
  *  in the attribute buffer.  It will return success if provided with a
@@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
 /**
  *  fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
  *  @attr: Pointer to attribute
- *  @attr_id: Attribute ID
  *  @mac_addr: location of buffer to store MAC address
+ *  @vlan: location of buffer to store VLAN
  *
  *  This function pulls the MAC address back out of the attribute and will
  *  place it in the array pointed by by mac_addr.  It will return success
@@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
  *  @hw: Pointer to hardware structure
  *  @msg: Pointer to message
  *  @mbx: Pointer to mailbox information structure
- *  @func: Function array containing list of message handling functions
+ *  @data: Pointer to message handler data structure
  *
  *  This function should be the first function called upon receiving a
  *  message.  The handler will identify the message type and call the correct

From 7d6707a9daa8f491923406d37e86b55384c6b9fb Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Tue, 16 Jan 2018 11:20:52 -0800
Subject: [PATCH 0486/1678] fm10k: fix incorrect warning for function prototype

Recent kernels now complain about incorrect function prototype comments,
in order to ensure comments are accurate to the function. However, it
incorrectly associates the comment above the fm10k_pci_tbl[] as
a function header comment. Fix this by removing the extra "*" in the
comment. This normally indicates that the function is a doxygen style
function header comment.

Once removed, the logic no longer kicks in and the following warning is
fixed:

  warning: cannot understand function prototype: 'const struct pci_device_id fm10k_pci_tbl[] = '

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index d7aad4cb73a7..50f53e403ef5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
 	[fm10k_device_vf] = &fm10k_vf_info,
 };
 
-/**
+/*
  * fm10k_pci_tbl - PCI Device ID Table
  *
  * Wildcard entries (PCI_ANY_ID) should come last

From e9d328d3b753d6f9c75653107fc0689a8e681b16 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 18 Jan 2018 09:18:57 -0800
Subject: [PATCH 0487/1678] fm10k: bump version number

We're aligned with latest version released on SourceForge, so update the
version number to match.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 8e12aae065d8..2c93d719438f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,13 +28,13 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION	"0.22.1-k"
+#define DRV_VERSION	"0.23.4-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
 static const char fm10k_driver_string[] = DRV_SUMMARY;
 static const char fm10k_copyright[] =
-	"Copyright(c) 2013 - 2017 Intel Corporation.";
+	"Copyright(c) 2013 - 2018 Intel Corporation.";
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION(DRV_SUMMARY);

From 44d15d930bb8463d10b05bbed45e881e5055495a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 27 Feb 2018 17:38:08 +0200
Subject: [PATCH 0488/1678] team: Use extack to report enslavement failures

Use extack inside team's enslavement function and also propagate it to
the netdevice notifier to allow enslaved ports to report the failure
reason. Example:

$ teamd -t team0 -d -c '{"runner": {"name": "lacp"}}'
$ ip link set dev lo master team0
Error: Loopback device can't be added as a team port.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a468439969df..5dd781e65958 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port)
 }
 #endif
 
-static int team_upper_dev_link(struct team *team, struct team_port *port)
+static int team_upper_dev_link(struct team *team, struct team_port *port,
+			       struct netlink_ext_ack *extack)
 {
 	struct netdev_lag_upper_info lag_upper_info;
 	int err;
 
 	lag_upper_info.tx_type = team->mode->lag_tx_type;
 	err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
-					   &lag_upper_info, NULL);
+					   &lag_upper_info, extack);
 	if (err)
 		return err;
 	port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup);
 static int team_dev_type_check_change(struct net_device *dev,
 				      struct net_device *port_dev);
 
-static int team_port_add(struct team *team, struct net_device *port_dev)
+static int team_port_add(struct team *team, struct net_device *port_dev,
+			 struct netlink_ext_ack *extack)
 {
 	struct net_device *dev = team->dev;
 	struct team_port *port;
@@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 	int err;
 
 	if (port_dev->flags & IFF_LOOPBACK) {
+		NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port");
 		netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
 			   portname);
 		return -EINVAL;
 	}
 
 	if (team_port_exists(port_dev)) {
+		NL_SET_ERR_MSG(extack, "Device is already a port of a team device");
 		netdev_err(dev, "Device %s is already a port "
 				"of a team device\n", portname);
 		return -EBUSY;
@@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 
 	if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
 	    vlan_uses_dev(dev)) {
+		NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
 		netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n",
 			   portname);
 		return -EPERM;
@@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		return err;
 
 	if (port_dev->flags & IFF_UP) {
+		NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port");
 		netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
 			   portname);
 		return -EBUSY;
@@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_handler_register;
 	}
 
-	err = team_upper_dev_link(team, port);
+	err = team_upper_dev_link(team, port, extack);
 	if (err) {
 		netdev_err(dev, "Device %s failed to set upper link\n",
 			   portname);
@@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
 	int err;
 
 	mutex_lock(&team->lock);
-	err = team_port_add(team, port_dev);
+	err = team_port_add(team, port_dev, extack);
 	mutex_unlock(&team->lock);
 
 	if (!err)

From 03145864bd0fcac29e33442f39d67d4f28b0777c Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 27 Feb 2018 15:52:57 +0000
Subject: [PATCH 0489/1678] sfp: support 1G BiDi (eg, FiberStore SFP-GE-BX)
 modules

Some BiDi modules (eg, FiberStore SFP-GE-BX) are not compliant with
1000BASE-BX as they use different wavelengths from the 1000BASE-BX
standard (eg, 1310nm/1550nm rather than 1310nm/1490nm).  These modules
support 1000BASE-X ethernet, so detect them by a failure to find any
other support, the 8B10B encoding and a bit rate that falls within the
1Gbps window.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp-bus.c | 61 ++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 8961209ee949..15749428679e 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -180,10 +180,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 		       unsigned long *support)
 {
 	unsigned int br_min, br_nom, br_max;
-
-	phylink_set(support, Autoneg);
-	phylink_set(support, Pause);
-	phylink_set(support, Asym_Pause);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, };
 
 	/* Decode the bitrate information to MBd */
 	br_min = br_nom = br_max = 0;
@@ -201,20 +198,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 
 	/* Set ethtool support from the compliance fields. */
 	if (id->base.e10g_base_sr)
-		phylink_set(support, 10000baseSR_Full);
+		phylink_set(modes, 10000baseSR_Full);
 	if (id->base.e10g_base_lr)
-		phylink_set(support, 10000baseLR_Full);
+		phylink_set(modes, 10000baseLR_Full);
 	if (id->base.e10g_base_lrm)
-		phylink_set(support, 10000baseLRM_Full);
+		phylink_set(modes, 10000baseLRM_Full);
 	if (id->base.e10g_base_er)
-		phylink_set(support, 10000baseER_Full);
+		phylink_set(modes, 10000baseER_Full);
 	if (id->base.e1000_base_sx ||
 	    id->base.e1000_base_lx ||
 	    id->base.e1000_base_cx)
-		phylink_set(support, 1000baseX_Full);
+		phylink_set(modes, 1000baseX_Full);
 	if (id->base.e1000_base_t) {
-		phylink_set(support, 1000baseT_Half);
-		phylink_set(support, 1000baseT_Full);
+		phylink_set(modes, 1000baseT_Half);
+		phylink_set(modes, 1000baseT_Full);
 	}
 
 	/* 1000Base-PX or 1000Base-BX10 */
@@ -228,20 +225,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 	if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) {
 		/* This may look odd, but some manufacturers use 12000MBd */
 		if (br_min <= 12000 && br_max >= 10300)
-			phylink_set(support, 10000baseCR_Full);
+			phylink_set(modes, 10000baseCR_Full);
 		if (br_min <= 3200 && br_max >= 3100)
-			phylink_set(support, 2500baseX_Full);
+			phylink_set(modes, 2500baseX_Full);
 		if (br_min <= 1300 && br_max >= 1200)
-			phylink_set(support, 1000baseX_Full);
+			phylink_set(modes, 1000baseX_Full);
 	}
 	if (id->base.sfp_ct_passive) {
 		if (id->base.passive.sff8431_app_e)
-			phylink_set(support, 10000baseCR_Full);
+			phylink_set(modes, 10000baseCR_Full);
 	}
 	if (id->base.sfp_ct_active) {
 		if (id->base.active.sff8431_app_e ||
 		    id->base.active.sff8431_lim) {
-			phylink_set(support, 10000baseCR_Full);
+			phylink_set(modes, 10000baseCR_Full);
 		}
 	}
 
@@ -249,18 +246,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 	case 0x00: /* Unspecified */
 		break;
 	case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
-		phylink_set(support, 100000baseSR4_Full);
-		phylink_set(support, 25000baseSR_Full);
+		phylink_set(modes, 100000baseSR4_Full);
+		phylink_set(modes, 25000baseSR_Full);
 		break;
 	case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
 	case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
-		phylink_set(support, 100000baseLR4_ER4_Full);
+		phylink_set(modes, 100000baseLR4_ER4_Full);
 		break;
 	case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
 	case 0x0c: /* 25Gbase-CR CA-S */
 	case 0x0d: /* 25Gbase-CR CA-N */
-		phylink_set(support, 100000baseCR4_Full);
-		phylink_set(support, 25000baseCR_Full);
+		phylink_set(modes, 100000baseCR4_Full);
+		phylink_set(modes, 25000baseCR_Full);
 		break;
 	default:
 		dev_warn(bus->sfp_dev,
@@ -274,10 +271,28 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 	    id->base.fc_speed_200 ||
 	    id->base.fc_speed_400) {
 		if (id->base.br_nominal >= 31)
-			phylink_set(support, 2500baseX_Full);
+			phylink_set(modes, 2500baseX_Full);
 		if (id->base.br_nominal >= 12)
-			phylink_set(support, 1000baseX_Full);
+			phylink_set(modes, 1000baseX_Full);
 	}
+
+	/* If we haven't discovered any modes that this module supports, try
+	 * the encoding and bitrate to determine supported modes. Some BiDi
+	 * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to
+	 * the differing wavelengths, so do not set any transceiver bits.
+	 */
+	if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+		/* If the encoding and bit rate allows 1000baseX */
+		if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+		    br_min <= 1300 && br_max >= 1200)
+			phylink_set(modes, 1000baseX_Full);
+	}
+
+	bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	phylink_set(support, Autoneg);
+	phylink_set(support, Pause);
+	phylink_set(support, Asym_Pause);
 }
 EXPORT_SYMBOL_GPL(sfp_parse_support);
 

From a9c79364df324a69ba1b71accd5b8a3155e570ac Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 27 Feb 2018 15:53:02 +0000
Subject: [PATCH 0490/1678] phylink,sfp: negotiate interface format with MAC

Negotiate the interface format with the MAC rather than requiring it to
be a fixed type specified solely by the SFP module.  This allows modules
that can work with several different interface signalling formats to
select a format compatible with the MAC - for example, a Fiber module
supporing Gigabit ethernet and faster connected to a Gigabit only MAC
needs to select the 1000BASE-X mode.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c |  33 +++++++------
 drivers/net/phy/sfp-bus.c | 101 +++++++++++++++-----------------------
 include/linux/sfp.h       |  18 ++++---
 3 files changed, 68 insertions(+), 84 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6ac8b29b2dc3..27327c917a59 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1584,25 +1584,14 @@ static int phylink_sfp_module_insert(void *upstream,
 	bool changed;
 	u8 port;
 
-	sfp_parse_support(pl->sfp_bus, id, support);
-	port = sfp_parse_port(pl->sfp_bus, id, support);
-	iface = sfp_parse_interface(pl->sfp_bus, id);
-
 	ASSERT_RTNL();
 
-	switch (iface) {
-	case PHY_INTERFACE_MODE_SGMII:
-	case PHY_INTERFACE_MODE_1000BASEX:
-	case PHY_INTERFACE_MODE_2500BASEX:
-	case PHY_INTERFACE_MODE_10GKR:
-		break;
-	default:
-		return -EINVAL;
-	}
+	sfp_parse_support(pl->sfp_bus, id, support);
+	port = sfp_parse_port(pl->sfp_bus, id, support);
 
 	memset(&config, 0, sizeof(config));
 	linkmode_copy(config.advertising, support);
-	config.interface = iface;
+	config.interface = PHY_INTERFACE_MODE_NA;
 	config.speed = SPEED_UNKNOWN;
 	config.duplex = DUPLEX_UNKNOWN;
 	config.pause = MLO_PAUSE_AN;
@@ -1610,6 +1599,22 @@ static int phylink_sfp_module_insert(void *upstream,
 
 	/* Ignore errors if we're expecting a PHY to attach later */
 	ret = phylink_validate(pl, support, &config);
+	if (ret) {
+		netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
+			   __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+		return ret;
+	}
+
+	iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+	if (iface == PHY_INTERFACE_MODE_NA) {
+		netdev_err(pl->netdev,
+			   "selection of interface failed, advertisment %*pb\n",
+			   __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+		return -EINVAL;
+	}
+
+	config.interface = iface;
+	ret = phylink_validate(pl, support, &config);
 	if (ret) {
 		netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
 			   phylink_an_mode_str(MLO_AN_INBAND),
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 15749428679e..3d4ff5d0d2a6 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -105,68 +105,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 }
 EXPORT_SYMBOL_GPL(sfp_parse_port);
 
-/**
- * sfp_parse_interface() - Parse the phy_interface_t
- * @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
- *
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM. There is no standard or defined way
- * to derive this information, so we use some heuristics.
- *
- * If the encoding is 64b66b, then the module must be >= 10G, so
- * return %PHY_INTERFACE_MODE_10GKR.
- *
- * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre
- * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return
- * %PHY_INTERFACE_MODE_SGMII mode.
- *
- * If the encoding is not known, return %PHY_INTERFACE_MODE_NA.
- */
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-				    const struct sfp_eeprom_id *id)
-{
-	phy_interface_t iface;
-
-	/* Setting the serdes link mode is guesswork: there's no field in
-	 * the EEPROM which indicates what mode should be used.
-	 *
-	 * If the module wants 64b66b, then it must be >= 10G.
-	 *
-	 * If it's a gigabit-only fiber module, it probably does not have
-	 * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
-	 * to SGMII mode (which is required to support non-gigabit speeds).
-	 */
-	switch (id->base.encoding) {
-	case SFP_ENCODING_8472_64B66B:
-		iface = PHY_INTERFACE_MODE_10GKR;
-		break;
-
-	case SFP_ENCODING_8B10B:
-		if (!id->base.e1000_base_t &&
-		    !id->base.e100_base_lx &&
-		    !id->base.e100_base_fx)
-			iface = PHY_INTERFACE_MODE_1000BASEX;
-		else
-			iface = PHY_INTERFACE_MODE_SGMII;
-		break;
-
-	default:
-		if (id->base.e1000_base_cx) {
-			iface = PHY_INTERFACE_MODE_1000BASEX;
-			break;
-		}
-
-		iface = PHY_INTERFACE_MODE_NA;
-		dev_err(bus->sfp_dev,
-			"SFP module encoding does not support 8b10b nor 64b66b\n");
-		break;
-	}
-
-	return iface;
-}
-EXPORT_SYMBOL_GPL(sfp_parse_interface);
-
 /**
  * sfp_parse_support() - Parse the eeprom id for supported link modes
  * @bus: a pointer to the &struct sfp_bus structure for the sfp module
@@ -296,6 +234,45 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 }
 EXPORT_SYMBOL_GPL(sfp_parse_support);
 
+/**
+ * sfp_select_interface() - Select appropriate phy_interface_t mode
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ * @link_modes: ethtool link modes mask
+ *
+ * Derive the phy_interface_t mode for the information found in the
+ * module's identifying EEPROM and the link modes mask. There is no
+ * standard or defined way to derive this information, so we decide
+ * based upon the link mode mask.
+ */
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+				     const struct sfp_eeprom_id *id,
+				     unsigned long *link_modes)
+{
+	if (phylink_test(link_modes, 10000baseCR_Full) ||
+	    phylink_test(link_modes, 10000baseSR_Full) ||
+	    phylink_test(link_modes, 10000baseLR_Full) ||
+	    phylink_test(link_modes, 10000baseLRM_Full) ||
+	    phylink_test(link_modes, 10000baseER_Full))
+		return PHY_INTERFACE_MODE_10GKR;
+
+	if (phylink_test(link_modes, 2500baseX_Full))
+		return PHY_INTERFACE_MODE_2500BASEX;
+
+	if (id->base.e1000_base_t ||
+	    id->base.e100_base_lx ||
+	    id->base.e100_base_fx)
+		return PHY_INTERFACE_MODE_SGMII;
+
+	if (phylink_test(link_modes, 1000baseX_Full))
+		return PHY_INTERFACE_MODE_1000BASEX;
+
+	dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n");
+
+	return PHY_INTERFACE_MODE_NA;
+}
+EXPORT_SYMBOL_GPL(sfp_select_interface);
+
 static LIST_HEAD(sfp_buses);
 static DEFINE_MUTEX(sfp_mutex);
 
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index e724d5a3dd80..ebce9e24906a 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -422,10 +422,11 @@ struct sfp_upstream_ops {
 #if IS_ENABLED(CONFIG_SFP)
 int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 		   unsigned long *support);
-phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-				    const struct sfp_eeprom_id *id);
 void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 		       unsigned long *support);
+phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+				     const struct sfp_eeprom_id *id,
+				     unsigned long *link_modes);
 
 int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
 int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
@@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
 	return PORT_OTHER;
 }
 
-static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
-						const struct sfp_eeprom_id *id)
-{
-	return PHY_INTERFACE_MODE_NA;
-}
-
 static inline void sfp_parse_support(struct sfp_bus *bus,
 				     const struct sfp_eeprom_id *id,
 				     unsigned long *support)
 {
 }
 
+static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
+						   const struct sfp_eeprom_id *id,
+						   unsigned long *link_modes)
+{
+	return PHY_INTERFACE_MODE_NA;
+}
+
 static inline int sfp_get_module_info(struct sfp_bus *bus,
 				      struct ethtool_modinfo *modinfo)
 {

From 66f5325ce93a2e6fd726fca31fea91baf36a392e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 27 Feb 2018 15:53:07 +0000
Subject: [PATCH 0491/1678] dt-bindings: add maximum power level to SFP binding

Add the new maximum power level property to the SFP binding.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/sff,sfp.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt
index f1c441bedf68..929591d52ed6 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.txt
+++ b/Documentation/devicetree/bindings/net/sff,sfp.txt
@@ -33,6 +33,10 @@ Optional Properties:
   Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high:
   high Tx rate. Must not be present for SFF modules
 
+- maximum-power-milliwatt : Maximum module power consumption
+  Specifies the maximum power consumption allowable by a module in the
+  slot, in milli-Watts.  Presently, modules can be up to 1W, 1.5W or 2W.
+
 Example #1: Direct serdes to SFP connection
 
 sfp_eth3: sfp-eth3 {
@@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 {
 	i2c-bus = <&sfp_1g_i2c>;
 	los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>;
 	mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>;
+	maximum-power-milliwatt = <1000>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>;
 	tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>;

From 3bb35261c74e394aa42d0c636d2608093a1e3309 Mon Sep 17 00:00:00 2001
From: Jon Nettleton <jon@solid-run.com>
Date: Tue, 27 Feb 2018 15:53:12 +0000
Subject: [PATCH 0492/1678] sfp: add high power module support

This patch is the result of work by both Jon Nettleton and Russell King.
Jon wrote the original patch, adding support for SFP modules which
require a power level greater than '1'.

Russell's changes:
- Fix the power levels for big-endian, and make the code flow better.
- Convert to use device_property_read_u8()
- Warn for power levels exceeding host level
  SFF-8431 says:

  "To avoid exceeding system power supply limits and cooling capacity,
   all modules at power up by default shall operate with up to 1.0 W.
   Hosts supporting Power Level II or III operation may enable a Power
   Level II or III module through the 2-wire interface. Power Level II
   or III modules shall assert the power level declaration bit of
   SFF-8472."

  Print a warning for modules that exceed the host power level, and
  leave them operating in power level 1.

- Fix i2c write
  The first byte of any write after the bus address is always the
  device address.  In order to write a value to device D, address I,
  value V, we need to generate on the bus:

    S DDDDDDDD A IIIIIIII A VVVVVVVV A P

  where S = start, R = restart, A = ack, P = stop.  Splitting this
  as two:

    S DDDDDDDD A IIIIIIII A R DDDDDDDD A VVVVVVVV A P

  results in the device's address register being written first by I
  and then by V - the addressed register within the device is not
  written.

- Avoid power mode switching if 0xa2 is not implemented
  Some modules indicate that they support power level II or power level
  III, but do not implement address 0xa2, meaning that the bit to set
  them to high power mode is not accessible.

  These modules appear to have the sff8472_compliance field set to zero,
  and also do not implement diagnostics.  Detect this, but also ensure
  that the module does not require the address switching mode, which we
  do not implement.

- Use mW for power level rather than power level number.

- Fix high power mode transition
  We must not switch to SFP_MOD_PRESENT state until we have finished
  initialising, because the remaining state machines check for that
  state.  Add SFP_MOD_HPOWER as an intermediate state.

- Use definition for I2C register address rather than constant.

Signed-off-by: Jon Nettleton <jon@solid-run.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 150 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 132 insertions(+), 18 deletions(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 6c7d9289078d..83bf4959b043 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -42,6 +42,7 @@ enum {
 
 	SFP_MOD_EMPTY = 0,
 	SFP_MOD_PROBE,
+	SFP_MOD_HPOWER,
 	SFP_MOD_PRESENT,
 	SFP_MOD_ERROR,
 
@@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = {
  * access the I2C EEPROM.  However, Avago modules require 300ms.
  */
 #define T_PROBE_INIT	msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL	msecs_to_jiffies(300)
 #define T_PROBE_RETRY	msecs_to_jiffies(100)
 
 /* SFP modules appear to always have their PHY configured for bus address
@@ -110,10 +112,12 @@ struct sfp {
 	struct sfp_bus *sfp_bus;
 	struct phy_device *mod_phy;
 	const struct sff_data *type;
+	u32 max_power_mW;
 
 	unsigned int (*get_state)(struct sfp *);
 	void (*set_state)(struct sfp *, unsigned int);
 	int (*read)(struct sfp *, bool, u8, void *, size_t);
+	int (*write)(struct sfp *, bool, u8, void *, size_t);
 
 	struct gpio_desc *gpio[GPIO_MAX];
 
@@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
 	}
 }
 
-static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
-			 void *buf, size_t len)
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+			size_t len)
 {
 	struct i2c_msg msgs[2];
+	u8 bus_addr = a2 ? 0x51 : 0x50;
 	int ret;
 
 	msgs[0].addr = bus_addr;
@@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
 	msgs[1].len = len;
 	msgs[1].buf = buf;
 
-	ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+	ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
 	if (ret < 0)
 		return ret;
 
 	return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
-static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
-			size_t len)
+static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf,
+	size_t len)
 {
-	return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+	struct i2c_msg msgs[1];
+	u8 bus_addr = a2 ? 0x51 : 0x50;
+	int ret;
+
+	msgs[0].addr = bus_addr;
+	msgs[0].flags = 0;
+	msgs[0].len = 1 + len;
+	msgs[0].buf = kmalloc(1 + len, GFP_KERNEL);
+	if (!msgs[0].buf)
+		return -ENOMEM;
+
+	msgs[0].buf[0] = dev_addr;
+	memcpy(&msgs[0].buf[1], buf, len);
+
+	ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs));
+
+	kfree(msgs[0].buf);
+
+	if (ret < 0)
+		return ret;
+
+	return ret == ARRAY_SIZE(msgs) ? len : 0;
 }
 
 static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
@@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
 
 	sfp->i2c = i2c;
 	sfp->read = sfp_i2c_read;
+	sfp->write = sfp_i2c_write;
 
 	i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
 	if (IS_ERR(i2c_mii))
@@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
 	return sfp->read(sfp, a2, addr, buf, len);
 }
 
+static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+	return sfp->write(sfp, a2, addr, buf, len);
+}
+
 static unsigned int sfp_check(void *buf, size_t len)
 {
 	u8 *p, check;
@@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp)
 		sfp_sm_probe_phy(sfp);
 }
 
+static int sfp_sm_mod_hpower(struct sfp *sfp)
+{
+	u32 power;
+	u8 val;
+	int err;
+
+	power = 1000;
+	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
+		power = 1500;
+	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
+		power = 2000;
+
+	if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
+	    (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
+	    SFP_DIAGMON_DDM) {
+		/* The module appears not to implement bus address 0xa2,
+		 * or requires an address change sequence, so assume that
+		 * the module powers up in the indicated power mode.
+		 */
+		if (power > sfp->max_power_mW) {
+			dev_err(sfp->dev,
+				"Host does not support %u.%uW modules\n",
+				power / 1000, (power / 100) % 10);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
+	if (power > sfp->max_power_mW) {
+		dev_warn(sfp->dev,
+			 "Host does not support %u.%uW modules, module left in power mode 1\n",
+			 power / 1000, (power / 100) % 10);
+		return 0;
+	}
+
+	if (power <= 1000)
+		return 0;
+
+	err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+	if (err != sizeof(val)) {
+		dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
+		err = -EAGAIN;
+		goto err;
+	}
+
+	val |= BIT(0);
+
+	err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
+	if (err != sizeof(val)) {
+		dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
+		err = -EAGAIN;
+		goto err;
+	}
+
+	dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+		 power / 1000, (power / 100) % 10);
+	return T_HPOWER_LEVEL;
+
+err:
+	return err;
+}
+
 static int sfp_sm_mod_probe(struct sfp *sfp)
 {
 	/* SFP module inserted - read I2C data */
 	struct sfp_eeprom_id id;
 	u8 check;
-	int err;
+	int ret;
 
-	err = sfp_read(sfp, false, 0, &id, sizeof(id));
-	if (err < 0) {
-		dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+	ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+	if (ret < 0) {
+		dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
 		return -EAGAIN;
 	}
 
-	if (err != sizeof(id)) {
-		dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+	if (ret != sizeof(id)) {
+		dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
 		return -EAGAIN;
 	}
 
@@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 		dev_warn(sfp->dev,
 			 "module address swap to access page 0xA2 is not supported.\n");
 
-	return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+	ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+	if (ret < 0)
+		return ret;
+
+	return sfp_sm_mod_hpower(sfp);
 }
 
 static void sfp_sm_mod_remove(struct sfp *sfp)
@@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 		if (event == SFP_E_REMOVE) {
 			sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
 		} else if (event == SFP_E_TIMEOUT) {
-			int err = sfp_sm_mod_probe(sfp);
+			int val = sfp_sm_mod_probe(sfp);
 
-			if (err == 0)
+			if (val == 0)
 				sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
-			else if (err == -EAGAIN)
-				sfp_sm_set_timer(sfp, T_PROBE_RETRY);
-			else
+			else if (val > 0)
+				sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
+			else if (val != -EAGAIN)
 				sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+			else
+				sfp_sm_set_timer(sfp, T_PROBE_RETRY);
 		}
 		break;
 
+	case SFP_MOD_HPOWER:
+		if (event == SFP_E_TIMEOUT) {
+			sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+			break;
+		}
+		/* fallthrough */
 	case SFP_MOD_PRESENT:
 	case SFP_MOD_ERROR:
 		if (event == SFP_E_REMOVE) {
@@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev)
 	if (!(sfp->gpio[GPIO_MODDEF0]))
 		sfp->get_state = sff_gpio_get_state;
 
+	device_property_read_u32(&pdev->dev, "maximum-power-milliwatt",
+				 &sfp->max_power_mW);
+	if (!sfp->max_power_mW)
+		sfp->max_power_mW = 1000;
+
+	dev_info(sfp->dev, "Host maximum power %u.%uW\n",
+		 sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10);
+
 	sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
 	if (!sfp->sfp_bus)
 		return -ENOMEM;

From 4f4aaa1720a3dac0e7afb51bf1177eb66d9c2172 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hern=C3=A1n=20Gonzalez?= <hernan@vanguardiasur.com.ar>
Date: Tue, 27 Feb 2018 19:31:34 -0300
Subject: [PATCH 0493/1678] qlogic/qed: Constify *pkt_type_str[]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note: This is compile only tested as I have no access to the hw.
Constifying and declaring as static saves 24 bytes.

add/remove: 0/1 grow/shrink: 0/0 up/down: 0/-24 (-24)
Function                                     old     new   delta
pkt_type_str                                  24       -     -24
Total: Before=3599256, After=3599232, chg -0.00%

Signed-off-by: Hernán Gonzalez <hernan@vanguardiasur.com.ar>
Acked-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ca4a81dc1ace..03ad4eeac7f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type {
 /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
 #define QED_IWARP_MAX_BDS_PER_FPDU 3
 
-char *pkt_type_str[] = {
+static const char * const pkt_type_str[] = {
 	"QED_IWARP_MPA_PKT_PACKED",
 	"QED_IWARP_MPA_PKT_PARTIAL",
 	"QED_IWARP_MPA_PKT_UNALIGNED"

From 262c9740159ab0286663b46b00e20845fed05e57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hern=C3=A1n=20Gonzalez?= <hernan@vanguardiasur.com.ar>
Date: Tue, 27 Feb 2018 19:29:23 -0300
Subject: [PATCH 0494/1678] emulex/benet: Constify
 *be_misconfig_evt_port_state[]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note: This is compile only tested as I have no access to the hw.
No benefit gained except for some self-documenting.

add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0)
Function                                     old     new   delta
Total: Before=2757703, After=2757703, chg +0.00%

Signed-off-by: Hernán Gonzalez <hernan@vanguardiasur.com.ar>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +-
 drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1a49297224ed..ff92ab1daeb8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -19,7 +19,7 @@
 #include "be.h"
 #include "be_cmds.h"
 
-char *be_misconfig_evt_port_state[] = {
+const char * const be_misconfig_evt_port_state[] = {
 	"Physical Link is functional",
 	"Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.",
 	"Optics of two types installed – Remove one optic or install matching pair of optics.",
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 09da2d82c2f0..e8b43cf44b6f 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -201,7 +201,7 @@ enum {
 			 phy_state == BE_PHY_UNQUALIFIED ||	\
 			 phy_state == BE_PHY_UNCERTIFIED)
 
-extern  char *be_misconfig_evt_port_state[];
+extern const  char * const be_misconfig_evt_port_state[];
 
 /* async event indicating misconfigured port */
 struct be_async_event_misconfig_port {

From 82695b30ffeeab665f41416c6f5015dea3147bd5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 27 Feb 2018 15:48:21 -0800
Subject: [PATCH 0495/1678] inet: whitespace cleanup

Ran simple script to find/remove trailing whitespace and blank lines
at EOF because that kind of stuff git whines about and editors leave
behind.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ethoc.h                |  1 -
 include/net/flow.h                 |  2 +-
 include/net/inet_connection_sock.h | 10 +++++-----
 include/net/ip.h                   | 12 ++++++------
 include/net/ip_fib.h               |  2 +-
 include/net/ipv6.h                 | 10 +++++-----
 include/net/xfrm.h                 | 14 +++++++-------
 net/ipv4/fib_semantics.c           |  2 +-
 net/ipv4/proc.c                    |  1 -
 net/ipv4/tunnel4.c                 |  2 +-
 net/ipv4/xfrm4_policy.c            |  1 -
 net/ipv6/anycast.c                 |  1 -
 net/ipv6/exthdrs_core.c            |  1 -
 net/ipv6/ipv6_sockglue.c           |  1 -
 net/ipv6/proc.c                    |  1 -
 net/ipv6/xfrm6_state.c             |  1 -
 16 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/include/net/ethoc.h b/include/net/ethoc.h
index bb7f467da7fc..29ba069a1d93 100644
--- a/include/net/ethoc.h
+++ b/include/net/ethoc.h
@@ -21,4 +21,3 @@ struct ethoc_platform_data {
 };
 
 #endif /* !LINUX_NET_ETHOC_H */
-
diff --git a/include/net/flow.h b/include/net/flow.h
index f1624fd5b1d0..64e7ee9cb980 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos,
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
 }
-				      
+
 
 struct flowi6 {
 	struct flowi_common	__fl_common;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1a93ce35e62..b68fea022a82 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops {
 	u16	    net_header_len;
 	u16	    net_frag_header_len;
 	u16	    sockaddr_len;
-	int	    (*setsockopt)(struct sock *sk, int level, int optname, 
+	int	    (*setsockopt)(struct sock *sk, int level, int optname,
 				  char __user *optval, unsigned int optlen);
-	int	    (*getsockopt)(struct sock *sk, int level, int optname, 
+	int	    (*getsockopt)(struct sock *sk, int level, int optname,
 				  char __user *optval, int __user *optlen);
 #ifdef CONFIG_COMPAT
 	int	    (*compat_setsockopt)(struct sock *sk,
@@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops {
 
 /** inet_connection_sock - INET connection oriented sock
  *
- * @icsk_accept_queue:	   FIFO of established children 
+ * @icsk_accept_queue:	   FIFO of established children
  * @icsk_bind_hash:	   Bind node
  * @icsk_timeout:	   Timeout
  * @icsk_retransmit_timer: Resend (no ack)
@@ -122,7 +122,7 @@ struct inet_connection_sock {
 		unsigned long	  timeout;	 /* Currently scheduled timeout		   */
 		__u32		  lrcvtime;	 /* timestamp of last received data packet */
 		__u16		  last_seg_size; /* Size of last incoming segment	   */
-		__u16		  rcv_mss;	 /* MSS used for delayed ACK decisions	   */ 
+		__u16		  rcv_mss;	 /* MSS used for delayed ACK decisions	   */
 	} icsk_ack;
 	struct {
 		int		  enabled;
@@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[];
 static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	
+
 	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
 		icsk->icsk_pending = 0;
 #ifdef INET_CSK_CLEAR_TIMERS
diff --git a/include/net/ip.h b/include/net/ip.h
index 746abff9ce51..fe63ba95d12b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void ip4_datagram_release_cb(struct sock *sk);
 
 struct ip_reply_arg {
-	struct kvec iov[1];   
+	struct kvec iov[1];
 	int	    flags;
 	__wsum 	    csum;
 	int	    csumoffset; /* u16 offset of csum in iov[0].iov_base */
-				/* -1 if not needed */ 
+				/* -1 if not needed */
 	int	    bound_dev_if;
 	u8  	    tos;
 	kuid_t	    uid;
-}; 
+};
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
 
@@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net);
 /*
  *	Functions provided by ip_forward.c
  */
- 
+
 int ip_forward(struct sk_buff *skb);
- 
+
 /*
  *	Functions provided by ip_options.c
  */
- 
+
 void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
 		      __be32 daddr, struct rtable *rt, int is_frag);
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f80524396c06..15e19c5c6f26 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -157,7 +157,7 @@ struct fib_result_nl {
 	unsigned char	nh_sel;
 	unsigned char	type;
 	unsigned char	scope;
-	int             err;      
+	int             err;
 };
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7a98cd583c73..cabd3cdd4015 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -105,8 +105,8 @@
 
 #define IPV6_ADDR_ANY		0x0000U
 
-#define IPV6_ADDR_UNICAST      	0x0001U	
-#define IPV6_ADDR_MULTICAST    	0x0002U	
+#define IPV6_ADDR_UNICAST	0x0001U
+#define IPV6_ADDR_MULTICAST	0x0002U
 
 #define IPV6_ADDR_LOOPBACK	0x0010U
 #define IPV6_ADDR_LINKLOCAL	0x0020U
@@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
 #endif
 }
 
-static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
+static inline void ipv6_addr_prefix(struct in6_addr *pfx,
 				    const struct in6_addr *addr,
 				    int plen)
 {
@@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr,
 	addr[1] = wl;
 }
 
-static inline void ipv6_addr_set(struct in6_addr *addr, 
+static inline void ipv6_addr_set(struct in6_addr *addr,
 				     __be32 w1, __be32 w2,
 				     __be32 w3, __be32 w4)
 {
@@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int
 	}
 
 	/*
-	 *	we should *never* get to this point since that 
+	 *	we should *never* get to this point since that
 	 *	would mean the addrs are equal
 	 *
 	 *	However, we do get to it 8) And exacly, when
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7d2077665c0b..aa027ba1d032 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
 static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
-static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
-static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
+static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }
+static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; }
 static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-{ 
-	return 1; 
-} 
+{
+	return 1;
+}
 static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
 {
 	return 1;
@@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x,
 {
 	if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
 	    (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) ||
-	     ipv6_addr_any((struct in6_addr *)saddr) || 
+	     ipv6_addr_any((struct in6_addr *)saddr) ||
 	     ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
 		return 1;
 	return 0;
@@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname,
 static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
 {
  	return -ENOPROTOOPT;
-} 
+}
 
 static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index cd46d7666598..f31e6575ab91 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
 		fnhe = rcu_dereference_protected(hash[i].chain, 1);
 		while (fnhe) {
 			struct fib_nh_exception *next;
-			
+
 			next = rcu_dereference_protected(fnhe->fnhe_next, 1);
 
 			rt_fibinfo_free(&fnhe->fnhe_rth_input);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index fdabc70283b6..d97e83b2dd33 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void)
 {
 	return register_pernet_subsys(&ip_proc_ops);
 }
-
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ec35eaa5c029..c0630013c1ae 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister);
 	for (handler = rcu_dereference(head);		\
 	     handler != NULL;				\
 	     handler = rcu_dereference(handler->next))	\
-	
+
 static int tunnel4_rcv(struct sk_buff *skb)
 {
 	struct xfrm_tunnel *handler;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 796ac4115485..0c752dc3f93b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -379,4 +379,3 @@ void __init xfrm4_init(void)
 	xfrm4_protocol_init();
 	register_pernet_subsys(&xfrm4_net_ops);
 }
-
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..d7d0abc7fd0e 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net)
 	remove_proc_entry("anycast6", net->proc_net);
 }
 #endif
-
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 	return nexthdr;
 }
 EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 }
 EXPORT_SYMBOL(compat_ipv6_getsockopt);
 #endif
-
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b8858c546f41..1678cf037688 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void)
 {
 	unregister_pernet_subsys(&ipv6_proc_ops);
 }
-
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
 {
 	xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
 }
-

From 66e19689f57b61a2f347ea79c16c6b3cb92f090e Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 27 Feb 2018 23:22:33 -0500
Subject: [PATCH 0496/1678] net/macmace: Fix and clean up log messages

Don't log the unexpanded "eth%d" format string.
Log the chip revision in the probe message (consistent with mace.c).
Drop redundant debug messages for FIFO events recorded in the
interface statistics (also consistent with mace.c).

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apple/macmace.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index f17a160dbff2..cfd8f3d8a94c 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev)
 	dev->netdev_ops		= &mace_netdev_ops;
 	dev->watchdog_timeo	= TX_TIMEOUT;
 
-	printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n",
-	       dev->name, dev->dev_addr);
+	pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n",
+		dev->dev_addr, mp->chipid);
 
 	err = register_netdev(dev);
 	if (!err)
@@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
 			else if (fs & (UFLO|LCOL|RTRY)) {
 				++dev->stats.tx_aborted_errors;
 				if (mb->xmtfs & UFLO) {
-					printk(KERN_ERR "%s: DMA underrun.\n", dev->name);
 					dev->stats.tx_fifo_errors++;
 					mace_txdma_reset(dev);
 				}
@@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 
 	if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) {
 		dev->stats.rx_errors++;
-		if (frame_status & RS_OFLO) {
-			printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name);
+		if (frame_status & RS_OFLO)
 			dev->stats.rx_fifo_errors++;
-		}
 		if (frame_status & RS_CLSN)
 			dev->stats.collisions++;
 		if (frame_status & RS_FRAMERR)

From 1e9b9a8bbdbb7d472a2a203f658b38f0b2d99333 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 27 Feb 2018 23:22:33 -0500
Subject: [PATCH 0497/1678] net/macmace: Drop redundant MACH_IS_MAC test

The MACH_IS_MAC test is redundant here because the platform device
won't get registered unless MACH_IS_MAC.
Adopt module_platform_driver() convention.

Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apple/macmace.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index cfd8f3d8a94c..137cbb470af2 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -767,18 +767,4 @@ static struct platform_driver mac_mace_driver = {
 	},
 };
 
-static int __init mac_mace_init_module(void)
-{
-	if (!MACH_IS_MAC)
-		return -ENODEV;
-
-	return platform_driver_register(&mac_mace_driver);
-}
-
-static void __exit mac_mace_cleanup_module(void)
-{
-	platform_driver_unregister(&mac_mace_driver);
-}
-
-module_init(mac_mace_init_module);
-module_exit(mac_mace_cleanup_module);
+module_platform_driver(mac_mace_driver);

From 14967f94972c0388f455625e7109ab7651429014 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 28 Feb 2018 07:55:20 +0100
Subject: [PATCH 0498/1678] r8169: convert remaining feature flag and remove
 enum features

Now that only one feature flag is left we can convert it and remove
enum features.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index cc51286ee51f..0a0638d692f9 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -735,10 +735,6 @@ struct ring_info {
 	u8		__pad[sizeof(void *) - sizeof(u32)];
 };
 
-enum features {
-	RTL_FEATURE_GMII	= (1 << 0),
-};
-
 struct rtl8169_counters {
 	__le64	tx_packets;
 	__le64	rx_packets;
@@ -8233,7 +8229,7 @@ static const struct rtl_cfg_info {
 	unsigned int region;
 	unsigned int align;
 	u16 event_slow;
-	unsigned features;
+	unsigned int has_gmii:1;
 	const struct rtl_coalesce_info *coalesce_info;
 	u8 default_ver;
 } rtl_cfg_infos [] = {
@@ -8242,7 +8238,7 @@ static const struct rtl_cfg_info {
 		.region		= 1,
 		.align		= 0,
 		.event_slow	= SYSErr | LinkChg | RxOverflow | RxFIFOOver,
-		.features	= RTL_FEATURE_GMII,
+		.has_gmii	= 1,
 		.coalesce_info	= rtl_coalesce_info_8169,
 		.default_ver	= RTL_GIGA_MAC_VER_01,
 	},
@@ -8251,7 +8247,7 @@ static const struct rtl_cfg_info {
 		.region		= 2,
 		.align		= 8,
 		.event_slow	= SYSErr | LinkChg | RxOverflow,
-		.features	= RTL_FEATURE_GMII,
+		.has_gmii	= 1,
 		.coalesce_info	= rtl_coalesce_info_8168_8136,
 		.default_ver	= RTL_GIGA_MAC_VER_11,
 	},
@@ -8394,7 +8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	mii->mdio_write = rtl_mdio_write;
 	mii->phy_id_mask = 0x1f;
 	mii->reg_num_mask = 0x1f;
-	mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+	mii->supports_gmii = cfg->has_gmii;
 
 	/* disable ASPM completely as that cause random device stop working
 	 * problems as well as full system hangs for some PCIe devices users */

From 56beda3db6020428885f0589a0ac16768ea94543 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Wed, 28 Feb 2018 10:14:13 +0100
Subject: [PATCH 0499/1678] net: mvpp2: Add hardware offloading for VLAN
 filtering

Marvell PPv2 controller allows for generic packet filtering. This commit
adds entries to implement VLAN filtering. The approach taken is :

 - Filter entries that would match on the presence of the VLAN tag
   (existing VLAN detection, DSA / EDSA detection) will set the next
   lookup ID to be for the VID.

 - For each VLAN existing on a given port, we add an entry that matches
   this specific VID. If the incoming packet matches the VID entry, it is
   set for the next lookup in the chain (LU_L2).

 - A Guard entry is added for each port, that will match if the incoming
   packet didn't match any of the above VID entries. This entry tags the
   packet to be dropped.

Due to this design, and the fact that the total 256 filter entries are
also used for other purposes, we have a limit of 10 VLANs per port. To
accommodate the case where we would need more VLANS on one port, this
patch implements the ndo_set_features to allow for disabling of VLAN
filtering using ethtool.

The default config has VLAN filtering disabled.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 414 ++++++++++++++++++++++++---
 1 file changed, 380 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 5a1668cdb461..9418f6eed086 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -65,6 +65,10 @@
 #define     MVPP2_RXQ_PACKET_OFFSET_MASK	0x70000000
 #define     MVPP2_RXQ_DISABLE_MASK		BIT(31)
 
+/* Top Registers */
+#define MVPP2_MH_REG(port)			(0x5040 + 4 * (port))
+#define MVPP2_DSA_EXTENDED			BIT(5)
+
 /* Parser Registers */
 #define MVPP2_PRS_INIT_LOOKUP_REG		0x1000
 #define     MVPP2_PRS_PORT_LU_MAX		0xf
@@ -473,6 +477,7 @@
 #define MVPP2_ETH_TYPE_LEN		2
 #define MVPP2_PPPOE_HDR_SIZE		8
 #define MVPP2_VLAN_TAG_LEN		4
+#define MVPP2_VLAN_TAG_EDSA_LEN		8
 
 /* Lbtd 802.3 type */
 #define MVPP2_IP_LBDT_TYPE		0xfffa
@@ -609,35 +614,64 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_TCAM_LU_BYTE			20
 #define MVPP2_PRS_TCAM_EN_OFFS(offs)		((offs) + 2)
 #define MVPP2_PRS_TCAM_INV_WORD			5
+
+#define MVPP2_PRS_VID_TCAM_BYTE         2
+
+/* There is a TCAM range reserved for VLAN filtering entries, range size is 33
+ * 10 VLAN ID filter entries per port
+ * 1 default VLAN filter entry per port
+ * It is assumed that there are 3 ports for filter, not including loopback port
+ */
+#define MVPP2_PRS_VLAN_FILT_MAX		11
+#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE	33
+
+#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY   (MVPP2_PRS_VLAN_FILT_MAX - 2)
+#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY  (MVPP2_PRS_VLAN_FILT_MAX - 1)
+
 /* Tcam entries ID */
 #define MVPP2_PE_DROP_ALL		0
 #define MVPP2_PE_FIRST_FREE_TID		1
-#define MVPP2_PE_LAST_FREE_TID		(MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+
+/* VLAN filtering range */
+#define MVPP2_PE_VID_FILT_RANGE_END     (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
+#define MVPP2_PE_VID_FILT_RANGE_START   (MVPP2_PE_VID_FILT_RANGE_END - \
+					 MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
+#define MVPP2_PE_LAST_FREE_TID          (MVPP2_PE_VID_FILT_RANGE_START - 1)
 #define MVPP2_PE_IP6_EXT_PROTO_UN	(MVPP2_PRS_TCAM_SRAM_SIZE - 30)
 #define MVPP2_PE_MAC_MC_IP6		(MVPP2_PRS_TCAM_SRAM_SIZE - 29)
 #define MVPP2_PE_IP6_ADDR_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 28)
 #define MVPP2_PE_IP4_ADDR_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 27)
 #define MVPP2_PE_LAST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_EDSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_EDSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_DSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_DSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_ETYPE_DSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_MH_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_DSA_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_IP6_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_IP4_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_ETH_TYPE_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_EDSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_DSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_MH_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_DSA_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP6_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_IP4_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_ETH_TYPE_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 6)
 #define MVPP2_PE_VLAN_DBL		(MVPP2_PRS_TCAM_SRAM_SIZE - 5)
 #define MVPP2_PE_VLAN_NONE		(MVPP2_PRS_TCAM_SRAM_SIZE - 4)
 #define MVPP2_PE_MAC_MC_ALL		(MVPP2_PRS_TCAM_SRAM_SIZE - 3)
 #define MVPP2_PE_MAC_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 2)
 #define MVPP2_PE_MAC_NON_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 
+#define MVPP2_PRS_VID_PORT_FIRST(port)	(MVPP2_PE_VID_FILT_RANGE_START + \
+					 ((port) * MVPP2_PRS_VLAN_FILT_MAX))
+#define MVPP2_PRS_VID_PORT_LAST(port)	(MVPP2_PRS_VID_PORT_FIRST(port) \
+					 + MVPP2_PRS_VLAN_FILT_MAX_ENTRY)
+/* Index of default vid filter for given port */
+#define MVPP2_PRS_VID_PORT_DFLT(port)	(MVPP2_PRS_VID_PORT_FIRST(port) \
+					 + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY)
+
 /* Sram structure
  * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0).
  */
@@ -725,6 +759,7 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT		BIT(4)
 #define MVPP2_PRS_SINGLE_VLAN_AI		0
 #define MVPP2_PRS_DBL_VLAN_AI_BIT		BIT(7)
+#define MVPP2_PRS_EDSA_VID_AI_BIT		BIT(0)
 
 /* DSA/EDSA type */
 #define MVPP2_PRS_TAGGED		true
@@ -747,6 +782,7 @@ enum mvpp2_prs_lookup {
 	MVPP2_PRS_LU_MAC,
 	MVPP2_PRS_LU_DSA,
 	MVPP2_PRS_LU_VLAN,
+	MVPP2_PRS_LU_VID,
 	MVPP2_PRS_LU_L2,
 	MVPP2_PRS_LU_PPPOE,
 	MVPP2_PRS_LU_IP4,
@@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset,
 	mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff);
 }
 
+/* Set vid in tcam sw entry */
+static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset,
+				unsigned short vid)
+{
+	mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf);
+	mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff);
+}
+
 /* Set bits in sram sw entry */
 static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num,
 				    int val)
@@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
 		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA);
 		pe.index = tid;
 
-		/* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/
-		mvpp2_prs_sram_shift_set(&pe, shift,
-					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
 		/* Update shadow table */
 		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA);
 
 		if (tagged) {
 			/* Set tagged bit in DSA tag */
 			mvpp2_prs_tcam_data_byte_set(&pe, 0,
-						     MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
-						     MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
-			/* Clear all ai bits for next iteration */
-			mvpp2_prs_sram_ai_update(&pe, 0,
-						 MVPP2_PRS_SRAM_AI_MASK);
-			/* If packet is tagged continue check vlans */
-			mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+					     MVPP2_PRS_TCAM_DSA_TAGGED_BIT,
+					     MVPP2_PRS_TCAM_DSA_TAGGED_BIT);
+
+			/* Set ai bits for next iteration */
+			if (extend)
+				mvpp2_prs_sram_ai_update(&pe, 1,
+							MVPP2_PRS_SRAM_AI_MASK);
+			else
+				mvpp2_prs_sram_ai_update(&pe, 0,
+							MVPP2_PRS_SRAM_AI_MASK);
+
+			/* If packet is tagged continue check vid filtering */
+			mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
 		} else {
+			/* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/
+			mvpp2_prs_sram_shift_set(&pe, shift,
+					MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
 			/* Set result info bits to 'no vlans' */
 			mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE,
 						 MVPP2_PRS_RI_VLAN_MASK);
@@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 
 		mvpp2_prs_match_etype(pe, 0, tpid);
 
-		mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2);
-		/* Shift 4 bytes - skip 1 vlan tag */
-		mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
-					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+		/* VLAN tag detected, proceed with VID filtering */
+		mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+
 		/* Clear all ai bits for next iteration */
 		mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
 
@@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
 		mvpp2_prs_match_etype(pe, 4, tpid2);
 
 		mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
-		/* Shift 8 bytes - skip 2 vlan tags */
-		mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN,
+		/* Shift 4 bytes - skip outer vlan tag */
+		mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
 					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
 		mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
 					 MVPP2_PRS_RI_VLAN_MASK);
@@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv)
 	mvpp2_prs_hw_write(priv, &pe);
 }
 
+/* Initialize parser entries for VID filtering */
+static void mvpp2_prs_vid_init(struct mvpp2 *priv)
+{
+	struct mvpp2_prs_entry pe;
+
+	memset(&pe, 0, sizeof(pe));
+
+	/* Set default vid entry */
+	pe.index = MVPP2_PE_VID_FLTR_DEFAULT;
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+	mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT);
+
+	/* Skip VLAN header - Set offset to 4 bytes */
+	mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
+				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+	/* Clear all ai bits for next iteration */
+	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+	/* Unmask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+	/* Update shadow table and hw entry */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+	mvpp2_prs_hw_write(priv, &pe);
+
+	/* Set default vid entry for extended DSA*/
+	memset(&pe, 0, sizeof(pe));
+
+	/* Set default vid entry */
+	pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT;
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+	mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT,
+				 MVPP2_PRS_EDSA_VID_AI_BIT);
+
+	/* Skip VLAN header - Set offset to 8 bytes */
+	mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN,
+				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+	/* Clear all ai bits for next iteration */
+	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+	/* Unmask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK);
+
+	/* Update shadow table and hw entry */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Match basic ethertypes */
 static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 {
@@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv)
 	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
 	pe.index = MVPP2_PE_VLAN_DBL;
 
-	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
+
 	/* Clear ai for next iterations */
 	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
 	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
@@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv)
 	return 0;
 }
 
+/* Find tcam entry with matched pair <vid,port> */
+static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
+				    u16 mask)
+{
+	unsigned char byte[2], enable[2];
+	struct mvpp2_prs_entry pe;
+	u16 rvid, rmask;
+	int tid;
+
+	/* Go through the all entries with MVPP2_PRS_LU_VID */
+	for (tid = MVPP2_PE_VID_FILT_RANGE_START;
+	     tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) {
+		if (!priv->prs_shadow[tid].valid ||
+		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
+			continue;
+
+		pe.index = tid;
+
+		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
+		mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
+
+		rvid = ((byte[0] & 0xf) << 8) + byte[1];
+		rmask = ((enable[0] & 0xf) << 8) + enable[1];
+
+		if (rvid != vid || rmask != mask)
+			continue;
+
+		return tid;
+	}
+
+	return 0;
+}
+
+/* Write parser entry for VID filtering */
+static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
+{
+	unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START +
+				 port->id * MVPP2_PRS_VLAN_FILT_MAX;
+	unsigned int mask = 0xfff, reg_val, shift;
+	struct mvpp2 *priv = port->priv;
+	struct mvpp2_prs_entry pe;
+	int tid;
+
+	/* Scan TCAM and see if entry with this <vid,port> already exist */
+	tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
+
+	reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+	if (reg_val & MVPP2_DSA_EXTENDED)
+		shift = MVPP2_VLAN_TAG_EDSA_LEN;
+	else
+		shift = MVPP2_VLAN_TAG_LEN;
+
+	/* No such entry */
+	if (!tid) {
+		memset(&pe, 0, sizeof(pe));
+
+		/* Go through all entries from first to last in vlan range */
+		tid = mvpp2_prs_tcam_first_free(priv, vid_start,
+						vid_start +
+						MVPP2_PRS_VLAN_FILT_MAX_ENTRY);
+
+		/* There isn't room for a new VID filter */
+		if (tid < 0)
+			return tid;
+
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+		pe.index = tid;
+
+		/* Mask all ports */
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+	} else {
+		mvpp2_prs_hw_read(priv, &pe);
+	}
+
+	/* Enable the current port */
+	mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+	/* Continue - set next lookup */
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+	/* Skip VLAN header - Set offset to 4 or 8 bytes */
+	mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+	/* Set match on VID */
+	mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid);
+
+	/* Clear all ai bits for next iteration */
+	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+	/* Update shadow table */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+	mvpp2_prs_hw_write(priv, &pe);
+
+	return 0;
+}
+
+/* Write parser entry for VID filtering */
+static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
+{
+	struct mvpp2 *priv = port->priv;
+	int tid;
+
+	/* Scan TCAM and see if entry with this <vid,port> already exist */
+	tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
+
+	/* No such entry */
+	if (tid)
+		return;
+
+	mvpp2_prs_hw_inv(priv, tid);
+	priv->prs_shadow[tid].valid = false;
+}
+
+/* Remove all existing VID filters on this port */
+static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port)
+{
+	struct mvpp2 *priv = port->priv;
+	int tid;
+
+	for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id);
+	     tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) {
+		if (priv->prs_shadow[tid].valid)
+			mvpp2_prs_vid_entry_remove(port, tid);
+	}
+}
+
+/* Remove VID filering entry for this port */
+static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port)
+{
+	unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+	struct mvpp2 *priv = port->priv;
+
+	/* Invalidate the guard entry */
+	mvpp2_prs_hw_inv(priv, tid);
+
+	priv->prs_shadow[tid].valid = false;
+}
+
+/* Add guard entry that drops packets when no VID is matched on this port */
+static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port)
+{
+	unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id);
+	struct mvpp2 *priv = port->priv;
+	unsigned int reg_val, shift;
+	struct mvpp2_prs_entry pe;
+
+	if (priv->prs_shadow[tid].valid)
+		return;
+
+	memset(&pe, 0, sizeof(pe));
+
+	pe.index = tid;
+
+	reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id));
+	if (reg_val & MVPP2_DSA_EXTENDED)
+		shift = MVPP2_VLAN_TAG_EDSA_LEN;
+	else
+		shift = MVPP2_VLAN_TAG_LEN;
+
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID);
+
+	/* Mask all ports */
+	mvpp2_prs_tcam_port_map_set(&pe, 0);
+
+	/* Update port mask */
+	mvpp2_prs_tcam_port_set(&pe, port->id, true);
+
+	/* Continue - set next lookup */
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2);
+
+	/* Skip VLAN header - Set offset to 4 or 8 bytes */
+	mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+
+	/* Drop VLAN packets that don't belong to any VIDs on this port */
+	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK,
+				 MVPP2_PRS_RI_DROP_MASK);
+
+	/* Clear all ai bits for next iteration */
+	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+
+	/* Update shadow table */
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID);
+	mvpp2_prs_hw_write(priv, &pe);
+}
+
 /* Parser default initialization */
 static int mvpp2_prs_default_init(struct platform_device *pdev,
 				  struct mvpp2 *priv)
@@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev,
 
 	mvpp2_prs_dsa_init(priv);
 
+	mvpp2_prs_vid_init(priv);
+
 	err = mvpp2_prs_etype_init(priv);
 	if (err)
 		return err;
@@ -7153,6 +7447,12 @@ retry:
 			}
 		}
 	}
+
+	/* Disable VLAN filtering in promiscuous mode */
+	if (dev->flags & IFF_PROMISC)
+		mvpp2_prs_vid_disable_filtering(port);
+	else
+		mvpp2_prs_vid_enable_filtering(port);
 }
 
 static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return ret;
 }
 
+static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+	int ret;
+
+	ret = mvpp2_prs_vid_entry_add(port, vid);
+	if (ret)
+		netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n",
+			   MVPP2_PRS_VLAN_FILT_MAX - 1);
+	return ret;
+}
+
+static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	mvpp2_prs_vid_entry_remove(port, vid);
+	return 0;
+}
+
+static int mvpp2_set_features(struct net_device *dev,
+			      netdev_features_t features)
+{
+	netdev_features_t changed = dev->features ^ features;
+	struct mvpp2_port *port = netdev_priv(dev);
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+		if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+			mvpp2_prs_vid_enable_filtering(port);
+		} else {
+			/* Invalidate all registered VID filters for this
+			 * port
+			 */
+			mvpp2_prs_vid_remove_all(port);
+
+			mvpp2_prs_vid_disable_filtering(port);
+		}
+	}
+
+	return 0;
+}
+
 /* Ethtool methods */
 
 /* Set interrupt coalescing for ethtools */
@@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 	.ndo_change_mtu		= mvpp2_change_mtu,
 	.ndo_get_stats64	= mvpp2_get_stats64,
 	.ndo_do_ioctl		= mvpp2_ioctl,
+	.ndo_vlan_rx_add_vid	= mvpp2_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= mvpp2_vlan_rx_kill_vid,
+	.ndo_set_features	= mvpp2_set_features,
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
@@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->features = features | NETIF_F_RXCSUM;
-	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
+	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
+			    NETIF_F_HW_VLAN_CTAG_FILTER;
 	dev->vlan_features |= features;
 	dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
 

From eed4baeb040aa41323d0091c18d36cc5a895792d Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:44:58 +0100
Subject: [PATCH 0500/1678] mlxsw: spectrum: qdiscs: Support qdisc per tclass

Add the option to set a qdisc per tclass.  Match the qdisc to the tclass by
parent ID. Supported currently for sch_red only.
It allows offloading sch_prio as root qdisc and sch_red as its child.
(However, doing so might corrupt the stats for both parent and child.)

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  1 +
 .../ethernet/mellanox/mlxsw/spectrum_qdisc.c  | 62 ++++++++++++++++---
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 2673310f92da..cc9786ff3b05 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -247,6 +247,7 @@ struct mlxsw_sp_port {
 	struct mlxsw_sp_port_sample *sample;
 	struct list_head vlans_list;
 	struct mlxsw_sp_qdisc *root_qdisc;
+	struct mlxsw_sp_qdisc *tclass_qdiscs;
 	unsigned acl_rule_count;
 	struct mlxsw_sp_acl_block *ing_acl_block;
 	struct mlxsw_sp_acl_block *eg_acl_block;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0b7670459051..858846a019ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -42,6 +42,8 @@
 #include "reg.h"
 
 #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
+#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
+	MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1))
 
 enum mlxsw_sp_qdisc_type {
 	MLXSW_SP_QDISC_NO_QDISC,
@@ -99,6 +101,26 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
 	       mlxsw_sp_qdisc->handle == handle;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
+		    bool root_only)
+{
+	int tclass, child_index;
+
+	if (parent == TC_H_ROOT)
+		return mlxsw_sp_port->root_qdisc;
+
+	if (root_only || !mlxsw_sp_port->root_qdisc ||
+	    !mlxsw_sp_port->root_qdisc->ops ||
+	    TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle ||
+	    TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+		return NULL;
+
+	child_index = TC_H_MIN(parent);
+	tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
+	return &mlxsw_sp_port->tclass_qdiscs[tclass];
+}
+
 static int
 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -406,11 +428,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-	if (p->parent != TC_H_ROOT)
+	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
+	if (!mlxsw_sp_qdisc)
 		return -EOPNOTSUPP;
 
-	mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
-
 	if (p->command == TC_RED_REPLACE)
 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
 					      mlxsw_sp_qdisc,
@@ -441,9 +462,12 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	int i;
 
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
 					  MLXSW_SP_PORT_DEFAULT_TCLASS);
+		mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+				       &mlxsw_sp_port->tclass_qdiscs[i]);
+	}
 
 	return 0;
 }
@@ -569,10 +593,10 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
-	if (p->parent != TC_H_ROOT)
+	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+	if (!mlxsw_sp_qdisc)
 		return -EOPNOTSUPP;
 
-	mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
 	if (p->command == TC_PRIO_REPLACE)
 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
 					      mlxsw_sp_qdisc,
@@ -596,17 +620,35 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
-	mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
-					    GFP_KERNEL);
-	if (!mlxsw_sp_port->root_qdisc)
-		return -ENOMEM;
+	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+	int i;
 
+	mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL);
+	if (!mlxsw_sp_qdisc)
+		goto err_root_qdisc_init;
+
+	mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
 	mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
 
+	mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,
+				 GFP_KERNEL);
+	if (!mlxsw_sp_qdisc)
+		goto err_tclass_qdiscs_init;
+
+	mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc;
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i;
+
 	return 0;
+
+err_tclass_qdiscs_init:
+	kfree(mlxsw_sp_port->root_qdisc);
+err_root_qdisc_init:
+	return -ENOMEM;
 }
 
 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	kfree(mlxsw_sp_port->tclass_qdiscs);
 	kfree(mlxsw_sp_port->root_qdisc);
 }

From 2f88047ec42cc0cc8228247033a8f83960ce7f81 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:44:59 +0100
Subject: [PATCH 0501/1678] mlxsw: spectrum: Add priority counters

Add TX packets and bytes counters per switch priority per port.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 ++++++++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8d2d140d7910..7c6204f701ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev,
 		xstats->tail_drop[i] =
 			mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl);
 	}
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT,
+						  i, ppcnt_pl);
+		if (err)
+			continue;
+
+		xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl);
+		xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl);
+	}
 }
 
 static void update_stats_cache(struct work_struct *work)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index cc9786ff3b05..d5e711d8ad71 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats {
 	u64 wred_drop[TC_MAX_QUEUE];
 	u64 tail_drop[TC_MAX_QUEUE];
 	u64 backlog[TC_MAX_QUEUE];
+	u64 tx_bytes[IEEE_8021QAZ_MAX_TCS];
+	u64 tx_packets[IEEE_8021QAZ_MAX_TCS];
 };
 
 struct mlxsw_sp_port {

From 1631ab2e8d31577d65f4cf857f609ef3131ff61e Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:00 +0100
Subject: [PATCH 0502/1678] mlxsw: spectrum: qdiscs: Add priority map per qdisc

Add priority map per qdisc, to indicate which priorities are being
directed through this qdisc.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 858846a019ae..0e0299020d82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -78,6 +78,7 @@ struct mlxsw_sp_qdisc_ops {
 struct mlxsw_sp_qdisc {
 	u32 handle;
 	u8 tclass_num;
+	u8 prio_bitmap;
 	union {
 		struct red_stats red;
 	} xstats_base;
@@ -467,6 +468,7 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 					  MLXSW_SP_PORT_DEFAULT_TCLASS);
 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
 				       &mlxsw_sp_port->tclass_qdiscs[i]);
+		mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
 	}
 
 	return 0;
@@ -494,11 +496,15 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 	int tclass, i;
 	int err;
 
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
+
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
 		err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
 		if (err)
 			return err;
+		mlxsw_sp_port->tclass_qdiscs[tclass].prio_bitmap |= BIT(i);
 	}
 
 	return 0;
@@ -628,6 +634,7 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 		goto err_root_qdisc_init;
 
 	mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc;
+	mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff;
 	mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
 
 	mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS,

From 04cc0bf5d6ceb0ff19e8636fb39695620e587b06 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:01 +0100
Subject: [PATCH 0503/1678] mlxsw: spectrum: qdiscs: Collect stats for sch_red
 based on priomap

Priority counters count packets according to their packet priority.
Collect the stats for sch_red based on these counters, so the qdisc bstats
will be the sum of counters matching the priorities marked in the qdisc
priomap.
Changing the mapping of the priorities to bands while traffic is running
can result in losing the stats of the bands qdiscs from their last dump
call to this change, as if the qdisc was unoffloaded and re-offloaded. It
will not affect the traffic behaviour according to sch_red.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_qdisc.c  | 69 ++++++++++++++-----
 1 file changed, 50 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 0e0299020d82..b722af360475 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -208,6 +208,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
 	return -EOPNOTSUPP;
 }
 
+static void
+mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
+				       u8 prio_bitmap, u64 *tx_packets,
+				       u64 *tx_bytes)
+{
+	int i;
+
+	*tx_packets = 0;
+	*tx_bytes = 0;
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (prio_bitmap & BIT(i)) {
+			*tx_packets += xstats->tx_packets[i];
+			*tx_bytes += xstats->tx_bytes[i];
+		}
+	}
+}
+
 static int
 mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
 				  int tclass_num, u32 min, u32 max,
@@ -253,17 +270,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
 	struct mlxsw_sp_qdisc_stats *stats_base;
 	struct mlxsw_sp_port_xstats *xstats;
-	struct rtnl_link_stats64 *stats;
 	struct red_stats *red_base;
 
 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
 	stats_base = &mlxsw_sp_qdisc->stats_base;
 	red_base = &mlxsw_sp_qdisc->xstats_base.red;
 
-	stats_base->tx_packets = stats->tx_packets;
-	stats_base->tx_bytes = stats->tx_bytes;
-
+	mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+					       mlxsw_sp_qdisc->prio_bitmap,
+					       &stats_base->tx_packets,
+					       &stats_base->tx_bytes);
 	red_base->prob_mark = xstats->ecn;
 	red_base->prob_drop = xstats->wred_drop[tclass_num];
 	red_base->pdrop = xstats->tail_drop[tclass_num];
@@ -380,14 +396,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
 	struct mlxsw_sp_qdisc_stats *stats_base;
 	struct mlxsw_sp_port_xstats *xstats;
-	struct rtnl_link_stats64 *stats;
 
 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
-	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
 	stats_base = &mlxsw_sp_qdisc->stats_base;
 
-	tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
-	tx_packets = stats->tx_packets - stats_base->tx_packets;
+	mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
+					       mlxsw_sp_qdisc->prio_bitmap,
+					       &tx_packets, &tx_bytes);
+	tx_bytes = tx_bytes - stats_base->tx_bytes;
+	tx_packets = tx_packets - stats_base->tx_packets;
+
 	overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
 		     stats_base->overlimits;
 	drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] -
@@ -493,18 +511,31 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 			    void *params)
 {
 	struct tc_prio_qopt_offload_params *p = params;
-	int tclass, i;
+	struct mlxsw_sp_qdisc *child_qdisc;
+	int tclass, i, band;
+	u8 old_priomap;
 	int err;
 
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
-
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
-		err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
-		if (err)
-			return err;
-		mlxsw_sp_port->tclass_qdiscs[tclass].prio_bitmap |= BIT(i);
+	for (band = 0; band < p->bands; band++) {
+		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+		child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+		old_priomap = child_qdisc->prio_bitmap;
+		child_qdisc->prio_bitmap = 0;
+		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+			if (p->priomap[i] == band) {
+				child_qdisc->prio_bitmap |= BIT(i);
+				if (BIT(i) & old_priomap)
+					continue;
+				err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
+								i, tclass);
+				if (err)
+					return err;
+			}
+		}
+		if (old_priomap != child_qdisc->prio_bitmap &&
+		    child_qdisc->ops && child_qdisc->ops->clean_stats)
+			child_qdisc->ops->clean_stats(mlxsw_sp_port,
+						      child_qdisc);
 	}
 
 	return 0;

From cc6e5c13af8b592cfed49f4854eddddbb58c6949 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:02 +0100
Subject: [PATCH 0504/1678] mlxsw: spectrum: qdiscs: Update backlog handling of
 a child qdiscs

When removing a child qdisc its backlog will be decreased from the parent
backlog. The driver backlog count should do the same.
When the parent changes its configuration, the child might need to clean
its stats. However, the backlog can't be cleaned with the rest of the
stats, because it reflects a momentary value that needs to be synced with
the core, not the history of the qdisc.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c   | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index b722af360475..5ddaafc8aa18 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -294,6 +294,12 @@ static int
 mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
+	struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
+
+	if (root_qdisc != mlxsw_sp_qdisc)
+		root_qdisc->stats_base.backlog -=
+					mlxsw_sp_qdisc->stats_base.backlog;
+
 	return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
 						  mlxsw_sp_qdisc->tclass_num);
 }
@@ -358,6 +364,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
 	backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
 				       mlxsw_sp_qdisc->stats_base.backlog);
 	p->qstats->backlog -= backlog;
+	mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
 
 static int
@@ -512,7 +519,7 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	struct tc_prio_qopt_offload_params *p = params;
 	struct mlxsw_sp_qdisc *child_qdisc;
-	int tclass, i, band;
+	int tclass, i, band, backlog;
 	u8 old_priomap;
 	int err;
 
@@ -533,9 +540,12 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 			}
 		}
 		if (old_priomap != child_qdisc->prio_bitmap &&
-		    child_qdisc->ops && child_qdisc->ops->clean_stats)
+		    child_qdisc->ops && child_qdisc->ops->clean_stats) {
+			backlog = child_qdisc->stats_base.backlog;
 			child_qdisc->ops->clean_stats(mlxsw_sp_port,
 						      child_qdisc);
+			child_qdisc->stats_base.backlog = backlog;
+		}
 	}
 
 	return 0;

From fd5ac14a1aae63ef95b22cc2eb23e3a25b3436be Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:03 +0100
Subject: [PATCH 0505/1678] net: sch: Don't warn on missmatching qlen and
 backlog for offloaded qdiscs

Offloaded qdiscs are allowed to expose only parts of their statistics.
It means that if backlog is being exposed and qlen is not, it might trigger
a warning in qdisc_tree_reduce_backlog.
Do not warn in case the qdisc that was removed was an offloaded one.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 27e672c12492..68f9d942bed4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 			       unsigned int len)
 {
+	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
 	u32 parentid;
@@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
 		 * If child was empty even before update then backlog
 		 * counter is screwed and we skip notification because
 		 * parent class is already passive.
+		 *
+		 * If the original child was offloaded then it is allowed
+		 * to be seem as empty, so the parent is notified anyway.
 		 */
-		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
+		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
+						       !qdisc_is_offloaded);
 		/* TODO: perform the search on a per txq basis */
 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 		if (sch == NULL) {

From 23f2b4048cc49d786c452f681dd5d8920ca095a7 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:04 +0100
Subject: [PATCH 0506/1678] mlxsw: spectrum: Update sch_prio stats to include
 sch_red related drops

sch_prio as root qdisc should count all the drops its children have. Since
it is possible for it to have sch_red children, it needs to count RED early
drops.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 5ddaafc8aa18..bed7495d35d6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -584,6 +584,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		drops += xstats->tail_drop[i];
+		drops += xstats->wred_drop[i];
 		backlog += xstats->backlog[i];
 	}
 	drops = drops - stats_base->drops;
@@ -619,8 +620,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 	stats_base->tx_bytes = stats->tx_bytes;
 
 	stats_base->drops = 0;
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		stats_base->drops += xstats->tail_drop[i];
+		stats_base->drops += xstats->wred_drop[i];
+	}
 
 	mlxsw_sp_qdisc->stats_base.backlog = 0;
 }

From 98ceb7b6d64552f995973be1a0ee9af0bf85fb3d Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:05 +0100
Subject: [PATCH 0507/1678] mlxsw: spectrum: qdiscs: prio: Delete child qdiscs
 when removing bands

When the number the bands of sch_prio is decreased, child qdiscs on the
deleted bands would get deleted as well.
This change and deletions are being done under sch_tree_lock of the
sch_prio qdisc. Part of the destruction of qdisc is unoffloading it, if
it is offloaded. Un-offloading can't be done inside this lock.
Move the offload command to be done before reducing the number of bands,
so unoffloading of the qdiscs that are about to be deleted could be done
outside of the lock.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c    |  7 ++++++-
 net/sched/sch_prio.c                                | 13 ++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index bed7495d35d6..a2a3ac09c3bc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -547,7 +547,12 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 			child_qdisc->stats_base.backlog = backlog;
 		}
 	}
-
+	for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
+		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
+		child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
+		child_qdisc->prio_bitmap = 0;
+		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
+	}
 	return 0;
 }
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index efbf51f35778..ba2d6d17d95a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch)
 	sch->q.qlen = 0;
 }
 
-static int prio_offload(struct Qdisc *sch, bool enable)
+static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
 {
-	struct prio_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_prio_qopt_offload opt = {
 		.handle = sch->handle,
@@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable)
 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 		return -EOPNOTSUPP;
 
-	if (enable) {
+	if (qopt) {
 		opt.command = TC_PRIO_REPLACE;
-		opt.replace_params.bands = q->bands;
-		memcpy(&opt.replace_params.priomap, q->prio2band,
+		opt.replace_params.bands = qopt->bands;
+		memcpy(&opt.replace_params.priomap, qopt->priomap,
 		       TC_PRIO_MAX + 1);
 		opt.replace_params.qstats = &sch->qstats;
 	} else {
@@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch)
 	struct prio_sched_data *q = qdisc_priv(sch);
 
 	tcf_block_put(q->block);
-	prio_offload(sch, false);
+	prio_offload(sch, NULL);
 	for (prio = 0; prio < q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
@@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
 		}
 	}
 
+	prio_offload(sch, qopt);
 	sch_tree_lock(sch);
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
@@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	sch_tree_unlock(sch);
-	prio_offload(sch, true);
 	return 0;
 }
 

From b9c7a7acc749f3d0667a2ab44ea38110d5a1f286 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:06 +0100
Subject: [PATCH 0508/1678] net: sch: prio: Add offload ability for grafting a
 child

Offload sch_prio graft command for capable drivers.
Warn in case of a failure, unless the graft was done as part of a destroy
operation (the new qdisc is a noop) or if all the qdiscs (the parent, the
old child, and the new one) are not offloaded.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h |  8 ++++++++
 net/sched/sch_prio.c  | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 87406252f0a3..e828d31be5da 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -806,6 +806,7 @@ enum tc_prio_command {
 	TC_PRIO_REPLACE,
 	TC_PRIO_DESTROY,
 	TC_PRIO_STATS,
+	TC_PRIO_GRAFT,
 };
 
 struct tc_prio_qopt_offload_params {
@@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params {
 	struct gnet_stats_queue *qstats;
 };
 
+struct tc_prio_qopt_offload_graft_params {
+	u8 band;
+	u32 child_handle;
+};
+
 struct tc_prio_qopt_offload {
 	enum tc_prio_command command;
 	u32 handle;
@@ -825,6 +831,8 @@ struct tc_prio_qopt_offload {
 	union {
 		struct tc_prio_qopt_offload_params replace_params;
 		struct tc_qopt_offload_stats stats;
+		struct tc_prio_qopt_offload_graft_params graft_params;
 	};
 };
+
 #endif
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ba2d6d17d95a..222e53d3d27a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -308,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		      struct Qdisc **old, struct netlink_ext_ack *extack)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
+	struct tc_prio_qopt_offload graft_offload;
+	struct net_device *dev = qdisc_dev(sch);
 	unsigned long band = arg - 1;
+	bool any_qdisc_is_offloaded;
+	int err;
 
 	if (new == NULL)
 		new = &noop_qdisc;
 
 	*old = qdisc_replace(sch, new, &q->queues[band]);
+
+	if (!tc_can_offload(dev))
+		return 0;
+
+	graft_offload.handle = sch->handle;
+	graft_offload.parent = sch->parent;
+	graft_offload.graft_params.band = band;
+	graft_offload.graft_params.child_handle = new->handle;
+	graft_offload.command = TC_PRIO_GRAFT;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+					    &graft_offload);
+
+	/* Don't report error if the graft is part of destroy operation. */
+	if (err && new != &noop_qdisc) {
+		/* Don't report error if the parent, the old child and the new
+		 * one are not offloaded.
+		 */
+		any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
+		any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
+		if (*old)
+			any_qdisc_is_offloaded |= (*old)->flags &
+						   TCQ_F_OFFLOADED;
+
+		if (any_qdisc_is_offloaded)
+			NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
+	}
+
 	return 0;
 }
 

From 32dc5efc6cb40dea9ee0797b7c237efc470e0c35 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Wed, 28 Feb 2018 10:45:07 +0100
Subject: [PATCH 0509/1678] mlxsw: spectrum: qdiscs: prio: Handle graft command

Handle graft command for an offloaded sch_prio.
Grafting a qdisc to any place other than under its original parent is not
supported by mlxsw and will cause the grafted qdisc to stop being
offloaded.

Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_qdisc.c  | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index a2a3ac09c3bc..91262b0573e3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -122,6 +122,24 @@ mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
 	return &mlxsw_sp_port->tclass_qdiscs[tclass];
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
+{
+	int i;
+
+	if (mlxsw_sp_port->root_qdisc->handle == handle)
+		return mlxsw_sp_port->root_qdisc;
+
+	if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC)
+		return NULL;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle)
+			return &mlxsw_sp_port->tclass_qdiscs[i];
+
+	return NULL;
+}
+
 static int
 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -643,6 +661,39 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
 };
 
+/* Grafting is not supported in mlxsw. It will result in un-offloading of the
+ * grafted qdisc as well as the qdisc in the qdisc new location.
+ * (However, if the graft is to the location where the qdisc is already at, it
+ * will be ignored completely and won't cause un-offloading).
+ */
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			  struct tc_prio_qopt_offload_graft_params *p)
+{
+	int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+	struct mlxsw_sp_qdisc *old_qdisc;
+
+	/* Check if the grafted qdisc is already in its "new" location. If so -
+	 * nothing needs to be done.
+	 */
+	if (p->band < IEEE_8021QAZ_MAX_TCS &&
+	    mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+		return 0;
+
+	/* See if the grafted qdisc is already offloaded on any tclass. If so,
+	 * unoffload it.
+	 */
+	old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
+						  p->child_handle);
+	if (old_qdisc)
+		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
+
+	mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
+			       &mlxsw_sp_port->tclass_qdiscs[tclass_num]);
+	return -EOPNOTSUPP;
+}
+
 int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 			   struct tc_prio_qopt_offload *p)
 {
@@ -668,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 	case TC_PRIO_STATS:
 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
 						&p->stats);
+	case TC_PRIO_GRAFT:
+		return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+						 &p->graft_params);
 	default:
 		return -EOPNOTSUPP;
 	}

From cccc200fcaf04cff4342036a72e51d6adf6c98c1 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 28 Feb 2018 11:43:27 +0100
Subject: [PATCH 0510/1678] ipvlan: egress mcast packets are not exceptional

Currently, if IPv6 is enabled on top of an ipvlan device in l3
mode, the following warning message:

 Dropped {multi|broad}cast of type= [86dd]

is emitted every time that a RS is generated and dmseg is soon
filled with irrelevant messages. Replace pr_warn with pr_debug,
to preserve debuggability, without scaring the sysadmin.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 1b5dc200b573..dbbb884eb81a 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -498,8 +498,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 
 	/* In this mode we dont care about multicast and broadcast traffic */
 	if (is_multicast_ether_addr(ethh->h_dest)) {
-		pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
-				    ntohs(skb->protocol));
+		pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n",
+				     ntohs(skb->protocol));
 		kfree_skb(skb);
 		goto out;
 	}

From 8230819494b3bf284ca7262ac5f877333147b937 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 28 Feb 2018 10:59:27 +0100
Subject: [PATCH 0511/1678] ipvlan: use per device spinlock to protect addrs
 list updates

This changeset moves ipvlan address under RCU protection, using
a per ipvlan device spinlock to protect list mutation and RCU
read access to protect list traversal.

Also explicitly use RCU read lock to traverse the per port
ipvlans list, so that we can now perform a full address lookup
without asserting the RTNL lock.

Overall this allows the ipvlan driver to check fully for duplicate
addresses - before this commit ipv6 addresses assigned by autoconf
via prefix delegation where accepted without any check - and avoid
the following rntl assertion failure still in the same code path:

 RTNL: assertion failed at drivers/net/ipvlan/ipvlan_core.c (124)
 WARNING: CPU: 15 PID: 0 at drivers/net/ipvlan/ipvlan_core.c:124 ipvlan_addr_busy+0x97/0xa0 [ipvlan]
 Modules linked in: ipvlan(E) ixgbe
 CPU: 15 PID: 0 Comm: swapper/15 Tainted: G            E    4.16.0-rc2.ipvlan+ #1782
 Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016
 RIP: 0010:ipvlan_addr_busy+0x97/0xa0 [ipvlan]
 RSP: 0018:ffff881ff9e03768 EFLAGS: 00010286
 RAX: 0000000000000000 RBX: ffff881fdf2a9000 RCX: 0000000000000000
 RDX: 0000000000000001 RSI: 00000000000000f6 RDI: 0000000000000300
 RBP: ffff881fdf2a8000 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000001 R11: ffff881ff9e034c0 R12: ffff881fe07bcc00
 R13: 0000000000000001 R14: ffffffffa02002b0 R15: 0000000000000001
 FS:  0000000000000000(0000) GS:ffff881ff9e00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fc5c1a4f248 CR3: 000000207e012005 CR4: 00000000001606e0
 Call Trace:
  <IRQ>
  ipvlan_addr6_event+0x6c/0xd0 [ipvlan]
  notifier_call_chain+0x49/0x90
  atomic_notifier_call_chain+0x6a/0x100
  ipv6_add_addr+0x5f9/0x720
  addrconf_prefix_rcv_add_addr+0x244/0x3c0
  addrconf_prefix_rcv+0x2f3/0x790
  ndisc_router_discovery+0x633/0xb70
  ndisc_rcv+0x155/0x180
  icmpv6_rcv+0x4ac/0x5f0
  ip6_input_finish+0x138/0x6a0
  ip6_input+0x41/0x1f0
  ipv6_rcv+0x4db/0x8d0
  __netif_receive_skb_core+0x3d5/0xe40
  netif_receive_skb_internal+0x89/0x370
  napi_gro_receive+0x14f/0x1e0
  ixgbe_clean_rx_irq+0x4ce/0x1020 [ixgbe]
  ixgbe_poll+0x31a/0x7a0 [ixgbe]
  net_rx_action+0x296/0x4f0
  __do_softirq+0xcf/0x4f5
  irq_exit+0xf5/0x110
  do_IRQ+0x62/0x110
  common_interrupt+0x91/0x91
  </IRQ>

 v1 -> v2: drop unneeded in_softirq check in ipvlan_addr6_validator_event()

Fixes: e9997c2938b2 ("ipvlan: fix check for IP addresses in control path")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan.h      |  1 +
 drivers/net/ipvlan/ipvlan_core.c | 30 ++++++++++------
 drivers/net/ipvlan/ipvlan_main.c | 60 +++++++++++++++++++-------------
 3 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 5166575a164d..a115f12bf130 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -74,6 +74,7 @@ struct ipvl_dev {
 	DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
 	netdev_features_t	sfeatures;
 	u32			msg_enable;
+	spinlock_t		addrs_lock;
 };
 
 struct ipvl_addr {
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index dbbb884eb81a..17daebd19e65 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 				   const void *iaddr, bool is_v6)
 {
-	struct ipvl_addr *addr;
+	struct ipvl_addr *addr, *ret = NULL;
 
-	list_for_each_entry(addr, &ipvlan->addrs, anode)
-		if (addr_equal(is_v6, addr, iaddr))
-			return addr;
-	return NULL;
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
+		if (addr_equal(is_v6, addr, iaddr)) {
+			ret = addr;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
 }
 
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
 {
 	struct ipvl_dev *ipvlan;
+	bool ret = false;
 
-	ASSERT_RTNL();
-
-	list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-		if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
-			return true;
+	rcu_read_lock();
+	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+		if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	rcu_read_unlock();
+	return ret;
 }
 
 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index d05b902c925b..3efc1c92c6a7 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
 	else
 		dev->flags &= ~IFF_NOARP;
 
-	list_for_each_entry(addr, &ipvlan->addrs, anode)
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
 		ipvlan_ht_addr_add(ipvlan, addr);
+	rcu_read_unlock();
 
 	return dev_uc_add(phy_dev, phy_dev->dev_addr);
 }
@@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
 
 	dev_uc_del(phy_dev, phy_dev->dev_addr);
 
-	list_for_each_entry(addr, &ipvlan->addrs, anode)
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
 		ipvlan_ht_addr_del(addr);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 	ipvlan->sfeatures = IPVLAN_FEATURES;
 	ipvlan_adjust_mtu(ipvlan, phy_dev);
 	INIT_LIST_HEAD(&ipvlan->addrs);
+	spin_lock_init(&ipvlan->addrs_lock);
 
 	/* TODO Probably put random address here to be presented to the
 	 * world but keep using the physical-dev address for the outgoing
@@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_addr *addr, *next;
 
+	spin_lock_bh(&ipvlan->addrs_lock);
 	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
 		ipvlan_ht_addr_del(addr);
-		list_del(&addr->anode);
+		list_del_rcu(&addr->anode);
 		kfree_rcu(addr, rcu);
 	}
+	spin_unlock_bh(&ipvlan->addrs_lock);
 
 	ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
 	list_del_rcu(&ipvlan->pnode);
@@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
-					 pnode)
+		list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
 			ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
 							    &lst_kill);
 		unregister_netdevice_many(&lst_kill);
@@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
 	return NOTIFY_DONE;
 }
 
+/* the caller must held the addrs lock */
 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
@@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 		addr->atype = IPVL_IPV6;
 #endif
 	}
-	list_add_tail(&addr->anode, &ipvlan->addrs);
+
+	list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
 
 	/* If the interface is not up, the address will be added to the hash
 	 * list by ipvlan_open.
@@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
 
+	spin_lock_bh(&ipvlan->addrs_lock);
 	addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
-	if (!addr)
+	if (!addr) {
+		spin_unlock_bh(&ipvlan->addrs_lock);
 		return;
+	}
 
 	ipvlan_ht_addr_del(addr);
-	list_del(&addr->anode);
+	list_del_rcu(&addr->anode);
+	spin_unlock_bh(&ipvlan->addrs_lock);
 	kfree_rcu(addr, rcu);
-
-	return;
 }
 
 static bool ipvlan_is_valid_dev(const struct net_device *dev)
@@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
 #if IS_ENABLED(CONFIG_IPV6)
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
-	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
+	int ret = -EINVAL;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
 		netif_err(ipvlan, ifup, ipvlan->dev,
 			  "Failed to add IPv6=%pI6c addr for %s intf\n",
 			  ip6_addr, ipvlan->dev->name);
-		return -EINVAL;
-	}
-
-	return ipvlan_add_addr(ipvlan, ip6_addr, true);
+	else
+		ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
+	spin_unlock_bh(&ipvlan->addrs_lock);
+	return ret;
 }
 
 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
@@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 	struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev;
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 
-	/* FIXME IPv6 autoconf calls us from bh without RTNL */
-	if (in_softirq())
-		return NOTIFY_DONE;
-
 	if (!ipvlan_is_valid_dev(dev))
 		return NOTIFY_DONE;
 
@@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
 
 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
-	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
+	int ret = -EINVAL;
+
+	spin_lock_bh(&ipvlan->addrs_lock);
+	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
 		netif_err(ipvlan, ifup, ipvlan->dev,
 			  "Failed to add IPv4=%pI4 on %s intf.\n",
 			  ip4_addr, ipvlan->dev->name);
-		return -EINVAL;
-	}
-
-	return ipvlan_add_addr(ipvlan, ip4_addr, false);
+	else
+		ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
+	spin_unlock_bh(&ipvlan->addrs_lock);
+	return ret;
 }
 
 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)

From 73bae6736b6b577b52fbba2d4b71a5a3e9920fb5 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:06 +0200
Subject: [PATCH 0512/1678] selftests: forwarding: Add initial testing
 framework

Add initial framework to test packet forwarding functionality. The tests
can run on actual devices using loop-backed cables or using veth pairs.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/.gitignore       |   1 +
 tools/testing/selftests/net/forwarding/README |  56 ++++
 .../net/forwarding/bridge_vlan_aware.sh       |  83 ++++++
 tools/testing/selftests/net/forwarding/config |  12 +
 .../net/forwarding/forwarding.config.sample   |  31 ++
 tools/testing/selftests/net/forwarding/lib.sh | 282 ++++++++++++++++++
 6 files changed, 465 insertions(+)
 create mode 100644 tools/testing/selftests/net/forwarding/.gitignore
 create mode 100644 tools/testing/selftests/net/forwarding/README
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
 create mode 100644 tools/testing/selftests/net/forwarding/config
 create mode 100644 tools/testing/selftests/net/forwarding/forwarding.config.sample
 create mode 100644 tools/testing/selftests/net/forwarding/lib.sh

diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000000..a793eef5b876
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000000..4a0964c42860
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,56 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+                             br0
+                              +
+               vrf-h1         |           vrf-h2
+                 +        +---+----+        +
+                 |        |        |        |
+    192.0.2.1/24 +        +        +        + 192.0.2.2/24
+               swp1     swp2     swp3     swp4
+                 +        +        +        +
+                 |        |        |        |
+                 +--------+        +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+  of recreating the same topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+  RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+  multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000000..e6faa9548460
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000000..5cd2aed97958
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000000..ab235c124f20
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000000..ff6550e9ddaf
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+
+if [[ -f forwarding.config ]]; then
+	source forwarding.config
+fi
+
+##############################################################################
+# Sanity checks
+
+if [[ "$(id -u)" -ne 0 ]]; then
+	echo "SKIP: need root privileges"
+	exit 0
+fi
+
+tc -j &> /dev/null
+if [[ $? -ne 0 ]]; then
+	echo "SKIP: iproute2 too old, missing JSON support"
+	exit 0
+fi
+
+if [[ ! -x "$(command -v jq)" ]]; then
+	echo "SKIP: jq not installed"
+	exit 0
+fi
+
+if [[ ! -v NUM_NETIFS ]]; then
+	echo "SKIP: importer does not define \"NUM_NETIFS\""
+	exit 0
+fi
+
+##############################################################################
+# Network interfaces configuration
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+	ip link show dev ${NETIFS[p$i]} &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: could not find all required interfaces"
+		exit 0
+	fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+	local err=$1
+	local msg=$2
+
+	if [[ $RET -eq 0 && $err -ne 0 ]]; then
+		RET=$err
+		retmsg=$msg
+	fi
+}
+
+check_fail()
+{
+	local err=$1
+	local msg=$2
+
+	if [[ $RET -eq 0 && $err -eq 0 ]]; then
+		RET=1
+		retmsg=$msg
+	fi
+}
+
+log_test()
+{
+	local test_name=$1
+	local opt_str=$2
+
+	if [[ $# -eq 2 ]]; then
+		opt_str="($opt_str)"
+	fi
+
+	if [[ $RET -ne 0 ]]; then
+		EXIT_STATUS=1
+		printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
+		if [[ ! -z "$retmsg" ]]; then
+			printf "\t%s\n" "$retmsg"
+		fi
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo "Hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+		return 1
+	fi
+
+	printf "TEST: %-60s  [PASS]\n" "$test_name $opt_str"
+	return 0
+}
+
+setup_wait()
+{
+	for i in $(eval echo {1..$NUM_NETIFS}); do
+		while true; do
+			ip link show dev ${NETIFS[p$i]} up \
+				| grep 'state UP' &> /dev/null
+			if [[ $? -ne 0 ]]; then
+				sleep 1
+			else
+				break
+			fi
+		done
+	done
+
+	# Make sure links are ready.
+	sleep $WAIT_TIME
+}
+
+pre_cleanup()
+{
+	if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+		echo "Pausing before cleanup, hit any key to continue"
+		read
+	fi
+}
+
+vrf_prepare()
+{
+	ip -4 rule add pref 32765 table local
+	ip -4 rule del pref 0
+	ip -6 rule add pref 32765 table local
+	ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+	ip -6 rule add pref 0 table local
+	ip -6 rule del pref 32765
+	ip -4 rule add pref 0 table local
+	ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+	local vrf_name=$1
+
+	__last_tb_id=$((__last_tb_id + 1))
+	__TB_IDS[$vrf_name]=$__last_tb_id
+	return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+	local vrf_name=$1
+
+	return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+	local vrf_name=$1
+	local tb_id
+
+	__vrf_td_id_assign $vrf_name
+	tb_id=$?
+
+	ip link add dev $vrf_name type vrf table $tb_id
+	ip -4 route add table $tb_id unreachable default metric 4278198272
+	ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+	local vrf_name=$1
+	local tb_id
+
+	__vrf_td_id_lookup $vrf_name
+	tb_id=$?
+
+	ip -6 route del table $tb_id unreachable default metric 4278198272
+	ip -4 route del table $tb_id unreachable default metric 4278198272
+	ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+	local if_name=$1
+	local add_del=$2
+	local array
+
+	shift
+	shift
+	array=("${@}")
+
+	for addrstr in "${array[@]}"; do
+		ip address $add_del $addrstr dev $if_name
+	done
+}
+
+simple_if_init()
+{
+	local if_name=$1
+	local vrf_name
+	local array
+
+	shift
+	vrf_name=v$if_name
+	array=("${@}")
+
+	vrf_create $vrf_name
+	ip link set dev $if_name master $vrf_name
+	ip link set dev $vrf_name up
+	ip link set dev $if_name up
+
+	__addr_add_del $if_name add "${array[@]}"
+}
+
+simple_if_fini()
+{
+	local if_name=$1
+	local vrf_name
+	local array
+
+	shift
+	vrf_name=v$if_name
+	array=("${@}")
+
+	__addr_add_del $if_name del "${array[@]}"
+
+	ip link set dev $if_name down
+	vrf_destroy $vrf_name
+}
+
+master_name_get()
+{
+	local if_name=$1
+
+	ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+##############################################################################
+# Tests
+
+ping_test()
+{
+	local if_name=$1
+	local dip=$2
+	local vrf_name
+
+	RET=0
+
+	vrf_name=$(master_name_get $if_name)
+	ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	check_err $?
+	log_test "ping"
+}
+
+ping6_test()
+{
+	local if_name=$1
+	local dip=$2
+	local vrf_name
+
+	RET=0
+
+	vrf_name=$(master_name_get $if_name)
+	ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	check_err $?
+	log_test "ping6"
+}

From d4deb01467ec9ed33fe0f4b4fa4632c94f2dd8c9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:07 +0200
Subject: [PATCH 0513/1678] selftests: forwarding: Add a test for FDB learning

Send a packet with a specific destination MAC, make sure it was learned
on the ingress port and then aged-out.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/bridge_vlan_aware.sh       |  5 +-
 tools/testing/selftests/net/forwarding/lib.sh | 92 +++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index e6faa9548460..1ba119f28963 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -26,7 +26,9 @@ h2_destroy()
 
 switch_create()
 {
-	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+	# 10 Seconds ageing time.
+	ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+		mcast_snooping 0
 
 	ip link set dev $swp1 master br0
 	ip link set dev $swp2 master br0
@@ -79,5 +81,6 @@ setup_wait
 
 ping_test $h1 192.0.2.2
 ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ff6550e9ddaf..21af63e860dc 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -7,6 +7,7 @@
 # Can be overridden by the configuration file.
 PING=${PING:=ping}
 PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
 WAIT_TIME=${WAIT_TIME:=5}
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
@@ -34,6 +35,11 @@ if [[ ! -x "$(command -v jq)" ]]; then
 	exit 0
 fi
 
+if [[ ! -x "$(command -v $MZ)" ]]; then
+	echo "SKIP: $MZ not installed"
+	exit 0
+fi
+
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
 	exit 0
@@ -250,6 +256,17 @@ master_name_get()
 	ip -j link show dev $if_name | jq -r '.[]["master"]'
 }
 
+bridge_ageing_time_get()
+{
+	local bridge=$1
+	local ageing_time
+
+	# Need to divide by 100 to convert to seconds.
+	ageing_time=$(ip -j -d link show dev $bridge \
+		      | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+	echo $((ageing_time / 100))
+}
+
 ##############################################################################
 # Tests
 
@@ -280,3 +297,78 @@ ping6_test()
 	check_err $?
 	log_test "ping6"
 }
+
+learning_test()
+{
+	local bridge=$1
+	local br_port1=$2	# Connected to `host1_if`.
+	local host1_if=$3
+	local host2_if=$4
+	local mac=de:ad:be:ef:13:37
+	local ageing_time
+
+	RET=0
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when should not"
+
+	# Disable unknown unicast flooding on `br_port1` to make sure
+	# packets are only forwarded through the port after a matching
+	# FDB entry was installed.
+	bridge link set dev $br_port1 flood off
+
+	tc qdisc add dev $host1_if ingress
+	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+		flower dst_mac $mac action drop
+
+	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+	sleep 1
+
+	tc -j -s filter show dev $host1_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_fail $? "Packet reached second host when should not"
+
+	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+	sleep 1
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_err $? "Did not find FDB record when should"
+
+	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+	sleep 1
+
+	tc -j -s filter show dev $host1_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_err $? "Packet did not reach second host when should"
+
+	# Wait for 10 seconds after the ageing time to make sure FDB
+	# record was aged-out.
+	ageing_time=$(bridge_ageing_time_get $bridge)
+	sleep $((ageing_time + 10))
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when should not"
+
+	bridge link set dev $br_port1 learning off
+
+	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+	sleep 1
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when should not"
+
+	bridge link set dev $br_port1 learning on
+
+	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host1_if ingress
+
+	bridge link set dev $br_port1 flood on
+
+	log_test "FDB learning"
+}

From 236dd50bf67a2ab5fd39d82e712d1fafdd4c602d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:08 +0200
Subject: [PATCH 0514/1678] selftests: forwarding: Add a test for flooded
 traffic

Add test cases for unknown unicast and unregistered multicast flooding.

For each traffic type, turn off flooding on one bridged port and inject
a packet of the specified type through the second bridged port. Make
sure the packet was not received by checking the ACL counters on the
other end. Later, turn on flooding and make sure the packet was
received.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/bridge_vlan_aware.sh       |  1 +
 tools/testing/selftests/net/forwarding/lib.sh | 89 +++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 1ba119f28963..651998e70557 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -82,5 +82,6 @@ setup_wait
 ping_test $h1 192.0.2.2
 ping6_test $h1 2001:db8:1::2
 learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 21af63e860dc..e2a4ee8946ef 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -372,3 +372,92 @@ learning_test()
 
 	log_test "FDB learning"
 }
+
+flood_test_do()
+{
+	local should_flood=$1
+	local mac=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
+	local err=0
+
+	# Add an ACL on `host2_if` which will tell us whether the packet
+	# was flooded to it or not.
+	tc qdisc add dev $host2_if ingress
+	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+		flower dst_mac $mac action drop
+
+	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+	sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	if [[ $? -ne 0 && $should_flood == "true" || \
+	      $? -eq 0 && $should_flood == "false" ]]; then
+		err=1
+	fi
+
+	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host2_if ingress
+
+	return $err
+}
+
+flood_unicast_test()
+{
+	local br_port=$1
+	local host1_if=$2
+	local host2_if=$3
+	local mac=de:ad:be:ef:13:37
+	local ip=192.0.2.100
+
+	RET=0
+
+	bridge link set dev $br_port flood off
+
+	flood_test_do false $mac $ip $host1_if $host2_if
+	check_err $? "Packet flooded when should not"
+
+	bridge link set dev $br_port flood on
+
+	flood_test_do true $mac $ip $host1_if $host2_if
+	check_err $? "Packet was not flooded when should"
+
+	log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+	local br_port=$1
+	local host1_if=$2
+	local host2_if=$3
+	local mac=01:00:5e:00:00:01
+	local ip=239.0.0.1
+
+	RET=0
+
+	bridge link set dev $br_port mcast_flood off
+
+	flood_test_do false $mac $ip $host1_if $host2_if
+	check_err $? "Packet flooded when should not"
+
+	bridge link set dev $br_port mcast_flood on
+
+	flood_test_do true $mac $ip $host1_if $host2_if
+	check_err $? "Packet was not flooded when should"
+
+	log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+	# `br_port` is connected to `host2_if`
+	local br_port=$1
+	local host1_if=$2
+	local host2_if=$3
+
+	flood_unicast_test $br_port $host1_if $host2_if
+	flood_multicast_test $br_port $host1_if $host2_if
+}

From 7b7bc875559c0c06406d77a7f71e3c0e289a60be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:09 +0200
Subject: [PATCH 0515/1678] selftests: forwarding: Add a test for basic IPv4
 and IPv6 routing

Configure two hosts which are directly connected to the same router and
test IPv4 and IPv6 ping. Use a large MTU and check that ping is
unaffected.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh |  15 +++
 .../selftests/net/forwarding/router.sh        | 125 ++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router.sh

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e2a4ee8946ef..962153b7181b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -267,6 +267,21 @@ bridge_ageing_time_get()
 	echo $((ageing_time / 100))
 }
 
+forwarding_enable()
+{
+       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
+       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+
+       sysctl -q -w net.ipv4.conf.all.forwarding=1
+       sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+forwarding_restore()
+{
+       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
+       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+}
+
 ##############################################################################
 # Tests
 
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000000..cc6a14abfa87
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	ip address add 192.0.2.1/24 dev $rp1
+	ip address add 2001:db8:1::1/64 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:2::1/64 dev $rp2
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 2001:db8:1::1/64 dev $rp1
+	ip address del 192.0.2.1/24 dev $rp1
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS

From 937eeb3482748bb85486070e10b5fbeb6b973f63 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:10 +0200
Subject: [PATCH 0516/1678] selftests: forwarding: Create test topology for
 multipath routing

Create a topology with two hosts, each directly connected to a different
router. Both routers are connected using two links, enabling multipath
routing.

Test IPv4 and IPv6 ping using default MTU and large MTU.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/router_multipath.sh        | 209 ++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_multipath.sh

diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000000..86d189b42e9b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,209 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+
+	ip route add 198.51.100.0/24 vrf vrf-r1 \
+		nexthop via 169.254.2.22 dev $rp12 \
+		nexthop via 169.254.3.23 dev $rp13
+	ip route add 2001:db8:2::/64 vrf vrf-r1 \
+		nexthop via fe80:2::22 dev $rp12 \
+		nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+
+	ip route add 192.0.2.0/24 vrf vrf-r2 \
+		nexthop via 169.254.2.12 dev $rp22 \
+		nexthop via 169.254.3.13 dev $rp23
+	ip route add 2001:db8:1::/64 vrf vrf-r2 \
+		nexthop via fe80:2::12 dev $rp22 \
+		nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 198.51.100.2
+ping6_test $h1 2001:db8:2::2
+
+exit $EXIT_STATUS

From 3d578d879517a97f4af867e6a8a021baf15e5101 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:11 +0200
Subject: [PATCH 0517/1678] selftests: forwarding: Test IPv4 weighted nexthops

Use different weights for the multipath route configured on the first
router and check that the different flows generated by the first host
are distributed according to the provided weights.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 14 ++++
 .../net/forwarding/router_multipath.sh        | 77 +++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 962153b7181b..6866f4a4bc4e 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -113,6 +113,13 @@ log_test()
 	return 0
 }
 
+log_info()
+{
+	local msg=$1
+
+	echo "INFO: $msg"
+}
+
 setup_wait()
 {
 	for i in $(eval echo {1..$NUM_NETIFS}); do
@@ -256,6 +263,13 @@ master_name_get()
 	ip -j link show dev $if_name | jq -r '.[]["master"]'
 }
 
+link_stats_tx_packets_get()
+{
+       local if_name=$1
+
+       ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
 bridge_ageing_time_get()
 {
 	local bridge=$1
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 86d189b42e9b..5b425dffb5d0 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -158,6 +158,82 @@ router2_destroy()
 	vrf_destroy "vrf-r2"
 }
 
+multipath_eval()
+{
+       local weight_rp12=$1
+       local weight_rp13=$2
+       local packets_rp12=$3
+       local packets_rp13=$4
+       local weights_ratio packets_ratio diff
+
+       RET=0
+
+       if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+               weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+		       | bc -l)
+               packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+		       | bc -l)
+       else
+               weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \
+		       bc -l)
+               packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \
+		       bc -l)
+       fi
+
+       diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+       diff=${diff#-}
+
+       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+       check_err $? "Too large discrepancy between expected and measured ratios"
+       log_test "Multipath"
+       log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+multipath4_test()
+{
+       local weight_rp12=$1
+       local weight_rp13=$2
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+               nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       # Restore settings.
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+}
+
+multipath_test()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test 1 1
+	multipath4_test 2 1
+	multipath4_test 11 45
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -205,5 +281,6 @@ setup_wait
 
 ping_test $h1 198.51.100.2
 ping6_test $h1 2001:db8:2::2
+multipath_test
 
 exit $EXIT_STATUS

From 4fb20ae13711ee05fba6e5dd2b81a83d9e5b5249 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:12 +0200
Subject: [PATCH 0518/1678] selftests: forwarding: Test IPv6 weighted nexthops

Have one host generate 16K IPv6 echo requests with a random flow label
and check that they are distributed between both multipath links
according to the provided weights.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/router_multipath.sh        | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 5b425dffb5d0..d31888e3133e 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -226,12 +226,48 @@ multipath4_test()
        sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
 }
 
+multipath6_test()
+{
+       local weight_rp12=$1
+       local weight_rp13=$2
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+	       nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       # Generate 16384 echo requests, each with a random flow label.
+       for _ in $(seq 1 16384); do
+	       ip vrf exec vrf-h1 ping 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+       done
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 \
+	       nexthop via fe80:3::23 dev $rp13
+}
+
 multipath_test()
 {
 	log_info "Running IPv4 multipath tests"
 	multipath4_test 1 1
 	multipath4_test 2 1
 	multipath4_test 11 45
+
+	log_info "Running IPv6 multipath tests"
+	multipath6_test 1 1
+	multipath6_test 2 1
+	multipath6_test 11 45
 }
 
 setup_prepare()

From 2f19f2125d824fc607359f6d9248bd765edf7183 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:13 +0200
Subject: [PATCH 0519/1678] selftests: forwarding: Add tc offload check helper

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 6866f4a4bc4e..92e46426bd1d 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -296,6 +296,19 @@ forwarding_restore()
        sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
 }
 
+tc_offload_check()
+{
+	for i in $(eval echo {1..$NUM_NETIFS}); do
+		ethtool -k ${NETIFS[p$i]} \
+			| grep "hw-tc-offload: on" &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
 ##############################################################################
 # Tests
 

From 4e4272d2a68dcdd2555774dba9d8b237fe0eb63d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:14 +0200
Subject: [PATCH 0520/1678] selftests: forwarding: Add MAC get helper

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 92e46426bd1d..083a16eb7803 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -270,6 +270,13 @@ link_stats_tx_packets_get()
        ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
 }
 
+mac_get()
+{
+	local if_name=$1
+
+	ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
 bridge_ageing_time_get()
 {
 	local bridge=$1

From 781fe631fafda37402abdbc4647698a107e2284d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:15 +0200
Subject: [PATCH 0521/1678] selftests: forwarding: Allow to get netdev
 interfaces names from commandline

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 083a16eb7803..3385ba76ac19 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -45,6 +45,21 @@ if [[ ! -v NUM_NETIFS ]]; then
 	exit 0
 fi
 
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+	if [[ "$count" -eq "0" ]]; then
+		unset NETIFS
+		declare -A NETIFS
+	fi
+	count=$((count + 1))
+	NETIFS[p$count]="$1"
+	shift
+done
+
 ##############################################################################
 # Network interfaces configuration
 

From 07e5c75184a11c46eddb08f2435ba4f3e7152e62 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:16 +0200
Subject: [PATCH 0522/1678] selftests: forwarding: Introduce tc flower matching
 tests

Add first part of flower tests. This patch only contains dst/src ip/mac
matching.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/tc_common.sh     |  23 ++
 .../selftests/net/forwarding/tc_flower.sh     | 196 ++++++++++++++++++
 2 files changed, 219 insertions(+)
 create mode 100644 tools/testing/selftests/net/forwarding/tc_common.sh
 create mode 100755 tools/testing/selftests/net/forwarding/tc_flower.sh

diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000000..acd0b520241c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+tc_check_packets()
+{
+	local id=$1
+	local handle=$2
+	local count=$3
+	local ret
+
+	output="$(tc -j -s filter show $id)"
+	# workaround the jq bug which causes jq to return 0 in case input is ""
+	ret=$?
+	if [[ $ret -ne 0 ]]; then
+		return $ret
+	fi
+	echo $output | \
+		jq -e ".[] \
+		| select(.options.handle == $handle) \
+		| select(.options.actions[0].stats.packets == $count)" \
+		&> /dev/null
+	return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000000..026a4ea4b2fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+	local dummy_mac=de:ad:be:ef:aa:aa
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_mac $dummy_mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+	local dummy_mac=de:ad:be:ef:aa:aa
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags src_mac $dummy_mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags src_mac $h1mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on correct filter with mask"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags src_ip 198.51.100.1 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags src_ip 192.0.2.1 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags src_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on correct filter with mask"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "src_ip match ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+match_dst_mac_test
+match_src_mac_test
+match_dst_ip_test
+match_src_ip_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	match_dst_mac_test
+	match_src_mac_test
+	match_dst_ip_test
+	match_src_ip_test
+fi
+
+exit $EXIT_STATUS

From bc13af291ebba244226b12e75d36a6eb6373a5d6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:17 +0200
Subject: [PATCH 0523/1678] selftests: forwarding: Introduce tc actions tests

Add first part of actions tests. This patch only contains tests of gact
ok/drop/trap and mirred redirect egress.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/tc_actions.sh    | 195 ++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_actions.sh

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000000..84234317a25d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+
+	simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+	simple_if_fini $swp2 192.0.2.1/24
+
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_redirect_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched without redirect rule inserted"
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+		dev $swp2
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match incoming redirected packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "mirred egress redirect ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+		skip_hw dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 102 1
+	check_err $? "Packet was not dropped"
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action ok
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_err $? "Did not see trapped packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 192.0.2.2 action drop
+	tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+		dev $swp2
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_fail $? "Saw packet without trap rule inserted"
+
+	tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action trap
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 102 1
+	check_err $? "Packet was not trapped"
+
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_err $? "Did not see trapped packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	swp1origmac=$(mac_get $swp1)
+	swp2origmac=$(mac_get $swp2)
+	ip link set $swp1 address $h2mac
+	ip link set $swp2 address $h1mac
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	ip link set $swp2 address $swp2origmac
+	ip link set $swp1 address $swp1origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+gact_drop_and_ok_test
+mirred_egress_redirect_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	gact_drop_and_ok_test
+	mirred_egress_redirect_test
+	gact_trap_test
+fi
+
+exit $EXIT_STATUS

From b13f245e844d1535597298ac0f0f0a94f56c239b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:18 +0200
Subject: [PATCH 0524/1678] selftests: forwarding: Introduce basic tc chains
 tests

Tests chains matching and goto chain action.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/tc_chains.sh     | 122 ++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_chains.sh

diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000000..94c114ad8b44
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=2
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower $tcflags dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 1101 1
+	check_fail $? "matched on filter in unreachable chain"
+
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower
+
+	log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower $tcflags dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_mac $h2mac action goto chain 1
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter with goto chain action"
+
+	tc_check_packets "dev $h2 ingress" 1101 1
+	check_err $? "Did not match on correct filter in chain 1"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower
+
+	log_test "gact goto chain ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+unreachable_chain_test
+gact_goto_chain_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	unreachable_chain_test
+	gact_goto_chain_test
+fi
+
+exit $EXIT_STATUS

From 4908e24b812d74d0534e8b33edd58417aefa93d0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 28 Feb 2018 12:25:19 +0200
Subject: [PATCH 0525/1678] selftests: forwarding: Introduce basic shared
 blocks tests

Test shared block infrastructure. This is a basic test that shares TC
block in between 2 clsact qdiscs.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh |   6 +
 .../selftests/net/forwarding/tc_shblocks.sh   | 122 ++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_shblocks.sh

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 3385ba76ac19..23866a685f77 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -30,6 +30,12 @@ if [[ $? -ne 0 ]]; then
 	exit 0
 fi
 
+tc filter help 2>&1 | grep block &> /dev/null
+if [[ $? -ne 0 ]]; then
+	echo "SKIP: iproute2 too old, missing shared block support"
+	exit 0
+fi
+
 if [[ ! -x "$(command -v jq)" ]]; then
 	echo "SKIP: jq not installed"
 	exit 0
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000000..cfc8a2ace388
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+	simple_if_init $swp2 192.0.2.2/24
+	tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	simple_if_fini $swp2 192.0.2.2/24
+
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+	RET=0
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 101 1
+	check_err $? "Did not match first incoming packet on a block"
+
+	$MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 101 2
+	check_err $? "Did not match second incoming packet on a block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	swmac=$(mac_get $swp1)
+	swp2origmac=$(mac_get $swp2)
+	ip link set $swp2 address $swmac
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	ip link set $swp2 address $swp2origmac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+shared_block_test
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	shared_block_test
+fi
+
+exit $EXIT_STATUS

From 292749915743758013e290c994f41de196d47498 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 28 Feb 2018 20:43:38 +0100
Subject: [PATCH 0526/1678] r8169: fix interrupt number after adding support
 for MSI-X interrupts

In case of MSI-X the interrupt number may differ from pcidev->irq.
Fix this by using pci_irq_vector().

Fixes: 6c6aa15fdea5 ("r8169: improve interrupt handling")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0a0638d692f9..6fd13334aa28 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7861,7 +7861,7 @@ static void rtl8169_netpoll(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	rtl8169_interrupt(tp->pci_dev->irq, dev);
+	rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev);
 }
 #endif
 
@@ -8608,7 +8608,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
 		   rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
-		   (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq);
+		   (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff),
+		   pci_irq_vector(pdev, 0));
 	if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
 		netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "
 			   "tx checksumming: %s]\n",

From bfff4862653bb96001ab57c1edd6d03f48e5f035 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 28 Feb 2018 22:40:16 -0500
Subject: [PATCH 0527/1678] net: fib_rules: support for match on ip_proto,
 sport and dport

uapi for ip_proto, sport and dport range match
in fib rules.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h        | 36 ++++++++++++-
 include/uapi/linux/fib_rules.h |  8 +++
 net/core/fib_rules.c           | 92 +++++++++++++++++++++++++++++++++-
 3 files changed, 133 insertions(+), 3 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b3d216249240..6dd0a00653ae 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -27,7 +27,7 @@ struct fib_rule {
 	u8			action;
 	u8			l3mdev;
 	u8                      proto;
-	/* 1 byte hole, try to use */
+	u8			ip_proto;
 	u32			target;
 	__be64			tun_id;
 	struct fib_rule __rcu	*ctarget;
@@ -40,6 +40,8 @@ struct fib_rule {
 	char			iifname[IFNAMSIZ];
 	char			oifname[IFNAMSIZ];
 	struct fib_kuid_range	uid_range;
+	struct fib_rule_port_range	sport_range;
+	struct fib_rule_port_range	dport_range;
 	struct rcu_head		rcu;
 };
 
@@ -144,6 +146,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
 	return frh->table;
 }
 
+static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
+{
+	return range->start != 0 && range->end != 0;
+}
+
+static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
+					 __be16 port)
+{
+	return ntohs(port) >= a->start &&
+		ntohs(port) <= a->end;
+}
+
+static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
+{
+	return a->start != 0 && a->end != 0 && a->end < 0xffff &&
+		a->start <= a->end;
+}
+
+static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
+					       struct fib_rule_port_range *b)
+{
+	return a->start == b->start &&
+		a->end == b->end;
+}
+
+static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
+{
+	return rule->ip_proto ||
+		fib_rule_port_range_set(&rule->sport_range) ||
+		fib_rule_port_range_set(&rule->dport_range);
+}
+
 struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
 					 struct net *);
 void fib_rules_unregister(struct fib_rules_ops *);
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 77d90ae38114..232df14e1287 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -35,6 +35,11 @@ struct fib_rule_uid_range {
 	__u32		end;
 };
 
+struct fib_rule_port_range {
+	__u16		start;
+	__u16		end;
+};
+
 enum {
 	FRA_UNSPEC,
 	FRA_DST,	/* destination address */
@@ -59,6 +64,9 @@ enum {
 	FRA_L3MDEV,	/* iif or oif is l3mdev goto its table */
 	FRA_UID_RANGE,	/* UID range */
 	FRA_PROTOCOL,   /* Originator of the rule */
+	FRA_IP_PROTO,	/* ip proto */
+	FRA_SPORT_RANGE, /* sport */
+	FRA_DPORT_RANGE, /* dport */
 	__FRA_MAX
 };
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a6aea805a0a2..f6f04fc0f629 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule)
 	if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) ||
 	    !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end))
 		return false;
+	if (fib_rule_port_range_set(&rule->sport_range))
+		return false;
+	if (fib_rule_port_range_set(&rule->dport_range))
+		return false;
 	return true;
 }
 EXPORT_SYMBOL_GPL(fib_rule_matchall);
@@ -221,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
 	return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
 }
 
+static int nla_get_port_range(struct nlattr *pattr,
+			      struct fib_rule_port_range *port_range)
+{
+	const struct fib_rule_port_range *pr = nla_data(pattr);
+
+	if (!fib_rule_port_range_valid(pr))
+		return -EINVAL;
+
+	port_range->start = pr->start;
+	port_range->end = pr->end;
+
+	return 0;
+}
+
+static int nla_put_port_range(struct sk_buff *skb, int attrtype,
+			      struct fib_rule_port_range *range)
+{
+	return nla_put(skb, attrtype, sizeof(*range), range);
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 			  struct flowi *fl, int flags,
 			  struct fib_lookup_arg *arg)
@@ -425,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
 		    !uid_eq(r->uid_range.end, rule->uid_range.end))
 			continue;
 
+		if (r->ip_proto != rule->ip_proto)
+			continue;
+
+		if (!fib_rule_port_range_compare(&r->sport_range,
+						 &rule->sport_range))
+			continue;
+
+		if (!fib_rule_port_range_compare(&r->dport_range,
+						 &rule->dport_range))
+			continue;
+
 		if (!ops->compare(r, frh, tb))
 			continue;
 		return 1;
@@ -569,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		rule->uid_range = fib_kuid_range_unset;
 	}
 
+	if (tb[FRA_IP_PROTO])
+		rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]);
+
+	if (tb[FRA_SPORT_RANGE]) {
+		err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+					 &rule->sport_range);
+		if (err)
+			goto errout_free;
+	}
+
+	if (tb[FRA_DPORT_RANGE]) {
+		err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+					 &rule->dport_range);
+		if (err)
+			goto errout_free;
+	}
+
 	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
 	    rule_exists(ops, frh, tb, rule)) {
 		err = -EEXIST;
@@ -634,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct net *net = sock_net(skb->sk);
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+	struct fib_rule_port_range sprange = {0, 0};
+	struct fib_rule_port_range dprange = {0, 0};
 	struct fib_rules_ops *ops = NULL;
 	struct fib_rule *rule, *r;
 	struct nlattr *tb[FRA_MAX+1];
@@ -667,6 +721,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		range = fib_kuid_range_unset;
 	}
 
+	if (tb[FRA_SPORT_RANGE]) {
+		err = nla_get_port_range(tb[FRA_SPORT_RANGE],
+					 &sprange);
+		if (err)
+			goto errout;
+	}
+
+	if (tb[FRA_DPORT_RANGE]) {
+		err = nla_get_port_range(tb[FRA_DPORT_RANGE],
+					 &dprange);
+		if (err)
+			goto errout;
+	}
+
 	list_for_each_entry(rule, &ops->rules_list, list) {
 		if (tb[FRA_PROTOCOL] &&
 		    (rule->proto != nla_get_u8(tb[FRA_PROTOCOL])))
@@ -712,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		     !uid_eq(rule->uid_range.end, range.end)))
 			continue;
 
+		if (tb[FRA_IP_PROTO] &&
+		    (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO])))
+			continue;
+
+		if (fib_rule_port_range_set(&sprange) &&
+		    !fib_rule_port_range_compare(&rule->sport_range, &sprange))
+			continue;
+
+		if (fib_rule_port_range_set(&dprange) &&
+		    !fib_rule_port_range_compare(&rule->dport_range, &dprange))
+			continue;
+
 		if (!ops->compare(rule, frh, tb))
 			continue;
 
@@ -790,7 +870,10 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 			 + nla_total_size(4) /* FRA_FWMASK */
 			 + nla_total_size_64bit(8) /* FRA_TUN_ID */
 			 + nla_total_size(sizeof(struct fib_kuid_range))
-			 + nla_total_size(1); /* FRA_PROTOCOL */
+			 + nla_total_size(1) /* FRA_PROTOCOL */
+			 + nla_total_size(1) /* FRA_IP_PROTO */
+			 + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */
+			 + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */
 
 	if (ops->nlmsg_payload)
 		payload += ops->nlmsg_payload(rule);
@@ -855,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	    (rule->l3mdev &&
 	     nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
 	    (uid_range_set(&rule->uid_range) &&
-	     nla_put_uid_range(skb, &rule->uid_range)))
+	     nla_put_uid_range(skb, &rule->uid_range)) ||
+	    (fib_rule_port_range_set(&rule->sport_range) &&
+	     nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) ||
+	    (fib_rule_port_range_set(&rule->dport_range) &&
+	     nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) ||
+	    (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto)))
 		goto nla_put_failure;
 
 	if (rule->suppress_ifgroup != -1) {

From 4a2d73a4fb36ed4d73967bd3892cfab014a52ab2 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 28 Feb 2018 22:41:06 -0500
Subject: [PATCH 0528/1678] ipv4: fib_rules: support match on sport, dport and
 ip proto

support to match on src port, dst port and ip protocol.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_rules.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 35d646a62ad4..16083b82954b 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	if (r->tos && (r->tos != fl4->flowi4_tos))
 		return 0;
 
+	if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
+		return 0;
+
+	if (fib_rule_port_range_set(&rule->sport_range) &&
+	    !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
+		return 0;
+
+	if (fib_rule_port_range_set(&rule->dport_range) &&
+	    !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
+		return 0;
+
 	return 1;
 }
 

From bb0ad1987e963e47a02cc53b8275ffe2b38d4b70 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 28 Feb 2018 22:41:37 -0500
Subject: [PATCH 0529/1678] ipv6: fib6_rules: support for match on sport, dport
 and ip proto

support to match on src port, dst port and ip protocol.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/fib6_rules.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 95a2c9e8699a..bcd1f22ac7b1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -223,6 +223,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
 		return 0;
 
+	if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+		return 0;
+
+	if (fib_rule_port_range_set(&rule->sport_range) &&
+	    !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+		return 0;
+
+	if (fib_rule_port_range_set(&rule->dport_range) &&
+	    !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+		return 0;
+
 	return 1;
 }
 

From e37b1e978bec5334dc379d8c2423d063af207430 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 28 Feb 2018 22:42:41 -0500
Subject: [PATCH 0530/1678] ipv6: route: dissect flow in input path if fib
 rules need it

Dissect flow in fwd path if fib rules require it. Controlled by
a flag to avoid penatly for the common case. Flag is set when fib
rules with sport, dport and proto match that require flow dissect
are installed. Also passes the dissected hash keys to the multipath
hash function when applicable to avoid dissecting the flow again.
icmp packets will continue to use inner header for hash
calculations (Thanks to Nikolay Aleksandrov for some review here).

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     | 27 ++++++++++++++++++++++++-
 include/net/netns/ipv4.h |  1 +
 net/ipv4/fib_rules.c     |  8 ++++++++
 net/ipv4/fib_semantics.c |  2 +-
 net/ipv4/route.c         | 43 +++++++++++++++++++++++++++-------------
 5 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 15e19c5c6f26..8812582a94d5 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net)
 	return 0;
 }
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+						 struct sk_buff *skb,
+						 struct flowi4 *fl4,
+						 struct flow_keys *flkeys)
+{
+	return false;
+}
 #else /* CONFIG_IP_MULTIPLE_TABLES */
 int __net_init fib4_rules_init(struct net *net);
 void __net_exit fib4_rules_exit(struct net *net);
@@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule);
 int fib4_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib4_rules_seq_read(struct net *net);
 
+static inline bool fib4_rules_early_flow_dissect(struct net *net,
+						 struct sk_buff *skb,
+						 struct flowi4 *fl4,
+						 struct flow_keys *flkeys)
+{
+	unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+	if (!net->ipv4.fib_rules_require_fldissect)
+		return false;
+
+	skb_flow_dissect_flow_keys(skb, flkeys, flag);
+	fl4->fl4_sport = flkeys->ports.src;
+	fl4->fl4_dport = flkeys->ports.dst;
+	fl4->flowi4_proto = flkeys->basic.ip_proto;
+
+	return true;
+}
+
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
@@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-		       const struct sk_buff *skb);
+		       const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
 void fib_select_multipath(struct fib_result *res, int hash);
 void fib_select_path(struct net *net, struct fib_result *res,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 44668c29701a..3a970e429ab6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -52,6 +52,7 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
+	unsigned int		fib_rules_require_fldissect;
 	struct fib_table __rcu	*fib_main;
 	struct fib_table __rcu	*fib_default;
 #endif
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 16083b82954b..737d11bc8838 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -255,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	}
 #endif
 
+	if (fib_rule_requires_fldissect(rule))
+		net->ipv4.fib_rules_require_fldissect++;
+
 	rule4->src_len = frh->src_len;
 	rule4->srcmask = inet_make_mask(rule4->src_len);
 	rule4->dst_len = frh->dst_len;
@@ -283,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
+
+	if (net->ipv4.fib_rules_require_fldissect &&
+	    fib_rule_requires_fldissect(rule))
+		net->ipv4.fib_rules_require_fldissect--;
 errout:
 	return err;
 }
@@ -400,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net)
 		goto fail;
 	net->ipv4.rules_ops = ops;
 	net->ipv4.fib_has_custom_rules = false;
+	net->ipv4.fib_rules_require_fldissect = 0;
 	return 0;
 
 fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f31e6575ab91..181b0d8d589c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi->fib_nhs > 1) {
-		int h = fib_multipath_hash(res->fi, fl4, skb);
+		int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
 
 		fib_select_multipath(res, h);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 26eefa2eaa44..3bb686dac273 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 
 /* if skb is set it will be used and fl4 can be NULL */
 int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
-		       const struct sk_buff *skb)
+		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
 	struct net *net = fi->fib_net;
 	struct flow_keys hash_keys;
@@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 			if (skb->l4_hash)
 				return skb_get_hash_raw(skb) >> 1;
 			memset(&hash_keys, 0, sizeof(hash_keys));
-			skb_flow_dissect_flow_keys(skb, &keys, flag);
 
-			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
-			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
-			hash_keys.ports.src = keys.ports.src;
-			hash_keys.ports.dst = keys.ports.dst;
-			hash_keys.basic.ip_proto = keys.basic.ip_proto;
+			if (flkeys) {
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+				hash_keys.ports.src = flkeys->ports.src;
+				hash_keys.ports.dst = flkeys->ports.dst;
+				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+			} else {
+				skb_flow_dissect_flow_keys(skb, &keys, flag);
+				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+				hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+				hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+				hash_keys.ports.src = keys.ports.src;
+				hash_keys.ports.dst = keys.ports.dst;
+				hash_keys.basic.ip_proto = keys.basic.ip_proto;
+			}
 		} else {
 			memset(&hash_keys, 0, sizeof(hash_keys));
 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
 static int ip_mkroute_input(struct sk_buff *skb,
 			    struct fib_result *res,
 			    struct in_device *in_dev,
-			    __be32 daddr, __be32 saddr, u32 tos)
+			    __be32 daddr, __be32 saddr, u32 tos,
+			    struct flow_keys *hkeys)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi && res->fi->fib_nhs > 1) {
-		int h = fib_multipath_hash(res->fi, NULL, skb);
+		int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
 
 		fib_select_multipath(res, h);
 	}
@@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			       struct fib_result *res)
 {
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct flow_keys *flkeys = NULL, _flkeys;
+	struct net    *net = dev_net(dev);
 	struct ip_tunnel_info *tun_info;
-	struct flowi4	fl4;
+	int		err = -EINVAL;
 	unsigned int	flags = 0;
 	u32		itag = 0;
 	struct rtable	*rth;
-	int		err = -EINVAL;
-	struct net    *net = dev_net(dev);
+	struct flowi4	fl4;
 	bool do_cache;
 
 	/* IP on this device is disabled. */
@@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+	if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys))
+		flkeys = &_flkeys;
+
 	err = fib_lookup(net, &fl4, res, 0);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
@@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res->type != RTN_UNICAST)
 		goto martian_destination;
 
-	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:	return err;
 
 brd_input:

From 5e5d6fed374155ba1a7a5ca5f12fbec2285d06a2 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 28 Feb 2018 22:43:22 -0500
Subject: [PATCH 0531/1678] ipv6: route: dissect flow in input path if fib
 rules need it

Dissect flow in fwd path if fib rules require it. Controlled by
a flag to avoid penatly for the common case. Flag is set when fib
rules with sport, dport and proto match that require flow dissect
are installed. Also passes the dissected hash keys to the multipath
hash function when applicable to avoid dissecting the flow again.
icmp packets will continue to use inner header for hash
calculations.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h    | 25 +++++++++++++++++++++++++
 include/net/ip6_route.h  |  4 +++-
 include/net/netns/ipv6.h |  3 ++-
 net/ipv6/fib6_rules.c    | 16 ++++++++++++++++
 net/ipv6/icmp.c          |  2 +-
 net/ipv6/route.c         | 34 +++++++++++++++++++++++++---------
 6 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 34ec321d6a03..8d906a35b534 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -415,6 +415,24 @@ void fib6_rules_cleanup(void);
 bool fib6_rule_default(const struct fib_rule *rule);
 int fib6_rules_dump(struct net *net, struct notifier_block *nb);
 unsigned int fib6_rules_seq_read(struct net *net);
+
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+						 struct sk_buff *skb,
+						 struct flowi6 *fl6,
+						 struct flow_keys *flkeys)
+{
+	unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+
+	if (!net->ipv6.fib6_rules_require_fldissect)
+		return false;
+
+	skb_flow_dissect_flow_keys(skb, flkeys, flag);
+	fl6->fl6_sport = flkeys->ports.src;
+	fl6->fl6_dport = flkeys->ports.dst;
+	fl6->flowi6_proto = flkeys->basic.ip_proto;
+
+	return true;
+}
 #else
 static inline int               fib6_rules_init(void)
 {
@@ -436,5 +454,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net)
 {
 	return 0;
 }
+static inline bool fib6_rules_early_flow_dissect(struct net *net,
+						 struct sk_buff *skb,
+						 struct flowi6 *fl6,
+						 struct flow_keys *flkeys)
+{
+	return false;
+}
 #endif
 #endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a65f3cd..da2bde5fda8f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 			    const struct in6_addr *saddr, int oif, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+		       struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
@@ -266,4 +267,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
 	       ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) &&
 	       !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate);
 }
+
 #endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 987cc4569cb8..2b9194229a56 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -71,7 +71,8 @@ struct netns_ipv6 {
 	unsigned int		 ip6_rt_gc_expire;
 	unsigned long		 ip6_rt_last_gc;
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	bool			 fib6_has_custom_rules;
+	unsigned int		fib6_rules_require_fldissect;
+	bool			fib6_has_custom_rules;
 	struct rt6_info         *ip6_prohibit_entry;
 	struct rt6_info         *ip6_blk_hole_entry;
 	struct fib6_table       *fib6_local_tbl;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index bcd1f22ac7b1..04e5f523e50f 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -269,12 +269,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	rule6->dst.plen = frh->dst_len;
 	rule6->tclass = frh->tos;
 
+	if (fib_rule_requires_fldissect(rule))
+		net->ipv6.fib6_rules_require_fldissect++;
+
 	net->ipv6.fib6_has_custom_rules = true;
 	err = 0;
 errout:
 	return err;
 }
 
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+	struct net *net = rule->fr_net;
+
+	if (net->ipv6.fib6_rules_require_fldissect &&
+	    fib_rule_requires_fldissect(rule))
+		net->ipv6.fib6_rules_require_fldissect--;
+
+	return 0;
+}
+
 static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 			     struct nlattr **tb)
 {
@@ -334,6 +348,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
 	.match			= fib6_rule_match,
 	.suppress		= fib6_rule_suppress,
 	.configure		= fib6_rule_configure,
+	.delete			= fib6_rule_delete,
 	.compare		= fib6_rule_compare,
 	.fill			= fib6_rule_fill,
 	.nlmsg_payload		= fib6_rule_nlmsg_payload,
@@ -361,6 +376,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
 		goto out_fib6_rules_ops;
 
 	net->ipv6.fib6_rules_ops = ops;
+	net->ipv6.fib6_rules_require_fldissect = 0;
 out:
 	return err;
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4fa4f1b150a4..b0778d323b6e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
-	fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+	fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aa709b644945..e2bb40824c85 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -460,7 +460,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
 	if (!fl6->mp_hash)
-		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+		fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
 
 	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
 		return match;
@@ -1786,10 +1786,12 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
-				  struct flow_keys *keys)
+				  struct flow_keys *keys,
+				  struct flow_keys *flkeys)
 {
 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
 	const struct ipv6hdr *key_iph = outer_iph;
+	struct flow_keys *_flkeys = flkeys;
 	const struct ipv6hdr *inner_iph;
 	const struct icmp6hdr *icmph;
 	struct ipv6hdr _inner_iph;
@@ -1811,22 +1813,31 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
 		goto out;
 
 	key_iph = inner_iph;
+	_flkeys = NULL;
 out:
 	memset(keys, 0, sizeof(*keys));
 	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys->addrs.v6addrs.src = key_iph->saddr;
-	keys->addrs.v6addrs.dst = key_iph->daddr;
-	keys->tags.flow_label = ip6_flowinfo(key_iph);
-	keys->basic.ip_proto = key_iph->nexthdr;
+	if (_flkeys) {
+		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+		keys->tags.flow_label = _flkeys->tags.flow_label;
+		keys->basic.ip_proto = _flkeys->basic.ip_proto;
+	} else {
+		keys->addrs.v6addrs.src = key_iph->saddr;
+		keys->addrs.v6addrs.dst = key_iph->daddr;
+		keys->tags.flow_label = ip6_flowinfo(key_iph);
+		keys->basic.ip_proto = key_iph->nexthdr;
+	}
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
+		       struct flow_keys *flkeys)
 {
 	struct flow_keys hash_keys;
 
 	if (skb) {
-		ip6_multipath_l3_keys(skb, &hash_keys);
+		ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
 		return flow_hash_from_keys(&hash_keys) >> 1;
 	}
 
@@ -1847,12 +1858,17 @@ void ip6_route_input(struct sk_buff *skb)
 		.flowi6_mark = skb->mark,
 		.flowi6_proto = iph->nexthdr,
 	};
+	struct flow_keys *flkeys = NULL, _flkeys;
 
 	tun_info = skb_tunnel_info(skb);
 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+		flkeys = &_flkeys;
+
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-		fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+		fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
 	skb_dst_drop(skb);
 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 }

From 6853f21f764b04e58df5e44629fec1fb8f3cbf2e Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:29 +0200
Subject: [PATCH 0532/1678] ipmr,ipmr6: Define a uniform vif_device

The two implementations have almost identical structures - vif_device and
mif_device. As a step toward uniforming the mr_tables, eliminate the
mif_device and relocate the vif_device definition into a new common
header file.

Also, introduce a common initializing function for setting most of the
vif_device fields in a new common source file. This requires modifying
the ipv{4,6] Kconfig and ipv4 makefile as we're introducing a new common
config option - CONFIG_IP_MROUTE_COMMON.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h      | 13 +---------
 include/linux/mroute6.h     | 11 +-------
 include/linux/mroute_base.h | 52 +++++++++++++++++++++++++++++++++++++
 net/ipv4/Kconfig            |  5 ++++
 net/ipv4/Makefile           |  1 +
 net/ipv4/ipmr.c             | 32 +++++++++++------------
 net/ipv4/ipmr_base.c        | 28 ++++++++++++++++++++
 net/ipv6/Kconfig            |  1 +
 net/ipv6/ip6mr.c            | 37 ++++++++++----------------
 9 files changed, 117 insertions(+), 63 deletions(-)
 create mode 100644 include/linux/mroute_base.h
 create mode 100644 net/ipv4/ipmr_base.c

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 5396521a776a..b8aadffe6237 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -9,6 +9,7 @@
 #include <net/fib_rules.h>
 #include <net/fib_notifier.h>
 #include <uapi/linux/mroute.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IP_MROUTE
 static inline int ip_mroute_opt(int opt)
@@ -56,18 +57,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
 }
 #endif
 
-struct vif_device {
-	struct net_device 	*dev;			/* Device we are using */
-	struct netdev_phys_item_id dev_parent_id;	/* Device parent ID    */
-	unsigned long	bytes_in,bytes_out;
-	unsigned long	pkt_in,pkt_out;		/* Statistics 			*/
-	unsigned long	rate_limit;		/* Traffic shaping (NI) 	*/
-	unsigned char	threshold;		/* TTL threshold 		*/
-	unsigned short	flags;			/* Control flags 		*/
-	__be32		local,remote;		/* Addresses(remote for tunnels)*/
-	int		link;			/* Physical interface index	*/
-};
-
 struct vif_entry_notifier_info {
 	struct fib_notifier_info info;
 	struct net_device *dev;
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3014c52bfd86..e5e5b8282551 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -7,6 +7,7 @@
 #include <linux/skbuff.h>	/* for struct sk_buff_head */
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
+#include <linux/mroute_base.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -62,16 +63,6 @@ static inline void ip6_mr_cleanup(void)
 }
 #endif
 
-struct mif_device {
-	struct net_device 	*dev;			/* Device we are using */
-	unsigned long	bytes_in,bytes_out;
-	unsigned long	pkt_in,pkt_out;		/* Statistics 			*/
-	unsigned long	rate_limit;		/* Traffic shaping (NI) 	*/
-	unsigned char	threshold;		/* TTL threshold 		*/
-	unsigned short	flags;			/* Control flags 		*/
-	int		link;			/* Physical interface index	*/
-};
-
 #define VIFF_STATIC 0x8000
 
 struct mfc6_cache {
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
new file mode 100644
index 000000000000..0de651e15f27
--- /dev/null
+++ b/include/linux/mroute_base.h
@@ -0,0 +1,52 @@
+#ifndef __LINUX_MROUTE_BASE_H
+#define __LINUX_MROUTE_BASE_H
+
+#include <linux/netdevice.h>
+
+/**
+ * struct vif_device - interface representor for multicast routing
+ * @dev: network device being used
+ * @bytes_in: statistic; bytes ingressing
+ * @bytes_out: statistic; bytes egresing
+ * @pkt_in: statistic; packets ingressing
+ * @pkt_out: statistic; packets egressing
+ * @rate_limit: Traffic shaping (NI)
+ * @threshold: TTL threshold
+ * @flags: Control flags
+ * @link: Physical interface index
+ * @dev_parent_id: device parent id
+ * @local: Local address
+ * @remote: Remote address for tunnels
+ */
+struct vif_device {
+	struct net_device *dev;
+	unsigned long bytes_in, bytes_out;
+	unsigned long pkt_in, pkt_out;
+	unsigned long rate_limit;
+	unsigned char threshold;
+	unsigned short flags;
+	int link;
+
+	/* Currently only used by ipmr */
+	struct netdev_phys_item_id dev_parent_id;
+	__be32 local, remote;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+void vif_device_init(struct vif_device *v,
+		     struct net_device *dev,
+		     unsigned long rate_limit,
+		     unsigned char threshold,
+		     unsigned short flags,
+		     unsigned short get_iflink_mask);
+#else
+static inline void vif_device_init(struct vif_device *v,
+				   struct net_device *dev,
+				   unsigned long rate_limit,
+				   unsigned char threshold,
+				   unsigned short flags,
+				   unsigned short get_iflink_mask)
+{
+}
+#endif
+#endif
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index f48fe6fc7e8c..80dad301361d 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST
 	  Network), but can be distributed all over the Internet. If you want
 	  to do that, say Y here and to "IP multicast routing" below.
 
+config IP_MROUTE_COMMON
+	bool
+	depends on IP_MROUTE || IPV6_MROUTE
+
 config IP_MROUTE
 	bool "IP: multicast routing"
 	depends on IP_MULTICAST
+	select IP_MROUTE_COMMON
 	help
 	  This is used if you want your machine to act as a router for IP
 	  packets that have several destination addresses. It is needed on the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 47a0a6649a9d..a07b7dd06def 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
+obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
 gre-y := gre_demux.o
 obj-$(CONFIG_NET_FOU) += fou.o
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 591d1fc80a1f..1370edad64bf 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -945,6 +945,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 	ip_rt_multicast_event(in_dev);
 
 	/* Fill in the VIF structures */
+	vif_device_init(v, dev, vifc->vifc_rate_limit,
+			vifc->vifc_threshold,
+			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
+			(VIFF_TUNNEL | VIFF_REGISTER));
 
 	attr.orig_dev = dev;
 	if (!switchdev_port_attr_get(dev, &attr)) {
@@ -953,20 +957,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 	} else {
 		v->dev_parent_id.id_len = 0;
 	}
-	v->rate_limit = vifc->vifc_rate_limit;
+
 	v->local = vifc->vifc_lcl_addr.s_addr;
 	v->remote = vifc->vifc_rmt_addr.s_addr;
-	v->flags = vifc->vifc_flags;
-	if (!mrtsock)
-		v->flags |= VIFF_STATIC;
-	v->threshold = vifc->vifc_threshold;
-	v->bytes_in = 0;
-	v->bytes_out = 0;
-	v->pkt_in = 0;
-	v->pkt_out = 0;
-	v->link = dev->ifindex;
-	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
-		v->link = dev_get_iflink(dev);
 
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
@@ -2316,7 +2309,8 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	}
 
 	if (VIF_EXISTS(mrt, c->mfc_parent) &&
-	    nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+	    nla_put_u32(skb, RTA_IIF,
+			mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
 		return -EMSGSIZE;
 
 	if (c->mfc_flags & MFC_OFFLOAD)
@@ -2327,6 +2321,8 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 
 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
 		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+			struct vif_device *vif;
+
 			if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
 				nla_nest_cancel(skb, mp_attr);
 				return -EMSGSIZE;
@@ -2334,7 +2330,8 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
+			vif = &mrt->vif_table[ct];
+			nhp->rtnh_ifindex = vif->dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -2954,8 +2951,8 @@ struct ipmr_vif_iter {
 };
 
 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
-					   struct ipmr_vif_iter *iter,
-					   loff_t pos)
+					    struct ipmr_vif_iter *iter,
+					    loff_t pos)
 {
 	struct mr_table *mrt = iter->mrt;
 
@@ -3020,7 +3017,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
 	} else {
 		const struct vif_device *vif = v;
-		const char *name =  vif->dev ? vif->dev->name : "none";
+		const char *name =  vif->dev ?
+				    vif->dev->name : "none";
 
 		seq_printf(seq,
 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
new file mode 100644
index 000000000000..22758f848344
--- /dev/null
+++ b/net/ipv4/ipmr_base.c
@@ -0,0 +1,28 @@
+/* Linux multicast routing support
+ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
+ */
+
+#include <linux/mroute_base.h>
+
+/* Sets everything common except 'dev', since that is done under locking */
+void vif_device_init(struct vif_device *v,
+		     struct net_device *dev,
+		     unsigned long rate_limit,
+		     unsigned char threshold,
+		     unsigned short flags,
+		     unsigned short get_iflink_mask)
+{
+	v->dev = NULL;
+	v->bytes_in = 0;
+	v->bytes_out = 0;
+	v->pkt_in = 0;
+	v->pkt_out = 0;
+	v->rate_limit = rate_limit;
+	v->flags = flags;
+	v->threshold = threshold;
+	if (v->flags & get_iflink_mask)
+		v->link = dev_get_iflink(dev);
+	else
+		v->link = dev->ifindex;
+}
+EXPORT_SYMBOL(vif_device_init);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
 config IPV6_MROUTE
 	bool "IPv6: multicast routing"
 	depends on IPV6
+	select IP_MROUTE_COMMON
 	---help---
 	  Experimental support for IPv6 multicast forwarding.
 	  If unsure, say N.
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 295eb5ecaee5..e397990f6eb8 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -62,7 +62,7 @@ struct mr6_table {
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc6_unres_queue;
 	struct list_head	mfc6_cache_array[MFC6_LINES];
-	struct mif_device	vif6_table[MAXMIFS];
+	struct vif_device	vif6_table[MAXMIFS];
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
@@ -384,7 +384,7 @@ struct ipmr_vif_iter {
 	int ct;
 };
 
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
+static struct vif_device *ip6mr_vif_seq_idx(struct net *net,
 					    struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
@@ -450,7 +450,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 		seq_puts(seq,
 			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 	} else {
-		const struct mif_device *vif = v;
+		const struct vif_device *vif = v;
 		const char *name = vif->dev ? vif->dev->name : "none";
 
 		seq_printf(seq,
@@ -776,7 +776,7 @@ failure:
 static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
 		       struct list_head *head)
 {
-	struct mif_device *v;
+	struct vif_device *v;
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
 
@@ -929,7 +929,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 		    struct mif6ctl *vifc, int mrtsock)
 {
 	int vifi = vifc->mif6c_mifi;
-	struct mif_device *v = &mrt->vif6_table[vifi];
+	struct vif_device *v = &mrt->vif6_table[vifi];
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
 	int err;
@@ -980,21 +980,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 					     dev->ifindex, &in6_dev->cnf);
 	}
 
-	/*
-	 *	Fill in the VIF structures
-	 */
-	v->rate_limit = vifc->vifc_rate_limit;
-	v->flags = vifc->mif6c_flags;
-	if (!mrtsock)
-		v->flags |= VIFF_STATIC;
-	v->threshold = vifc->vifc_threshold;
-	v->bytes_in = 0;
-	v->bytes_out = 0;
-	v->pkt_in = 0;
-	v->pkt_out = 0;
-	v->link = dev->ifindex;
-	if (v->flags & MIFF_REGISTER)
-		v->link = dev_get_iflink(dev);
+	/* Fill in the VIF structures */
+	vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+			vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+			MIFF_REGISTER);
 
 	/* And finish update writing critical data */
 	write_lock_bh(&mrt_lock);
@@ -1332,7 +1321,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
 	struct mr6_table *mrt;
-	struct mif_device *v;
+	struct vif_device *v;
 	int ct;
 
 	if (event != NETDEV_UNREGISTER)
@@ -1873,7 +1862,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 {
 	struct sioc_sg_req6 sr;
 	struct sioc_mif_req6 vr;
-	struct mif_device *vif;
+	struct vif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
 	struct mr6_table *mrt;
@@ -1947,7 +1936,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 {
 	struct compat_sioc_sg_req6 sr;
 	struct compat_sioc_mif_req6 vr;
-	struct mif_device *vif;
+	struct vif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
 	struct mr6_table *mrt;
@@ -2018,7 +2007,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct mif_device *vif = &mrt->vif6_table[vifi];
+	struct vif_device *vif = &mrt->vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
 	struct flowi6 fl6;

From 8571ab479a6e1ef46ead5ebee567e128a422767c Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:30 +0200
Subject: [PATCH 0533/1678] ip6mr: Make mroute_sk rcu-based

In ipmr the mr_table socket is handled under RCU. Introduce the same
for ip6mr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h |  6 +++---
 net/ipv6/ip6_output.c   |  2 +-
 net/ipv6/ip6mr.c        | 45 ++++++++++++++++++++++++-----------------
 3 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index e5e5b8282551..e1b9fb06e1ea 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -111,12 +111,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
 			   struct rtmsg *rtm, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-	return NULL;
+	return false;
 }
 static inline int ip6mr_sk_done(struct sock *sk)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 997c7f19ad62..a6eb0e699b15 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
-		    ((mroute6_socket(net, skb) &&
+		    ((mroute6_is_socket(net, skb) &&
 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 					 &ipv6_hdr(skb)->saddr))) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e397990f6eb8..a0e297ddca6e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -58,7 +58,7 @@ struct mr6_table {
 	struct list_head	list;
 	possible_net_t		net;
 	u32			id;
-	struct sock		*mroute6_sk;
+	struct sock __rcu	*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc6_unres_queue;
 	struct list_head	mfc6_cache_array[MFC6_LINES];
@@ -1121,6 +1121,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert)
 {
+	struct sock *mroute6_sk;
 	struct sk_buff *skb;
 	struct mrt6msg *msg;
 	int ret;
@@ -1190,17 +1191,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (!mrt->mroute6_sk) {
+	rcu_read_lock();
+	mroute6_sk = rcu_dereference(mrt->mroute6_sk);
+	if (!mroute6_sk) {
+		rcu_read_unlock();
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
 	mrt6msg_netlink_event(mrt, skb);
 
-	/*
-	 *	Deliver to user space multicast routing algorithms
-	 */
-	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+	/* Deliver to user space multicast routing algorithms */
+	ret = sock_queue_rcv_skb(mroute6_sk, skb);
+	rcu_read_unlock();
 	if (ret < 0) {
 		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
 		kfree_skb(skb);
@@ -1584,11 +1587,11 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (likely(mrt->mroute6_sk == NULL)) {
-		mrt->mroute6_sk = sk;
-		net->ipv6.devconf_all->mc_forwarding++;
-	} else {
+	if (rtnl_dereference(mrt->mroute6_sk)) {
 		err = -EADDRINUSE;
+	} else {
+		rcu_assign_pointer(mrt->mroute6_sk, sk);
+		net->ipv6.devconf_all->mc_forwarding++;
 	}
 	write_unlock_bh(&mrt_lock);
 
@@ -1614,9 +1617,9 @@ int ip6mr_sk_done(struct sock *sk)
 
 	rtnl_lock();
 	ip6mr_for_each_table(mrt, net) {
-		if (sk == mrt->mroute6_sk) {
+		if (sk == rtnl_dereference(mrt->mroute6_sk)) {
 			write_lock_bh(&mrt_lock);
-			mrt->mroute6_sk = NULL;
+			RCU_INIT_POINTER(mrt->mroute6_sk, NULL);
 			net->ipv6.devconf_all->mc_forwarding--;
 			write_unlock_bh(&mrt_lock);
 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1630,11 +1633,12 @@ int ip6mr_sk_done(struct sock *sk)
 		}
 	}
 	rtnl_unlock();
+	synchronize_rcu();
 
 	return err;
 }
 
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
 	struct mr6_table *mrt;
 	struct flowi6 fl6 = {
@@ -1646,8 +1650,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 		return NULL;
 
-	return mrt->mroute6_sk;
+	return rcu_access_pointer(mrt->mroute6_sk);
 }
+EXPORT_SYMBOL(mroute6_is_socket);
 
 /*
  *	Socket options and virtual interface manipulation. The whole
@@ -1674,7 +1679,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		return -ENOENT;
 
 	if (optname != MRT6_INIT) {
-		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+		if (sk != rcu_access_pointer(mrt->mroute6_sk) &&
+		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -1696,7 +1702,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (vif.mif6c_mifi >= MAXMIFS)
 			return -ENFILE;
 		rtnl_lock();
-		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+		ret = mif6_add(net, mrt, &vif,
+			       sk == rtnl_dereference(mrt->mroute6_sk));
 		rtnl_unlock();
 		return ret;
 
@@ -1731,7 +1738,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
 		else
 			ret = ip6mr_mfc_add(net, mrt, &mfc,
-					    sk == mrt->mroute6_sk, parent);
+					    sk ==
+					    rtnl_dereference(mrt->mroute6_sk),
+					    parent);
 		rtnl_unlock();
 		return ret;
 
@@ -1783,7 +1792,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
 			return -EINVAL;
-		if (sk == mrt->mroute6_sk)
+		if (sk == rcu_access_pointer(mrt->mroute6_sk))
 			return -EBUSY;
 
 		rtnl_lock();

From 87c418bf1323d57140f4b448715f64de3fbb7e91 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:31 +0200
Subject: [PATCH 0534/1678] ip6mr: Align hash implementation to ipmr

Since commit 8fb472c09b9d ("ipmr: improve hash scalability") ipmr has
been using rhashtable as a basis for its mfc routes, but ip6mr is
currently still using the old private MFC hash implementation.

Align ip6mr to the current ipmr implementation.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h |  30 ++--
 net/ipv6/ip6mr.c        | 311 +++++++++++++++++++++-------------------
 2 files changed, 183 insertions(+), 158 deletions(-)

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index e1b9fb06e1ea..e2dac199861e 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -8,6 +8,7 @@
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
 #include <linux/mroute_base.h>
+#include <linux/rhashtable.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -65,10 +66,20 @@ static inline void ip6_mr_cleanup(void)
 
 #define VIFF_STATIC 0x8000
 
+struct mfc6_cache_cmp_arg {
+	struct in6_addr mf6c_mcastgrp;
+	struct in6_addr mf6c_origin;
+};
+
 struct mfc6_cache {
-	struct list_head list;
-	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
-	struct in6_addr mf6c_origin;			/* Source of packet 		*/
+	struct rhlist_head mnode;
+	union {
+		struct {
+			struct in6_addr mf6c_mcastgrp;
+			struct in6_addr mf6c_origin;
+		};
+		struct mfc6_cache_cmp_arg cmparg;
+	};
 	mifi_t mf6c_parent;			/* Source interface		*/
 	int mfc_flags;				/* Flags on line		*/
 
@@ -88,22 +99,13 @@ struct mfc6_cache {
 			unsigned char ttls[MAXMIFS];	/* TTL thresholds		*/
 		} res;
 	} mfc_un;
+	struct list_head list;
+	struct rcu_head rcu;
 };
 
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
-#define MFC6_LINES		64
-
-#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \
-			  (__force u32)(a)->s6_addr32[1] ^ \
-			  (__force u32)(a)->s6_addr32[2] ^ \
-			  (__force u32)(a)->s6_addr32[3] ^ \
-			  (__force u32)(g)->s6_addr32[0] ^ \
-			  (__force u32)(g)->s6_addr32[1] ^ \
-			  (__force u32)(g)->s6_addr32[2] ^ \
-			  (__force u32)(g)->s6_addr32[3]) % MFC6_LINES)
-
 #define MFC_ASSERT_THRESH (3*HZ)		/* Maximal freq. of asserts */
 
 struct rtmsg;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a0e297ddca6e..6f0b7f4894b2 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -61,8 +61,9 @@ struct mr6_table {
 	struct sock __rcu	*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc6_unres_queue;
-	struct list_head	mfc6_cache_array[MFC6_LINES];
 	struct vif_device	vif6_table[MAXMIFS];
+	struct rhltable		mfc6_hash;
+	struct list_head	mfc6_cache_list;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
@@ -299,10 +300,29 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 }
 #endif
 
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+			  const void *ptr)
+{
+	const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+	struct mfc6_cache *c = (struct mfc6_cache *)ptr;
+
+	return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+	       !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
+
+static const struct rhashtable_params ip6mr_rht_params = {
+	.head_offset = offsetof(struct mfc6_cache, mnode),
+	.key_offset = offsetof(struct mfc6_cache, cmparg),
+	.key_len = sizeof(struct mfc6_cache_cmp_arg),
+	.nelem_hint = 3,
+	.locks_mul = 1,
+	.obj_cmpfn = ip6mr_hash_cmp,
+	.automatic_shrinking = true,
+};
+
 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 {
 	struct mr6_table *mrt;
-	unsigned int i;
 
 	mrt = ip6mr_get_table(net, id);
 	if (mrt)
@@ -314,10 +334,8 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 	mrt->id = id;
 	write_pnet(&mrt->net, net);
 
-	/* Forwarding cache */
-	for (i = 0; i < MFC6_LINES; i++)
-		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
+	rhltable_init(&mrt->mfc6_hash, &ip6mr_rht_params);
+	INIT_LIST_HEAD(&mrt->mfc6_cache_list);
 	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 
 	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
@@ -335,6 +353,7 @@ static void ip6mr_free_table(struct mr6_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
 	mroute_clean_tables(mrt, true);
+	rhltable_destroy(&mrt->mfc6_hash);
 	kfree(mrt);
 }
 
@@ -344,7 +363,6 @@ struct ipmr_mfc_iter {
 	struct seq_net_private p;
 	struct mr6_table *mrt;
 	struct list_head *cache;
-	int ct;
 };
 
 
@@ -354,14 +372,12 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 	struct mr6_table *mrt = it->mrt;
 	struct mfc6_cache *mfc;
 
-	read_lock(&mrt_lock);
-	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
-		it->cache = &mrt->mfc6_cache_array[it->ct];
-		list_for_each_entry(mfc, it->cache, list)
-			if (pos-- == 0)
-				return mfc;
-	}
-	read_unlock(&mrt_lock);
+	rcu_read_lock();
+	it->cache = &mrt->mfc6_cache_list;
+	list_for_each_entry_rcu(mfc, &mrt->mfc6_cache_list, list)
+		if (pos-- == 0)
+			return mfc;
+	rcu_read_unlock();
 
 	spin_lock_bh(&mfc_unres_lock);
 	it->cache = &mrt->mfc6_unres_queue;
@@ -517,19 +533,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (it->cache == &mrt->mfc6_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
-	while (++it->ct < MFC6_LINES) {
-		it->cache = &mrt->mfc6_cache_array[it->ct];
-		if (list_empty(it->cache))
-			continue;
-		return list_first_entry(it->cache, struct mfc6_cache, list);
-	}
-
 	/* exhausted cache_array, show unresolved */
-	read_unlock(&mrt_lock);
+	rcu_read_unlock();
 	it->cache = &mrt->mfc6_unres_queue;
-	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
 	if (!list_empty(it->cache))
@@ -549,8 +555,8 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 
 	if (it->cache == &mrt->mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
-		read_unlock(&mrt_lock);
+	else if (it->cache == &mrt->mfc6_cache_list)
+		rcu_read_unlock();
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -827,9 +833,16 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
 	return 0;
 }
 
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+	struct mfc6_cache *c = container_of(head, struct mfc6_cache, rcu);
+
+	kmem_cache_free(mrt_cachep, c);
+}
+
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-	kmem_cache_free(mrt_cachep, c);
+	call_rcu(&c->rcu, ip6mr_cache_free_rcu);
 }
 
 /* Destroy an unresolved cache entry, killing queued skbs
@@ -1002,14 +1015,17 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 					   const struct in6_addr *origin,
 					   const struct in6_addr *mcastgrp)
 {
-	int line = MFC6_HASH(mcastgrp, origin);
+	struct mfc6_cache_cmp_arg arg = {
+		.mf6c_origin = *origin,
+		.mf6c_mcastgrp = *mcastgrp,
+	};
+	struct rhlist_head *tmp, *list;
 	struct mfc6_cache *c;
 
-	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
-			return c;
-	}
+	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode)
+		return c;
+
 	return NULL;
 }
 
@@ -1017,13 +1033,16 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
 						      mifi_t mifi)
 {
-	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
+	struct mfc6_cache_cmp_arg arg = {
+		.mf6c_origin = in6addr_any,
+		.mf6c_mcastgrp = in6addr_any,
+	};
+	struct rhlist_head *tmp, *list;
 	struct mfc6_cache *c;
 
-	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-		if (ipv6_addr_any(&c->mf6c_origin) &&
-		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
-		    (c->mfc_un.res.ttls[mifi] < 255))
+	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode)
+		if (c->mfc_un.res.ttls[mifi] < 255)
 			return c;
 
 	return NULL;
@@ -1034,29 +1053,53 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
 					       struct in6_addr *mcastgrp,
 					       mifi_t mifi)
 {
-	int line = MFC6_HASH(mcastgrp, &in6addr_any);
+	struct mfc6_cache_cmp_arg arg = {
+		.mf6c_origin = in6addr_any,
+		.mf6c_mcastgrp = *mcastgrp,
+	};
+	struct rhlist_head *tmp, *list;
 	struct mfc6_cache *c, *proxy;
 
 	if (ipv6_addr_any(mcastgrp))
 		goto skip;
 
-	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
-		if (ipv6_addr_any(&c->mf6c_origin) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
-			if (c->mfc_un.res.ttls[mifi] < 255)
-				return c;
+	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+		if (c->mfc_un.res.ttls[mifi] < 255)
+			return c;
 
-			/* It's ok if the mifi is part of the static tree */
-			proxy = ip6mr_cache_find_any_parent(mrt,
-							    c->mf6c_parent);
-			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
-				return c;
-		}
+		/* It's ok if the mifi is part of the static tree */
+		proxy = ip6mr_cache_find_any_parent(mrt, c->mf6c_parent);
+		if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
+			return c;
+	}
 
 skip:
 	return ip6mr_cache_find_any_parent(mrt, mifi);
 }
 
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr6_table *mrt,
+			const struct in6_addr *origin,
+			const struct in6_addr *mcastgrp,
+			int parent)
+{
+	struct mfc6_cache_cmp_arg arg = {
+		.mf6c_origin = *origin,
+		.mf6c_mcastgrp = *mcastgrp,
+	};
+	struct rhlist_head *tmp, *list;
+	struct mfc6_cache *c;
+
+	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode)
+		if (parent == -1 || parent == c->mf6c_parent)
+			return c;
+
+	return NULL;
+}
+
 /*
  *	Allocate a multicast cache entry
  */
@@ -1296,26 +1339,21 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
 			    int parent)
 {
-	int line;
-	struct mfc6_cache *c, *next;
+	struct mfc6_cache *c;
 
-	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+	/* The entries are added/deleted only under RTNL */
+	rcu_read_lock();
+	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+	rcu_read_unlock();
+	if (!c)
+		return -ENOENT;
+	rhltable_remove(&mrt->mfc6_hash, &c->mnode, ip6mr_rht_params);
+	list_del_rcu(&c->list);
 
-	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
-		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp,
-				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
-		    (parent == -1 || parent == c->mf6c_parent)) {
-			write_lock_bh(&mrt_lock);
-			list_del(&c->list);
-			write_unlock_bh(&mrt_lock);
-
-			mr6_netlink_event(mrt, c, RTM_DELROUTE);
-			ip6mr_cache_free(c);
-			return 0;
-		}
-	}
-	return -ENOENT;
+	mr6_netlink_event(mrt, c, RTM_DELROUTE);
+	ip6mr_cache_free(c);
+	return 0;
 }
 
 static int ip6mr_device_event(struct notifier_block *this,
@@ -1448,11 +1486,10 @@ void ip6_mr_cleanup(void)
 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 			 struct mf6cctl *mfc, int mrtsock, int parent)
 {
-	bool found = false;
-	int line;
-	struct mfc6_cache *uc, *c;
 	unsigned char ttls[MAXMIFS];
-	int i;
+	struct mfc6_cache *uc, *c;
+	bool found;
+	int i, err;
 
 	if (mfc->mf6cc_parent >= MAXMIFS)
 		return -ENFILE;
@@ -1461,22 +1498,14 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 	for (i = 0; i < MAXMIFS; i++) {
 		if (IF_ISSET(i, &mfc->mf6cc_ifset))
 			ttls[i] = 1;
-
 	}
 
-	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
-	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
-		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp,
-				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
-		    (parent == -1 || parent == mfc->mf6cc_parent)) {
-			found = true;
-			break;
-		}
-	}
-
-	if (found) {
+	/* The entries are added/deleted only under RTNL */
+	rcu_read_lock();
+	c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+				    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+	rcu_read_unlock();
+	if (c) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
 		ip6mr_update_thresholds(mrt, c, ttls);
@@ -1502,13 +1531,17 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
-	write_lock_bh(&mrt_lock);
-	list_add(&c->list, &mrt->mfc6_cache_array[line]);
-	write_unlock_bh(&mrt_lock);
+	err = rhltable_insert_key(&mrt->mfc6_hash, &c->cmparg, &c->mnode,
+				  ip6mr_rht_params);
+	if (err) {
+		pr_err("ip6mr: rhtable insert error %d\n", err);
+		ip6mr_cache_free(c);
+		return err;
+	}
+	list_add_tail_rcu(&c->list, &mrt->mfc6_cache_list);
 
-	/*
-	 *	Check to see if we resolved a queued list. If so we
-	 *	need to send on the frames and tidy up.
+	/* Check to see if we resolved a queued list. If so we
+	 * need to send on the frames and tidy up.
 	 */
 	found = false;
 	spin_lock_bh(&mfc_unres_lock);
@@ -1539,13 +1572,11 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 
 static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 {
-	int i;
+	struct mfc6_cache *c, *tmp;
 	LIST_HEAD(list);
-	struct mfc6_cache *c, *next;
+	int i;
 
-	/*
-	 *	Shut down all active vif entries
-	 */
+	/* Shut down all active vif entries */
 	for (i = 0; i < mrt->maxvif; i++) {
 		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
 			continue;
@@ -1553,25 +1584,19 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 	}
 	unregister_netdevice_many(&list);
 
-	/*
-	 *	Wipe the cache
-	 */
-	for (i = 0; i < MFC6_LINES; i++) {
-		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
-			if (!all && (c->mfc_flags & MFC_STATIC))
-				continue;
-			write_lock_bh(&mrt_lock);
-			list_del(&c->list);
-			write_unlock_bh(&mrt_lock);
-
-			mr6_netlink_event(mrt, c, RTM_DELROUTE);
-			ip6mr_cache_free(c);
-		}
+	/* Wipe the cache */
+	list_for_each_entry_safe(c, tmp, &mrt->mfc6_cache_list, list) {
+		if (!all && (c->mfc_flags & MFC_STATIC))
+			continue;
+		rhltable_remove(&mrt->mfc6_hash, &c->mnode, ip6mr_rht_params);
+		list_del_rcu(&c->list);
+		mr6_netlink_event(mrt, c, RTM_DELROUTE);
+		ip6mr_cache_free(c);
 	}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+		list_for_each_entry_safe(c, tmp, &mrt->mfc6_unres_queue, list) {
 			list_del(&c->list);
 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
 			ip6mr_destroy_unres(mrt, c);
@@ -1905,19 +1930,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 		if (copy_from_user(&sr, arg, sizeof(sr)))
 			return -EFAULT;
 
-		read_lock(&mrt_lock);
+		rcu_read_lock();
 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
 			sr.wrong_if = c->mfc_un.res.wrong_if;
-			read_unlock(&mrt_lock);
+			rcu_read_unlock();
 
 			if (copy_to_user(arg, &sr, sizeof(sr)))
 				return -EFAULT;
 			return 0;
 		}
-		read_unlock(&mrt_lock);
+		rcu_read_unlock();
 		return -EADDRNOTAVAIL;
 	default:
 		return -ENOIOCTLCMD;
@@ -1979,19 +2004,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 		if (copy_from_user(&sr, arg, sizeof(sr)))
 			return -EFAULT;
 
-		read_lock(&mrt_lock);
+		rcu_read_lock();
 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
 			sr.wrong_if = c->mfc_un.res.wrong_if;
-			read_unlock(&mrt_lock);
+			rcu_read_unlock();
 
 			if (copy_to_user(arg, &sr, sizeof(sr)))
 				return -EFAULT;
 			return 0;
 		}
-		read_unlock(&mrt_lock);
+		rcu_read_unlock();
 		return -EADDRNOTAVAIL;
 	default:
 		return -ENOIOCTLCMD;
@@ -2115,10 +2140,14 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 		/* For an (*,G) entry, we only check that the incoming
 		 * interface is part of the static tree.
 		 */
+		rcu_read_lock();
 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
 		if (cache_proxy &&
-		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255) {
+			rcu_read_unlock();
 			goto forward;
+		}
+		rcu_read_unlock();
 	}
 
 	/*
@@ -2535,34 +2564,30 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 	struct mr6_table *mrt;
 	struct mfc6_cache *mfc;
 	unsigned int t = 0, s_t;
-	unsigned int h = 0, s_h;
 	unsigned int e = 0, s_e;
 
 	s_t = cb->args[0];
-	s_h = cb->args[1];
-	s_e = cb->args[2];
+	s_e = cb->args[1];
 
-	read_lock(&mrt_lock);
+	rcu_read_lock();
 	ip6mr_for_each_table(mrt, net) {
 		if (t < s_t)
 			goto next_table;
-		if (t > s_t)
-			s_h = 0;
-		for (h = s_h; h < MFC6_LINES; h++) {
-			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
-				if (e < s_e)
-					goto next_entry;
-				if (ip6mr_fill_mroute(mrt, skb,
-						      NETLINK_CB(cb->skb).portid,
-						      cb->nlh->nlmsg_seq,
-						      mfc, RTM_NEWROUTE,
-						      NLM_F_MULTI) < 0)
-					goto done;
+		list_for_each_entry_rcu(mfc, &mrt->mfc6_cache_list, list) {
+			if (e < s_e)
+				goto next_entry;
+			if (ip6mr_fill_mroute(mrt, skb,
+					      NETLINK_CB(cb->skb).portid,
+					      cb->nlh->nlmsg_seq,
+					      mfc, RTM_NEWROUTE,
+					      NLM_F_MULTI) < 0)
+				goto done;
 next_entry:
-				e++;
-			}
-			e = s_e = 0;
+			e++;
 		}
+		e = 0;
+		s_e = 0;
+
 		spin_lock_bh(&mfc_unres_lock);
 		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
 			if (e < s_e)
@@ -2580,15 +2605,13 @@ next_entry2:
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 		e = s_e = 0;
-		s_h = 0;
 next_table:
 		t++;
 	}
 done:
-	read_unlock(&mrt_lock);
+	rcu_read_unlock();
 
-	cb->args[2] = e;
-	cb->args[1] = h;
+	cb->args[1] = e;
 	cb->args[0] = t;
 
 	return skb->len;

From b70432f7319eb75b24ca57dde8146c5e27244780 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:32 +0200
Subject: [PATCH 0535/1678] mroute*: Make mr_table a common struct

Following previous changes to ip6mr, mr_table and mr6_table are
basically the same [up to mr6_table having additional '6' suffixes to
its variable names].
Move the common structure definition into a common header; This
requires renaming all references in ip6mr to variables that had the
distinct suffix.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h      |  21 ---
 include/linux/mroute6.h     |   1 -
 include/linux/mroute_base.h |  46 ++++++
 include/net/netns/ipv6.h    |   2 +-
 net/ipv4/ipmr.c             |   2 -
 net/ipv6/ip6mr.c            | 301 +++++++++++++++++-------------------
 6 files changed, 186 insertions(+), 187 deletions(-)

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index b8aadffe6237..8688c5d03a24 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -4,8 +4,6 @@
 
 #include <linux/in.h>
 #include <linux/pim.h>
-#include <linux/rhashtable.h>
-#include <net/sock.h>
 #include <net/fib_rules.h>
 #include <net/fib_notifier.h>
 #include <uapi/linux/mroute.h>
@@ -67,25 +65,6 @@ struct vif_entry_notifier_info {
 
 #define VIFF_STATIC 0x8000
 
-#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
-
-struct mr_table {
-	struct list_head	list;
-	possible_net_t		net;
-	u32			id;
-	struct sock __rcu	*mroute_sk;
-	struct timer_list	ipmr_expire_timer;
-	struct list_head	mfc_unres_queue;
-	struct vif_device	vif_table[MAXVIFS];
-	struct rhltable		mfc_hash;
-	struct list_head	mfc_cache_list;
-	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
-	bool			mroute_do_assert;
-	bool			mroute_do_pim;
-	int			mroute_reg_vif_num;
-};
-
 /* mfc_flags:
  * MFC_STATIC - the entry was added statically (not by a routing daemon)
  * MFC_OFFLOAD - the entry was offloaded to the hardware
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index e2dac199861e..d5c8dc155a42 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -8,7 +8,6 @@
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
 #include <linux/mroute_base.h>
-#include <linux/rhashtable.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 0de651e15f27..1cc944a14df5 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -2,6 +2,9 @@
 #define __LINUX_MROUTE_BASE_H
 
 #include <linux/netdevice.h>
+#include <linux/rhashtable.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
 
 /**
  * struct vif_device - interface representor for multicast routing
@@ -32,6 +35,49 @@ struct vif_device {
 	__be32 local, remote;
 };
 
+#ifndef MAXVIFS
+/* This one is nasty; value is defined in uapi using different symbols for
+ * mroute and morute6 but both map into same 32.
+ */
+#define MAXVIFS	32
+#endif
+
+#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
+
+/**
+ * struct mr_table - a multicast routing table
+ * @list: entry within a list of multicast routing tables
+ * @net: net where this table belongs
+ * @id: identifier of the table
+ * @mroute_sk: socket associated with the table
+ * @ipmr_expire_timer: timer for handling unresolved routes
+ * @mfc_unres_queue: list of unresolved MFC entries
+ * @vif_table: array containing all possible vifs
+ * @mfc_hash: Hash table of all resolved routes for easy lookup
+ * @mfc_cache_list: list of resovled routes for possible traversal
+ * @maxvif: Identifier of highest value vif currently in use
+ * @cache_resolve_queue_len: current size of unresolved queue
+ * @mroute_do_assert: Whether to inform userspace on wrong ingress
+ * @mroute_do_pim: Whether to receive IGMP PIMv1
+ * @mroute_reg_vif_num: PIM-device vif index
+ */
+struct mr_table {
+	struct list_head	list;
+	possible_net_t		net;
+	u32			id;
+	struct sock __rcu	*mroute_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct list_head	mfc_unres_queue;
+	struct vif_device	vif_table[MAXVIFS];
+	struct rhltable		mfc_hash;
+	struct list_head	mfc_cache_list;
+	int			maxvif;
+	atomic_t		cache_resolve_queue_len;
+	bool			mroute_do_assert;
+	bool			mroute_do_pim;
+	int			mroute_reg_vif_num;
+};
+
 #ifdef CONFIG_IP_MROUTE_COMMON
 void vif_device_init(struct vif_device *v,
 		     struct net_device *dev,
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 2b9194229a56..e286fda09fcf 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -85,7 +85,7 @@ struct netns_ipv6 {
 	struct sock		*mc_autojoin_sk;
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
-	struct mr6_table	*mrt6;
+	struct mr_table		*mrt6;
 #else
 	struct list_head	mr6_tables;
 	struct fib_rules_ops	*mr6_rules_ops;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1370edad64bf..a1bf0020cad1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -31,7 +31,6 @@
 #include <linux/cache.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
@@ -53,7 +52,6 @@
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/route.h>
-#include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
 #include <net/raw.h>
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6f0b7f4894b2..adbb826ca8fd 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
@@ -36,7 +35,6 @@
 #include <linux/compat.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
 #include <net/raw.h>
 #include <linux/notifier.h>
 #include <linux/if_arp.h>
@@ -54,31 +52,12 @@
 #include <net/ip6_checksum.h>
 #include <linux/netconf.h>
 
-struct mr6_table {
-	struct list_head	list;
-	possible_net_t		net;
-	u32			id;
-	struct sock __rcu	*mroute6_sk;
-	struct timer_list	ipmr_expire_timer;
-	struct list_head	mfc6_unres_queue;
-	struct vif_device	vif6_table[MAXMIFS];
-	struct rhltable		mfc6_hash;
-	struct list_head	mfc6_cache_list;
-	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
-	bool			mroute_do_assert;
-	bool			mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
-	int			mroute_reg_vif_num;
-#endif
-};
-
 struct ip6mr_rule {
 	struct fib_rule		common;
 };
 
 struct ip6mr_result {
-	struct mr6_table	*mrt;
+	struct mr_table	*mrt;
 };
 
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -87,11 +66,7 @@ struct ip6mr_result {
 
 static DEFINE_RWLOCK(mrt_lock);
 
-/*
- *	Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -106,30 +81,30 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 			   struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			       struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
 			      int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 			       struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 #define ip6mr_for_each_table(mrt, net) \
 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	ip6mr_for_each_table(mrt, net) {
 		if (mrt->id == id)
@@ -139,7 +114,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-			    struct mr6_table **mrt)
+			    struct mr_table **mrt)
 {
 	int err;
 	struct ip6mr_result res;
@@ -160,7 +135,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 			     int flags, struct fib_lookup_arg *arg)
 {
 	struct ip6mr_result *res = arg->result;
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	switch (rule->action) {
 	case FR_ACT_TO_TBL:
@@ -228,7 +203,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 static int __net_init ip6mr_rules_init(struct net *net)
 {
 	struct fib_rules_ops *ops;
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	int err;
 
 	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -259,7 +234,7 @@ err1:
 
 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
-	struct mr6_table *mrt, *next;
+	struct mr_table *mrt, *next;
 
 	rtnl_lock();
 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -273,13 +248,13 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 #define ip6mr_for_each_table(mrt, net) \
 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
 	return net->ipv6.mrt6;
 }
 
 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
-			    struct mr6_table **mrt)
+			    struct mr_table **mrt)
 {
 	*mrt = net->ipv6.mrt6;
 	return 0;
@@ -320,9 +295,9 @@ static const struct rhashtable_params ip6mr_rht_params = {
 	.automatic_shrinking = true,
 };
 
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 {
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	mrt = ip6mr_get_table(net, id);
 	if (mrt)
@@ -334,9 +309,9 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 	mrt->id = id;
 	write_pnet(&mrt->net, net);
 
-	rhltable_init(&mrt->mfc6_hash, &ip6mr_rht_params);
-	INIT_LIST_HEAD(&mrt->mfc6_cache_list);
-	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+	rhltable_init(&mrt->mfc_hash, &ip6mr_rht_params);
+	INIT_LIST_HEAD(&mrt->mfc_cache_list);
+	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
 
 	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
 
@@ -349,11 +324,11 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 	return mrt;
 }
 
-static void ip6mr_free_table(struct mr6_table *mrt)
+static void ip6mr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
 	mroute_clean_tables(mrt, true);
-	rhltable_destroy(&mrt->mfc6_hash);
+	rhltable_destroy(&mrt->mfc_hash);
 	kfree(mrt);
 }
 
@@ -361,7 +336,7 @@ static void ip6mr_free_table(struct mr6_table *mrt)
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct list_head *cache;
 };
 
@@ -369,18 +344,18 @@ struct ipmr_mfc_iter {
 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 					   struct ipmr_mfc_iter *it, loff_t pos)
 {
-	struct mr6_table *mrt = it->mrt;
+	struct mr_table *mrt = it->mrt;
 	struct mfc6_cache *mfc;
 
 	rcu_read_lock();
-	it->cache = &mrt->mfc6_cache_list;
-	list_for_each_entry_rcu(mfc, &mrt->mfc6_cache_list, list)
+	it->cache = &mrt->mfc_cache_list;
+	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
 		if (pos-- == 0)
 			return mfc;
 	rcu_read_unlock();
 
 	spin_lock_bh(&mfc_unres_lock);
-	it->cache = &mrt->mfc6_unres_queue;
+	it->cache = &mrt->mfc_unres_queue;
 	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
@@ -396,7 +371,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 
 struct ipmr_vif_iter {
 	struct seq_net_private p;
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	int ct;
 };
 
@@ -404,13 +379,13 @@ static struct vif_device *ip6mr_vif_seq_idx(struct net *net,
 					    struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
-	struct mr6_table *mrt = iter->mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-		if (!MIF_EXISTS(mrt, iter->ct))
+		if (!VIF_EXISTS(mrt, iter->ct))
 			continue;
 		if (pos-- == 0)
-			return &mrt->vif6_table[iter->ct];
+			return &mrt->vif_table[iter->ct];
 	}
 	return NULL;
 }
@@ -420,7 +395,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 	if (!mrt)
@@ -437,16 +412,16 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = iter->mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
 		return ip6mr_vif_seq_idx(net, iter, 0);
 
 	while (++iter->ct < mrt->maxvif) {
-		if (!MIF_EXISTS(mrt, iter->ct))
+		if (!VIF_EXISTS(mrt, iter->ct))
 			continue;
-		return &mrt->vif6_table[iter->ct];
+		return &mrt->vif_table[iter->ct];
 	}
 	return NULL;
 }
@@ -460,7 +435,7 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
 	struct ipmr_vif_iter *iter = seq->private;
-	struct mr6_table *mrt = iter->mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -471,7 +446,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq,
 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-			   vif - mrt->vif6_table,
+			   vif - mrt->vif_table,
 			   name, vif->bytes_in, vif->pkt_in,
 			   vif->bytes_out, vif->pkt_out,
 			   vif->flags);
@@ -503,7 +478,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 	if (!mrt)
@@ -520,7 +495,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc6_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = it->mrt;
+	struct mr_table *mrt = it->mrt;
 
 	++*pos;
 
@@ -530,12 +505,12 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (mfc->list.next != it->cache)
 		return list_entry(mfc->list.next, struct mfc6_cache, list);
 
-	if (it->cache == &mrt->mfc6_unres_queue)
+	if (it->cache == &mrt->mfc_unres_queue)
 		goto end_of_list;
 
 	/* exhausted cache_array, show unresolved */
 	rcu_read_unlock();
-	it->cache = &mrt->mfc6_unres_queue;
+	it->cache = &mrt->mfc_unres_queue;
 
 	spin_lock_bh(&mfc_unres_lock);
 	if (!list_empty(it->cache))
@@ -551,11 +526,11 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
-	struct mr6_table *mrt = it->mrt;
+	struct mr_table *mrt = it->mrt;
 
-	if (it->cache == &mrt->mfc6_unres_queue)
+	if (it->cache == &mrt->mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == &mrt->mfc6_cache_list)
+	else if (it->cache == &mrt->mfc_cache_list)
 		rcu_read_unlock();
 }
 
@@ -571,20 +546,20 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	} else {
 		const struct mfc6_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
-		struct mr6_table *mrt = it->mrt;
+		struct mr_table *mrt = it->mrt;
 
 		seq_printf(seq, "%pI6 %pI6 %-3hd",
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 			   mfc->mf6c_parent);
 
-		if (it->cache != &mrt->mfc6_unres_queue) {
+		if (it->cache != &mrt->mfc_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
 				   mfc->mfc_un.res.pkt,
 				   mfc->mfc_un.res.bytes,
 				   mfc->mfc_un.res.wrong_if);
 			for (n = mfc->mfc_un.res.minvif;
 			     n < mfc->mfc_un.res.maxvif; n++) {
-				if (MIF_EXISTS(mrt, n) &&
+				if (VIF_EXISTS(mrt, n) &&
 				    mfc->mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
 						   " %2d:%-3d",
@@ -630,7 +605,7 @@ static int pim6_rcv(struct sk_buff *skb)
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
 	struct net *net = dev_net(skb->dev);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct flowi6 fl6 = {
 		.flowi6_iif	= skb->dev->ifindex,
 		.flowi6_mark	= skb->mark,
@@ -664,7 +639,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
-		reg_dev = mrt->vif6_table[reg_vif_num].dev;
+		reg_dev = mrt->vif_table[reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -699,7 +674,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct flowi6 fl6 = {
 		.flowi6_oif	= dev->ifindex,
 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -742,7 +717,7 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 {
 	struct net_device *dev;
 	char name[IFNAMSIZ];
@@ -779,7 +754,7 @@ failure:
  *	Delete a VIF entry
  */
 
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 		       struct list_head *head)
 {
 	struct vif_device *v;
@@ -789,7 +764,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
 	if (vifi < 0 || vifi >= mrt->maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &mrt->vif6_table[vifi];
+	v = &mrt->vif_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -808,7 +783,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
 	if (vifi + 1 == mrt->maxvif) {
 		int tmp;
 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
-			if (MIF_EXISTS(mrt, tmp))
+			if (VIF_EXISTS(mrt, tmp))
 				break;
 		}
 		mrt->maxvif = tmp + 1;
@@ -849,7 +824,7 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 {
 	struct net *net = read_pnet(&mrt->net);
 	struct sk_buff *skb;
@@ -875,13 +850,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 
 /* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
 	struct mfc6_cache *c, *next;
 
-	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			/* not yet... */
 			unsigned long interval = c->mfc_un.unres.expires - now;
@@ -895,20 +870,20 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
 		ip6mr_destroy_unres(mrt, c);
 	}
 
-	if (!list_empty(&mrt->mfc6_unres_queue))
+	if (!list_empty(&mrt->mfc_unres_queue))
 		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 }
 
 static void ipmr_expire_process(struct timer_list *t)
 {
-	struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 
 	if (!spin_trylock(&mfc_unres_lock)) {
 		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 		return;
 	}
 
-	if (!list_empty(&mrt->mfc6_unres_queue))
+	if (!list_empty(&mrt->mfc_unres_queue))
 		ipmr_do_expire_process(mrt);
 
 	spin_unlock(&mfc_unres_lock);
@@ -916,7 +891,8 @@ static void ipmr_expire_process(struct timer_list *t)
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+				    struct mfc6_cache *cache,
 				    unsigned char *ttls)
 {
 	int vifi;
@@ -926,7 +902,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
 	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
-		if (MIF_EXISTS(mrt, vifi) &&
+		if (VIF_EXISTS(mrt, vifi) &&
 		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
@@ -938,17 +914,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
 	cache->mfc_un.res.lastuse = jiffies;
 }
 
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
 		    struct mif6ctl *vifc, int mrtsock)
 {
 	int vifi = vifc->mif6c_mifi;
-	struct vif_device *v = &mrt->vif6_table[vifi];
+	struct vif_device *v = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (MIF_EXISTS(mrt, vifi))
+	if (VIF_EXISTS(mrt, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->mif6c_flags) {
@@ -1011,7 +987,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 	return 0;
 }
 
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 					   const struct in6_addr *origin,
 					   const struct in6_addr *mcastgrp)
 {
@@ -1022,7 +998,7 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 	struct rhlist_head *tmp, *list;
 	struct mfc6_cache *c;
 
-	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
 		return c;
 
@@ -1030,7 +1006,7 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 }
 
 /* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr_table *mrt,
 						      mifi_t mifi)
 {
 	struct mfc6_cache_cmp_arg arg = {
@@ -1040,7 +1016,7 @@ static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
 	struct rhlist_head *tmp, *list;
 	struct mfc6_cache *c;
 
-	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
 		if (c->mfc_un.res.ttls[mifi] < 255)
 			return c;
@@ -1049,7 +1025,7 @@ static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
 }
 
 /* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 					       struct in6_addr *mcastgrp,
 					       mifi_t mifi)
 {
@@ -1063,7 +1039,7 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
 	if (ipv6_addr_any(mcastgrp))
 		goto skip;
 
-	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
 		if (c->mfc_un.res.ttls[mifi] < 255)
 			return c;
@@ -1080,7 +1056,7 @@ skip:
 
 /* Look for a (S,G,iif) entry if parent != -1 */
 static struct mfc6_cache *
-ip6mr_cache_find_parent(struct mr6_table *mrt,
+ip6mr_cache_find_parent(struct mr_table *mrt,
 			const struct in6_addr *origin,
 			const struct in6_addr *mcastgrp,
 			int parent)
@@ -1092,7 +1068,7 @@ ip6mr_cache_find_parent(struct mr6_table *mrt,
 	struct rhlist_head *tmp, *list;
 	struct mfc6_cache *c;
 
-	list = rhltable_lookup(&mrt->mfc6_hash, &arg, ip6mr_rht_params);
+	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
 		if (parent == -1 || parent == c->mf6c_parent)
 			return c;
@@ -1127,7 +1103,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
 				struct mfc6_cache *uc, struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
@@ -1161,7 +1137,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
  *	Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert)
 {
 	struct sock *mroute6_sk;
@@ -1235,7 +1211,7 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 	}
 
 	rcu_read_lock();
-	mroute6_sk = rcu_dereference(mrt->mroute6_sk);
+	mroute6_sk = rcu_dereference(mrt->mroute_sk);
 	if (!mroute6_sk) {
 		rcu_read_unlock();
 		kfree_skb(skb);
@@ -1260,14 +1236,14 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
  */
 
 static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, struct sk_buff *skb)
 {
 	bool found = false;
 	int err;
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
 			found = true;
@@ -1311,7 +1287,7 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&mrt->cache_resolve_queue_len);
-		list_add(&c->list, &mrt->mfc6_unres_queue);
+		list_add(&c->list, &mrt->mfc_unres_queue);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
 		ipmr_do_expire_process(mrt);
@@ -1336,7 +1312,7 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
  *	MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
 			    int parent)
 {
 	struct mfc6_cache *c;
@@ -1348,7 +1324,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
 	rcu_read_unlock();
 	if (!c)
 		return -ENOENT;
-	rhltable_remove(&mrt->mfc6_hash, &c->mnode, ip6mr_rht_params);
+	rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
 	list_del_rcu(&c->list);
 
 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1361,7 +1337,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct vif_device *v;
 	int ct;
 
@@ -1369,7 +1345,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 		return NOTIFY_DONE;
 
 	ip6mr_for_each_table(mrt, net) {
-		v = &mrt->vif6_table[0];
+		v = &mrt->vif_table[0];
 		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
 			if (v->dev == dev)
 				mif6_delete(mrt, ct, 1, NULL);
@@ -1483,7 +1459,7 @@ void ip6_mr_cleanup(void)
 	kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 			 struct mf6cctl *mfc, int mrtsock, int parent)
 {
 	unsigned char ttls[MAXMIFS];
@@ -1531,21 +1507,21 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
-	err = rhltable_insert_key(&mrt->mfc6_hash, &c->cmparg, &c->mnode,
+	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
 				  ip6mr_rht_params);
 	if (err) {
 		pr_err("ip6mr: rhtable insert error %d\n", err);
 		ip6mr_cache_free(c);
 		return err;
 	}
-	list_add_tail_rcu(&c->list, &mrt->mfc6_cache_list);
+	list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
 
 	/* Check to see if we resolved a queued list. If so we
 	 * need to send on the frames and tidy up.
 	 */
 	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			list_del(&uc->list);
@@ -1554,7 +1530,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
 			break;
 		}
 	}
-	if (list_empty(&mrt->mfc6_unres_queue))
+	if (list_empty(&mrt->mfc_unres_queue))
 		del_timer(&mrt->ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
@@ -1570,7 +1546,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
 	struct mfc6_cache *c, *tmp;
 	LIST_HEAD(list);
@@ -1578,17 +1554,17 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 
 	/* Shut down all active vif entries */
 	for (i = 0; i < mrt->maxvif; i++) {
-		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
 			continue;
 		mif6_delete(mrt, i, 0, &list);
 	}
 	unregister_netdevice_many(&list);
 
 	/* Wipe the cache */
-	list_for_each_entry_safe(c, tmp, &mrt->mfc6_cache_list, list) {
+	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
 		if (!all && (c->mfc_flags & MFC_STATIC))
 			continue;
-		rhltable_remove(&mrt->mfc6_hash, &c->mnode, ip6mr_rht_params);
+		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
 		list_del_rcu(&c->list);
 		mr6_netlink_event(mrt, c, RTM_DELROUTE);
 		ip6mr_cache_free(c);
@@ -1596,7 +1572,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, tmp, &mrt->mfc6_unres_queue, list) {
+		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 			list_del(&c->list);
 			mr6_netlink_event(mrt, c, RTM_DELROUTE);
 			ip6mr_destroy_unres(mrt, c);
@@ -1605,17 +1581,17 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 	}
 }
 
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 {
 	int err = 0;
 	struct net *net = sock_net(sk);
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (rtnl_dereference(mrt->mroute6_sk)) {
+	if (rtnl_dereference(mrt->mroute_sk)) {
 		err = -EADDRINUSE;
 	} else {
-		rcu_assign_pointer(mrt->mroute6_sk, sk);
+		rcu_assign_pointer(mrt->mroute_sk, sk);
 		net->ipv6.devconf_all->mc_forwarding++;
 	}
 	write_unlock_bh(&mrt_lock);
@@ -1634,7 +1610,7 @@ int ip6mr_sk_done(struct sock *sk)
 {
 	int err = -EACCES;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	if (sk->sk_type != SOCK_RAW ||
 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1642,9 +1618,9 @@ int ip6mr_sk_done(struct sock *sk)
 
 	rtnl_lock();
 	ip6mr_for_each_table(mrt, net) {
-		if (sk == rtnl_dereference(mrt->mroute6_sk)) {
+		if (sk == rtnl_dereference(mrt->mroute_sk)) {
 			write_lock_bh(&mrt_lock);
-			RCU_INIT_POINTER(mrt->mroute6_sk, NULL);
+			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
 			net->ipv6.devconf_all->mc_forwarding--;
 			write_unlock_bh(&mrt_lock);
 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1665,7 +1641,7 @@ int ip6mr_sk_done(struct sock *sk)
 
 bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 {
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct flowi6 fl6 = {
 		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
 		.flowi6_oif	= skb->dev->ifindex,
@@ -1675,7 +1651,7 @@ bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
 	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 		return NULL;
 
-	return rcu_access_pointer(mrt->mroute6_sk);
+	return rcu_access_pointer(mrt->mroute_sk);
 }
 EXPORT_SYMBOL(mroute6_is_socket);
 
@@ -1693,7 +1669,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	struct mf6cctl mfc;
 	mifi_t mifi;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	if (sk->sk_type != SOCK_RAW ||
 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1704,7 +1680,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		return -ENOENT;
 
 	if (optname != MRT6_INIT) {
-		if (sk != rcu_access_pointer(mrt->mroute6_sk) &&
+		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
 		    !ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EACCES;
 	}
@@ -1728,7 +1704,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 			return -ENFILE;
 		rtnl_lock();
 		ret = mif6_add(net, mrt, &vif,
-			       sk == rtnl_dereference(mrt->mroute6_sk));
+			       sk == rtnl_dereference(mrt->mroute_sk));
 		rtnl_unlock();
 		return ret;
 
@@ -1764,7 +1740,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		else
 			ret = ip6mr_mfc_add(net, mrt, &mfc,
 					    sk ==
-					    rtnl_dereference(mrt->mroute6_sk),
+					    rtnl_dereference(mrt->mroute_sk),
 					    parent);
 		rtnl_unlock();
 		return ret;
@@ -1817,7 +1793,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
 		if (v != RT_TABLE_DEFAULT && v >= 100000000)
 			return -EINVAL;
-		if (sk == rcu_access_pointer(mrt->mroute6_sk))
+		if (sk == rcu_access_pointer(mrt->mroute_sk))
 			return -EBUSY;
 
 		rtnl_lock();
@@ -1848,7 +1824,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	if (sk->sk_type != SOCK_RAW ||
 	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1899,7 +1875,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
 	if (!mrt)
@@ -1912,8 +1888,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 		if (vr.mifi >= mrt->maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &mrt->vif6_table[vr.mifi];
-		if (MIF_EXISTS(mrt, vr.mifi)) {
+		vif = &mrt->vif_table[vr.mifi];
+		if (VIF_EXISTS(mrt, vr.mifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1973,7 +1949,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 
 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
 	if (!mrt)
@@ -1986,8 +1962,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 		if (vr.mifi >= mrt->maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &mrt->vif6_table[vr.mifi];
-		if (MIF_EXISTS(mrt, vr.mifi)) {
+		vif = &mrt->vif_table[vr.mifi];
+		if (VIF_EXISTS(mrt, vr.mifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -2037,11 +2013,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
  *	Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
 			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct vif_device *vif = &mrt->vif6_table[vifi];
+	struct vif_device *vif = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
 	struct flowi6 fl6;
@@ -2111,18 +2087,18 @@ out_free:
 	return 0;
 }
 
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
 {
 	int ct;
 
 	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
-		if (mrt->vif6_table[ct].dev == dev)
+		if (mrt->vif_table[ct].dev == dev)
 			break;
 	}
 	return ct;
 }
 
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 			   struct sk_buff *skb, struct mfc6_cache *cache)
 {
 	int psend = -1;
@@ -2153,7 +2129,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (mrt->vif6_table[vif].dev != skb->dev) {
+	if (mrt->vif_table[vif].dev != skb->dev) {
 		cache->mfc_un.res.wrong_if++;
 
 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
@@ -2173,8 +2149,8 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 	}
 
 forward:
-	mrt->vif6_table[vif].pkt_in++;
-	mrt->vif6_table[vif].bytes_in += skb->len;
+	mrt->vif_table[vif].pkt_in++;
+	mrt->vif_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -2225,7 +2201,7 @@ int ip6_mr_input(struct sk_buff *skb)
 {
 	struct mfc6_cache *cache;
 	struct net *net = dev_net(skb->dev);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct flowi6 fl6 = {
 		.flowi6_iif	= skb->dev->ifindex,
 		.flowi6_mark	= skb->mark,
@@ -2276,7 +2252,7 @@ int ip6_mr_input(struct sk_buff *skb)
 }
 
 
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			       struct mfc6_cache *c, struct rtmsg *rtm)
 {
 	struct rta_mfc_stats mfcs;
@@ -2291,15 +2267,16 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 		return -ENOENT;
 	}
 
-	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
-	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
+	if (VIF_EXISTS(mrt, c->mf6c_parent) &&
+	    nla_put_u32(skb, RTA_IIF,
+			mrt->vif_table[c->mf6c_parent].dev->ifindex) < 0)
 		return -EMSGSIZE;
 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
 	if (!mp_attr)
 		return -EMSGSIZE;
 
 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
 			if (!nhp) {
 				nla_nest_cancel(skb, mp_attr);
@@ -2308,7 +2285,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -2334,7 +2311,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
 		    u32 portid)
 {
 	int err;
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct mfc6_cache *cache;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
@@ -2403,7 +2380,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
 	return err;
 }
 
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
 			     int flags)
 {
@@ -2468,7 +2445,7 @@ static int mr6_msgsize(bool unresolved, int maxvif)
 	return len;
 }
 
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
 			      int cmd)
 {
 	struct net *net = read_pnet(&mrt->net);
@@ -2510,7 +2487,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
 	return len;
 }
 
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
 {
 	struct net *net = read_pnet(&mrt->net);
 	struct nlmsghdr *nlh;
@@ -2561,7 +2538,7 @@ errout:
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct mr6_table *mrt;
+	struct mr_table *mrt;
 	struct mfc6_cache *mfc;
 	unsigned int t = 0, s_t;
 	unsigned int e = 0, s_e;
@@ -2573,7 +2550,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 	ip6mr_for_each_table(mrt, net) {
 		if (t < s_t)
 			goto next_table;
-		list_for_each_entry_rcu(mfc, &mrt->mfc6_cache_list, list) {
+		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
 			if (e < s_e)
 				goto next_entry;
 			if (ip6mr_fill_mroute(mrt, skb,
@@ -2589,7 +2566,7 @@ next_entry:
 		s_e = 0;
 
 		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
+		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
 			if (e < s_e)
 				goto next_entry2;
 			if (ip6mr_fill_mroute(mrt, skb,

From 0bbbf0e7d0e7ea8267836986346a9b3a35b74e4e Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:33 +0200
Subject: [PATCH 0536/1678] ipmr, ip6mr: Unite creation of new mr_table

Now that both ipmr and ip6mr are using the same mr_table structure,
we can have a common function to allocate & initialize a new instance.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 17 +++++++++++++++++
 net/ipv4/ipmr.c             | 27 ++++++++++-----------------
 net/ipv4/ipmr_base.c        | 27 +++++++++++++++++++++++++++
 net/ipv6/ip6mr.c            | 30 ++++++++++--------------------
 4 files changed, 64 insertions(+), 37 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 1cc944a14df5..805305722803 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -85,6 +85,13 @@ void vif_device_init(struct vif_device *v,
 		     unsigned char threshold,
 		     unsigned short flags,
 		     unsigned short get_iflink_mask);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+	       const struct rhashtable_params *rht_params,
+	       void (*expire_func)(struct timer_list *t),
+	       void (*table_set)(struct mr_table *mrt,
+				 struct net *net));
 #else
 static inline void vif_device_init(struct vif_device *v,
 				   struct net_device *dev,
@@ -94,5 +101,15 @@ static inline void vif_device_init(struct vif_device *v,
 				   unsigned short get_iflink_mask)
 {
 }
+
+static inline struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+	       const struct rhashtable_params *rht_params,
+	       void (*expire_func)(struct timer_list *t),
+	       void (*table_set)(struct mr_table *mrt,
+				 struct net *net))
+{
+	return NULL;
+}
 #endif
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1bf0020cad1..f213933db177 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -352,6 +352,14 @@ static const struct rhashtable_params ipmr_rht_params = {
 	.automatic_shrinking = true,
 };
 
+static void ipmr_new_table_set(struct mr_table *mrt,
+			       struct net *net)
+{
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+}
+
 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 {
 	struct mr_table *mrt;
@@ -364,23 +372,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 	if (mrt)
 		return mrt;
 
-	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-	if (!mrt)
-		return ERR_PTR(-ENOMEM);
-	write_pnet(&mrt->net, net);
-	mrt->id = id;
-
-	rhltable_init(&mrt->mfc_hash, &ipmr_rht_params);
-	INIT_LIST_HEAD(&mrt->mfc_cache_list);
-	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
-	mrt->mroute_reg_vif_num = -1;
-#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
-	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
-#endif
-	return mrt;
+	return mr_table_alloc(net, id, &ipmr_rht_params,
+			      ipmr_expire_process, ipmr_new_table_set);
 }
 
 static void ipmr_free_table(struct mr_table *mrt)
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 22758f848344..3e21a5806d0e 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -26,3 +26,30 @@ void vif_device_init(struct vif_device *v,
 		v->link = dev->ifindex;
 }
 EXPORT_SYMBOL(vif_device_init);
+
+struct mr_table *
+mr_table_alloc(struct net *net, u32 id,
+	       const struct rhashtable_params *rht_params,
+	       void (*expire_func)(struct timer_list *t),
+	       void (*table_set)(struct mr_table *mrt,
+				 struct net *net))
+{
+	struct mr_table *mrt;
+
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (!mrt)
+		return NULL;
+	mrt->id = id;
+	write_pnet(&mrt->net, net);
+
+	rhltable_init(&mrt->mfc_hash, rht_params);
+	INIT_LIST_HEAD(&mrt->mfc_cache_list);
+	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+	timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
+
+	mrt->mroute_reg_vif_num = -1;
+	table_set(mrt, net);
+	return mrt;
+}
+EXPORT_SYMBOL(mr_table_alloc);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index adbb826ca8fd..d50852882966 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -31,7 +31,6 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/compat.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -295,6 +294,14 @@ static const struct rhashtable_params ip6mr_rht_params = {
 	.automatic_shrinking = true,
 };
 
+static void ip6mr_new_table_set(struct mr_table *mrt,
+				struct net *net)
+{
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
+#endif
+}
+
 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 {
 	struct mr_table *mrt;
@@ -303,25 +310,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 	if (mrt)
 		return mrt;
 
-	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-	if (!mrt)
-		return NULL;
-	mrt->id = id;
-	write_pnet(&mrt->net, net);
-
-	rhltable_init(&mrt->mfc_hash, &ip6mr_rht_params);
-	INIT_LIST_HEAD(&mrt->mfc_cache_list);
-	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-	timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
-
-#ifdef CONFIG_IPV6_PIMSM_V2
-	mrt->mroute_reg_vif_num = -1;
-#endif
-#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
-	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
-#endif
-	return mrt;
+	return mr_table_alloc(net, id, &ip6mr_rht_params,
+			      ipmr_expire_process, ip6mr_new_table_set);
 }
 
 static void ip6mr_free_table(struct mr_table *mrt)

From 494fff56379c4ad5b8fe36a5b7ffede4044ca7bb Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:34 +0200
Subject: [PATCH 0537/1678] ipmr, ip6mr: Make mfc_cache a common structure

mfc_cache and mfc6_cache are almost identical - the main difference is
in the origin/group addresses and comparison-key. Make a common
structure encapsulating most of the multicast routing logic  - mr_mfc
and convert both ipmr and ip6mr into using it.

For easy conversion [casting, in this case] mr_mfc has to be the first
field inside every multicast routing abstraction utilizing it.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.c |  21 +-
 include/linux/mroute.h                        |  45 +---
 include/linux/mroute6.h                       |  23 +-
 include/linux/mroute_base.h                   |  45 ++++
 net/ipv4/ipmr.c                               | 233 ++++++++--------
 net/ipv6/ip6mr.c                              | 248 +++++++++---------
 6 files changed, 312 insertions(+), 303 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index d20b143de3b4..978a3c70653a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
 
 	switch (mr_route->mr_table->proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
-		ivif = mr_route->mfc4->mfc_parent;
-		return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+		ivif = mr_route->mfc4->_c.mfc_parent;
+		return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
 	case MLXSW_SP_L3_PROTO_IPV6:
 		/* fall through */
 	default:
@@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 	mr_route->mfc4 = mfc;
 	mr_route->mr_table = mr_table;
 	for (i = 0; i < MAXVIFS; i++) {
-		if (mfc->mfc_un.res.ttls[i] != 255) {
+		if (mfc->_c.mfc_un.res.ttls[i] != 255) {
 			err = mlxsw_sp_mr_route_evif_link(mr_route,
 							  &mr_table->vifs[i]);
 			if (err)
@@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 				mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
 		}
 	}
-	mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+	mlxsw_sp_mr_route_ivif_link(mr_route,
+				    &mr_table->vifs[mfc->_c.mfc_parent]);
 
 	mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
 	return mr_route;
@@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
 	switch (mr_route->mr_table->proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
 		if (offload)
-			mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+			mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
 		else
-			mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+			mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
 		/* fall through */
@@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
 
 	switch (mr_route->mr_table->proto) {
 	case MLXSW_SP_L3_PROTO_IPV4:
-		if (mr_route->mfc4->mfc_un.res.pkt != packets)
-			mr_route->mfc4->mfc_un.res.lastuse = jiffies;
-		mr_route->mfc4->mfc_un.res.pkt = packets;
-		mr_route->mfc4->mfc_un.res.bytes = bytes;
+		if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
+			mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
+		mr_route->mfc4->_c.mfc_un.res.pkt = packets;
+		mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
 		/* fall through */
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 8688c5d03a24..63b36e6c72a0 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -81,28 +81,13 @@ struct mfc_cache_cmp_arg {
 
 /**
  * struct mfc_cache - multicast routing entries
- * @mnode: rhashtable list
+ * @_c: Common multicast routing information; has to be first [for casting]
  * @mfc_mcastgrp: destination multicast group address
  * @mfc_origin: source address
  * @cmparg: used for rhashtable comparisons
- * @mfc_parent: source interface (iif)
- * @mfc_flags: entry flags
- * @expires: unresolved entry expire time
- * @unresolved: unresolved cached skbs
- * @last_assert: time of last assert
- * @minvif: minimum VIF id
- * @maxvif: maximum VIF id
- * @bytes: bytes that have passed for this entry
- * @pkt: packets that have passed for this entry
- * @wrong_if: number of wrong source interface hits
- * @lastuse: time of last use of the group (traffic or update)
- * @ttls: OIF TTL threshold array
- * @refcount: reference count for this entry
- * @list: global entry list
- * @rcu: used for entry destruction
  */
 struct mfc_cache {
-	struct rhlist_head mnode;
+	struct mr_mfc _c;
 	union {
 		struct {
 			__be32 mfc_mcastgrp;
@@ -110,28 +95,6 @@ struct mfc_cache {
 		};
 		struct mfc_cache_cmp_arg cmparg;
 	};
-	vifi_t mfc_parent;
-	int mfc_flags;
-
-	union {
-		struct {
-			unsigned long expires;
-			struct sk_buff_head unresolved;
-		} unres;
-		struct {
-			unsigned long last_assert;
-			int minvif;
-			int maxvif;
-			unsigned long bytes;
-			unsigned long pkt;
-			unsigned long wrong_if;
-			unsigned long lastuse;
-			unsigned char ttls[MAXVIFS];
-			refcount_t refcount;
-		} res;
-	} mfc_un;
-	struct list_head list;
-	struct rcu_head	rcu;
 };
 
 struct mfc_entry_notifier_info {
@@ -155,12 +118,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
 
 static inline void ipmr_cache_put(struct mfc_cache *c)
 {
-	if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+	if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
 		ipmr_cache_free(c);
 }
 static inline void ipmr_cache_hold(struct mfc_cache *c)
 {
-	refcount_inc(&c->mfc_un.res.refcount);
+	refcount_inc(&c->_c.mfc_un.res.refcount);
 }
 
 #endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index d5c8dc155a42..6acf576fc135 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -71,7 +71,7 @@ struct mfc6_cache_cmp_arg {
 };
 
 struct mfc6_cache {
-	struct rhlist_head mnode;
+	struct mr_mfc _c;
 	union {
 		struct {
 			struct in6_addr mf6c_mcastgrp;
@@ -79,27 +79,6 @@ struct mfc6_cache {
 		};
 		struct mfc6_cache_cmp_arg cmparg;
 	};
-	mifi_t mf6c_parent;			/* Source interface		*/
-	int mfc_flags;				/* Flags on line		*/
-
-	union {
-		struct {
-			unsigned long expires;
-			struct sk_buff_head unresolved;	/* Unresolved buffers		*/
-		} unres;
-		struct {
-			unsigned long last_assert;
-			int minvif;
-			int maxvif;
-			unsigned long bytes;
-			unsigned long pkt;
-			unsigned long wrong_if;
-			unsigned long lastuse;
-			unsigned char ttls[MAXMIFS];	/* TTL thresholds		*/
-		} res;
-	} mfc_un;
-	struct list_head list;
-	struct rcu_head rcu;
 };
 
 #define MFC_STATIC		1
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 805305722803..2769e2f98b32 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -44,6 +44,51 @@ struct vif_device {
 
 #define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
 
+/**
+ * struct mr_mfc - common multicast routing entries
+ * @mnode: rhashtable list
+ * @mfc_parent: source interface (iif)
+ * @mfc_flags: entry flags
+ * @expires: unresolved entry expire time
+ * @unresolved: unresolved cached skbs
+ * @last_assert: time of last assert
+ * @minvif: minimum VIF id
+ * @maxvif: maximum VIF id
+ * @bytes: bytes that have passed for this entry
+ * @pkt: packets that have passed for this entry
+ * @wrong_if: number of wrong source interface hits
+ * @lastuse: time of last use of the group (traffic or update)
+ * @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
+ * @list: global entry list
+ * @rcu: used for entry destruction
+ */
+struct mr_mfc {
+	struct rhlist_head mnode;
+	unsigned short mfc_parent;
+	int mfc_flags;
+
+	union {
+		struct {
+			unsigned long expires;
+			struct sk_buff_head unresolved;
+		} unres;
+		struct {
+			unsigned long last_assert;
+			int minvif;
+			int maxvif;
+			unsigned long bytes;
+			unsigned long pkt;
+			unsigned long wrong_if;
+			unsigned long lastuse;
+			unsigned char ttls[MAXVIFS];
+			refcount_t refcount;
+		} res;
+	} mfc_un;
+	struct list_head list;
+	struct rcu_head	rcu;
+};
+
 /**
  * struct mr_table - a multicast routing table
  * @list: entry within a list of multicast routing tables
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f213933db177..3f7515086e85 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -106,7 +106,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			      struct mfc_cache *c, struct rtmsg *rtm);
+			      struct mr_mfc *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -343,7 +343,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params ipmr_rht_params = {
-	.head_offset = offsetof(struct mfc_cache, mnode),
+	.head_offset = offsetof(struct mr_mfc, mnode),
 	.key_offset = offsetof(struct mfc_cache, cmparg),
 	.key_len = sizeof(struct mfc_cache_cmp_arg),
 	.nelem_hint = 3,
@@ -752,14 +752,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 
 static void ipmr_cache_free_rcu(struct rcu_head *head)
 {
-	struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 
-	kmem_cache_free(mrt_cachep, c);
+	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
 }
 
 void ipmr_cache_free(struct mfc_cache *c)
 {
-	call_rcu(&c->rcu, ipmr_cache_free_rcu);
+	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
 }
 EXPORT_SYMBOL(ipmr_cache_free);
 
@@ -774,7 +774,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 
 	atomic_dec(&mrt->cache_resolve_queue_len);
 
-	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
+	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
 							sizeof(struct iphdr));
@@ -798,9 +798,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 static void ipmr_expire_process(struct timer_list *t)
 {
 	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
-	unsigned long now;
+	struct mr_mfc *c, *next;
 	unsigned long expires;
-	struct mfc_cache *c, *next;
+	unsigned long now;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
 		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
@@ -822,8 +822,8 @@ static void ipmr_expire_process(struct timer_list *t)
 		}
 
 		list_del(&c->list);
-		mroute_netlink_event(mrt, c, RTM_DELROUTE);
-		ipmr_destroy_unres(mrt, c);
+		mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
+		ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
 	}
 
 	if (!list_empty(&mrt->mfc_unres_queue))
@@ -834,7 +834,7 @@ out:
 }
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
 				   unsigned char *ttls)
 {
 	int vifi;
@@ -974,11 +974,11 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
 			.mfc_origin = origin
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc_cache *c;
+	struct mr_mfc *c;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		return c;
+		return (struct mfc_cache *)c;
 
 	return NULL;
 }
@@ -992,12 +992,12 @@ static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
 			.mfc_origin = htonl(INADDR_ANY)
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc_cache *c;
+	struct mr_mfc *c;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
 		if (c->mfc_un.res.ttls[vifi] < 255)
-			return c;
+			return (struct mfc_cache *)c;
 
 	return NULL;
 }
@@ -1011,20 +1011,22 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
 			.mfc_origin = htonl(INADDR_ANY)
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc_cache *c, *proxy;
+	struct mr_mfc *c;
 
 	if (mcastgrp == htonl(INADDR_ANY))
 		goto skip;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+		struct mfc_cache *proxy;
+
 		if (c->mfc_un.res.ttls[vifi] < 255)
-			return c;
+			return (struct mfc_cache *)c;
 
 		/* It's ok if the vifi is part of the static tree */
 		proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
-		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
-			return c;
+		if (proxy && proxy->_c.mfc_un.res.ttls[vifi] < 255)
+			return (struct mfc_cache *)c;
 	}
 
 skip:
@@ -1041,12 +1043,12 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
 			.mfc_origin = origin,
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc_cache *c;
+	struct mr_mfc *c;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
 		if (parent == -1 || parent == c->mfc_parent)
-			return c;
+			return (struct mfc_cache *)c;
 
 	return NULL;
 }
@@ -1057,9 +1059,9 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 
 	if (c) {
-		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-		c->mfc_un.res.minvif = MAXVIFS;
-		refcount_set(&c->mfc_un.res.refcount, 1);
+		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+		c->_c.mfc_un.res.minvif = MAXVIFS;
+		refcount_set(&c->_c.mfc_un.res.refcount, 1);
 	}
 	return c;
 }
@@ -1069,8 +1071,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 
 	if (c) {
-		skb_queue_head_init(&c->mfc_un.unres.unresolved);
-		c->mfc_un.unres.expires = jiffies + 10*HZ;
+		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 	}
 	return c;
 }
@@ -1083,12 +1085,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 	struct nlmsgerr *e;
 
 	/* Play the pending entries through our router */
-	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
 							sizeof(struct iphdr));
 
-			if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+			if (__ipmr_fill_mroute(mrt, skb, &c->_c,
+					       nlmsg_data(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) -
 						 (u8 *)nlh;
 			} else {
@@ -1196,7 +1199,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 	int err;
 
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
 		if (c->mfc_mcastgrp == iph->daddr &&
 		    c->mfc_origin == iph->saddr) {
 			found = true;
@@ -1215,12 +1218,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 		}
 
 		/* Fill in the new cache entry */
-		c->mfc_parent	= -1;
+		c->_c.mfc_parent = -1;
 		c->mfc_origin	= iph->saddr;
 		c->mfc_mcastgrp	= iph->daddr;
 
 		/* Reflect first query at mrouted. */
 		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
+
 		if (err < 0) {
 			/* If the report failed throw the cache entry
 			   out - Brad Parker
@@ -1233,15 +1237,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 		}
 
 		atomic_inc(&mrt->cache_resolve_queue_len);
-		list_add(&c->list, &mrt->mfc_unres_queue);
+		list_add(&c->_c.list, &mrt->mfc_unres_queue);
 		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
 
 		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
-			mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
+			mod_timer(&mrt->ipmr_expire_timer,
+				  c->_c.mfc_un.unres.expires);
 	}
 
 	/* See if we can append the packet */
-	if (c->mfc_un.unres.unresolved.qlen > 3) {
+	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
 		kfree_skb(skb);
 		err = -ENOBUFS;
 	} else {
@@ -1249,7 +1254,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
 			skb->dev = dev;
 			skb->skb_iif = dev->ifindex;
 		}
-		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
 		err = 0;
 	}
 
@@ -1271,8 +1276,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
 	rcu_read_unlock();
 	if (!c)
 		return -ENOENT;
-	rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
-	list_del_rcu(&c->list);
+	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
+	list_del_rcu(&c->_c.list);
 	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
 	mroute_netlink_event(mrt, c, RTM_DELROUTE);
 	ipmr_cache_put(c);
@@ -1284,6 +1289,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 			struct mfcctl *mfc, int mrtsock, int parent)
 {
 	struct mfc_cache *uc, *c;
+	struct mr_mfc *_uc;
 	bool found;
 	int ret;
 
@@ -1297,10 +1303,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 	rcu_read_unlock();
 	if (c) {
 		write_lock_bh(&mrt_lock);
-		c->mfc_parent = mfc->mfcc_parent;
-		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+		c->_c.mfc_parent = mfc->mfcc_parent;
+		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
 		if (!mrtsock)
-			c->mfc_flags |= MFC_STATIC;
+			c->_c.mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
 		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
 					      mrt->id);
@@ -1318,28 +1324,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 
 	c->mfc_origin = mfc->mfcc_origin.s_addr;
 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
-	c->mfc_parent = mfc->mfcc_parent;
-	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
+	c->_c.mfc_parent = mfc->mfcc_parent;
+	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
 	if (!mrtsock)
-		c->mfc_flags |= MFC_STATIC;
+		c->_c.mfc_flags |= MFC_STATIC;
 
-	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
 				  ipmr_rht_params);
 	if (ret) {
 		pr_err("ipmr: rhtable insert error %d\n", ret);
 		ipmr_cache_free(c);
 		return ret;
 	}
-	list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
 	/* Check to see if we resolved a queued list. If so we
 	 * need to send on the frames and tidy up.
 	 */
 	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+		uc = (struct mfc_cache *)_uc;
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
-			list_del(&uc->list);
+			list_del(&_uc->list);
 			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
@@ -1362,7 +1369,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
 	struct net *net = read_pnet(&mrt->net);
-	struct mfc_cache *c, *tmp;
+	struct mr_mfc *c, *tmp;
+	struct mfc_cache *cache;
 	LIST_HEAD(list);
 	int i;
 
@@ -1380,18 +1388,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 			continue;
 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
 		list_del_rcu(&c->list);
-		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+		cache = (struct mfc_cache *)c;
+		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
 					      mrt->id);
-		mroute_netlink_event(mrt, c, RTM_DELROUTE);
-		ipmr_cache_put(c);
+		mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+		ipmr_cache_put(cache);
 	}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 			list_del(&c->list);
-			mroute_netlink_event(mrt, c, RTM_DELROUTE);
-			ipmr_destroy_unres(mrt, c);
+			cache = (struct mfc_cache *)c;
+			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+			ipmr_destroy_unres(mrt, cache);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -1683,9 +1693,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 		rcu_read_lock();
 		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
 		if (c) {
-			sr.pktcnt = c->mfc_un.res.pkt;
-			sr.bytecnt = c->mfc_un.res.bytes;
-			sr.wrong_if = c->mfc_un.res.wrong_if;
+			sr.pktcnt = c->_c.mfc_un.res.pkt;
+			sr.bytecnt = c->_c.mfc_un.res.bytes;
+			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
 			rcu_read_unlock();
 
 			if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1757,9 +1767,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 		rcu_read_lock();
 		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
 		if (c) {
-			sr.pktcnt = c->mfc_un.res.pkt;
-			sr.bytecnt = c->mfc_un.res.bytes;
-			sr.wrong_if = c->mfc_un.res.wrong_if;
+			sr.pktcnt = c->_c.mfc_un.res.pkt;
+			sr.bytecnt = c->_c.mfc_un.res.bytes;
+			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
 			rcu_read_unlock();
 
 			if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1983,18 +1993,18 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 /* "local" means that we should preserve one skb (for local delivery) */
 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			  struct net_device *dev, struct sk_buff *skb,
-			  struct mfc_cache *cache, int local)
+			  struct mfc_cache *c, int local)
 {
 	int true_vifi = ipmr_find_vif(mrt, dev);
 	int psend = -1;
 	int vif, ct;
 
-	vif = cache->mfc_parent;
-	cache->mfc_un.res.pkt++;
-	cache->mfc_un.res.bytes += skb->len;
-	cache->mfc_un.res.lastuse = jiffies;
+	vif = c->_c.mfc_parent;
+	c->_c.mfc_un.res.pkt++;
+	c->_c.mfc_un.res.bytes += skb->len;
+	c->_c.mfc_un.res.lastuse = jiffies;
 
-	if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
+	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
 		struct mfc_cache *cache_proxy;
 
 		/* For an (*,G) entry, we only check that the incomming
@@ -2002,7 +2012,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 		 */
 		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
 		if (cache_proxy &&
-		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
 			goto forward;
 	}
 
@@ -2023,7 +2033,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			goto dont_forward;
 		}
 
-		cache->mfc_un.res.wrong_if++;
+		c->_c.mfc_un.res.wrong_if++;
 
 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2032,10 +2042,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 		     * large chunk of pimd to kernel. Ough... --ANK
 		     */
 		    (mrt->mroute_do_pim ||
-		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
+		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
-			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-			cache->mfc_un.res.last_assert = jiffies;
+			       c->_c.mfc_un.res.last_assert +
+			       MFC_ASSERT_THRESH)) {
+			c->_c.mfc_un.res.last_assert = jiffies;
 			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
 		}
 		goto dont_forward;
@@ -2046,33 +2057,33 @@ forward:
 	mrt->vif_table[vif].bytes_in += skb->len;
 
 	/* Forward the frame */
-	if (cache->mfc_origin == htonl(INADDR_ANY) &&
-	    cache->mfc_mcastgrp == htonl(INADDR_ANY)) {
+	if (c->mfc_origin == htonl(INADDR_ANY) &&
+	    c->mfc_mcastgrp == htonl(INADDR_ANY)) {
 		if (true_vifi >= 0 &&
-		    true_vifi != cache->mfc_parent &&
+		    true_vifi != c->_c.mfc_parent &&
 		    ip_hdr(skb)->ttl >
-				cache->mfc_un.res.ttls[cache->mfc_parent]) {
+				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
 			/* It's an (*,*) entry and the packet is not coming from
 			 * the upstream: forward the packet to the upstream
 			 * only.
 			 */
-			psend = cache->mfc_parent;
+			psend = c->_c.mfc_parent;
 			goto last_forward;
 		}
 		goto dont_forward;
 	}
-	for (ct = cache->mfc_un.res.maxvif - 1;
-	     ct >= cache->mfc_un.res.minvif; ct--) {
+	for (ct = c->_c.mfc_un.res.maxvif - 1;
+	     ct >= c->_c.mfc_un.res.minvif; ct--) {
 		/* For (*,G) entry, don't forward to the incoming interface */
-		if ((cache->mfc_origin != htonl(INADDR_ANY) ||
+		if ((c->mfc_origin != htonl(INADDR_ANY) ||
 		     ct != true_vifi) &&
-		    ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
+		    ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 				if (skb2)
 					ipmr_queue_xmit(net, mrt, true_vifi,
-							skb2, cache, psend);
+							skb2, c, psend);
 			}
 			psend = ct;
 		}
@@ -2084,9 +2095,9 @@ last_forward:
 
 			if (skb2)
 				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
-						cache, psend);
+						c, psend);
 		} else {
-			ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
+			ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
 			return;
 		}
 	}
@@ -2285,7 +2296,7 @@ drop:
 #endif
 
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			      struct mfc_cache *c, struct rtmsg *rtm)
+			      struct mr_mfc *c, struct rtmsg *rtm)
 {
 	struct rta_mfc_stats mfcs;
 	struct nlattr *mp_attr;
@@ -2401,7 +2412,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 	}
 
 	read_lock(&mrt_lock);
-	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
+	err = __ipmr_fill_mroute(mrt, skb, &cache->_c, rtm);
 	read_unlock(&mrt_lock);
 	rcu_read_unlock();
 	return err;
@@ -2429,7 +2440,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 		goto nla_put_failure;
 	rtm->rtm_type     = RTN_MULTICAST;
 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-	if (c->mfc_flags & MFC_STATIC)
+	if (c->_c.mfc_flags & MFC_STATIC)
 		rtm->rtm_protocol = RTPROT_STATIC;
 	else
 		rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2438,7 +2449,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
 	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
 		goto nla_put_failure;
-	err = __ipmr_fill_mroute(mrt, skb, c, rtm);
+	err = __ipmr_fill_mroute(mrt, skb, &c->_c, rtm);
 	/* do not break the dump if cache is unresolved */
 	if (err < 0 && err != -ENOENT)
 		goto nla_put_failure;
@@ -2479,7 +2490,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif),
+	skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
+				       mrt->maxvif),
 			GFP_ATOMIC);
 	if (!skb)
 		goto errout;
@@ -2624,10 +2636,10 @@ errout_free:
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct mr_table *mrt;
-	struct mfc_cache *mfc;
 	unsigned int t = 0, s_t;
 	unsigned int e = 0, s_e;
+	struct mr_table *mrt;
+	struct mr_mfc *mfc;
 
 	s_t = cb->args[0];
 	s_e = cb->args[1];
@@ -2642,8 +2654,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 			if (ipmr_fill_mroute(mrt, skb,
 					     NETLINK_CB(cb->skb).portid,
 					     cb->nlh->nlmsg_seq,
-					     mfc, RTM_NEWROUTE,
-					     NLM_F_MULTI) < 0)
+					     (struct mfc_cache *)mfc,
+					     RTM_NEWROUTE, NLM_F_MULTI) < 0)
 				goto done;
 next_entry:
 			e++;
@@ -2658,8 +2670,8 @@ next_entry:
 			if (ipmr_fill_mroute(mrt, skb,
 					     NETLINK_CB(cb->skb).portid,
 					     cb->nlh->nlmsg_seq,
-					     mfc, RTM_NEWROUTE,
-					     NLM_F_MULTI) < 0) {
+					     (struct mfc_cache *)mfc,
+					     RTM_NEWROUTE, NLM_F_MULTI) < 0) {
 				spin_unlock_bh(&mfc_unres_lock);
 				goto done;
 			}
@@ -3051,20 +3063,21 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 					  struct ipmr_mfc_iter *it, loff_t pos)
 {
 	struct mr_table *mrt = it->mrt;
-	struct mfc_cache *mfc;
+	struct mr_mfc *mfc;
 
 	rcu_read_lock();
 	it->cache = &mrt->mfc_cache_list;
 	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
 		if (pos-- == 0)
-			return mfc;
+			return (struct mfc_cache *)mfc;
 	rcu_read_unlock();
 
 	spin_lock_bh(&mfc_unres_lock);
 	it->cache = &mrt->mfc_unres_queue;
 	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
-			return mfc;
+			return (struct mfc_cache *)mfc;
+
 	spin_unlock_bh(&mfc_unres_lock);
 
 	it->cache = NULL;
@@ -3100,8 +3113,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ipmr_mfc_seq_idx(net, seq->private, 0);
 
-	if (mfc->list.next != it->cache)
-		return list_entry(mfc->list.next, struct mfc_cache, list);
+	if (mfc->_c.list.next != it->cache)
+		return (struct mfc_cache *)(list_entry(mfc->_c.list.next,
+						       struct mr_mfc, list));
 
 	if (it->cache == &mrt->mfc_unres_queue)
 		goto end_of_list;
@@ -3112,7 +3126,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	spin_lock_bh(&mfc_unres_lock);
 	if (!list_empty(it->cache))
-		return list_first_entry(it->cache, struct mfc_cache, list);
+		return (struct mfc_cache *)(list_first_entry(it->cache,
+							     struct mr_mfc,
+							     list));
 
 end_of_list:
 	spin_unlock_bh(&mfc_unres_lock);
@@ -3147,20 +3163,20 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%08X %08X %-3hd",
 			   (__force u32) mfc->mfc_mcastgrp,
 			   (__force u32) mfc->mfc_origin,
-			   mfc->mfc_parent);
+			   mfc->_c.mfc_parent);
 
 		if (it->cache != &mrt->mfc_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
-				   mfc->mfc_un.res.pkt,
-				   mfc->mfc_un.res.bytes,
-				   mfc->mfc_un.res.wrong_if);
-			for (n = mfc->mfc_un.res.minvif;
-			     n < mfc->mfc_un.res.maxvif; n++) {
+				   mfc->_c.mfc_un.res.pkt,
+				   mfc->_c.mfc_un.res.bytes,
+				   mfc->_c.mfc_un.res.wrong_if);
+			for (n = mfc->_c.mfc_un.res.minvif;
+			     n < mfc->_c.mfc_un.res.maxvif; n++) {
 				if (VIF_EXISTS(mrt, n) &&
-				    mfc->mfc_un.res.ttls[n] < 255)
+				    mfc->_c.mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
 					   " %2d:%-3d",
-					   n, mfc->mfc_un.res.ttls[n]);
+					   n, mfc->_c.mfc_un.res.ttls[n]);
 			}
 		} else {
 			/* unresolved mfc_caches don't contain
@@ -3219,7 +3235,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
 
 	ipmr_for_each_table(mrt, net) {
 		struct vif_device *v = &mrt->vif_table[0];
-		struct mfc_cache *mfc;
+		struct mr_mfc *mfc;
 		int vifi;
 
 		/* Notifiy on table VIF entries */
@@ -3236,7 +3252,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb)
 		/* Notify on table MFC entries */
 		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
 			call_ipmr_mfc_entry_notifier(nb, net,
-						     FIB_EVENT_ENTRY_ADD, mfc,
+						     FIB_EVENT_ENTRY_ADD,
+						     (struct mfc_cache *)mfc,
 						     mrt->id);
 	}
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d50852882966..3fe254f9f9b7 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -285,7 +285,7 @@ static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params ip6mr_rht_params = {
-	.head_offset = offsetof(struct mfc6_cache, mnode),
+	.head_offset = offsetof(struct mr_mfc, mnode),
 	.key_offset = offsetof(struct mfc6_cache, cmparg),
 	.key_len = sizeof(struct mfc6_cache_cmp_arg),
 	.nelem_hint = 3,
@@ -335,20 +335,20 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 					   struct ipmr_mfc_iter *it, loff_t pos)
 {
 	struct mr_table *mrt = it->mrt;
-	struct mfc6_cache *mfc;
+	struct mr_mfc *mfc;
 
 	rcu_read_lock();
 	it->cache = &mrt->mfc_cache_list;
 	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
 		if (pos-- == 0)
-			return mfc;
+			return (struct mfc6_cache *)mfc;
 	rcu_read_unlock();
 
 	spin_lock_bh(&mfc_unres_lock);
 	it->cache = &mrt->mfc_unres_queue;
 	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
-			return mfc;
+			return (struct mfc6_cache *)mfc;
 	spin_unlock_bh(&mfc_unres_lock);
 
 	it->cache = NULL;
@@ -492,8 +492,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ipmr_mfc_seq_idx(net, seq->private, 0);
 
-	if (mfc->list.next != it->cache)
-		return list_entry(mfc->list.next, struct mfc6_cache, list);
+	if (mfc->_c.list.next != it->cache)
+		return (struct mfc6_cache *)(list_entry(mfc->_c.list.next,
+							struct mr_mfc, list));
 
 	if (it->cache == &mrt->mfc_unres_queue)
 		goto end_of_list;
@@ -504,7 +505,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	spin_lock_bh(&mfc_unres_lock);
 	if (!list_empty(it->cache))
-		return list_first_entry(it->cache, struct mfc6_cache, list);
+		return (struct mfc6_cache *)(list_first_entry(it->cache,
+							      struct mr_mfc,
+							      list));
 
  end_of_list:
 	spin_unlock_bh(&mfc_unres_lock);
@@ -540,20 +543,20 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%pI6 %pI6 %-3hd",
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
-			   mfc->mf6c_parent);
+			   mfc->_c.mfc_parent);
 
 		if (it->cache != &mrt->mfc_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
-				   mfc->mfc_un.res.pkt,
-				   mfc->mfc_un.res.bytes,
-				   mfc->mfc_un.res.wrong_if);
-			for (n = mfc->mfc_un.res.minvif;
-			     n < mfc->mfc_un.res.maxvif; n++) {
+				   mfc->_c.mfc_un.res.pkt,
+				   mfc->_c.mfc_un.res.bytes,
+				   mfc->_c.mfc_un.res.wrong_if);
+			for (n = mfc->_c.mfc_un.res.minvif;
+			     n < mfc->_c.mfc_un.res.maxvif; n++) {
 				if (VIF_EXISTS(mrt, n) &&
-				    mfc->mfc_un.res.ttls[n] < 255)
+				    mfc->_c.mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
-						   " %2d:%-3d",
-						   n, mfc->mfc_un.res.ttls[n]);
+						   " %2d:%-3d", n,
+						   mfc->_c.mfc_un.res.ttls[n]);
 			}
 		} else {
 			/* unresolved mfc_caches don't contain
@@ -800,14 +803,14 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 
 static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 {
-	struct mfc6_cache *c = container_of(head, struct mfc6_cache, rcu);
+	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 
-	kmem_cache_free(mrt_cachep, c);
+	kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 }
 
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-	call_rcu(&c->rcu, ip6mr_cache_free_rcu);
+	call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 }
 
 /* Destroy an unresolved cache entry, killing queued skbs
@@ -821,7 +824,7 @@ static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 
 	atomic_dec(&mrt->cache_resolve_queue_len);
 
-	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
 							sizeof(struct ipv6hdr));
@@ -844,7 +847,7 @@ static void ipmr_do_expire_process(struct mr_table *mrt)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
-	struct mfc6_cache *c, *next;
+	struct mr_mfc *c, *next;
 
 	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
@@ -856,8 +859,8 @@ static void ipmr_do_expire_process(struct mr_table *mrt)
 		}
 
 		list_del(&c->list);
-		mr6_netlink_event(mrt, c, RTM_DELROUTE);
-		ip6mr_destroy_unres(mrt, c);
+		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+		ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 	}
 
 	if (!list_empty(&mrt->mfc_unres_queue))
@@ -882,7 +885,7 @@ static void ipmr_expire_process(struct timer_list *t)
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
 static void ip6mr_update_thresholds(struct mr_table *mrt,
-				    struct mfc6_cache *cache,
+				    struct mr_mfc *cache,
 				    unsigned char *ttls)
 {
 	int vifi;
@@ -986,11 +989,11 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 		.mf6c_mcastgrp = *mcastgrp,
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc6_cache *c;
+	struct mr_mfc *c;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		return c;
+		return (struct mfc6_cache *)c;
 
 	return NULL;
 }
@@ -1004,12 +1007,12 @@ static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr_table *mrt,
 		.mf6c_mcastgrp = in6addr_any,
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc6_cache *c;
+	struct mr_mfc *c;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
 		if (c->mfc_un.res.ttls[mifi] < 255)
-			return c;
+			return (struct mfc6_cache *)c;
 
 	return NULL;
 }
@@ -1024,7 +1027,8 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 		.mf6c_mcastgrp = *mcastgrp,
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc6_cache *c, *proxy;
+	struct mr_mfc *c;
+	struct mfc6_cache *proxy;
 
 	if (ipv6_addr_any(mcastgrp))
 		goto skip;
@@ -1032,12 +1036,12 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
 		if (c->mfc_un.res.ttls[mifi] < 255)
-			return c;
+			return (struct mfc6_cache *)c;
 
 		/* It's ok if the mifi is part of the static tree */
-		proxy = ip6mr_cache_find_any_parent(mrt, c->mf6c_parent);
-		if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
-			return c;
+		proxy = ip6mr_cache_find_any_parent(mrt, c->mfc_parent);
+		if (proxy && proxy->_c.mfc_un.res.ttls[mifi] < 255)
+			return (struct mfc6_cache *)c;
 	}
 
 skip:
@@ -1056,12 +1060,12 @@ ip6mr_cache_find_parent(struct mr_table *mrt,
 		.mf6c_mcastgrp = *mcastgrp,
 	};
 	struct rhlist_head *tmp, *list;
-	struct mfc6_cache *c;
+	struct mr_mfc *c;
 
 	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		if (parent == -1 || parent == c->mf6c_parent)
-			return c;
+		if (parent == -1 || parent == c->mfc_parent)
+			return (struct mfc6_cache *)c;
 
 	return NULL;
 }
@@ -1074,8 +1078,8 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (!c)
 		return NULL;
-	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
-	c->mfc_un.res.minvif = MAXMIFS;
+	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+	c->_c.mfc_un.res.minvif = MAXMIFS;
 	return c;
 }
 
@@ -1084,8 +1088,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (!c)
 		return NULL;
-	skb_queue_head_init(&c->mfc_un.unres.unresolved);
-	c->mfc_un.unres.expires = jiffies + 10 * HZ;
+	skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+	c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 	return c;
 }
 
@@ -1102,7 +1106,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
 	 *	Play the pending entries through our router
 	 */
 
-	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
 		if (ipv6_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = skb_pull(skb,
 							sizeof(struct ipv6hdr));
@@ -1221,19 +1225,16 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
 	return ret;
 }
 
-/*
- *	Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+				  struct sk_buff *skb)
 {
+	struct mfc6_cache *c;
 	bool found = false;
 	int err;
-	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
+	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
 			found = true;
@@ -1254,10 +1255,8 @@ ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, struct sk_buff *skb)
 			return -ENOBUFS;
 		}
 
-		/*
-		 *	Fill in the new cache entry
-		 */
-		c->mf6c_parent = -1;
+		/* Fill in the new cache entry */
+		c->_c.mfc_parent = -1;
 		c->mf6c_origin = ipv6_hdr(skb)->saddr;
 		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
 
@@ -1277,20 +1276,18 @@ ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&mrt->cache_resolve_queue_len);
-		list_add(&c->list, &mrt->mfc_unres_queue);
+		list_add(&c->_c.list, &mrt->mfc_unres_queue);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 
 		ipmr_do_expire_process(mrt);
 	}
 
-	/*
-	 *	See if we can append the packet
-	 */
-	if (c->mfc_un.unres.unresolved.qlen > 3) {
+	/* See if we can append the packet */
+	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
 		kfree_skb(skb);
 		err = -ENOBUFS;
 	} else {
-		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
 		err = 0;
 	}
 
@@ -1314,8 +1311,8 @@ static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
 	rcu_read_unlock();
 	if (!c)
 		return -ENOENT;
-	rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
-	list_del_rcu(&c->list);
+	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+	list_del_rcu(&c->_c.list);
 
 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
 	ip6mr_cache_free(c);
@@ -1454,6 +1451,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 {
 	unsigned char ttls[MAXMIFS];
 	struct mfc6_cache *uc, *c;
+	struct mr_mfc *_uc;
 	bool found;
 	int i, err;
 
@@ -1473,10 +1471,10 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 	rcu_read_unlock();
 	if (c) {
 		write_lock_bh(&mrt_lock);
-		c->mf6c_parent = mfc->mf6cc_parent;
-		ip6mr_update_thresholds(mrt, c, ttls);
+		c->_c.mfc_parent = mfc->mf6cc_parent;
+		ip6mr_update_thresholds(mrt, &c->_c, ttls);
 		if (!mrtsock)
-			c->mfc_flags |= MFC_STATIC;
+			c->_c.mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 		return 0;
@@ -1492,29 +1490,30 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 
 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
-	c->mf6c_parent = mfc->mf6cc_parent;
-	ip6mr_update_thresholds(mrt, c, ttls);
+	c->_c.mfc_parent = mfc->mf6cc_parent;
+	ip6mr_update_thresholds(mrt, &c->_c, ttls);
 	if (!mrtsock)
-		c->mfc_flags |= MFC_STATIC;
+		c->_c.mfc_flags |= MFC_STATIC;
 
-	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode,
+	err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
 				  ip6mr_rht_params);
 	if (err) {
 		pr_err("ip6mr: rhtable insert error %d\n", err);
 		ip6mr_cache_free(c);
 		return err;
 	}
-	list_add_tail_rcu(&c->list, &mrt->mfc_cache_list);
+	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
 
 	/* Check to see if we resolved a queued list. If so we
 	 * need to send on the frames and tidy up.
 	 */
 	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
+	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+		uc = (struct mfc6_cache *)_uc;
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
-			list_del(&uc->list);
+			list_del(&_uc->list);
 			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
@@ -1538,7 +1537,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 
 static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
-	struct mfc6_cache *c, *tmp;
+	struct mr_mfc *c, *tmp;
 	LIST_HEAD(list);
 	int i;
 
@@ -1556,16 +1555,17 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 			continue;
 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
 		list_del_rcu(&c->list);
-		mr6_netlink_event(mrt, c, RTM_DELROUTE);
-		ip6mr_cache_free(c);
+		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+		ip6mr_cache_free((struct mfc6_cache *)c);
 	}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 			list_del(&c->list);
-			mr6_netlink_event(mrt, c, RTM_DELROUTE);
-			ip6mr_destroy_unres(mrt, c);
+			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+					  RTM_DELROUTE);
+			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -1899,9 +1899,9 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 		rcu_read_lock();
 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
-			sr.pktcnt = c->mfc_un.res.pkt;
-			sr.bytecnt = c->mfc_un.res.bytes;
-			sr.wrong_if = c->mfc_un.res.wrong_if;
+			sr.pktcnt = c->_c.mfc_un.res.pkt;
+			sr.bytecnt = c->_c.mfc_un.res.bytes;
+			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
 			rcu_read_unlock();
 
 			if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1973,9 +1973,9 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 		rcu_read_lock();
 		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
-			sr.pktcnt = c->mfc_un.res.pkt;
-			sr.bytecnt = c->mfc_un.res.bytes;
-			sr.wrong_if = c->mfc_un.res.wrong_if;
+			sr.pktcnt = c->_c.mfc_un.res.pkt;
+			sr.bytecnt = c->_c.mfc_un.res.bytes;
+			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
 			rcu_read_unlock();
 
 			if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -2089,18 +2089,18 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
 }
 
 static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
-			   struct sk_buff *skb, struct mfc6_cache *cache)
+			   struct sk_buff *skb, struct mfc6_cache *c)
 {
 	int psend = -1;
 	int vif, ct;
 	int true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
-	vif = cache->mf6c_parent;
-	cache->mfc_un.res.pkt++;
-	cache->mfc_un.res.bytes += skb->len;
-	cache->mfc_un.res.lastuse = jiffies;
+	vif = c->_c.mfc_parent;
+	c->_c.mfc_un.res.pkt++;
+	c->_c.mfc_un.res.bytes += skb->len;
+	c->_c.mfc_un.res.lastuse = jiffies;
 
-	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+	if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
 		struct mfc6_cache *cache_proxy;
 
 		/* For an (*,G) entry, we only check that the incoming
@@ -2109,7 +2109,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 		rcu_read_lock();
 		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
 		if (cache_proxy &&
-		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255) {
+		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
 			rcu_read_unlock();
 			goto forward;
 		}
@@ -2120,7 +2120,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
 	if (mrt->vif_table[vif].dev != skb->dev) {
-		cache->mfc_un.res.wrong_if++;
+		c->_c.mfc_un.res.wrong_if++;
 
 		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -2129,10 +2129,11 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
 		    (mrt->mroute_do_pim ||
-		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
+		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
-			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
-			cache->mfc_un.res.last_assert = jiffies;
+			       c->_c.mfc_un.res.last_assert +
+			       MFC_ASSERT_THRESH)) {
+			c->_c.mfc_un.res.last_assert = jiffies;
 			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
 		}
 		goto dont_forward;
@@ -2145,36 +2146,38 @@ forward:
 	/*
 	 *	Forward the frame
 	 */
-	if (ipv6_addr_any(&cache->mf6c_origin) &&
-	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+	if (ipv6_addr_any(&c->mf6c_origin) &&
+	    ipv6_addr_any(&c->mf6c_mcastgrp)) {
 		if (true_vifi >= 0 &&
-		    true_vifi != cache->mf6c_parent &&
+		    true_vifi != c->_c.mfc_parent &&
 		    ipv6_hdr(skb)->hop_limit >
-				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
 			/* It's an (*,*) entry and the packet is not coming from
 			 * the upstream: forward the packet to the upstream
 			 * only.
 			 */
-			psend = cache->mf6c_parent;
+			psend = c->_c.mfc_parent;
 			goto last_forward;
 		}
 		goto dont_forward;
 	}
-	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+	for (ct = c->_c.mfc_un.res.maxvif - 1;
+	     ct >= c->_c.mfc_un.res.minvif; ct--) {
 		/* For (*,G) entry, don't forward to the incoming interface */
-		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
-		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+		if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+		    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ip6mr_forward2(net, mrt, skb2, cache, psend);
+					ip6mr_forward2(net, mrt, skb2,
+						       c, psend);
 			}
 			psend = ct;
 		}
 	}
 last_forward:
 	if (psend != -1) {
-		ip6mr_forward2(net, mrt, skb, cache, psend);
+		ip6mr_forward2(net, mrt, skb, c, psend);
 		return;
 	}
 
@@ -2252,21 +2255,22 @@ static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	int ct;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->mf6c_parent >= MAXMIFS) {
+	if (c->_c.mfc_parent >= MAXMIFS) {
 		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
 		return -ENOENT;
 	}
 
-	if (VIF_EXISTS(mrt, c->mf6c_parent) &&
+	if (VIF_EXISTS(mrt, c->_c.mfc_parent) &&
 	    nla_put_u32(skb, RTA_IIF,
-			mrt->vif_table[c->mf6c_parent].dev->ifindex) < 0)
+			mrt->vif_table[c->_c.mfc_parent].dev->ifindex) < 0)
 		return -EMSGSIZE;
 	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
 	if (!mp_attr)
 		return -EMSGSIZE;
 
-	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+	for (ct = c->_c.mfc_un.res.minvif;
+	     ct < c->_c.mfc_un.res.maxvif; ct++) {
+		if (VIF_EXISTS(mrt, ct) && c->_c.mfc_un.res.ttls[ct] < 255) {
 			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
 			if (!nhp) {
 				nla_nest_cancel(skb, mp_attr);
@@ -2274,7 +2278,7 @@ static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			}
 
 			nhp->rtnh_flags = 0;
-			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+			nhp->rtnh_hops = c->_c.mfc_un.res.ttls[ct];
 			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
@@ -2282,12 +2286,12 @@ static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 
 	nla_nest_end(skb, mp_attr);
 
-	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+	lastuse = READ_ONCE(c->_c.mfc_un.res.lastuse);
 	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
 
-	mfcs.mfcs_packets = c->mfc_un.res.pkt;
-	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+	mfcs.mfcs_packets = c->_c.mfc_un.res.pkt;
+	mfcs.mfcs_bytes = c->_c.mfc_un.res.bytes;
+	mfcs.mfcs_wrong_if = c->_c.mfc_un.res.wrong_if;
 	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
 	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
 			      RTA_PAD))
@@ -2363,7 +2367,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
 	}
 
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
-		cache->mfc_flags |= MFC_NOTIFY;
+		cache->_c.mfc_flags |= MFC_NOTIFY;
 
 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
@@ -2392,7 +2396,7 @@ static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 		goto nla_put_failure;
 	rtm->rtm_type = RTN_MULTICAST;
 	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
-	if (c->mfc_flags & MFC_STATIC)
+	if (c->_c.mfc_flags & MFC_STATIC)
 		rtm->rtm_protocol = RTPROT_STATIC;
 	else
 		rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2442,7 +2446,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+	skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
 			GFP_ATOMIC);
 	if (!skb)
 		goto errout;
@@ -2528,10 +2532,10 @@ errout:
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct mr_table *mrt;
-	struct mfc6_cache *mfc;
 	unsigned int t = 0, s_t;
 	unsigned int e = 0, s_e;
+	struct mr_table *mrt;
+	struct mr_mfc *mfc;
 
 	s_t = cb->args[0];
 	s_e = cb->args[1];
@@ -2546,8 +2550,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 			if (ip6mr_fill_mroute(mrt, skb,
 					      NETLINK_CB(cb->skb).portid,
 					      cb->nlh->nlmsg_seq,
-					      mfc, RTM_NEWROUTE,
-					      NLM_F_MULTI) < 0)
+					      (struct mfc6_cache *)mfc,
+					      RTM_NEWROUTE, NLM_F_MULTI) < 0)
 				goto done;
 next_entry:
 			e++;
@@ -2562,8 +2566,8 @@ next_entry:
 			if (ip6mr_fill_mroute(mrt, skb,
 					      NETLINK_CB(cb->skb).portid,
 					      cb->nlh->nlmsg_seq,
-					      mfc, RTM_NEWROUTE,
-					      NLM_F_MULTI) < 0) {
+					     (struct mfc6_cache *)mfc,
+					      RTM_NEWROUTE, NLM_F_MULTI) < 0) {
 				spin_unlock_bh(&mfc_unres_lock);
 				goto done;
 			}

From 845c9a7ae7f5342ba42280c3a2f2aa92bce641d7 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:35 +0200
Subject: [PATCH 0538/1678] ipmr, ip6mr: Unite logic for searching in MFC cache

ipmr and ip6mr utilize the exact same methods for searching the
hashed resolved connections, difference being only in the construction
of the hash comparison key.

In order to unite the flow, introduce an mr_table operation set that
would contain the protocol specific information required for common
flows, in this case - the hash parameters and a comparison key
representing a (*,*) route.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 52 ++++++++++++++++++++++++--
 net/ipv4/ipmr.c             | 71 ++++++++---------------------------
 net/ipv4/ipmr_base.c        | 54 ++++++++++++++++++++++++++-
 net/ipv6/ip6mr.c            | 74 +++++++++----------------------------
 4 files changed, 134 insertions(+), 117 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 2769e2f98b32..46a082e25dab 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -89,10 +89,23 @@ struct mr_mfc {
 	struct rcu_head	rcu;
 };
 
+struct mr_table;
+
+/**
+ * struct mr_table_ops - callbacks and info for protocol-specific ops
+ * @rht_params: parameters for accessing the MFC hash
+ * @cmparg_any: a hash key to be used for matching on (*,*) routes
+ */
+struct mr_table_ops {
+	const struct rhashtable_params *rht_params;
+	void *cmparg_any;
+};
+
 /**
  * struct mr_table - a multicast routing table
  * @list: entry within a list of multicast routing tables
  * @net: net where this table belongs
+ * @ops: protocol specific operations
  * @id: identifier of the table
  * @mroute_sk: socket associated with the table
  * @ipmr_expire_timer: timer for handling unresolved routes
@@ -109,6 +122,7 @@ struct mr_mfc {
 struct mr_table {
 	struct list_head	list;
 	possible_net_t		net;
+	struct mr_table_ops	ops;
 	u32			id;
 	struct sock __rcu	*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
@@ -133,10 +147,19 @@ void vif_device_init(struct vif_device *v,
 
 struct mr_table *
 mr_table_alloc(struct net *net, u32 id,
-	       const struct rhashtable_params *rht_params,
+	       struct mr_table_ops *ops,
 	       void (*expire_func)(struct timer_list *t),
 	       void (*table_set)(struct mr_table *mrt,
 				 struct net *net));
+
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_find_parent(struct mr_table *mrt,
+			 void *hasharg, int parent);
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
+
 #else
 static inline void vif_device_init(struct vif_device *v,
 				   struct net_device *dev,
@@ -147,14 +170,37 @@ static inline void vif_device_init(struct vif_device *v,
 {
 }
 
-static inline struct mr_table *
+static inline void *
 mr_table_alloc(struct net *net, u32 id,
-	       const struct rhashtable_params *rht_params,
+	       struct mr_table_ops *ops,
 	       void (*expire_func)(struct timer_list *t),
 	       void (*table_set)(struct mr_table *mrt,
 				 struct net *net))
 {
 	return NULL;
 }
+
+static inline void *mr_mfc_find_parent(struct mr_table *mrt,
+				       void *hasharg, int parent)
+{
+	return NULL;
+}
+
+static inline void *mr_mfc_find_any_parent(struct mr_table *mrt,
+					   int vifi)
+{
+	return NULL;
+}
+
+static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
+					     int vifi, void *hasharg)
+{
+	return NULL;
+}
 #endif
+
+static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
+{
+	return mr_mfc_find_parent(mrt, hasharg, -1);
+}
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f7515086e85..00898c319cc5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -360,6 +360,16 @@ static void ipmr_new_table_set(struct mr_table *mrt,
 #endif
 }
 
+static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
+	.mfc_mcastgrp = htonl(INADDR_ANY),
+	.mfc_origin = htonl(INADDR_ANY),
+};
+
+static struct mr_table_ops ipmr_mr_table_ops = {
+	.rht_params = &ipmr_rht_params,
+	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
+};
+
 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 {
 	struct mr_table *mrt;
@@ -372,7 +382,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 	if (mrt)
 		return mrt;
 
-	return mr_table_alloc(net, id, &ipmr_rht_params,
+	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
 			      ipmr_expire_process, ipmr_new_table_set);
 }
 
@@ -973,33 +983,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
 			.mfc_mcastgrp = mcastgrp,
 			.mfc_origin = origin
 	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
 
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		return (struct mfc_cache *)c;
-
-	return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt,
-						    int vifi)
-{
-	struct mfc_cache_cmp_arg arg = {
-			.mfc_mcastgrp = htonl(INADDR_ANY),
-			.mfc_origin = htonl(INADDR_ANY)
-	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
-
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		if (c->mfc_un.res.ttls[vifi] < 255)
-			return (struct mfc_cache *)c;
-
-	return NULL;
+	return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
@@ -1010,27 +995,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
 			.mfc_mcastgrp = mcastgrp,
 			.mfc_origin = htonl(INADDR_ANY)
 	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
 
 	if (mcastgrp == htonl(INADDR_ANY))
-		goto skip;
-
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
-		struct mfc_cache *proxy;
-
-		if (c->mfc_un.res.ttls[vifi] < 255)
-			return (struct mfc_cache *)c;
-
-		/* It's ok if the vifi is part of the static tree */
-		proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent);
-		if (proxy && proxy->_c.mfc_un.res.ttls[vifi] < 255)
-			return (struct mfc_cache *)c;
-	}
-
-skip:
-	return ipmr_cache_find_any_parent(mrt, vifi);
+		return mr_mfc_find_any_parent(mrt, vifi);
+	return mr_mfc_find_any(mrt, vifi, &arg);
 }
 
 /* Look for a (S,G,iif) entry if parent != -1 */
@@ -1042,15 +1010,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
 			.mfc_mcastgrp = mcastgrp,
 			.mfc_origin = origin,
 	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
 
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		if (parent == -1 || parent == c->mfc_parent)
-			return (struct mfc_cache *)c;
-
-	return NULL;
+	return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
 /* Allocate a multicast cache entry */
@@ -2010,7 +1971,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 		/* For an (*,G) entry, we only check that the incomming
 		 * interface is part of the static tree.
 		 */
-		cache_proxy = ipmr_cache_find_any_parent(mrt, vif);
+		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
 		if (cache_proxy &&
 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
 			goto forward;
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 3e21a5806d0e..172f92acffef 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(vif_device_init);
 
 struct mr_table *
 mr_table_alloc(struct net *net, u32 id,
-	       const struct rhashtable_params *rht_params,
+	       struct mr_table_ops *ops,
 	       void (*expire_func)(struct timer_list *t),
 	       void (*table_set)(struct mr_table *mrt,
 				 struct net *net))
@@ -42,7 +42,8 @@ mr_table_alloc(struct net *net, u32 id,
 	mrt->id = id;
 	write_pnet(&mrt->net, net);
 
-	rhltable_init(&mrt->mfc_hash, rht_params);
+	mrt->ops = *ops;
+	rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
 	INIT_LIST_HEAD(&mrt->mfc_cache_list);
 	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
 
@@ -53,3 +54,52 @@ mr_table_alloc(struct net *net, u32 id,
 	return mrt;
 }
 EXPORT_SYMBOL(mr_table_alloc);
+
+void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
+{
+	struct rhlist_head *tmp, *list;
+	struct mr_mfc *c;
+
+	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode)
+		if (parent == -1 || parent == c->mfc_parent)
+			return c;
+
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_parent);
+
+void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
+{
+	struct rhlist_head *tmp, *list;
+	struct mr_mfc *c;
+
+	list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
+			       *mrt->ops.rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode)
+		if (c->mfc_un.res.ttls[vifi] < 255)
+			return c;
+
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_find_any_parent);
+
+void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
+{
+	struct rhlist_head *tmp, *list;
+	struct mr_mfc *c, *proxy;
+
+	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
+	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
+		if (c->mfc_un.res.ttls[vifi] < 255)
+			return c;
+
+		/* It's ok if the vifi is part of the static tree */
+		proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
+		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
+			return c;
+	}
+
+	return mr_mfc_find_any_parent(mrt, vifi);
+}
+EXPORT_SYMBOL(mr_mfc_find_any);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 3fe254f9f9b7..ea902a952fb6 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -302,6 +302,16 @@ static void ip6mr_new_table_set(struct mr_table *mrt,
 #endif
 }
 
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+	.mf6c_origin = IN6ADDR_ANY_INIT,
+	.mf6c_mcastgrp = IN6ADDR_ANY_INIT,
+};
+
+static struct mr_table_ops ip6mr_mr_table_ops = {
+	.rht_params = &ip6mr_rht_params,
+	.cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
+
 static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 {
 	struct mr_table *mrt;
@@ -310,7 +320,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 	if (mrt)
 		return mrt;
 
-	return mr_table_alloc(net, id, &ip6mr_rht_params,
+	return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 			      ipmr_expire_process, ip6mr_new_table_set);
 }
 
@@ -988,33 +998,8 @@ static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 		.mf6c_origin = *origin,
 		.mf6c_mcastgrp = *mcastgrp,
 	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
 
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		return (struct mfc6_cache *)c;
-
-	return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr_table *mrt,
-						      mifi_t mifi)
-{
-	struct mfc6_cache_cmp_arg arg = {
-		.mf6c_origin = in6addr_any,
-		.mf6c_mcastgrp = in6addr_any,
-	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
-
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		if (c->mfc_un.res.ttls[mifi] < 255)
-			return (struct mfc6_cache *)c;
-
-	return NULL;
+	return mr_mfc_find(mrt, &arg);
 }
 
 /* Look for a (*,G) entry */
@@ -1026,26 +1011,10 @@ static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 		.mf6c_origin = in6addr_any,
 		.mf6c_mcastgrp = *mcastgrp,
 	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
-	struct mfc6_cache *proxy;
 
 	if (ipv6_addr_any(mcastgrp))
-		goto skip;
-
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
-		if (c->mfc_un.res.ttls[mifi] < 255)
-			return (struct mfc6_cache *)c;
-
-		/* It's ok if the mifi is part of the static tree */
-		proxy = ip6mr_cache_find_any_parent(mrt, c->mfc_parent);
-		if (proxy && proxy->_c.mfc_un.res.ttls[mifi] < 255)
-			return (struct mfc6_cache *)c;
-	}
-
-skip:
-	return ip6mr_cache_find_any_parent(mrt, mifi);
+		return mr_mfc_find_any_parent(mrt, mifi);
+	return mr_mfc_find_any(mrt, mifi, &arg);
 }
 
 /* Look for a (S,G,iif) entry if parent != -1 */
@@ -1059,20 +1028,11 @@ ip6mr_cache_find_parent(struct mr_table *mrt,
 		.mf6c_origin = *origin,
 		.mf6c_mcastgrp = *mcastgrp,
 	};
-	struct rhlist_head *tmp, *list;
-	struct mr_mfc *c;
 
-	list = rhltable_lookup(&mrt->mfc_hash, &arg, ip6mr_rht_params);
-	rhl_for_each_entry_rcu(c, tmp, list, mnode)
-		if (parent == -1 || parent == c->mfc_parent)
-			return (struct mfc6_cache *)c;
-
-	return NULL;
+	return mr_mfc_find_parent(mrt, &arg, parent);
 }
 
-/*
- *	Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
 static struct mfc6_cache *ip6mr_cache_alloc(void)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
@@ -2107,7 +2067,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 		 * interface is part of the static tree.
 		 */
 		rcu_read_lock();
-		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
 		if (cache_proxy &&
 		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
 			rcu_read_unlock();

From c8d6196803265484f7e1cdd1b00a188dc59a5988 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:36 +0200
Subject: [PATCH 0539/1678] ipmr, ip6mr: Unite mfc seq logic

With the exception of the final dump, ipmr and ip6mr have the exact same
seq logic for traversing a given mr_table. Refactor that code and make
it common.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 69 ++++++++++++++++++++++++++
 net/ipv4/ipmr.c             | 93 ++---------------------------------
 net/ipv4/ipmr_base.c        | 62 ++++++++++++++++++++++++
 net/ipv6/ip6mr.c            | 97 +++----------------------------------
 4 files changed, 143 insertions(+), 178 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 46a082e25dab..a007c5ad0fde 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -3,6 +3,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/rhashtable.h>
+#include <linux/spinlock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
@@ -203,4 +204,72 @@ static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
 {
 	return mr_mfc_find_parent(mrt, hasharg, -1);
 }
+
+#ifdef CONFIG_PROC_FS
+struct mr_mfc_iter {
+	struct seq_net_private p;
+	struct mr_table *mrt;
+	struct list_head *cache;
+
+	/* Lock protecting the mr_table's unresolved queue */
+	spinlock_t *lock;
+};
+
+#ifdef CONFIG_IP_MROUTE_COMMON
+/* These actually return 'struct mr_mfc *', but to avoid need for explicit
+ * castings they simply return void.
+ */
+void *mr_mfc_seq_idx(struct net *net,
+		     struct mr_mfc_iter *it, loff_t pos);
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+		      loff_t *pos);
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+				     struct mr_table *mrt, spinlock_t *lock)
+{
+	struct mr_mfc_iter *it = seq->private;
+
+	it->mrt = mrt;
+	it->cache = NULL;
+	it->lock = lock;
+
+	return *pos ? mr_mfc_seq_idx(seq_file_net(seq),
+				     seq->private, *pos - 1)
+		    : SEQ_START_TOKEN;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+	struct mr_mfc_iter *it = seq->private;
+	struct mr_table *mrt = it->mrt;
+
+	if (it->cache == &mrt->mfc_unres_queue)
+		spin_unlock_bh(it->lock);
+	else if (it->cache == &mrt->mfc_cache_list)
+		rcu_read_unlock();
+}
+#else
+static inline void *mr_mfc_seq_idx(struct net *net,
+				   struct mr_mfc_iter *it, loff_t pos)
+{
+	return NULL;
+}
+
+static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+				    loff_t *pos)
+{
+	return NULL;
+}
+
+static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos,
+				     struct mr_table *mrt, spinlock_t *lock)
+{
+	return NULL;
+}
+
+static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif
+#endif
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 00898c319cc5..1eb19d9b1be0 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3014,41 +3014,8 @@ static const struct file_operations ipmr_vif_fops = {
 	.release = seq_release_net,
 };
 
-struct ipmr_mfc_iter {
-	struct seq_net_private p;
-	struct mr_table *mrt;
-	struct list_head *cache;
-};
-
-static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
-					  struct ipmr_mfc_iter *it, loff_t pos)
-{
-	struct mr_table *mrt = it->mrt;
-	struct mr_mfc *mfc;
-
-	rcu_read_lock();
-	it->cache = &mrt->mfc_cache_list;
-	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-		if (pos-- == 0)
-			return (struct mfc_cache *)mfc;
-	rcu_read_unlock();
-
-	spin_lock_bh(&mfc_unres_lock);
-	it->cache = &mrt->mfc_unres_queue;
-	list_for_each_entry(mfc, it->cache, list)
-		if (pos-- == 0)
-			return (struct mfc_cache *)mfc;
-
-	spin_unlock_bh(&mfc_unres_lock);
-
-	it->cache = NULL;
-	return NULL;
-}
-
-
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct mr_table *mrt;
 
@@ -3056,57 +3023,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 	if (!mrt)
 		return ERR_PTR(-ENOENT);
 
-	it->mrt = mrt;
-	it->cache = NULL;
-	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-		: SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct ipmr_mfc_iter *it = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = it->mrt;
-	struct mfc_cache *mfc = v;
-
-	++*pos;
-
-	if (v == SEQ_START_TOKEN)
-		return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-	if (mfc->_c.list.next != it->cache)
-		return (struct mfc_cache *)(list_entry(mfc->_c.list.next,
-						       struct mr_mfc, list));
-
-	if (it->cache == &mrt->mfc_unres_queue)
-		goto end_of_list;
-
-	/* exhausted cache_array, show unresolved */
-	rcu_read_unlock();
-	it->cache = &mrt->mfc_unres_queue;
-
-	spin_lock_bh(&mfc_unres_lock);
-	if (!list_empty(it->cache))
-		return (struct mfc_cache *)(list_first_entry(it->cache,
-							     struct mr_mfc,
-							     list));
-
-end_of_list:
-	spin_unlock_bh(&mfc_unres_lock);
-	it->cache = NULL;
-
-	return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-	struct ipmr_mfc_iter *it = seq->private;
-	struct mr_table *mrt = it->mrt;
-
-	if (it->cache == &mrt->mfc_unres_queue)
-		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == &mrt->mfc_cache_list)
-		rcu_read_unlock();
+	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -3118,7 +3035,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
 	} else {
 		const struct mfc_cache *mfc = v;
-		const struct ipmr_mfc_iter *it = seq->private;
+		const struct mr_mfc_iter *it = seq->private;
 		const struct mr_table *mrt = it->mrt;
 
 		seq_printf(seq, "%08X %08X %-3hd",
@@ -3152,15 +3069,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
 	.start = ipmr_mfc_seq_start,
-	.next  = ipmr_mfc_seq_next,
-	.stop  = ipmr_mfc_seq_stop,
+	.next  = mr_mfc_seq_next,
+	.stop  = mr_mfc_seq_stop,
 	.show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-			    sizeof(struct ipmr_mfc_iter));
+			    sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ipmr_mfc_fops = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 172f92acffef..37ad0a793035 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -103,3 +103,65 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
 	return mr_mfc_find_any_parent(mrt, vifi);
 }
 EXPORT_SYMBOL(mr_mfc_find_any);
+
+#ifdef CONFIG_PROC_FS
+void *mr_mfc_seq_idx(struct net *net,
+		     struct mr_mfc_iter *it, loff_t pos)
+{
+	struct mr_table *mrt = it->mrt;
+	struct mr_mfc *mfc;
+
+	rcu_read_lock();
+	it->cache = &mrt->mfc_cache_list;
+	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+		if (pos-- == 0)
+			return mfc;
+	rcu_read_unlock();
+
+	spin_lock_bh(it->lock);
+	it->cache = &mrt->mfc_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
+		if (pos-- == 0)
+			return mfc;
+	spin_unlock_bh(it->lock);
+
+	it->cache = NULL;
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_idx);
+
+void *mr_mfc_seq_next(struct seq_file *seq, void *v,
+		      loff_t *pos)
+{
+	struct mr_mfc_iter *it = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = it->mrt;
+	struct mr_mfc *c = v;
+
+	++*pos;
+
+	if (v == SEQ_START_TOKEN)
+		return mr_mfc_seq_idx(net, seq->private, 0);
+
+	if (c->list.next != it->cache)
+		return list_entry(c->list.next, struct mr_mfc, list);
+
+	if (it->cache == &mrt->mfc_unres_queue)
+		goto end_of_list;
+
+	/* exhausted cache_array, show unresolved */
+	rcu_read_unlock();
+	it->cache = &mrt->mfc_unres_queue;
+
+	spin_lock_bh(it->lock);
+	if (!list_empty(it->cache))
+		return list_first_entry(it->cache, struct mr_mfc, list);
+
+end_of_list:
+	spin_unlock_bh(it->lock);
+	it->cache = NULL;
+
+	return NULL;
+}
+EXPORT_SYMBOL(mr_mfc_seq_next);
+#endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ea902a952fb6..26315065e7df 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -333,40 +333,8 @@ static void ip6mr_free_table(struct mr_table *mrt)
 }
 
 #ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
-	struct seq_net_private p;
-	struct mr_table *mrt;
-	struct list_head *cache;
-};
-
-
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
-					   struct ipmr_mfc_iter *it, loff_t pos)
-{
-	struct mr_table *mrt = it->mrt;
-	struct mr_mfc *mfc;
-
-	rcu_read_lock();
-	it->cache = &mrt->mfc_cache_list;
-	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-		if (pos-- == 0)
-			return (struct mfc6_cache *)mfc;
-	rcu_read_unlock();
-
-	spin_lock_bh(&mfc_unres_lock);
-	it->cache = &mrt->mfc_unres_queue;
-	list_for_each_entry(mfc, it->cache, list)
-		if (pos-- == 0)
-			return (struct mfc6_cache *)mfc;
-	spin_unlock_bh(&mfc_unres_lock);
-
-	it->cache = NULL;
-	return NULL;
-}
-
-/*
- *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
  */
 
 struct ipmr_vif_iter {
@@ -476,7 +444,6 @@ static const struct file_operations ip6mr_vif_fops = {
 
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct mr_table *mrt;
 
@@ -484,57 +451,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 	if (!mrt)
 		return ERR_PTR(-ENOENT);
 
-	it->mrt = mrt;
-	it->cache = NULL;
-	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
-		: SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct mfc6_cache *mfc = v;
-	struct ipmr_mfc_iter *it = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = it->mrt;
-
-	++*pos;
-
-	if (v == SEQ_START_TOKEN)
-		return ipmr_mfc_seq_idx(net, seq->private, 0);
-
-	if (mfc->_c.list.next != it->cache)
-		return (struct mfc6_cache *)(list_entry(mfc->_c.list.next,
-							struct mr_mfc, list));
-
-	if (it->cache == &mrt->mfc_unres_queue)
-		goto end_of_list;
-
-	/* exhausted cache_array, show unresolved */
-	rcu_read_unlock();
-	it->cache = &mrt->mfc_unres_queue;
-
-	spin_lock_bh(&mfc_unres_lock);
-	if (!list_empty(it->cache))
-		return (struct mfc6_cache *)(list_first_entry(it->cache,
-							      struct mr_mfc,
-							      list));
-
- end_of_list:
-	spin_unlock_bh(&mfc_unres_lock);
-	it->cache = NULL;
-
-	return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
-	struct ipmr_mfc_iter *it = seq->private;
-	struct mr_table *mrt = it->mrt;
-
-	if (it->cache == &mrt->mfc_unres_queue)
-		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == &mrt->mfc_cache_list)
-		rcu_read_unlock();
+	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -548,7 +465,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
 	} else {
 		const struct mfc6_cache *mfc = v;
-		const struct ipmr_mfc_iter *it = seq->private;
+		const struct mr_mfc_iter *it = seq->private;
 		struct mr_table *mrt = it->mrt;
 
 		seq_printf(seq, "%pI6 %pI6 %-3hd",
@@ -581,15 +498,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_mfc_seq_ops = {
 	.start = ipmr_mfc_seq_start,
-	.next  = ipmr_mfc_seq_next,
-	.stop  = ipmr_mfc_seq_stop,
+	.next  = mr_mfc_seq_next,
+	.stop  = mr_mfc_seq_stop,
 	.show  = ipmr_mfc_seq_show,
 };
 
 static int ipmr_mfc_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-			    sizeof(struct ipmr_mfc_iter));
+			    sizeof(struct mr_mfc_iter));
 }
 
 static const struct file_operations ip6mr_mfc_fops = {

From 3feda6b46f734704840685a62b645cbe4efb810c Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:37 +0200
Subject: [PATCH 0540/1678] ipmr, ip6mr: Unite vif seq functions

Same as previously done with the mfc seq, the logic for the vif seq is
refactored to be shared between ipmr and ip6mr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 33 ++++++++++++++++++++++++
 net/ipv4/ipmr.c             | 49 ++++--------------------------------
 net/ipv4/ipmr_base.c        | 33 ++++++++++++++++++++++++
 net/ipv6/ip6mr.c            | 50 ++++---------------------------------
 4 files changed, 76 insertions(+), 89 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index a007c5ad0fde..cfaec9bd2d3c 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -206,6 +206,12 @@ static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
 }
 
 #ifdef CONFIG_PROC_FS
+struct mr_vif_iter {
+	struct seq_net_private p;
+	struct mr_table *mrt;
+	int ct;
+};
+
 struct mr_mfc_iter {
 	struct seq_net_private p;
 	struct mr_table *mrt;
@@ -216,6 +222,16 @@ struct mr_mfc_iter {
 };
 
 #ifdef CONFIG_IP_MROUTE_COMMON
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos);
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return *pos ? mr_vif_seq_idx(seq_file_net(seq),
+				     seq->private, *pos - 1)
+		    : SEQ_START_TOKEN;
+}
+
 /* These actually return 'struct mr_mfc *', but to avoid need for explicit
  * castings they simply return void.
  */
@@ -249,6 +265,23 @@ static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v)
 		rcu_read_unlock();
 }
 #else
+static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter,
+				   loff_t pos)
+{
+	return NULL;
+}
+
+static inline void *mr_vif_seq_next(struct seq_file *seq,
+				    void *v, loff_t *pos)
+{
+	return NULL;
+}
+
+static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return NULL;
+}
+
 static inline void *mr_mfc_seq_idx(struct net *net,
 				   struct mr_mfc_iter *it, loff_t pos)
 {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1eb19d9b1be0..f5ff54297824 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2908,31 +2908,11 @@ out:
 /* The /proc interfaces to multicast routing :
  * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
  */
-struct ipmr_vif_iter {
-	struct seq_net_private p;
-	struct mr_table *mrt;
-	int ct;
-};
-
-static struct vif_device *ipmr_vif_seq_idx(struct net *net,
-					    struct ipmr_vif_iter *iter,
-					    loff_t pos)
-{
-	struct mr_table *mrt = iter->mrt;
-
-	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-		if (!VIF_EXISTS(mrt, iter->ct))
-			continue;
-		if (pos-- == 0)
-			return &mrt->vif_table[iter->ct];
-	}
-	return NULL;
-}
 
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(mrt_lock)
 {
-	struct ipmr_vif_iter *iter = seq->private;
+	struct mr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct mr_table *mrt;
 
@@ -2943,26 +2923,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	iter->mrt = mrt;
 
 	read_lock(&mrt_lock);
-	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
-		: SEQ_START_TOKEN;
-}
-
-static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct ipmr_vif_iter *iter = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = iter->mrt;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ipmr_vif_seq_idx(net, iter, 0);
-
-	while (++iter->ct < mrt->maxvif) {
-		if (!VIF_EXISTS(mrt, iter->ct))
-			continue;
-		return &mrt->vif_table[iter->ct];
-	}
-	return NULL;
+	return mr_vif_seq_start(seq, pos);
 }
 
 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -2973,7 +2934,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
-	struct ipmr_vif_iter *iter = seq->private;
+	struct mr_vif_iter *iter = seq->private;
 	struct mr_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
@@ -2996,7 +2957,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ipmr_vif_seq_ops = {
 	.start = ipmr_vif_seq_start,
-	.next  = ipmr_vif_seq_next,
+	.next  = mr_vif_seq_next,
 	.stop  = ipmr_vif_seq_stop,
 	.show  = ipmr_vif_seq_show,
 };
@@ -3004,7 +2965,7 @@ static const struct seq_operations ipmr_vif_seq_ops = {
 static int ipmr_vif_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
-			    sizeof(struct ipmr_vif_iter));
+			    sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ipmr_vif_fops = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 37ad0a793035..e1b7b639e9b1 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -105,6 +105,39 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
 EXPORT_SYMBOL(mr_mfc_find_any);
 
 #ifdef CONFIG_PROC_FS
+void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
+{
+	struct mr_table *mrt = iter->mrt;
+
+	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+		if (!VIF_EXISTS(mrt, iter->ct))
+			continue;
+		if (pos-- == 0)
+			return &mrt->vif_table[iter->ct];
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_idx);
+
+void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct mr_vif_iter *iter = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = iter->mrt;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return mr_vif_seq_idx(net, iter, 0);
+
+	while (++iter->ct < mrt->maxvif) {
+		if (!VIF_EXISTS(mrt, iter->ct))
+			continue;
+		return &mrt->vif_table[iter->ct];
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(mr_vif_seq_next);
+
 void *mr_mfc_seq_idx(struct net *net,
 		     struct mr_mfc_iter *it, loff_t pos)
 {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 26315065e7df..ddd9e6bba499 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -337,31 +337,10 @@ static void ip6mr_free_table(struct mr_table *mrt)
  * /proc/ip6_mr_cache /proc/ip6_mr_vif
  */
 
-struct ipmr_vif_iter {
-	struct seq_net_private p;
-	struct mr_table *mrt;
-	int ct;
-};
-
-static struct vif_device *ip6mr_vif_seq_idx(struct net *net,
-					    struct ipmr_vif_iter *iter,
-					    loff_t pos)
-{
-	struct mr_table *mrt = iter->mrt;
-
-	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
-		if (!VIF_EXISTS(mrt, iter->ct))
-			continue;
-		if (pos-- == 0)
-			return &mrt->vif_table[iter->ct];
-	}
-	return NULL;
-}
-
 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(mrt_lock)
 {
-	struct ipmr_vif_iter *iter = seq->private;
+	struct mr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct mr_table *mrt;
 
@@ -372,26 +351,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	iter->mrt = mrt;
 
 	read_lock(&mrt_lock);
-	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
-		: SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct ipmr_vif_iter *iter = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = iter->mrt;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ip6mr_vif_seq_idx(net, iter, 0);
-
-	while (++iter->ct < mrt->maxvif) {
-		if (!VIF_EXISTS(mrt, iter->ct))
-			continue;
-		return &mrt->vif_table[iter->ct];
-	}
-	return NULL;
+	return mr_vif_seq_start(seq, pos);
 }
 
 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -402,7 +362,7 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
-	struct ipmr_vif_iter *iter = seq->private;
+	struct mr_vif_iter *iter = seq->private;
 	struct mr_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
@@ -424,7 +384,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 static const struct seq_operations ip6mr_vif_seq_ops = {
 	.start = ip6mr_vif_seq_start,
-	.next  = ip6mr_vif_seq_next,
+	.next  = mr_vif_seq_next,
 	.stop  = ip6mr_vif_seq_stop,
 	.show  = ip6mr_vif_seq_show,
 };
@@ -432,7 +392,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
 static int ip6mr_vif_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
-			    sizeof(struct ipmr_vif_iter));
+			    sizeof(struct mr_vif_iter));
 }
 
 static const struct file_operations ip6mr_vif_fops = {

From 889cd83cbe411dda854429f3223ab2d31a860a4a Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:38 +0200
Subject: [PATCH 0541/1678] ip6mr: Remove MFC_NOTIFY and refactor flags

MFC_NOTIFY exists in ip6mr, probably as some legacy code
[was already removed for ipmr in commit
06bd6c0370bb ("net: ipmr: remove unused MFC_NOTIFY flag and make the flags enum").
Remove it from ip6mr as well, and move the enum into a common file;
Notice MFC_OFFLOAD is currently only used by ipmr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h      | 9 ---------
 include/linux/mroute6.h     | 3 ---
 include/linux/mroute_base.h | 9 +++++++++
 net/ipv6/ip6mr.c            | 3 ---
 4 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 63b36e6c72a0..7ed82e4f11b3 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -65,15 +65,6 @@ struct vif_entry_notifier_info {
 
 #define VIFF_STATIC 0x8000
 
-/* mfc_flags:
- * MFC_STATIC - the entry was added statically (not by a routing daemon)
- * MFC_OFFLOAD - the entry was offloaded to the hardware
- */
-enum {
-	MFC_STATIC = BIT(0),
-	MFC_OFFLOAD = BIT(1),
-};
-
 struct mfc_cache_cmp_arg {
 	__be32 mfc_mcastgrp;
 	__be32 mfc_origin;
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 6acf576fc135..1ac38e6819f5 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -81,9 +81,6 @@ struct mfc6_cache {
 	};
 };
 
-#define MFC_STATIC		1
-#define MFC_NOTIFY		2
-
 #define MFC_ASSERT_THRESH (3*HZ)		/* Maximal freq. of asserts */
 
 struct rtmsg;
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index cfaec9bd2d3c..f40202b16dae 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -45,6 +45,15 @@ struct vif_device {
 
 #define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
 
+/* mfc_flags:
+ * MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
+ */
+enum {
+	MFC_STATIC = BIT(0),
+	MFC_OFFLOAD = BIT(1),
+};
+
 /**
  * struct mr_mfc - common multicast routing entries
  * @mnode: rhashtable list
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ddd9e6bba499..c3b3f1c381e1 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2203,9 +2203,6 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
 		return err;
 	}
 
-	if (rtm->rtm_flags & RTM_F_NOTIFY)
-		cache->_c.mfc_flags |= MFC_NOTIFY;
-
 	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;

From 7b0db85737db3f4d76b2a412e4f19eae59b8b494 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Wed, 28 Feb 2018 23:29:39 +0200
Subject: [PATCH 0542/1678] ipmr, ip6mr: Unite dumproute flows

The various MFC entries are being held in the same kind of mr_tables
for both ipmr and ip6mr, and their traversal logic is identical.
Also, with the exception of the addresses [and other small tidbits]
the major bulk of the nla setting is identical.

Unite as much of the dumping as possible between the two.
Notice this requires creating an mr_table iterator for each, as the
for-each preprocessor macro can't be used by the common logic.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h |  29 +++++++
 net/ipv4/ipmr.c             | 161 +++++++++---------------------------
 net/ipv4/ipmr_base.c        | 123 +++++++++++++++++++++++++++
 net/ipv6/ip6mr.c            | 156 +++++++++-------------------------
 4 files changed, 230 insertions(+), 239 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index f40202b16dae..c2560cb50f1d 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -170,6 +170,16 @@ void *mr_mfc_find_parent(struct mr_table *mrt,
 void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi);
 void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
 
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+		   struct mr_mfc *c, struct rtmsg *rtm);
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+		     struct mr_table *(*iter)(struct net *net,
+					      struct mr_table *mrt),
+		     int (*fill)(struct mr_table *mrt,
+				 struct sk_buff *skb,
+				 u32 portid, u32 seq, struct mr_mfc *c,
+				 int cmd, int flags),
+		     spinlock_t *lock);
 #else
 static inline void vif_device_init(struct vif_device *v,
 				   struct net_device *dev,
@@ -207,6 +217,25 @@ static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt,
 {
 	return NULL;
 }
+
+static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+				 struct mr_mfc *c, struct rtmsg *rtm)
+{
+	return -EINVAL;
+}
+
+static inline int
+mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+		 struct mr_table *(*iter)(struct net *net,
+					  struct mr_table *mrt),
+		 int (*fill)(struct mr_table *mrt,
+			     struct sk_buff *skb,
+			     u32 portid, u32 seq, struct mr_mfc *c,
+			     int cmd, int flags),
+		 spinlock_t *lock)
+{
+	return -EINVAL;
+}
 #endif
 
 static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f5ff54297824..d752a70855d8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -105,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			  struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			      struct mr_mfc *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -117,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t);
 #define ipmr_for_each_table(mrt, net) \
 	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+					   struct mr_table *mrt)
+{
+	struct mr_table *ret;
+
+	if (!mrt)
+		ret = list_entry_rcu(net->ipv4.mr_tables.next,
+				     struct mr_table, list);
+	else
+		ret = list_entry_rcu(mrt->list.next,
+				     struct mr_table, list);
+
+	if (&ret->list == &net->ipv4.mr_tables)
+		return NULL;
+	return ret;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
 	struct mr_table *mrt;
@@ -284,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default);
 #define ipmr_for_each_table(mrt, net) \
 	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
 
+static struct mr_table *ipmr_mr_table_iter(struct net *net,
+					   struct mr_table *mrt)
+{
+	if (!mrt)
+		return net->ipv4.mrt;
+	return NULL;
+}
+
 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
 {
 	return net->ipv4.mrt;
@@ -1051,8 +1074,8 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 			struct nlmsghdr *nlh = skb_pull(skb,
 							sizeof(struct iphdr));
 
-			if (__ipmr_fill_mroute(mrt, skb, &c->_c,
-					       nlmsg_data(nlh)) > 0) {
+			if (mr_fill_mroute(mrt, skb, &c->_c,
+					   nlmsg_data(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) -
 						 (u8 *)nlh;
 			} else {
@@ -2256,66 +2279,6 @@ drop:
 }
 #endif
 
-static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			      struct mr_mfc *c, struct rtmsg *rtm)
-{
-	struct rta_mfc_stats mfcs;
-	struct nlattr *mp_attr;
-	struct rtnexthop *nhp;
-	unsigned long lastuse;
-	int ct;
-
-	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->mfc_parent >= MAXVIFS) {
-		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-		return -ENOENT;
-	}
-
-	if (VIF_EXISTS(mrt, c->mfc_parent) &&
-	    nla_put_u32(skb, RTA_IIF,
-			mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
-		return -EMSGSIZE;
-
-	if (c->mfc_flags & MFC_OFFLOAD)
-		rtm->rtm_flags |= RTNH_F_OFFLOAD;
-
-	if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
-		return -EMSGSIZE;
-
-	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
-			struct vif_device *vif;
-
-			if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) {
-				nla_nest_cancel(skb, mp_attr);
-				return -EMSGSIZE;
-			}
-
-			nhp->rtnh_flags = 0;
-			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			vif = &mrt->vif_table[ct];
-			nhp->rtnh_ifindex = vif->dev->ifindex;
-			nhp->rtnh_len = sizeof(*nhp);
-		}
-	}
-
-	nla_nest_end(skb, mp_attr);
-
-	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
-	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-	mfcs.mfcs_packets = c->mfc_un.res.pkt;
-	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
-	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
-	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-			      RTA_PAD))
-		return -EMSGSIZE;
-
-	rtm->rtm_type = RTN_MULTICAST;
-	return 1;
-}
-
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
 		   struct rtmsg *rtm, u32 portid)
@@ -2373,7 +2336,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 	}
 
 	read_lock(&mrt_lock);
-	err = __ipmr_fill_mroute(mrt, skb, &cache->_c, rtm);
+	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
 	read_unlock(&mrt_lock);
 	rcu_read_unlock();
 	return err;
@@ -2410,7 +2373,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
 	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
 		goto nla_put_failure;
-	err = __ipmr_fill_mroute(mrt, skb, &c->_c, rtm);
+	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
 	/* do not break the dump if cache is unresolved */
 	if (err < 0 && err != -ENOENT)
 		goto nla_put_failure;
@@ -2423,6 +2386,14 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
+			     int flags)
+{
+	return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
+				cmd, flags);
+}
+
 static size_t mroute_msgsize(bool unresolved, int maxvif)
 {
 	size_t len =
@@ -2596,62 +2567,8 @@ errout_free:
 
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
-	unsigned int t = 0, s_t;
-	unsigned int e = 0, s_e;
-	struct mr_table *mrt;
-	struct mr_mfc *mfc;
-
-	s_t = cb->args[0];
-	s_e = cb->args[1];
-
-	rcu_read_lock();
-	ipmr_for_each_table(mrt, net) {
-		if (t < s_t)
-			goto next_table;
-		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-			if (e < s_e)
-				goto next_entry;
-			if (ipmr_fill_mroute(mrt, skb,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq,
-					     (struct mfc_cache *)mfc,
-					     RTM_NEWROUTE, NLM_F_MULTI) < 0)
-				goto done;
-next_entry:
-			e++;
-		}
-		e = 0;
-		s_e = 0;
-
-		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-			if (e < s_e)
-				goto next_entry2;
-			if (ipmr_fill_mroute(mrt, skb,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq,
-					     (struct mfc_cache *)mfc,
-					     RTM_NEWROUTE, NLM_F_MULTI) < 0) {
-				spin_unlock_bh(&mfc_unres_lock);
-				goto done;
-			}
-next_entry2:
-			e++;
-		}
-		spin_unlock_bh(&mfc_unres_lock);
-		e = 0;
-		s_e = 0;
-next_table:
-		t++;
-	}
-done:
-	rcu_read_unlock();
-
-	cb->args[1] = e;
-	cb->args[0] = t;
-
-	return skb->len;
+	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
+				_ipmr_fill_mroute, &mfc_unres_lock);
 }
 
 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index e1b7b639e9b1..8ba55bfda817 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -198,3 +198,126 @@ end_of_list:
 }
 EXPORT_SYMBOL(mr_mfc_seq_next);
 #endif
+
+int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+		   struct mr_mfc *c, struct rtmsg *rtm)
+{
+	struct rta_mfc_stats mfcs;
+	struct nlattr *mp_attr;
+	struct rtnexthop *nhp;
+	unsigned long lastuse;
+	int ct;
+
+	/* If cache is unresolved, don't try to parse IIF and OIF */
+	if (c->mfc_parent >= MAXVIFS) {
+		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
+		return -ENOENT;
+	}
+
+	if (VIF_EXISTS(mrt, c->mfc_parent) &&
+	    nla_put_u32(skb, RTA_IIF,
+			mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
+		return -EMSGSIZE;
+
+	if (c->mfc_flags & MFC_OFFLOAD)
+		rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
+	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
+	if (!mp_attr)
+		return -EMSGSIZE;
+
+	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+			struct vif_device *vif;
+
+			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
+			if (!nhp) {
+				nla_nest_cancel(skb, mp_attr);
+				return -EMSGSIZE;
+			}
+
+			nhp->rtnh_flags = 0;
+			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
+			vif = &mrt->vif_table[ct];
+			nhp->rtnh_ifindex = vif->dev->ifindex;
+			nhp->rtnh_len = sizeof(*nhp);
+		}
+	}
+
+	nla_nest_end(skb, mp_attr);
+
+	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
+	mfcs.mfcs_packets = c->mfc_un.res.pkt;
+	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
+	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
+	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
+			      RTA_PAD))
+		return -EMSGSIZE;
+
+	rtm->rtm_type = RTN_MULTICAST;
+	return 1;
+}
+EXPORT_SYMBOL(mr_fill_mroute);
+
+int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
+		     struct mr_table *(*iter)(struct net *net,
+					      struct mr_table *mrt),
+		     int (*fill)(struct mr_table *mrt,
+				 struct sk_buff *skb,
+				 u32 portid, u32 seq, struct mr_mfc *c,
+				 int cmd, int flags),
+		     spinlock_t *lock)
+{
+	unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+	struct net *net = sock_net(skb->sk);
+	struct mr_table *mrt;
+	struct mr_mfc *mfc;
+
+	rcu_read_lock();
+	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
+		if (t < s_t)
+			goto next_table;
+		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+			if (e < s_e)
+				goto next_entry;
+			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq, mfc,
+				 RTM_NEWROUTE, NLM_F_MULTI) < 0)
+				goto done;
+next_entry:
+			e++;
+		}
+		e = 0;
+		s_e = 0;
+
+		spin_lock_bh(lock);
+		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+			if (e < s_e)
+				goto next_entry2;
+			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq, mfc,
+				 RTM_NEWROUTE, NLM_F_MULTI) < 0) {
+				spin_unlock_bh(lock);
+				goto done;
+			}
+next_entry2:
+			e++;
+		}
+		spin_unlock_bh(lock);
+		e = 0;
+		s_e = 0;
+next_table:
+		t++;
+	}
+done:
+	rcu_read_unlock();
+
+	cb->args[1] = e;
+	cb->args[0] = t;
+
+	return skb->len;
+}
+EXPORT_SYMBOL(mr_rtm_dumproute);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index c3b3f1c381e1..2a38f9de45d3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -87,8 +87,6 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
 			   struct sk_buff *skb, struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			       struct mfc6_cache *c, struct rtmsg *rtm);
 static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
 			      int cmd);
 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
@@ -101,6 +99,23 @@ static void ipmr_expire_process(struct timer_list *t);
 #define ip6mr_for_each_table(mrt, net) \
 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+					    struct mr_table *mrt)
+{
+	struct mr_table *ret;
+
+	if (!mrt)
+		ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+				     struct mr_table, list);
+	else
+		ret = list_entry_rcu(mrt->list.next,
+				     struct mr_table, list);
+
+	if (&ret->list == &net->ipv6.mr6_tables)
+		return NULL;
+	return ret;
+}
+
 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
 	struct mr_table *mrt;
@@ -247,6 +262,14 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 #define ip6mr_for_each_table(mrt, net) \
 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+					    struct mr_table *mrt)
+{
+	if (!mrt)
+		return net->ipv6.mrt6;
+	return NULL;
+}
+
 static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 {
 	return net->ipv6.mrt6;
@@ -948,7 +971,8 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
 			struct nlmsghdr *nlh = skb_pull(skb,
 							sizeof(struct ipv6hdr));
 
-			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+			if (mr_fill_mroute(mrt, skb, &c->_c,
+					   nlmsg_data(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
@@ -2081,63 +2105,6 @@ int ip6_mr_input(struct sk_buff *skb)
 	return 0;
 }
 
-
-static int __ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			       struct mfc6_cache *c, struct rtmsg *rtm)
-{
-	struct rta_mfc_stats mfcs;
-	struct nlattr *mp_attr;
-	struct rtnexthop *nhp;
-	unsigned long lastuse;
-	int ct;
-
-	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->_c.mfc_parent >= MAXMIFS) {
-		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
-		return -ENOENT;
-	}
-
-	if (VIF_EXISTS(mrt, c->_c.mfc_parent) &&
-	    nla_put_u32(skb, RTA_IIF,
-			mrt->vif_table[c->_c.mfc_parent].dev->ifindex) < 0)
-		return -EMSGSIZE;
-	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
-	if (!mp_attr)
-		return -EMSGSIZE;
-
-	for (ct = c->_c.mfc_un.res.minvif;
-	     ct < c->_c.mfc_un.res.maxvif; ct++) {
-		if (VIF_EXISTS(mrt, ct) && c->_c.mfc_un.res.ttls[ct] < 255) {
-			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
-			if (!nhp) {
-				nla_nest_cancel(skb, mp_attr);
-				return -EMSGSIZE;
-			}
-
-			nhp->rtnh_flags = 0;
-			nhp->rtnh_hops = c->_c.mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
-			nhp->rtnh_len = sizeof(*nhp);
-		}
-	}
-
-	nla_nest_end(skb, mp_attr);
-
-	lastuse = READ_ONCE(c->_c.mfc_un.res.lastuse);
-	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
-	mfcs.mfcs_packets = c->_c.mfc_un.res.pkt;
-	mfcs.mfcs_bytes = c->_c.mfc_un.res.bytes;
-	mfcs.mfcs_wrong_if = c->_c.mfc_un.res.wrong_if;
-	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
-			      RTA_PAD))
-		return -EMSGSIZE;
-
-	rtm->rtm_type = RTN_MULTICAST;
-	return 1;
-}
-
 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
 		    u32 portid)
 {
@@ -2203,7 +2170,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
 		return err;
 	}
 
-	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
@@ -2239,7 +2206,7 @@ static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
 	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
 		goto nla_put_failure;
-	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
 	/* do not break the dump if cache is unresolved */
 	if (err < 0 && err != -ENOENT)
 		goto nla_put_failure;
@@ -2252,6 +2219,14 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+			      u32 portid, u32 seq, struct mr_mfc *c,
+			      int cmd, int flags)
+{
+	return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+				 cmd, flags);
+}
+
 static int mr6_msgsize(bool unresolved, int maxvif)
 {
 	size_t len =
@@ -2365,59 +2340,6 @@ errout:
 
 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
-	unsigned int t = 0, s_t;
-	unsigned int e = 0, s_e;
-	struct mr_table *mrt;
-	struct mr_mfc *mfc;
-
-	s_t = cb->args[0];
-	s_e = cb->args[1];
-
-	rcu_read_lock();
-	ip6mr_for_each_table(mrt, net) {
-		if (t < s_t)
-			goto next_table;
-		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-			if (e < s_e)
-				goto next_entry;
-			if (ip6mr_fill_mroute(mrt, skb,
-					      NETLINK_CB(cb->skb).portid,
-					      cb->nlh->nlmsg_seq,
-					      (struct mfc6_cache *)mfc,
-					      RTM_NEWROUTE, NLM_F_MULTI) < 0)
-				goto done;
-next_entry:
-			e++;
-		}
-		e = 0;
-		s_e = 0;
-
-		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-			if (e < s_e)
-				goto next_entry2;
-			if (ip6mr_fill_mroute(mrt, skb,
-					      NETLINK_CB(cb->skb).portid,
-					      cb->nlh->nlmsg_seq,
-					     (struct mfc6_cache *)mfc,
-					      RTM_NEWROUTE, NLM_F_MULTI) < 0) {
-				spin_unlock_bh(&mfc_unres_lock);
-				goto done;
-			}
-next_entry2:
-			e++;
-		}
-		spin_unlock_bh(&mfc_unres_lock);
-		e = s_e = 0;
-next_table:
-		t++;
-	}
-done:
-	rcu_read_unlock();
-
-	cb->args[1] = e;
-	cb->args[0] = t;
-
-	return skb->len;
+	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+				_ip6mr_fill_mroute, &mfc_unres_lock);
 }

From 0f6271264afd975bc599d6f30f3693e9aea57036 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Thu, 1 Mar 2018 13:51:26 +0100
Subject: [PATCH 0543/1678] net/smc: cleanup smc_llc.h and smc_clc.h headers

Remove structures used internal only from headers.
And remove an extra function parameter.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  |  8 ++++----
 net/smc/smc_clc.c |  3 +++
 net/smc/smc_clc.h |  6 ------
 net/smc/smc_llc.c | 30 ++++++++++++++++++++++++++++++
 net/smc/smc_llc.h | 28 ----------------------------
 5 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 38ae22b65e77..b1961a789837 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -312,7 +312,7 @@ out:
 	return rc;
 }
 
-static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
 	struct smc_link_group *lgr = smc->conn.lgr;
 	struct smc_link *link;
@@ -346,7 +346,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
 	/* send CONFIRM LINK response over RoCE fabric */
 	rc = smc_llc_send_confirm_link(link,
 				       link->smcibdev->mac[link->ibport - 1],
-				       gid, SMC_LLC_RESP);
+				       &link->smcibdev->gid[link->ibport - 1],
+				       SMC_LLC_RESP);
 	if (rc < 0)
 		return SMC_CLC_DECL_TCL;
 
@@ -498,8 +499,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		/* QP confirmation over RoCE fabric */
-		reason_code = smc_clnt_conf_first_link(
-			smc, &smcibdev->gid[ibport - 1]);
+		reason_code = smc_clnt_conf_first_link(smc);
 		if (reason_code < 0) {
 			rc = reason_code;
 			goto out_err_unlock;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 8ac51583a063..dff318a2d5bf 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -22,6 +22,9 @@
 #include "smc_clc.h"
 #include "smc_ib.h"
 
+/* eye catcher "SMCR" EBCDIC for CLC messages */
+static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+
 /* check if received message has a correct header length and contains valid
  * heading and trailing eyecatchers
  */
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index c145a0f36a68..aab3aae2a2ce 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -22,9 +22,6 @@
 #define SMC_CLC_CONFIRM		0x03
 #define SMC_CLC_DECLINE		0x04
 
-/* eye catcher "SMCR" EBCDIC for CLC messages */
-static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
-
 #define SMC_CLC_V1		0x1		/* SMC version                */
 #define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */
@@ -124,9 +121,6 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
 	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
-struct smc_sock;
-struct smc_ib_device;
-
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 92fe4cc8c82c..e4502bbff33d 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -21,6 +21,36 @@
 #include "smc_clc.h"
 #include "smc_llc.h"
 
+#define SMC_LLC_DATA_LEN		40
+
+struct smc_llc_hdr {
+	struct smc_wr_rx_hdr common;
+	u8 length;	/* 44 */
+	u8 reserved;
+	u8 flags;
+};
+
+struct smc_llc_msg_confirm_link {	/* type 0x01 */
+	struct smc_llc_hdr hd;
+	u8 sender_mac[ETH_ALEN];
+	u8 sender_gid[SMC_GID_SIZE];
+	u8 sender_qp_num[3];
+	u8 link_num;
+	u8 link_uid[SMC_LGR_ID_SIZE];
+	u8 max_links;
+	u8 reserved[9];
+};
+
+union smc_llc_msg {
+	struct smc_llc_msg_confirm_link confirm_link;
+	struct {
+		struct smc_llc_hdr hdr;
+		u8 data[SMC_LLC_DATA_LEN];
+	} raw;
+};
+
+#define SMC_LLC_FLAG_RESP		0x80
+
 /********************************** send *************************************/
 
 struct smc_llc_tx_pend {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 51b27ce90dbd..a7888607ab53 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -28,34 +28,6 @@ enum smc_llc_msg_type {
 	SMC_LLC_CONFIRM_LINK		= 0x01,
 };
 
-#define SMC_LLC_DATA_LEN		40
-
-struct smc_llc_hdr {
-	struct smc_wr_rx_hdr common;
-	u8 length;	/* 44 */
-	u8 reserved;
-	u8 flags;
-};
-
-struct smc_llc_msg_confirm_link {	/* type 0x01 */
-	struct smc_llc_hdr hd;
-	u8 sender_mac[ETH_ALEN];
-	u8 sender_gid[SMC_GID_SIZE];
-	u8 sender_qp_num[3];
-	u8 link_num;
-	u8 link_uid[SMC_LGR_ID_SIZE];
-	u8 max_links;
-	u8 reserved[9];
-};
-
-union smc_llc_msg {
-	struct smc_llc_msg_confirm_link confirm_link;
-	struct {
-		struct smc_llc_hdr hdr;
-		u8 data[SMC_LLC_DATA_LEN];
-	} raw;
-};
-
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
 			      enum smc_llc_reqresp reqresp);

From 696cd3016975d31e3499c49a7a747d7615a16b3b Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:27 +0100
Subject: [PATCH 0544/1678] net/smc: move netinfo function to file smc_clc.c

The function smc_netinfo_by_tcpsk() belongs to CLC handling.
Move it to smc_clc.c and rename to smc_clc_netinfo_by_tcpsk.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  | 42 +-----------------------------------------
 net/smc/smc.h     |  2 --
 net/smc/smc_clc.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
 net/smc/smc_clc.h |  2 ++
 4 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b1961a789837..b90cbfdb9916 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -24,7 +24,6 @@
 
 #include <linux/module.h>
 #include <linux/socket.h>
-#include <linux/inetdevice.h>
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
@@ -273,45 +272,6 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
-/* determine subnet and mask of internal TCP socket */
-int smc_netinfo_by_tcpsk(struct socket *clcsock,
-			 __be32 *subnet, u8 *prefix_len)
-{
-	struct dst_entry *dst = sk_dst_get(clcsock->sk);
-	struct in_device *in_dev;
-	struct sockaddr_in addr;
-	int rc = -ENOENT;
-
-	if (!dst) {
-		rc = -ENOTCONN;
-		goto out;
-	}
-	if (!dst->dev) {
-		rc = -ENODEV;
-		goto out_rel;
-	}
-
-	/* get address to which the internal TCP socket is bound */
-	kernel_getsockname(clcsock, (struct sockaddr *)&addr);
-	/* analyze IPv4 specific data of net_device belonging to TCP socket */
-	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(dst->dev);
-	for_ifa(in_dev) {
-		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
-			continue;
-		*prefix_len = inet_mask_len(ifa->ifa_mask);
-		*subnet = ifa->ifa_address & ifa->ifa_mask;
-		rc = 0;
-		break;
-	} endfor_ifa(in_dev);
-	rcu_read_unlock();
-
-out_rel:
-	dst_release(dst);
-out:
-	return rc;
-}
-
 static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
 	struct smc_link_group *lgr = smc->conn.lgr;
@@ -808,7 +768,7 @@ static void smc_listen_work(struct work_struct *work)
 	}
 
 	/* determine subnet and mask from internal TCP socket */
-	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
+	rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
 	if (rc) {
 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
 		goto decline_rdma;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9518986c97b1..9895c190d146 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -263,8 +263,6 @@ static inline bool using_ipsec(struct smc_sock *smc)
 
 struct smc_clc_msg_local;
 
-int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
-			 u8 *prefix_len);
 void smc_conn_free(struct smc_connection *conn);
 int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
 		    struct smc_ib_device *smcibdev, u8 ibport,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index dff318a2d5bf..874c5a75d6dd 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/in.h>
+#include <linux/inetdevice.h>
 #include <linux/if_ether.h>
 #include <linux/sched/signal.h>
 
@@ -73,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
 	return true;
 }
 
+/* determine subnet and mask of internal TCP socket */
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
+			     __be32 *subnet, u8 *prefix_len)
+{
+	struct dst_entry *dst = sk_dst_get(clcsock->sk);
+	struct in_device *in_dev;
+	struct sockaddr_in addr;
+	int rc = -ENOENT;
+
+	if (!dst) {
+		rc = -ENOTCONN;
+		goto out;
+	}
+	if (!dst->dev) {
+		rc = -ENODEV;
+		goto out_rel;
+	}
+
+	/* get address to which the internal TCP socket is bound */
+	kernel_getsockname(clcsock, (struct sockaddr *)&addr);
+	/* analyze IPv4 specific data of net_device belonging to TCP socket */
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dst->dev);
+	for_ifa(in_dev) {
+		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
+			continue;
+		*prefix_len = inet_mask_len(ifa->ifa_mask);
+		*subnet = ifa->ifa_address & ifa->ifa_mask;
+		rc = 0;
+		break;
+	} endfor_ifa(in_dev);
+	rcu_read_unlock();
+
+out_rel:
+	dst_release(dst);
+out:
+	return rc;
+}
+
 /* Wait for data on the tcp-socket, analyze received data
  * Returns:
  * 0 if success and it was not a decline that we received.
@@ -214,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 
 	memset(&pclc_prfx, 0, sizeof(pclc_prfx));
 	/* determine subnet and mask from internal TCP socket */
-	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
-				  &pclc_prfx.prefix_len);
+	rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+				      &pclc_prfx.prefix_len);
 	if (rc)
 		return SMC_CLC_DECL_CNFERR; /* configuration error */
 	pclc_prfx.ipv6_prefixes_cnt = 0;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index aab3aae2a2ce..a20fc75efb24 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -121,6 +121,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
 	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
+int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
+			     u8 *prefix_len);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);

From be6d467b997f9e32aa9b27add06e7b0c8627a566 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:28 +0100
Subject: [PATCH 0545/1678] net/smc: remove unused fields from smc structures

The daddr field holds the destination IPv4 address. The field was set but
never used and can be removed. The addr field was a left-over from an
earlier version of non-blocking connects and can be removed.
The result of the call to kernel_getpeername is not used, the call can be
removed. Non-blocking connects are working, so remove restriction comment.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 15 ++++-----------
 net/smc/smc.h      |  3 +--
 net/smc/smc_core.c |  7 +++----
 net/smc/smc_core.h |  1 -
 4 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b90cbfdb9916..cda3d5314e3f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,7 +7,6 @@
  *  applicable with RoCE-cards only
  *
  *  Initial restrictions:
- *    - non-blocking connect postponed
  *    - IPv6 support postponed
  *    - support for alternate links postponed
  *    - partial support for non-blocking sockets only
@@ -345,7 +344,6 @@ static void smc_lgr_forget(struct smc_link_group *lgr)
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
-	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
 	struct smc_clc_msg_accept_confirm aclc;
 	int local_contact = SMC_FIRST_CONTACT;
 	struct smc_ib_device *smcibdev;
@@ -399,8 +397,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 
 	srv_first_contact = aclc.hdr.flag;
 	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
-					ibport, &aclc.lcl, srv_first_contact);
+	local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl,
+					srv_first_contact);
 	if (local_contact < 0) {
 		rc = local_contact;
 		if (rc == -ENOMEM)
@@ -518,7 +516,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 		goto out_err;
 	if (addr->sa_family != AF_INET)
 		goto out_err;
-	smc->addr = addr;	/* needed for nonblocking connect */
 
 	lock_sock(sk);
 	switch (sk->sk_state) {
@@ -726,7 +723,6 @@ static void smc_listen_work(struct work_struct *work)
 	struct sock *newsmcsk = &new_smc->sk;
 	struct smc_clc_msg_proposal *pclc;
 	struct smc_ib_device *smcibdev;
-	struct sockaddr_in peeraddr;
 	u8 buf[SMC_CLC_MAX_LEN];
 	struct smc_link *link;
 	int reason_code = 0;
@@ -782,13 +778,10 @@ static void smc_listen_work(struct work_struct *work)
 		goto decline_rdma;
 	}
 
-	/* get address of the peer connected to the internal TCP socket */
-	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr);
-
 	/* allocate connection / link group */
 	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
-					smcibdev, ibport, &pclc->lcl, 0);
+	local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl,
+					0);
 	if (local_contact < 0) {
 		rc = local_contact;
 		if (rc == -ENOMEM)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 9895c190d146..268cdf11533c 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -172,7 +172,6 @@ struct smc_sock {				/* smc sock container */
 	struct sock		sk;
 	struct socket		*clcsock;	/* internal tcp socket */
 	struct smc_connection	conn;		/* smc connection */
-	struct sockaddr		*addr;		/* inet connect address */
 	struct smc_sock		*listen_smc;	/* listen parent */
 	struct work_struct	tcp_listen_work;/* handle tcp socket accepts */
 	struct work_struct	smc_listen_work;/* prepare new accept socket */
@@ -264,7 +263,7 @@ static inline bool using_ipsec(struct smc_sock *smc)
 struct smc_clc_msg_local;
 
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
 		    struct smc_ib_device *smcibdev, u8 ibport,
 		    struct smc_clc_msg_local *lcl, int srv_first_contact);
 struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2424c7100aaf..bc11d06e38ae 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -144,7 +144,7 @@ free:
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
+static int smc_lgr_create(struct smc_sock *smc,
 			  struct smc_ib_device *smcibdev, u8 ibport,
 			  char *peer_systemid, unsigned short vlan_id)
 {
@@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
 	}
 	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 	lgr->sync_err = false;
-	lgr->daddr = peer_in_addr;
 	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
 	lgr->vlan_id = vlan_id;
 	rwlock_init(&lgr->sndbufs_lock);
@@ -400,7 +399,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 }
 
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
+int smc_conn_create(struct smc_sock *smc,
 		    struct smc_ib_device *smcibdev, u8 ibport,
 		    struct smc_clc_msg_local *lcl, int srv_first_contact)
 {
@@ -457,7 +456,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
 
 create:
 	if (local_contact == SMC_FIRST_CONTACT) {
-		rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport,
+		rc = smc_lgr_create(smc, smcibdev, ibport,
 				    lcl->id_for_peer, vlan_id);
 		if (rc)
 			goto out;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fe691bf9af91..7852c3fabf12 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -124,7 +124,6 @@ struct smc_rtoken {				/* address/key of remote RMB */
 struct smc_link_group {
 	struct list_head	list;
 	enum smc_lgr_role	role;		/* client or server */
-	__be32			daddr;		/* destination ip address */
 	struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];	/* smc link */
 	char			peer_systemid[SMC_SYSTEMID_LEN];
 						/* unique system_id of peer */

From 313164da55da0fb24191e729f989f4b2c2793ead Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:29 +0100
Subject: [PATCH 0546/1678] net/smc: respond to test link messages

Add TEST LINK message responses, which also serves as preparation for
support of sockopt TCP_KEEPALIVE.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc_llc.h |  3 +++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index e4502bbff33d..9e0a556e40c8 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -41,8 +41,15 @@ struct smc_llc_msg_confirm_link {	/* type 0x01 */
 	u8 reserved[9];
 };
 
+struct smc_llc_msg_test_link {		/* type 0x07 */
+	struct smc_llc_hdr hd;
+	u8 user_data[16];
+	u8 reserved[24];
+};
+
 union smc_llc_msg {
 	struct smc_llc_msg_confirm_link confirm_link;
+	struct smc_llc_msg_test_link test_link;
 	struct {
 		struct smc_llc_hdr hdr;
 		u8 data[SMC_LLC_DATA_LEN];
@@ -130,6 +137,30 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
 	return rc;
 }
 
+/* send LLC test link request or response */
+int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
+			   enum smc_llc_reqresp reqresp)
+{
+	struct smc_llc_msg_test_link *testllc;
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	int rc;
+
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	testllc = (struct smc_llc_msg_test_link *)wr_buf;
+	memset(testllc, 0, sizeof(*testllc));
+	testllc->hd.common.type = SMC_LLC_TEST_LINK;
+	testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
+	if (reqresp == SMC_LLC_RESP)
+		testllc->hd.flags |= SMC_LLC_FLAG_RESP;
+	memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
+	/* send llc message */
+	rc = smc_wr_tx_send(link, pend);
+	return rc;
+}
+
 /********************************* receive ***********************************/
 
 static void smc_llc_rx_confirm_link(struct smc_link *link,
@@ -149,6 +180,16 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
 	}
 }
 
+static void smc_llc_rx_test_link(struct smc_link *link,
+				 struct smc_llc_msg_test_link *llc)
+{
+	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+		/* unused as long as we don't send this type of msg */
+	} else {
+		smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP);
+	}
+}
+
 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 {
 	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -158,8 +199,15 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 		return; /* short message */
 	if (llc->raw.hdr.length != sizeof(*llc))
 		return; /* invalid message */
-	if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+
+	switch (llc->raw.hdr.common.type) {
+	case SMC_LLC_TEST_LINK:
+		smc_llc_rx_test_link(link, &llc->test_link);
+		break;
+	case SMC_LLC_CONFIRM_LINK:
 		smc_llc_rx_confirm_link(link, &llc->confirm_link);
+		break;
+	}
 }
 
 /***************************** init, exit, misc ******************************/
@@ -169,6 +217,10 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
 		.handler	= smc_llc_rx_handler,
 		.type		= SMC_LLC_CONFIRM_LINK
 	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_TEST_LINK
+	},
 	{
 		.handler	= NULL,
 	}
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index a7888607ab53..6c8a062db4f3 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -26,11 +26,14 @@ enum smc_llc_reqresp {
 
 enum smc_llc_msg_type {
 	SMC_LLC_CONFIRM_LINK		= 0x01,
+	SMC_LLC_TEST_LINK		= 0x07,
 };
 
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
 			      enum smc_llc_reqresp reqresp);
+int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
+			   enum smc_llc_reqresp reqresp);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */

From 4ed75de58e9191c011e318dc98b4b157dc633444 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:30 +0100
Subject: [PATCH 0547/1678] net/smc: process confirm/delete rkey messages

Process and respond to CONFIRM RKEY and DELETE RKEY messages.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  48 +++++++++++----
 net/smc/smc_core.h |   2 +
 net/smc/smc_llc.c  | 143 +++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_llc.h  |   3 +
 4 files changed, 186 insertions(+), 10 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bc11d06e38ae..31bb2d1dbd77 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -697,27 +697,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
 	return -ENOSPC;
 }
 
-/* save rkey and dma_addr received from peer during clc handshake */
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
-			    struct smc_clc_msg_accept_confirm *clc)
+/* add a new rtoken from peer */
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
 {
-	u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr);
-	struct smc_link_group *lgr = conn->lgr;
-	u32 rkey = ntohl(clc->rmb_rkey);
+	u64 dma_addr = be64_to_cpu(nw_vaddr);
+	u32 rkey = ntohl(nw_rkey);
 	int i;
 
 	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
 		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
 		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
 		    test_bit(i, lgr->rtokens_used_mask)) {
-			conn->rtoken_idx = i;
+			/* already in list */
+			return i;
+		}
+	}
+	i = smc_rmb_reserve_rtoken_idx(lgr);
+	if (i < 0)
+		return i;
+	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
+	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+	return i;
+}
+
+/* delete an rtoken */
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+{
+	u32 rkey = ntohl(nw_rkey);
+	int i;
+
+	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+		    test_bit(i, lgr->rtokens_used_mask)) {
+			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
+			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+
+			clear_bit(i, lgr->rtokens_used_mask);
 			return 0;
 		}
 	}
-	conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr);
+	return -ENOENT;
+}
+
+/* save rkey and dma_addr received from peer during clc handshake */
+int smc_rmb_rtoken_handling(struct smc_connection *conn,
+			    struct smc_clc_msg_accept_confirm *clc)
+{
+	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+					  clc->rmb_rkey);
 	if (conn->rtoken_idx < 0)
 		return conn->rtoken_idx;
-	lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey;
-	lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
 	return 0;
 }
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 7852c3fabf12..7be693b940a2 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -189,6 +189,8 @@ void smc_lgr_terminate(struct smc_link_group *lgr);
 int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
+int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 9e0a556e40c8..3e47b94608b5 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -47,8 +47,50 @@ struct smc_llc_msg_test_link {		/* type 0x07 */
 	u8 reserved[24];
 };
 
+struct smc_rmb_rtoken {
+	union {
+		u8 num_rkeys;	/* first rtoken byte of CONFIRM LINK msg */
+				/* is actually the num of rtokens, first */
+				/* rtoken is always for the current link */
+		u8 link_id;	/* link id of the rtoken */
+	};
+	__be32 rmb_key;
+	__be64 rmb_vaddr;
+} __packed;			/* format defined in RFC7609 */
+
+#define SMC_LLC_RKEYS_PER_MSG	3
+
+struct smc_llc_msg_confirm_rkey {	/* type 0x06 */
+	struct smc_llc_hdr hd;
+	struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+	u8 reserved;
+};
+
+struct smc_llc_msg_confirm_rkey_cont {	/* type 0x08 */
+	struct smc_llc_hdr hd;
+	u8 num_rkeys;
+	struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
+};
+
+#define SMC_LLC_DEL_RKEY_MAX	8
+#define SMC_LLC_FLAG_RKEY_NEG	0x20
+
+struct smc_llc_msg_delete_rkey {	/* type 0x09 */
+	struct smc_llc_hdr hd;
+	u8 num_rkeys;
+	u8 err_mask;
+	u8 reserved[2];
+	__be32 rkey[8];
+	u8 reserved2[4];
+};
+
 union smc_llc_msg {
 	struct smc_llc_msg_confirm_link confirm_link;
+
+	struct smc_llc_msg_confirm_rkey confirm_rkey;
+	struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
+	struct smc_llc_msg_delete_rkey delete_rkey;
+
 	struct smc_llc_msg_test_link test_link;
 	struct {
 		struct smc_llc_hdr hdr;
@@ -161,6 +203,22 @@ int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16],
 	return rc;
 }
 
+/* send a prepared message */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
+{
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	int rc;
+
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	memcpy(wr_buf, llcbuf, llclen);
+	/* send llc message */
+	rc = smc_wr_tx_send(link, pend);
+	return rc;
+}
+
 /********************************* receive ***********************************/
 
 static void smc_llc_rx_confirm_link(struct smc_link *link,
@@ -190,6 +248,70 @@ static void smc_llc_rx_test_link(struct smc_link *link,
 	}
 }
 
+static void smc_llc_rx_confirm_rkey(struct smc_link *link,
+				    struct smc_llc_msg_confirm_rkey *llc)
+{
+	struct smc_link_group *lgr;
+	int rc;
+
+	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+		/* unused as long as we don't send this type of msg */
+	} else {
+		rc = smc_rtoken_add(lgr,
+				    llc->rtoken[0].rmb_vaddr,
+				    llc->rtoken[0].rmb_key);
+
+		/* ignore rtokens for other links, we have only one link */
+
+		llc->hd.flags |= SMC_LLC_FLAG_RESP;
+		if (rc < 0)
+			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+		smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+	}
+}
+
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+				      struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+		/* unused as long as we don't send this type of msg */
+	} else {
+		/* ignore rtokens for other links, we have only one link */
+		llc->hd.flags |= SMC_LLC_FLAG_RESP;
+		smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+	}
+}
+
+static void smc_llc_rx_delete_rkey(struct smc_link *link,
+				   struct smc_llc_msg_delete_rkey *llc)
+{
+	struct smc_link_group *lgr;
+	u8 err_mask = 0;
+	int i, max;
+
+	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+		/* unused as long as we don't send this type of msg */
+	} else {
+		max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+		for (i = 0; i < max; i++) {
+			if (smc_rtoken_delete(lgr, llc->rkey[i]))
+				err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+		}
+
+		if (err_mask) {
+			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+			llc->err_mask = err_mask;
+		}
+
+		llc->hd.flags |= SMC_LLC_FLAG_RESP;
+		smc_llc_send_message(link, (void *)llc, sizeof(*llc));
+	}
+}
+
 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 {
 	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
@@ -207,6 +329,15 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 	case SMC_LLC_CONFIRM_LINK:
 		smc_llc_rx_confirm_link(link, &llc->confirm_link);
 		break;
+	case SMC_LLC_CONFIRM_RKEY:
+		smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
+		break;
+	case SMC_LLC_CONFIRM_RKEY_CONT:
+		smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+		break;
+	case SMC_LLC_DELETE_RKEY:
+		smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+		break;
 	}
 }
 
@@ -221,6 +352,18 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
 		.handler	= smc_llc_rx_handler,
 		.type		= SMC_LLC_TEST_LINK
 	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_CONFIRM_RKEY
+	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_CONFIRM_RKEY_CONT
+	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_DELETE_RKEY
+	},
 	{
 		.handler	= NULL,
 	}
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 6c8a062db4f3..5573f0d0578e 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -26,7 +26,10 @@ enum smc_llc_reqresp {
 
 enum smc_llc_msg_type {
 	SMC_LLC_CONFIRM_LINK		= 0x01,
+	SMC_LLC_CONFIRM_RKEY		= 0x06,
 	SMC_LLC_TEST_LINK		= 0x07,
+	SMC_LLC_CONFIRM_RKEY_CONT	= 0x08,
+	SMC_LLC_DELETE_RKEY		= 0x09,
 };
 
 /* transmit */

From 75d320d611d8569ebe4e42718de035fcc79f8069 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:31 +0100
Subject: [PATCH 0548/1678] net/smc: do not allow eyecatchers in rmbe

SMC does not support eyecatchers in RMB elements,
decline peers requesting this support.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 11 +++++++++--
 net/smc/smc_clc.h  |  1 +
 net/smc/smc_core.h |  2 ++
 net/smc/smc_llc.c  | 16 +++++++++++++++-
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index cda3d5314e3f..0d491f505608 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -291,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 		return rc;
 	}
 
+	if (link->llc_confirm_rc)
+		return SMC_CLC_DECL_RMBE_EC;
+
 	rc = smc_ib_modify_qp_rts(link);
 	if (rc)
 		return SMC_CLC_DECL_INTERR;
@@ -310,7 +313,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 	if (rc < 0)
 		return SMC_CLC_DECL_TCL;
 
-	return rc;
+	return 0;
 }
 
 static void smc_conn_save_peer_info(struct smc_sock *smc,
@@ -705,9 +708,13 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE);
+		return rc;
 	}
 
-	return rc;
+	if (link->llc_confirm_resp_rc)
+		return SMC_CLC_DECL_RMBE_EC;
+
+	return 0;
 }
 
 /* setup for RDMA connection of server */
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index a20fc75efb24..20e048beac30 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -33,6 +33,7 @@
 #define SMC_CLC_DECL_INTERR	0x99990000  /* internal error                 */
 #define SMC_CLC_DECL_TCL	0x02040000  /* timeout w4 QP confirm          */
 #define SMC_CLC_DECL_SEND	0x07000000  /* sending problem                */
+#define SMC_CLC_DECL_RMBE_EC	0x08000000  /* peer has eyecatcher in RMBE    */
 
 struct smc_clc_msg_hdr {	/* header1 of clc messages */
 	u8 eyecatcher[4];	/* eye catcher */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 7be693b940a2..2b65b3d7f1f5 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -89,6 +89,8 @@ struct smc_link {
 	u8			link_id;	/* unique # within link group */
 	struct completion	llc_confirm;	/* wait for rx of conf link */
 	struct completion	llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
+	int			llc_confirm_rc; /* rc from confirm link msg */
+	int			llc_confirm_resp_rc; /* rc from conf_resp msg */
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 3e47b94608b5..838a160a3bd9 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -30,6 +30,8 @@ struct smc_llc_hdr {
 	u8 flags;
 };
 
+#define SMC_LLC_FLAG_NO_RMBE_EYEC	0x03
+
 struct smc_llc_msg_confirm_link {	/* type 0x01 */
 	struct smc_llc_hdr hd;
 	u8 sender_mac[ETH_ALEN];
@@ -166,6 +168,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
 	memset(confllc, 0, sizeof(*confllc));
 	confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
 	confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
 	if (reqresp == SMC_LLC_RESP)
 		confllc->hd.flags |= SMC_LLC_FLAG_RESP;
 	memcpy(confllc->sender_mac, mac, ETH_ALEN);
@@ -225,13 +228,24 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
 				    struct smc_llc_msg_confirm_link *llc)
 {
 	struct smc_link_group *lgr;
+	int conf_rc;
 
 	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+
+	/* RMBE eyecatchers are not supported */
+	if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
+		conf_rc = 0;
+	else
+		conf_rc = ENOTSUPP;
+
 	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		if (lgr->role == SMC_SERV)
+		if (lgr->role == SMC_SERV) {
+			link->llc_confirm_resp_rc = conf_rc;
 			complete(&link->llc_confirm_resp);
+		}
 	} else {
 		if (lgr->role == SMC_CLNT) {
+			link->llc_confirm_rc = conf_rc;
 			link->link_id = llc->link_num;
 			complete(&link->llc_confirm);
 		}

From 52bedf37bafe1d3bc36d1513ad059d9fd28b3c3f Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:32 +0100
Subject: [PATCH 0549/1678] net/smc: process add/delete link messages

Add initial support for the LLC messages ADD LINK and DELETE LINK.
Introduce a link state field. Extend the initial LLC handshake with
ADD LINK processing.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  42 ++++++++++++
 net/smc/smc_core.c |   3 +
 net/smc/smc_core.h |  10 +++
 net/smc/smc_llc.c  | 168 +++++++++++++++++++++++++++++++++++++++++++--
 net/smc/smc_llc.h  |   7 ++
 5 files changed, 223 insertions(+), 7 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0d491f505608..5267ed19b67d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -313,6 +313,27 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 	if (rc < 0)
 		return SMC_CLC_DECL_TCL;
 
+	/* receive ADD LINK request from server over RoCE fabric */
+	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
+							 SMC_LLC_WAIT_TIME);
+	if (rest <= 0) {
+		struct smc_clc_msg_decline dclc;
+
+		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+				      SMC_CLC_DECLINE);
+		return rc;
+	}
+
+	/* send add link reject message, only one link supported for now */
+	rc = smc_llc_send_add_link(link,
+				   link->smcibdev->mac[link->ibport - 1],
+				   &link->smcibdev->gid[link->ibport - 1],
+				   SMC_LLC_RESP);
+	if (rc < 0)
+		return SMC_CLC_DECL_TCL;
+
+	link->state = SMC_LNK_ACTIVE;
+
 	return 0;
 }
 
@@ -714,6 +735,27 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 	if (link->llc_confirm_resp_rc)
 		return SMC_CLC_DECL_RMBE_EC;
 
+	/* send ADD LINK request to client over the RoCE fabric */
+	rc = smc_llc_send_add_link(link,
+				   link->smcibdev->mac[link->ibport - 1],
+				   &link->smcibdev->gid[link->ibport - 1],
+				   SMC_LLC_REQ);
+	if (rc < 0)
+		return SMC_CLC_DECL_TCL;
+
+	/* receive ADD LINK response from client over the RoCE fabric */
+	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
+							 SMC_LLC_WAIT_TIME);
+	if (rest <= 0) {
+		struct smc_clc_msg_decline dclc;
+
+		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+				      SMC_CLC_DECLINE);
+		return rc;
+	}
+
+	link->state = SMC_LNK_ACTIVE;
+
 	return 0;
 }
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 31bb2d1dbd77..eb343e15a723 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -176,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc,
 
 	lnk = &lgr->lnk[SMC_SINGLE_LINK];
 	/* initialize link */
+	lnk->state = SMC_LNK_ACTIVATING;
 	lnk->smcibdev = smcibdev;
 	lnk->ibport = ibport;
 	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
@@ -197,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc,
 		goto destroy_qp;
 	init_completion(&lnk->llc_confirm);
 	init_completion(&lnk->llc_confirm_resp);
+	init_completion(&lnk->llc_add);
+	init_completion(&lnk->llc_add_resp);
 
 	smc->conn.lgr = lgr;
 	rwlock_init(&lgr->conns_lock);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 2b65b3d7f1f5..cead2093c4b4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,12 @@ enum smc_lgr_role {		/* possible roles of a link group */
 	SMC_SERV	/* server */
 };
 
+enum smc_link_state {			/* possible states of a link */
+	SMC_LNK_INACTIVE,	/* link is inactive */
+	SMC_LNK_ACTIVATING,	/* link is being activated */
+	SMC_LNK_ACTIVE		/* link is active */
+};
+
 #define SMC_WR_BUF_SIZE		48	/* size of work request buffer */
 
 struct smc_wr_buf {
@@ -87,10 +93,14 @@ struct smc_link {
 	u8			peer_mac[ETH_ALEN];	/* = gid[8:10||13:15] */
 	u8			peer_gid[sizeof(union ib_gid)];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
+
+	enum smc_link_state	state;		/* state of link */
 	struct completion	llc_confirm;	/* wait for rx of conf link */
 	struct completion	llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
 	int			llc_confirm_rc; /* rc from confirm link msg */
 	int			llc_confirm_resp_rc; /* rc from conf_resp msg */
+	struct completion	llc_add;	/* wait for rx of add link */
+	struct completion	llc_add_resp;	/* wait for rx of add link rsp*/
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 838a160a3bd9..45b3e0211c39 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -4,9 +4,6 @@
  *
  *  Link Layer Control (LLC)
  *
- *  For now, we only support the necessary "confirm link" functionality
- *  which happens for the first RoCE link after successful CLC handshake.
- *
  *  Copyright IBM Corp. 2016
  *
  *  Author(s):  Klaus Wacker <Klaus.Wacker@de.ibm.com>
@@ -26,7 +23,13 @@
 struct smc_llc_hdr {
 	struct smc_wr_rx_hdr common;
 	u8 length;	/* 44 */
-	u8 reserved;
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u8 reserved:4,
+	   add_link_rej_rsn:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	u8 add_link_rej_rsn:4,
+	   reserved:4;
+#endif
 	u8 flags;
 };
 
@@ -43,6 +46,33 @@ struct smc_llc_msg_confirm_link {	/* type 0x01 */
 	u8 reserved[9];
 };
 
+#define SMC_LLC_FLAG_ADD_LNK_REJ	0x40
+#define SMC_LLC_REJ_RSN_NO_ALT_PATH	1
+
+#define SMC_LLC_ADD_LNK_MAX_LINKS	2
+
+struct smc_llc_msg_add_link {		/* type 0x02 */
+	struct smc_llc_hdr hd;
+	u8 sender_mac[ETH_ALEN];
+	u8 reserved2[2];
+	u8 sender_gid[SMC_GID_SIZE];
+	u8 sender_qp_num[3];
+	u8 link_num;
+	u8 flags2;	/* QP mtu */
+	u8 initial_psn[3];
+	u8 reserved[8];
+};
+
+#define SMC_LLC_FLAG_DEL_LINK_ALL	0x40
+#define SMC_LLC_FLAG_DEL_LINK_ORDERLY	0x20
+
+struct smc_llc_msg_del_link {		/* type 0x04 */
+	struct smc_llc_hdr hd;
+	u8 link_num;
+	__be32 reason;
+	u8 reserved[35];
+} __packed;			/* format defined in RFC7609 */
+
 struct smc_llc_msg_test_link {		/* type 0x07 */
 	struct smc_llc_hdr hd;
 	u8 user_data[16];
@@ -88,6 +118,8 @@ struct smc_llc_msg_delete_rkey {	/* type 0x09 */
 
 union smc_llc_msg {
 	struct smc_llc_msg_confirm_link confirm_link;
+	struct smc_llc_msg_add_link add_link;
+	struct smc_llc_msg_del_link delete_link;
 
 	struct smc_llc_msg_confirm_rkey confirm_rkey;
 	struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
@@ -176,7 +208,64 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
 	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
 	/* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
 	memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
-	confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+	confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+	/* send llc message */
+	rc = smc_wr_tx_send(link, pend);
+	return rc;
+}
+
+/* send ADD LINK request or response */
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
+			  union ib_gid *gid,
+			  enum smc_llc_reqresp reqresp)
+{
+	struct smc_llc_msg_add_link *addllc;
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	int rc;
+
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	addllc = (struct smc_llc_msg_add_link *)wr_buf;
+	memset(addllc, 0, sizeof(*addllc));
+	addllc->hd.common.type = SMC_LLC_ADD_LINK;
+	addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+	if (reqresp == SMC_LLC_RESP) {
+		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+		/* always reject more links for now */
+		addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+		addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+	}
+	memcpy(addllc->sender_mac, mac, ETH_ALEN);
+	memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+	/* send llc message */
+	rc = smc_wr_tx_send(link, pend);
+	return rc;
+}
+
+/* send DELETE LINK request or response */
+int smc_llc_send_delete_link(struct smc_link *link,
+			     enum smc_llc_reqresp reqresp)
+{
+	struct smc_llc_msg_del_link *delllc;
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	int rc;
+
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	delllc = (struct smc_llc_msg_del_link *)wr_buf;
+	memset(delllc, 0, sizeof(*delllc));
+	delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+	delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+	if (reqresp == SMC_LLC_RESP)
+		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+	/* DEL_LINK_ALL because only 1 link supported */
+	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+	delllc->link_num = link->link_id;
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
@@ -239,12 +328,14 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
 		conf_rc = ENOTSUPP;
 
 	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		if (lgr->role == SMC_SERV) {
+		if (lgr->role == SMC_SERV &&
+		    link->state == SMC_LNK_ACTIVATING) {
 			link->llc_confirm_resp_rc = conf_rc;
 			complete(&link->llc_confirm_resp);
 		}
 	} else {
-		if (lgr->role == SMC_CLNT) {
+		if (lgr->role == SMC_CLNT &&
+		    link->state == SMC_LNK_ACTIVATING) {
 			link->llc_confirm_rc = conf_rc;
 			link->link_id = llc->link_num;
 			complete(&link->llc_confirm);
@@ -252,6 +343,55 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
 	}
 }
 
+static void smc_llc_rx_add_link(struct smc_link *link,
+				struct smc_llc_msg_add_link *llc)
+{
+	struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+						  lnk[SMC_SINGLE_LINK]);
+
+	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+		if (link->state == SMC_LNK_ACTIVATING)
+			complete(&link->llc_add_resp);
+	} else {
+		if (link->state == SMC_LNK_ACTIVATING) {
+			complete(&link->llc_add);
+			return;
+		}
+
+		if (lgr->role == SMC_SERV) {
+			smc_llc_send_add_link(link,
+					link->smcibdev->mac[link->ibport - 1],
+					&link->smcibdev->gid[link->ibport - 1],
+					SMC_LLC_REQ);
+
+		} else {
+			smc_llc_send_add_link(link,
+					link->smcibdev->mac[link->ibport - 1],
+					&link->smcibdev->gid[link->ibport - 1],
+					SMC_LLC_RESP);
+		}
+	}
+}
+
+static void smc_llc_rx_delete_link(struct smc_link *link,
+				   struct smc_llc_msg_del_link *llc)
+{
+	struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+						  lnk[SMC_SINGLE_LINK]);
+
+	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+		if (lgr->role == SMC_SERV)
+			smc_lgr_terminate(lgr);
+	} else {
+		if (lgr->role == SMC_SERV) {
+			smc_llc_send_delete_link(link, SMC_LLC_REQ);
+		} else {
+			smc_llc_send_delete_link(link, SMC_LLC_RESP);
+			smc_lgr_terminate(lgr);
+		}
+	}
+}
+
 static void smc_llc_rx_test_link(struct smc_link *link,
 				 struct smc_llc_msg_test_link *llc)
 {
@@ -343,6 +483,12 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 	case SMC_LLC_CONFIRM_LINK:
 		smc_llc_rx_confirm_link(link, &llc->confirm_link);
 		break;
+	case SMC_LLC_ADD_LINK:
+		smc_llc_rx_add_link(link, &llc->add_link);
+		break;
+	case SMC_LLC_DELETE_LINK:
+		smc_llc_rx_delete_link(link, &llc->delete_link);
+		break;
 	case SMC_LLC_CONFIRM_RKEY:
 		smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
 		break;
@@ -366,6 +512,14 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
 		.handler	= smc_llc_rx_handler,
 		.type		= SMC_LLC_TEST_LINK
 	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_ADD_LINK
+	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_DELETE_LINK
+	},
 	{
 		.handler	= smc_llc_rx_handler,
 		.type		= SMC_LLC_CONFIRM_RKEY
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 5573f0d0578e..e4a7d5e234d5 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -18,6 +18,7 @@
 #define SMC_LLC_FLAG_RESP		0x80
 
 #define SMC_LLC_WAIT_FIRST_TIME		(5 * HZ)
+#define SMC_LLC_WAIT_TIME		(2 * HZ)
 
 enum smc_llc_reqresp {
 	SMC_LLC_REQ,
@@ -26,6 +27,8 @@ enum smc_llc_reqresp {
 
 enum smc_llc_msg_type {
 	SMC_LLC_CONFIRM_LINK		= 0x01,
+	SMC_LLC_ADD_LINK		= 0x02,
+	SMC_LLC_DELETE_LINK		= 0x04,
 	SMC_LLC_CONFIRM_RKEY		= 0x06,
 	SMC_LLC_TEST_LINK		= 0x07,
 	SMC_LLC_CONFIRM_RKEY_CONT	= 0x08,
@@ -35,6 +38,10 @@ enum smc_llc_msg_type {
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
 			      enum smc_llc_reqresp reqresp);
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+			  enum smc_llc_reqresp reqresp);
+int smc_llc_send_delete_link(struct smc_link *link,
+			     enum smc_llc_reqresp reqresp);
 int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16],
 			   enum smc_llc_reqresp reqresp);
 int smc_llc_init(void) __init;

From 9651b9346f5bc85a4fef96789c756748483d9ee2 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Thu, 1 Mar 2018 13:51:33 +0100
Subject: [PATCH 0550/1678] net/smc: prevent new connections on link group

When the processing of a DELETE LINK message has started,
new connections should not be added to the link group that
is about to terminate.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  9 ---------
 net/smc/smc_core.c | 19 ++++++++++---------
 net/smc/smc_core.h |  1 +
 net/smc/smc_llc.c  |  1 +
 4 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5267ed19b67d..26684e086750 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -356,15 +356,6 @@ static void smc_link_save_peer_info(struct smc_link *link,
 	link->peer_mtu = clc->qp_mtu;
 }
 
-static void smc_lgr_forget(struct smc_link_group *lgr)
-{
-	spin_lock_bh(&smc_lgr_list.lock);
-	/* do not use this link group for new connections */
-	if (!list_empty(&lgr->list))
-		list_del_init(&lgr->list);
-	spin_unlock_bh(&smc_lgr_list.lock);
-}
-
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index eb343e15a723..702ce5f85e97 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -308,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr)
 	kfree(lgr);
 }
 
+void smc_lgr_forget(struct smc_link_group *lgr)
+{
+	spin_lock_bh(&smc_lgr_list.lock);
+	/* do not use this link group for new connections */
+	if (!list_empty(&lgr->list))
+		list_del_init(&lgr->list);
+	spin_unlock_bh(&smc_lgr_list.lock);
+}
+
 /* terminate linkgroup abnormally */
 void smc_lgr_terminate(struct smc_link_group *lgr)
 {
@@ -315,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
 	struct smc_sock *smc;
 	struct rb_node *node;
 
-	spin_lock_bh(&smc_lgr_list.lock);
-	if (list_empty(&lgr->list)) {
-		/* termination already triggered */
-		spin_unlock_bh(&smc_lgr_list.lock);
-		return;
-	}
-	/* do not use this link group for new connections */
-	list_del_init(&lgr->list);
-	spin_unlock_bh(&smc_lgr_list.lock);
+	smc_lgr_forget(lgr);
 
 	write_lock_bh(&lgr->conns_lock);
 	node = rb_first(&lgr->conns_all);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index cead2093c4b4..07e2a393e6d9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -197,6 +197,7 @@ struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 
 void smc_lgr_free(struct smc_link_group *lgr);
+void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 int smc_buf_create(struct smc_sock *smc);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 45b3e0211c39..54e8d6dc9201 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -384,6 +384,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 			smc_lgr_terminate(lgr);
 	} else {
 		if (lgr->role == SMC_SERV) {
+			smc_lgr_forget(lgr);
 			smc_llc_send_delete_link(link, SMC_LLC_REQ);
 		} else {
 			smc_llc_send_delete_link(link, SMC_LLC_RESP);

From e31a6f9067e3c30da1bea26ce2d1355af9577d13 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Fri, 23 Feb 2018 07:25:46 -0800
Subject: [PATCH 0551/1678] net: phylink: Remove redundant netdev.phydev
 assignment

As a part of working on MII time stamping infrastructure, I was trying
to figure out how netdev->phydev gets assigned, and I stumbled across
this.  Ever since the new phylink code came in, the field is assigned
twice.

The function, phylink_connect_phy(), calls

	phy_attach_direct()
	phylink_bringup_phy()

and phy_attach_direct() sets

	dev->phydev = phydev;

but phylink_bringup_phy() then sets the same field again:

	pl->netdev->phydev = phy;

Similarly, the function, phylink_of_phy_connect(), calls

	of_phy_attach()
		phy_attach_direct()
	phylink_bringup_phy()

The removal code is also duplicated:

phylink_disconnect_phy()
	pl->netdev->phydev = NULL;
	phy_disconnect()
		phy_detach()
			phydev->attached_dev->phydev = NULL;

This patch removes the redundant assignments, restricting manipulation
of the netdev.phydev field to phy_attach_direct() and phy_detach().

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 27327c917a59..7d1724072325 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -679,7 +679,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 
 	mutex_lock(&phy->lock);
 	mutex_lock(&pl->state_mutex);
-	pl->netdev->phydev = phy;
 	pl->phydev = phy;
 	linkmode_copy(pl->supported, supported);
 	linkmode_copy(pl->link_config.advertising, config.advertising);
@@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl)
 	if (phy) {
 		mutex_lock(&phy->lock);
 		mutex_lock(&pl->state_mutex);
-		pl->netdev->phydev = NULL;
 		pl->phydev = NULL;
 		mutex_unlock(&pl->state_mutex);
 		mutex_unlock(&phy->lock);

From 3a053b1a30dcb4e39569bcce2f4357509260db75 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 28 Feb 2018 15:59:15 +0200
Subject: [PATCH 0552/1678] net: Fix spelling mistake "greater then" ->
 "greater than"

Fix trivial spelling mistake "greater then" -> "greater than".

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 +-
 net/core/dev.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e2ab13687fb9..d4907b584b38 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb)
 	return false;
 }
 
-/* Reset all TX qdiscs greater then index of a device.  */
+/* Reset all TX qdiscs greater than index of a device.  */
 static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
 	struct Qdisc *qdisc;
diff --git a/net/core/dev.c b/net/core/dev.c
index 5bdcc5a161fe..40fb3aed5df2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc);
 
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
- * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {

From f1c02cfb7b30f5c9d9f4e383260abf89d89c3341 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 28 Feb 2018 16:40:08 +0100
Subject: [PATCH 0553/1678] ipv6: allow userspace to add IFA_F_OPTIMISTIC
 addresses

According to RFC 4429 (section 3.1), adding new IPv6 addresses as
optimistic addresses is acceptable, as long as the implementation
follows some rules:

   * Optimistic DAD SHOULD only be used when the implementation is aware
        that the address is based on a most likely unique interface
        identifier (such as in [RFC2464]), generated randomly [RFC3041],
        or by a well-distributed hash function [RFC3972] or assigned by
        Dynamic Host Configuration Protocol for IPv6 (DHCPv6) [RFC3315].
        Optimistic DAD SHOULD NOT be used for manually entered
        addresses.

Thus, it seems reasonable to allow userspace to set the optimistic flag
when adding new addresses.

We must not let userspace set NODAD + OPTIMISTIC, since if the kernel is
not performing DAD we would never clear the optimistic flag. We must
also ignore userspace's request to add OPTIMISTIC flag to addresses that
have already completed DAD (addresses that don't have the TENTATIVE
flag, or that have the DADFAILED flag).

Then we also need to clear the OPTIMISTIC flag on permanent addresses
when DAD fails. Otherwise, IFA_F_OPTIMISTIC addresses added by userspace
can still be used after DAD has failed, because in
ipv6_chk_addr_and_flags(), IFA_F_OPTIMISTIC overrides IFA_F_TENTATIVE.

Setting IFA_F_OPTIMISTIC from userspace is conditional on
CONFIG_IPV6_OPTIMISTIC_DAD and the optimistic_dad sysctl.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4facfe0b1888..b5fd116c046a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
 #endif
 }
 
+static bool ipv6_allow_optimistic_dad(struct net *net,
+				      struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	if (!idev)
+		return false;
+	if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+		return false;
+
+	return true;
+#else
+	return false;
+#endif
+}
+
 static int ipv6_get_saddr_eval(struct net *net,
 			       struct ipv6_saddr_score *score,
 			       struct ipv6_saddr_dst *dst,
@@ -1968,6 +1983,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		spin_lock_bh(&ifp->lock);
 		addrconf_del_dad_work(ifp);
 		ifp->flags |= IFA_F_TENTATIVE;
+		if (dad_failed)
+			ifp->flags &= ~IFA_F_OPTIMISTIC;
 		spin_unlock_bh(&ifp->lock);
 		if (dad_failed)
 			ipv6_ifa_notify(0, ifp);
@@ -4501,6 +4518,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	    (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
 		return -EINVAL;
 
+	if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+		ifa_flags &= ~IFA_F_OPTIMISTIC;
+
 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
 	if (addrconf_finite_timeout(timeout)) {
 		expires = jiffies_to_clock_t(timeout * HZ);
@@ -4574,6 +4594,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct in6_addr *pfx, *peer_pfx;
 	struct inet6_ifaddr *ifa;
 	struct net_device *dev;
+	struct inet6_dev *idev;
 	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
 	u32 ifa_flags;
 	int err;
@@ -4607,7 +4628,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	/* We ignore other flags so far. */
 	ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-		     IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+		     IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+	idev = ipv6_find_idev(dev);
+	if (IS_ERR(idev))
+		return PTR_ERR(idev);
+
+	if (!ipv6_allow_optimistic_dad(net, idev))
+		ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+	if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+		NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+		return -EINVAL;
+	}
 
 	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
 	if (!ifa) {

From c8745e07d5d676b2cb49f90baec1ac5e66cb7383 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Thu, 1 Mar 2018 14:47:40 +0900
Subject: [PATCH 0554/1678] samples/bpf: detach prog from cgroup

test_cgrp2_sock.sh and test_cgrp2_sock2.sh tests keep the program
attached to cgroup even after completion.
Using detach functionality of test_cgrp2_sock in both scripts.

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/test_cgrp2_sock.sh  | 1 +
 samples/bpf/test_cgrp2_sock2.sh | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 8ee0371a100a..9f6174236856 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -61,6 +61,7 @@ cleanup_and_exit()
 
 	[ -n "$msg" ] && echo "ERROR: $msg"
 
+	test_cgrp2_sock -d ${CGRP_MNT}/sockopts
 	ip li del cgrp2_sock
 	umount ${CGRP_MNT}
 
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
index fc4e64d00cb3..0f396a86e0cb 100755
--- a/samples/bpf/test_cgrp2_sock2.sh
+++ b/samples/bpf/test_cgrp2_sock2.sh
@@ -28,6 +28,9 @@ function attach_bpf {
 }
 
 function cleanup {
+	if [ -d /tmp/cgroupv2/foo ]; then
+		test_cgrp2_sock -d /tmp/cgroupv2/foo
+	fi
 	ip link del veth0b
 	ip netns delete at_ns0
 	umount /tmp/cgroupv2

From 5f6f845b608a3fa13e5da0584eea5803710cf708 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 1 Mar 2018 17:55:37 -0800
Subject: [PATCH 0555/1678] fib_rules: FRA_GENERIC_POLICY updates for ip proto,
 sport and dport attrs

Fixes: bfff4862653b ("net: fib_rules: support for match on ip_proto, sport and dport")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 6dd0a00653ae..1c9e17c11953 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -112,7 +112,11 @@ struct fib_rule_notifier_info {
 	[FRA_GOTO]	= { .type = NLA_U32 }, \
 	[FRA_L3MDEV]	= { .type = NLA_U8 }, \
 	[FRA_UID_RANGE]	= { .len = sizeof(struct fib_rule_uid_range) }, \
-	[FRA_PROTOCOL]  = { .type = NLA_U8 }
+	[FRA_PROTOCOL]  = { .type = NLA_U8 }, \
+	[FRA_IP_PROTO]  = { .type = NLA_U8 }, \
+	[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \
+	[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {

From 198979be6c16aa62025d5a47680f2c7849b7e64c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 1 Mar 2018 13:49:30 -0800
Subject: [PATCH 0556/1678] selftests: forwarding: Only check tc version for tc
 tests

Capabilities of tc command are irrelevant for router tests:
    $ ./router.sh
    SKIP: iproute2 too old, missing shared block support

Add a CHECK_TC flag and only check tc capabilities if set. Add flag to
tc_common.sh and have it sourced before lib.sh

Also, if the command lacks some feature the test should exit non-0.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/bridge_vlan_aware.sh       |  1 +
 tools/testing/selftests/net/forwarding/lib.sh | 29 ++++++++++++-------
 .../selftests/net/forwarding/tc_actions.sh    |  2 +-
 .../selftests/net/forwarding/tc_chains.sh     |  2 +-
 .../selftests/net/forwarding/tc_common.sh     |  2 ++
 .../selftests/net/forwarding/tc_flower.sh     |  2 +-
 .../selftests/net/forwarding/tc_shblocks.sh   |  2 +-
 7 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 651998e70557..75d922438bc9 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NUM_NETIFS=4
+CHECK_TC="yes"
 source lib.sh
 
 h1_create()
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 23866a685f77..d0af52109360 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -19,26 +19,33 @@ fi
 ##############################################################################
 # Sanity checks
 
+check_tc_version()
+{
+	tc -j &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing JSON support"
+		exit 1
+	fi
+
+	tc filter help 2>&1 | grep block &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing shared block support"
+		exit 1
+	fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
 	exit 0
 fi
 
-tc -j &> /dev/null
-if [[ $? -ne 0 ]]; then
-	echo "SKIP: iproute2 too old, missing JSON support"
-	exit 0
-fi
-
-tc filter help 2>&1 | grep block &> /dev/null
-if [[ $? -ne 0 ]]; then
-	echo "SKIP: iproute2 too old, missing shared block support"
-	exit 0
+if [[ "$CHECK_TC" = "yes" ]]; then
+	check_tc_version
 fi
 
 if [[ ! -x "$(command -v jq)" ]]; then
 	echo "SKIP: jq not installed"
-	exit 0
+	exit 1
 fi
 
 if [[ ! -x "$(command -v $MZ)" ]]; then
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 84234317a25d..8ab5cf0a960b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NUM_NETIFS=4
-source lib.sh
 source tc_common.sh
+source lib.sh
 
 tcflags="skip_hw"
 
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 94c114ad8b44..2fd15226974b 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NUM_NETIFS=2
-source lib.sh
 source tc_common.sh
+source lib.sh
 
 tcflags="skip_hw"
 
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index acd0b520241c..9d3b64a2a264 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+CHECK_TC="yes"
+
 tc_check_packets()
 {
 	local id=$1
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 026a4ea4b2fb..032b882adfc0 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NUM_NETIFS=2
-source lib.sh
 source tc_common.sh
+source lib.sh
 
 tcflags="skip_hw"
 
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index cfc8a2ace388..077b98048ef4 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 NUM_NETIFS=4
-source lib.sh
 source tc_common.sh
+source lib.sh
 
 tcflags="skip_hw"
 

From 5ee0902a0d584bd765165448fec8bb1409d5d027 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 1 Mar 2018 13:49:31 -0800
Subject: [PATCH 0557/1678] selftests: forwarding: Handle 0 for packet
 difference in multipath tests

If the packet stats have a difference of 0, the test output shows:
INFO: Expected ratio 2.00 Measured ratio
Runtime error (func=(main), adr=9): Divide by zero
(standard_in) 2: syntax error
(standard_in) 1: syntax error
./router_multipath.sh: line 187: test: : integer expression expected
TEST: Multipath                                                     [FAIL]
	Too large discrepancy between expected and measured ratios

Handle the 0 and display a cleaner message:
INFO: Running IPv6 multipath tests
TEST: Multipath                                                     [FAIL]
	Packet difference is 0

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/router_multipath.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index d31888e3133e..c1a437c5b15f 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -168,6 +168,13 @@ multipath_eval()
 
        RET=0
 
+       if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+              check_err 1 "Packet difference is 0"
+              log_test "Multipath"
+              log_info "Expected ratio $weights_ratio"
+              return
+       fi
+
        if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
                weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
 		       | bc -l)

From 993d337c242e292dca89de4558dd8ebfc216fc83 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 1 Mar 2018 13:49:32 -0800
Subject: [PATCH 0558/1678] selftests: forwarding: Use PING6 instead of ping
 for ipv6 multipath test

On Debian jessie ping can not handle IPv6 addresses so the command
fails. Use PING6 which is set to ping6.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/router_multipath.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index c1a437c5b15f..745fb40bfa44 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -249,7 +249,7 @@ multipath6_test()
 
        # Generate 16384 echo requests, each with a random flow label.
        for _ in $(seq 1 16384); do
-	       ip vrf exec vrf-h1 ping 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+	       ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
        done
 
        t1_rp12=$(link_stats_tx_packets_get $rp12)

From 36b4c0adad2662464a5a392b592311524bd3a854 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 1 Mar 2018 13:49:33 -0800
Subject: [PATCH 0559/1678] selftests: forwarding: Add description to the
 multipath tests

Add a better description to the summary for multipath tests. e.g.,

INFO: Running IPv6 multipath tests
TEST: ECMP                                               [PASS]
INFO: Expected ratio 1.00 Measured ratio 1.02
TEST: Weighted MP 2:1                                    [PASS]
INFO: Expected ratio 2.00 Measured ratio 2.02
TEST: Weighted MP 11:45                                  [PASS]
INFO: Expected ratio 4.09 Measured ratio 4.03

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/router_multipath.sh        | 37 ++++++++++---------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 745fb40bfa44..55595305a604 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -160,10 +160,11 @@ router2_destroy()
 
 multipath_eval()
 {
-       local weight_rp12=$1
-       local weight_rp13=$2
-       local packets_rp12=$3
-       local packets_rp13=$4
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local packets_rp12=$4
+       local packets_rp13=$5
        local weights_ratio packets_ratio diff
 
        RET=0
@@ -192,14 +193,15 @@ multipath_eval()
 
        test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
        check_err $? "Too large discrepancy between expected and measured ratios"
-       log_test "Multipath"
+       log_test "$desc"
        log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
 }
 
 multipath4_test()
 {
-       local weight_rp12=$1
-       local weight_rp13=$2
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
        local hash_policy
@@ -224,7 +226,7 @@ multipath4_test()
 
        let "packets_rp12 = $t1_rp12 - $t0_rp12"
        let "packets_rp13 = $t1_rp13 - $t0_rp13"
-       multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
 
        # Restore settings.
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
@@ -235,8 +237,9 @@ multipath4_test()
 
 multipath6_test()
 {
-       local weight_rp12=$1
-       local weight_rp13=$2
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
 
@@ -257,7 +260,7 @@ multipath6_test()
 
        let "packets_rp12 = $t1_rp12 - $t0_rp12"
        let "packets_rp13 = $t1_rp13 - $t0_rp13"
-       multipath_eval $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
 
        ip route replace 2001:db8:2::/64 vrf vrf-r1 \
 	       nexthop via fe80:2::22 dev $rp12 \
@@ -267,14 +270,14 @@ multipath6_test()
 multipath_test()
 {
 	log_info "Running IPv4 multipath tests"
-	multipath4_test 1 1
-	multipath4_test 2 1
-	multipath4_test 11 45
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
 
 	log_info "Running IPv6 multipath tests"
-	multipath6_test 1 1
-	multipath6_test 2 1
-	multipath6_test 11 45
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
 }
 
 setup_prepare()

From c967226b1c6caf6888af0f54ea08cb79384cef86 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Thu, 1 Mar 2018 18:29:28 -0500
Subject: [PATCH 0560/1678] net/mac89x0: Remove redundant code

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cirrus/mac89x0.c | 32 ---------------------------
 1 file changed, 32 deletions(-)

diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 977d4c2c759d..4fe0ae93ab36 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -115,14 +115,9 @@ struct net_local {
 	int rx_mode;
 	int curr_rx_cfg;
         int send_underrun;      /* keep track of how many underruns in a row we get */
-	struct sk_buff *skb;
 };
 
 /* Index to functions, as function prototypes. */
-
-#if 0
-extern void reset_chip(struct net_device *dev);
-#endif
 static int net_open(struct net_device *dev);
 static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t net_interrupt(int irq, void *dev_id);
@@ -132,10 +127,6 @@ static int net_close(struct net_device *dev);
 static struct net_device_stats *net_get_stats(struct net_device *dev);
 static int set_mac_address(struct net_device *dev, void *addr);
 
-
-/* Example routines you must write ;->. */
-#define tx_done(dev) 1
-
 /* For reading/writing registers ISA-style */
 static inline int
 readreg_io(struct net_device *dev, int portno)
@@ -297,24 +288,6 @@ out:
 	return ERR_PTR(err);
 }
 
-#if 0
-/* This is useful for something, but I don't know what yet. */
-void __init reset_chip(struct net_device *dev)
-{
-	int reset_start_time;
-
-	writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
-
-	/* wait 30 ms */
-	msleep_interruptible(30);
-
-	/* Wait until the chip is reset */
-	reset_start_time = jiffies;
-	while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2)
-		;
-}
-#endif
-
 /* Open/initialize the board.  This is called (in the current kernel)
    sometime after booting when the 'ifconfig' program is run.
 
@@ -416,11 +389,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
 	struct net_local *lp;
 	int ioaddr, status;
 
-	if (dev == NULL) {
-		printk ("net_interrupt(): irq %d for unknown device.\n", irq);
-		return IRQ_NONE;
-	}
-
 	ioaddr = dev->base_addr;
 	lp = netdev_priv(dev);
 

From 43bf2e6d69dd6c2cea7a28763893a3dff34b7873 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Thu, 1 Mar 2018 18:29:28 -0500
Subject: [PATCH 0561/1678] net/mac89x0: Convert to platform_driver

Apparently these Dayna cards don't have a pseudoslot declaration ROM
which means they can't be probed like NuBus cards.

Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/m68k/mac/config.c                |  4 ++
 drivers/net/Space.c                   |  3 --
 drivers/net/ethernet/cirrus/mac89x0.c | 68 ++++++++++++---------------
 include/net/Space.h                   |  1 -
 4 files changed, 33 insertions(+), 43 deletions(-)

diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index d3d435248a24..c73eb8209555 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1088,6 +1088,10 @@ int __init mac_platform_init(void)
 	    macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
 		platform_device_register_simple("macsonic", -1, NULL, 0);
 
+	if (macintosh_config->expansion_type == MAC_EXP_PDS ||
+	    macintosh_config->expansion_type == MAC_EXP_PDS_COMM)
+		platform_device_register_simple("mac89x0", -1, NULL, 0);
+
 	if (macintosh_config->ether_type == MAC_ETHER_MACE)
 		platform_device_register_simple("macmace", -1, NULL, 0);
 
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 64333ec999ac..3afda6561434 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -113,9 +113,6 @@ static struct devprobe2 m68k_probes[] __initdata = {
 #endif
 #ifdef CONFIG_MVME147_NET	/* MVME147 internal Ethernet */
 	{mvme147lance_probe, 0},
-#endif
-#ifdef CONFIG_MAC89x0
-	{mac89x0_probe, 0},
 #endif
 	{NULL, 0},
 };
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 4fe0ae93ab36..911139abbe20 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -93,6 +93,7 @@ static const char version[] =
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
@@ -105,6 +106,10 @@ static const char version[] =
 
 #include "cs89x0.h"
 
+static int debug;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
+
 static unsigned int net_debug = NET_DEBUG;
 
 /* Information that need to be kept for each board. */
@@ -167,10 +172,9 @@ static const struct net_device_ops mac89x0_netdev_ops = {
 
 /* Probe for the CS8900 card in slot E.  We won't bother looking
    anywhere else until we have a really good reason to do so. */
-struct net_device * __init mac89x0_probe(int unit)
+static int mac89x0_device_probe(struct platform_device *pdev)
 {
 	struct net_device *dev;
-	static int once_is_enough;
 	struct net_local *lp;
 	static unsigned version_printed;
 	int i, slot;
@@ -180,21 +184,11 @@ struct net_device * __init mac89x0_probe(int unit)
 	int err = -ENODEV;
 	struct nubus_rsrc *fres;
 
-	if (!MACH_IS_MAC)
-		return ERR_PTR(-ENODEV);
+	net_debug = debug;
 
 	dev = alloc_etherdev(sizeof(struct net_local));
 	if (!dev)
-		return ERR_PTR(-ENOMEM);
-
-	if (unit >= 0) {
-		sprintf(dev->name, "eth%d", unit);
-		netdev_boot_setup_check(dev);
-	}
-
-	if (once_is_enough)
-		goto out;
-	once_is_enough = 1;
+		return -ENOMEM;
 
 	/* We might have to parameterize this later */
 	slot = 0xE;
@@ -221,6 +215,8 @@ struct net_device * __init mac89x0_probe(int unit)
 	if (sig != swab16(CHIP_EISA_ID_SIG))
 		goto out;
 
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
 	/* Initialize the net_device structure. */
 	lp = netdev_priv(dev);
 
@@ -280,12 +276,14 @@ struct net_device * __init mac89x0_probe(int unit)
 	err = register_netdev(dev);
 	if (err)
 		goto out1;
-	return NULL;
+
+	platform_set_drvdata(pdev, dev);
+	return 0;
 out1:
 	nubus_writew(0, dev->base_addr + ADD_PORT);
 out:
 	free_netdev(dev);
-	return ERR_PTR(err);
+	return err;
 }
 
 /* Open/initialize the board.  This is called (in the current kernel)
@@ -571,32 +569,24 @@ static int set_mac_address(struct net_device *dev, void *addr)
 	return 0;
 }
 
-#ifdef MODULE
-
-static struct net_device *dev_cs89x0;
-static int debug;
-
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
 MODULE_LICENSE("GPL");
 
-int __init
-init_module(void)
+static int mac89x0_device_remove(struct platform_device *pdev)
 {
-	net_debug = debug;
-        dev_cs89x0 = mac89x0_probe(-1);
-	if (IS_ERR(dev_cs89x0)) {
-                printk(KERN_WARNING "mac89x0.c: No card found\n");
-		return PTR_ERR(dev_cs89x0);
-	}
+	struct net_device *dev = platform_get_drvdata(pdev);
+
+	unregister_netdev(dev);
+	nubus_writew(0, dev->base_addr + ADD_PORT);
+	free_netdev(dev);
 	return 0;
 }
 
-void
-cleanup_module(void)
-{
-	unregister_netdev(dev_cs89x0);
-	nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT);
-	free_netdev(dev_cs89x0);
-}
-#endif /* MODULE */
+static struct platform_driver mac89x0_platform_driver = {
+	.probe = mac89x0_device_probe,
+	.remove = mac89x0_device_remove,
+	.driver = {
+		.name = "mac89x0",
+	},
+};
+
+module_platform_driver(mac89x0_platform_driver);
diff --git a/include/net/Space.h b/include/net/Space.h
index 336da258885a..9cce0d80d37a 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit);
 struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
-struct net_device *mac89x0_probe(int unit);
 struct net_device *cops_probe(int unit);
 struct net_device *ltpc_probe(void);
 

From 86c2666eac0e68f0cd21659c7888c95b029761d1 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Thu, 1 Mar 2018 18:29:28 -0500
Subject: [PATCH 0562/1678] net/mac89x0: Fix and modernize log messages

Fix log message fragments that no longer produce the desired output
since the behaviour of printk() was changed.
Add missing printk severity levels.
Drop deprecated "out of memory" message as per checkpatch advice.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cirrus/mac89x0.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 911139abbe20..9a266496a538 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -56,6 +56,8 @@
   local_irq_{dis,en}able()
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 static const char version[] =
 "cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
 
@@ -245,16 +247,14 @@ static int mac89x0_device_probe(struct platform_device *pdev)
 	if (net_debug && version_printed++ == 0)
 		printk(version);
 
-	printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx",
-	       dev->name,
-	       lp->chip_type==CS8900?'0':'2',
-	       lp->chip_type==CS8920M?"M":"",
-	       lp->chip_revision,
-	       dev->base_addr);
+	pr_info("cs89%c0%s rev %c found at %#8lx\n",
+		lp->chip_type == CS8900 ? '0' : '2',
+		lp->chip_type == CS8920M ? "M" : "",
+		lp->chip_revision, dev->base_addr);
 
 	/* Try to read the MAC address */
 	if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) {
-		printk("\nmac89x0: No EEPROM, giving up now.\n");
+		pr_info("No EEPROM, giving up now.\n");
 		goto out1;
         } else {
                 for (i = 0; i < ETH_ALEN; i += 2) {
@@ -269,7 +269,7 @@ static int mac89x0_device_probe(struct platform_device *pdev)
 
 	/* print the IRQ and ethernet address. */
 
-	printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr);
+	pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq);
 
 	dev->netdev_ops		= &mac89x0_netdev_ops;
 
@@ -472,7 +472,6 @@ net_rx(struct net_device *dev)
 	/* Malloc up new buffer. */
 	skb = alloc_skb(length, GFP_ATOMIC);
 	if (skb == NULL) {
-		printk("%s: Memory squeeze, dropping packet.\n", dev->name);
 		dev->stats.rx_dropped++;
 		return;
 	}
@@ -560,7 +559,7 @@ static int set_mac_address(struct net_device *dev, void *addr)
 		return -EADDRNOTAVAIL;
 
 	memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
-	printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr);
+	netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr);
 
 	/* set the Ethernet address */
 	for (i=0; i < ETH_ALEN/2; i++)

From 5a3b7504c493c71efee722f871299a0abe19b484 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Thu, 1 Mar 2018 18:29:28 -0500
Subject: [PATCH 0563/1678] net/mac89x0: Replace custom debug logging with
 netif_* calls

Adopt the conventional style of debug logging because it is both
shorter and more flexible.
Remove the 'version_printed' flag as the version will be printed
only once anyway (when the module loads).

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cirrus/mac89x0.c | 47 +++++++++------------------
 1 file changed, 15 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 9a266496a538..3f8fe8fd79cc 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -61,18 +61,6 @@
 static const char version[] =
 "cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
 
-/* ======================= configure the driver here ======================= */
-
-/* use 0 for production, 1 for verification, >2 for debug */
-#ifndef NET_DEBUG
-#define NET_DEBUG 0
-#endif
-
-/* ======================= end of configuration ======================= */
-
-
-/* Always include 'config.h' first in case the user wants to turn on
-   or override something. */
 #include <linux/module.h>
 
 /*
@@ -108,14 +96,13 @@ static const char version[] =
 
 #include "cs89x0.h"
 
-static int debug;
+static int debug = -1;
 module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)");
-
-static unsigned int net_debug = NET_DEBUG;
+MODULE_PARM_DESC(debug, "debug message level");
 
 /* Information that need to be kept for each board. */
 struct net_local {
+	int msg_enable;
 	int chip_type;		/* one of: CS8900, CS8920, CS8920M */
 	char chip_revision;	/* revision letter of the chip ('A'...) */
 	int send_cmd;		/* the propercommand used to send a packet. */
@@ -178,7 +165,6 @@ static int mac89x0_device_probe(struct platform_device *pdev)
 {
 	struct net_device *dev;
 	struct net_local *lp;
-	static unsigned version_printed;
 	int i, slot;
 	unsigned rev_type = 0;
 	unsigned long ioaddr;
@@ -186,8 +172,6 @@ static int mac89x0_device_probe(struct platform_device *pdev)
 	int err = -ENODEV;
 	struct nubus_rsrc *fres;
 
-	net_debug = debug;
-
 	dev = alloc_etherdev(sizeof(struct net_local));
 	if (!dev)
 		return -ENOMEM;
@@ -222,6 +206,8 @@ static int mac89x0_device_probe(struct platform_device *pdev)
 	/* Initialize the net_device structure. */
 	lp = netdev_priv(dev);
 
+	lp->msg_enable = netif_msg_init(debug, 0);
+
 	/* Fill in the 'dev' fields. */
 	dev->base_addr = ioaddr;
 	dev->mem_start = (unsigned long)
@@ -244,8 +230,7 @@ static int mac89x0_device_probe(struct platform_device *pdev)
 	if (lp->chip_type != CS8900 && lp->chip_revision >= 'C')
 		lp->send_cmd = TX_NOW;
 
-	if (net_debug && version_printed++ == 0)
-		printk(version);
+	netif_dbg(lp, drv, dev, "%s", version);
 
 	pr_info("cs89%c0%s rev %c found at %#8lx\n",
 		lp->chip_type == CS8900 ? '0' : '2',
@@ -345,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev)
 	struct net_local *lp = netdev_priv(dev);
 	unsigned long flags;
 
-	if (net_debug > 3)
-		printk("%s: sent %d byte packet of type %x\n",
-		       dev->name, skb->len,
-		       (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-		       | skb->data[ETH_ALEN+ETH_ALEN+1]);
+	netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n",
+		  skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+		  skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
 	/* keep the upload from being interrupted, since we
 	   ask the chip to start transmitting before the
@@ -398,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
            faster than you can read them off, you're screwed.  Hasta la
            vista, baby!  */
 	while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) {
-		if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
+		netif_dbg(lp, intr, dev, "status=%04x\n", status);
 		switch(status & ISQ_EVENT_MASK) {
 		case ISQ_RECEIVER_EVENT:
 			/* Got a packet(s). */
@@ -428,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
 				netif_wake_queue(dev);
 			}
 			if (status & TX_UNDERRUN) {
-				if (net_debug > 0) printk("%s: transmit underrun\n", dev->name);
+				netif_dbg(lp, tx_err, dev, "transmit underrun\n");
                                 lp->send_underrun++;
                                 if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381;
                                 else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL;
@@ -449,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id)
 static void
 net_rx(struct net_device *dev)
 {
+	struct net_local *lp = netdev_priv(dev);
 	struct sk_buff *skb;
 	int status, length;
 
@@ -480,10 +464,9 @@ net_rx(struct net_device *dev)
 	skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame),
 				length);
 
-	if (net_debug > 3)printk("%s: received %d byte packet of type %x\n",
-                                 dev->name, length,
-                                 (skb->data[ETH_ALEN+ETH_ALEN] << 8)
-				 | skb->data[ETH_ALEN+ETH_ALEN+1]);
+	netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n",
+		  length, skb->data[ETH_ALEN + ETH_ALEN] << 8 |
+		  skb->data[ETH_ALEN + ETH_ALEN + 1]);
 
         skb->protocol=eth_type_trans(skb,dev);
 	netif_rx(skb);

From 6ed33d3a06e6ded784a9fe2780b33d6cd2ba2df7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Mar 2018 16:08:55 -0800
Subject: [PATCH 0564/1678] net: phy: aquantia: Utilize genphy_c45_aneg_done()

The driver duplicates what the generic function does, so use the generic
function intead.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/phy/aquantia.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c
index e8ae50e1255e..319edc9c8ec7 100644
--- a/drivers/net/phy/aquantia.c
+++ b/drivers/net/phy/aquantia.c
@@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
-static int aquantia_aneg_done(struct phy_device *phydev)
-{
-	int reg;
-
-	reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-	return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-}
-
 static int aquantia_config_intr(struct phy_device *phydev)
 {
 	int err;
@@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = {
 	.name		= "Aquantia AQ1202",
 	.features	= PHY_AQUANTIA_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
-	.aneg_done	= aquantia_aneg_done,
+	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
 	.config_intr	= aquantia_config_intr,
 	.ack_interrupt	= aquantia_ack_interrupt,
@@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = {
 	.name		= "Aquantia AQ2104",
 	.features	= PHY_AQUANTIA_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
-	.aneg_done	= aquantia_aneg_done,
+	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
 	.config_intr	= aquantia_config_intr,
 	.ack_interrupt	= aquantia_ack_interrupt,
@@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = {
 	.name		= "Aquantia AQR105",
 	.features	= PHY_AQUANTIA_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
-	.aneg_done	= aquantia_aneg_done,
+	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
 	.config_intr	= aquantia_config_intr,
 	.ack_interrupt	= aquantia_ack_interrupt,
@@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = {
 	.name		= "Aquantia AQR106",
 	.features	= PHY_AQUANTIA_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
-	.aneg_done	= aquantia_aneg_done,
+	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
 	.config_intr	= aquantia_config_intr,
 	.ack_interrupt	= aquantia_ack_interrupt,
@@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = {
 	.name		= "Aquantia AQR107",
 	.features	= PHY_AQUANTIA_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
-	.aneg_done	= aquantia_aneg_done,
+	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
 	.config_intr	= aquantia_config_intr,
 	.ack_interrupt	= aquantia_ack_interrupt,
@@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = {
 	.name		= "Aquantia AQR405",
 	.features	= PHY_AQUANTIA_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
-	.aneg_done	= aquantia_aneg_done,
+	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
 	.config_intr	= aquantia_config_intr,
 	.ack_interrupt	= aquantia_ack_interrupt,

From e8a714e086e42972fd0e2d59e90c28eb2d839429 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Mar 2018 16:08:56 -0800
Subject: [PATCH 0565/1678] net: phy: Export gen10g_* functions

In order to remove a fair amount of duplication in the different 10G PHY
drivers, export all gen10g_* functions to be able to make use of those.
While we are at it, rename gen10g_soft_reset() to gen10g_no_soft_reset()
to illustrate what it does.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/phy/phy-c45.c | 20 +++++++++++++-------
 include/linux/phy.h       |  8 ++++++++
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a4576859afae..0017eddc24db 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 
-static int gen10g_config_aneg(struct phy_device *phydev)
+int gen10g_config_aneg(struct phy_device *phydev)
 {
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
-static int gen10g_read_status(struct phy_device *phydev)
+int gen10g_read_status(struct phy_device *phydev)
 {
 	u32 mmd_mask = phydev->c45_ids.devices_in_package;
 	int ret;
@@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_read_status);
 
-static int gen10g_soft_reset(struct phy_device *phydev)
+int gen10g_no_soft_reset(struct phy_device *phydev)
 {
 	/* Do nothing for now */
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
 
-static int gen10g_config_init(struct phy_device *phydev)
+int gen10g_config_init(struct phy_device *phydev)
 {
 	/* Temporarily just say we support everything */
 	phydev->supported = SUPPORTED_10000baseT_Full;
@@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_config_init);
 
-static int gen10g_suspend(struct phy_device *phydev)
+int gen10g_suspend(struct phy_device *phydev)
 {
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_suspend);
 
-static int gen10g_resume(struct phy_device *phydev)
+int gen10g_resume(struct phy_device *phydev)
 {
 	return 0;
 }
+EXPORT_SYMBOL_GPL(gen10g_resume);
 
 struct phy_driver genphy_10g_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
 	.name           = "Generic 10G PHY",
-	.soft_reset	= gen10g_soft_reset,
+	.soft_reset	= gen10g_no_soft_reset,
 	.config_init    = gen10g_config_init,
 	.features       = 0,
 	.config_aneg    = gen10g_config_aneg,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5a0c3e53e7c2..6e38c699b753 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 
+/* The gen10g_* functions are the old Clause 45 stub */
+int gen10g_config_aneg(struct phy_device *phydev);
+int gen10g_read_status(struct phy_device *phydev);
+int gen10g_no_soft_reset(struct phy_device *phydev);
+int gen10g_config_init(struct phy_device *phydev);
+int gen10g_suspend(struct phy_device *phydev);
+int gen10g_resume(struct phy_device *phydev);
+
 static inline int phy_read_status(struct phy_device *phydev)
 {
 	if (!phydev->drv)

From aebc78a40b8862e27f374fc7016cd9b4654488ed Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Mar 2018 16:08:57 -0800
Subject: [PATCH 0566/1678] net: phy: teranetics: Utilize generic functions

Update teranetics_aneg_done() to use genphy_c45_aneg_done() instead of
duplicating that code, and switch to gen10g_* functions where
appropriate instead of maintaining identical copies doing nothing.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/phy/teranetics.c | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index fb2cef764e9a..22f3bdd8206c 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2");
 				MDIO_PHYXS_LNSTAT_SYNC3 | \
 				MDIO_PHYXS_LNSTAT_ALIGN)
 
-static int teranetics_config_init(struct phy_device *phydev)
-{
-	phydev->supported = SUPPORTED_10000baseT_Full;
-	phydev->advertising = SUPPORTED_10000baseT_Full;
-
-	return 0;
-}
-
-static int teranetics_soft_reset(struct phy_device *phydev)
-{
-	return 0;
-}
-
 static int teranetics_aneg_done(struct phy_device *phydev)
 {
-	int reg;
-
 	/* auto negotiation state can only be checked when using copper
 	 * port, if using fiber port, just lie it's done.
 	 */
-	if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) {
-		reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
-		return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE);
-	}
+	if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93))
+		return genphy_c45_aneg_done(phydev);
 
 	return 1;
 }
 
-static int teranetics_config_aneg(struct phy_device *phydev)
-{
-	return 0;
-}
-
 static int teranetics_read_status(struct phy_device *phydev)
 {
 	int reg;
@@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = {
 	.phy_id		= PHY_ID_TN2020,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Teranetics TN2020",
-	.soft_reset	= teranetics_soft_reset,
+	.soft_reset	= gen10g_no_soft_reset,
 	.aneg_done	= teranetics_aneg_done,
-	.config_init    = teranetics_config_init,
-	.config_aneg    = teranetics_config_aneg,
+	.config_init    = gen10g_config_init,
+	.config_aneg    = gen10g_config_aneg,
 	.read_status	= teranetics_read_status,
 	.match_phy_device = teranetics_match_phy_device,
 },

From 0adfdb667ab5a4c05765d0f3272afee82b2abf93 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Mar 2018 16:08:58 -0800
Subject: [PATCH 0567/1678] net: phy: cortina: Utilize generic functions

cortina_soft_reset() does the same thing as gen10g_soft_reset(), and
cortina_config_aneg() is actually doing what gen10g_config_init() does
for 10G capable PHYs.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/phy/cortina.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index 9442db221834..8022cd317f62 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
 			    MII_ADDR_C45 | regnum);
 }
 
-static int cortina_config_aneg(struct phy_device *phydev)
-{
-	phydev->supported = SUPPORTED_10000baseT_Full;
-	phydev->advertising = SUPPORTED_10000baseT_Full;
-
-	return 0;
-}
-
 static int cortina_read_status(struct phy_device *phydev)
 {
 	int gpio_int_status, ret = 0;
@@ -61,11 +53,6 @@ err:
 	return ret;
 }
 
-static int cortina_soft_reset(struct phy_device *phydev)
-{
-	return 0;
-}
-
 static int cortina_probe(struct phy_device *phydev)
 {
 	u32 phy_id = 0;
@@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = {
 	.phy_id		= PHY_ID_CS4340,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Cortina CS4340",
-	.config_aneg	= cortina_config_aneg,
+	.config_init	= gen10g_config_init,
+	.config_aneg	= gen10g_config_aneg,
 	.read_status	= cortina_read_status,
-	.soft_reset	= cortina_soft_reset,
+	.soft_reset	= gen10g_no_soft_reset,
 	.probe		= cortina_probe,
 },
 };

From 568477045f800ee1c68cb18f1e6e800c4365615c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Mar 2018 16:08:59 -0800
Subject: [PATCH 0568/1678] net: phy: marvell10g: Utilize
 gen10g_no_soft_reset()

We do the same thing as the generic function: nothing, so utilize it.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/phy/marvell10g.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 8a0bd98fdec7..4f1efa02a930 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev)
 	return 0;
 }
 
-/*
- * Resetting the MV88X3310 causes it to become non-responsive.  Avoid
- * setting the reset bit(s).
- */
-static int mv3310_soft_reset(struct phy_device *phydev)
-{
-	return 0;
-}
-
 static int mv3310_config_init(struct phy_device *phydev)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
@@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = {
 				  SUPPORTED_10000baseT_Full |
 				  SUPPORTED_Backplane,
 		.probe		= mv3310_probe,
-		.soft_reset	= mv3310_soft_reset,
+		.soft_reset	= gen10g_no_soft_reset,
 		.config_init	= mv3310_config_init,
 		.config_aneg	= mv3310_config_aneg,
 		.aneg_done	= mv3310_aneg_done,

From 7797dc41417eb0e03f39fc4356f7635bcdef108e Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Tue, 27 Feb 2018 18:22:40 -0500
Subject: [PATCH 0569/1678] socket: skip checking sk_err for
 recvmmsg(MSG_ERRQUEUE)

recvmmsg does not call ___sys_recvmsg when sk_err is set.
That is fine for normal reads but, for MSG_ERRQUEUE, recvmmsg
should always call ___sys_recvmsg regardless of sk->sk_err to
be able to clear error queue. Otherwise, users are not able to
drain the error queue using recvmmsg.

Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 645d32b4872c..d9a1ac233b35 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 	if (!sock)
 		return err;
 
-	err = sock_error(sock->sk);
-	if (err) {
-		datagrams = err;
-		goto out_put;
+	if (likely(!(flags & MSG_ERRQUEUE))) {
+		err = sock_error(sock->sk);
+		if (err) {
+			datagrams = err;
+			goto out_put;
+		}
 	}
 
 	entry = mmsg;

From 3197239d24dcecef1dbc260733b377ade731b748 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:16 -0800
Subject: [PATCH 0570/1678] tools: bpftool: remove unnecessary 'if' to reduce
 indentation

It is obvious we could use 'else if' instead of start a new 'if' in the
touched code.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/prog.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e549e329be82..950d11dd42ab 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -777,27 +777,25 @@ static int do_dump(int argc, char **argv)
 
 		if (json_output)
 			jsonw_null(json_wtr);
-	} else {
-		if (member_len == &info.jited_prog_len) {
-			const char *name = NULL;
+	} else if (member_len == &info.jited_prog_len) {
+		const char *name = NULL;
 
-			if (info.ifindex) {
-				name = ifindex_to_bfd_name_ns(info.ifindex,
-							      info.netns_dev,
-							      info.netns_ino);
-				if (!name)
-					goto err_free;
-			}
-
-			disasm_print_insn(buf, *member_len, opcodes, name);
-		} else {
-			kernel_syms_load(&dd);
-			if (json_output)
-				dump_xlated_json(&dd, buf, *member_len, opcodes);
-			else
-				dump_xlated_plain(&dd, buf, *member_len, opcodes);
-			kernel_syms_destroy(&dd);
+		if (info.ifindex) {
+			name = ifindex_to_bfd_name_ns(info.ifindex,
+						      info.netns_dev,
+						      info.netns_ino);
+			if (!name)
+				goto err_free;
 		}
+
+		disasm_print_insn(buf, *member_len, opcodes, name);
+	} else {
+		kernel_syms_load(&dd);
+		if (json_output)
+			dump_xlated_json(&dd, buf, *member_len, opcodes);
+		else
+			dump_xlated_plain(&dd, buf, *member_len, opcodes);
+		kernel_syms_destroy(&dd);
 	}
 
 	free(buf);

From 73bb5b4f8f1468f7e433a30d8fbe820b24578991 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:17 -0800
Subject: [PATCH 0571/1678] tools: bpftool: factor out xlated dump related code
 into separate file

This patch factors out those code of dumping xlated eBPF instructions into
xlated_dumper.[h|c].

They are quite independent dumper functions, so better to be kept
separately.

New dumper support will be added in later patches in this set.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/prog.c          | 255 +-------------------------
 tools/bpf/bpftool/xlated_dumper.c | 286 ++++++++++++++++++++++++++++++
 tools/bpf/bpftool/xlated_dumper.h |  62 +++++++
 3 files changed, 349 insertions(+), 254 deletions(-)
 create mode 100644 tools/bpf/bpftool/xlated_dumper.c
 create mode 100644 tools/bpf/bpftool/xlated_dumper.h

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 950d11dd42ab..c5afee9838e6 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -48,7 +48,7 @@
 #include <libbpf.h>
 
 #include "main.h"
-#include "disasm.h"
+#include "xlated_dumper.h"
 
 static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_UNSPEC]		= "unspec",
@@ -407,259 +407,6 @@ static int do_show(int argc, char **argv)
 	return err;
 }
 
-#define SYM_MAX_NAME	256
-
-struct kernel_sym {
-	unsigned long address;
-	char name[SYM_MAX_NAME];
-};
-
-struct dump_data {
-	unsigned long address_call_base;
-	struct kernel_sym *sym_mapping;
-	__u32 sym_count;
-	char scratch_buff[SYM_MAX_NAME];
-};
-
-static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
-{
-	return ((struct kernel_sym *)sym_a)->address -
-	       ((struct kernel_sym *)sym_b)->address;
-}
-
-static void kernel_syms_load(struct dump_data *dd)
-{
-	struct kernel_sym *sym;
-	char buff[256];
-	void *tmp, *address;
-	FILE *fp;
-
-	fp = fopen("/proc/kallsyms", "r");
-	if (!fp)
-		return;
-
-	while (!feof(fp)) {
-		if (!fgets(buff, sizeof(buff), fp))
-			break;
-		tmp = realloc(dd->sym_mapping,
-			      (dd->sym_count + 1) *
-			      sizeof(*dd->sym_mapping));
-		if (!tmp) {
-out:
-			free(dd->sym_mapping);
-			dd->sym_mapping = NULL;
-			fclose(fp);
-			return;
-		}
-		dd->sym_mapping = tmp;
-		sym = &dd->sym_mapping[dd->sym_count];
-		if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
-			continue;
-		sym->address = (unsigned long)address;
-		if (!strcmp(sym->name, "__bpf_call_base")) {
-			dd->address_call_base = sym->address;
-			/* sysctl kernel.kptr_restrict was set */
-			if (!sym->address)
-				goto out;
-		}
-		if (sym->address)
-			dd->sym_count++;
-	}
-
-	fclose(fp);
-
-	qsort(dd->sym_mapping, dd->sym_count,
-	      sizeof(*dd->sym_mapping), kernel_syms_cmp);
-}
-
-static void kernel_syms_destroy(struct dump_data *dd)
-{
-	free(dd->sym_mapping);
-}
-
-static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
-					     unsigned long key)
-{
-	struct kernel_sym sym = {
-		.address = key,
-	};
-
-	return dd->sym_mapping ?
-	       bsearch(&sym, dd->sym_mapping, dd->sym_count,
-		       sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
-}
-
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	vprintf(fmt, args);
-	va_end(args);
-}
-
-static const char *print_call_pcrel(struct dump_data *dd,
-				    struct kernel_sym *sym,
-				    unsigned long address,
-				    const struct bpf_insn *insn)
-{
-	if (sym)
-		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-			 "%+d#%s", insn->off, sym->name);
-	else
-		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-			 "%+d#0x%lx", insn->off, address);
-	return dd->scratch_buff;
-}
-
-static const char *print_call_helper(struct dump_data *dd,
-				     struct kernel_sym *sym,
-				     unsigned long address)
-{
-	if (sym)
-		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-			 "%s", sym->name);
-	else
-		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-			 "0x%lx", address);
-	return dd->scratch_buff;
-}
-
-static const char *print_call(void *private_data,
-			      const struct bpf_insn *insn)
-{
-	struct dump_data *dd = private_data;
-	unsigned long address = dd->address_call_base + insn->imm;
-	struct kernel_sym *sym;
-
-	sym = kernel_syms_search(dd, address);
-	if (insn->src_reg == BPF_PSEUDO_CALL)
-		return print_call_pcrel(dd, sym, address, insn);
-	else
-		return print_call_helper(dd, sym, address);
-}
-
-static const char *print_imm(void *private_data,
-			     const struct bpf_insn *insn,
-			     __u64 full_imm)
-{
-	struct dump_data *dd = private_data;
-
-	if (insn->src_reg == BPF_PSEUDO_MAP_FD)
-		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-			 "map[id:%u]", insn->imm);
-	else
-		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
-			 "0x%llx", (unsigned long long)full_imm);
-	return dd->scratch_buff;
-}
-
-static void dump_xlated_plain(struct dump_data *dd, void *buf,
-			      unsigned int len, bool opcodes)
-{
-	const struct bpf_insn_cbs cbs = {
-		.cb_print	= print_insn,
-		.cb_call	= print_call,
-		.cb_imm		= print_imm,
-		.private_data	= dd,
-	};
-	struct bpf_insn *insn = buf;
-	bool double_insn = false;
-	unsigned int i;
-
-	for (i = 0; i < len / sizeof(*insn); i++) {
-		if (double_insn) {
-			double_insn = false;
-			continue;
-		}
-
-		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
-		printf("% 4d: ", i);
-		print_bpf_insn(&cbs, NULL, insn + i, true);
-
-		if (opcodes) {
-			printf("       ");
-			fprint_hex(stdout, insn + i, 8, " ");
-			if (double_insn && i < len - 1) {
-				printf(" ");
-				fprint_hex(stdout, insn + i + 1, 8, " ");
-			}
-			printf("\n");
-		}
-	}
-}
-
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
-{
-	unsigned int l = strlen(fmt);
-	char chomped_fmt[l];
-	va_list args;
-
-	va_start(args, fmt);
-	if (l > 0) {
-		strncpy(chomped_fmt, fmt, l - 1);
-		chomped_fmt[l - 1] = '\0';
-	}
-	jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
-	va_end(args);
-}
-
-static void dump_xlated_json(struct dump_data *dd, void *buf,
-			     unsigned int len, bool opcodes)
-{
-	const struct bpf_insn_cbs cbs = {
-		.cb_print	= print_insn_json,
-		.cb_call	= print_call,
-		.cb_imm		= print_imm,
-		.private_data	= dd,
-	};
-	struct bpf_insn *insn = buf;
-	bool double_insn = false;
-	unsigned int i;
-
-	jsonw_start_array(json_wtr);
-	for (i = 0; i < len / sizeof(*insn); i++) {
-		if (double_insn) {
-			double_insn = false;
-			continue;
-		}
-		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
-
-		jsonw_start_object(json_wtr);
-		jsonw_name(json_wtr, "disasm");
-		print_bpf_insn(&cbs, NULL, insn + i, true);
-
-		if (opcodes) {
-			jsonw_name(json_wtr, "opcodes");
-			jsonw_start_object(json_wtr);
-
-			jsonw_name(json_wtr, "code");
-			jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
-
-			jsonw_name(json_wtr, "src_reg");
-			jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
-
-			jsonw_name(json_wtr, "dst_reg");
-			jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
-
-			jsonw_name(json_wtr, "off");
-			print_hex_data_json((uint8_t *)(&insn[i].off), 2);
-
-			jsonw_name(json_wtr, "imm");
-			if (double_insn && i < len - 1)
-				print_hex_data_json((uint8_t *)(&insn[i].imm),
-						    12);
-			else
-				print_hex_data_json((uint8_t *)(&insn[i].imm),
-						    4);
-			jsonw_end_object(json_wtr);
-		}
-		jsonw_end_object(json_wtr);
-	}
-	jsonw_end_array(json_wtr);
-}
-
 static int do_dump(int argc, char **argv)
 {
 	struct bpf_prog_info info = {};
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
new file mode 100644
index 000000000000..dfcdf794c9d1
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+	return ((struct kernel_sym *)sym_a)->address -
+	       ((struct kernel_sym *)sym_b)->address;
+}
+
+void kernel_syms_load(struct dump_data *dd)
+{
+	struct kernel_sym *sym;
+	char buff[256];
+	void *tmp, *address;
+	FILE *fp;
+
+	fp = fopen("/proc/kallsyms", "r");
+	if (!fp)
+		return;
+
+	while (!feof(fp)) {
+		if (!fgets(buff, sizeof(buff), fp))
+			break;
+		tmp = realloc(dd->sym_mapping,
+			      (dd->sym_count + 1) *
+			      sizeof(*dd->sym_mapping));
+		if (!tmp) {
+out:
+			free(dd->sym_mapping);
+			dd->sym_mapping = NULL;
+			fclose(fp);
+			return;
+		}
+		dd->sym_mapping = tmp;
+		sym = &dd->sym_mapping[dd->sym_count];
+		if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+			continue;
+		sym->address = (unsigned long)address;
+		if (!strcmp(sym->name, "__bpf_call_base")) {
+			dd->address_call_base = sym->address;
+			/* sysctl kernel.kptr_restrict was set */
+			if (!sym->address)
+				goto out;
+		}
+		if (sym->address)
+			dd->sym_count++;
+	}
+
+	fclose(fp);
+
+	qsort(dd->sym_mapping, dd->sym_count,
+	      sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+void kernel_syms_destroy(struct dump_data *dd)
+{
+	free(dd->sym_mapping);
+}
+
+static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+					     unsigned long key)
+{
+	struct kernel_sym sym = {
+		.address = key,
+	};
+
+	return dd->sym_mapping ?
+	       bsearch(&sym, dd->sym_mapping, dd->sym_count,
+		       sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	va_end(args);
+}
+
+static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+	unsigned int l = strlen(fmt);
+	char chomped_fmt[l];
+	va_list args;
+
+	va_start(args, fmt);
+	if (l > 0) {
+		strncpy(chomped_fmt, fmt, l - 1);
+		chomped_fmt[l - 1] = '\0';
+	}
+	jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+	va_end(args);
+}
+
+static const char *print_call_pcrel(struct dump_data *dd,
+				    struct kernel_sym *sym,
+				    unsigned long address,
+				    const struct bpf_insn *insn)
+{
+	if (sym)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "%+d#%s", insn->off, sym->name);
+	else
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "%+d#0x%lx", insn->off, address);
+	return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+				     struct kernel_sym *sym,
+				     unsigned long address)
+{
+	if (sym)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "%s", sym->name);
+	else
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "0x%lx", address);
+	return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+			      const struct bpf_insn *insn)
+{
+	struct dump_data *dd = private_data;
+	unsigned long address = dd->address_call_base + insn->imm;
+	struct kernel_sym *sym;
+
+	sym = kernel_syms_search(dd, address);
+	if (insn->src_reg == BPF_PSEUDO_CALL)
+		return print_call_pcrel(dd, sym, address, insn);
+	else
+		return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+			     const struct bpf_insn *insn,
+			     __u64 full_imm)
+{
+	struct dump_data *dd = private_data;
+
+	if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "map[id:%u]", insn->imm);
+	else
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "0x%llx", (unsigned long long)full_imm);
+	return dd->scratch_buff;
+}
+
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+		      bool opcodes)
+{
+	const struct bpf_insn_cbs cbs = {
+		.cb_print	= print_insn_json,
+		.cb_call	= print_call,
+		.cb_imm		= print_imm,
+		.private_data	= dd,
+	};
+	struct bpf_insn *insn = buf;
+	bool double_insn = false;
+	unsigned int i;
+
+	jsonw_start_array(json_wtr);
+	for (i = 0; i < len / sizeof(*insn); i++) {
+		if (double_insn) {
+			double_insn = false;
+			continue;
+		}
+		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "disasm");
+		print_bpf_insn(&cbs, NULL, insn + i, true);
+
+		if (opcodes) {
+			jsonw_name(json_wtr, "opcodes");
+			jsonw_start_object(json_wtr);
+
+			jsonw_name(json_wtr, "code");
+			jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+			jsonw_name(json_wtr, "src_reg");
+			jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+			jsonw_name(json_wtr, "dst_reg");
+			jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+			jsonw_name(json_wtr, "off");
+			print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+			jsonw_name(json_wtr, "imm");
+			if (double_insn && i < len - 1)
+				print_hex_data_json((uint8_t *)(&insn[i].imm),
+						    12);
+			else
+				print_hex_data_json((uint8_t *)(&insn[i].imm),
+						    4);
+			jsonw_end_object(json_wtr);
+		}
+		jsonw_end_object(json_wtr);
+	}
+	jsonw_end_array(json_wtr);
+}
+
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+		       bool opcodes)
+{
+	const struct bpf_insn_cbs cbs = {
+		.cb_print	= print_insn,
+		.cb_call	= print_call,
+		.cb_imm		= print_imm,
+		.private_data	= dd,
+	};
+	struct bpf_insn *insn = buf;
+	bool double_insn = false;
+	unsigned int i;
+
+	for (i = 0; i < len / sizeof(*insn); i++) {
+		if (double_insn) {
+			double_insn = false;
+			continue;
+		}
+
+		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+		printf("% 4d: ", i);
+		print_bpf_insn(&cbs, NULL, insn + i, true);
+
+		if (opcodes) {
+			printf("       ");
+			fprint_hex(stdout, insn + i, 8, " ");
+			if (double_insn && i < len - 1) {
+				printf(" ");
+				fprint_hex(stdout, insn + i + 1, 8, " ");
+			}
+			printf("\n");
+		}
+	}
+}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
new file mode 100644
index 000000000000..208285f9ab70
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME	256
+
+struct kernel_sym {
+	unsigned long address;
+	char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+	unsigned long address_call_base;
+	struct kernel_sym *sym_mapping;
+	__u32 sym_count;
+	char scratch_buff[SYM_MAX_NAME];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+		      bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+		       bool opcodes);
+
+#endif

From 80331752e990e3f13df1c1d45496424d8eccc29b Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:18 -0800
Subject: [PATCH 0572/1678] tools: bpftool: detect sub-programs from the eBPF
 sequence

This patch detect all sub-programs from the eBPF sequence and keep the
information in the new CFG data structure.

The detection algorithm is basically the same as the one in verifier except
we need to use insn->off instead of insn->imm to get the pc-relative call
offset. Because verifier has modified insn->off/insn->imm during finishing
the verification.

Also, we don't need to do some sanity checks as verifier has done them.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/cfg.c | 147 ++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/cfg.h |  43 ++++++++++++
 2 files changed, 190 insertions(+)
 create mode 100644 tools/bpf/bpftool/cfg.c
 create mode 100644 tools/bpf/bpftool/cfg.h

diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
new file mode 100644
index 000000000000..8330dedb3576
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+
+struct cfg {
+	struct list_head funcs;
+	int func_num;
+};
+
+struct func_node {
+	struct list_head l;
+	struct bpf_insn *start;
+	struct bpf_insn *end;
+	int idx;
+};
+
+#define func_prev(func)		list_prev_entry(func, l)
+#define func_next(func)		list_next_entry(func, l)
+#define cfg_first_func(cfg)	\
+	list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg)	\
+	list_last_entry(&cfg->funcs, struct func_node, l)
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+	struct func_node *new_func, *func;
+
+	list_for_each_entry(func, &cfg->funcs, l) {
+		if (func->start == insn)
+			return func;
+		else if (func->start > insn)
+			break;
+	}
+
+	func = func_prev(func);
+	new_func = calloc(1, sizeof(*new_func));
+	if (!new_func) {
+		p_err("OOM when allocating FUNC node");
+		return NULL;
+	}
+	new_func->start = insn;
+	new_func->idx = cfg->func_num;
+	list_add(&new_func->l, &func->l);
+	cfg->func_num++;
+
+	return new_func;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+				struct bpf_insn *end)
+{
+	struct func_node *func, *last_func;
+
+	func = cfg_append_func(cfg, cur);
+	if (!func)
+		return true;
+
+	for (; cur < end; cur++) {
+		if (cur->code != (BPF_JMP | BPF_CALL))
+			continue;
+		if (cur->src_reg != BPF_PSEUDO_CALL)
+			continue;
+		func = cfg_append_func(cfg, cur + cur->off + 1);
+		if (!func)
+			return true;
+	}
+
+	last_func = cfg_last_func(cfg);
+	last_func->end = end - 1;
+	func = cfg_first_func(cfg);
+	list_for_each_entry_from(func, &last_func->l, l) {
+		func->end = func_next(func)->start - 1;
+	}
+
+	return false;
+}
+
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+	int cnt = len / sizeof(*insn);
+
+	INIT_LIST_HEAD(&cfg->funcs);
+
+	return cfg_partition_funcs(cfg, insn, insn + cnt);
+}
+
+static void cfg_destroy(struct cfg *cfg)
+{
+	struct func_node *func, *func2;
+
+	list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+		list_del(&func->l);
+		free(func);
+	}
+}
+
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+	struct bpf_insn *insn = buf;
+	struct cfg cfg;
+
+	memset(&cfg, 0, sizeof(cfg));
+	if (cfg_build(&cfg, insn, len))
+		return;
+
+	cfg_destroy(&cfg);
+}
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
new file mode 100644
index 000000000000..2cc9bd990b13
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */

From 0824611f9b38d556327916ed0702a7323a88c58b Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:19 -0800
Subject: [PATCH 0573/1678] tools: bpftool: partition basic-block for each
 function in the CFG

This patch partition basic-block for each function in the CFG. The
algorithm is simple, we identify basic-block head in a first traversal,
then second traversal to identify the tail.

We could build extended basic-block (EBB) in next steps. EBB could make the
graph more readable when the eBPF sequence is big.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/cfg.c | 118 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 117 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 8330dedb3576..152df904d421 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -49,17 +49,32 @@ struct cfg {
 
 struct func_node {
 	struct list_head l;
+	struct list_head bbs;
 	struct bpf_insn *start;
 	struct bpf_insn *end;
 	int idx;
+	int bb_num;
+};
+
+struct bb_node {
+	struct list_head l;
+	struct bpf_insn *head;
+	struct bpf_insn *tail;
+	int idx;
 };
 
 #define func_prev(func)		list_prev_entry(func, l)
 #define func_next(func)		list_next_entry(func, l)
+#define bb_prev(bb)		list_prev_entry(bb, l)
+#define bb_next(bb)		list_next_entry(bb, l)
 #define cfg_first_func(cfg)	\
 	list_first_entry(&cfg->funcs, struct func_node, l)
 #define cfg_last_func(cfg)	\
 	list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func)	\
+	list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func)	\
+	list_last_entry(&func->bbs, struct bb_node, l)
 
 static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
 {
@@ -86,6 +101,30 @@ static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
 	return new_func;
 }
 
+static struct bb_node *func_append_bb(struct func_node *func,
+				      struct bpf_insn *insn)
+{
+	struct bb_node *new_bb, *bb;
+
+	list_for_each_entry(bb, &func->bbs, l) {
+		if (bb->head == insn)
+			return bb;
+		else if (bb->head > insn)
+			break;
+	}
+
+	bb = bb_prev(bb);
+	new_bb = calloc(1, sizeof(*new_bb));
+	if (!new_bb) {
+		p_err("OOM when allocating BB node");
+		return NULL;
+	}
+	new_bb->head = insn;
+	list_add(&new_bb->l, &bb->l);
+
+	return new_bb;
+}
+
 static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
 				struct bpf_insn *end)
 {
@@ -115,13 +154,83 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
 	return false;
 }
 
+static bool func_partition_bb_head(struct func_node *func)
+{
+	struct bpf_insn *cur, *end;
+	struct bb_node *bb;
+
+	cur = func->start;
+	end = func->end;
+	INIT_LIST_HEAD(&func->bbs);
+	bb = func_append_bb(func, cur);
+	if (!bb)
+		return true;
+
+	for (; cur <= end; cur++) {
+		if (BPF_CLASS(cur->code) == BPF_JMP) {
+			u8 opcode = BPF_OP(cur->code);
+
+			if (opcode == BPF_EXIT || opcode == BPF_CALL)
+				continue;
+
+			bb = func_append_bb(func, cur + cur->off + 1);
+			if (!bb)
+				return true;
+
+			if (opcode != BPF_JA) {
+				bb = func_append_bb(func, cur + 1);
+				if (!bb)
+					return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+static void func_partition_bb_tail(struct func_node *func)
+{
+	struct bb_node *bb, *last;
+	unsigned int bb_idx = 0;
+
+	last = func_last_bb(func);
+	last->tail = func->end;
+	bb = func_first_bb(func);
+	list_for_each_entry_from(bb, &last->l, l) {
+		bb->tail = bb_next(bb)->head - 1;
+		bb->idx = bb_idx++;
+	}
+
+	last->idx = bb_idx++;
+	func->bb_num = bb_idx;
+}
+
+static bool func_partition_bb(struct func_node *func)
+{
+	if (func_partition_bb_head(func))
+		return true;
+
+	func_partition_bb_tail(func);
+
+	return false;
+}
+
 static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
 {
 	int cnt = len / sizeof(*insn);
+	struct func_node *func;
 
 	INIT_LIST_HEAD(&cfg->funcs);
 
-	return cfg_partition_funcs(cfg, insn, insn + cnt);
+	if (cfg_partition_funcs(cfg, insn, insn + cnt))
+		return true;
+
+	list_for_each_entry(func, &cfg->funcs, l) {
+		if (func_partition_bb(func))
+			return true;
+	}
+
+	return false;
 }
 
 static void cfg_destroy(struct cfg *cfg)
@@ -129,6 +238,13 @@ static void cfg_destroy(struct cfg *cfg)
 	struct func_node *func, *func2;
 
 	list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+		struct bb_node *bb, *bb2;
+
+		list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+			list_del(&bb->l);
+			free(bb);
+		}
+
 		list_del(&func->l);
 		free(func);
 	}

From 6e9eb7e3f8705e5e3f1a6a2b47450ec606ab012b Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:20 -0800
Subject: [PATCH 0574/1678] tools: bpftool: add out edges for each basic-block

This patch adds out edges for each basic-block. We will need these out
edges to finish the .dot graph drawing.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/cfg.c | 162 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 160 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 152df904d421..c57f88cf2834 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -58,15 +58,32 @@ struct func_node {
 
 struct bb_node {
 	struct list_head l;
+	struct list_head e_prevs;
+	struct list_head e_succs;
 	struct bpf_insn *head;
 	struct bpf_insn *tail;
 	int idx;
 };
 
+#define EDGE_FLAG_EMPTY		0x0
+#define EDGE_FLAG_FALLTHROUGH	0x1
+#define EDGE_FLAG_JUMP		0x2
+struct edge_node {
+	struct list_head l;
+	struct bb_node *src;
+	struct bb_node *dst;
+	int flags;
+};
+
+#define ENTRY_BLOCK_INDEX	0
+#define EXIT_BLOCK_INDEX	1
+#define NUM_FIXED_BLOCKS	2
 #define func_prev(func)		list_prev_entry(func, l)
 #define func_next(func)		list_next_entry(func, l)
 #define bb_prev(bb)		list_prev_entry(bb, l)
 #define bb_next(bb)		list_next_entry(bb, l)
+#define entry_bb(func)		func_first_bb(func)
+#define exit_bb(func)		func_last_bb(func)
 #define cfg_first_func(cfg)	\
 	list_first_entry(&cfg->funcs, struct func_node, l)
 #define cfg_last_func(cfg)	\
@@ -120,11 +137,30 @@ static struct bb_node *func_append_bb(struct func_node *func,
 		return NULL;
 	}
 	new_bb->head = insn;
+	INIT_LIST_HEAD(&new_bb->e_prevs);
+	INIT_LIST_HEAD(&new_bb->e_succs);
 	list_add(&new_bb->l, &bb->l);
 
 	return new_bb;
 }
 
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+	struct bb_node *bb;
+
+	bb = calloc(1, sizeof(*bb));
+	if (!bb) {
+		p_err("OOM when allocating BB node");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&bb->e_prevs);
+	INIT_LIST_HEAD(&bb->e_succs);
+	list_add(&bb->l, after);
+
+	return bb;
+}
+
 static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
 				struct bpf_insn *end)
 {
@@ -190,8 +226,8 @@ static bool func_partition_bb_head(struct func_node *func)
 
 static void func_partition_bb_tail(struct func_node *func)
 {
+	unsigned int bb_idx = NUM_FIXED_BLOCKS;
 	struct bb_node *bb, *last;
-	unsigned int bb_idx = 0;
 
 	last = func_last_bb(func);
 	last->tail = func->end;
@@ -205,6 +241,23 @@ static void func_partition_bb_tail(struct func_node *func)
 	func->bb_num = bb_idx;
 }
 
+static bool func_add_special_bb(struct func_node *func)
+{
+	struct bb_node *bb;
+
+	bb = func_insert_dummy_bb(&func->bbs);
+	if (!bb)
+		return true;
+	bb->idx = ENTRY_BLOCK_INDEX;
+
+	bb = func_insert_dummy_bb(&func_last_bb(func)->l);
+	if (!bb)
+		return true;
+	bb->idx = EXIT_BLOCK_INDEX;
+
+	return false;
+}
+
 static bool func_partition_bb(struct func_node *func)
 {
 	if (func_partition_bb_head(func))
@@ -215,6 +268,96 @@ static bool func_partition_bb(struct func_node *func)
 	return false;
 }
 
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+						struct bpf_insn *insn)
+{
+	struct bb_node *bb;
+
+	list_for_each_entry(bb, &func->bbs, l) {
+		if (bb->head == insn)
+			return bb;
+	}
+
+	return NULL;
+}
+
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+				  int flags)
+{
+	struct edge_node *e;
+
+	e = calloc(1, sizeof(*e));
+	if (!e) {
+		p_err("OOM when allocating edge node");
+		return NULL;
+	}
+
+	if (src)
+		e->src = src;
+	if (dst)
+		e->dst = dst;
+
+	e->flags |= flags;
+
+	return e;
+}
+
+static bool func_add_bb_edges(struct func_node *func)
+{
+	struct bpf_insn *insn;
+	struct edge_node *e;
+	struct bb_node *bb;
+
+	bb = entry_bb(func);
+	e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+	if (!e)
+		return true;
+	list_add_tail(&e->l, &bb->e_succs);
+
+	bb = exit_bb(func);
+	e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+	if (!e)
+		return true;
+	list_add_tail(&e->l, &bb->e_prevs);
+
+	bb = entry_bb(func);
+	bb = bb_next(bb);
+	list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+		e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+		if (!e)
+			return true;
+		e->src = bb;
+
+		insn = bb->tail;
+		if (BPF_CLASS(insn->code) != BPF_JMP ||
+		    BPF_OP(insn->code) == BPF_EXIT) {
+			e->dst = bb_next(bb);
+			e->flags |= EDGE_FLAG_FALLTHROUGH;
+			list_add_tail(&e->l, &bb->e_succs);
+			continue;
+		} else if (BPF_OP(insn->code) == BPF_JA) {
+			e->dst = func_search_bb_with_head(func,
+							  insn + insn->off + 1);
+			e->flags |= EDGE_FLAG_JUMP;
+			list_add_tail(&e->l, &bb->e_succs);
+			continue;
+		}
+
+		e->dst = bb_next(bb);
+		e->flags |= EDGE_FLAG_FALLTHROUGH;
+		list_add_tail(&e->l, &bb->e_succs);
+
+		e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+		if (!e)
+			return true;
+		e->src = bb;
+		e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+		list_add_tail(&e->l, &bb->e_succs);
+	}
+
+	return false;
+}
+
 static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
 {
 	int cnt = len / sizeof(*insn);
@@ -226,7 +369,10 @@ static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
 		return true;
 
 	list_for_each_entry(func, &cfg->funcs, l) {
-		if (func_partition_bb(func))
+		if (func_partition_bb(func) || func_add_special_bb(func))
+			return true;
+
+		if (func_add_bb_edges(func))
 			return true;
 	}
 
@@ -241,6 +387,18 @@ static void cfg_destroy(struct cfg *cfg)
 		struct bb_node *bb, *bb2;
 
 		list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+			struct edge_node *e, *e2;
+
+			list_for_each_entry_safe(e, e2, &bb->e_prevs, l) {
+				list_del(&e->l);
+				free(e);
+			}
+
+			list_for_each_entry_safe(e, e2, &bb->e_succs, l) {
+				list_del(&e->l);
+				free(e);
+			}
+
 			list_del(&bb->l);
 			free(bb);
 		}

From efcef17a6d6575dacca22bce69e139354c5a2170 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:21 -0800
Subject: [PATCH 0575/1678] tools: bpftool: generate .dot graph from CFG
 information

This patch let bpftool print .dot graph file into stdout.

This graph is generated by the following steps:

  - iterate through the function list.
  - generate basic-block(BB) definition for each BB in the function.
  - draw out edges to connect BBs.

This patch is the initial support, the layout and decoration of the .dot
graph could be improved.

Also, it will be useful if we could visualize some performance data from
static analysis.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/cfg.c           | 93 +++++++++++++++++++++++++++++++
 tools/bpf/bpftool/xlated_dumper.c | 52 +++++++++++++++++
 tools/bpf/bpftool/xlated_dumper.h |  2 +
 3 files changed, 147 insertions(+)

diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index c57f88cf2834..f30b3a4a840b 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -41,6 +41,7 @@
 
 #include "cfg.h"
 #include "main.h"
+#include "xlated_dumper.h"
 
 struct cfg {
 	struct list_head funcs;
@@ -408,6 +409,96 @@ static void cfg_destroy(struct cfg *cfg)
 	}
 }
 
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+	const char *shape;
+
+	if (bb->idx == ENTRY_BLOCK_INDEX || bb->idx == EXIT_BLOCK_INDEX)
+		shape = "Mdiamond";
+	else
+		shape = "record";
+
+	printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+	       func->idx, bb->idx, shape);
+
+	if (bb->idx == ENTRY_BLOCK_INDEX) {
+		printf("ENTRY");
+	} else if (bb->idx == EXIT_BLOCK_INDEX) {
+		printf("EXIT");
+	} else {
+		unsigned int start_idx;
+		struct dump_data dd = {};
+
+		printf("{");
+		kernel_syms_load(&dd);
+		start_idx = bb->head - func->start;
+		dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
+		kernel_syms_destroy(&dd);
+		printf("}");
+	}
+
+	printf("\"];\n\n");
+}
+
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+	const char *style = "\"solid,bold\"";
+	const char *color = "black";
+	int func_idx = func->idx;
+	struct edge_node *e;
+	int weight = 10;
+
+	if (list_empty(&bb->e_succs))
+		return;
+
+	list_for_each_entry(e, &bb->e_succs, l) {
+		printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+		       func_idx, e->src->idx, func_idx, e->dst->idx,
+		       style, color, weight);
+		printf("];\n");
+	}
+}
+
+static void func_output_bb_def(struct func_node *func)
+{
+	struct bb_node *bb;
+
+	list_for_each_entry(bb, &func->bbs, l) {
+		draw_bb_node(func, bb);
+	}
+}
+
+static void func_output_edges(struct func_node *func)
+{
+	int func_idx = func->idx;
+	struct bb_node *bb;
+
+	list_for_each_entry(bb, &func->bbs, l) {
+		draw_bb_succ_edges(func, bb);
+	}
+
+	/* Add an invisible edge from ENTRY to EXIT, this is to
+	 * improve the graph layout.
+	 */
+	printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+	       func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
+}
+
+static void cfg_dump(struct cfg *cfg)
+{
+	struct func_node *func;
+
+	printf("digraph \"DOT graph for eBPF program\" {\n");
+	list_for_each_entry(func, &cfg->funcs, l) {
+		printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+		       func->idx, func->idx);
+		func_output_bb_def(func);
+		func_output_edges(func);
+		printf("}\n");
+	}
+	printf("}\n");
+}
+
 void dump_xlated_cfg(void *buf, unsigned int len)
 {
 	struct bpf_insn *insn = buf;
@@ -417,5 +508,7 @@ void dump_xlated_cfg(void *buf, unsigned int len)
 	if (cfg_build(&cfg, insn, len))
 		return;
 
+	cfg_dump(&cfg);
+
 	cfg_destroy(&cfg);
 }
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index dfcdf794c9d1..20da835e9e38 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -123,6 +123,37 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
 	va_end(args);
 }
 
+static void
+print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+	char buf[64], *p;
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	p = buf;
+	while (*p != '\0') {
+		if (*p == '\n') {
+			memmove(p + 3, p, strlen(buf) + 1 - (p - buf));
+			/* Align each instruction dump row left. */
+			*p++ = '\\';
+			*p++ = 'l';
+			/* Output multiline concatenation. */
+			*p++ = '\\';
+		} else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+			memmove(p + 1, p, strlen(buf) + 1 - (p - buf));
+			/* Escape special character. */
+			*p++ = '\\';
+		}
+
+		p++;
+	}
+
+	printf("%s", buf);
+}
+
 static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
 {
 	unsigned int l = strlen(fmt);
@@ -284,3 +315,24 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
 		}
 	}
 }
+
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+			   unsigned int start_idx)
+{
+	const struct bpf_insn_cbs cbs = {
+		.cb_print	= print_insn_for_graph,
+		.cb_call	= print_call,
+		.cb_imm		= print_imm,
+		.private_data	= dd,
+	};
+	struct bpf_insn *insn_start = buf_start;
+	struct bpf_insn *insn_end = buf_end;
+	struct bpf_insn *cur = insn_start;
+
+	for (; cur <= insn_end; cur++) {
+		printf("% 4d: ", (int)(cur - insn_start + start_idx));
+		print_bpf_insn(&cbs, NULL, cur, true);
+		if (cur != insn_end)
+			printf(" | ");
+	}
+}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 208285f9ab70..51c935d38ae2 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -58,5 +58,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
 		      bool opcodes);
 void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
 		       bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+			   unsigned int start_index);
 
 #endif

From b6c1cedb4935a99d315827b990b23230b0eb13f5 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Thu, 1 Mar 2018 18:01:22 -0800
Subject: [PATCH 0576/1678] tools: bpftool: new command-line option and
 documentation for 'visual'

This patch adds new command-line option for visualizing the xlated eBPF
sequence.

Documentations are updated accordingly.

Usage:

  bpftool prog dump xlated id 2 visual

Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/bpftool/Documentation/bpftool-prog.rst | 18 ++++++++++++------
 tools/bpf/bpftool/prog.c                       | 12 +++++++++++-
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index e4ceee7f2dff..67ca6c69376c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -21,7 +21,7 @@ MAP COMMANDS
 =============
 
 |	**bpftool** **prog { show | list }** [*PROG*]
-|	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
+|	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
 |	**bpftool** **prog load** *OBJ* *FILE*
@@ -39,12 +39,18 @@ DESCRIPTION
 		  Output will start with program ID followed by program type and
 		  zero or more named attributes (depending on kernel version).
 
-	**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
-		  Dump eBPF instructions of the program from the kernel.
-		  If *FILE* is specified image will be written to a file,
-		  otherwise it will be disassembled and printed to stdout.
+	**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+		  Dump eBPF instructions of the program from the kernel. By
+		  default, eBPF will be disassembled and printed to standard
+		  output in human-readable format. In this case, **opcodes**
+		  controls if raw opcodes should be printed as well.
 
-		  **opcodes** controls if raw opcodes will be printed.
+		  If **file** is specified, the binary image will instead be
+		  written to *FILE*.
+
+		  If **visual** is specified, control flow graph (CFG) will be
+		  built instead, and eBPF instructions will be presented with
+		  CFG in DOT format, on standard output.
 
 	**bpftool prog dump jited**  *PROG* [{ **file** *FILE* | **opcodes** }]
 		  Dump jited image (host machine code) of the program.
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index c5afee9838e6..f7a810897eac 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -47,6 +47,7 @@
 #include <bpf.h>
 #include <libbpf.h>
 
+#include "cfg.h"
 #include "main.h"
 #include "xlated_dumper.h"
 
@@ -415,6 +416,7 @@ static int do_dump(int argc, char **argv)
 	unsigned int buf_size;
 	char *filepath = NULL;
 	bool opcodes = false;
+	bool visual = false;
 	unsigned char *buf;
 	__u32 *member_len;
 	__u64 *member_ptr;
@@ -453,6 +455,9 @@ static int do_dump(int argc, char **argv)
 	} else if (is_prefix(*argv, "opcodes")) {
 		opcodes = true;
 		NEXT_ARG();
+	} else if (is_prefix(*argv, "visual")) {
+		visual = true;
+		NEXT_ARG();
 	}
 
 	if (argc) {
@@ -536,6 +541,11 @@ static int do_dump(int argc, char **argv)
 		}
 
 		disasm_print_insn(buf, *member_len, opcodes, name);
+	} else if (visual) {
+		if (json_output)
+			jsonw_null(json_wtr);
+		else
+			dump_xlated_cfg(buf, *member_len);
 	} else {
 		kernel_syms_load(&dd);
 		if (json_output)
@@ -596,7 +606,7 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %s %s { show | list } [PROG]\n"
-		"       %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+		"       %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
 		"       %s %s dump jited  PROG [{ file FILE | opcodes }]\n"
 		"       %s %s pin   PROG FILE\n"
 		"       %s %s load  OBJ  FILE\n"

From d96fc832bcb6269d96e33d506f33033d7ed08598 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 1 Mar 2018 18:01:23 -0800
Subject: [PATCH 0577/1678] tools: bpftool: add bash completion for CFG dump

Add bash completion for the "visual" keyword used for dumping the CFG of
eBPF programs with bpftool. Make sure we only complete with this keyword
when we dump "xlated" (and not "jited") instructions.

Acked-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/bash-completion/bpftool | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 08719c54a614..490811b45fa7 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -147,7 +147,7 @@ _bpftool()
 
     # Deal with simplest keywords
     case $prev in
-        help|key|opcodes)
+        help|key|opcodes|visual)
             return 0
             ;;
         tag)
@@ -223,11 +223,16 @@ _bpftool()
                             return 0
                             ;;
                     *)
-                            _bpftool_once_attr 'file'
+                        _bpftool_once_attr 'file'
+                        if _bpftool_search_list 'xlated'; then
+                            COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+                                "$cur" ) )
+                        else
                             COMPREPLY+=( $( compgen -W 'opcodes' -- \
                                 "$cur" ) )
-                            return 0
-                            ;;
+                        fi
+                        return 0
+                        ;;
                     esac
                     ;;
                 pin)

From dcb8c9b4373a583451b1b8a3e916d33de273633d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 28 Feb 2018 14:40:46 -0800
Subject: [PATCH 0578/1678] tcp_bbr: better deal with suboptimal GSO (II)

This is second part of dealing with suboptimal device gso parameters.
In first patch (350c9f484bde "tcp_bbr: better deal with suboptimal GSO")
we dealt with devices having low gso_max_segs

Some devices lower gso_max_size from 64KB to 16 KB (r8152 is an example)

In order to probe an optimal cwnd, we want BBR being not sensitive
to whatever GSO constraint a device can have.

This patch removes tso_segs_goal() CC callback in favor of
min_tso_segs() for CC wanting to override sysctl_tcp_min_tso_segs

Next patch will remove bbr->tso_segs_goal since it does not have
to be persistent.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  6 ++----
 net/ipv4/tcp_bbr.c    | 23 +++++++++++++----------
 net/ipv4/tcp_output.c | 15 ++++++++-------
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92b06c6e7732..9c9b3768b350 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
 #endif
 /* tcp_output.c */
 
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
@@ -981,8 +979,8 @@ struct tcp_congestion_ops {
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
-	/* suggest number of segments for each skb to transmit (optional) */
-	u32 (*tso_segs_goal)(struct sock *sk);
+	/* override sysctl_tcp_min_tso_segs */
+	u32 (*min_tso_segs)(struct sock *sk);
 	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
 	u32 (*sndbuf_expand)(struct sock *sk);
 	/* call when packets are delivered to update cwnd and pacing rate,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index a471f696e13c..afc0567b8a98 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -261,23 +261,26 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 		sk->sk_pacing_rate = rate;
 }
 
-/* Return count of segments we want in the skbs we send, or 0 for default. */
-static u32 bbr_tso_segs_goal(struct sock *sk)
+/* override sysctl_tcp_min_tso_segs */
+static u32 bbr_min_tso_segs(struct sock *sk)
 {
-	struct bbr *bbr = inet_csk_ca(sk);
-
-	return bbr->tso_segs_goal;
+	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
 static void bbr_set_tso_segs_goal(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 min_segs;
+	u32 segs, bytes;
 
-	min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
-	bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
-				 0x7FU);
+	/* Sort of tcp_tso_autosize() but ignoring
+	 * driver provided sk_gso_max_size.
+	 */
+	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
+
+	bbr->tso_segs_goal = min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -936,7 +939,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
 	.undo_cwnd	= bbr_undo_cwnd,
 	.cwnd_event	= bbr_cwnd_event,
 	.ssthresh	= bbr_ssthresh,
-	.tso_segs_goal	= bbr_tso_segs_goal,
+	.min_tso_segs	= bbr_min_tso_segs,
 	.get_info	= bbr_get_info,
 	.set_state	= bbr_set_state,
 };
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 49d043de3476..383cac0ff0ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
-		     int min_tso_segs)
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+			    int min_tso_segs)
 {
 	u32 bytes, segs;
 
@@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 
 	return segs;
 }
-EXPORT_SYMBOL(tcp_tso_autosize);
 
 /* Return the number of segments we want in the skb we are transmitting.
  * See if congestion control module wants to decide; otherwise, autosize.
@@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize);
 static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 {
 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+	u32 min_tso, tso_segs;
 
-	if (!tso_segs)
-		tso_segs = tcp_tso_autosize(sk, mss_now,
-				sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+	min_tso = ca_ops->min_tso_segs ?
+			ca_ops->min_tso_segs(sk) :
+			sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+
+	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
 	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
 }
 

From 71abf467bb630c7e2f4ef33267d98fb7d10d3ce9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 28 Feb 2018 14:40:47 -0800
Subject: [PATCH 0579/1678] tcp_bbr: remove bbr->tso_segs_goal

Its value is computed then immediately used,
there is no need to store it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index afc0567b8a98..c92014cb1e16 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -97,10 +97,9 @@ struct bbr {
 		packet_conservation:1,  /* use packet conservation? */
 		restore_cwnd:1,	     /* decided to revert cwnd to old value */
 		round_start:1,	     /* start of packet-timed tx->ack round? */
-		tso_segs_goal:7,     /* segments we want in each skb we send */
 		idle_restart:1,	     /* restarting after idle? */
 		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
-		unused:5,
+		unused:12,
 		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
 		lt_rtt_cnt:7,	     /* round trips in long-term interval */
 		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
@@ -267,10 +266,9 @@ static u32 bbr_min_tso_segs(struct sock *sk)
 	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
 
-static void bbr_set_tso_segs_goal(struct sock *sk)
+static u32 bbr_tso_segs_goal(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct bbr *bbr = inet_csk_ca(sk);
 	u32 segs, bytes;
 
 	/* Sort of tcp_tso_autosize() but ignoring
@@ -280,7 +278,7 @@ static void bbr_set_tso_segs_goal(struct sock *sk)
 		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
 	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
 
-	bbr->tso_segs_goal = min(segs, 0x7FU);
+	return min(segs, 0x7FU);
 }
 
 /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
@@ -351,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
 
 	/* Allow enough full-sized skbs in flight to utilize end systems. */
-	cwnd += 3 * bbr->tso_segs_goal;
+	cwnd += 3 * bbr_tso_segs_goal(sk);
 
 	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
 	cwnd = (cwnd + 1) & ~1U;
@@ -827,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 
 	bw = bbr_bw(sk);
 	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
-	bbr_set_tso_segs_goal(sk);
 	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }
 
@@ -837,7 +834,6 @@ static void bbr_init(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->prior_cwnd = 0;
-	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
 	bbr->rtt_cnt = 0;
 	bbr->next_rtt_delivered = 0;
 	bbr->prev_ca_state = TCP_CA_Open;

From 06cc7fe7c2951e64cd5e73ea447791c7e6bc3852 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 1 Mar 2018 20:20:08 -0800
Subject: [PATCH 0580/1678] tools: bpftool: support comments in batch files

Replace '#' by '\0' in commands read from batch files in order to avoid
processing the remaining part of the line, thus allowing users to use
comments in the files.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 185acfa229b5..79587e6decae 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -176,6 +176,7 @@ static int do_batch(int argc, char **argv)
 	char buf[65536];
 	int n_argc;
 	FILE *fp;
+	char *cp;
 	int err;
 	int i;
 
@@ -200,6 +201,10 @@ static int do_batch(int argc, char **argv)
 	if (json_output)
 		jsonw_start_array(json_wtr);
 	while (fgets(buf, sizeof(buf), fp)) {
+		cp = strchr(buf, '#');
+		if (cp)
+			*cp = '\0';
+
 		if (strlen(buf) == sizeof(buf) - 1) {
 			errno = E2BIG;
 			break;

From 65d538dde625d93359ca4e33d2311f8598f423a6 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 1 Mar 2018 20:20:09 -0800
Subject: [PATCH 0581/1678] tools: bpftool: support continuation lines in batch
 files

Add support for continuation lines, such as in the following example:

    prog show
    prog dump xlated \
        id 1337 opcodes

This patch is based after the code for support for continuation lines
from file lib/utils.c from package iproute2.

"Lines" in error messages are renamed as "commands", as we count the
number of commands (but we ignore empty lines, comments, and do not add
continuation lines to the count).

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 79587e6decae..cdee4c3d30c3 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -46,6 +46,9 @@
 
 #include "main.h"
 
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
 const char *bin_name;
 static int last_argc;
 static char **last_argv;
@@ -171,9 +174,9 @@ static const struct cmd cmds[] = {
 
 static int do_batch(int argc, char **argv)
 {
+	char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+	char *n_argv[BATCH_ARG_NB_MAX];
 	unsigned int lines = 0;
-	char *n_argv[4096];
-	char buf[65536];
 	int n_argc;
 	FILE *fp;
 	char *cp;
@@ -210,13 +213,38 @@ static int do_batch(int argc, char **argv)
 			break;
 		}
 
+		/* Append continuation lines if any (coming after a line ending
+		 * with '\' in the batch file).
+		 */
+		while ((cp = strstr(buf, "\\\n")) != NULL) {
+			if (!fgets(contline, sizeof(contline), fp) ||
+			    strlen(contline) == 0) {
+				p_err("missing continuation line on command %d",
+				      lines);
+				err = -1;
+				goto err_close;
+			}
+
+			cp = strchr(contline, '#');
+			if (cp)
+				*cp = '\0';
+
+			if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+				p_err("command %d is too long", lines);
+				err = -1;
+				goto err_close;
+			}
+			buf[strlen(buf) - 2] = '\0';
+			strcat(buf, contline);
+		}
+
 		n_argc = 0;
 		n_argv[n_argc] = strtok(buf, " \t\n");
 
 		while (n_argv[n_argc]) {
 			n_argc++;
 			if (n_argc == ARRAY_SIZE(n_argv)) {
-				p_err("line %d has too many arguments, skip",
+				p_err("command %d has too many arguments, skip",
 				      lines);
 				n_argc = 0;
 				break;
@@ -252,7 +280,7 @@ static int do_batch(int argc, char **argv)
 		p_err("reading batch file failed: %s", strerror(errno));
 		err = -1;
 	} else {
-		p_info("processed %d lines", lines);
+		p_info("processed %d commands", lines);
 		err = 0;
 	}
 err_close:

From 416656bbaa57a5be75514498491b7e24c58537c1 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 1 Mar 2018 20:20:10 -0800
Subject: [PATCH 0582/1678] tools: bpftool: read from stdin when batch file
 name is "-"

Make bpftool read its command list from standard input when the name if
the input file is a single dash.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index cdee4c3d30c3..1da54a9b5ea3 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -195,7 +195,10 @@ static int do_batch(int argc, char **argv)
 	}
 	NEXT_ARG();
 
-	fp = fopen(*argv, "r");
+	if (!strcmp(*argv, "-"))
+		fp = stdin;
+	else
+		fp = fopen(*argv, "r");
 	if (!fp) {
 		p_err("Can't open file (%s): %s", *argv, strerror(errno));
 		return -1;
@@ -284,7 +287,8 @@ static int do_batch(int argc, char **argv)
 		err = 0;
 	}
 err_close:
-	fclose(fp);
+	if (fp != stdin)
+		fclose(fp);
 
 	if (json_output)
 		jsonw_end_array(json_wtr);

From 668da745af3c29d5742238ef278a1b2055c97e51 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 1 Mar 2018 20:20:11 -0800
Subject: [PATCH 0583/1678] tools: bpftool: add support for quotations in batch
 files

Improve argument parsing from batch input files in order to support
arguments enclosed between single (') or double quotes ("). For example,
this command can now be parsed in batch mode:

    bpftool prog dump xlated id 1337 file "/tmp/my file with spaces"

The function responsible for parsing command arguments is copied from
its counterpart in lib/utils.c in iproute2 package.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/main.c | 65 +++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1da54a9b5ea3..1ec852d21d44 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -160,6 +160,54 @@ void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
 	}
 }
 
+/* Split command line into argument vector. */
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+	static const char ws[] = " \t\r\n";
+	char *cp = line;
+	int n_argc = 0;
+
+	while (*cp) {
+		/* Skip leading whitespace. */
+		cp += strspn(cp, ws);
+
+		if (*cp == '\0')
+			break;
+
+		if (n_argc >= (maxargs - 1)) {
+			p_err("too many arguments to command %d", cmd_nb);
+			return -1;
+		}
+
+		/* Word begins with quote. */
+		if (*cp == '\'' || *cp == '"') {
+			char quote = *cp++;
+
+			n_argv[n_argc++] = cp;
+			/* Find ending quote. */
+			cp = strchr(cp, quote);
+			if (!cp) {
+				p_err("unterminated quoted string in command %d",
+				      cmd_nb);
+				return -1;
+			}
+		} else {
+			n_argv[n_argc++] = cp;
+
+			/* Find end of word. */
+			cp += strcspn(cp, ws);
+			if (*cp == '\0')
+				break;
+		}
+
+		/* Separate words. */
+		*cp++ = 0;
+	}
+	n_argv[n_argc] = NULL;
+
+	return n_argc;
+}
+
 static int do_batch(int argc, char **argv);
 
 static const struct cmd cmds[] = {
@@ -241,22 +289,11 @@ static int do_batch(int argc, char **argv)
 			strcat(buf, contline);
 		}
 
-		n_argc = 0;
-		n_argv[n_argc] = strtok(buf, " \t\n");
-
-		while (n_argv[n_argc]) {
-			n_argc++;
-			if (n_argc == ARRAY_SIZE(n_argv)) {
-				p_err("command %d has too many arguments, skip",
-				      lines);
-				n_argc = 0;
-				break;
-			}
-			n_argv[n_argc] = strtok(NULL, " \t\n");
-		}
-
+		n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
 		if (!n_argc)
 			continue;
+		if (n_argc < 0)
+			goto err_close;
 
 		if (json_output) {
 			jsonw_start_object(json_wtr);

From 03695549aa76e877d596df188c266f06257b6a23 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 1 Mar 2018 14:30:09 +0300
Subject: [PATCH 0584/1678] mac80211_hwsim: Make hwsim_netgroup IDA

hwsim_netgroup counter is declarated as int, and it is incremented
every time a new net is created. After sizeof(int) net are created,
it will overflow, and different net namespaces will have the same
identifier. This patch fixes the problem by introducing IDA instead
of int counter. IDA guarantees, all the net namespaces have the uniq
identifier.

Note, that after we do ida_simple_remove() in hwsim_exit_net(),
and we destroy the ID, later there may be executed destroy_radio()
from the workqueue. But destroy_radio() does not use the ID, so it's OK.

Out of bounds of this patch, just as a report to wireless subsystem
maintainer, destroy_radio() increaments hwsim_radios_generation
without hwsim_radio_lock, so this may need one more patch to fix.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 3c64afa161bf..45ba846bc285 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 
 static unsigned int hwsim_net_id;
 
-static int hwsim_netgroup;
+static struct ida hwsim_netgroup_ida = IDA_INIT;
 
 struct hwsim_net {
 	int netgroup;
@@ -267,11 +267,13 @@ static inline int hwsim_net_get_netgroup(struct net *net)
 	return hwsim_net->netgroup;
 }
 
-static inline void hwsim_net_set_netgroup(struct net *net)
+static inline int hwsim_net_set_netgroup(struct net *net)
 {
 	struct hwsim_net *hwsim_net = net_generic(net, hwsim_net_id);
 
-	hwsim_net->netgroup = hwsim_netgroup++;
+	hwsim_net->netgroup = ida_simple_get(&hwsim_netgroup_ida,
+					     0, 0, GFP_KERNEL);
+	return hwsim_net->netgroup >= 0 ? 0 : -ENOMEM;
 }
 
 static inline u32 hwsim_net_get_wmediumd(struct net *net)
@@ -3507,9 +3509,7 @@ failure:
 
 static __net_init int hwsim_init_net(struct net *net)
 {
-	hwsim_net_set_netgroup(net);
-
-	return 0;
+	return hwsim_net_set_netgroup(net);
 }
 
 static void __net_exit hwsim_exit_net(struct net *net)
@@ -3532,6 +3532,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
 		queue_work(hwsim_wq, &data->destroy_work);
 	}
 	spin_unlock_bh(&hwsim_radio_lock);
+
+	ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net));
 }
 
 static struct pernet_operations hwsim_net_ops = {

From 2e75bb2f8b8928aa01d91219a90df1e6fbc7cdd4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 1 Mar 2018 14:30:17 +0300
Subject: [PATCH 0585/1678] net: Convert hwsim_net_ops

These pernet_operations allocate and destroy IDA identifier,
and these actions are synchronized by IDA subsystem locks.
Exit method removes mac80211_hwsim_data enteries from the lists,
and this is synchronized by hwsim_radio_lock with the rest
parallel pernet_operations. Also it queues destroy_radio()
work, and these work already may be executed in parallel
with any pernet_operations (as it's a work :). So, we may
mark these pernet_operations as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 45ba846bc285..7b6c3640a94f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3541,6 +3541,7 @@ static struct pernet_operations hwsim_net_ops = {
 	.exit = hwsim_exit_net,
 	.id   = &hwsim_net_id,
 	.size = sizeof(struct hwsim_net),
+	.async = true,
 };
 
 static void hwsim_exit_netlink(void)

From 8fd4bc8a15b218165c45f44eba1b33a3c7181dfb Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 1 Mar 2018 14:40:19 +0000
Subject: [PATCH 0586/1678] ieee802154: remove unused variable 'val'

Variable 'val' is not being initialized and is later being logically
or'd with DAR_PHY_CTRL4_PROMISCUOUS. Considering this variable is never
being read anyway we may as well remove val altogether.

Cleans up error detected by cppcheck:
drivers/net/ieee802154/mcr20a.c:732: (error) Uninitialized variable: val

Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
---
 drivers/net/ieee802154/mcr20a.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index d9eb22a52551..55a22c761808 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -723,13 +723,11 @@ mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
 	struct mcr20a_local *lp = hw->priv;
 	int ret;
 	u8 rx_frame_filter_reg = 0x0;
-	u8 val;
 
 	dev_dbg(printdev(lp), "%s(%d)\n", __func__, on);
 
 	if (on) {
 		/* All frame types accepted*/
-		val |= DAR_PHY_CTRL4_PROMISCUOUS;
 		rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER);
 		rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT |
 				  IAR_RX_FRAME_FLT_NS_FT);

From ad0bff0570727620840c4005992946cce1dff693 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 2 Mar 2018 14:35:49 +0530
Subject: [PATCH 0587/1678] cxgb4: remove dead code when allocating filter

Error code is already returned earlier if filter exists
at specified location. So, remove dead code trying to
free existing filter.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 3177b0c9bd2d..db92f1858060 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 		return ret;
 	}
 
-	/* Clear out any old resources being used by the filter before
-	 * we start constructing the new filter.
-	 */
-	if (f->valid)
-		clear_filter(adapter, f);
-
 	if (is_t6(adapter->params.chip) && fs->type &&
 	    ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
 	    IPV6_ADDR_ANY) {

From 3c34cb9defb0d454be20287c1c7c7417ebce7bb3 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 2 Mar 2018 15:57:07 +0530
Subject: [PATCH 0588/1678] cxgb4: Add TP Congestion map entry for single-port

Add TP Congestion Map entry for single-port T6 cards.

Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index bd41f93f73ed..2c889efc78ea 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -6091,6 +6091,7 @@ unsigned int t4_get_tp_ch_map(struct adapter *adap, int pidx)
 
 	case CHELSIO_T6:
 		switch (nports) {
+		case 1:
 		case 2: return 1 << pidx;
 		}
 		break;

From b72c8a7e2ca8c4c085c76d17005241eca2edf93a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 2 Mar 2018 13:42:39 +0000
Subject: [PATCH 0589/1678] net/usb/kalmia: use ARRAY_SIZE for various array
 sizing calculations

Use the ARRAY_SIZE macro on a couple of arrays to determine
size of the arrays. Also fix up alignment to clean up a checkpatch
warning. Improvement suggested by Coccinelle.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/kalmia.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index ce0b0b4e3a57..1ec523b0e932 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -114,14 +114,14 @@ kalmia_init_and_get_ethernet_addr(struct usbnet *dev, u8 *ethernet_addr)
 		return -ENOMEM;
 
 	memcpy(usb_buf, init_msg_1, 12);
-	status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_1)
-		/ sizeof(init_msg_1[0]), usb_buf, 24);
+	status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_1),
+					 usb_buf, 24);
 	if (status != 0)
 		return status;
 
 	memcpy(usb_buf, init_msg_2, 12);
-	status = kalmia_send_init_packet(dev, usb_buf, sizeof(init_msg_2)
-		/ sizeof(init_msg_2[0]), usb_buf, 28);
+	status = kalmia_send_init_packet(dev, usb_buf, ARRAY_SIZE(init_msg_2),
+					 usb_buf, 28);
 	if (status != 0)
 		return status;
 

From f84af33138a827dbdf7ee0df2ed82377b43cd1cc Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:10 +0800
Subject: [PATCH 0590/1678] sctp: factor out sctp_sendmsg_to_asoc from
 sctp_sendmsg

This patch is to move the codes for checking and sending on
one asoc after this asoc has been found or created into
sctp_sendmsg_to_asoc.

Note that 'err != -ESRCH' check is for the case that asoc is
freed when waiting for tx buffer in sctp_sendmsg_to_asoc.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 230 ++++++++++++++++++++--------------------------
 1 file changed, 99 insertions(+), 131 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bf271f8c2dc9..183129e2bc68 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1606,6 +1606,100 @@ static int sctp_error(struct sock *sk, int flags, int err)
 static int sctp_msghdr_parse(const struct msghdr *msg,
 			     struct sctp_cmsgs *cmsgs);
 
+static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
+				struct msghdr *msg, size_t msg_len,
+				struct sctp_transport *transport,
+				struct sctp_sndrcvinfo *sinfo)
+{
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
+	struct sctp_datamsg *datamsg;
+	bool wait_connect = false;
+	struct sctp_chunk *chunk;
+	long timeo;
+	int err;
+
+	if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
+		err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
+		if (err)
+			goto err;
+	}
+
+	if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+		err = -EMSGSIZE;
+		goto err;
+	}
+
+	if (sctp_state(asoc, CLOSED)) {
+		err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
+		if (err)
+			goto err;
+
+		if (sctp_sk(sk)->strm_interleave) {
+			timeo = sock_sndtimeo(sk, 0);
+			err = sctp_wait_for_connect(asoc, &timeo);
+			if (err)
+				goto err;
+		} else {
+			wait_connect = true;
+		}
+
+		pr_debug("%s: we associated primitively\n", __func__);
+	}
+
+	if (asoc->pmtu_pending)
+		sctp_assoc_pending_pmtu(asoc);
+
+	if (sctp_wspace(asoc) < msg_len)
+		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
+	if (!sctp_wspace(asoc)) {
+		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+		if (err)
+			goto err;
+	}
+
+	datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
+	if (IS_ERR(datamsg)) {
+		err = PTR_ERR(datamsg);
+		goto err;
+	}
+
+	asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
+
+	list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
+		sctp_chunk_hold(chunk);
+		sctp_set_owner_w(chunk);
+		chunk->transport = transport;
+	}
+
+	err = sctp_primitive_SEND(net, asoc, datamsg);
+	if (err) {
+		sctp_datamsg_free(datamsg);
+		goto err;
+	}
+
+	pr_debug("%s: we sent primitively\n", __func__);
+
+	sctp_datamsg_put(datamsg);
+
+	if (unlikely(wait_connect)) {
+		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+		sctp_wait_for_connect(asoc, &timeo);
+	}
+
+	err = msg_len;
+
+err:
+	return err;
+}
+
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
 	struct net *net = sock_net(sk);
@@ -1622,11 +1716,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	sctp_assoc_t associd = 0;
 	struct sctp_cmsgs cmsgs = { NULL };
 	enum sctp_scope scope;
-	bool fill_sinfo_ttl = false, wait_connect = false;
-	struct sctp_datamsg *datamsg;
-	int msg_flags = msg->msg_flags;
+	bool fill_sinfo_ttl = false;
 	__u16 sinfo_flags = 0;
-	long timeo;
 	int err;
 
 	err = 0;
@@ -1923,49 +2014,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		goto out_free;
 	}
 
-	if (asoc->pmtu_pending)
-		sctp_assoc_pending_pmtu(asoc);
-
-	/* If fragmentation is disabled and the message length exceeds the
-	 * association fragmentation point, return EMSGSIZE.  The I-D
-	 * does not specify what this error is, but this looks like
-	 * a great fit.
-	 */
-	if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
-		err = -EMSGSIZE;
-		goto out_free;
-	}
-
-	/* Check for invalid stream. */
-	if (sinfo->sinfo_stream >= asoc->stream.outcnt) {
-		err = -EINVAL;
-		goto out_free;
-	}
-
-	/* Allocate sctp_stream_out_ext if not already done */
-	if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
-		err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
-		if (err)
-			goto out_free;
-	}
-
-	if (sctp_wspace(asoc) < msg_len)
-		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
-
-	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-	if (!sctp_wspace(asoc)) {
-		/* sk can be changed by peel off when waiting for buf. */
-		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
-		if (err) {
-			if (err == -ESRCH) {
-				/* asoc is already dead. */
-				new_asoc = NULL;
-				err = -EPIPE;
-			}
-			goto out_free;
-		}
-	}
-
 	/* If an address is passed with the sendto/sendmsg call, it is used
 	 * to override the primary destination address in the TCP model, or
 	 * when SCTP_ADDR_OVER flag is set in the UDP model.
@@ -1980,96 +2028,16 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	} else
 		chunk_tp = NULL;
 
-	/* Auto-connect, if we aren't connected already. */
-	if (sctp_state(asoc, CLOSED)) {
-		err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
-		if (err < 0)
-			goto out_free;
-
-		/* If stream interleave is enabled, wait_connect has to be
-		 * done earlier than data enqueue, as it needs to make data
-		 * or idata according to asoc->intl_enable which is set
-		 * after connection is done.
-		 */
-		if (sctp_sk(asoc->base.sk)->strm_interleave) {
-			timeo = sock_sndtimeo(sk, 0);
-			err = sctp_wait_for_connect(asoc, &timeo);
-			if (err)
-				goto out_unlock;
-		} else {
-			wait_connect = true;
-		}
-
-		pr_debug("%s: we associated primitively\n", __func__);
-	}
-
-	/* Break the message into multiple chunks of maximum size. */
-	datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
-	if (IS_ERR(datamsg)) {
-		err = PTR_ERR(datamsg);
-		goto out_free;
-	}
-	asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
-
-	/* Now send the (possibly) fragmented message. */
-	list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
-		sctp_chunk_hold(chunk);
-
-		/* Do accounting for the write space.  */
-		sctp_set_owner_w(chunk);
-
-		chunk->transport = chunk_tp;
-	}
-
-	/* Send it to the lower layers.  Note:  all chunks
-	 * must either fail or succeed.   The lower layer
-	 * works that way today.  Keep it that way or this
-	 * breaks.
-	 */
-	err = sctp_primitive_SEND(net, asoc, datamsg);
-	/* Did the lower layer accept the chunk? */
-	if (err) {
-		sctp_datamsg_free(datamsg);
-		goto out_free;
-	}
-
-	pr_debug("%s: we sent primitively\n", __func__);
-
-	sctp_datamsg_put(datamsg);
-	err = msg_len;
-
-	if (unlikely(wait_connect)) {
-		timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT);
-		sctp_wait_for_connect(asoc, &timeo);
-	}
-
-	/* If we are already past ASSOCIATE, the lower
-	 * layers are responsible for association cleanup.
-	 */
-	goto out_unlock;
+	/* Send msg to the asoc */
+	err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, chunk_tp, sinfo);
 
 out_free:
-	if (new_asoc)
+	if (err < 0 && err != -ESRCH && new_asoc)
 		sctp_association_free(asoc);
 out_unlock:
 	release_sock(sk);
-
 out_nounlock:
-	return sctp_error(sk, msg_flags, err);
-
-#if 0
-do_sock_err:
-	if (msg_len)
-		err = msg_len;
-	else
-		err = sock_error(sk);
-	goto out;
-
-do_interrupted:
-	if (msg_len)
-		err = msg_len;
-	goto out;
-#endif /* 0 */
+	return sctp_error(sk, msg->msg_flags, err);
 }
 
 /* This is an extended version of skb_pull() that removes the data from the

From 2bfd80f9edbfc18c6247be929df3b348329141a0 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:11 +0800
Subject: [PATCH 0591/1678] sctp: factor out sctp_sendmsg_new_asoc from
 sctp_sendmsg

This patch is to move the codes for creating a new asoc if
no asoc was found into sctp_sendmsg_new_asoc.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 201 ++++++++++++++++++++--------------------------
 1 file changed, 86 insertions(+), 115 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 183129e2bc68..58bb55dce8f6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1606,6 +1606,87 @@ static int sctp_error(struct sock *sk, int flags, int err)
 static int sctp_msghdr_parse(const struct msghdr *msg,
 			     struct sctp_cmsgs *cmsgs);
 
+static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
+				 struct sctp_cmsgs *cmsgs,
+				 union sctp_addr *daddr,
+				 struct sctp_transport **tp)
+{
+	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+	struct net *net = sock_net(sk);
+	struct sctp_association *asoc;
+	enum sctp_scope scope;
+	int err = -EINVAL;
+
+	*tp = NULL;
+
+	if (sflags & (SCTP_EOF | SCTP_ABORT))
+		return -EINVAL;
+
+	if (sctp_style(sk, TCP) && (sctp_sstate(sk, ESTABLISHED) ||
+				    sctp_sstate(sk, CLOSING)))
+		return -EADDRNOTAVAIL;
+
+	if (sctp_endpoint_is_peeled_off(ep, daddr))
+		return -EADDRNOTAVAIL;
+
+	if (!ep->base.bind_addr.port) {
+		if (sctp_autobind(sk))
+			return -EAGAIN;
+	} else {
+		if (ep->base.bind_addr.port < inet_prot_sock(net) &&
+		    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
+			return -EACCES;
+	}
+
+	scope = sctp_scope(daddr);
+
+	asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
+	if (!asoc)
+		return -ENOMEM;
+
+	if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	if (cmsgs->init) {
+		struct sctp_initmsg *init = cmsgs->init;
+
+		if (init->sinit_num_ostreams) {
+			__u16 outcnt = init->sinit_num_ostreams;
+
+			asoc->c.sinit_num_ostreams = outcnt;
+			/* outcnt has been changed, need to re-init stream */
+			err = sctp_stream_init(&asoc->stream, outcnt, 0,
+					       GFP_KERNEL);
+			if (err)
+				goto free;
+		}
+
+		if (init->sinit_max_instreams)
+			asoc->c.sinit_max_instreams = init->sinit_max_instreams;
+
+		if (init->sinit_max_attempts)
+			asoc->max_init_attempts = init->sinit_max_attempts;
+
+		if (init->sinit_max_init_timeo)
+			asoc->max_init_timeo =
+				msecs_to_jiffies(init->sinit_max_init_timeo);
+	}
+
+	*tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN);
+	if (!*tp) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	return 0;
+
+free:
+	sctp_association_free(asoc);
+	return err;
+}
+
 static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 				struct msghdr *msg, size_t msg_len,
 				struct sctp_transport *transport,
@@ -1715,7 +1796,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	struct sctp_initmsg *sinit;
 	sctp_assoc_t associd = 0;
 	struct sctp_cmsgs cmsgs = { NULL };
-	enum sctp_scope scope;
 	bool fill_sinfo_ttl = false;
 	__u16 sinfo_flags = 0;
 	int err;
@@ -1817,20 +1897,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	if (msg_name) {
 		/* Look for a matching association on the endpoint. */
 		asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
-
-		/* If we could not find a matching association on the
-		 * endpoint, make sure that it is not a TCP-style
-		 * socket that already has an association or there is
-		 * no peeled-off association on another socket.
-		 */
-		if (!asoc &&
-		    ((sctp_style(sk, TCP) &&
-		      (sctp_sstate(sk, ESTABLISHED) ||
-		       sctp_sstate(sk, CLOSING))) ||
-		     sctp_endpoint_is_peeled_off(ep, &to))) {
-			err = -EADDRNOTAVAIL;
-			goto out_unlock;
-		}
 	} else {
 		asoc = sctp_id2assoc(sk, associd);
 		if (!asoc) {
@@ -1879,108 +1945,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
 	/* Do we need to create the association?  */
 	if (!asoc) {
-		pr_debug("%s: there is no association yet\n", __func__);
-
-		if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
-			err = -EINVAL;
+		err = sctp_sendmsg_new_asoc(sk, sinfo_flags, &cmsgs, &to,
+					    &transport);
+		if (err)
 			goto out_unlock;
-		}
 
-		/* Check for invalid stream against the stream counts,
-		 * either the default or the user specified stream counts.
-		 */
-		if (sinfo) {
-			if (!sinit || !sinit->sinit_num_ostreams) {
-				/* Check against the defaults. */
-				if (sinfo->sinfo_stream >=
-				    sp->initmsg.sinit_num_ostreams) {
-					err = -EINVAL;
-					goto out_unlock;
-				}
-			} else {
-				/* Check against the requested.  */
-				if (sinfo->sinfo_stream >=
-				    sinit->sinit_num_ostreams) {
-					err = -EINVAL;
-					goto out_unlock;
-				}
-			}
-		}
-
-		/*
-		 * API 3.1.2 bind() - UDP Style Syntax
-		 * If a bind() or sctp_bindx() is not called prior to a
-		 * sendmsg() call that initiates a new association, the
-		 * system picks an ephemeral port and will choose an address
-		 * set equivalent to binding with a wildcard address.
-		 */
-		if (!ep->base.bind_addr.port) {
-			if (sctp_autobind(sk)) {
-				err = -EAGAIN;
-				goto out_unlock;
-			}
-		} else {
-			/*
-			 * If an unprivileged user inherits a one-to-many
-			 * style socket with open associations on a privileged
-			 * port, it MAY be permitted to accept new associations,
-			 * but it SHOULD NOT be permitted to open new
-			 * associations.
-			 */
-			if (ep->base.bind_addr.port < inet_prot_sock(net) &&
-			    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) {
-				err = -EACCES;
-				goto out_unlock;
-			}
-		}
-
-		scope = sctp_scope(&to);
-		new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
-		if (!new_asoc) {
-			err = -ENOMEM;
-			goto out_unlock;
-		}
-		asoc = new_asoc;
-		err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL);
-		if (err < 0) {
-			err = -ENOMEM;
-			goto out_free;
-		}
-
-		/* If the SCTP_INIT ancillary data is specified, set all
-		 * the association init values accordingly.
-		 */
-		if (sinit) {
-			if (sinit->sinit_num_ostreams) {
-				__u16 outcnt = sinit->sinit_num_ostreams;
-
-				asoc->c.sinit_num_ostreams = outcnt;
-				/* outcnt has been changed, so re-init stream */
-				err = sctp_stream_init(&asoc->stream, outcnt, 0,
-						       GFP_KERNEL);
-				if (err)
-					goto out_free;
-			}
-			if (sinit->sinit_max_instreams) {
-				asoc->c.sinit_max_instreams =
-					sinit->sinit_max_instreams;
-			}
-			if (sinit->sinit_max_attempts) {
-				asoc->max_init_attempts
-					= sinit->sinit_max_attempts;
-			}
-			if (sinit->sinit_max_init_timeo) {
-				asoc->max_init_timeo =
-				 msecs_to_jiffies(sinit->sinit_max_init_timeo);
-			}
-		}
-
-		/* Prime the peer's transport structures.  */
-		transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
-		if (!transport) {
-			err = -ENOMEM;
-			goto out_free;
-		}
+		asoc = transport->asoc;
+		new_asoc = asoc;
 	}
 
 	/* ASSERT: we have a valid association at this point.  */

From c2666de1fde3a02583c2c4d4af4bc54f7252e891 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:12 +0800
Subject: [PATCH 0592/1678] sctp: factor out sctp_sendmsg_check_sflags from
 sctp_sendmsg

This patch is to move the codes for checking sinfo_flags on one asoc
after this asoc has been found into sctp_sendmsg_check_sflags.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 72 +++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 58bb55dce8f6..93cff9963614 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1687,6 +1687,39 @@ free:
 	return err;
 }
 
+static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
+				     __u16 sflags, struct msghdr *msg,
+				     size_t msg_len)
+{
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
+
+	if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
+		return -EPIPE;
+
+	if (sflags & SCTP_EOF) {
+		pr_debug("%s: shutting down association:%p\n", __func__, asoc);
+		sctp_primitive_SHUTDOWN(net, asoc, NULL);
+
+		return 0;
+	}
+
+	if (sflags & SCTP_ABORT) {
+		struct sctp_chunk *chunk;
+
+		chunk = sctp_make_abort_user(asoc, msg, msg_len);
+		if (!chunk)
+			return -ENOMEM;
+
+		pr_debug("%s: aborting association:%p\n", __func__, asoc);
+		sctp_primitive_ABORT(net, asoc, chunk);
+
+		return 0;
+	}
+
+	return 1;
+}
+
 static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 				struct msghdr *msg, size_t msg_len,
 				struct sctp_transport *transport,
@@ -1783,12 +1816,10 @@ err:
 
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
 	struct sctp_association *new_asoc = NULL, *asoc = NULL;
 	struct sctp_transport *transport, *chunk_tp;
-	struct sctp_chunk *chunk;
 	union sctp_addr to;
 	struct sockaddr *msg_name = NULL;
 	struct sctp_sndrcvinfo default_sinfo;
@@ -1906,41 +1937,10 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	}
 
 	if (asoc) {
-		pr_debug("%s: just looked up association:%p\n", __func__, asoc);
-
-		/* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
-		 * socket that has an association in CLOSED state. This can
-		 * happen when an accepted socket has an association that is
-		 * already CLOSED.
-		 */
-		if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
-			err = -EPIPE;
+		err = sctp_sendmsg_check_sflags(asoc, sinfo_flags, msg,
+						msg_len);
+		if (err <= 0)
 			goto out_unlock;
-		}
-
-		if (sinfo_flags & SCTP_EOF) {
-			pr_debug("%s: shutting down association:%p\n",
-				 __func__, asoc);
-
-			sctp_primitive_SHUTDOWN(net, asoc, NULL);
-			err = 0;
-			goto out_unlock;
-		}
-		if (sinfo_flags & SCTP_ABORT) {
-
-			chunk = sctp_make_abort_user(asoc, msg, msg_len);
-			if (!chunk) {
-				err = -ENOMEM;
-				goto out_unlock;
-			}
-
-			pr_debug("%s: aborting association:%p\n",
-				 __func__, asoc);
-
-			sctp_primitive_ABORT(net, asoc, chunk);
-			err = 0;
-			goto out_unlock;
-		}
 	}
 
 	/* Do we need to create the association?  */

From becef9b1e249c849ca15889586e02668d6d723a2 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:13 +0800
Subject: [PATCH 0593/1678] sctp: factor out sctp_sendmsg_get_daddr from
 sctp_sendmsg

This patch is to move the codes for trying to get daddr from
msg->msg_name into sctp_sendmsg_get_daddr.

Note that after adding 'daddr', 'to' and 'msg_name' can be
deleted.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 58 +++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 93cff9963614..68691d2b3167 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1814,14 +1814,35 @@ err:
 	return err;
 }
 
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+					       const struct msghdr *msg,
+					       struct sctp_cmsgs *cmsgs)
+{
+	union sctp_addr *daddr = NULL;
+	int err;
+
+	if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
+		int len = msg->msg_namelen;
+
+		if (len > sizeof(*daddr))
+			len = sizeof(*daddr);
+
+		daddr = (union sctp_addr *)msg->msg_name;
+
+		err = sctp_verify_addr(sk, daddr, len);
+		if (err)
+			return ERR_PTR(err);
+	}
+
+	return daddr;
+}
+
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
 	struct sctp_association *new_asoc = NULL, *asoc = NULL;
 	struct sctp_transport *transport, *chunk_tp;
-	union sctp_addr to;
-	struct sockaddr *msg_name = NULL;
 	struct sctp_sndrcvinfo default_sinfo;
 	struct sctp_sndrcvinfo *sinfo;
 	struct sctp_initmsg *sinit;
@@ -1829,6 +1850,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	struct sctp_cmsgs cmsgs = { NULL };
 	bool fill_sinfo_ttl = false;
 	__u16 sinfo_flags = 0;
+	union sctp_addr *daddr;
 	int err;
 
 	err = 0;
@@ -1851,23 +1873,11 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		goto out_nounlock;
 	}
 
-	/* Fetch the destination address for this packet.  This
-	 * address only selects the association--it is not necessarily
-	 * the address we will send to.
-	 * For a peeled-off socket, msg_name is ignored.
-	 */
-	if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
-		int msg_namelen = msg->msg_namelen;
-
-		err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
-				       msg_namelen);
-		if (err)
-			return err;
-
-		if (msg_namelen > sizeof(to))
-			msg_namelen = sizeof(to);
-		memcpy(&to, msg->msg_name, msg_namelen);
-		msg_name = msg->msg_name;
+	/* Get daddr from msg */
+	daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
+	if (IS_ERR(daddr)) {
+		err = PTR_ERR(daddr);
+		goto out_nounlock;
 	}
 
 	sinit = cmsgs.init;
@@ -1925,9 +1935,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	lock_sock(sk);
 
 	/* If a msg_name has been specified, assume this is to be used.  */
-	if (msg_name) {
+	if (daddr) {
 		/* Look for a matching association on the endpoint. */
-		asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
+		asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
 	} else {
 		asoc = sctp_id2assoc(sk, associd);
 		if (!asoc) {
@@ -1945,7 +1955,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
 	/* Do we need to create the association?  */
 	if (!asoc) {
-		err = sctp_sendmsg_new_asoc(sk, sinfo_flags, &cmsgs, &to,
+		err = sctp_sendmsg_new_asoc(sk, sinfo_flags, &cmsgs, daddr,
 					    &transport);
 		if (err)
 			goto out_unlock;
@@ -1989,9 +1999,9 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	 * to override the primary destination address in the TCP model, or
 	 * when SCTP_ADDR_OVER flag is set in the UDP model.
 	 */
-	if ((sctp_style(sk, TCP) && msg_name) ||
+	if ((sctp_style(sk, TCP) && daddr) ||
 	    (sinfo_flags & SCTP_ADDR_OVER)) {
-		chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
+		chunk_tp = sctp_assoc_lookup_paddr(asoc, daddr);
 		if (!chunk_tp) {
 			err = -EINVAL;
 			goto out_free;

From 204f817fb9ab1b42c34c27d593cad2992d575f71 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:14 +0800
Subject: [PATCH 0594/1678] sctp: factor out sctp_sendmsg_parse from
 sctp_sendmsg

This patch is to move the codes for parsing msghdr and checking
sk into sctp_sendmsg_parse.

Note that different from before, 'sinfo' in sctp_sendmsg won't
be NULL any more. It gets the value either from cmsgs->srinfo,
cmsgs->sinfo or asoc. With it, the 'sinfo' and 'fill_sinfo_ttl'
check can be removed from sctp_sendmsg.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 172 +++++++++++++++++++---------------------------
 1 file changed, 69 insertions(+), 103 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 68691d2b3167..bf089e59c792 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1606,6 +1606,61 @@ static int sctp_error(struct sock *sk, int flags, int err)
 static int sctp_msghdr_parse(const struct msghdr *msg,
 			     struct sctp_cmsgs *cmsgs);
 
+static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
+			      struct sctp_sndrcvinfo *srinfo,
+			      const struct msghdr *msg, size_t msg_len)
+{
+	__u16 sflags;
+	int err;
+
+	if (sctp_sstate(sk, LISTENING) && sctp_style(sk, TCP))
+		return -EPIPE;
+
+	if (msg_len > sk->sk_sndbuf)
+		return -EMSGSIZE;
+
+	memset(cmsgs, 0, sizeof(*cmsgs));
+	err = sctp_msghdr_parse(msg, cmsgs);
+	if (err) {
+		pr_debug("%s: msghdr parse err:%x\n", __func__, err);
+		return err;
+	}
+
+	memset(srinfo, 0, sizeof(*srinfo));
+	if (cmsgs->srinfo) {
+		srinfo->sinfo_stream = cmsgs->srinfo->sinfo_stream;
+		srinfo->sinfo_flags = cmsgs->srinfo->sinfo_flags;
+		srinfo->sinfo_ppid = cmsgs->srinfo->sinfo_ppid;
+		srinfo->sinfo_context = cmsgs->srinfo->sinfo_context;
+		srinfo->sinfo_assoc_id = cmsgs->srinfo->sinfo_assoc_id;
+		srinfo->sinfo_timetolive = cmsgs->srinfo->sinfo_timetolive;
+	}
+
+	if (cmsgs->sinfo) {
+		srinfo->sinfo_stream = cmsgs->sinfo->snd_sid;
+		srinfo->sinfo_flags = cmsgs->sinfo->snd_flags;
+		srinfo->sinfo_ppid = cmsgs->sinfo->snd_ppid;
+		srinfo->sinfo_context = cmsgs->sinfo->snd_context;
+		srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
+	}
+
+	sflags = srinfo->sinfo_flags;
+	if (!sflags && msg_len)
+		return 0;
+
+	if (sctp_style(sk, TCP) && (sflags & (SCTP_EOF | SCTP_ABORT)))
+		return -EINVAL;
+
+	if (((sflags & SCTP_EOF) && msg_len > 0) ||
+	    (!(sflags & (SCTP_EOF | SCTP_ABORT)) && msg_len == 0))
+		return -EINVAL;
+
+	if ((sflags & SCTP_ADDR_OVER) && !msg->msg_name)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 				 struct sctp_cmsgs *cmsgs,
 				 union sctp_addr *daddr,
@@ -1839,39 +1894,23 @@ static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
 
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
-	struct sctp_sock *sp;
-	struct sctp_endpoint *ep;
+	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_association *new_asoc = NULL, *asoc = NULL;
 	struct sctp_transport *transport, *chunk_tp;
-	struct sctp_sndrcvinfo default_sinfo;
-	struct sctp_sndrcvinfo *sinfo;
-	struct sctp_initmsg *sinit;
+	struct sctp_sndrcvinfo _sinfo, *sinfo;
 	sctp_assoc_t associd = 0;
 	struct sctp_cmsgs cmsgs = { NULL };
-	bool fill_sinfo_ttl = false;
 	__u16 sinfo_flags = 0;
 	union sctp_addr *daddr;
 	int err;
 
-	err = 0;
-	sp = sctp_sk(sk);
-	ep = sp->ep;
-
-	pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk,
-		 msg, msg_len, ep);
-
-	/* We cannot send a message over a TCP-style listening socket. */
-	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
-		err = -EPIPE;
+	/* Parse and get snd_info */
+	err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
+	if (err)
 		goto out_nounlock;
-	}
 
-	/* Parse out the SCTP CMSGs.  */
-	err = sctp_msghdr_parse(msg, &cmsgs);
-	if (err) {
-		pr_debug("%s: msghdr parse err:%x\n", __func__, err);
-		goto out_nounlock;
-	}
+	sinfo  = &_sinfo;
+	sinfo_flags = sinfo->sinfo_flags;
 
 	/* Get daddr from msg */
 	daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
@@ -1880,58 +1919,6 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		goto out_nounlock;
 	}
 
-	sinit = cmsgs.init;
-	if (cmsgs.sinfo != NULL) {
-		memset(&default_sinfo, 0, sizeof(default_sinfo));
-		default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
-		default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
-		default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
-		default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
-		default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
-
-		sinfo = &default_sinfo;
-		fill_sinfo_ttl = true;
-	} else {
-		sinfo = cmsgs.srinfo;
-	}
-	/* Did the user specify SNDINFO/SNDRCVINFO? */
-	if (sinfo) {
-		sinfo_flags = sinfo->sinfo_flags;
-		associd = sinfo->sinfo_assoc_id;
-	}
-
-	pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__,
-		 msg_len, sinfo_flags);
-
-	/* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
-	if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
-		err = -EINVAL;
-		goto out_nounlock;
-	}
-
-	/* If SCTP_EOF is set, no data can be sent. Disallow sending zero
-	 * length messages when SCTP_EOF|SCTP_ABORT is not set.
-	 * If SCTP_ABORT is set, the message length could be non zero with
-	 * the msg_iov set to the user abort reason.
-	 */
-	if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
-	    (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
-		err = -EINVAL;
-		goto out_nounlock;
-	}
-
-	/* If SCTP_ADDR_OVER is set, there must be an address
-	 * specified in msg_name.
-	 */
-	if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
-		err = -EINVAL;
-		goto out_nounlock;
-	}
-
-	transport = NULL;
-
-	pr_debug("%s: about to look up association\n", __func__);
-
 	lock_sock(sk);
 
 	/* If a msg_name has been specified, assume this is to be used.  */
@@ -1964,36 +1951,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		new_asoc = asoc;
 	}
 
-	/* ASSERT: we have a valid association at this point.  */
-	pr_debug("%s: we have a valid association\n", __func__);
+	if (!cmsgs.srinfo && !cmsgs.sinfo) {
+		sinfo->sinfo_stream = asoc->default_stream;
+		sinfo->sinfo_ppid = asoc->default_ppid;
+		sinfo->sinfo_context = asoc->default_context;
+		sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+	}
 
-	if (!sinfo) {
-		/* If the user didn't specify SNDINFO/SNDRCVINFO, make up
-		 * one with some defaults.
-		 */
-		memset(&default_sinfo, 0, sizeof(default_sinfo));
-		default_sinfo.sinfo_stream = asoc->default_stream;
-		default_sinfo.sinfo_flags = asoc->default_flags;
-		default_sinfo.sinfo_ppid = asoc->default_ppid;
-		default_sinfo.sinfo_context = asoc->default_context;
-		default_sinfo.sinfo_timetolive = asoc->default_timetolive;
-		default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
-
-		sinfo = &default_sinfo;
-	} else if (fill_sinfo_ttl) {
-		/* In case SNDINFO was specified, we still need to fill
-		 * it with a default ttl from the assoc here.
-		 */
+	if (!cmsgs.srinfo)
 		sinfo->sinfo_timetolive = asoc->default_timetolive;
-	}
-
-	/* API 7.1.7, the sndbuf size per association bounds the
-	 * maximum size of data that can be sent in a single send call.
-	 */
-	if (msg_len > sk->sk_sndbuf) {
-		err = -EMSGSIZE;
-		goto out_free;
-	}
 
 	/* If an address is passed with the sendto/sendmsg call, it is used
 	 * to override the primary destination address in the TCP model, or

From d42cb06e5b542cabea88c9074b2abf90d43757f7 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:15 +0800
Subject: [PATCH 0595/1678] sctp: factor out sctp_sendmsg_update_sinfo from
 sctp_sendmsg

This patch is to move the codes for trying to get sinfo from
asoc into sctp_sendmsg_update_sinfo.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bf089e59c792..bd1a657117f1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1892,6 +1892,21 @@ static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
 	return daddr;
 }
 
+static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
+				      struct sctp_sndrcvinfo *sinfo,
+				      struct sctp_cmsgs *cmsgs)
+{
+	if (!cmsgs->srinfo && !cmsgs->sinfo) {
+		sinfo->sinfo_stream = asoc->default_stream;
+		sinfo->sinfo_ppid = asoc->default_ppid;
+		sinfo->sinfo_context = asoc->default_context;
+		sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+	}
+
+	if (!cmsgs->srinfo)
+		sinfo->sinfo_timetolive = asoc->default_timetolive;
+}
+
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
@@ -1951,15 +1966,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		new_asoc = asoc;
 	}
 
-	if (!cmsgs.srinfo && !cmsgs.sinfo) {
-		sinfo->sinfo_stream = asoc->default_stream;
-		sinfo->sinfo_ppid = asoc->default_ppid;
-		sinfo->sinfo_context = asoc->default_context;
-		sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
-	}
-
-	if (!cmsgs.srinfo)
-		sinfo->sinfo_timetolive = asoc->default_timetolive;
+	/* Update snd_info with the asoc */
+	sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
 
 	/* If an address is passed with the sendto/sendmsg call, it is used
 	 * to override the primary destination address in the TCP model, or

From 8e87c6eb18f9d7427054f28a284d495c174d9970 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:16 +0800
Subject: [PATCH 0596/1678] sctp: remove the unnecessary transport looking up
 from sctp_sendmsg

Now sctp_assoc_lookup_paddr can only be called only if daddr has
been set. But if daddr has been set, sctp_endpoint_lookup_assoc
would be done, where it could already have the transport.

So this unnecessary transport looking up should be removed, but
only reset transport as NULL when SCTP_ADDR_OVER is not set for
UDP type socket.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bd1a657117f1..426031095afe 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1911,7 +1911,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_association *new_asoc = NULL, *asoc = NULL;
-	struct sctp_transport *transport, *chunk_tp;
+	struct sctp_transport *transport = NULL;
 	struct sctp_sndrcvinfo _sinfo, *sinfo;
 	sctp_assoc_t associd = 0;
 	struct sctp_cmsgs cmsgs = { NULL };
@@ -1966,29 +1966,17 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		new_asoc = asoc;
 	}
 
+	if (!sctp_style(sk, TCP) && !(sinfo_flags & SCTP_ADDR_OVER))
+		transport = NULL;
+
 	/* Update snd_info with the asoc */
 	sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
 
-	/* If an address is passed with the sendto/sendmsg call, it is used
-	 * to override the primary destination address in the TCP model, or
-	 * when SCTP_ADDR_OVER flag is set in the UDP model.
-	 */
-	if ((sctp_style(sk, TCP) && daddr) ||
-	    (sinfo_flags & SCTP_ADDR_OVER)) {
-		chunk_tp = sctp_assoc_lookup_paddr(asoc, daddr);
-		if (!chunk_tp) {
-			err = -EINVAL;
-			goto out_free;
-		}
-	} else
-		chunk_tp = NULL;
-
 	/* Send msg to the asoc */
-	err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, chunk_tp, sinfo);
-
-out_free:
+	err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
 	if (err < 0 && err != -ESRCH && new_asoc)
 		sctp_association_free(asoc);
+
 out_unlock:
 	release_sock(sk);
 out_nounlock:

From 007b7e18be74a49b61f89664966ac1477e1c9608 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:17 +0800
Subject: [PATCH 0597/1678] sctp: improve some variables in sctp_sendmsg

This patch mostly is to:

  - rename sinfo_flags as sflags, to make the indents look better, and
    also keep consistent with other sctp_sendmsg_xx functions.

  - replace new_asoc with bool new, no need to define a pointer here,
    as if new_asoc is set, it must be asoc.

  - rename the 'out_nounlock:' as 'out', shorter and nicer.

  - remove associd, only one place is using it now, just use
    sinfo->sinfo_assoc_id directly.

  - remove 'cmsgs' initialization in sctp_sendmsg, as it will be done
    in sctp_sendmsg_parse.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 426031095afe..a1c78fc19d95 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1910,28 +1910,28 @@ static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
-	struct sctp_association *new_asoc = NULL, *asoc = NULL;
 	struct sctp_transport *transport = NULL;
 	struct sctp_sndrcvinfo _sinfo, *sinfo;
-	sctp_assoc_t associd = 0;
-	struct sctp_cmsgs cmsgs = { NULL };
-	__u16 sinfo_flags = 0;
+	struct sctp_association *asoc;
+	struct sctp_cmsgs cmsgs;
 	union sctp_addr *daddr;
+	bool new = false;
+	__u16 sflags;
 	int err;
 
 	/* Parse and get snd_info */
 	err = sctp_sendmsg_parse(sk, &cmsgs, &_sinfo, msg, msg_len);
 	if (err)
-		goto out_nounlock;
+		goto out;
 
 	sinfo  = &_sinfo;
-	sinfo_flags = sinfo->sinfo_flags;
+	sflags = sinfo->sinfo_flags;
 
 	/* Get daddr from msg */
 	daddr = sctp_sendmsg_get_daddr(sk, msg, &cmsgs);
 	if (IS_ERR(daddr)) {
 		err = PTR_ERR(daddr);
-		goto out_nounlock;
+		goto out;
 	}
 
 	lock_sock(sk);
@@ -1941,7 +1941,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		/* Look for a matching association on the endpoint. */
 		asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
 	} else {
-		asoc = sctp_id2assoc(sk, associd);
+		asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
 		if (!asoc) {
 			err = -EPIPE;
 			goto out_unlock;
@@ -1949,24 +1949,23 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	}
 
 	if (asoc) {
-		err = sctp_sendmsg_check_sflags(asoc, sinfo_flags, msg,
-						msg_len);
+		err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
 		if (err <= 0)
 			goto out_unlock;
 	}
 
 	/* Do we need to create the association?  */
 	if (!asoc) {
-		err = sctp_sendmsg_new_asoc(sk, sinfo_flags, &cmsgs, daddr,
+		err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
 					    &transport);
 		if (err)
 			goto out_unlock;
 
 		asoc = transport->asoc;
-		new_asoc = asoc;
+		new = true;
 	}
 
-	if (!sctp_style(sk, TCP) && !(sinfo_flags & SCTP_ADDR_OVER))
+	if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
 		transport = NULL;
 
 	/* Update snd_info with the asoc */
@@ -1974,12 +1973,12 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
 	/* Send msg to the asoc */
 	err = sctp_sendmsg_to_asoc(asoc, msg, msg_len, transport, sinfo);
-	if (err < 0 && err != -ESRCH && new_asoc)
+	if (err < 0 && err != -ESRCH && new)
 		sctp_association_free(asoc);
 
 out_unlock:
 	release_sock(sk);
-out_nounlock:
+out:
 	return sctp_error(sk, msg->msg_flags, err);
 }
 

From 0a3920d28b5490e6c90110156135dbc12c7fc413 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 1 Mar 2018 23:05:18 +0800
Subject: [PATCH 0598/1678] sctp: adjust some codes in a better order in
 sctp_sendmsg

sctp_sendmsg_new_asoc and SCTP_ADDR_OVER check is only necessary
when daddr is set, so move them up to if (daddr) statement.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a1c78fc19d95..7fa76031bb08 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1936,38 +1936,38 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
 	lock_sock(sk);
 
-	/* If a msg_name has been specified, assume this is to be used.  */
+	/* Get and check or create asoc */
 	if (daddr) {
-		/* Look for a matching association on the endpoint. */
 		asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+		if (asoc) {
+			err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+							msg_len);
+			if (err <= 0)
+				goto out_unlock;
+		} else {
+			err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
+						    &transport);
+			if (err)
+				goto out_unlock;
+
+			asoc = transport->asoc;
+			new = true;
+		}
+
+		if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
+			transport = NULL;
 	} else {
 		asoc = sctp_id2assoc(sk, sinfo->sinfo_assoc_id);
 		if (!asoc) {
 			err = -EPIPE;
 			goto out_unlock;
 		}
-	}
 
-	if (asoc) {
 		err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len);
 		if (err <= 0)
 			goto out_unlock;
 	}
 
-	/* Do we need to create the association?  */
-	if (!asoc) {
-		err = sctp_sendmsg_new_asoc(sk, sflags, &cmsgs, daddr,
-					    &transport);
-		if (err)
-			goto out_unlock;
-
-		asoc = transport->asoc;
-		new = true;
-	}
-
-	if (!sctp_style(sk, TCP) && !(sflags & SCTP_ADDR_OVER))
-		transport = NULL;
-
 	/* Update snd_info with the asoc */
 	sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
 

From 7efc0b6b666d757e07417f59397e7f5f340e74e0 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:12 -0800
Subject: [PATCH 0599/1678] net/ipv4: Pass net to fib_multipath_hash instead of
 fib_info

fib_multipath_hash only needs net struct to check a sysctl. Make it
clear by passing net instead of fib_info. In the end this allows
alignment between the ipv4 and ipv6 versions.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     | 2 +-
 net/ipv4/fib_semantics.c | 2 +-
 net/ipv4/route.c         | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 8812582a94d5..7c7522e8585b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys);
 #endif
 void fib_select_multipath(struct fib_result *res, int hash);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 181b0d8d589c..e7c602c600ac 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi->fib_nhs > 1) {
-		int h = fib_multipath_hash(res->fi, fl4, skb, NULL);
+		int h = fib_multipath_hash(net, fl4, skb, NULL);
 
 		fib_select_multipath(res, h);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3bb686dac273..1689c569bbc3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1782,10 +1782,9 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 }
 
 /* if skb is set it will be used and fl4 can be NULL */
-int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
-	struct net *net = fi->fib_net;
 	struct flow_keys hash_keys;
 	u32 mhash;
 
@@ -1852,7 +1851,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi && res->fi->fib_nhs > 1) {
-		int h = fib_multipath_hash(res->fi, NULL, skb, hkeys);
+		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
 
 		fib_select_multipath(res, h);
 	}

From 6f74b6c259158d89ad258cda8e86240e01052884 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:13 -0800
Subject: [PATCH 0600/1678] net: Align ip_multipath_l3_keys and
 ip6_multipath_l3_keys

Symmetry is good and allows easy comparison that ipv4 and ipv6 are
doing the same thing. To that end, change ip_multipath_l3_keys to
set addresses at the end after the icmp compares, and move the
initialization of ipv6 flow keys to rt6_multipath_hash.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 20 +++++++++++---------
 net/ipv6/route.c |  4 ++--
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1689c569bbc3..df57bc68e8e0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1748,37 +1748,39 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb,
 				 struct flow_keys *hash_keys)
 {
 	const struct iphdr *outer_iph = ip_hdr(skb);
+	const struct iphdr *key_iph = outer_iph;
 	const struct iphdr *inner_iph;
 	const struct icmphdr *icmph;
 	struct iphdr _inner_iph;
 	struct icmphdr _icmph;
 
-	hash_keys->addrs.v4addrs.src = outer_iph->saddr;
-	hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
 	if (likely(outer_iph->protocol != IPPROTO_ICMP))
-		return;
+		goto out;
 
 	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
-		return;
+		goto out;
 
 	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
 				   &_icmph);
 	if (!icmph)
-		return;
+		goto out;
 
 	if (icmph->type != ICMP_DEST_UNREACH &&
 	    icmph->type != ICMP_REDIRECT &&
 	    icmph->type != ICMP_TIME_EXCEEDED &&
 	    icmph->type != ICMP_PARAMETERPROB)
-		return;
+		goto out;
 
 	inner_iph = skb_header_pointer(skb,
 				       outer_iph->ihl * 4 + sizeof(_icmph),
 				       sizeof(_inner_iph), &_inner_iph);
 	if (!inner_iph)
-		return;
-	hash_keys->addrs.v4addrs.src = inner_iph->saddr;
-	hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
+		goto out;
+
+	key_iph = inner_iph;
+out:
+	hash_keys->addrs.v4addrs.src = key_iph->saddr;
+	hash_keys->addrs.v4addrs.dst = key_iph->daddr;
 }
 
 /* if skb is set it will be used and fl4 can be NULL */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e2bb40824c85..190d9690dfe0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1815,8 +1815,6 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
 	key_iph = inner_iph;
 	_flkeys = NULL;
 out:
-	memset(keys, 0, sizeof(*keys));
-	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 	if (_flkeys) {
 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
@@ -1837,6 +1835,8 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
 	struct flow_keys hash_keys;
 
 	if (skb) {
+		memset(&hash_keys, 0, sizeof(hash_keys));
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 		ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
 		return flow_hash_from_keys(&hash_keys) >> 1;
 	}

From ec7127a5d53d891b77a11433026dca72c57eaf88 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:14 -0800
Subject: [PATCH 0601/1678] net/ipv4: Simplify fib_multipath_hash with optional
 flow keys

As of commit e37b1e978bec5 ("ipv6: route: dissect flow in input path if
fib rules need it") fib_multipath_hash takes an optional flow keys. If
non-NULL it means the skb has already been dissected. If not set, then
fib_multipath_hash needs to call skb_flow_dissect_flow_keys.

Simplify the logic by setting flkeys to the local stack variable keys.
Simplifies fib_multipath_hash by only have 1 set of instructions
setting hash_keys.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df57bc68e8e0..6a7b3cba3972 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1810,24 +1810,20 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 			/* short-circuit if we already have L4 hash present */
 			if (skb->l4_hash)
 				return skb_get_hash_raw(skb) >> 1;
+
 			memset(&hash_keys, 0, sizeof(hash_keys));
 
-			if (flkeys) {
-				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-				hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
-				hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
-				hash_keys.ports.src = flkeys->ports.src;
-				hash_keys.ports.dst = flkeys->ports.dst;
-				hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
-			} else {
+			if (!flkeys) {
 				skb_flow_dissect_flow_keys(skb, &keys, flag);
-				hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-				hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
-				hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
-				hash_keys.ports.src = keys.ports.src;
-				hash_keys.ports.dst = keys.ports.dst;
-				hash_keys.basic.ip_proto = keys.basic.ip_proto;
+				flkeys = &keys;
 			}
+
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+			hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
+			hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
+			hash_keys.ports.src = flkeys->ports.src;
+			hash_keys.ports.dst = flkeys->ports.dst;
+			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
 		} else {
 			memset(&hash_keys, 0, sizeof(hash_keys));
 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;

From 9a2a537acc75dfd19c4358bc9cb6042bdc60698c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:15 -0800
Subject: [PATCH 0602/1678] net/ipv6: Make rt6_multipath_hash similar to
 fib_multipath_hash

Make rt6_multipath_hash more of a direct parallel to fib_multipath_hash
and reduce stack and overhead in the process: get_hash_from_flowi6 is
just a wrapper around __get_hash_from_flowi6 with another stack
allocation for flow_keys. Move setting the addresses, protocol and
label into rt6_multipath_hash and allow it to make the call to
flow_hash_from_keys.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 190d9690dfe0..5c89af2c54f4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1833,15 +1833,21 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
 		       struct flow_keys *flkeys)
 {
 	struct flow_keys hash_keys;
+	u32 mhash;
 
+	memset(&hash_keys, 0, sizeof(hash_keys));
+	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 	if (skb) {
-		memset(&hash_keys, 0, sizeof(hash_keys));
-		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 		ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
-		return flow_hash_from_keys(&hash_keys) >> 1;
+	} else {
+		hash_keys.addrs.v6addrs.src = fl6->saddr;
+		hash_keys.addrs.v6addrs.dst = fl6->daddr;
+		hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+		hash_keys.basic.ip_proto = fl6->flowi6_proto;
 	}
+	mhash = flow_hash_from_keys(&hash_keys);
 
-	return get_hash_from_flowi6(fl6) >> 1;
+	return mhash >> 1;
 }
 
 void ip6_route_input(struct sk_buff *skb)

From 3192dac64c73d8c0eb4274a3da23d829fb5177af Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:16 -0800
Subject: [PATCH 0603/1678] net: Rename NETEVENT_MULTIPATH_HASH_UPDATE

Rename NETEVENT_MULTIPATH_HASH_UPDATE to
NETEVENT_IPV4_MPATH_HASH_UPDATE to denote it relates to a change
in the IPv4 hash policy.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +-
 include/net/netevent.h                                | 2 +-
 net/ipv4/sysctl_net_ipv4.c                            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 69f16c605b9d..93d48c1b2bf8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2430,7 +2430,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
 		mlxsw_core_schedule_work(&net_work->work);
 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
 		break;
-	case NETEVENT_MULTIPATH_HASH_UPDATE:
+	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
 		net = ptr;
 
 		if (!net_eq(net, &init_net))
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 40e7bab68490..baee605a94ab 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -26,7 +26,7 @@ enum netevent_notif_type {
 	NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */
 	NETEVENT_REDIRECT,	   /* arg is struct netevent_redirect ptr */
 	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
-	NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */
+	NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 89683d868b37..011de9a20ec6 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (write && ret == 0)
-		call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+		call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
 
 	return ret;
 }

From b75cc8f90f07342467b3bd51dbc0054f185032c9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:17 -0800
Subject: [PATCH 0604/1678] net/ipv6: Pass skb to route lookup

IPv6 does path selection for multipath routes deep in the lookup
functions. The next patch adds L4 hash option and needs the skb
for the forward path. To get the skb to the relevant FIB lookup
functions it needs to go through the fib rules layer, so add a
lookup_data argument to the fib_lookup_arg struct.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/cma.c      |  2 +-
 drivers/net/ipvlan/ipvlan_core.c   |  3 +-
 drivers/net/vrf.c                  |  7 +--
 include/net/fib_rules.h            |  1 +
 include/net/ip6_fib.h              |  4 +-
 include/net/ip6_route.h            | 11 +++--
 net/ipv6/anycast.c                 |  2 +-
 net/ipv6/fib6_rules.c              |  8 ++--
 net/ipv6/icmp.c                    |  3 +-
 net/ipv6/ip6_fib.c                 |  3 +-
 net/ipv6/ip6_gre.c                 |  2 +-
 net/ipv6/ip6_tunnel.c              |  4 +-
 net/ipv6/ip6_vti.c                 |  2 +-
 net/ipv6/mcast.c                   |  4 +-
 net/ipv6/netfilter/ip6t_rpfilter.c |  2 +-
 net/ipv6/netfilter/nft_fib_ipv6.c  |  3 +-
 net/ipv6/route.c                   | 72 ++++++++++++++++++------------
 net/ipv6/seg6_local.c              |  4 +-
 18 files changed, 83 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3ae32d1ddd27..915bbd867b61 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
 			   IPV6_ADDR_LINKLOCAL;
 	struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
 					 &src_addr->sin6_addr, net_dev->ifindex,
-					 strict);
+					 NULL, strict);
 	bool ret;
 
 	if (!rt)
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 17daebd19e65..1a8132eb2a3e 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
 		};
 
 		skb_dst_drop(skb);
-		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
+					     skb, flags);
 		skb_dst_set(skb, dst);
 		break;
 	}
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index e459e601c57f..c6be49d3a9eb 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
 					     const struct net_device *dev,
 					     struct flowi6 *fl6,
 					     int ifindex,
+					     const struct sk_buff *skb,
 					     int flags)
 {
 	struct net_vrf *vrf = netdev_priv(dev);
@@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
 	if (!table)
 		return NULL;
 
-	return ip6_pol_route(net, table, ifindex, fl6, flags);
+	return ip6_pol_route(net, table, ifindex, fl6, skb, flags);
 }
 
 static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
@@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
 	struct net *net = dev_net(vrf_dev);
 	struct rt6_info *rt6;
 
-	rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex,
+	rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
 				   RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
 	if (unlikely(!rt6))
 		return;
@@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
 	if (!ipv6_addr_any(&fl6->saddr))
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
 	if (rt)
 		dst = &rt->dst;
 
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 1c9e17c11953..e5cfcfc7dd93 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -47,6 +47,7 @@ struct fib_rule {
 
 struct fib_lookup_arg {
 	void			*lookup_ptr;
+	const void		*lookup_data;
 	void			*result;
 	struct fib_rule		*rule;
 	u32			table;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 8d906a35b534..5e86fd9dc857 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -350,7 +350,8 @@ struct fib6_table {
 
 typedef struct rt6_info *(*pol_lookup_t)(struct net *,
 					 struct fib6_table *,
-					 struct flowi6 *, int);
+					 struct flowi6 *,
+					 const struct sk_buff *, int);
 
 struct fib6_entry_notifier_info {
 	struct fib_notifier_info info; /* must be first */
@@ -364,6 +365,7 @@ struct fib6_entry_notifier_info {
 struct fib6_table *fib6_get_table(struct net *net, u32 id);
 struct fib6_table *fib6_new_table(struct net *net, u32 id);
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup);
 
 struct fib6_node *fib6_lookup(struct fib6_node *root,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index da2bde5fda8f..9594f9317952 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct net_device *dev,
-					 struct flowi6 *fl6, int flags);
+					 struct flowi6 *fl6,
+					 const struct sk_buff *skb, int flags);
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 					 struct flowi6 *fl6, int flags);
@@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-				   int flags);
+				   const struct sk_buff *skb, int flags);
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-			       int ifindex, struct flowi6 *fl6, int flags);
+			       int ifindex, struct flowi6 *fl6,
+			       const struct sk_buff *skb, int flags);
 
 void ip6_route_init_special_entries(void);
 int ip6_route_init(void);
@@ -126,7 +128,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 }
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
-			    const struct in6_addr *saddr, int oif, int flags);
+			    const struct in6_addr *saddr, int oif,
+			    const struct sk_buff *skb, int flags);
 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
 		       struct flow_keys *hkeys);
 
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index d7d0abc7fd0e..c61718dba2e6 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 
-		rt = rt6_lookup(net, addr, NULL, 0, 0);
+		rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
 		if (rt) {
 			dev = rt->dst.dev;
 			ip6_rt_put(rt);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 04e5f523e50f..00ef9467f3c0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup)
 {
 	if (net->ipv6.fib6_has_custom_rules) {
 		struct fib_lookup_arg arg = {
 			.lookup_ptr = lookup,
+			.lookup_data = skb,
 			.flags = FIB_LOOKUP_NOREF,
 		};
 
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
 	} else {
 		struct rt6_info *rt;
 
-		rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+		rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
 		if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
 			return &rt->dst;
 		ip6_rt_put(rt);
-		rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+		rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
 		if (rt->dst.error != -EAGAIN)
 			return &rt->dst;
 		ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto out;
 	}
 
-	rt = lookup(net, table, flp6, flags);
+	rt = lookup(net, table, flp6, arg->lookup_data, flags);
 	if (rt != net->ipv6.ip6_null_entry) {
 		struct fib6_rule *r = (struct fib6_rule *)rule;
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b0778d323b6e..a5d929223820 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
 	skb_pull(skb2, nhs);
 	skb_reset_network_header(skb2);
 
-	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+			skb, 0);
 
 	if (rt && rt->dst.dev)
 		skb2->dev = rt->dst.dev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cab95cf3b39f..2f995e9e3050 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 }
 
 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+				   const struct sk_buff *skb,
 				   int flags, pol_lookup_t lookup)
 {
 	struct rt6_info *rt;
 
-	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+	rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
 	if (rt->dst.error == -EAGAIN) {
 		ip6_rt_put(rt);
 		rt = net->ipv6.ip6_null_entry;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4f150a394387..83c7766c8c75 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1053,7 +1053,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 
 		struct rt6_info *rt = rt6_lookup(t->net,
 						 &p->raddr, &p->laddr,
-						 p->link, strict);
+						 p->link, NULL, strict);
 
 		if (!rt)
 			return;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 869e2e6750f7..1124f310df5a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 		/* Try to guess incoming interface */
 		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
-				NULL, 0, 0);
+				NULL, 0, skb2, 0);
 
 		if (rt && rt->dst.dev)
 			skb2->dev = rt->dst.dev;
@@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 
 		struct rt6_info *rt = rt6_lookup(t->net,
 						 &p->raddr, &p->laddr,
-						 p->link, strict);
+						 p->link, NULL, strict);
 
 		if (!rt)
 			return;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c617ea17faa8..a482b854eeea 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t)
 			      (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
 		struct rt6_info *rt = rt6_lookup(t->net,
 						 &p->raddr, &p->laddr,
-						 p->link, strict);
+						 p->link, NULL, strict);
 
 		if (rt)
 			tdev = rt->dst.dev;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d9bb933dd5c4..d1a0cefac273 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	if (ifindex == 0) {
 		struct rt6_info *rt;
-		rt = rt6_lookup(net, addr, NULL, 0, 0);
+		rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
 		if (rt) {
 			dev = rt->dst.dev;
 			ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 	struct inet6_dev *idev = NULL;
 
 	if (ifindex == 0) {
-		struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+		struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
 
 		if (rt) {
 			dev = rt->dst.dev;
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 94deb69bbbda..910a27318f58 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
 		lookup_flags |= RT6_LOOKUP_F_IFACE;
 	}
 
-	rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+	rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
 	if (rt->dst.error)
 		goto out;
 
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index cc5174c7254c..3230b3d7b11b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
 	*dest = 0;
  again:
-	rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+	rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+				      lookup_flags);
 	if (rt->dst.error)
 		goto put_rt_err;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5c89af2c54f4..d2b8368663cb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -452,6 +452,7 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 
 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 					     struct flowi6 *fl6, int oif,
+					     const struct sk_buff *skb,
 					     int strict)
 {
 	struct rt6_info *sibling, *next_sibling;
@@ -460,7 +461,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
 	if (!fl6->mp_hash)
-		fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL);
+		fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL);
 
 	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
 		return match;
@@ -914,7 +915,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     struct fib6_table *table,
-					     struct flowi6 *fl6, int flags)
+					     struct flowi6 *fl6,
+					     const struct sk_buff *skb,
+					     int flags)
 {
 	struct rt6_info *rt, *rt_cache;
 	struct fib6_node *fn;
@@ -929,8 +932,8 @@ restart:
 		rt = rt6_device_match(net, rt, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
 		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(rt, fl6,
-						  fl6->flowi6_oif, flags);
+			rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif,
+						  skb, flags);
 	}
 	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
@@ -954,14 +957,15 @@ restart:
 }
 
 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
-				    int flags)
+				   const struct sk_buff *skb, int flags)
 {
-	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
 }
 EXPORT_SYMBOL_GPL(ip6_route_lookup);
 
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
-			    const struct in6_addr *saddr, int oif, int strict)
+			    const struct in6_addr *saddr, int oif,
+			    const struct sk_buff *skb, int strict)
 {
 	struct flowi6 fl6 = {
 		.flowi6_oif = oif,
@@ -975,7 +979,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 	}
 
-	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
 	if (dst->error == 0)
 		return (struct rt6_info *) dst;
 
@@ -1647,7 +1651,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
 }
 
 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
-			       int oif, struct flowi6 *fl6, int flags)
+			       int oif, struct flowi6 *fl6,
+			       const struct sk_buff *skb, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
 	struct rt6_info *rt, *rt_cache;
@@ -1669,7 +1674,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 redo_rt6_select:
 	rt = rt6_select(net, fn, oif, strict);
 	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(rt, fl6, oif, strict);
+		rt = rt6_multipath_select(rt, fl6, oif, skb, strict);
 	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
@@ -1768,20 +1773,25 @@ uncached_rt_out:
 }
 EXPORT_SYMBOL_GPL(ip6_pol_route);
 
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
-					    struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+					    struct fib6_table *table,
+					    struct flowi6 *fl6,
+					    const struct sk_buff *skb,
+					    int flags)
 {
-	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
 }
 
 struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct net_device *dev,
-					 struct flowi6 *fl6, int flags)
+					 struct flowi6 *fl6,
+					 const struct sk_buff *skb,
+					 int flags)
 {
 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 		flags |= RT6_LOOKUP_F_IFACE;
 
-	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
 }
 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
@@ -1876,13 +1886,17 @@ void ip6_route_input(struct sk_buff *skb)
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
 		fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+	skb_dst_set(skb,
+		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
 }
 
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
-					     struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+					     struct fib6_table *table,
+					     struct flowi6 *fl6,
+					     const struct sk_buff *skb,
+					     int flags)
 {
-	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
 }
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1910,7 +1924,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 	else if (sk)
 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 
-	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
 }
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
@@ -2159,6 +2173,7 @@ struct ip6rd_flowi {
 static struct rt6_info *__ip6_route_redirect(struct net *net,
 					     struct fib6_table *table,
 					     struct flowi6 *fl6,
+					     const struct sk_buff *skb,
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2232,8 +2247,9 @@ out:
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
-					const struct flowi6 *fl6,
-					const struct in6_addr *gateway)
+					    const struct flowi6 *fl6,
+					    const struct sk_buff *skb,
+					    const struct in6_addr *gateway)
 {
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct ip6rd_flowi rdfl;
@@ -2241,7 +2257,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
 	rdfl.fl6 = *fl6;
 	rdfl.gateway = *gateway;
 
-	return fib6_rule_lookup(net, &rdfl.fl6,
+	return fib6_rule_lookup(net, &rdfl.fl6, skb,
 				flags, __ip6_route_redirect);
 }
 
@@ -2261,7 +2277,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
 	fl6.flowlabel = ip6_flowinfo(iph);
 	fl6.flowi6_uid = uid;
 
-	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
 	rt6_do_redirect(dst, NULL, skb);
 	dst_release(dst);
 }
@@ -2283,7 +2299,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
 	fl6.saddr = iph->daddr;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
 
-	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
 	rt6_do_redirect(dst, NULL, skb);
 	dst_release(dst);
 }
@@ -2485,7 +2501,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 		flags |= RT6_LOOKUP_F_HAS_SADDR;
 
 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
-	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
 
 	/* if table lookup failed, fall back to full lookup */
 	if (rt == net->ipv6.ip6_null_entry) {
@@ -2548,7 +2564,7 @@ static int ip6_route_check_nh(struct net *net,
 	}
 
 	if (!grt)
-		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
 
 	if (!grt)
 		goto out;
@@ -4613,7 +4629,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		if (!ipv6_addr_any(&fl6.saddr))
 			flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-		dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
 
 		rcu_read_unlock();
 	} else {
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ba3767ef5e93..45722327375a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
 
 	if (!tbl_id) {
-		dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
 	} else {
 		struct fib6_table *table;
 
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 		if (!table)
 			goto out;
 
-		rt = ip6_pol_route(net, table, 0, &fl6, flags);
+		rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
 		dst = &rt->dst;
 	}
 

From b4bac172e90ce4a93df8adf44eb70d91b9d611eb Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:18 -0800
Subject: [PATCH 0605/1678] net/ipv6: Add support for path selection using hash
 of 5-tuple

Some operators prefer IPv6 path selection to use a standard 5-tuple
hash rather than just an L3 hash with the flow the label. To that end
add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb
("net: ipv4: add support for ECMP hash policy choice"). The default
is still L3 which covers source and destination addresses along with
flow label and IPv6 protocol.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  7 +++
 include/net/ip6_route.h                |  4 +-
 include/net/netevent.h                 |  1 +
 include/net/netns/ipv6.h               |  1 +
 net/ipv6/icmp.c                        |  2 +-
 net/ipv6/route.c                       | 68 ++++++++++++++++++++------
 net/ipv6/sysctl_net_ipv6.c             | 27 ++++++++++
 7 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..783675a730e5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
 	FALSE: disabled
 	Default: FALSE
 
+fib_multipath_hash_policy - INTEGER
+	Controls which hash policy to use for multipath routes.
+	Default: 0 (Layer 3)
+	Possible values:
+	0 - Layer 3 (source and destination addresses plus flow label)
+	1 - Layer 4 (standard 5-tuple)
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9594f9317952..ce2abc0ff102 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 			    const struct in6_addr *saddr, int oif,
 			    const struct sk_buff *skb, int flags);
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
-		       struct flow_keys *hkeys);
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+		       const struct sk_buff *skb, struct flow_keys *hkeys);
 
 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
 
diff --git a/include/net/netevent.h b/include/net/netevent.h
index baee605a94ab..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -27,6 +27,7 @@ enum netevent_notif_type {
 	NETEVENT_REDIRECT,	   /* arg is struct netevent_redirect ptr */
 	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 	NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+	NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index e286fda09fcf..5b51110435fc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
 	int ip6_rt_gc_elasticity;
 	int ip6_rt_mtu_expires;
 	int ip6_rt_min_advmss;
+	int multipath_hash_policy;
 	int flowlabel_consistency;
 	int auto_flowlabels;
 	int icmpv6_time;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a5d929223820..6f84668be6ea 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
 	fl6.flowi6_uid = sock_net_uid(net, NULL);
-	fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL);
+	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d2b8368663cb..f0ae58424c45 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt)
 	return false;
 }
 
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+					     struct rt6_info *match,
 					     struct flowi6 *fl6, int oif,
 					     const struct sk_buff *skb,
 					     int strict)
@@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 	 * case it will always be non-zero. Otherwise now is the time to do it.
 	 */
 	if (!fl6->mp_hash)
-		fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL);
+		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 
 	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
 		return match;
@@ -932,7 +933,7 @@ restart:
 		rt = rt6_device_match(net, rt, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
 		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif,
+			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
 						  skb, flags);
 	}
 	if (rt == net->ipv6.ip6_null_entry) {
@@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 redo_rt6_select:
 	rt = rt6_select(net, fn, oif, strict);
 	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(rt, fl6, oif, skb, strict);
+		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
 	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
@@ -1839,21 +1840,56 @@ out:
 }
 
 /* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb,
-		       struct flow_keys *flkeys)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
 	struct flow_keys hash_keys;
 	u32 mhash;
 
-	memset(&hash_keys, 0, sizeof(hash_keys));
-	hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	if (skb) {
-		ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
-	} else {
-		hash_keys.addrs.v6addrs.src = fl6->saddr;
-		hash_keys.addrs.v6addrs.dst = fl6->daddr;
-		hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
-		hash_keys.basic.ip_proto = fl6->flowi6_proto;
+	switch (net->ipv6.sysctl.multipath_hash_policy) {
+	case 0:
+		memset(&hash_keys, 0, sizeof(hash_keys));
+		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		if (skb) {
+			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+		} else {
+			hash_keys.addrs.v6addrs.src = fl6->saddr;
+			hash_keys.addrs.v6addrs.dst = fl6->daddr;
+			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+			hash_keys.basic.ip_proto = fl6->flowi6_proto;
+		}
+		break;
+	case 1:
+		if (skb) {
+			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+			struct flow_keys keys;
+
+			/* short-circuit if we already have L4 hash present */
+			if (skb->l4_hash)
+				return skb_get_hash_raw(skb) >> 1;
+
+			memset(&hash_keys, 0, sizeof(hash_keys));
+
+                        if (!flkeys) {
+				skb_flow_dissect_flow_keys(skb, &keys, flag);
+				flkeys = &keys;
+			}
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+			hash_keys.ports.src = flkeys->ports.src;
+			hash_keys.ports.dst = flkeys->ports.dst;
+			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+		} else {
+			memset(&hash_keys, 0, sizeof(hash_keys));
+			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+			hash_keys.addrs.v6addrs.src = fl6->saddr;
+			hash_keys.addrs.v6addrs.dst = fl6->daddr;
+			hash_keys.ports.src = fl6->fl6_sport;
+			hash_keys.ports.dst = fl6->fl6_dport;
+			hash_keys.basic.ip_proto = fl6->flowi6_proto;
+		}
+		break;
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
@@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb)
 		flkeys = &_flkeys;
 
 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
-		fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys);
+		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
 	skb_dst_drop(skb);
 	skb_dst_set(skb,
 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 262f791f1b9b..966c42af92f4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 #include <net/inet_frag.h>
+#include <net/netevent.h>
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
 
+static int zero;
 static int one = 1;
 static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	struct net *net;
+	int ret;
+
+	net = container_of(table->data, struct net,
+			   ipv6.sysctl.multipath_hash_policy);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+	return ret;
+}
 
 static struct ctl_table ipv6_table_template[] = {
 	{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "fib_multipath_hash_policy",
+		.data		= &init_net.ipv6.sysctl.multipath_hash_policy,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_rt6_multipath_hash_policy,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{ }
 };
 
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
 	ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
 	ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+	ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)

From 5e18b9c550397e4796bdca0f4cb8880fcc127dfe Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:19 -0800
Subject: [PATCH 0606/1678] mlxsw: spectrum_router: Add support for ipv6 hash
 policy update

Similar to 28678f07f127d ("mlxsw: spectrum_router: Update multipath hash
parameters upon netevents") for IPv4, make sure the kernel and asic are
using the same hash algorithm for path selection.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 93d48c1b2bf8..a8a578610a7b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2431,6 +2431,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
 		mlxsw_sp_port_dev_put(mlxsw_sp_port);
 		break;
 	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
+	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
 		net = ptr;
 
 		if (!net_eq(net, &init_net))
@@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
 
 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
 {
+	bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;
+
 	mlxsw_sp_mp_hash_header_set(recr2_pl,
 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
 	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
 	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
 	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
-	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
 	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
+	if (only_l3) {
+		mlxsw_sp_mp_hash_field_set(recr2_pl,
+					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
+	} else {
+		mlxsw_sp_mp_hash_header_set(recr2_pl,
+					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
+		mlxsw_sp_mp_hash_field_set(recr2_pl,
+					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
+		mlxsw_sp_mp_hash_field_set(recr2_pl,
+					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
+	}
 }
 
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)

From de7a0f871fabe74fff7481caf7d3efe03b58fe58 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:20 -0800
Subject: [PATCH 0607/1678] net: Remove unused get_hash_from_flow functions

__get_hash_from_flowi6 is still used for flowlabels, but the IPv4
variant and the wrappers to both are not used. Remove them.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h        | 16 ----------------
 net/core/flow_dissector.c | 16 ----------------
 2 files changed, 32 deletions(-)

diff --git a/include/net/flow.h b/include/net/flow.h
index 64e7ee9cb980..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family)
 
 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
 
-static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
-{
-	struct flow_keys keys;
-
-	return __get_hash_from_flowi6(fl6, &keys);
-}
-
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys);
-
-static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4)
-{
-	struct flow_keys keys;
-
-	return __get_hash_from_flowi4(fl4, &keys);
-}
-
 #endif
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 559db9ea8d86..d29f09bc5ff9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
 }
 EXPORT_SYMBOL(__get_hash_from_flowi6);
 
-__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys)
-{
-	memset(keys, 0, sizeof(*keys));
-
-	keys->addrs.v4addrs.src = fl4->saddr;
-	keys->addrs.v4addrs.dst = fl4->daddr;
-	keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-	keys->ports.src = fl4->fl4_sport;
-	keys->ports.dst = fl4->fl4_dport;
-	keys->keyid.keyid = fl4->fl4_gre_key;
-	keys->basic.ip_proto = fl4->flowi4_proto;
-
-	return flow_hash_from_keys(keys);
-}
-EXPORT_SYMBOL(__get_hash_from_flowi4);
-
 static const struct flow_dissector_key flow_keys_dissector_keys[] = {
 	{
 		.key_id = FLOW_DISSECTOR_KEY_CONTROL,

From 91a5c1ecba9f23f247b3772e6d54aabfebc0e300 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Mar 2018 08:32:21 -0800
Subject: [PATCH 0608/1678] selftests: forwarding: Add multipath test for L4
 hashing

Add IPv6 multipath test using L4 hashing. Created with inputs from
Ido Schimmel.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/router_multipath.sh        | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 55595305a604..3bc351008db6 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -235,6 +235,45 @@ multipath4_test()
        sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
 }
 
+multipath6_l4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+       local hash_policy
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
+       sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+	       nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 \
+	       nexthop via fe80:3::23 dev $rp13
+
+       sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+}
+
 multipath6_test()
 {
        local desc="$1"
@@ -278,6 +317,11 @@ multipath_test()
 	multipath6_test "ECMP" 1 1
 	multipath6_test "Weighted MP 2:1" 2 1
 	multipath6_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
 }
 
 setup_prepare()

From efab163bbc19e5dbd2b7756c1f26defc9c27d6ba Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Wed, 28 Feb 2018 15:36:19 -0500
Subject: [PATCH 0609/1678] tools: tc-testing: Add notap option

Add a command line arg to suppress tap output.  Handy in case
all the tap output is being supplied by the plugins.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 32 ++++++++++++++++-------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 7b50775abaf6..241eea37e4a4 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -241,12 +241,17 @@ def test_runner(pm, args, filtered_tests):
     testlist = filtered_tests
     tcount = len(testlist)
     index = 1
-    tap = str(index) + ".." + str(tcount) + "\n"
+    tap = ''
     badtest = None
     stage = None
     emergency_exit = False
     emergency_exit_message = ''
 
+    if args.notap:
+        if args.verbose:
+            tap = 'notap requested:  omitting test plan\n'
+    else:
+        tap = str(index) + ".." + str(tcount) + "\n"
     try:
         pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
     except Exception as ee:
@@ -303,15 +308,16 @@ def test_runner(pm, args, filtered_tests):
     # if we failed in setup or teardown,
     # fill in the remaining tests with ok-skipped
     count = index
-    tap += 'about to flush the tap output if tests need to be skipped\n'
-    if tcount + 1 != index:
-        for tidx in testlist[index - 1:]:
-            msg = 'skipped - previous {} failed'.format(stage)
-            tap += 'ok {} - {} # {} {} {}\n'.format(
-                count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
-            count += 1
+    if not args.notap:
+        tap += 'about to flush the tap output if tests need to be skipped\n'
+        if tcount + 1 != index:
+            for tidx in testlist[index - 1:]:
+                msg = 'skipped - previous {} failed'.format(stage)
+                tap += 'ok {} - {} # {} {} {}\n'.format(
+                    count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+                count += 1
 
-    tap += 'done flushing skipped test tap output\n'
+        tap += 'done flushing skipped test tap output\n'
     pm.call_post_suite(index)
 
     return tap
@@ -389,6 +395,9 @@ def set_args(parser):
     parser.add_argument(
         '-v', '--verbose', action='count', default=0,
         help='Show the commands that are being run')
+    parser.add_argument(
+        '-N', '--notap', action='store_true',
+        help='Suppress tap results for command under test')
     parser.add_argument('-d', '--device',
                         help='Execute the test case in flower category')
     return parser
@@ -601,7 +610,10 @@ def set_operation_mode(pm, args):
         catresults = test_runner(pm, args, alltests)
     else:
         catresults = 'No tests found\n'
-    print('All test results: \n\n{}'.format(catresults))
+    if args.notap:
+        print('Tap output suppression requested\n')
+    else:
+        print('All test results: \n\n{}'.format(catresults))
 
 def main():
     """

From 88c060549a4c555d59965801d1e811b71614c2b7 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 1 Mar 2018 02:02:27 +0100
Subject: [PATCH 0610/1678] dsa: Pass the port to get_sset_count()

By passing the port, we allow different ports to have different
statistics. This is useful since some ports have SERDES interfaces
with their own statistic counters.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c       | 2 +-
 drivers/net/dsa/b53/b53_priv.h         | 2 +-
 drivers/net/dsa/dsa_loop.c             | 2 +-
 drivers/net/dsa/lan9303-core.c         | 2 +-
 drivers/net/dsa/microchip/ksz_common.c | 2 +-
 drivers/net/dsa/mt7530.c               | 2 +-
 drivers/net/dsa/mv88e6xxx/chip.c       | 2 +-
 drivers/net/dsa/qca8k.c                | 2 +-
 include/net/dsa.h                      | 2 +-
 net/dsa/master.c                       | 4 ++--
 net/dsa/slave.c                        | 2 +-
 11 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index db830a1141d9..cd16067265dd 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -852,7 +852,7 @@ void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data)
 }
 EXPORT_SYMBOL(b53_get_ethtool_stats);
 
-int b53_get_sset_count(struct dsa_switch *ds)
+int b53_get_sset_count(struct dsa_switch *ds, int port)
 {
 	struct b53_device *dev = ds->priv;
 
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index d954cf36ecd8..1187ebd79287 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -288,7 +288,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
 int b53_configure_vlan(struct dsa_switch *ds);
 void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
 void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
-int b53_get_sset_count(struct dsa_switch *ds);
+int b53_get_sset_count(struct dsa_switch *ds, int port);
 int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index 7aa84ee4e771..f77be9f85cb3 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -86,7 +86,7 @@ static int dsa_loop_setup(struct dsa_switch *ds)
 	return 0;
 }
 
-static int dsa_loop_get_sset_count(struct dsa_switch *ds)
+static int dsa_loop_get_sset_count(struct dsa_switch *ds, int port)
 {
 	return __DSA_LOOP_CNT_MAX;
 }
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 6171c0853ff1..fefa454f3e56 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1007,7 +1007,7 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
 	}
 }
 
-static int lan9303_get_sset_count(struct dsa_switch *ds)
+static int lan9303_get_sset_count(struct dsa_switch *ds, int port)
 {
 	return ARRAY_SIZE(lan9303_mib);
 }
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 663b0d5b982b..bcb3e6c734f2 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -439,7 +439,7 @@ static void ksz_disable_port(struct dsa_switch *ds, int port,
 	ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, true);
 }
 
-static int ksz_sset_count(struct dsa_switch *ds)
+static int ksz_sset_count(struct dsa_switch *ds, int port)
 {
 	return TOTAL_SWITCH_COUNTER_NUM;
 }
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 8a0bb000d056..511ca134f13f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -604,7 +604,7 @@ mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-mt7530_get_sset_count(struct dsa_switch *ds)
+mt7530_get_sset_count(struct dsa_switch *ds, int port)
 {
 	return ARRAY_SIZE(mt7530_mib);
 }
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 24486f96dd39..8c9a30d1b06f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -754,7 +754,7 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
 					      STATS_TYPE_BANK1);
 }
 
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 9df22ebee822..600d5ad1fbde 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -631,7 +631,7 @@ qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
 }
 
 static int
-qca8k_get_sset_count(struct dsa_switch *ds)
+qca8k_get_sset_count(struct dsa_switch *ds, int port)
 {
 	return ARRAY_SIZE(ar8327_mib);
 }
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0ad17b63684d..60fb4ec8ba61 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -359,7 +359,7 @@ struct dsa_switch_ops {
 	void	(*get_strings)(struct dsa_switch *ds, int port, uint8_t *data);
 	void	(*get_ethtool_stats)(struct dsa_switch *ds,
 				     int port, uint64_t *data);
-	int	(*get_sset_count)(struct dsa_switch *ds);
+	int	(*get_sset_count)(struct dsa_switch *ds, int port);
 
 	/*
 	 * ethtool Wake-on-LAN
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 00589147f042..90e6df0351eb 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -42,7 +42,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset)
 		count += ops->get_sset_count(dev, sset);
 
 	if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
-		count += ds->ops->get_sset_count(ds);
+		count += ds->ops->get_sset_count(ds, cpu_dp->index);
 
 	return count;
 }
@@ -76,7 +76,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
 		 * constructed earlier
 		 */
 		ds->ops->get_strings(ds, port, ndata);
-		count = ds->ops->get_sset_count(ds);
+		count = ds->ops->get_sset_count(ds, port);
 		for (i = 0; i < count; i++) {
 			memmove(ndata + (i * len + sizeof(pfx)),
 				ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3376dad6dcfd..18561af7a8f1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -605,7 +605,7 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 
 		count = 4;
 		if (ds->ops->get_sset_count)
-			count += ds->ops->get_sset_count(ds);
+			count += ds->ops->get_sset_count(ds, dp->index);
 
 		return count;
 	}

From c6c8cd5e3ce494419d8894d6a96aa17375b83ca2 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 1 Mar 2018 02:02:28 +0100
Subject: [PATCH 0611/1678] net: dsa: mv88e6xxx: Hold mutex while doing stats
 operations

Until now, there has been no need to hold the reg mutex while getting
the count of statistics, or the strings, because the hardware was not
accessed. When adding support for SERDES statistics, it is necessary
to access the hardware, to determine if a port is using the SERDES
interface. So add mutex lock/unlocks.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 8c9a30d1b06f..27ca0fcb1040 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -724,8 +724,12 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
+	mutex_lock(&chip->reg_lock);
+
 	if (chip->info->ops->stats_get_strings)
 		chip->info->ops->stats_get_strings(chip, data);
+
+	mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
@@ -757,11 +761,14 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
 static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	int ret = 0;
 
+	mutex_lock(&chip->reg_lock);
 	if (chip->info->ops->stats_get_sset_count)
-		return chip->info->ops->stats_get_sset_count(chip);
+		ret = chip->info->ops->stats_get_sset_count(chip);
+	mutex_unlock(&chip->reg_lock);
 
-	return 0;
+	return ret;
 }
 
 static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,

From 436fe17d273bed9fd8b7bdf4172ea6d9eac0b703 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 1 Mar 2018 02:02:29 +0100
Subject: [PATCH 0612/1678] net: dsa: mv88e6xxx: Allow the SERDES interfaces to
 have statistics

When gettting the number of statistics, the strings and the actual
statistics, call the SERDES ops if implemented. This means the stats
code needs to return the number of strings/stats they have placed into
the data, so that the SERDES strings/stats can follow on.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 76 ++++++++++++++++++++++----------
 drivers/net/dsa/mv88e6xxx/chip.h | 13 ++++--
 2 files changed, 62 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 27ca0fcb1040..243d274aace5 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -689,8 +689,8 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 	return value;
 }
 
-static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
-					uint8_t *data, int types)
+static int mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+				       uint8_t *data, int types)
 {
 	struct mv88e6xxx_hw_stat *stat;
 	int i, j;
@@ -703,31 +703,39 @@ static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
 			j++;
 		}
 	}
+
+	return j;
 }
 
-static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
-					uint8_t *data)
+static int mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+				       uint8_t *data)
 {
-	mv88e6xxx_stats_get_strings(chip, data,
-				    STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+	return mv88e6xxx_stats_get_strings(chip, data,
+					   STATS_TYPE_BANK0 | STATS_TYPE_PORT);
 }
 
-static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
-					uint8_t *data)
+static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+				       uint8_t *data)
 {
-	mv88e6xxx_stats_get_strings(chip, data,
-				    STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+	return mv88e6xxx_stats_get_strings(chip, data,
+					   STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
 }
 
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
 				  uint8_t *data)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	int count = 0;
 
 	mutex_lock(&chip->reg_lock);
 
 	if (chip->info->ops->stats_get_strings)
-		chip->info->ops->stats_get_strings(chip, data);
+		count = chip->info->ops->stats_get_strings(chip, data);
+
+	if (chip->info->ops->serdes_get_strings) {
+		data += count * ETH_GSTRING_LEN;
+		chip->info->ops->serdes_get_strings(chip, port, data);
+	}
 
 	mutex_unlock(&chip->reg_lock);
 }
@@ -761,19 +769,31 @@ static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
 static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
-	int ret = 0;
+	int serdes_count = 0;
+	int count = 0;
 
 	mutex_lock(&chip->reg_lock);
 	if (chip->info->ops->stats_get_sset_count)
-		ret = chip->info->ops->stats_get_sset_count(chip);
+		count = chip->info->ops->stats_get_sset_count(chip);
+	if (count < 0)
+		goto out;
+
+	if (chip->info->ops->serdes_get_sset_count)
+		serdes_count = chip->info->ops->serdes_get_sset_count(chip,
+								      port);
+	if (serdes_count < 0)
+		count = serdes_count;
+	else
+		count += serdes_count;
+out:
 	mutex_unlock(&chip->reg_lock);
 
-	return ret;
+	return count;
 }
 
-static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-				      uint64_t *data, int types,
-				      u16 bank1_select, u16 histogram)
+static int mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+				     uint64_t *data, int types,
+				     u16 bank1_select, u16 histogram)
 {
 	struct mv88e6xxx_hw_stat *stat;
 	int i, j;
@@ -790,18 +810,19 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 			j++;
 		}
 	}
+	return j;
 }
 
-static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-				      uint64_t *data)
+static int mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+				     uint64_t *data)
 {
 	return mv88e6xxx_stats_get_stats(chip, port, data,
 					 STATS_TYPE_BANK0 | STATS_TYPE_PORT,
 					 0, MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
 }
 
-static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-				      uint64_t *data)
+static int mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+				     uint64_t *data)
 {
 	return mv88e6xxx_stats_get_stats(chip, port, data,
 					 STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -809,8 +830,8 @@ static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 					 MV88E6XXX_G1_STATS_OP_HIST_RX_TX);
 }
 
-static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
-				      uint64_t *data)
+static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+				     uint64_t *data)
 {
 	return mv88e6xxx_stats_get_stats(chip, port, data,
 					 STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
@@ -821,8 +842,15 @@ static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
 				uint64_t *data)
 {
+	int count = 0;
+
 	if (chip->info->ops->stats_get_stats)
-		chip->info->ops->stats_get_stats(chip, port, data);
+		count = chip->info->ops->stats_get_stats(chip, port, data);
+
+	if (chip->info->ops->serdes_get_stats) {
+		data += count;
+		chip->info->ops->serdes_get_stats(chip, port, data);
+	}
 }
 
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index d6a1391dc268..e1ed1b7ca319 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -386,9 +386,9 @@ struct mv88e6xxx_ops {
 
 	/* Return the number of strings describing statistics */
 	int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
-	void (*stats_get_strings)(struct mv88e6xxx_chip *chip,  uint8_t *data);
-	void (*stats_get_stats)(struct mv88e6xxx_chip *chip,  int port,
-				uint64_t *data);
+	int (*stats_get_strings)(struct mv88e6xxx_chip *chip,  uint8_t *data);
+	int (*stats_get_stats)(struct mv88e6xxx_chip *chip,  int port,
+			       uint64_t *data);
 	int (*set_cpu_port)(struct mv88e6xxx_chip *chip, int port);
 	int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
 	const struct mv88e6xxx_irq_ops *watchdog_ops;
@@ -398,6 +398,13 @@ struct mv88e6xxx_ops {
 	/* Power on/off a SERDES interface */
 	int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, bool on);
 
+	/* Statistics from the SERDES interface */
+	int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
+	void (*serdes_get_strings)(struct mv88e6xxx_chip *chip,  int port,
+				   uint8_t *data);
+	void (*serdes_get_stats)(struct mv88e6xxx_chip *chip,  int port,
+				 uint64_t *data);
+
 	/* VLAN Translation Unit operations */
 	int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
 			   struct mv88e6xxx_vtu_entry *entry);

From eb755c3f6b7deb22ed19e2d3cc4418d3ae510196 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 1 Mar 2018 02:02:30 +0100
Subject: [PATCH 0613/1678] net: dsa: mv88e6xxx: Add helper to determining if
 port has SERDES

Refactor the existing code. This helper will be used for SERDES
statistics.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/serdes.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index f3c01119b3d1..4487d18132a4 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -55,18 +55,30 @@ static int mv88e6352_serdes_power_set(struct mv88e6xxx_chip *chip, bool on)
 	return err;
 }
 
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 {
-	int err;
 	u8 cmode;
+	int err;
 
 	err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
-	if (err)
-		return err;
+	if (err) {
+		dev_err(chip->dev, "failed to read cmode\n");
+		return 0;
+	}
 
 	if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
 	    (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
-	    (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII)) {
+	    (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
+		return 1;
+
+	return 0;
+}
+
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
+{
+	int err;
+
+	if (mv88e6352_port_has_serdes(chip, port)) {
 		err = mv88e6352_serdes_power_set(chip, on);
 		if (err < 0)
 			return err;

From cda9f4aae3dceed192442807b70a5d34a13c478b Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 1 Mar 2018 02:02:31 +0100
Subject: [PATCH 0614/1678] net: dsa: mv88e6xxx: Get mv88e6352 SERDES
 statistics

Add support for reading the SERDES statistics of the mv88e8352, using
the standard ethtool -S option. The SERDES interface can be mapped to
either port 4 or 5, so only return statistics on those ports, if the
SERDES interface is in use.

The counters are reset on read, so need to be accumulated. Add a per
port structure to hold the stats counters. The 6352 only has a single
SERDES interface and so only one port will using the newly added
array. However the 6390 family has as many SERDES interfaces as ports,
each with statistics counters. Also, PTP has a number of counters per
port which will also need accumulating.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c   |  7 ++-
 drivers/net/dsa/mv88e6xxx/chip.h   | 10 +++-
 drivers/net/dsa/mv88e6xxx/serdes.c | 84 ++++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/serdes.h |  6 ++-
 4 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 243d274aace5..cfd53632a655 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -666,7 +666,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 			return UINT64_MAX;
 
 		low = reg;
-		if (s->sizeof_stat == 4) {
+		if (s->size == 4) {
 			err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
 			if (err)
 				return UINT64_MAX;
@@ -679,7 +679,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 	case STATS_TYPE_BANK0:
 		reg |= s->reg | histogram;
 		mv88e6xxx_g1_stats_read(chip, reg, &low);
-		if (s->sizeof_stat == 8)
+		if (s->size == 8)
 			mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
 		break;
 	default:
@@ -3225,6 +3225,9 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
 	.serdes_power = mv88e6352_serdes_power,
 	.gpio_ops = &mv88e6352_gpio_ops,
 	.avb_ops = &mv88e6352_avb_ops,
+	.serdes_get_sset_count = mv88e6352_serdes_get_sset_count,
+	.serdes_get_strings = mv88e6352_serdes_get_strings,
+	.serdes_get_stats = mv88e6352_serdes_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6390_ops = {
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index e1ed1b7ca319..26b9a618cdee 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -191,6 +191,10 @@ struct mv88e6xxx_port_hwtstamp {
 	struct hwtstamp_config tstamp_config;
 };
 
+struct mv88e6xxx_port {
+	u64 serdes_stats[2];
+};
+
 struct mv88e6xxx_chip {
 	const struct mv88e6xxx_info *info;
 
@@ -244,6 +248,7 @@ struct mv88e6xxx_chip {
 	int irq;
 	int device_irq;
 	int watchdog_irq;
+
 	int atu_prob_irq;
 	int vtu_prob_irq;
 	struct kthread_worker *kworker;
@@ -268,6 +273,9 @@ struct mv88e6xxx_chip {
 
 	/* Per-port timestamping resources. */
 	struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS];
+
+	/* Array of port structures. */
+	struct mv88e6xxx_port ports[DSA_MAX_PORTS];
 };
 
 struct mv88e6xxx_bus_ops {
@@ -469,7 +477,7 @@ struct mv88e6xxx_avb_ops {
 
 struct mv88e6xxx_hw_stat {
 	char string[ETH_GSTRING_LEN];
-	int sizeof_stat;
+	size_t size;
 	int reg;
 	int type;
 };
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 4487d18132a4..4756969c1546 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -87,6 +87,90 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)
 	return 0;
 }
 
+struct mv88e6352_serdes_hw_stat {
+	char string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int reg;
+};
+
+static struct mv88e6352_serdes_hw_stat mv88e6352_serdes_hw_stats[] = {
+	{ "serdes_fibre_rx_error", 16, 21 },
+	{ "serdes_PRBS_error", 32, 24 },
+};
+
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+	if (mv88e6352_port_has_serdes(chip, port))
+		return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
+
+	return 0;
+}
+
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+				  int port, uint8_t *data)
+{
+	struct mv88e6352_serdes_hw_stat *stat;
+	int i;
+
+	if (!mv88e6352_port_has_serdes(chip, port))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+		stat = &mv88e6352_serdes_hw_stats[i];
+		memcpy(data + i * ETH_GSTRING_LEN, stat->string,
+		       ETH_GSTRING_LEN);
+	}
+}
+
+static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
+					  struct mv88e6352_serdes_hw_stat *stat)
+{
+	u64 val = 0;
+	u16 reg;
+	int err;
+
+	err = mv88e6352_serdes_read(chip, stat->reg, &reg);
+	if (err) {
+		dev_err(chip->dev, "failed to read statistic\n");
+		return 0;
+	}
+
+	val = reg;
+
+	if (stat->sizeof_stat == 32) {
+		err = mv88e6352_serdes_read(chip, stat->reg + 1, &reg);
+		if (err) {
+			dev_err(chip->dev, "failed to read statistic\n");
+			return 0;
+		}
+		val = val << 16 | reg;
+	}
+
+	return val;
+}
+
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+				uint64_t *data)
+{
+	struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
+	struct mv88e6352_serdes_hw_stat *stat;
+	u64 value;
+	int i;
+
+	if (!mv88e6352_port_has_serdes(chip, port))
+		return;
+
+	BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
+		     ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
+
+	for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
+		stat = &mv88e6352_serdes_hw_stats[i];
+		value = mv88e6352_serdes_get_stat(chip, stat);
+		mv88e6xxx_port->serdes_stats[i] += value;
+		data[i] = mv88e6xxx_port->serdes_stats[i];
+	}
+}
+
 /* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
 static int mv88e6390_serdes_10g(struct mv88e6xxx_chip *chip, int addr, bool on)
 {
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 5c1cd6d8e9a5..641baa75f910 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -44,5 +44,9 @@
 
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
-
+int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+				  int port, uint8_t *data);
+void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+				uint64_t *data);
 #endif

From 6b2536039f653e35c96e7461a7456d2246331462 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Sun, 4 Mar 2018 21:02:18 +0100
Subject: [PATCH 0615/1678] batman-adv: Avoid redundant multicast TT entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a node signals that it wants all traffic for a specific protocol
family then there is no need to announce individual multicast addresses
via TT.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/multicast.c | 56 +++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 6eaffe50335a..c7a1305ca7e7 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -101,8 +101,37 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
 	return upper;
 }
 
+/**
+ * batadv_mcast_addr_is_ipv4() - check if multicast MAC is IPv4
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv4 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv4(const u8 *addr)
+{
+	static const u8 prefix[] = {0x01, 0x00, 0x5E};
+
+	return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
+/**
+ * batadv_mcast_addr_is_ipv6() - check if multicast MAC is IPv6
+ * @addr: the MAC address to check
+ *
+ * Return: True, if MAC address is one reserved for IPv6 multicast, false
+ * otherwise.
+ */
+static bool batadv_mcast_addr_is_ipv6(const u8 *addr)
+{
+	static const u8 prefix[] = {0x33, 0x33};
+
+	return memcmp(prefix, addr, sizeof(prefix)) == 0;
+}
+
 /**
  * batadv_mcast_mla_softif_get() - get softif multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
  * @dev: the device to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -119,9 +148,12 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_softif_get(struct net_device *dev,
+static int batadv_mcast_mla_softif_get(struct batadv_priv *bat_priv,
+				       struct net_device *dev,
 				       struct hlist_head *mcast_list)
 {
+	bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+	bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
 	struct net_device *bridge = batadv_mcast_get_bridge(dev);
 	struct netdev_hw_addr *mc_list_entry;
 	struct batadv_hw_addr *new;
@@ -129,6 +161,12 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
 
 	netif_addr_lock_bh(bridge ? bridge : dev);
 	netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
+		if (all_ipv4 && batadv_mcast_addr_is_ipv4(mc_list_entry->addr))
+			continue;
+
+		if (all_ipv6 && batadv_mcast_addr_is_ipv6(mc_list_entry->addr))
+			continue;
+
 		new = kmalloc(sizeof(*new), GFP_ATOMIC);
 		if (!new) {
 			ret = -ENOMEM;
@@ -193,6 +231,7 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
 
 /**
  * batadv_mcast_mla_bridge_get() - get bridged-in multicast listeners
+ * @bat_priv: the bat priv with all the soft interface information
  * @dev: a bridge slave whose bridge to collect multicast addresses from
  * @mcast_list: a list to put found addresses into
  *
@@ -204,10 +243,13 @@ static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
  * Return: -ENOMEM on memory allocation error or the number of
  * items added to the mcast_list otherwise.
  */
-static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+static int batadv_mcast_mla_bridge_get(struct batadv_priv *bat_priv,
+				       struct net_device *dev,
 				       struct hlist_head *mcast_list)
 {
 	struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+	bool all_ipv4 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV4;
+	bool all_ipv6 = bat_priv->mcast.flags & BATADV_MCAST_WANT_ALL_IPV6;
 	struct br_ip_list *br_ip_entry, *tmp;
 	struct batadv_hw_addr *new;
 	u8 mcast_addr[ETH_ALEN];
@@ -221,6 +263,12 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
 		goto out;
 
 	list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+		if (all_ipv4 && br_ip_entry->addr.proto == htons(ETH_P_IP))
+			continue;
+
+		if (all_ipv6 && br_ip_entry->addr.proto == htons(ETH_P_IPV6))
+			continue;
+
 		batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
 		if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
 			continue;
@@ -568,11 +616,11 @@ static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
 	if (!batadv_mcast_mla_tvlv_update(bat_priv))
 		goto update;
 
-	ret = batadv_mcast_mla_softif_get(soft_iface, &mcast_list);
+	ret = batadv_mcast_mla_softif_get(bat_priv, soft_iface, &mcast_list);
 	if (ret < 0)
 		goto out;
 
-	ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+	ret = batadv_mcast_mla_bridge_get(bat_priv, soft_iface, &mcast_list);
 	if (ret < 0)
 		goto out;
 

From 0913667ab3ad9e6c1797384410a0755757bcee55 Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Thu, 1 Mar 2018 15:01:04 +0530
Subject: [PATCH 0616/1678] cxgb4vf: Forcefully link up virtual interfaces

The Virtual Interfaces are connected to an internal switch on the chip
which allows VIs attached to the same port to talk to each other even
when the port link is down.  As a result, we generally want to always
report a VI's link as being "up".

Based on the original work by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c    | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index b7e79e64d2ed..361de86b65b9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -155,8 +155,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 		const char *fc;
 		const struct port_info *pi = netdev_priv(dev);
 
-		netif_carrier_on(dev);
-
 		switch (pi->link_cfg.speed) {
 		case 100:
 			s = "100Mbps";
@@ -202,7 +200,6 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 
 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
 	} else {
-		netif_carrier_off(dev);
 		netdev_info(dev, "link down\n");
 	}
 }
@@ -278,6 +275,17 @@ static int link_start(struct net_device *dev)
 	 */
 	if (ret == 0)
 		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
+
+	/* The Virtual Interfaces are connected to an internal switch on the
+	 * chip which allows VIs attached to the same port to talk to each
+	 * other even when the port link is down.  As a result, we generally
+	 * want to always report a VI's link as being "up", provided there are
+	 * no errors in enabling vi.
+	 */
+
+	if (ret == 0)
+		netif_carrier_on(dev);
+
 	return ret;
 }
 

From cc1122b00d624ef551b6ff92e57240cbffb7d62a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 1 Mar 2018 10:23:03 +0000
Subject: [PATCH 0617/1678] net: phy: Fix spelling mistake: "advertisment"->
 "advertisement"

Trivial fix to spelling mistake in comments and error message text.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c    |  2 +-
 drivers/net/phy/marvell.c    |  2 +-
 drivers/net/phy/marvell10g.c |  2 +-
 drivers/net/phy/phy-c45.c    |  8 ++++----
 drivers/net/phy/phy-core.c   |  4 ++--
 drivers/net/phy/phylink.c    | 12 ++++++------
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 421feb8f92fe..29b1c88b55cc 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -565,7 +565,7 @@ static int bcm7xxx_28nm_set_tunable(struct phy_device *phydev,
 	if (ret)
 		return ret;
 
-	/* Disable EEE advertisment since this prevents the PHY
+	/* Disable EEE advertisement since this prevents the PHY
 	 * from successfully linking up, trigger auto-negotiation restart
 	 * to let the MAC decide what to do.
 	 */
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 22d9bc9c33a4..98fd6b7ceeec 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -860,7 +860,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
 			return err;
 
 		/* There appears to be a bug in the 88e1512 when used in
-		 * SGMII to copper mode, where the AN advertisment register
+		 * SGMII to copper mode, where the AN advertisement register
 		 * clears the pause bits each time a negotiation occurs.
 		 * This means we can never be truely sure what was advertised,
 		 * so disable Pause support.
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 4f1efa02a930..9564916d2d7b 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -308,7 +308,7 @@ static int mv3310_read_status(struct phy_device *phydev)
 		if (val < 0)
 			return val;
 
-		/* Read the link partner's 1G advertisment */
+		/* Read the link partner's 1G advertisement */
 		val = phy_read_mmd(phydev, MDIO_MMD_AN, MV_AN_STAT1000);
 		if (val < 0)
 			return val;
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 0017eddc24db..e1225545362d 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -163,11 +163,11 @@ int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
 EXPORT_SYMBOL_GPL(genphy_c45_read_link);
 
 /**
- * genphy_c45_read_lpa - read the link partner advertisment and pause
+ * genphy_c45_read_lpa - read the link partner advertisement and pause
  * @phydev: target phy_device struct
  *
  * Read the Clause 45 defined base (7.19) and 10G (7.33) status registers,
- * filling in the link partner advertisment, pause and asym_pause members
+ * filling in the link partner advertisement, pause and asym_pause members
  * in @phydev.  This assumes that the auto-negotiation MMD is present, and
  * the backplane bit (7.48.0) is clear.  Clause 45 PHY drivers are expected
  * to fill in the remainder of the link partner advert from vendor registers.
@@ -176,7 +176,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
 {
 	int val;
 
-	/* Read the link partner's base page advertisment */
+	/* Read the link partner's base page advertisement */
 	val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_LPA);
 	if (val < 0)
 		return val;
@@ -185,7 +185,7 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
 	phydev->pause = val & LPA_PAUSE_CAP ? 1 : 0;
 	phydev->asym_pause = val & LPA_PAUSE_ASYM ? 1 : 0;
 
-	/* Read the link partner's 10G advertisment */
+	/* Read the link partner's 10G advertisement */
 	val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
 	if (val < 0)
 		return val;
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 4083f00c97a5..c7da4cbb1103 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -190,10 +190,10 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
 }
 
 /**
- * phy_resolve_aneg_linkmode - resolve the advertisments into phy settings
+ * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
  *
- * Resolve our and the link partner advertisments into their corresponding
+ * Resolve our and the link partner advertisements into their corresponding
  * speed and duplex. If full duplex was negotiated, extract the pause mode
  * from the link partner mask.
  */
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 7d1724072325..51a011a349fe 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -364,7 +364,7 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat
 }
 
 /* Flow control is resolved according to our and the link partners
- * advertisments using the following drawn from the 802.3 specs:
+ * advertisements using the following drawn from the 802.3 specs:
  *  Local device  Link partner
  *  Pause AsymDir Pause AsymDir Result
  *    1     X       1     X     TX+RX
@@ -683,7 +683,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 	linkmode_copy(pl->supported, supported);
 	linkmode_copy(pl->link_config.advertising, config.advertising);
 
-	/* Restrict the phy advertisment according to the MAC support. */
+	/* Restrict the phy advertisement according to the MAC support. */
 	ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
 	phy->advertising = advertising;
 	mutex_unlock(&pl->state_mutex);
@@ -887,7 +887,7 @@ void phylink_start(struct phylink *pl)
 
 	/* Apply the link configuration to the MAC when starting. This allows
 	 * a fixed-link to start with the correct parameters, and also
-	 * ensures that we set the appropriate advertisment for Serdes links.
+	 * ensures that we set the appropriate advertisement for Serdes links.
 	 */
 	phylink_resolve_flow(pl, &pl->link_config);
 	phylink_mac_config(pl, &pl->link_config);
@@ -1074,7 +1074,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
 
 	config = pl->link_config;
 
-	/* Mask out unsupported advertisments */
+	/* Mask out unsupported advertisements */
 	linkmode_and(config.advertising, kset->link_modes.advertising,
 		     pl->supported);
 
@@ -1119,7 +1119,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
 	if (phylink_validate(pl, pl->supported, &config))
 		return -EINVAL;
 
-	/* If autonegotiation is enabled, we must have an advertisment */
+	/* If autonegotiation is enabled, we must have an advertisement */
 	if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
 		return -EINVAL;
 
@@ -1606,7 +1606,7 @@ static int phylink_sfp_module_insert(void *upstream,
 	iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
 	if (iface == PHY_INTERFACE_MODE_NA) {
 		netdev_err(pl->netdev,
-			   "selection of interface failed, advertisment %*pb\n",
+			   "selection of interface failed, advertisement %*pb\n",
 			   __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
 		return -EINVAL;
 	}

From 1ef7286e7f36020e655ebeb82386911b617c5aee Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 1 Mar 2018 13:27:34 +0200
Subject: [PATCH 0618/1678] r8169: Dereference MMIO address immediately before
 use

There is no need to dereference struct rtl8169_private to get mmio_addr
in almost every function in the driver.

Replace it by using pointer to struct rtl8169_private directly.

No functional change intended.

Next step might be a conversion of RTL_Wxx() / RTL_Rxx() macros
to inline functions for sake of type checking.

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 865 +++++++++++----------------
 1 file changed, 335 insertions(+), 530 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 6fd13334aa28..b6098e081a25 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -99,12 +99,12 @@ static const int multicast_filter_limit = 32;
 #define RTL8169_PHY_TIMEOUT	(10*HZ)
 
 /* write/read MMIO register */
-#define RTL_W8(reg, val8)	writeb ((val8), ioaddr + (reg))
-#define RTL_W16(reg, val16)	writew ((val16), ioaddr + (reg))
-#define RTL_W32(reg, val32)	writel ((val32), ioaddr + (reg))
-#define RTL_R8(reg)		readb (ioaddr + (reg))
-#define RTL_R16(reg)		readw (ioaddr + (reg))
-#define RTL_R32(reg)		readl (ioaddr + (reg))
+#define RTL_W8(tp, reg, val8)	writeb((val8), tp->mmio_addr + (reg))
+#define RTL_W16(tp, reg, val16)	writew((val16), tp->mmio_addr + (reg))
+#define RTL_W32(tp, reg, val32)	writel((val32), tp->mmio_addr + (reg))
+#define RTL_R8(tp, reg)		readb(tp->mmio_addr + (reg))
+#define RTL_R16(tp, reg)		readw(tp->mmio_addr + (reg))
+#define RTL_R32(tp, reg)		readl(tp->mmio_addr + (reg))
 
 enum mac_version {
 	RTL_GIGA_MAC_VER_01 = 0,
@@ -823,7 +823,7 @@ struct rtl8169_private {
 	void (*phy_reset_enable)(struct rtl8169_private *tp);
 	void (*hw_start)(struct net_device *);
 	unsigned int (*phy_reset_pending)(struct rtl8169_private *tp);
-	unsigned int (*link_ok)(void __iomem *);
+	unsigned int (*link_ok)(struct rtl8169_private *tp);
 	int (*do_ioctl)(struct rtl8169_private *tp, struct mii_ioctl_data *data, int cmd);
 	bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *);
 
@@ -978,56 +978,46 @@ static bool rtl_ocp_reg_failure(struct rtl8169_private *tp, u32 reg)
 
 DECLARE_RTL_COND(rtl_ocp_gphy_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(GPHY_OCP) & OCPAR_FLAG;
+	return RTL_R32(tp, GPHY_OCP) & OCPAR_FLAG;
 }
 
 static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	if (rtl_ocp_reg_failure(tp, reg))
 		return;
 
-	RTL_W32(GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
+	RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
 
 	rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
 }
 
 static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	if (rtl_ocp_reg_failure(tp, reg))
 		return 0;
 
-	RTL_W32(GPHY_OCP, reg << 15);
+	RTL_W32(tp, GPHY_OCP, reg << 15);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
-		(RTL_R32(GPHY_OCP) & 0xffff) : ~0;
+		(RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0;
 }
 
 static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	if (rtl_ocp_reg_failure(tp, reg))
 		return;
 
-	RTL_W32(OCPDR, OCPAR_FLAG | (reg << 15) | data);
+	RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
 }
 
 static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	if (rtl_ocp_reg_failure(tp, reg))
 		return 0;
 
-	RTL_W32(OCPDR, reg << 15);
+	RTL_W32(tp, OCPDR, reg << 15);
 
-	return RTL_R32(OCPDR);
+	return RTL_R32(tp, OCPDR);
 }
 
 #define OCP_STD_PHY_BASE	0xa400
@@ -1070,16 +1060,12 @@ static int mac_mcu_read(struct rtl8169_private *tp, int reg)
 
 DECLARE_RTL_COND(rtl_phyar_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(PHYAR) & 0x80000000;
+	return RTL_R32(tp, PHYAR) & 0x80000000;
 }
 
 static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
+	RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
 
 	rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
 	/*
@@ -1091,13 +1077,12 @@ static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 
 static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	int value;
 
-	RTL_W32(PHYAR, 0x0 | (reg & 0x1f) << 16);
+	RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
 
 	value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
-		RTL_R32(PHYAR) & 0xffff : ~0;
+		RTL_R32(tp, PHYAR) & 0xffff : ~0;
 
 	/*
 	 * According to hardware specs a 20us delay is required after read
@@ -1110,18 +1095,14 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 
 DECLARE_RTL_COND(rtl_ocpar_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(OCPAR) & OCPAR_FLAG;
+	return RTL_R32(tp, OCPAR) & OCPAR_FLAG;
 }
 
 static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
-	RTL_W32(OCPAR, OCPAR_GPHY_WRITE_CMD);
-	RTL_W32(EPHY_RXER_NUM, 0);
+	RTL_W32(tp, OCPDR, data | ((reg & OCPDR_REG_MASK) << OCPDR_GPHY_REG_SHIFT));
+	RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
+	RTL_W32(tp, EPHY_RXER_NUM, 0);
 
 	rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
 }
@@ -1134,51 +1115,46 @@ static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg, int value)
 
 static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	r8168dp_1_mdio_access(tp, reg, OCPDR_READ_CMD);
 
 	mdelay(1);
-	RTL_W32(OCPAR, OCPAR_GPHY_READ_CMD);
-	RTL_W32(EPHY_RXER_NUM, 0);
+	RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
+	RTL_W32(tp, EPHY_RXER_NUM, 0);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
-		RTL_R32(OCPDR) & OCPDR_DATA_MASK : ~0;
+		RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0;
 }
 
 #define R8168DP_1_MDIO_ACCESS_BIT	0x00020000
 
-static void r8168dp_2_mdio_start(void __iomem *ioaddr)
+static void r8168dp_2_mdio_start(struct rtl8169_private *tp)
 {
-	RTL_W32(0xd0, RTL_R32(0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
+	RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) & ~R8168DP_1_MDIO_ACCESS_BIT);
 }
 
-static void r8168dp_2_mdio_stop(void __iomem *ioaddr)
+static void r8168dp_2_mdio_stop(struct rtl8169_private *tp)
 {
-	RTL_W32(0xd0, RTL_R32(0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
+	RTL_W32(tp, 0xd0, RTL_R32(tp, 0xd0) | R8168DP_1_MDIO_ACCESS_BIT);
 }
 
 static void r8168dp_2_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	r8168dp_2_mdio_start(ioaddr);
+	r8168dp_2_mdio_start(tp);
 
 	r8169_mdio_write(tp, reg, value);
 
-	r8168dp_2_mdio_stop(ioaddr);
+	r8168dp_2_mdio_stop(tp);
 }
 
 static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	int value;
 
-	r8168dp_2_mdio_start(ioaddr);
+	r8168dp_2_mdio_start(tp);
 
 	value = r8169_mdio_read(tp, reg);
 
-	r8168dp_2_mdio_stop(ioaddr);
+	r8168dp_2_mdio_stop(tp);
 
 	return value;
 }
@@ -1223,16 +1199,12 @@ static int rtl_mdio_read(struct net_device *dev, int phy_id, int location)
 
 DECLARE_RTL_COND(rtl_ephyar_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(EPHYAR) & EPHYAR_FLAG;
+	return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
 }
 
 static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
+	RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
 		(reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
 	rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
@@ -1242,41 +1214,33 @@ static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 
 static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
+	RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
-		RTL_R32(EPHYAR) & EPHYAR_DATA_MASK : ~0;
+		RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
 }
 
 DECLARE_RTL_COND(rtl_eriar_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(ERIAR) & ERIAR_FLAG;
+	return RTL_R32(tp, ERIAR) & ERIAR_FLAG;
 }
 
 static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
 			  u32 val, int type)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	BUG_ON((addr & 3) || (mask == 0));
-	RTL_W32(ERIDR, val);
-	RTL_W32(ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
+	RTL_W32(tp, ERIDR, val);
+	RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
 
 	rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
 }
 
 static u32 rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
+	RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
-		RTL_R32(ERIDR) : ~0;
+		RTL_R32(tp, ERIDR) : ~0;
 }
 
 static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
@@ -1290,11 +1254,9 @@ static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
 
 static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+	RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
 	return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
-		RTL_R32(OCPDR) : ~0;
+		RTL_R32(tp, OCPDR) : ~0;
 }
 
 static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1322,10 +1284,8 @@ static u32 ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
 			      u32 data)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(OCPDR, data);
-	RTL_W32(OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+	RTL_W32(tp, OCPDR, data);
+	RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
 	rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
 }
 
@@ -1387,19 +1347,15 @@ DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
 
 DECLARE_RTL_COND(rtl_ocp_tx_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R8(IBISR0) & 0x20;
+	return RTL_R8(tp, IBISR0) & 0x20;
 }
 
 static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+	RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
 	rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
-	RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
-	RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+	RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
+	RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
 }
 
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
@@ -1512,49 +1468,37 @@ static void rtl_write_exgmac_batch(struct rtl8169_private *tp,
 
 DECLARE_RTL_COND(rtl_efusear_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(EFUSEAR) & EFUSEAR_FLAG;
+	return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
 }
 
 static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
+	RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
-		RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
+		RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
 }
 
 static u16 rtl_get_events(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R16(IntrStatus);
+	return RTL_R16(tp, IntrStatus);
 }
 
 static void rtl_ack_events(struct rtl8169_private *tp, u16 bits)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W16(IntrStatus, bits);
+	RTL_W16(tp, IntrStatus, bits);
 	mmiowb();
 }
 
 static void rtl_irq_disable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W16(IntrMask, 0);
+	RTL_W16(tp, IntrMask, 0);
 	mmiowb();
 }
 
 static void rtl_irq_enable(struct rtl8169_private *tp, u16 bits)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W16(IntrMask, bits);
+	RTL_W16(tp, IntrMask, bits);
 }
 
 #define RTL_EVENT_NAPI_RX	(RxOK | RxErr)
@@ -1568,18 +1512,14 @@ static void rtl_irq_enable_all(struct rtl8169_private *tp)
 
 static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	rtl_irq_disable(tp);
 	rtl_ack_events(tp, RTL_EVENT_NAPI | tp->event_slow);
-	RTL_R8(ChipCmd);
+	RTL_R8(tp, ChipCmd);
 }
 
 static unsigned int rtl8169_tbi_reset_pending(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(TBICSR) & TBIReset;
+	return RTL_R32(tp, TBICSR) & TBIReset;
 }
 
 static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
@@ -1587,21 +1527,19 @@ static unsigned int rtl8169_xmii_reset_pending(struct rtl8169_private *tp)
 	return rtl_readphy(tp, MII_BMCR) & BMCR_RESET;
 }
 
-static unsigned int rtl8169_tbi_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_tbi_link_ok(struct rtl8169_private *tp)
 {
-	return RTL_R32(TBICSR) & TBILinkOk;
+	return RTL_R32(tp, TBICSR) & TBILinkOk;
 }
 
-static unsigned int rtl8169_xmii_link_ok(void __iomem *ioaddr)
+static unsigned int rtl8169_xmii_link_ok(struct rtl8169_private *tp)
 {
-	return RTL_R8(PHYstatus) & LinkStatus;
+	return RTL_R8(tp, PHYstatus) & LinkStatus;
 }
 
 static void rtl8169_tbi_reset_enable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(TBICSR, RTL_R32(TBICSR) | TBIReset);
+	RTL_W32(tp, TBICSR, RTL_R32(tp, TBICSR) | TBIReset);
 }
 
 static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
@@ -1614,7 +1552,6 @@ static void rtl8169_xmii_reset_enable(struct rtl8169_private *tp)
 
 static void rtl_link_chg_patch(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct net_device *dev = tp->dev;
 
 	if (!netif_running(dev))
@@ -1622,12 +1559,12 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_38) {
-		if (RTL_R8(PHYstatus) & _1000bpsF) {
+		if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
 			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
 				      ERIAR_EXGMAC);
-		} else if (RTL_R8(PHYstatus) & _100bps) {
+		} else if (RTL_R8(tp, PHYstatus) & _100bps) {
 			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x0000001f,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1645,7 +1582,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 			     ERIAR_EXGMAC);
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_35 ||
 		   tp->mac_version == RTL_GIGA_MAC_VER_36) {
-		if (RTL_R8(PHYstatus) & _1000bpsF) {
+		if (RTL_R8(tp, PHYstatus) & _1000bpsF) {
 			rtl_eri_write(tp, 0x1bc, ERIAR_MASK_1111, 0x00000011,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_1111, 0x00000005,
@@ -1657,7 +1594,7 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 				      ERIAR_EXGMAC);
 		}
 	} else if (tp->mac_version == RTL_GIGA_MAC_VER_37) {
-		if (RTL_R8(PHYstatus) & _10bps) {
+		if (RTL_R8(tp, PHYstatus) & _10bps) {
 			rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x4d02,
 				      ERIAR_EXGMAC);
 			rtl_eri_write(tp, 0x1dc, ERIAR_MASK_0011, 0x0060,
@@ -1670,10 +1607,9 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 }
 
 static void rtl8169_check_link_status(struct net_device *dev,
-				      struct rtl8169_private *tp,
-				      void __iomem *ioaddr)
+				      struct rtl8169_private *tp)
 {
-	if (tp->link_ok(ioaddr)) {
+	if (tp->link_ok(tp)) {
 		rtl_link_chg_patch(tp);
 		/* This is to cancel a scheduled suspend if there's one. */
 		pm_request_resume(&tp->pci_dev->dev);
@@ -1691,15 +1627,14 @@ static void rtl8169_check_link_status(struct net_device *dev,
 
 static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	u8 options;
 	u32 wolopts = 0;
 
-	options = RTL_R8(Config1);
+	options = RTL_R8(tp, Config1);
 	if (!(options & PMEnable))
 		return 0;
 
-	options = RTL_R8(Config3);
+	options = RTL_R8(tp, Config3);
 	if (options & LinkUp)
 		wolopts |= WAKE_PHY;
 	switch (tp->mac_version) {
@@ -1729,7 +1664,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 		break;
 	}
 
-	options = RTL_R8(Config5);
+	options = RTL_R8(tp, Config5);
 	if (options & UWF)
 		wolopts |= WAKE_UCAST;
 	if (options & BWF)
@@ -1762,7 +1697,6 @@ static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	unsigned int i, tmp;
 	static const struct {
 		u32 opt;
@@ -1778,7 +1712,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 	};
 	u8 options;
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_34:
@@ -1820,28 +1754,28 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 	}
 
 	for (i = 0; i < tmp; i++) {
-		options = RTL_R8(cfg[i].reg) & ~cfg[i].mask;
+		options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
 		if (wolopts & cfg[i].opt)
 			options |= cfg[i].mask;
-		RTL_W8(cfg[i].reg, options);
+		RTL_W8(tp, cfg[i].reg, options);
 	}
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
-		options = RTL_R8(Config1) & ~PMEnable;
+		options = RTL_R8(tp, Config1) & ~PMEnable;
 		if (wolopts)
 			options |= PMEnable;
-		RTL_W8(Config1, options);
+		RTL_W8(tp, Config1, options);
 		break;
 	default:
-		options = RTL_R8(Config2) & ~PME_SIGNAL;
+		options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
 		if (wolopts)
 			options |= PME_SIGNAL;
-		RTL_W8(Config2, options);
+		RTL_W8(tp, Config2, options);
 		break;
 	}
 
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -1896,16 +1830,15 @@ static int rtl8169_set_speed_tbi(struct net_device *dev,
 				 u8 autoneg, u16 speed, u8 duplex, u32 ignored)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	int ret = 0;
 	u32 reg;
 
-	reg = RTL_R32(TBICSR);
+	reg = RTL_R32(tp, TBICSR);
 	if ((autoneg == AUTONEG_DISABLE) && (speed == SPEED_1000) &&
 	    (duplex == DUPLEX_FULL)) {
-		RTL_W32(TBICSR, reg & ~(TBINwEnable | TBINwRestart));
+		RTL_W32(tp, TBICSR, reg & ~(TBINwEnable | TBINwRestart));
 	} else if (autoneg == AUTONEG_ENABLE)
-		RTL_W32(TBICSR, reg | TBINwEnable | TBINwRestart);
+		RTL_W32(tp, TBICSR, reg | TBINwEnable | TBINwRestart);
 	else {
 		netif_warn(tp, link, dev,
 			   "incorrect speed setting refused in TBI mode\n");
@@ -2030,16 +1963,15 @@ static void __rtl8169_set_features(struct net_device *dev,
 				   netdev_features_t features)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	u32 rx_config;
 
-	rx_config = RTL_R32(RxConfig);
+	rx_config = RTL_R32(tp, RxConfig);
 	if (features & NETIF_F_RXALL)
 		rx_config |= (AcceptErr | AcceptRunt);
 	else
 		rx_config &= ~(AcceptErr | AcceptRunt);
 
-	RTL_W32(RxConfig, rx_config);
+	RTL_W32(tp, RxConfig, rx_config);
 
 	if (features & NETIF_F_RXCSUM)
 		tp->cp_cmd |= RxChkSum;
@@ -2051,10 +1983,10 @@ static void __rtl8169_set_features(struct net_device *dev,
 	else
 		tp->cp_cmd &= ~RxVlan;
 
-	tp->cp_cmd |= RTL_R16(CPlusCmd) & ~(RxVlan | RxChkSum);
+	tp->cp_cmd |= RTL_R16(tp, CPlusCmd) & ~(RxVlan | RxChkSum);
 
-	RTL_W16(CPlusCmd, tp->cp_cmd);
-	RTL_R16(CPlusCmd);
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+	RTL_R16(tp, CPlusCmd);
 }
 
 static int rtl8169_set_features(struct net_device *dev,
@@ -2091,7 +2023,6 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
 					  struct ethtool_link_ksettings *cmd)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	u32 status;
 	u32 supported, advertising;
 
@@ -2099,7 +2030,7 @@ static int rtl8169_get_link_ksettings_tbi(struct net_device *dev,
 		SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | SUPPORTED_FIBRE;
 	cmd->base.port = PORT_FIBRE;
 
-	status = RTL_R32(TBICSR);
+	status = RTL_R32(tp, TBICSR);
 	advertising = (status & TBINwEnable) ?  ADVERTISED_Autoneg : 0;
 	cmd->base.autoneg = !!(status & TBINwEnable);
 
@@ -2214,23 +2145,20 @@ static int rtl8169_get_sset_count(struct net_device *dev, int sset)
 
 DECLARE_RTL_COND(rtl_counters_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(CounterAddrLow) & (CounterReset | CounterDump);
+	return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
 }
 
 static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	dma_addr_t paddr = tp->counters_phys_addr;
 	u32 cmd;
 
-	RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
-	RTL_R32(CounterAddrHigh);
+	RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+	RTL_R32(tp, CounterAddrHigh);
 	cmd = (u64)paddr & DMA_BIT_MASK(32);
-	RTL_W32(CounterAddrLow, cmd);
-	RTL_W32(CounterAddrLow, cmd | counter_cmd);
+	RTL_W32(tp, CounterAddrLow, cmd);
+	RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
 	return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 }
@@ -2252,13 +2180,12 @@ static bool rtl8169_reset_counters(struct net_device *dev)
 static bool rtl8169_update_counters(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 
 	/*
 	 * Some chips are unable to dump tally counters when the receiver
 	 * is disabled.
 	 */
-	if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
+	if ((RTL_R8(tp, ChipCmd) & CmdRxEnb) == 0)
 		return true;
 
 	return rtl8169_do_counters(dev, CounterDump);
@@ -2438,7 +2365,6 @@ static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
 static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	const struct rtl_coalesce_info *ci;
 	const struct rtl_coalesce_scale *scale;
 	struct {
@@ -2458,10 +2384,10 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	if (IS_ERR(ci))
 		return PTR_ERR(ci);
 
-	scale = &ci->scalev[RTL_R16(CPlusCmd) & 3];
+	scale = &ci->scalev[RTL_R16(tp, CPlusCmd) & 3];
 
 	/* read IntrMitigate and adjust according to scale */
-	for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
+	for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
 		*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
 		w >>= RTL_COALESCE_SHIFT;
 		*p->usecs = w & RTL_COALESCE_MASK;
@@ -2508,7 +2434,6 @@ static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
 static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	const struct rtl_coalesce_scale *scale;
 	struct {
 		u32 frames;
@@ -2556,11 +2481,11 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
 	rtl_lock_work(tp);
 
-	RTL_W16(IntrMitigate, swab16(w));
+	RTL_W16(tp, IntrMitigate, swab16(w));
 
 	tp->cp_cmd = (tp->cp_cmd & ~3) | cp01;
-	RTL_W16(CPlusCmd, tp->cp_cmd);
-	RTL_R16(CPlusCmd);
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
+	RTL_R16(tp, CPlusCmd);
 
 	rtl_unlock_work(tp);
 
@@ -2590,17 +2515,16 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 				    struct net_device *dev, u8 default_version)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	/*
 	 * The driver currently handles the 8168Bf and the 8168Be identically
 	 * but they can be identified more specifically through the test below
 	 * if needed:
 	 *
-	 * (RTL_R32(TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
+	 * (RTL_R32(tp, TxConfig) & 0x700000) == 0x500000 ? 8168Bf : 8168Be
 	 *
 	 * Same thing for the 8101Eb and the 8101Ec:
 	 *
-	 * (RTL_R32(TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
+	 * (RTL_R32(tp, TxConfig) & 0x700000) == 0x200000 ? 8101Eb : 8101Ec
 	 */
 	static const struct rtl_mac_info {
 		u32 mask;
@@ -2698,7 +2622,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
 	const struct rtl_mac_info *p = mac_info;
 	u32 reg;
 
-	reg = RTL_R32(TxConfig);
+	reg = RTL_R32(tp, TxConfig);
 	while ((reg & p->mask) != p->val)
 		p++;
 	tp->mac_version = p->mac_version;
@@ -4579,7 +4503,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
 static void rtl_phy_work(struct rtl8169_private *tp)
 {
 	struct timer_list *timer = &tp->timer;
-	void __iomem *ioaddr = tp->mmio_addr;
 	unsigned long timeout = RTL8169_PHY_TIMEOUT;
 
 	assert(tp->mac_version > RTL_GIGA_MAC_VER_01);
@@ -4593,7 +4516,7 @@ static void rtl_phy_work(struct rtl8169_private *tp)
 		goto out_mod_timer;
 	}
 
-	if (tp->link_ok(ioaddr))
+	if (tp->link_ok(tp))
 		return;
 
 	netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
@@ -4631,21 +4554,17 @@ static void rtl8169_phy_reset(struct net_device *dev,
 
 static bool rtl_tbi_enabled(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
-	    (RTL_R8(PHYstatus) & TBI_Enable);
+	    (RTL_R8(tp, PHYstatus) & TBI_Enable);
 }
 
 static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	rtl_hw_phy_config(dev);
 
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
 		dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-		RTL_W8(0x82, 0x01);
+		RTL_W8(tp, 0x82, 0x01);
 	}
 
 	pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
@@ -4655,7 +4574,7 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_02) {
 		dprintk("Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
-		RTL_W8(0x82, 0x01);
+		RTL_W8(tp, 0x82, 0x01);
 		dprintk("Set PHY Reg 0x0bh = 0x00h\n");
 		rtl_writephy(tp, 0x0b, 0x0000); //w 0x0b 15 0 0
 	}
@@ -4675,22 +4594,20 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
 
 static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	rtl_lock_work(tp);
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-	RTL_W32(MAC4, addr[4] | addr[5] << 8);
-	RTL_R32(MAC4);
+	RTL_W32(tp, MAC4, addr[4] | addr[5] << 8);
+	RTL_R32(tp, MAC4);
 
-	RTL_W32(MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
-	RTL_R32(MAC0);
+	RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24);
+	RTL_R32(tp, MAC0);
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_34)
 		rtl_rar_exgmac_set(tp, addr);
 
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
 	rtl_unlock_work(tp);
 }
@@ -4810,8 +4727,6 @@ static void rtl_speed_down(struct rtl8169_private *tp)
 
 static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_25:
 	case RTL_GIGA_MAC_VER_26:
@@ -4835,7 +4750,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_49:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
-		RTL_W32(RxConfig, RTL_R32(RxConfig) |
+		RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) |
 			AcceptBroadcast | AcceptMulticast | AcceptMyPhys);
 		break;
 	default:
@@ -4868,8 +4783,6 @@ static void r810x_phy_power_up(struct rtl8169_private *tp)
 
 static void r810x_pll_power_down(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	if (rtl_wol_pll_power_down(tp))
 		return;
 
@@ -4884,15 +4797,13 @@ static void r810x_pll_power_down(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_16:
 		break;
 	default:
-		RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
 	}
 }
 
 static void r810x_pll_power_up(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	r810x_phy_power_up(tp);
 
 	switch (tp->mac_version) {
@@ -4905,10 +4816,10 @@ static void r810x_pll_power_up(struct rtl8169_private *tp)
 		break;
 	case RTL_GIGA_MAC_VER_47:
 	case RTL_GIGA_MAC_VER_48:
-		RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
 		break;
 	default:
-		RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
 		break;
 	}
 }
@@ -4975,14 +4886,12 @@ static void r8168_phy_power_down(struct rtl8169_private *tp)
 
 static void r8168_pll_power_down(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	if (r8168_check_dash(tp))
 		return;
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_23 ||
 	     tp->mac_version == RTL_GIGA_MAC_VER_24) &&
-	    (RTL_R16(CPlusCmd) & ASF)) {
+	    (RTL_R16(tp, CPlusCmd) & ASF)) {
 		return;
 	}
 
@@ -5008,22 +4917,20 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_46:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
-		RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
 	case RTL_GIGA_MAC_VER_40:
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_49:
 		rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0x00000000,
 			     0xfc000000, ERIAR_EXGMAC);
-		RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
 	}
 }
 
 static void r8168_pll_power_up(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_25:
 	case RTL_GIGA_MAC_VER_26:
@@ -5032,19 +4939,19 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_31:
 	case RTL_GIGA_MAC_VER_32:
 	case RTL_GIGA_MAC_VER_33:
-		RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80);
 		break;
 	case RTL_GIGA_MAC_VER_44:
 	case RTL_GIGA_MAC_VER_45:
 	case RTL_GIGA_MAC_VER_46:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
-		RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
 		break;
 	case RTL_GIGA_MAC_VER_40:
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_49:
-		RTL_W8(PMCH, RTL_R8(PMCH) | 0xc0);
+		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
 		rtl_w0w1_eri(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000,
 			     0x00000000, ERIAR_EXGMAC);
 		break;
@@ -5134,8 +5041,6 @@ static void rtl_init_pll_power_ops(struct rtl8169_private *tp)
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_01:
 	case RTL_GIGA_MAC_VER_02:
@@ -5151,7 +5056,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_15:
 	case RTL_GIGA_MAC_VER_16:
 	case RTL_GIGA_MAC_VER_17:
-		RTL_W32(RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
+		RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
 		break;
 	case RTL_GIGA_MAC_VER_18:
 	case RTL_GIGA_MAC_VER_19:
@@ -5162,7 +5067,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_24:
 	case RTL_GIGA_MAC_VER_34:
 	case RTL_GIGA_MAC_VER_35:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
+		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
 		break;
 	case RTL_GIGA_MAC_VER_40:
 	case RTL_GIGA_MAC_VER_41:
@@ -5176,10 +5081,10 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_49:
 	case RTL_GIGA_MAC_VER_50:
 	case RTL_GIGA_MAC_VER_51:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
+		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
 		break;
 	default:
-		RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST);
+		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
 		break;
 	}
 }
@@ -5191,71 +5096,55 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp)
 
 static void rtl_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 	rtl_generic_op(tp, tp->jumbo_ops.enable);
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 	rtl_generic_op(tp, tp->jumbo_ops.disable);
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 }
 
 static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
-	RTL_W8(Config4, RTL_R8(Config4) | Jumbo_En1);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
 	rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
-	RTL_W8(Config4, RTL_R8(Config4) & ~Jumbo_En1);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
 	rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 }
 
 static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 }
 
 static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(MaxTxPacketSize, 0x3f);
-	RTL_W8(Config3, RTL_R8(Config3) | Jumbo_En0);
-	RTL_W8(Config4, RTL_R8(Config4) | 0x01);
+	RTL_W8(tp, MaxTxPacketSize, 0x3f);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
 	rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(MaxTxPacketSize, 0x0c);
-	RTL_W8(Config3, RTL_R8(Config3) & ~Jumbo_En0);
-	RTL_W8(Config4, RTL_R8(Config4) & ~0x01);
+	RTL_W8(tp, MaxTxPacketSize, 0x0c);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
 	rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
@@ -5273,20 +5162,16 @@ static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
 
 static void r8168b_1_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	r8168b_0_hw_jumbo_enable(tp);
 
-	RTL_W8(Config4, RTL_R8(Config4) | (1 << 0));
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | (1 << 0));
 }
 
 static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	r8168b_0_hw_jumbo_disable(tp);
 
-	RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
 static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
@@ -5353,16 +5238,12 @@ static void rtl_init_jumbo_ops(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_chipcmd_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R8(ChipCmd) & CmdReset;
+	return RTL_R8(tp, ChipCmd) & CmdReset;
 }
 
 static void rtl_hw_reset(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W8(ChipCmd, CmdReset);
+	RTL_W8(tp, ChipCmd, CmdReset);
 
 	rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
@@ -5413,29 +5294,21 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
 
 static void rtl_rx_close(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(RxConfig, RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
+	RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK);
 }
 
 DECLARE_RTL_COND(rtl_npq_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R8(TxPoll) & NPQ;
+	return RTL_R8(tp, TxPoll) & NPQ;
 }
 
 DECLARE_RTL_COND(rtl_txcfg_empty_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(TxConfig) & TXCFG_EMPTY;
+	return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
 }
 
 static void rtl8169_hw_reset(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	/* Disable interrupts */
 	rtl8169_irq_mask_and_ack(tp);
 
@@ -5462,10 +5335,10 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 		   tp->mac_version == RTL_GIGA_MAC_VER_49 ||
 		   tp->mac_version == RTL_GIGA_MAC_VER_50 ||
 		   tp->mac_version == RTL_GIGA_MAC_VER_51) {
-		RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
 		rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
 	} else {
-		RTL_W8(ChipCmd, RTL_R8(ChipCmd) | StopReq);
+		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
 		udelay(100);
 	}
 
@@ -5474,10 +5347,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 
 static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	/* Set DMA burst size and Interframe Gap Time */
-	RTL_W32(TxConfig, (TX_DMA_BURST << TxDMAShift) |
+	RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
 		(InterFrameGap << TxInterFrameGapShift));
 }
 
@@ -5490,36 +5361,35 @@ static void rtl_hw_start(struct net_device *dev)
 	rtl_irq_enable_all(tp);
 }
 
-static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp,
-					 void __iomem *ioaddr)
+static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 {
 	/*
 	 * Magic spell: some iop3xx ARM board needs the TxDescAddrHigh
 	 * register to be written before TxDescAddrLow to work.
 	 * Switching from MMIO to I/O access fixes the issue as well.
 	 */
-	RTL_W32(TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
-	RTL_W32(TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
-	RTL_W32(RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
-	RTL_W32(RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
+	RTL_W32(tp, TxDescStartAddrHigh, ((u64) tp->TxPhyAddr) >> 32);
+	RTL_W32(tp, TxDescStartAddrLow, ((u64) tp->TxPhyAddr) & DMA_BIT_MASK(32));
+	RTL_W32(tp, RxDescAddrHigh, ((u64) tp->RxPhyAddr) >> 32);
+	RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
 }
 
-static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
+static u16 rtl_rw_cpluscmd(struct rtl8169_private *tp)
 {
 	u16 cmd;
 
-	cmd = RTL_R16(CPlusCmd);
-	RTL_W16(CPlusCmd, cmd);
+	cmd = RTL_R16(tp, CPlusCmd);
+	RTL_W16(tp, CPlusCmd, cmd);
 	return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
+static void rtl_set_rx_max_size(struct rtl8169_private *tp, unsigned int rx_buf_sz)
 {
 	/* Low hurts. Let's disable the filtering. */
-	RTL_W16(RxMaxSize, rx_buf_sz + 1);
+	RTL_W16(tp, RxMaxSize, rx_buf_sz + 1);
 }
 
-static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
 {
 	static const struct rtl_cfg2_info {
 		u32 mac_version;
@@ -5535,10 +5405,10 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
 	unsigned int i;
 	u32 clk;
 
-	clk = RTL_R8(Config2) & PCI_Clock_66MHz;
+	clk = RTL_R8(tp, Config2) & PCI_Clock_66MHz;
 	for (i = 0; i < ARRAY_SIZE(cfg2_info); i++, p++) {
 		if ((p->mac_version == mac_version) && (p->clk == clk)) {
-			RTL_W32(0x7c, p->val);
+			RTL_W32(tp, 0x7c, p->val);
 			break;
 		}
 	}
@@ -5547,7 +5417,6 @@ static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
 static void rtl_set_rx_mode(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	u32 mc_filter[2];	/* Multicast hash filter */
 	int rx_mode;
 	u32 tmp = 0;
@@ -5579,7 +5448,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
 	if (dev->features & NETIF_F_RXALL)
 		rx_mode |= (AcceptErr | AcceptRunt);
 
-	tmp = (RTL_R32(RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
+	tmp = (RTL_R32(tp, RxConfig) & ~RX_CONFIG_ACCEPT_MASK) | rx_mode;
 
 	if (tp->mac_version > RTL_GIGA_MAC_VER_06) {
 		u32 data = mc_filter[0];
@@ -5591,35 +5460,34 @@ static void rtl_set_rx_mode(struct net_device *dev)
 	if (tp->mac_version == RTL_GIGA_MAC_VER_35)
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 
-	RTL_W32(MAR0 + 4, mc_filter[1]);
-	RTL_W32(MAR0 + 0, mc_filter[0]);
+	RTL_W32(tp, MAR0 + 4, mc_filter[1]);
+	RTL_W32(tp, MAR0 + 0, mc_filter[0]);
 
-	RTL_W32(RxConfig, tmp);
+	RTL_W32(tp, RxConfig, tmp);
 }
 
 static void rtl_hw_start_8169(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_05) {
-		RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) | PCIMulRW);
+		RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) | PCIMulRW);
 		pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x08);
 	}
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 	if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_02 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_03 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_04)
-		RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+		RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
 	rtl_init_rxcfg(tp);
 
-	RTL_W8(EarlyTxThres, NoEarlyTx);
+	RTL_W8(tp, EarlyTxThres, NoEarlyTx);
 
-	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+	rtl_set_rx_max_size(tp, rx_buf_sz);
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_01 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_02 ||
@@ -5627,7 +5495,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 	    tp->mac_version == RTL_GIGA_MAC_VER_04)
 		rtl_set_rx_tx_config_registers(tp);
 
-	tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
+	tp->cp_cmd |= rtl_rw_cpluscmd(tp) | PCIMulRW;
 
 	if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
 	    tp->mac_version == RTL_GIGA_MAC_VER_03) {
@@ -5636,37 +5504,37 @@ static void rtl_hw_start_8169(struct net_device *dev)
 		tp->cp_cmd |= (1 << 14);
 	}
 
-	RTL_W16(CPlusCmd, tp->cp_cmd);
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-	rtl8169_set_magic_reg(ioaddr, tp->mac_version);
+	rtl8169_set_magic_reg(tp, tp->mac_version);
 
 	/*
 	 * Undocumented corner. Supposedly:
 	 * (TxTimer << 12) | (TxPackets << 8) | (RxTimer << 4) | RxPackets
 	 */
-	RTL_W16(IntrMitigate, 0x0000);
+	RTL_W16(tp, IntrMitigate, 0x0000);
 
-	rtl_set_rx_tx_desc_registers(tp, ioaddr);
+	rtl_set_rx_tx_desc_registers(tp);
 
 	if (tp->mac_version != RTL_GIGA_MAC_VER_01 &&
 	    tp->mac_version != RTL_GIGA_MAC_VER_02 &&
 	    tp->mac_version != RTL_GIGA_MAC_VER_03 &&
 	    tp->mac_version != RTL_GIGA_MAC_VER_04) {
-		RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+		RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 		rtl_set_rx_tx_config_registers(tp);
 	}
 
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
 	/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
-	RTL_R8(IntrMask);
+	RTL_R8(tp, IntrMask);
 
-	RTL_W32(RxMissed, 0);
+	RTL_W32(tp, RxMissed, 0);
 
 	rtl_set_rx_mode(dev);
 
 	/* no early-rx interrupts */
-	RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
@@ -5700,17 +5568,13 @@ static void rtl_csi_access_enable_2(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_csiar_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R32(CSIAR) & CSIAR_FLAG;
+	return RTL_R32(tp, CSIAR) & CSIAR_FLAG;
 }
 
 static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(CSIDR, value);
-	RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+	RTL_W32(tp, CSIDR, value);
+	RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
 		CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
 	rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
@@ -5718,21 +5582,17 @@ static void r8169_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8169_csi_read(struct rtl8169_private *tp, int addr)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) |
+	RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) |
 		CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-		RTL_R32(CSIDR) : ~0;
+		RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(CSIDR, value);
-	RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+	RTL_W32(tp, CSIDR, value);
+	RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
 		CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
 		CSIAR_FUNC_NIC);
 
@@ -5741,21 +5601,17 @@ static void r8402_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8402_csi_read(struct rtl8169_private *tp, int addr)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
+	RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC |
 		CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-		RTL_R32(CSIDR) : ~0;
+		RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(CSIDR, value);
-	RTL_W32(CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
+	RTL_W32(tp, CSIDR, value);
+	RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
 		CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT |
 		CSIAR_FUNC_NIC2);
 
@@ -5764,13 +5620,11 @@ static void r8411_csi_write(struct rtl8169_private *tp, int addr, int value)
 
 static u32 r8411_csi_read(struct rtl8169_private *tp, int addr)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	RTL_W32(CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
+	RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | CSIAR_FUNC_NIC2 |
 		CSIAR_BYTE_ENABLE << CSIAR_BYTE_ENABLE_SHIFT);
 
 	return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
-		RTL_R32(CSIDR) : ~0;
+		RTL_R32(tp, CSIDR) : ~0;
 }
 
 static void rtl_init_csi_ops(struct rtl8169_private *tp)
@@ -5846,17 +5700,16 @@ static void rtl_enable_clock_request(struct pci_dev *pdev)
 
 static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	u8 data;
 
-	data = RTL_R8(Config3);
+	data = RTL_R8(tp, Config3);
 
 	if (enable)
 		data |= Rdy_to_L23;
 	else
 		data &= ~Rdy_to_L23;
 
-	RTL_W8(Config3, data);
+	RTL_W8(tp, Config3, data);
 }
 
 #define R8168_CPCMD_QUIRK_MASK (\
@@ -5872,12 +5725,11 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
-	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN) {
 		rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
@@ -5887,30 +5739,27 @@ static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	rtl_hw_start_8168bb(tp);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	RTL_W8(Config4, RTL_R8(Config4) & ~(1 << 0));
+	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0));
 }
 
 static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
-	RTL_W8(Config1, RTL_R8(Config1) | Speed_down);
+	RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
 
-	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_disable_clock_request(pdev);
 
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
@@ -5932,42 +5781,39 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl_csi_access_enable_2(tp);
 
-	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl_csi_access_enable_2(tp);
 
-	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	/* Magic. */
-	RTL_W8(DBG_REG, 0x20);
+	RTL_W8(tp, DBG_REG, 0x20);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8168c_1[] = {
 		{ 0x02, 0x0800,	0x1000 },
 		{ 0x03, 0,	0x0002 },
@@ -5976,7 +5822,7 @@ static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_2(tp);
 
-	RTL_W8(DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
+	RTL_W8(tp, DBG_REG, 0x06 | FIX_NAK_1 | FIX_NAK_2);
 
 	rtl_ephy_init(tp, e_info_8168c_1, ARRAY_SIZE(e_info_8168c_1));
 
@@ -6011,24 +5857,22 @@ static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl_csi_access_enable_2(tp);
 
 	rtl_disable_clock_request(pdev);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
+	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl_csi_access_enable_1(tp);
@@ -6036,14 +5880,13 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	rtl_disable_clock_request(pdev);
 }
 
 static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8168d_4[] = {
 		{ 0x0b, 0x0000,	0x0048 },
@@ -6055,7 +5898,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
 
@@ -6064,7 +5907,6 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8168e_1[] = {
 		{ 0x00, 0x0200,	0x0100 },
@@ -6089,20 +5931,19 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	rtl_disable_clock_request(pdev);
 
 	/* Reset tx FIFO pointer */
-	RTL_W32(MISC, RTL_R32(MISC) | TXPLA_RST);
-	RTL_W32(MISC, RTL_R32(MISC) & ~TXPLA_RST);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
 
-	RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8168e_2[] = {
 		{ 0x09, 0x0000,	0x0080 },
@@ -6125,24 +5966,23 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x10, 0x00, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
 
-	RTL_W8(MaxTxPacketSize, EarlySize);
+	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
 	rtl_disable_clock_request(pdev);
 
-	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
 	/* Adjust EEE LED frequency */
-	RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-	RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-	RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-	RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl_csi_access_enable_2(tp);
@@ -6160,20 +6000,19 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060, ERIAR_EXGMAC);
 
-	RTL_W8(MaxTxPacketSize, EarlySize);
+	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
 	rtl_disable_clock_request(pdev);
 
-	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
-	RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
-	RTL_W32(MISC, RTL_R32(MISC) | PWM_EN);
-	RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en);
+	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
 }
 
 static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8168f_1[] = {
 		{ 0x06, 0x00c0,	0x0020 },
 		{ 0x08, 0x0001,	0x0002 },
@@ -6188,7 +6027,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00, ERIAR_EXGMAC);
 
 	/* Adjust EEE LED frequency */
-	RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -6210,10 +6049,9 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
-	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
 	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6228,14 +6066,14 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC);
 
-	RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-	RTL_W8(MaxTxPacketSize, EarlySize);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
 	/* Adjust EEE LED frequency */
-	RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
 	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
@@ -6245,7 +6083,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8168g_1[] = {
 		{ 0x00, 0x0000,	0x0008 },
 		{ 0x0c, 0x37d0,	0x0820 },
@@ -6256,14 +6093,13 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
 	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
 }
 
 static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8168g_2[] = {
 		{ 0x00, 0x0000,	0x0008 },
 		{ 0x0c, 0x3df0,	0x0200 },
@@ -6274,14 +6110,13 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8168g_2, ARRAY_SIZE(e_info_8168g_2));
 }
 
 static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8411_2[] = {
 		{ 0x00, 0x0000,	0x0008 },
 		{ 0x0c, 0x3df0,	0x0200 },
@@ -6293,14 +6128,13 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
 	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
 }
 
 static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	int rg_saw_cnt;
 	u32 data;
@@ -6314,11 +6148,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 	};
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
 
-	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
 	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC);
@@ -6338,19 +6172,19 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
 
-	RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-	RTL_W8(MaxTxPacketSize, EarlySize);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
 	/* Adjust EEE LED frequency */
-	RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-	RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+	RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
-	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
 	rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC);
 
@@ -6398,12 +6232,11 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl8168ep_stop_cmac(tp);
 
-	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
+	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
 	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC);
@@ -6421,25 +6254,24 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
 	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC);
 
-	RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN);
-	RTL_W8(MaxTxPacketSize, EarlySize);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
+	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 
 	/* Adjust EEE LED frequency */
-	RTL_W8(EEE_LED, RTL_R8(EEE_LED) & ~0x07);
+	RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
 	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC);
 
-	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~TX_10M_PS_EN);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
 	rtl_pcie_state_l2l3_enable(tp, false);
 }
 
 static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8168ep_1[] = {
 		{ 0x00, 0xffff,	0x10ab },
 		{ 0x06, 0xffff,	0xf030 },
@@ -6449,8 +6281,8 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 	};
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
 
 	rtl_hw_start_8168ep(tp);
@@ -6458,7 +6290,6 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8168ep_2[] = {
 		{ 0x00, 0xffff,	0x10a3 },
 		{ 0x19, 0xffff,	0xfc00 },
@@ -6466,19 +6297,18 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
 	};
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
 
 	rtl_hw_start_8168ep(tp);
 
-	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-	RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+	RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 }
 
 static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	u32 data;
 	static const struct ephy_info e_info_8168ep_3[] = {
 		{ 0x00, 0xffff,	0x10a3 },
@@ -6488,14 +6318,14 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 	};
 
 	/* disable aspm and clock request before access ephy */
-	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
-	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
+	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
 	rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
 
 	rtl_hw_start_8168ep(tp);
 
-	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
-	RTL_W8(MISC_1, RTL_R8(MISC_1) & ~PFM_D3COLD_EN);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+	RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
 
 	data = r8168_mac_ocp_read(tp, 0xd3e2);
 	data &= 0xf000;
@@ -6514,19 +6344,18 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
 static void rtl_hw_start_8168(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+	rtl_set_rx_max_size(tp, rx_buf_sz);
 
-	tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
+	tp->cp_cmd |= RTL_R16(tp, CPlusCmd) | PktCntrDisable | INTT_1;
 
-	RTL_W16(CPlusCmd, tp->cp_cmd);
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-	RTL_W16(IntrMitigate, 0x5151);
+	RTL_W16(tp, IntrMitigate, 0x5151);
 
 	/* Work around for RxFIFO overflow. */
 	if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
@@ -6534,11 +6363,11 @@ static void rtl_hw_start_8168(struct net_device *dev)
 		tp->event_slow &= ~RxOverflow;
 	}
 
-	rtl_set_rx_tx_desc_registers(tp, ioaddr);
+	rtl_set_rx_tx_desc_registers(tp);
 
 	rtl_set_rx_tx_config_registers(tp);
 
-	RTL_R8(IntrMask);
+	RTL_R8(tp, IntrMask);
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_11:
@@ -6644,13 +6473,13 @@ static void rtl_hw_start_8168(struct net_device *dev)
 		break;
 	}
 
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-	RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
 	rtl_set_rx_mode(dev);
 
-	RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 #define R810X_CPCMD_QUIRK_MASK (\
@@ -6666,7 +6495,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8102e_1[] = {
 		{ 0x01,	0, 0x6e65 },
@@ -6682,32 +6510,31 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_2(tp);
 
-	RTL_W8(DBG_REG, FIX_NAK_1);
+	RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
 	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W8(Config1,
+	RTL_W8(tp, Config1,
 	       LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
-	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
-	cfg1 = RTL_R8(Config1);
+	cfg1 = RTL_R8(tp, Config1);
 	if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
-		RTL_W8(Config1, cfg1 & ~LEDS0);
+		RTL_W8(tp, Config1, cfg1 & ~LEDS0);
 
 	rtl_ephy_init(tp, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
 }
 
 static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	rtl_csi_access_enable_2(tp);
 
 	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
-	RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
+	RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
+	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 }
 
 static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
@@ -6719,7 +6546,6 @@ static void rtl_hw_start_8102e_3(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8105e_1[] = {
 		{ 0x07,	0, 0x4000 },
 		{ 0x19,	0, 0x0200 },
@@ -6732,13 +6558,13 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp)
 	};
 
 	/* Force LAN exit from ASPM if Rx/Tx are not idle */
-	RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+	RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
 	/* Disable Early Tally Counter */
-	RTL_W32(FuncEvent, RTL_R32(FuncEvent) & ~0x010000);
+	RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) & ~0x010000);
 
-	RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
-	RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
 
 	rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1));
 
@@ -6753,7 +6579,6 @@ static void rtl_hw_start_8105e_2(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8402(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	static const struct ephy_info e_info_8402[] = {
 		{ 0x19,	0xffff, 0xff64 },
 		{ 0x1e,	0, 0x4000 }
@@ -6762,10 +6587,10 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 	rtl_csi_access_enable_2(tp);
 
 	/* Force LAN exit from ASPM if Rx/Tx are not idle */
-	RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+	RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
-	RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO);
-	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
 	rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
 
@@ -6784,14 +6609,12 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8106(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	/* Force LAN exit from ASPM if Rx/Tx are not idle */
-	RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800);
+	RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800);
 
-	RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
-	RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET);
-	RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN);
+	RTL_W32(tp, MISC, (RTL_R32(tp, MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
+	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 
 	rtl_pcie_state_l2l3_enable(tp, false);
 }
@@ -6799,7 +6622,6 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
 static void rtl_hw_start_8101(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 
 	if (tp->mac_version >= RTL_GIGA_MAC_VER_30)
@@ -6810,16 +6632,16 @@ static void rtl_hw_start_8101(struct net_device *dev)
 		pcie_capability_set_word(pdev, PCI_EXP_DEVCTL,
 					 PCI_EXP_DEVCTL_NOSNOOP_EN);
 
-	RTL_W8(Cfg9346, Cfg9346_Unlock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
 
-	RTL_W8(MaxTxPacketSize, TxPacketMax);
+	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
+	rtl_set_rx_max_size(tp, rx_buf_sz);
 
 	tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
-	RTL_W16(CPlusCmd, tp->cp_cmd);
+	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-	rtl_set_rx_tx_desc_registers(tp, ioaddr);
+	rtl_set_rx_tx_desc_registers(tp);
 
 	rtl_set_rx_tx_config_registers(tp);
 
@@ -6859,17 +6681,17 @@ static void rtl_hw_start_8101(struct net_device *dev)
 		break;
 	}
 
-	RTL_W8(Cfg9346, Cfg9346_Lock);
+	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
-	RTL_W16(IntrMitigate, 0x0000);
+	RTL_W16(tp, IntrMitigate, 0x0000);
 
-	RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
+	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 
 	rtl_set_rx_mode(dev);
 
-	RTL_R8(IntrMask);
+	RTL_R8(tp, IntrMask);
 
-	RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
+	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);
 }
 
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
@@ -7079,7 +6901,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
 	napi_enable(&tp->napi);
 	rtl_hw_start(dev);
 	netif_wake_queue(dev);
-	rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+	rtl8169_check_link_status(dev, tp);
 }
 
 static void rtl8169_tx_timeout(struct net_device *dev)
@@ -7327,7 +7149,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 	struct rtl8169_private *tp = netdev_priv(dev);
 	unsigned int entry = tp->cur_tx % NUM_TX_DESC;
 	struct TxDesc *txd = tp->TxDescArray + entry;
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct device *d = &tp->pci_dev->dev;
 	dma_addr_t mapping;
 	u32 status, len;
@@ -7387,7 +7208,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 
 	tp->cur_tx += frags + 1;
 
-	RTL_W8(TxPoll, NPQ);
+	RTL_W8(tp, TxPoll, NPQ);
 
 	mmiowb();
 
@@ -7458,11 +7279,9 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
 
 	/* The infamous DAC f*ckup only happens at boot time */
 	if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) {
-		void __iomem *ioaddr = tp->mmio_addr;
-
 		netif_info(tp, intr, dev, "disabling PCI DAC\n");
 		tp->cp_cmd &= ~PCIDAC;
-		RTL_W16(CPlusCmd, tp->cp_cmd);
+		RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 		dev->features &= ~NETIF_F_HIGHDMA;
 	}
 
@@ -7528,11 +7347,8 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
 		 * of start_xmit activity is detected (if it is not detected,
 		 * it is slow enough). -- FR
 		 */
-		if (tp->cur_tx != dirty_tx) {
-			void __iomem *ioaddr = tp->mmio_addr;
-
-			RTL_W8(TxPoll, NPQ);
-		}
+		if (tp->cur_tx != dirty_tx)
+			RTL_W8(tp, TxPoll, NPQ);
 	}
 }
 
@@ -7713,7 +7529,7 @@ static void rtl_slow_event_work(struct rtl8169_private *tp)
 		rtl8169_pcierr_interrupt(dev);
 
 	if (status & LinkChg)
-		rtl8169_check_link_status(dev, tp, tp->mmio_addr);
+		rtl8169_check_link_status(dev, tp);
 
 	rtl_irq_enable_all(tp);
 }
@@ -7785,21 +7601,20 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static void rtl8169_rx_missed(struct net_device *dev, void __iomem *ioaddr)
+static void rtl8169_rx_missed(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
 	if (tp->mac_version > RTL_GIGA_MAC_VER_06)
 		return;
 
-	dev->stats.rx_missed_errors += (RTL_R32(RxMissed) & 0xffffff);
-	RTL_W32(RxMissed, 0);
+	dev->stats.rx_missed_errors += RTL_R32(tp, RxMissed) & 0xffffff;
+	RTL_W32(tp, RxMissed, 0);
 }
 
 static void rtl8169_down(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 
 	del_timer_sync(&tp->timer);
 
@@ -7812,7 +7627,7 @@ static void rtl8169_down(struct net_device *dev)
 	 * as netif_running is not true (rtl8169_interrupt, rtl8169_reset_task)
 	 * and napi is disabled (rtl8169_poll).
 	 */
-	rtl8169_rx_missed(dev, ioaddr);
+	rtl8169_rx_missed(dev);
 
 	/* Give a racing hard_start_xmit a few cycles to complete. */
 	synchronize_sched();
@@ -7868,7 +7683,6 @@ static void rtl8169_netpoll(struct net_device *dev)
 static int rtl_open(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	int retval = -ENOMEM;
 
@@ -7927,7 +7741,7 @@ static int rtl_open(struct net_device *dev)
 	tp->saved_wolopts = 0;
 	pm_runtime_put_sync(&pdev->dev);
 
-	rtl8169_check_link_status(dev, tp, ioaddr);
+	rtl8169_check_link_status(dev, tp);
 out:
 	return retval;
 
@@ -7951,7 +7765,6 @@ static void
 rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
 	struct rtl8169_counters *counters = tp->counters;
 	unsigned int start;
@@ -7959,7 +7772,7 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	pm_runtime_get_noresume(&pdev->dev);
 
 	if (netif_running(dev) && pm_runtime_active(&pdev->dev))
-		rtl8169_rx_missed(dev, ioaddr);
+		rtl8169_rx_missed(dev);
 
 	do {
 		start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp);
@@ -8082,7 +7895,7 @@ static int rtl8169_runtime_suspend(struct device *device)
 	rtl8169_net_suspend(dev);
 
 	/* Update counters before going runtime suspend */
-	rtl8169_rx_missed(dev, tp->mmio_addr);
+	rtl8169_rx_missed(dev);
 	rtl8169_update_counters(dev);
 
 	return 0;
@@ -8143,8 +7956,6 @@ static const struct dev_pm_ops rtl8169_pm_ops = {
 
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
 	/* WoL fails with 8168b when the receiver is disabled. */
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_11:
@@ -8152,9 +7963,9 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_17:
 		pci_clear_master(tp->pci_dev);
 
-		RTL_W8(ChipCmd, CmdRxEnb);
+		RTL_W8(tp, ChipCmd, CmdRxEnb);
 		/* PCI commit */
-		RTL_R8(ChipCmd);
+		RTL_R8(tp, ChipCmd);
 		break;
 	default:
 		break;
@@ -8264,13 +8075,12 @@ static const struct rtl_cfg_info {
 
 static int rtl_alloc_irq(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	unsigned int flags;
 
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
-		RTL_W8(Cfg9346, Cfg9346_Unlock);
-		RTL_W8(Config2, RTL_R8(Config2) & ~MSIEnable);
-		RTL_W8(Cfg9346, Cfg9346_Lock);
+		RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+		RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable);
+		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 		flags = PCI_IRQ_LEGACY;
 	} else {
 		flags = PCI_IRQ_ALL_TYPES;
@@ -8281,26 +8091,21 @@ static int rtl_alloc_irq(struct rtl8169_private *tp)
 
 DECLARE_RTL_COND(rtl_link_list_ready_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return RTL_R8(MCU) & LINK_LIST_RDY;
+	return RTL_R8(tp, MCU) & LINK_LIST_RDY;
 }
 
 DECLARE_RTL_COND(rtl_rxtx_empty_cond)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
-
-	return (RTL_R8(MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+	return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
 }
 
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
-	void __iomem *ioaddr = tp->mmio_addr;
 	u32 data;
 
 	tp->ocp_base = OCP_STD_PHY_BASE;
 
-	RTL_W32(MISC, RTL_R32(MISC) | RXDV_GATED_EN);
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
 	if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
 		return;
@@ -8308,9 +8113,9 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 	if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
 		return;
 
-	RTL_W8(ChipCmd, RTL_R8(ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
+	RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
 	msleep(1);
-	RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB);
+	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
 	data = r8168_mac_ocp_read(tp, 0xe8de);
 	data &= ~(1 << 14);
@@ -8540,7 +8345,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			rtl_rar_set(tp, (u8 *)mac_addr);
 	}
 	for (i = 0; i < ETH_ALEN; i++)
-		dev->dev_addr[i] = RTL_R8(MAC0 + i);
+		dev->dev_addr[i] = RTL_R8(tp, MAC0 + i);
 
 	dev->ethtool_ops = &rtl8169_ethtool_ops;
 	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
@@ -8608,7 +8413,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
 		   rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
-		   (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff),
+		   (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
 		   pci_irq_vector(pdev, 0));
 	if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {
 		netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, "

From 93a00d4d613fb6363c618c941b0283c2df6bd64e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 1 Mar 2018 13:27:35 +0200
Subject: [PATCH 0619/1678] r8169: switch to device-managed functions in probe
 (part 2)

This is a follow up to the commit

  4c45d24a759d ("r8169: switch to device-managed functions in probe")

to move towards managed resources even more.

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b6098e081a25..7055db161b1b 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -8169,7 +8169,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct rtl8169_private *tp;
 	struct mii_if_info *mii;
 	struct net_device *dev;
-	void __iomem *ioaddr;
 	int chipset, i;
 	int rc;
 
@@ -8227,20 +8226,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -ENODEV;
 	}
 
-	rc = pci_request_regions(pdev, MODULENAME);
+	rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME);
 	if (rc < 0) {
-		netif_err(tp, probe, dev, "could not request regions\n");
+		netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
 		return rc;
 	}
 
-	/* ioremap MMIO region */
-	ioaddr = devm_ioremap(&pdev->dev, pci_resource_start(pdev, region),
-			      R8169_REGS_SIZE);
-	if (!ioaddr) {
-		netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n");
-		return -EIO;
-	}
-	tp->mmio_addr = ioaddr;
+	tp->mmio_addr = pcim_iomap_table(pdev)[region];
 
 	if (!pci_is_pcie(pdev))
 		netif_info(tp, probe, dev, "not PCI Express\n");
@@ -8412,7 +8404,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, dev);
 
 	netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n",
-		   rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr,
+		   rtl_chip_infos[chipset].name, tp->mmio_addr, dev->dev_addr,
 		   (u32)(RTL_R32(tp, TxConfig) & 0x9cf0f8ff),
 		   pci_irq_vector(pdev, 0));
 	if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) {

From 129fbeecbccdeaa4f89aaea5d1698aae44ae4b4b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 1 Mar 2018 16:42:40 +0000
Subject: [PATCH 0620/1678] net: amd8111e: remove redundant assignment to
 'tx_index'

The variable tx_index is being initialized with a value that is never
read and re-assigned a little later, hence the initialization is redundant
and can be removed.

Cleans up clang warning:
drivers/net/ethernet/amd/amd8111e.c:652:6: warning: Value stored to
'tx_index' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/amd8111e.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 358f7ab77c70..c99e3e845ac0 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -649,7 +649,7 @@ static void amd8111e_free_ring(struct amd8111e_priv *lp)
 static int amd8111e_tx(struct net_device *dev)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
-	int tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
+	int tx_index;
 	int status;
 	/* Complete all the transmit packet */
 	while (lp->tx_complete_idx != lp->tx_idx){

From 4a464a2b06cebf7ba5b387fcdadf790cdeb36ac9 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:19 -0800
Subject: [PATCH 0621/1678] enic: Check inner ip proto for pseudo header csum

To compute pseudo IP header csum, we need to check the inner header for
encap pkt, not outer IP header.

Also add pseudo csum for IPv6 inner pkt.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f202ba72a811..252285894968 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -635,12 +635,25 @@ static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
 
 static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
 {
-	if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
+	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+	switch (eth->h_proto) {
+	case ntohs(ETH_P_IP):
 		inner_ip_hdr(skb)->check = 0;
 		inner_tcp_hdr(skb)->check =
 			~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
 					   inner_ip_hdr(skb)->daddr, 0,
 					   IPPROTO_TCP, 0);
+		break;
+	case ntohs(ETH_P_IPV6):
+		inner_tcp_hdr(skb)->check =
+			~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
+					 &inner_ipv6_hdr(skb)->daddr, 0,
+					 IPPROTO_TCP, 0);
+		break;
+	default:
+		WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
+		break;
 	}
 }
 

From d11790941dd315e2c82bce7c228807329dcf0be4 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:20 -0800
Subject: [PATCH 0622/1678] enic: Add vxlan offload support for IPv6 pkts

New adaptors supports vxlan offload for inner IPv6 and outer IPv6 vxlan
pkts.

Fw sets BIT(0) & BIT(1) in a1 if hw supports ipv6 inner & outer pkt
offload.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic.h        |  1 +
 drivers/net/ethernet/cisco/enic/enic_main.c   | 44 +++++++++++++++----
 drivers/net/ethernet/cisco/enic/vnic_dev.c    |  5 +--
 drivers/net/ethernet/cisco/enic/vnic_dev.h    |  2 +-
 drivers/net/ethernet/cisco/enic/vnic_devcmd.h |  3 ++
 5 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 9b218f0e5a4c..83be9a5e8daa 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -140,6 +140,7 @@ struct enic_rfs_flw_tbl {
 struct vxlan_offload {
 	u16 vxlan_udp_port_number;
 	u8 patch_level;
+	u8 flags;
 };
 
 /* Per-instance private data structure */
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 252285894968..848aac477cff 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -191,8 +191,16 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
 		goto error;
 	}
 
-	if (ti->sa_family != AF_INET) {
-		netdev_info(netdev, "vxlan: only IPv4 offload supported");
+	switch (ti->sa_family) {
+	case AF_INET6:
+		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
+			netdev_info(netdev, "vxlan: only IPv4 offload supported");
+			goto error;
+		}
+		/* Fall through */
+	case AF_INET:
+		break;
+	default:
 		goto error;
 	}
 
@@ -271,22 +279,37 @@ static netdev_features_t enic_features_check(struct sk_buff *skb,
 	struct enic *enic = netdev_priv(dev);
 	struct udphdr *udph;
 	u16 port = 0;
-	u16 proto;
+	u8 proto;
 
 	if (!skb->encapsulation)
 		return features;
 
 	features = vxlan_features_check(skb, features);
 
-	/* hardware only supports IPv4 vxlan tunnel */
-	if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IPV6):
+		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
+			goto out;
+		proto = ipv6_hdr(skb)->nexthdr;
+		break;
+	case htons(ETH_P_IP):
+		proto = ip_hdr(skb)->protocol;
+		break;
+	default:
 		goto out;
+	}
 
-	/* hardware does not support offload of ipv6 inner pkt */
-	if (eth->h_proto != ntohs(ETH_P_IP))
+	switch (eth->h_proto) {
+	case ntohs(ETH_P_IPV6):
+		if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
+			goto out;
+		/* Fall through */
+	case ntohs(ETH_P_IP):
+		break;
+	default:
 		goto out;
+	}
 
-	proto = ip_hdr(skb)->protocol;
 
 	if (proto == IPPROTO_UDP) {
 		udph = udp_hdr(skb);
@@ -2914,9 +2937,11 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->hw_features |= NETIF_F_RXCSUM;
 	if (ENIC_SETTING(enic, VXLAN)) {
 		u64 patch_level;
+		u64 a1 = 0;
 
 		netdev->hw_enc_features |= NETIF_F_RXCSUM		|
 					   NETIF_F_TSO			|
+					   NETIF_F_TSO6			|
 					   NETIF_F_TSO_ECN		|
 					   NETIF_F_GSO_UDP_TUNNEL	|
 					   NETIF_F_HW_CSUM		|
@@ -2935,9 +2960,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		 */
 		err = vnic_dev_get_supported_feature_ver(enic->vdev,
 							 VIC_FEATURE_VXLAN,
-							 &patch_level);
+							 &patch_level, &a1);
 		if (err)
 			patch_level = 0;
+		enic->vxlan.flags = (u8)a1;
 		/* mask bits that are supported by driver
 		 */
 		patch_level &= BIT_ULL(0) | BIT_ULL(2);
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 39bad67422dd..b60fb6e3e775 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -1269,14 +1269,13 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
 }
 
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
-				       u64 *supported_versions)
+				       u64 *supported_versions, u64 *a1)
 {
 	u64 a0 = feature;
 	int wait = 1000;
-	u64 a1 = 0;
 	int ret;
 
-	ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait);
+	ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, a1, wait);
 	if (!ret)
 		*supported_versions = a0;
 
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h
index 9d43d6bb9907..db160f459852 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h
@@ -183,6 +183,6 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config);
 int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
 				 u16 vxlan_udp_port_number);
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
-				       u64 *supported_versions);
+				       u64 *supported_versions, u64 *a1);
 
 #endif /* _VNIC_DEV_H_ */
diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
index d83880b0d468..69529a3516cd 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
@@ -697,6 +697,9 @@ enum overlay_ofld_cmd {
 
 #define OVERLAY_CFG_VXLAN_PORT_UPDATE	0
 
+#define ENIC_VXLAN_INNER_IPV6		BIT(0)
+#define ENIC_VXLAN_OUTER_IPV6		BIT(1)
+
 /* Use this enum to get the supported versions for each of these features
  * If you need to use the devcmd_get_supported_feature_version(), add
  * the new feature into this enum and install function handler in devcmd.c

From 7e24c64253ed134a3c699cceda6963ac1fa6f4c8 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:21 -0800
Subject: [PATCH 0623/1678] enic: Check if hw supports multi wq with vxlan
 offload

Some adaptors do not support vxlan offload when multi wq is configured.

If hw supports multi wq, BIT(2) is set in a1.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic_main.c   | 5 +++++
 drivers/net/ethernet/cisco/enic/vnic_devcmd.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 848aac477cff..3280a05f9cf1 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -212,6 +212,11 @@ static void enic_udp_tunnel_add(struct net_device *netdev,
 
 		goto error;
 	}
+	if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
+	    !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
+		netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
+		goto error;
+	}
 
 	err = vnic_dev_overlay_offload_cfg(enic->vdev,
 					   OVERLAY_CFG_VXLAN_PORT_UPDATE,
diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
index 69529a3516cd..8fce9ef1c9bc 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
@@ -699,6 +699,7 @@ enum overlay_ofld_cmd {
 
 #define ENIC_VXLAN_INNER_IPV6		BIT(0)
 #define ENIC_VXLAN_OUTER_IPV6		BIT(1)
+#define ENIC_VXLAN_MULTI_WQ		BIT(2)
 
 /* Use this enum to get the supported versions for each of these features
  * If you need to use the devcmd_get_supported_feature_version(), add

From 48398b6e7065691011b09f34d6c31d74013090b6 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:22 -0800
Subject: [PATCH 0624/1678] enic: set UDP rss flag

New hardware needs UDP flag set to enable UDP L4 rss hash. Add ethtool
get option to display supported rss flow hash.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cisco/enic/enic_ethtool.c    | 36 +++++++++++++++++++
 drivers/net/ethernet/cisco/enic/enic_main.c   |  4 ++-
 drivers/net/ethernet/cisco/enic/vnic_dev.c    | 17 +++++++++
 drivers/net/ethernet/cisco/enic/vnic_dev.h    |  1 +
 drivers/net/ethernet/cisco/enic/vnic_nic.h    |  1 +
 5 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index efb9333c7cf8..869006c2002d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -474,6 +474,39 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
 	return 0;
 }
 
+static int enic_get_rx_flow_hash(struct enic *enic, struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	switch (cmd->flow_type) {
+	case TCP_V6_FLOW:
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
+	case UDP_V6_FLOW:
+	case UDP_V4_FLOW:
+		if (vnic_dev_capable_udp_rss(enic->vdev))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* Fall through */
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			  u32 *rule_locs)
 {
@@ -500,6 +533,9 @@ static int enic_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 		ret = enic_grxclsrule(enic, cmd);
 		spin_unlock_bh(&enic->rfs_h.lock);
 		break;
+	case ETHTOOL_GRXFH:
+		ret = enic_get_rx_flow_hash(enic, cmd);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 3280a05f9cf1..5213bc01a6e9 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2316,7 +2316,7 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
 {
 	struct device *dev = enic_get_dev(enic);
 	const u8 rss_default_cpu = 0;
-	const u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
+	u8 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 |
 		NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 |
 		NIC_CFG_RSS_HASH_TYPE_IPV6 |
 		NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
@@ -2324,6 +2324,8 @@ static int enic_set_rss_nic_cfg(struct enic *enic)
 	const u8 rss_base_cpu = 0;
 	u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1);
 
+	if (vnic_dev_capable_udp_rss(enic->vdev))
+		rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP;
 	if (rss_enable) {
 		if (!enic_set_rsskey(enic)) {
 			if (enic_set_rsscpu(enic, rss_hash_bits)) {
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index b60fb6e3e775..a2b376055b2d 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -1281,3 +1281,20 @@ int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
 
 	return ret;
 }
+
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
+{
+	u64 a0 = CMD_NIC_CFG, a1 = 0;
+	u64 rss_hash_type;
+	int wait = 1000;
+	int err;
+
+	err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
+	if (err || !a0)
+		return 0;
+
+	rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
+			NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;
+
+	return (rss_hash_type & NIC_CFG_RSS_HASH_TYPE_UDP);
+}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.h b/drivers/net/ethernet/cisco/enic/vnic_dev.h
index db160f459852..59d4cc8fbb85 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.h
@@ -184,5 +184,6 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
 				 u16 vxlan_udp_port_number);
 int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
 				       u64 *supported_versions, u64 *a1);
+bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev);
 
 #endif /* _VNIC_DEV_H_ */
diff --git a/drivers/net/ethernet/cisco/enic/vnic_nic.h b/drivers/net/ethernet/cisco/enic/vnic_nic.h
index 995a50dd4c99..5a93db0d7afc 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_nic.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_nic.h
@@ -47,6 +47,7 @@
 #define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6		(1 << 4)
 #define NIC_CFG_RSS_HASH_TYPE_IPV6_EX		(1 << 5)
 #define NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX	(1 << 6)
+#define NIC_CFG_RSS_HASH_TYPE_UDP		(1 << 7)
 
 static inline void vnic_set_nic_cfg(u32 *nic_cfg,
 	u8 rss_default_cpu, u8 rss_hash_type,

From e8588e268509292550634d9a35f2723a207683b2 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:23 -0800
Subject: [PATCH 0625/1678] enic: enable rq before updating rq descriptors

rq should be enabled before posting the buffers to rq desc. If not hw sees
stale value and casuses DMAR errors.

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 5213bc01a6e9..243d5c5fd5e3 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1939,6 +1939,8 @@ static int enic_open(struct net_device *netdev)
 	}
 
 	for (i = 0; i < enic->rq_count; i++) {
+		/* enable rq before updating rq desc */
+		vnic_rq_enable(&enic->rq[i]);
 		vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
 		/* Need at least one buffer on ring to get going */
 		if (vnic_rq_desc_used(&enic->rq[i]) == 0) {
@@ -1950,8 +1952,6 @@ static int enic_open(struct net_device *netdev)
 
 	for (i = 0; i < enic->wq_count; i++)
 		vnic_wq_enable(&enic->wq[i]);
-	for (i = 0; i < enic->rq_count; i++)
-		vnic_rq_enable(&enic->rq[i]);
 
 	if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
 		enic_dev_add_station_addr(enic);
@@ -1977,8 +1977,12 @@ static int enic_open(struct net_device *netdev)
 	return 0;
 
 err_out_free_rq:
-	for (i = 0; i < enic->rq_count; i++)
+	for (i = 0; i < enic->rq_count; i++) {
+		err = vnic_rq_disable(&enic->rq[i]);
+		if (err)
+			return err;
 		vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+	}
 	enic_dev_notify_unset(enic);
 err_out_free_intr:
 	enic_unset_affinity_hint(enic);

From 5de0c022f1b0bce073cb04dd69ed7982805e5763 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Thu, 1 Mar 2018 11:07:24 -0800
Subject: [PATCH 0626/1678] enic: set IG desc cache flag in open

New adapter needs CMD_OPENF_IG_DESCCACHE flag to be set. If this flag is
not set, fw flushes the global IG desc cache. This flag is nop in older
adapter.

Also increment driver version

Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic.h        | 2 +-
 drivers/net/ethernet/cisco/enic/enic_main.c   | 3 ++-
 drivers/net/ethernet/cisco/enic/vnic_devcmd.h | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 83be9a5e8daa..0dd64acd2a3f 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -33,7 +33,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"2.3.0.45"
+#define DRV_VERSION		"2.3.0.53"
 #define DRV_COPYRIGHT		"Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX		6
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 243d5c5fd5e3..a25fb95492a0 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2196,9 +2196,10 @@ static int enic_dev_wait(struct vnic_dev *vdev,
 static int enic_dev_open(struct enic *enic)
 {
 	int err;
+	u32 flags = CMD_OPENF_IG_DESCCACHE;
 
 	err = enic_dev_wait(enic->vdev, vnic_dev_open,
-		vnic_dev_open_done, 0);
+		vnic_dev_open_done, flags);
 	if (err)
 		dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n",
 			err);
diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
index 8fce9ef1c9bc..41de4ba622a1 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
+++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h
@@ -439,6 +439,7 @@ enum vnic_devcmd_cmd {
 
 /* flags for CMD_OPEN */
 #define CMD_OPENF_OPROM		0x1	/* open coming from option rom */
+#define CMD_OPENF_IG_DESCCACHE	0x2	/* Do not flush IG DESC cache */
 
 /* flags for CMD_INIT */
 #define CMD_INITF_DEFAULT_MAC	0x1	/* init with default mac addr */

From 77a5196a804e34ce5e215ef84d5e1de332e9c529 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Thu, 1 Mar 2018 13:49:57 -0800
Subject: [PATCH 0627/1678] gre: add sequence number for collect md mode.

Currently GRE sequence number can only be used in native
tunnel mode.  This patch adds sequence number support for
gre collect metadata mode.  RFC2890 defines GRE sequence
number to be specific to the traffic flow identified by the
key.  However, this patch does not implement per-key seqno.
The sequence number is shared in the same tunnel device.
That is, different tunnel keys using the same collect_md
tunnel share single sequence number.

Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h |  1 +
 net/core/filter.c        |  4 +++-
 net/ipv4/ip_gre.c        |  7 +++++--
 net/ipv6/ip6_gre.c       | 13 ++++++++-----
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db6bdc375126..2a66769e5875 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -800,6 +800,7 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
+#define BPF_F_SEQ_NUMBER		(1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c121adbdbaa..33edfa8372fd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2991,7 +2991,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
 	struct ip_tunnel_info *info;
 
 	if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
-			       BPF_F_DONT_FRAGMENT)))
+			       BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
 		return -EINVAL;
 	if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
 		switch (size) {
@@ -3025,6 +3025,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
 		info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
 	if (flags & BPF_F_ZERO_CSUM_TX)
 		info->key.tun_flags &= ~TUNNEL_CSUM;
+	if (flags & BPF_F_SEQ_NUMBER)
+		info->key.tun_flags |= TUNNEL_SEQ;
 
 	info->key.tun_id = cpu_to_be64(from->tunnel_id);
 	info->key.tos = from->tunnel_tos;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0fe1d69b5df4..95fd225f402e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -522,6 +522,7 @@ err_free_skb:
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 			__be16 proto)
 {
+	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct ip_tunnel_info *tun_info;
 	const struct ip_tunnel_key *key;
 	struct rtable *rt = NULL;
@@ -545,9 +546,11 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 		goto err_free_rt;
 
-	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+	flags = tun_info->key.tun_flags &
+		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 	gre_build_header(skb, tunnel_hlen, flags, proto,
-			 tunnel_id_to_key32(tun_info->key.tun_id), 0);
+			 tunnel_id_to_key32(tun_info->key.tun_id),
+			 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 
 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 83c7766c8c75..18a3dfbd0300 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -695,9 +695,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 	else
 		fl6->daddr = tunnel->parms.raddr;
 
-	if (tunnel->parms.o_flags & TUNNEL_SEQ)
-		tunnel->o_seqno++;
-
 	/* Push GRE header. */
 	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
 
@@ -720,14 +717,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
 		dsfield = key->tos;
-		flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+		flags = key->tun_flags &
+			(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 		tunnel->tun_hlen = gre_calc_hlen(flags);
 
 		gre_build_header(skb, tunnel->tun_hlen,
 				 flags, protocol,
-				 tunnel_id_to_key32(tun_info->key.tun_id), 0);
+				 tunnel_id_to_key32(tun_info->key.tun_id),
+				 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+						      : 0);
 
 	} else {
+		if (tunnel->parms.o_flags & TUNNEL_SEQ)
+			tunnel->o_seqno++;
+
 		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 				 protocol, tunnel->parms.o_key,
 				 htonl(tunnel->o_seqno));

From 5f280b60d26fe6379a8179d70d932ec73b77411b Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Thu, 1 Mar 2018 13:49:58 -0800
Subject: [PATCH 0628/1678] samples/bpf: add gre sequence number test.

The patch adds tests for GRE sequence number
support for metadata mode tunnel.

Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/bpf/tcbpf2_kern.c      | 6 ++++--
 samples/bpf/test_tunnel_bpf.sh | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index efdc16d195ff..9a8db7bd6db4 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -52,7 +52,8 @@ int _gre_set_tunnel(struct __sk_buff *skb)
 	key.tunnel_tos = 0;
 	key.tunnel_ttl = 64;
 
-	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
 	if (ret < 0) {
 		ERROR(ret);
 		return TC_ACT_SHOT;
@@ -92,7 +93,8 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
 	key.tunnel_label = 0xabcde;
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
-				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+				     BPF_F_SEQ_NUMBER);
 	if (ret < 0) {
 		ERROR(ret);
 		return TC_ACT_SHOT;
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 43ce049996ee..c265863ccdf9 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -23,7 +23,8 @@ function config_device {
 function add_gre_tunnel {
 	# in namespace
 	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200
 	ip netns exec at_ns0 ip link set dev $DEV_NS up
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
 
@@ -43,7 +44,7 @@ function add_ip6gretap_tunnel {
 
 	# in namespace
 	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+		ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
 		local ::11 remote ::22
 
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24

From 053533fc7577ddbe14b2a7c4a8fc70ce3fbb3547 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Fri, 2 Mar 2018 11:22:20 +0900
Subject: [PATCH 0629/1678] selftests: rtnetlink: remove testns on test fail

This patch removes testns after test failure so that next test can
continue with clean ns

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index a622eeecc3a6..e6f485235435 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -517,6 +517,7 @@ kci_test_gretap()
 	ip link help gretap 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: gretap: iproute2 too old"
+		ip netns del "$testns"
 		return 1
 	fi
 
@@ -543,6 +544,7 @@ kci_test_gretap()
 
 	if [ $ret -ne 0 ]; then
 		echo "FAIL: gretap"
+		ip netns del "$testns"
 		return 1
 	fi
 	echo "PASS: gretap"
@@ -565,6 +567,7 @@ kci_test_ip6gretap()
 	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: ip6gretap: iproute2 too old"
+		ip netns del "$testns"
 		return 1
 	fi
 
@@ -591,6 +594,7 @@ kci_test_ip6gretap()
 
 	if [ $ret -ne 0 ]; then
 		echo "FAIL: ip6gretap"
+		ip netns del "$testns"
 		return 1
 	fi
 	echo "PASS: ip6gretap"
@@ -655,6 +659,7 @@ kci_test_erspan()
 
 	if [ $ret -ne 0 ]; then
 		echo "FAIL: erspan"
+		ip netns del "$testns"
 		return 1
 	fi
 	echo "PASS: erspan"
@@ -720,6 +725,7 @@ kci_test_ip6erspan()
 
 	if [ $ret -ne 0 ]; then
 		echo "FAIL: ip6erspan"
+		ip netns del "$testns"
 		return 1
 	fi
 	echo "PASS: ip6erspan"

From 51568d69407d939e898a150b13cf48d226f53303 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 2 Mar 2018 17:29:14 +0800
Subject: [PATCH 0630/1678] virtio-net: re enable XDP_REDIRECT for mergeable
 buffer

XDP_REDIRECT support for mergeable buffer was removed since commit
7324f5399b06 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
case"). This is because we don't reserve enough tailroom for struct
skb_shared_info which breaks XDP assumption. So this patch fixes this
by reserving enough tailroom and using fixed size of rx buffer.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bb9e562b893..426dcf7dad4a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	page_off += *len;
 
 	while (--*num_buf) {
+		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		unsigned int buflen;
 		void *buf;
 		int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
+		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
 			put_page(p);
 			goto err_buf;
 		}
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	bool sent;
+	int err;
 
 	head_skb = NULL;
 
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		void *data;
 		u32 act;
 
-		/* This happens when rx buffer size is underestimated */
+		/* This happens when rx buffer size is underestimated
+		 * or headroom is not enough because of the buffer
+		 * was refilled before XDP is set. This should only
+		 * happen for the first several packets, so we don't
+		 * care much about its performance.
+		 */
 		if (unlikely(num_buf > 1 ||
 			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-		if (act != XDP_PASS)
-			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
 		switch (act) {
 		case XDP_PASS:
 			/* recalculate offset to account for any header
@@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
+		case XDP_REDIRECT:
+			err = xdp_do_redirect(dev, &xdp, xdp_prog);
+			if (err) {
+				if (unlikely(xdp_page != page))
+					put_page(xdp_page);
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
@@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-					  struct ewma_pkt_len *avg_pkt_len)
+					  struct ewma_pkt_len *avg_pkt_len,
+					  unsigned int room)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
-	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+	if (room)
+		return PAGE_SIZE - room;
+
+	len = hdr_len +	clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
 				rq->min_buf_len, PAGE_SIZE - hdr_len);
+
 	return ALIGN(len, L1_CACHE_BYTES);
 }
 
@@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
 	char *buf;
 	void *ctx;
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+	/* Extra tailroom is needed to satisfy XDP's assumption. This
+	 * means rx frags coalescing won't work, but consider we've
+	 * disabled GSO for XDP, it won't be a big issue.
+	 */
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
 	get_page(alloc_frag->page);
-	alloc_frag->offset += len + headroom;
+	alloc_frag->offset += len + room;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < len + headroom) {
+	if (hole < len + room) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
 		 * the current buffer.
@@ -2576,12 +2603,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
 	struct virtnet_info *vi = netdev_priv(queue->dev);
 	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
 	struct ewma_pkt_len *avg;
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
 	return sprintf(buf, "%u\n",
-		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+				       SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =

From 3518e40b3cd8ebce52edaad417ecd1d560f15f1b Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 2 Mar 2018 16:03:32 +0100
Subject: [PATCH 0631/1678] ipvlan: forbid vlan devices on top of ipvlan

Currently we allow the creation of 8021q devices on top of
ipvlan, but such devices are nonfunctional, as the underlying
ipvlan rx_hanlder hook can't match the relevant traffic.

Be explicit and forbid the creation of such nonfunctional devices.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 3efc1c92c6a7..4cbe9e27287d 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -178,7 +178,7 @@ static int ipvlan_init(struct net_device *dev)
 	dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
 		     (phy_dev->state & IPVLAN_STATE_MASK);
 	dev->features = phy_dev->features & IPVLAN_FEATURES;
-	dev->features |= NETIF_F_LLTX;
+	dev->features |= NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED;
 	dev->gso_max_size = phy_dev->gso_max_size;
 	dev->gso_max_segs = phy_dev->gso_max_segs;
 	dev->hard_header_len = phy_dev->hard_header_len;

From d143b9e3055358e160d8d5975d889ce5c149625a Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Fri, 2 Mar 2018 20:52:01 -0500
Subject: [PATCH 0632/1678] net sched actions: corrected extack message

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 1f65d6ada9ff..a54fa7b8c217 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1083,7 +1083,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
 			 RTM_NEWACTION, 0, 0) <= 0) {
-		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while deleting TC action");
+		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
 		kfree_skb(skb);
 		return -EINVAL;
 	}

From e574c0eec56a746445cab3f8f7b9a0302dd101f5 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 2 Mar 2018 18:29:04 -0800
Subject: [PATCH 0633/1678] liquidio: Corrected Rx bytes counting

Corrected stats mismatch between Host Tx and its peer Rx stats

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 32ae63b6f20e..8b1ee83134e3 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -571,7 +571,8 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)),
 
 		napi_gro_receive(napi, skb);
 
-		droq->stats.rx_bytes_received += len;
+		droq->stats.rx_bytes_received += len -
+			rh->r_dh.len * BYTES_PER_DHLEN_UNIT;
 		droq->stats.rx_pkts_received++;
 	} else {
 		recv_buffer_free(skb);

From e6c6a92905210484a84a0cae5b013570b7a67b6f Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Sun, 4 Mar 2018 14:12:04 +0200
Subject: [PATCH 0634/1678] net: Make RX-FCS and LRO mutually exclusive

LRO and RX-FCS offloads cannot be enabled at the same time since it is
not clear what should happen to the FCS of each coalesced packet.
The FCS is not really part of the TCP payload, hence cannot be merged
into one big packet. On the other hand, providing one big LRO packet
with one FCS contradicts the RX-FCS feature goal.

Use the fix features mechanism in order to prevent intersection of the
features and drop LRO in case RX-FCS is requested.

Enabling RX-FCS while LRO is enabled will result in:
$ ethtool -K ens6 rx-fcs on
Actual changes:
large-receive-offload: off [requested on]
rx-fcs: on

Signed-off-by: Gal Pressman <galp@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 40fb3aed5df2..8b51f923ce99 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7542,6 +7542,12 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		}
 	}
 
+	/* LRO feature cannot be combined with RX-FCS */
+	if ((features & NETIF_F_LRO) && (features & NETIF_F_RXFCS)) {
+		netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+		features &= ~NETIF_F_LRO;
+	}
+
 	return features;
 }
 

From 200066a5bdcc2d4527562ee3760d405b6ec5b5d5 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 4 Mar 2018 16:35:26 +0200
Subject: [PATCH 0635/1678] selftests: Extend the tc action test for action
 mirror

Currently the tc action test is used only to test mirred redirect
action. This patch extends it for mirred mirror.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/tc_actions.sh       | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 8ab5cf0a960b..6b18ba2d3982 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -45,8 +45,10 @@ switch_destroy()
 	simple_if_fini $swp1 192.0.2.2/24
 }
 
-mirred_egress_redirect_test()
+mirred_egress_test()
 {
+	local action=$1
+
 	RET=0
 
 	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
@@ -59,19 +61,19 @@ mirred_egress_redirect_test()
 	check_fail $? "Matched without redirect rule inserted"
 
 	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
-		$tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+		$tcflags dst_ip 192.0.2.2 action mirred egress $action \
 		dev $swp2
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q
 
 	tc_check_packets "dev $h2 ingress" 101 1
-	check_err $? "Did not match incoming redirected packet"
+	check_err $? "Did not match incoming $action packet"
 
 	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
 	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
 
-	log_test "mirred egress redirect ($tcflags)"
+	log_test "mirred egress $action ($tcflags)"
 }
 
 gact_drop_and_ok_test()
@@ -180,7 +182,8 @@ setup_prepare
 setup_wait
 
 gact_drop_and_ok_test
-mirred_egress_redirect_test
+mirred_egress_test "redirect"
+mirred_egress_test "mirror"
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
@@ -188,7 +191,8 @@ if [[ $? -ne 0 ]]; then
 else
 	tcflags="skip_sw"
 	gact_drop_and_ok_test
-	mirred_egress_redirect_test
+	mirred_egress_test "redirect"
+	mirred_egress_test "mirror"
 	gact_trap_test
 fi
 

From 87ecc95d81d951b0984f2eb9c5c118cb68d0dce8 Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Sun, 4 Mar 2018 10:38:35 -0800
Subject: [PATCH 0636/1678] tcp: add send queue size stat in
 SCM_TIMESTAMPING_OPT_STATS

This patch adds TCP_NLA_SENDQ_SIZE stat into SCM_TIMESTAMPING_OPT_STATS.
It reports no. of bytes present in send queue, when timestamp is
generated.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 1 +
 net/ipv4/tcp.c           | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b4a4f64635fa..93bad2128ef6 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -241,6 +241,7 @@ enum {
 	TCP_NLA_MIN_RTT,        /* minimum RTT */
 	TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
 	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
 
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a33539798bf6..162ba4227446 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3031,7 +3031,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	u32 rate;
 
 	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-			  3 * nla_total_size(sizeof(u32)) +
+			  4 * nla_total_size(sizeof(u32)) +
 			  2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
@@ -3061,6 +3061,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 
 	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
 	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+
+	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	return stats;
 }
 

From be631892948060f44b1ceee3132be1266932071e Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Sun, 4 Mar 2018 10:38:36 -0800
Subject: [PATCH 0637/1678] tcp: add ca_state stat in
 SCM_TIMESTAMPING_OPT_STATS

This patch adds TCP_NLA_CA_STATE stat into SCM_TIMESTAMPING_OPT_STATS.
It reports ca_state of socket, when timestamp is generated.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 1 +
 net/ipv4/tcp.c           | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 93bad2128ef6..4c0ae0faf7ca 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -242,6 +242,7 @@ enum {
 	TCP_NLA_RECUR_RETRANS,  /* Recurring retransmits for the current pkt */
 	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
 	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
+	TCP_NLA_CA_STATE,	/* ca_state of socket */
 
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 162ba4227446..fb350f740f69 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3032,7 +3032,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 
 	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
 			  4 * nla_total_size(sizeof(u32)) +
-			  2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
+			  3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
 
@@ -3063,6 +3063,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
 
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
+	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
 	return stats;
 }
 

From 955dc68cb9b23b42999cafe6df3684309bc686c6 Mon Sep 17 00:00:00 2001
From: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Date: Mon, 5 Mar 2018 11:39:05 +1100
Subject: [PATCH 0638/1678] net/ncsi: Add generic netlink family

Add a generic netlink family for NCSI. This supports three commands;
NCSI_CMD_PKG_INFO which returns information on packages and their
associated channels, NCSI_CMD_SET_INTERFACE which allows a specific
package or package/channel combination to be set as the preferred
choice, and NCSI_CMD_CLEAR_INTERFACE which clears any preferred setting.

Signed-off-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ncsi.h | 115 +++++++++++
 net/ncsi/Makefile         |   2 +-
 net/ncsi/internal.h       |   3 +
 net/ncsi/ncsi-manage.c    |  30 ++-
 net/ncsi/ncsi-netlink.c   | 421 ++++++++++++++++++++++++++++++++++++++
 net/ncsi/ncsi-netlink.h   |  20 ++
 6 files changed, 586 insertions(+), 5 deletions(-)
 create mode 100644 include/uapi/linux/ncsi.h
 create mode 100644 net/ncsi/ncsi-netlink.c
 create mode 100644 net/ncsi/ncsi-netlink.h

diff --git a/include/uapi/linux/ncsi.h b/include/uapi/linux/ncsi.h
new file mode 100644
index 000000000000..4c292ecbb748
--- /dev/null
+++ b/include/uapi/linux/ncsi.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __UAPI_NCSI_NETLINK_H__
+#define __UAPI_NCSI_NETLINK_H__
+
+/**
+ * enum ncsi_nl_commands - supported NCSI commands
+ *
+ * @NCSI_CMD_UNSPEC: unspecified command to catch errors
+ * @NCSI_CMD_PKG_INFO: list package and channel attributes. Requires
+ *	NCSI_ATTR_IFINDEX. If NCSI_ATTR_PACKAGE_ID is specified returns the
+ *	specific package and its channels - otherwise a dump request returns
+ *	all packages and their associated channels.
+ * @NCSI_CMD_SET_INTERFACE: set preferred package and channel combination.
+ *	Requires NCSI_ATTR_IFINDEX and the preferred NCSI_ATTR_PACKAGE_ID and
+ *	optionally the preferred NCSI_ATTR_CHANNEL_ID.
+ * @NCSI_CMD_CLEAR_INTERFACE: clear any preferred package/channel combination.
+ *	Requires NCSI_ATTR_IFINDEX.
+ * @NCSI_CMD_MAX: highest command number
+ */
+enum ncsi_nl_commands {
+	NCSI_CMD_UNSPEC,
+	NCSI_CMD_PKG_INFO,
+	NCSI_CMD_SET_INTERFACE,
+	NCSI_CMD_CLEAR_INTERFACE,
+
+	__NCSI_CMD_AFTER_LAST,
+	NCSI_CMD_MAX = __NCSI_CMD_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_attrs - General NCSI netlink attributes
+ *
+ * @NCSI_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_ATTR_IFINDEX: ifindex of network device using NCSI
+ * @NCSI_ATTR_PACKAGE_LIST: nested array of NCSI_PKG_ATTR attributes
+ * @NCSI_ATTR_PACKAGE_ID: package ID
+ * @NCSI_ATTR_CHANNEL_ID: channel ID
+ * @NCSI_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_attrs {
+	NCSI_ATTR_UNSPEC,
+	NCSI_ATTR_IFINDEX,
+	NCSI_ATTR_PACKAGE_LIST,
+	NCSI_ATTR_PACKAGE_ID,
+	NCSI_ATTR_CHANNEL_ID,
+
+	__NCSI_ATTR_AFTER_LAST,
+	NCSI_ATTR_MAX = __NCSI_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_pkg_attrs - NCSI netlink package-specific attributes
+ *
+ * @NCSI_PKG_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_PKG_ATTR: nested array of package attributes
+ * @NCSI_PKG_ATTR_ID: package ID
+ * @NCSI_PKG_ATTR_FORCED: flag signifying a package has been set as preferred
+ * @NCSI_PKG_ATTR_CHANNEL_LIST: nested array of NCSI_CHANNEL_ATTR attributes
+ * @NCSI_PKG_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_pkg_attrs {
+	NCSI_PKG_ATTR_UNSPEC,
+	NCSI_PKG_ATTR,
+	NCSI_PKG_ATTR_ID,
+	NCSI_PKG_ATTR_FORCED,
+	NCSI_PKG_ATTR_CHANNEL_LIST,
+
+	__NCSI_PKG_ATTR_AFTER_LAST,
+	NCSI_PKG_ATTR_MAX = __NCSI_PKG_ATTR_AFTER_LAST - 1
+};
+
+/**
+ * enum ncsi_nl_channel_attrs - NCSI netlink channel-specific attributes
+ *
+ * @NCSI_CHANNEL_ATTR_UNSPEC: unspecified attributes to catch errors
+ * @NCSI_CHANNEL_ATTR: nested array of channel attributes
+ * @NCSI_CHANNEL_ATTR_ID: channel ID
+ * @NCSI_CHANNEL_ATTR_VERSION_MAJOR: channel major version number
+ * @NCSI_CHANNEL_ATTR_VERSION_MINOR: channel minor version number
+ * @NCSI_CHANNEL_ATTR_VERSION_STR: channel version string
+ * @NCSI_CHANNEL_ATTR_LINK_STATE: channel link state flags
+ * @NCSI_CHANNEL_ATTR_ACTIVE: channels with this flag are in
+ *	NCSI_CHANNEL_ACTIVE state
+ * @NCSI_CHANNEL_ATTR_FORCED: flag signifying a channel has been set as
+ *	preferred
+ * @NCSI_CHANNEL_ATTR_VLAN_LIST: nested array of NCSI_CHANNEL_ATTR_VLAN_IDs
+ * @NCSI_CHANNEL_ATTR_VLAN_ID: VLAN ID being filtered on this channel
+ * @NCSI_CHANNEL_ATTR_MAX: highest attribute number
+ */
+enum ncsi_nl_channel_attrs {
+	NCSI_CHANNEL_ATTR_UNSPEC,
+	NCSI_CHANNEL_ATTR,
+	NCSI_CHANNEL_ATTR_ID,
+	NCSI_CHANNEL_ATTR_VERSION_MAJOR,
+	NCSI_CHANNEL_ATTR_VERSION_MINOR,
+	NCSI_CHANNEL_ATTR_VERSION_STR,
+	NCSI_CHANNEL_ATTR_LINK_STATE,
+	NCSI_CHANNEL_ATTR_ACTIVE,
+	NCSI_CHANNEL_ATTR_FORCED,
+	NCSI_CHANNEL_ATTR_VLAN_LIST,
+	NCSI_CHANNEL_ATTR_VLAN_ID,
+
+	__NCSI_CHANNEL_ATTR_AFTER_LAST,
+	NCSI_CHANNEL_ATTR_MAX = __NCSI_CHANNEL_ATTR_AFTER_LAST - 1
+};
+
+#endif /* __UAPI_NCSI_NETLINK_H__ */
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile
index dd12b564f2e7..436ef68331f2 100644
--- a/net/ncsi/Makefile
+++ b/net/ncsi/Makefile
@@ -1,4 +1,4 @@
 #
 # Makefile for NCSI API
 #
-obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o
+obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o ncsi-netlink.o
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index d30f7bd741d0..8da84312cd3b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -276,6 +276,8 @@ struct ncsi_dev_priv {
 	unsigned int        package_num;     /* Number of packages         */
 	struct list_head    packages;        /* List of packages           */
 	struct ncsi_channel *hot_channel;    /* Channel was ever active    */
+	struct ncsi_package *force_package;  /* Force a specific package   */
+	struct ncsi_channel *force_channel;  /* Force a specific channel   */
 	struct ncsi_request requests[256];   /* Request table              */
 	unsigned int        request_id;      /* Last used request ID       */
 #define NCSI_REQ_START_IDX	1
@@ -318,6 +320,7 @@ extern spinlock_t ncsi_dev_lock;
 	list_for_each_entry_rcu(nc, &np->channels, node)
 
 /* Resources */
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index);
 int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data);
 int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data);
 int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index c989211bbabc..c3695ba0cf94 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -12,7 +12,6 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 
 #include <net/ncsi.h>
 #include <net/net_namespace.h>
@@ -23,6 +22,7 @@
 
 #include "internal.h"
 #include "ncsi-pkt.h"
+#include "ncsi-netlink.h"
 
 LIST_HEAD(ncsi_dev_list);
 DEFINE_SPINLOCK(ncsi_dev_lock);
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
 	return sizes[table];
 }
 
-static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
 {
 	struct ncsi_channel_filter *ncf;
 	int size;
@@ -965,20 +965,37 @@ error:
 
 static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 {
-	struct ncsi_package *np;
-	struct ncsi_channel *nc, *found, *hot_nc;
+	struct ncsi_package *np, *force_package;
+	struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
 	struct ncsi_channel_mode *ncm;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ndp->lock, flags);
 	hot_nc = ndp->hot_channel;
+	force_channel = ndp->force_channel;
+	force_package = ndp->force_package;
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
+	/* Force a specific channel whether or not it has link if we have been
+	 * configured to do so
+	 */
+	if (force_package && force_channel) {
+		found = force_channel;
+		ncm = &found->modes[NCSI_MODE_LINK];
+		if (!(ncm->data[2] & 0x1))
+			netdev_info(ndp->ndev.dev,
+				    "NCSI: Channel %u forced, but it is link down\n",
+				    found->id);
+		goto out;
+	}
+
 	/* The search is done once an inactive channel with up
 	 * link is found.
 	 */
 	found = NULL;
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		if (ndp->force_package && np != ndp->force_package)
+			continue;
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
 			spin_lock_irqsave(&nc->lock, flags);
 
@@ -1594,6 +1611,9 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 	ndp->ptype.dev = dev;
 	dev_add_pack(&ndp->ptype);
 
+	/* Set up generic netlink interface */
+	ncsi_init_netlink(dev);
+
 	return nd;
 }
 EXPORT_SYMBOL_GPL(ncsi_register_dev);
@@ -1673,6 +1693,8 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
 #endif
 	spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
+	ncsi_unregister_netlink(nd->dev);
+
 	kfree(ndp);
 }
 EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
new file mode 100644
index 000000000000..d4201665a580
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <net/genetlink.h>
+#include <net/ncsi.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <uapi/linux/ncsi.h>
+
+#include "internal.h"
+#include "ncsi-netlink.h"
+
+static struct genl_family ncsi_genl_family;
+
+static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
+	[NCSI_ATTR_IFINDEX] =		{ .type = NLA_U32 },
+	[NCSI_ATTR_PACKAGE_LIST] =	{ .type = NLA_NESTED },
+	[NCSI_ATTR_PACKAGE_ID] =	{ .type = NLA_U32 },
+	[NCSI_ATTR_CHANNEL_ID] =	{ .type = NLA_U32 },
+};
+
+static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
+{
+	struct ncsi_dev_priv *ndp;
+	struct net_device *dev;
+	struct ncsi_dev *nd;
+	struct ncsi_dev;
+
+	if (!net)
+		return NULL;
+
+	dev = dev_get_by_index(net, ifindex);
+	if (!dev) {
+		pr_err("NCSI netlink: No device for ifindex %u\n", ifindex);
+		return NULL;
+	}
+
+	nd = ncsi_find_dev(dev);
+	ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
+
+	dev_put(dev);
+	return ndp;
+}
+
+static int ncsi_write_channel_info(struct sk_buff *skb,
+				   struct ncsi_dev_priv *ndp,
+				   struct ncsi_channel *nc)
+{
+	struct nlattr *vid_nest;
+	struct ncsi_channel_filter *ncf;
+	struct ncsi_channel_mode *m;
+	u32 *data;
+	int i;
+
+	nla_put_u32(skb, NCSI_CHANNEL_ATTR_ID, nc->id);
+	m = &nc->modes[NCSI_MODE_LINK];
+	nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
+	if (nc->state == NCSI_CHANNEL_ACTIVE)
+		nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
+	if (ndp->force_channel == nc)
+		nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
+
+	nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
+	nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+	nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
+
+	vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
+	if (!vid_nest)
+		return -ENOMEM;
+	ncf = nc->filters[NCSI_FILTER_VLAN];
+	i = -1;
+	if (ncf) {
+		while ((i = find_next_bit((void *)&ncf->bitmap, ncf->total,
+					  i + 1)) < ncf->total) {
+			data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, i);
+			/* Uninitialised channels will have 'zero' vlan ids */
+			if (!data || !*data)
+				continue;
+			nla_put_u16(skb, NCSI_CHANNEL_ATTR_VLAN_ID,
+				    *(u16 *)data);
+		}
+	}
+	nla_nest_end(skb, vid_nest);
+
+	return 0;
+}
+
+static int ncsi_write_package_info(struct sk_buff *skb,
+				   struct ncsi_dev_priv *ndp, unsigned int id)
+{
+	struct nlattr *pnest, *cnest, *nest;
+	struct ncsi_package *np;
+	struct ncsi_channel *nc;
+	bool found;
+	int rc;
+
+	if (id > ndp->package_num) {
+		netdev_info(ndp->ndev.dev, "NCSI: No package with id %u\n", id);
+		return -ENODEV;
+	}
+
+	found = false;
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		if (np->id != id)
+			continue;
+		pnest = nla_nest_start(skb, NCSI_PKG_ATTR);
+		if (!pnest)
+			return -ENOMEM;
+		nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
+		if (ndp->force_package == np)
+			nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
+		cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
+		if (!cnest) {
+			nla_nest_cancel(skb, pnest);
+			return -ENOMEM;
+		}
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR);
+			if (!nest) {
+				nla_nest_cancel(skb, cnest);
+				nla_nest_cancel(skb, pnest);
+				return -ENOMEM;
+			}
+			rc = ncsi_write_channel_info(skb, ndp, nc);
+			if (rc) {
+				nla_nest_cancel(skb, nest);
+				nla_nest_cancel(skb, cnest);
+				nla_nest_cancel(skb, pnest);
+				return rc;
+			}
+			nla_nest_end(skb, nest);
+		}
+		nla_nest_end(skb, cnest);
+		nla_nest_end(skb, pnest);
+		found = true;
+	}
+
+	if (!found)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
+{
+	struct ncsi_dev_priv *ndp;
+	unsigned int package_id;
+	struct sk_buff *skb;
+	struct nlattr *attr;
+	void *hdr;
+	int rc;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(genl_info_net(info),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+			  &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
+	if (!hdr) {
+		kfree(skb);
+		return -EMSGSIZE;
+	}
+
+	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+
+	attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+	rc = ncsi_write_package_info(skb, ndp, package_id);
+
+	if (rc) {
+		nla_nest_cancel(skb, attr);
+		goto err;
+	}
+
+	nla_nest_end(skb, attr);
+
+	genlmsg_end(skb, hdr);
+	return genlmsg_reply(skb, info);
+
+err:
+	genlmsg_cancel(skb, hdr);
+	kfree(skb);
+	return rc;
+}
+
+static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	struct nlattr *attrs[NCSI_ATTR_MAX];
+	struct ncsi_package *np, *package;
+	struct ncsi_dev_priv *ndp;
+	unsigned int package_id;
+	struct nlattr *attr;
+	void *hdr;
+	int rc;
+
+	rc = genlmsg_parse(cb->nlh, &ncsi_genl_family, attrs, NCSI_ATTR_MAX,
+			   ncsi_genl_policy, NULL);
+	if (rc)
+		return rc;
+
+	if (!attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(skb->sk)),
+			       nla_get_u32(attrs[NCSI_ATTR_IFINDEX]));
+
+	if (!ndp)
+		return -ENODEV;
+
+	package_id = cb->args[0];
+	package = NULL;
+	NCSI_FOR_EACH_PACKAGE(ndp, np)
+		if (np->id == package_id)
+			package = np;
+
+	if (!package)
+		return 0; /* done */
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &ncsi_genl_family, 0,  NCSI_CMD_PKG_INFO);
+	if (!hdr) {
+		rc = -EMSGSIZE;
+		goto err;
+	}
+
+	attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+	rc = ncsi_write_package_info(skb, ndp, package->id);
+	if (rc) {
+		nla_nest_cancel(skb, attr);
+		goto err;
+	}
+
+	nla_nest_end(skb, attr);
+	genlmsg_end(skb, hdr);
+
+	cb->args[0] = package_id + 1;
+
+	return skb->len;
+err:
+	genlmsg_cancel(skb, hdr);
+	return rc;
+}
+
+static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+	struct ncsi_package *np, *package;
+	struct ncsi_channel *nc, *channel;
+	u32 package_id, channel_id;
+	struct ncsi_dev_priv *ndp;
+	unsigned long flags;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+	package = NULL;
+
+	spin_lock_irqsave(&ndp->lock, flags);
+
+	NCSI_FOR_EACH_PACKAGE(ndp, np)
+		if (np->id == package_id)
+			package = np;
+	if (!package) {
+		/* The user has set a package that does not exist */
+		return -ERANGE;
+	}
+
+	channel = NULL;
+	if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+		/* Allow any channel */
+		channel_id = NCSI_RESERVED_CHANNEL;
+	} else {
+		channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+		NCSI_FOR_EACH_CHANNEL(package, nc)
+			if (nc->id == channel_id)
+				channel = nc;
+	}
+
+	if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
+		/* The user has set a channel that does not exist on this
+		 * package
+		 */
+		netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
+			    channel_id);
+		return -ERANGE;
+	}
+
+	ndp->force_package = package;
+	ndp->force_channel = channel;
+	spin_unlock_irqrestore(&ndp->lock, flags);
+
+	netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
+		    package_id, channel_id,
+		    channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+
+	/* Bounce the NCSI channel to set changes */
+	ncsi_stop_dev(&ndp->ndev);
+	ncsi_start_dev(&ndp->ndev);
+
+	return 0;
+}
+
+static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
+{
+	struct ncsi_dev_priv *ndp;
+	unsigned long flags;
+
+	if (!info || !info->attrs)
+		return -EINVAL;
+
+	if (!info->attrs[NCSI_ATTR_IFINDEX])
+		return -EINVAL;
+
+	ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+			       nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+	if (!ndp)
+		return -ENODEV;
+
+	/* Clear any override */
+	spin_lock_irqsave(&ndp->lock, flags);
+	ndp->force_package = NULL;
+	ndp->force_channel = NULL;
+	spin_unlock_irqrestore(&ndp->lock, flags);
+	netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
+
+	/* Bounce the NCSI channel to set changes */
+	ncsi_stop_dev(&ndp->ndev);
+	ncsi_start_dev(&ndp->ndev);
+
+	return 0;
+}
+
+static const struct genl_ops ncsi_ops[] = {
+	{
+		.cmd = NCSI_CMD_PKG_INFO,
+		.policy = ncsi_genl_policy,
+		.doit = ncsi_pkg_info_nl,
+		.dumpit = ncsi_pkg_info_all_nl,
+		.flags = 0,
+	},
+	{
+		.cmd = NCSI_CMD_SET_INTERFACE,
+		.policy = ncsi_genl_policy,
+		.doit = ncsi_set_interface_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NCSI_CMD_CLEAR_INTERFACE,
+		.policy = ncsi_genl_policy,
+		.doit = ncsi_clear_interface_nl,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+static struct genl_family ncsi_genl_family __ro_after_init = {
+	.name = "NCSI",
+	.version = 0,
+	.maxattr = NCSI_ATTR_MAX,
+	.module = THIS_MODULE,
+	.ops = ncsi_ops,
+	.n_ops = ARRAY_SIZE(ncsi_ops),
+};
+
+int ncsi_init_netlink(struct net_device *dev)
+{
+	int rc;
+
+	rc = genl_register_family(&ncsi_genl_family);
+	if (rc)
+		netdev_err(dev, "ncsi: failed to register netlink family\n");
+
+	return rc;
+}
+
+int ncsi_unregister_netlink(struct net_device *dev)
+{
+	int rc;
+
+	rc = genl_unregister_family(&ncsi_genl_family);
+	if (rc)
+		netdev_err(dev, "ncsi: failed to unregister netlink family\n");
+
+	return rc;
+}
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
new file mode 100644
index 000000000000..91a5c256f8c4
--- /dev/null
+++ b/net/ncsi/ncsi-netlink.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright Samuel Mendoza-Jonas, IBM Corporation 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NCSI_NETLINK_H__
+#define __NCSI_NETLINK_H__
+
+#include <linux/netdevice.h>
+
+#include "internal.h"
+
+int ncsi_init_netlink(struct net_device *dev);
+int ncsi_unregister_netlink(struct net_device *dev);
+
+#endif /* __NCSI_NETLINK_H__ */

From 190f887c3cd0d07e53a7d44162c951451f4f5528 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 4 Mar 2018 17:37:47 -0800
Subject: [PATCH 0639/1678] selftests: forwarding: Add suppport to create veth
 interfaces

For tests using veth interfaces, the test infrastructure can create
the netdevs if they do not exist. Arguably this is a preferred approach
since the tests require p$N and p$(N+1) to be pairs.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/forwarding.config.sample   |  5 +++
 tools/testing/selftests/net/forwarding/lib.sh | 35 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index ab235c124f20..df54c9eb5100 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -14,6 +14,11 @@ NETIFS[p6]=veth5
 NETIFS[p7]=veth6
 NETIFS[p8]=veth7
 
+NETIF_TYPE=veth
+
+# only virtual interfaces (veth) can be created by test infra
+#NETIF_CREATE=yes
+
 ##############################################################################
 # Defines
 
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index d0af52109360..273511ef2b43 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -76,6 +76,41 @@ done
 ##############################################################################
 # Network interfaces configuration
 
+create_netif_veth()
+{
+	local i
+
+	for i in $(eval echo {1..$NUM_NETIFS}); do
+		local j=$((i+1))
+
+		ip link show dev ${NETIFS[p$i]} &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			ip link add ${NETIFS[p$i]} type veth \
+				peer name ${NETIFS[p$j]}
+			if [[ $? -ne 0 ]]; then
+				echo "Failed to create netif"
+				exit 1
+			fi
+		fi
+		i=$j
+	done
+}
+
+create_netif()
+{
+	case "$NETIF_TYPE" in
+	veth) create_netif_veth
+	      ;;
+	*) echo "Can not create interfaces of type \'$NETIF_TYPE\'"
+	   exit 1
+	   ;;
+	esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+	create_netif
+fi
+
 for i in $(eval echo {1..$NUM_NETIFS}); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then

From ec012f3b85159102662264bb5c3436c849d2b1b9 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:30:41 +0300
Subject: [PATCH 0640/1678] net: Convert broute_net_ops, frame_filter_net_ops
 and frame_nat_net_ops

These pernet_operations use ebt_register_table() and
ebt_unregister_table() to act on the tables, which
are used as argument in ebt_do_table(), called from
ebtables hooks.

Since there are no net-related bridge packets in-flight,
when the init and exit methods are called, these
pernet_operations are safe to be executed in parallel
with any other.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebtable_broute.c | 1 +
 net/bridge/netfilter/ebtable_filter.c | 1 +
 net/bridge/netfilter/ebtable_nat.c    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 276b60262981..f070b5e5b9dd 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -77,6 +77,7 @@ static void __net_exit broute_net_exit(struct net *net)
 static struct pernet_operations broute_net_ops = {
 	.init = broute_net_init,
 	.exit = broute_net_exit,
+	.async = true,
 };
 
 static int __init ebtable_broute_init(void)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index c41da5fac84f..4151afc8efcc 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -105,6 +105,7 @@ static void __net_exit frame_filter_net_exit(struct net *net)
 static struct pernet_operations frame_filter_net_ops = {
 	.init = frame_filter_net_init,
 	.exit = frame_filter_net_exit,
+	.async = true,
 };
 
 static int __init ebtable_filter_init(void)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 08df7406ecb3..b8da2dfe2ec5 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -105,6 +105,7 @@ static void __net_exit frame_nat_net_exit(struct net *net)
 static struct pernet_operations frame_nat_net_ops = {
 	.init = frame_nat_net_init,
 	.exit = frame_nat_net_exit,
+	.async = true,
 };
 
 static int __init ebtable_nat_init(void)

From 3822034569ac8ae39556e50fa165dc7d4276f452 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:30:50 +0300
Subject: [PATCH 0641/1678] net: Convert log pernet_operations

These pernet_operations use nf_log_set() and nf_log_unset()
in their methods:

	nf_log_bridge_net_ops
	nf_log_arp_net_ops
	nf_log_ipv4_net_ops
	nf_log_ipv6_net_ops
	nf_log_netdev_net_ops

Nobody can send such a packet to a net before it's became
registered, nobody can send a packet after all netdevices
are unregistered. So, these pernet_operations are able
to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/nf_log_bridge.c | 1 +
 net/ipv4/netfilter/nf_log_arp.c      | 1 +
 net/ipv4/netfilter/nf_log_ipv4.c     | 1 +
 net/ipv6/netfilter/nf_log_ipv6.c     | 1 +
 net/netfilter/nf_log_netdev.c        | 1 +
 5 files changed, 5 insertions(+)

diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index bd2b3c78f59b..91bfc2ac055a 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -48,6 +48,7 @@ static void __net_exit nf_log_bridge_net_exit(struct net *net)
 static struct pernet_operations nf_log_bridge_net_ops = {
 	.init = nf_log_bridge_net_init,
 	.exit = nf_log_bridge_net_exit,
+	.async = true,
 };
 
 static int __init nf_log_bridge_init(void)
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index df5c2a2061a4..162293469ac2 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -122,6 +122,7 @@ static void __net_exit nf_log_arp_net_exit(struct net *net)
 static struct pernet_operations nf_log_arp_net_ops = {
 	.init = nf_log_arp_net_init,
 	.exit = nf_log_arp_net_exit,
+	.async = true,
 };
 
 static int __init nf_log_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..7a06de140f3c 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -358,6 +358,7 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv4_net_ops = {
 	.init = nf_log_ipv4_net_init,
 	.exit = nf_log_ipv4_net_exit,
+	.async = true,
 };
 
 static int __init nf_log_ipv4_init(void)
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index b397a8fe88b9..0220e584589c 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -390,6 +390,7 @@ static void __net_exit nf_log_ipv6_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv6_net_ops = {
 	.init = nf_log_ipv6_net_init,
 	.exit = nf_log_ipv6_net_exit,
+	.async = true,
 };
 
 static int __init nf_log_ipv6_init(void)
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
index 350eb147754d..254c2c6bde48 100644
--- a/net/netfilter/nf_log_netdev.c
+++ b/net/netfilter/nf_log_netdev.c
@@ -47,6 +47,7 @@ static void __net_exit nf_log_netdev_net_exit(struct net *net)
 static struct pernet_operations nf_log_netdev_net_ops = {
 	.init = nf_log_netdev_net_init,
 	.exit = nf_log_netdev_net_exit,
+	.async = true,
 };
 
 static int __init nf_log_netdev_init(void)

From c60a246cd366b5da5a12cc972a25e41bc536706f Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:00 +0300
Subject: [PATCH 0642/1678] net: Convert arp_tables_net_ops and
 ip6_tables_net_ops

These pernet_operations call xt_proto_init() and xt_proto_fini(),
which just register and unregister /proc entries.
They are safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 1 +
 net/ipv6/netfilter/ip6_tables.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e3e420f3ba7b..c36ffce3c812 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1635,6 +1635,7 @@ static void __net_exit arp_tables_net_exit(struct net *net)
 static struct pernet_operations arp_tables_net_ops = {
 	.init = arp_tables_net_init,
 	.exit = arp_tables_net_exit,
+	.async = true,
 };
 
 static int __init arp_tables_init(void)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62358b93bbac..4de8ac1e5af4 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1928,6 +1928,7 @@ static void __net_exit ip6_tables_net_exit(struct net *net)
 static struct pernet_operations ip6_tables_net_ops = {
 	.init = ip6_tables_net_init,
 	.exit = ip6_tables_net_exit,
+	.async = true,
 };
 
 static int __init ip6_tables_init(void)

From d217472410a033cdef5b8fba4393b466ee3c3bbc Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:10 +0300
Subject: [PATCH 0643/1678] net: Convert caif_net_ops

Init method just allocates memory for new cfg, and
assigns net_generic(net, caif_net_id). Despite there is
synchronize_rcu() on error path in cfcnfg_create(),
in real this function does not use global lists,
so it looks like this synchronize_rcu() is some legacy
inheritance. Exit method removes caif devices under
rtnl_lock().

There could be a problem, if someone from foreign net
pernet_operations dereference caif_net_id of this net.
It's dereferenced in get_cfcnfg() and caif_device_list().

get_cfcnfg() is used from netdevice notifiers, where
they are called under rtnl_lock(). The notifiers can't
be called from foreign nets pernet_operations. Also,
it's used from caif_disconnect_client() and from
caif_connect_client(). The both of the functions work
with caif socket, and there is the only possibility
to have a socket, when the net is dead. This may happen
only of the socket was created as kern using sk_alloc().
Grep by PF_CAIF shows we do not create kern caif sockets,
so get_cfcnfg() is safe.

caif_device_list() is used in netdevice notifiers and exit
method under rtnl lock. Also, from caif_get() used in
the netdev notifiers and in caif_flow_cb(). The last item
is skb destructor. Since there are no kernel caif sockets
nobody can send net a packet in parallel with init/exit,
so this is also safe.

So, these pernet_operations are safe to be async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e0adcd123f48..7a78268cc572 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -544,6 +544,7 @@ static struct pernet_operations caif_net_ops = {
 	.exit = caif_exit_net,
 	.id   = &caif_net_id,
 	.size = sizeof(struct caif_net),
+	.async = true,
 };
 
 /* Initialize Caif devices list */

From 111da7adc1276d44d20c3973f4b39d62f83df056 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:19 +0300
Subject: [PATCH 0644/1678] net: Convert cangw_pernet_ops

These pernet_operations have a deal with cgw_list,
and the rest of accesses are made under rtnl_lock().
The only exception is cgw_dump_jobs(), which is
accessed under rcu_read_lock(). cgw_dump_jobs() is
called on netlink request, and it does not seem,
foreign pernet_operations want to send a net such
the messages. So, we mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/gw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/can/gw.c b/net/can/gw.c
index 398dd0395ad9..08e97668d5cf 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1010,6 +1010,7 @@ static void __net_exit cangw_pernet_exit(struct net *net)
 static struct pernet_operations cangw_pernet_ops = {
 	.init = cangw_pernet_init,
 	.exit = cangw_pernet_exit,
+	.async = true,
 };
 
 static __init int cgw_module_init(void)

From 5368bd72cd7aacd0e3f80f61e9b2a096fc81cc10 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:28 +0300
Subject: [PATCH 0645/1678] net: Convert dccp_v4_ops

These pernet_operations create and destroy net::dccp::v4_ctl_sk.
It looks like another pernet_operations don't want to send
dccp packets to dying or creating net. Batch method similar
to ipv4/ipv6 sockets and it has to be safe to be executed
in parallel with anything else. So, we mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e65fcb45c3f6..13ad28ab1e79 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1031,6 +1031,7 @@ static struct pernet_operations dccp_v4_ops = {
 	.init	= dccp_v4_init_net,
 	.exit	= dccp_v4_exit_net,
 	.exit_batch = dccp_v4_exit_batch,
+	.async	= true,
 };
 
 static int __init dccp_v4_init(void)

From 16b0c0c4d9a799979b0d3f387821c93bcc301f79 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:37 +0300
Subject: [PATCH 0646/1678] net: Convert dccp_v6_ops

These pernet_operations looks similar to dccp_v4_ops,
and they are also safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5df7857fc0f3..2f48c020f8c3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1116,6 +1116,7 @@ static struct pernet_operations dccp_v6_ops = {
 	.init   = dccp_v6_init_net,
 	.exit   = dccp_v6_exit_net,
 	.exit_batch = dccp_v6_exit_batch,
+	.async	= true,
 };
 
 static int __init dccp_v6_init(void)

From 6c6c566e6dd80bec6f5bbb9c36e397ed9b109cdb Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:47 +0300
Subject: [PATCH 0647/1678] net: Convert fou_net_ops

These pernet_operations initialize and destroy
pernet net_generic(net, fou_net_id) list.
The rest of net_generic(net, fou_net_id) accesses
may happen after netlink message, and in-tree
pernet_operations do not send FOU_GENL_NAME messages.
So, these pernet_operations are safe to be marked
as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fou.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..d3e1a9af478b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1081,6 +1081,7 @@ static struct pernet_operations fou_net_ops = {
 	.exit = fou_exit_net,
 	.id   = &fou_net_id,
 	.size = sizeof(struct fou_net),
+	.async = true,
 };
 
 static int __init fou_init(void)

From a5a179b6dff15ae0b89cb601b0a0242cff60b8e3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:31:55 +0300
Subject: [PATCH 0648/1678] net: Convert ip_set_net_ops

These pernet_operations initialize and destroy
net_generic(net, ip_set_net_id)-related data.
Since ip_set is under CONFIG_IP_SET, it's easy
to watch drivers, which depend on this config.
All of them are in net/netfilter/ipset directory,
except of net/netfilter/xt_set.c. There are no
more drivers, which use ip_set, and all of
the above don't register another pernet_operations.
Also, there are is no indirect users, as header
file include/linux/netfilter/ipset/ip_set.h does
not define indirect users by something like this:

	#ifdef CONFIG_IP_SET
	extern func(void);
	#else
	static inline func(void);
	#endif

So, there are no more pernet operations, dereferencing
net_generic(net, ip_set_net_id).

ip_set_net_ops are OK to be executed in parallel
for several net, so we mark them as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipset/ip_set_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 975a85a48d39..2523ebe2b3cc 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2094,7 +2094,8 @@ static struct pernet_operations ip_set_net_ops = {
 	.init	= ip_set_net_init,
 	.exit   = ip_set_net_exit,
 	.id	= &ip_set_net_id,
-	.size	= sizeof(struct ip_set_net)
+	.size	= sizeof(struct ip_set_net),
+	.async	= true,
 };
 
 static int __init

From 467d14b3073960645902ea0d18c1cbe645013638 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:32:06 +0300
Subject: [PATCH 0649/1678] net: Convert nf_conntrack_net_ops

These pernet_operations register and unregister sysctl and /proc
entries. Exit batch method also waits till all per-net conntracks
are dead. Thus, they are safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_standalone.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9123fdec5e14..3cdce391362e 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -705,6 +705,7 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 static struct pernet_operations nf_conntrack_net_ops = {
 	.init		= nf_conntrack_pernet_init,
 	.exit_batch	= nf_conntrack_pernet_exit,
+	.async		= true,
 };
 
 static int __init nf_conntrack_standalone_init(void)

From b04a3d098c4ca176849ee579880c63c052ce6776 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:32:15 +0300
Subject: [PATCH 0650/1678] net: Convert ctnetlink_net_ops

These pernet_operations register and unregister
two conntrack notifiers, and they seem to be safe
to be executed in parallel.

General/not related to async pernet_operations JFI:
ctnetlink_net_exit_batch() actions are grouped in batch,
and this could look like there is synchronize_rcu()
is forgotten. But there is synchronize_rcu() on module
exit patch (in ctnetlink_exit()), so this batch may
be reworked as simple .exit method.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dd177ebee9aa..8884d302d33a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3417,6 +3417,7 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
 static struct pernet_operations ctnetlink_net_ops = {
 	.init		= ctnetlink_net_init,
 	.exit_batch	= ctnetlink_net_exit_batch,
+	.async		= true,
 };
 
 static int __init ctnetlink_init(void)

From c29babb7fe26e1507e8c236abbfed6e944e42651 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 5 Mar 2018 14:32:23 +0300
Subject: [PATCH 0651/1678] net: Convert proto_gre_net_ops

These pernet_operations register and unregister sysctl.
nf_conntrack_l4proto_gre4->init_net is simple memory
initializer. Also, exit method removes gre keymap_list,
which is per-net. This looks safe to be executed
in parallel with other pernet_operations.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_gre.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d049ea5a3770..9bcd72fe91f9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -406,6 +406,7 @@ static struct pernet_operations proto_gre_net_ops = {
 	.exit = proto_gre_net_exit,
 	.id   = &proto_gre_net_id,
 	.size = sizeof(struct netns_proto_gre),
+	.async = true,
 };
 
 static int __init nf_ct_proto_gre_init(void)

From 1b8b062a99dc76ba55b3ded97e90717768da47a9 Mon Sep 17 00:00:00 2001
From: Corinna Vinschen <vinschen@redhat.com>
Date: Wed, 17 Jan 2018 11:53:39 +0100
Subject: [PATCH 0652/1678] igb: add VF trust infrastructure

* Add a per-VF value to know if a VF is trusted, by default don't
  trust VFs.

* Implement netdev op to trust VFs (igb_ndo_set_vf_trust) and add
  trust status to ndo_get_vf_config output.

* Allow a trusted VF to change MAC and MAC filters even if MAC
  has been administratively set.

Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |  1 +
 drivers/net/ethernet/intel/igb/igb_main.c | 30 ++++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 1c6b8d9176a8..55d6f17d5799 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -109,6 +109,7 @@ struct vf_data_storage {
 	u16 pf_qos;
 	u16 tx_rate;
 	bool spoofchk_enabled;
+	bool trusted;
 };
 
 /* Number of unicast MAC filters reserved for the PF in the RAR registers */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b88fae785369..33f23ce99796 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -190,6 +190,8 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 				   bool setting);
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf,
+				bool setting);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 				 struct ifla_vf_info *ivi);
 static void igb_check_vf_rate_limit(struct igb_adapter *);
@@ -2527,6 +2529,7 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
 	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
+	.ndo_set_vf_trust	= igb_ndo_set_vf_trust,
 	.ndo_get_vf_config	= igb_ndo_get_vf_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= igb_netpoll,
@@ -6383,6 +6386,9 @@ static int igb_vf_configure(struct igb_adapter *adapter, int vf)
 	/* By default spoof check is enabled for all VFs */
 	adapter->vf_data[vf].spoofchk_enabled = true;
 
+	/* By default VFs are not trusted */
+	adapter->vf_data[vf].trusted = false;
+
 	return 0;
 }
 
@@ -6940,13 +6946,13 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
 		}
 		break;
 	case E1000_VF_MAC_FILTER_ADD:
-		if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+		if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+		    !vf_data->trusted) {
 			dev_warn(&pdev->dev,
 				 "VF %d requested MAC filter but is administratively denied\n",
 				 vf);
 			return -EINVAL;
 		}
-
 		if (!is_valid_ether_addr(addr)) {
 			dev_warn(&pdev->dev,
 				 "VF %d attempted to set invalid MAC filter\n",
@@ -6998,7 +7004,8 @@ static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
 	int ret = 0;
 
 	if (!info) {
-		if (vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) {
+		if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+		    !vf_data->trusted) {
 			dev_warn(&pdev->dev,
 				 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
 				 vf);
@@ -8934,6 +8941,22 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 	return 0;
 }
 
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+	if (adapter->vf_data[vf].trusted == setting)
+		return 0;
+
+	adapter->vf_data[vf].trusted = setting;
+
+	dev_info(&adapter->pdev->dev, "VF %u is %strusted\n",
+		 vf, setting ? "" : "not ");
+	return 0;
+}
+
 static int igb_ndo_get_vf_config(struct net_device *netdev,
 				 int vf, struct ifla_vf_info *ivi)
 {
@@ -8947,6 +8970,7 @@ static int igb_ndo_get_vf_config(struct net_device *netdev,
 	ivi->vlan = adapter->vf_data[vf].pf_vlan;
 	ivi->qos = adapter->vf_data[vf].pf_qos;
 	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+	ivi->trusted = adapter->vf_data[vf].trusted;
 	return 0;
 }
 

From 17a0b9add6e954b0a6c0108775df6584886b3b21 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 23 Jan 2018 13:28:41 +0300
Subject: [PATCH 0653/1678] igb: Do not call netif_device_detach() when PCIe
 link goes missing

When the driver notices that PCIe link is gone by reading 0xffffffff
from a register it clears hw->hw_addr and then calls netif_device_detach().
This happens when the PCIe device is physically unplugged for example
the user disconnected the Thunderbolt cable.

However, netif_device_detach() prevents netif_unregister() from bringing
the device down properly including tearing down MSI-X vectors. This
triggers following crash during the driver removal:

  igb 0000:0b:00.0 enp11s0f0: PCIe link lost, device now detached
  ------------[ cut here ]------------
  kernel BUG at drivers/pci/msi.c:352!
  invalid opcode: 0000 [#1] PREEMPT SMP PTI
  ...
  Call Trace:
   pci_disable_msix+0xc9/0xf0
   igb_reset_interrupt_capability+0x58/0x60 [igb]
   igb_remove+0x90/0x100 [igb]
   pci_device_remove+0x31/0xa0
   device_release_driver_internal+0x152/0x210
   pci_stop_bus_device+0x78/0xa0
   pci_stop_bus_device+0x38/0xa0
   pci_stop_bus_device+0x38/0xa0
   pci_stop_bus_device+0x26/0xa0
   pci_stop_bus_device+0x38/0xa0
   pci_stop_and_remove_bus_device+0x9/0x20
   trim_stale_devices+0xee/0x130
   ? _raw_spin_unlock_irqrestore+0xf/0x30
   trim_stale_devices+0x8f/0x130
   ? _raw_spin_unlock_irqrestore+0xf/0x30
   trim_stale_devices+0xa1/0x130
   ? get_slot_status+0x8b/0xc0
   acpiphp_check_bridge.part.7+0xf9/0x140
   acpiphp_hotplug_notify+0x170/0x1f0
   ...

To prevent the crash do not call netif_device_detach() in igb_rd32().
This should be fine because hw->hw_addr is set to NULL preventing future
hardware access of the now missing device.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=198181
Reported-by: Ferenc Boldog <ferenc.boldog@gmail.com>
Reported-by: Nikolay Bogoychev <nheart@gmail.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 33f23ce99796..229b72aab17d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -776,8 +776,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
 	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
 		struct net_device *netdev = igb->netdev;
 		hw->hw_addr = NULL;
-		netif_device_detach(netdev);
-		netdev_err(netdev, "PCIe link lost, device now detached\n");
+		netdev_err(netdev, "PCIe link lost\n");
 	}
 
 	return value;

From 0a6f2f05a2f50cdfea863887ffc3ad315fa4daf1 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 6 Feb 2018 20:47:59 +0100
Subject: [PATCH 0654/1678] igb: Fix a test with HWTSTAMP_TX_ON

'HWTSTAMP_TX_ON' should be handled as a value, not as a bit mask.
The modified code should behave the same, because HWTSTAMP_TX_ON is 1
and no other possible values of 'tx_type' would match the test.
However, this is more future-proof, should other values be allowed one day.

See 'struct hwtstamp_config' in 'include/uapi/linux/net_tstamp.h'

This fixes a warning reported by smatch:
   igb_xmit_frame_ring() warn: bit shifter 'HWTSTAMP_TX_ON' used for logical '&'

Fixes: 26bd4e2db06be ("igb: protect TX timestamping from API misuse")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 229b72aab17d..715bb32e6901 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5749,7 +5749,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
 		struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
 
-		if (adapter->tstamp_config.tx_type & HWTSTAMP_TX_ON &&
+		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
 		    !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
 					   &adapter->state)) {
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;

From 76b12974a3981db2a1ae60d62f55dd839d07ac85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sun, 4 Mar 2018 03:29:51 +0100
Subject: [PATCH 0655/1678] net: core: dst_cache: Fix a typo in a comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index 72fd5067c353..844906fbf8c9 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -71,7 +71,7 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
  *	dst_cache_reset - invalidate the cache contents
  *	@dst_cache: the cache
  *
- *	This do not free the cached dst to avoid races and contentions.
+ *	This does not free the cached dst to avoid races and contentions.
  *	the dst will be freed on later cache lookup.
  */
 static inline void dst_cache_reset(struct dst_cache *dst_cache)

From 4c1342d967cb556ea1c0f34271b125deeb25f0f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sun, 4 Mar 2018 03:29:52 +0100
Subject: [PATCH 0656/1678] net: core: dst_cache_set_ip6: Rename 'addr'
 parameter to 'saddr' for consistency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The other dst_cache_{get,set}_ip{4,6} functions, and the doc comment for
dst_cache_set_ip6 use 'saddr' for their source address parameter. Rename
the parameter to increase consistency.

This fixes the following kernel-doc warnings:

./include/net/dst_cache.h:58: warning: Function parameter or member 'addr' not described in 'dst_cache_set_ip6'
./include/net/dst_cache.h:58: warning: Excess function parameter 'saddr' description in 'dst_cache_set_ip6'

Fixes: 911362c70df5 ("net: add dst_cache support")
Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst_cache.h | 2 +-
 net/core/dst_cache.c    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index 844906fbf8c9..67634675e919 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -54,7 +54,7 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
  *	local BH must be disabled.
  */
 void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
-		       const struct in6_addr *addr);
+		       const struct in6_addr *saddr);
 
 /**
  *	dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 554d36449231..64cef977484a 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -107,7 +107,7 @@ EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
 
 #if IS_ENABLED(CONFIG_IPV6)
 void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
-		       const struct in6_addr *addr)
+		       const struct in6_addr *saddr)
 {
 	struct dst_cache_pcpu *idst;
 
@@ -117,7 +117,7 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
 	idst = this_cpu_ptr(dst_cache->cache);
 	dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
 				  rt6_get_cookie((struct rt6_info *)dst));
-	idst->in6_saddr = *addr;
+	idst->in6_saddr = *saddr;
 }
 EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
 

From 8eb1a8590f5ca114fabf16ebb26a4bce0255ace9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sun, 4 Mar 2018 03:29:53 +0100
Subject: [PATCH 0657/1678] net: core: dst: Add kernel-doc for 'net' parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the following kernel-doc warning:

./include/net/dst.h:366: warning: Function parameter or member 'net' not described in 'skb_tunnel_rx'

Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path")
Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/dst.h b/include/net/dst.h
index c63d2c37f6e9..b3219cd8a5a1 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -356,6 +356,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
  *	skb_tunnel_rx - prepare skb for rx reinsert
  *	@skb: buffer
  *	@dev: tunnel device
+ *	@net: netns for packet i/o
  *
  *	After decapsulation, packet is going to re-enter (netif_rx()) our stack,
  *	so make some cleanups, and perform accounting.

From 01d049366529544f1df44139f5ca225d4c36ec31 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Mon, 5 Mar 2018 15:16:50 +0100
Subject: [PATCH 0658/1678] net: mvpp2: use the same buffer pool for all ports

This patch configures the buffer manager long pool for all ports part of
the same CP. Long pool separation between ports is redundant since there
are no performance improvement when different pools are used.

Signed-off-by: Stefan Chulski <stefanc@marvell.com>
[Antoine: cosmetic cleanup, commit message]
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 76 ++++++++++++++--------------
 1 file changed, 37 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9418f6eed086..d6cce0fcc55c 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -808,23 +808,23 @@ enum mvpp2_prs_l3_cast {
 #define MVPP22_RSS_TABLE_ENTRIES	32
 
 /* BM constants */
-#define MVPP2_BM_POOLS_NUM		8
 #define MVPP2_BM_LONG_BUF_NUM		1024
 #define MVPP2_BM_SHORT_BUF_NUM		2048
 #define MVPP2_BM_POOL_SIZE_MAX		(16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
 #define MVPP2_BM_POOL_PTR_ALIGN		128
-#define MVPP2_BM_SWF_LONG_POOL(port)	((port > 2) ? 2 : port)
-#define MVPP2_BM_SWF_SHORT_POOL		3
 
 /* BM cookie (32 bits) definition */
 #define MVPP2_BM_COOKIE_POOL_OFFS	8
 #define MVPP2_BM_COOKIE_CPU_OFFS	24
 
+#define MVPP2_BM_SHORT_FRAME_SIZE		512
+#define MVPP2_BM_LONG_FRAME_SIZE		2048
 /* BM short pool packet size
  * These value assure that for SWF the total number
  * of bytes allocated for each buffer will be 512
  */
-#define MVPP2_BM_SHORT_PKT_SIZE		MVPP2_RX_MAX_PKT_SIZE(512)
+#define MVPP2_BM_SHORT_PKT_SIZE	MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_SHORT_FRAME_SIZE)
+#define MVPP2_BM_LONG_PKT_SIZE	MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_LONG_FRAME_SIZE)
 
 #define MVPP21_ADDR_SPACE_SZ		0
 #define MVPP22_ADDR_SPACE_SZ		SZ_64K
@@ -832,12 +832,17 @@ enum mvpp2_prs_l3_cast {
 #define MVPP2_MAX_THREADS		8
 #define MVPP2_MAX_QVECS			MVPP2_MAX_THREADS
 
-enum mvpp2_bm_type {
-	MVPP2_BM_FREE,
-	MVPP2_BM_SWF_LONG,
-	MVPP2_BM_SWF_SHORT
+enum mvpp2_bm_pool_log_num {
+	MVPP2_BM_SHORT,
+	MVPP2_BM_LONG,
+	MVPP2_BM_POOLS_NUM
 };
 
+static struct {
+	int pkt_size;
+	int buf_num;
+} mvpp2_pools[MVPP2_BM_POOLS_NUM];
+
 /* GMAC MIB Counters register definitions */
 #define MVPP21_MIB_COUNTERS_OFFSET		0x1000
 #define MVPP21_MIB_COUNTERS_PORT_SZ		0x400
@@ -1266,7 +1271,6 @@ struct mvpp2_cls_lookup_entry {
 struct mvpp2_bm_pool {
 	/* Pool number in the range 0-7 */
 	int id;
-	enum mvpp2_bm_type type;
 
 	/* Buffer Pointers Pool External (BPPE) size */
 	int size;
@@ -4195,7 +4199,6 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
 	val |= MVPP2_BM_START_MASK;
 	mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
 
-	bm_pool->type = MVPP2_BM_FREE;
 	bm_pool->size = size;
 	bm_pool->pkt_size = 0;
 	bm_pool->buf_num = 0;
@@ -4345,6 +4348,17 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
 	return 0;
 }
 
+static void mvpp2_setup_bm_pool(void)
+{
+	/* Short pool */
+	mvpp2_pools[MVPP2_BM_SHORT].buf_num  = MVPP2_BM_SHORT_BUF_NUM;
+	mvpp2_pools[MVPP2_BM_SHORT].pkt_size = MVPP2_BM_SHORT_PKT_SIZE;
+
+	/* Long pool */
+	mvpp2_pools[MVPP2_BM_LONG].buf_num  = MVPP2_BM_LONG_BUF_NUM;
+	mvpp2_pools[MVPP2_BM_LONG].pkt_size = MVPP2_BM_LONG_PKT_SIZE;
+}
+
 /* Attach long pool to rxq */
 static void mvpp2_rxq_long_pool_set(struct mvpp2_port *port,
 				    int lrxq, int long_pool)
@@ -4483,13 +4497,11 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
 	bm_pool->buf_num += i;
 
 	netdev_dbg(port->dev,
-		   "%s pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
-		   bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+		   "pool %d: pkt_size=%4d, buf_size=%4d, total_size=%4d\n",
 		   bm_pool->id, bm_pool->pkt_size, buf_size, total_size);
 
 	netdev_dbg(port->dev,
-		   "%s pool %d: %d of %d buffers added\n",
-		   bm_pool->type == MVPP2_BM_SWF_SHORT ? "short" : " long",
+		   "pool %d: %d of %d buffers added\n",
 		   bm_pool->id, i, buf_num);
 	return i;
 }
@@ -4498,25 +4510,20 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
  * pool pointer on success
  */
 static struct mvpp2_bm_pool *
-mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
-		  int pkt_size)
+mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 {
 	struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
 	int num;
 
-	if (new_pool->type != MVPP2_BM_FREE && new_pool->type != type) {
-		netdev_err(port->dev, "mixing pool types is forbidden\n");
+	if (pool >= MVPP2_BM_POOLS_NUM) {
+		netdev_err(port->dev, "Invalid pool %d\n", pool);
 		return NULL;
 	}
 
-	if (new_pool->type == MVPP2_BM_FREE)
-		new_pool->type = type;
-
 	/* Allocate buffers in case BM pool is used as long pool, but packet
 	 * size doesn't match MTU or BM pool hasn't being used yet
 	 */
-	if (((type == MVPP2_BM_SWF_LONG) && (pkt_size > new_pool->pkt_size)) ||
-	    (new_pool->pkt_size == 0)) {
+	if (new_pool->pkt_size == 0) {
 		int pkts_num;
 
 		/* Set default buffer number or free all the buffers in case
@@ -4524,9 +4531,7 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type,
 		 */
 		pkts_num = new_pool->buf_num;
 		if (pkts_num == 0)
-			pkts_num = type == MVPP2_BM_SWF_LONG ?
-				   MVPP2_BM_LONG_BUF_NUM :
-				   MVPP2_BM_SHORT_BUF_NUM;
+			pkts_num = mvpp2_pools[pool].buf_num;
 		else
 			mvpp2_bm_bufs_free(port->dev->dev.parent,
 					   port->priv, new_pool);
@@ -4558,9 +4563,8 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 
 	if (!port->pool_long) {
 		port->pool_long =
-		       mvpp2_bm_pool_use(port, MVPP2_BM_SWF_LONG_POOL(port->id),
-					 MVPP2_BM_SWF_LONG,
-					 port->pkt_size);
+			mvpp2_bm_pool_use(port, MVPP2_BM_LONG,
+					  mvpp2_pools[MVPP2_BM_LONG].pkt_size);
 		if (!port->pool_long)
 			return -ENOMEM;
 
@@ -4572,9 +4576,8 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 
 	if (!port->pool_short) {
 		port->pool_short =
-			mvpp2_bm_pool_use(port, MVPP2_BM_SWF_SHORT_POOL,
-					  MVPP2_BM_SWF_SHORT,
-					  MVPP2_BM_SHORT_PKT_SIZE);
+			mvpp2_bm_pool_use(port, MVPP2_BM_SHORT,
+					  mvpp2_pools[MVPP2_BM_SHORT].pkt_size);
 		if (!port->pool_short)
 			return -ENOMEM;
 
@@ -4593,7 +4596,6 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 	struct mvpp2_port *port = netdev_priv(dev);
 	struct mvpp2_bm_pool *port_pool = port->pool_long;
 	int num, pkts_num = port_pool->buf_num;
-	int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
 
 	/* Update BM pool with new buffer size */
 	mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool);
@@ -4602,18 +4604,12 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 		return -EIO;
 	}
 
-	port_pool->pkt_size = pkt_size;
-	port_pool->frag_size = SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
-		MVPP2_SKB_SHINFO_SIZE;
 	num = mvpp2_bm_bufs_add(port, port_pool, pkts_num);
 	if (num != pkts_num) {
 		WARN(1, "pool %d: %d of %d allocated\n",
 		     port_pool->id, num, pkts_num);
 		return -EIO;
 	}
-
-	mvpp2_bm_pool_bufsize_set(port->priv, port_pool,
-				  MVPP2_RX_BUF_SIZE(port_pool->pkt_size));
 	dev->mtu = mtu;
 	netdev_update_features(dev);
 	return 0;
@@ -8630,6 +8626,8 @@ static int mvpp2_probe(struct platform_device *pdev)
 			priv->sysctrl_base = NULL;
 	}
 
+	mvpp2_setup_bm_pool();
+
 	for (i = 0; i < MVPP2_MAX_THREADS; i++) {
 		u32 addr_space_sz;
 

From effbf5f58d64b1d1f93cb687d9797b42f291d5fd Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Mon, 5 Mar 2018 15:16:51 +0100
Subject: [PATCH 0659/1678] net: mvpp2: update the BM buffer free/destroy logic

The buffer free routine is updated to release only given a number of
buffers, and the destroy routine now checks the actual number of buffers
in the (BPPI and BPPE) HW counters before draining the pools. This
change helps getting jumbo frames support.

Signed-off-by: Stefan Chulski <stefanc@marvell.com>
[Antoine: cosmetic cleanup, commit message]
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 45 +++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index d6cce0fcc55c..c7b8093395c9 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -258,6 +258,7 @@
 #define MVPP2_BM_BPPI_READ_PTR_REG(pool)	(0x6100 + ((pool) * 4))
 #define MVPP2_BM_BPPI_PTRS_NUM_REG(pool)	(0x6140 + ((pool) * 4))
 #define     MVPP2_BM_BPPI_PTR_NUM_MASK		0x7ff
+#define MVPP22_BM_POOL_PTRS_NUM_MASK		0xfff8
 #define     MVPP2_BM_BPPI_PREFETCH_FULL_MASK	BIT(16)
 #define MVPP2_BM_POOL_CTRL_REG(pool)		(0x6200 + ((pool) * 4))
 #define     MVPP2_BM_START_MASK			BIT(0)
@@ -4251,11 +4252,17 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv,
 
 /* Free all buffers from the pool */
 static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
-			       struct mvpp2_bm_pool *bm_pool)
+			       struct mvpp2_bm_pool *bm_pool, int buf_num)
 {
 	int i;
 
-	for (i = 0; i < bm_pool->buf_num; i++) {
+	if (buf_num > bm_pool->buf_num) {
+		WARN(1, "Pool does not have so many bufs pool(%d) bufs(%d)\n",
+		     bm_pool->id, buf_num);
+		buf_num = bm_pool->buf_num;
+	}
+
+	for (i = 0; i < buf_num; i++) {
 		dma_addr_t buf_dma_addr;
 		phys_addr_t buf_phys_addr;
 		void *data;
@@ -4277,16 +4284,39 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
 	bm_pool->buf_num -= i;
 }
 
+/* Check number of buffers in BM pool */
+int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool)
+{
+	int buf_num = 0;
+
+	buf_num += mvpp2_read(priv, MVPP2_BM_POOL_PTRS_NUM_REG(bm_pool->id)) &
+				    MVPP22_BM_POOL_PTRS_NUM_MASK;
+	buf_num += mvpp2_read(priv, MVPP2_BM_BPPI_PTRS_NUM_REG(bm_pool->id)) &
+				    MVPP2_BM_BPPI_PTR_NUM_MASK;
+
+	/* HW has one buffer ready which is not reflected in the counters */
+	if (buf_num)
+		buf_num += 1;
+
+	return buf_num;
+}
+
 /* Cleanup pool */
 static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
 				 struct mvpp2 *priv,
 				 struct mvpp2_bm_pool *bm_pool)
 {
+	int buf_num;
 	u32 val;
 
-	mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool);
-	if (bm_pool->buf_num) {
-		WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
+	buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+	mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
+
+	/* Check buffer counters after free */
+	buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
+	if (buf_num) {
+		WARN(1, "cannot free all buffers in pool %d, buf_num left %d\n",
+		     bm_pool->id, bm_pool->buf_num);
 		return 0;
 	}
 
@@ -4534,7 +4564,7 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 			pkts_num = mvpp2_pools[pool].buf_num;
 		else
 			mvpp2_bm_bufs_free(port->dev->dev.parent,
-					   port->priv, new_pool);
+					   port->priv, new_pool, pkts_num);
 
 		new_pool->pkt_size = pkt_size;
 		new_pool->frag_size =
@@ -4598,7 +4628,8 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 	int num, pkts_num = port_pool->buf_num;
 
 	/* Update BM pool with new buffer size */
-	mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool);
+	mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool,
+			   port_pool->buf_num);
 	if (port_pool->buf_num) {
 		WARN(1, "cannot free all buffers in pool %d\n", port_pool->id);
 		return -EIO;

From 93ff130f1c2bae00c0378f065e441e988b0cd1e7 Mon Sep 17 00:00:00 2001
From: Yan Markman <ymarkman@marvell.com>
Date: Mon, 5 Mar 2018 15:16:52 +0100
Subject: [PATCH 0660/1678] net: mvpp2: use a data size of 10kB for Tx FIFO on
 port 0

This patch sets the Tx FIFO data size on port 0 to 10kB. This prepares
the PPv2 driver for the Jumbo frame support addition as the hardware
will need big enough Tx FIFO buffers when dealing with frames going
through an interface with an MTU of 9000.

Signed-off-by: Yan Markman <ymarkman@marvell.com>
[Antoine: commit message, small reworks.]
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index c7b8093395c9..c37d2be8496b 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -44,6 +44,7 @@
 #define MVPP2_RX_ATTR_FIFO_SIZE_REG(port)	(0x20 + 4 * (port))
 #define MVPP2_RX_MIN_PKT_SIZE_REG		0x60
 #define MVPP2_RX_FIFO_INIT_REG			0x64
+#define MVPP22_TX_FIFO_THRESH_REG(port)		(0x8840 + 4 * (port))
 #define MVPP22_TX_FIFO_SIZE_REG(port)		(0x8860 + 4 * (port))
 
 /* RX DMA Top Registers */
@@ -542,6 +543,11 @@
 /* TX FIFO constants */
 #define MVPP22_TX_FIFO_DATA_SIZE_10KB		0xa
 #define MVPP22_TX_FIFO_DATA_SIZE_3KB		0x3
+#define MVPP2_TX_FIFO_THRESHOLD_MIN		256
+#define MVPP2_TX_FIFO_THRESHOLD_10KB	\
+	(MVPP22_TX_FIFO_DATA_SIZE_10KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
+#define MVPP2_TX_FIFO_THRESHOLD_3KB	\
+	(MVPP22_TX_FIFO_DATA_SIZE_3KB * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN)
 
 /* RX buffer constants */
 #define MVPP2_SKB_SHINFO_SIZE \
@@ -8456,14 +8462,25 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv)
 	mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1);
 }
 
-/* Initialize Tx FIFO's */
+/* Initialize Tx FIFO's: the total FIFO size is 19kB on PPv2.2 and 10G
+ * interfaces must have a Tx FIFO size of 10kB. As only port 0 can do 10G,
+ * configure its Tx FIFO size to 10kB and the others ports Tx FIFO size to 3kB.
+ */
 static void mvpp22_tx_fifo_init(struct mvpp2 *priv)
 {
-	int port;
+	int port, size, thrs;
 
-	for (port = 0; port < MVPP2_MAX_PORTS; port++)
-		mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port),
-			    MVPP22_TX_FIFO_DATA_SIZE_3KB);
+	for (port = 0; port < MVPP2_MAX_PORTS; port++) {
+		if (port == 0) {
+			size = MVPP22_TX_FIFO_DATA_SIZE_10KB;
+			thrs = MVPP2_TX_FIFO_THRESHOLD_10KB;
+		} else {
+			size = MVPP22_TX_FIFO_DATA_SIZE_3KB;
+			thrs = MVPP2_TX_FIFO_THRESHOLD_3KB;
+		}
+		mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), size);
+		mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), thrs);
+	}
 }
 
 static void mvpp2_axi_init(struct mvpp2 *priv)

From 381c56712db43af1112a21cce6bdc37281890cdb Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Mon, 5 Mar 2018 15:16:53 +0100
Subject: [PATCH 0661/1678] net: mvpp2: enable UDP/TCP checksum over IPv6

This patch adds the NETIF_F_IPV6_CSUM to the driver's features to enable
UDP/TCP checksum over IPv6. No extra configuration of the engine is
needed on top of the IPv4 counterpart, which already is in the features
list (NETIF_F_IP_CSUM).

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index c37d2be8496b..ede4f6af0159 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -8321,7 +8321,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 		}
 	}
 
-	features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+	features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		   NETIF_F_TSO;
 	dev->features = features | NETIF_F_RXCSUM;
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
 			    NETIF_F_HW_VLAN_CTAG_FILTER;

From 576193f2d57904cb78454d7b73eecfcac74fdf22 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Mon, 5 Mar 2018 15:16:54 +0100
Subject: [PATCH 0662/1678] net: mvpp2: jumbo frames support

This patch adds the support for jumbo frames in the Marvell PPv2 driver.
A third buffer pool is added with 10KB buffers, which is used if the MTU
is higher than 1518B for packets larger than 1518B. Please note only the
port 0 supports hardware checksum offload due to the Tx FIFO size
limitation.

Signed-off-by: Stefan Chulski <stefanc@marvell.com>
[Antoine: cosmetic cleanup, commit message]
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 98 +++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index ede4f6af0159..ac0a0dc8f157 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -815,6 +815,7 @@ enum mvpp2_prs_l3_cast {
 #define MVPP22_RSS_TABLE_ENTRIES	32
 
 /* BM constants */
+#define MVPP2_BM_JUMBO_BUF_NUM		512
 #define MVPP2_BM_LONG_BUF_NUM		1024
 #define MVPP2_BM_SHORT_BUF_NUM		2048
 #define MVPP2_BM_POOL_SIZE_MAX		(16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
@@ -826,12 +827,14 @@ enum mvpp2_prs_l3_cast {
 
 #define MVPP2_BM_SHORT_FRAME_SIZE		512
 #define MVPP2_BM_LONG_FRAME_SIZE		2048
+#define MVPP2_BM_JUMBO_FRAME_SIZE		10240
 /* BM short pool packet size
  * These value assure that for SWF the total number
  * of bytes allocated for each buffer will be 512
  */
 #define MVPP2_BM_SHORT_PKT_SIZE	MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_SHORT_FRAME_SIZE)
 #define MVPP2_BM_LONG_PKT_SIZE	MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_LONG_FRAME_SIZE)
+#define MVPP2_BM_JUMBO_PKT_SIZE	MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_JUMBO_FRAME_SIZE)
 
 #define MVPP21_ADDR_SPACE_SZ		0
 #define MVPP22_ADDR_SPACE_SZ		SZ_64K
@@ -842,6 +845,7 @@ enum mvpp2_prs_l3_cast {
 enum mvpp2_bm_pool_log_num {
 	MVPP2_BM_SHORT,
 	MVPP2_BM_LONG,
+	MVPP2_BM_JUMBO,
 	MVPP2_BM_POOLS_NUM
 };
 
@@ -4393,6 +4397,10 @@ static void mvpp2_setup_bm_pool(void)
 	/* Long pool */
 	mvpp2_pools[MVPP2_BM_LONG].buf_num  = MVPP2_BM_LONG_BUF_NUM;
 	mvpp2_pools[MVPP2_BM_LONG].pkt_size = MVPP2_BM_LONG_PKT_SIZE;
+
+	/* Jumbo pool */
+	mvpp2_pools[MVPP2_BM_JUMBO].buf_num  = MVPP2_BM_JUMBO_BUF_NUM;
+	mvpp2_pools[MVPP2_BM_JUMBO].pkt_size = MVPP2_BM_JUMBO_PKT_SIZE;
 }
 
 /* Attach long pool to rxq */
@@ -4596,15 +4604,28 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
 static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 {
 	int rxq;
+	enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
+
+	/* If port pkt_size is higher than 1518B:
+	 * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+	 * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+	 */
+	if (port->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
+		long_log_pool = MVPP2_BM_JUMBO;
+		short_log_pool = MVPP2_BM_LONG;
+	} else {
+		long_log_pool = MVPP2_BM_LONG;
+		short_log_pool = MVPP2_BM_SHORT;
+	}
 
 	if (!port->pool_long) {
 		port->pool_long =
-			mvpp2_bm_pool_use(port, MVPP2_BM_LONG,
-					  mvpp2_pools[MVPP2_BM_LONG].pkt_size);
+			mvpp2_bm_pool_use(port, long_log_pool,
+					  mvpp2_pools[long_log_pool].pkt_size);
 		if (!port->pool_long)
 			return -ENOMEM;
 
-		port->pool_long->port_map |= (1 << port->id);
+		port->pool_long->port_map |= BIT(port->id);
 
 		for (rxq = 0; rxq < port->nrxqs; rxq++)
 			mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
@@ -4612,12 +4633,12 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 
 	if (!port->pool_short) {
 		port->pool_short =
-			mvpp2_bm_pool_use(port, MVPP2_BM_SHORT,
-					  mvpp2_pools[MVPP2_BM_SHORT].pkt_size);
+			mvpp2_bm_pool_use(port, short_log_pool,
+					  mvpp2_pools[long_log_pool].pkt_size);
 		if (!port->pool_short)
 			return -ENOMEM;
 
-		port->pool_short->port_map |= (1 << port->id);
+		port->pool_short->port_map |= BIT(port->id);
 
 		for (rxq = 0; rxq < port->nrxqs; rxq++)
 			mvpp2_rxq_short_pool_set(port, rxq,
@@ -4630,24 +4651,49 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
-	struct mvpp2_bm_pool *port_pool = port->pool_long;
-	int num, pkts_num = port_pool->buf_num;
+	enum mvpp2_bm_pool_log_num new_long_pool;
+	int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
 
-	/* Update BM pool with new buffer size */
-	mvpp2_bm_bufs_free(dev->dev.parent, port->priv, port_pool,
-			   port_pool->buf_num);
-	if (port_pool->buf_num) {
-		WARN(1, "cannot free all buffers in pool %d\n", port_pool->id);
-		return -EIO;
+	/* If port MTU is higher than 1518B:
+	 * HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
+	 * else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
+	 */
+	if (pkt_size > MVPP2_BM_LONG_PKT_SIZE)
+		new_long_pool = MVPP2_BM_JUMBO;
+	else
+		new_long_pool = MVPP2_BM_LONG;
+
+	if (new_long_pool != port->pool_long->id) {
+		/* Remove port from old short & long pool */
+		port->pool_long = mvpp2_bm_pool_use(port, port->pool_long->id,
+						    port->pool_long->pkt_size);
+		port->pool_long->port_map &= ~BIT(port->id);
+		port->pool_long = NULL;
+
+		port->pool_short = mvpp2_bm_pool_use(port, port->pool_short->id,
+						     port->pool_short->pkt_size);
+		port->pool_short->port_map &= ~BIT(port->id);
+		port->pool_short = NULL;
+
+		port->pkt_size =  pkt_size;
+
+		/* Add port to new short & long pool */
+		mvpp2_swf_bm_pool_init(port);
+
+		/* Update L4 checksum when jumbo enable/disable on port */
+		if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) {
+			dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+			dev->hw_features &= ~(NETIF_F_IP_CSUM |
+					      NETIF_F_IPV6_CSUM);
+		} else {
+			dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+			dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+		}
 	}
 
-	num = mvpp2_bm_bufs_add(port, port_pool, pkts_num);
-	if (num != pkts_num) {
-		WARN(1, "pool %d: %d of %d allocated\n",
-		     port_pool->id, num, pkts_num);
-		return -EIO;
-	}
 	dev->mtu = mtu;
+	dev->wanted_features = dev->features;
+
 	netdev_update_features(dev);
 	return 0;
 }
@@ -8326,13 +8372,19 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->features = features | NETIF_F_RXCSUM;
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
 			    NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	if (port->pool_long->id == MVPP2_BM_JUMBO && port->id != 0) {
+		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+		dev->hw_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+	}
+
 	dev->vlan_features |= features;
 	dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
 
-	/* MTU range: 68 - 9676 */
+	/* MTU range: 68 - 9704 */
 	dev->min_mtu = ETH_MIN_MTU;
-	/* 9676 == 9700 - 20 and rounding to 8 */
-	dev->max_mtu = 9676;
+	/* 9704 == 9728 - 20 and rounding to 8 */
+	dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
 
 	err = register_netdev(dev);
 	if (err < 0) {

From ae0662f84b105776734cb089703a7bf834bac195 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Sat, 20 Jan 2018 04:27:58 +0800
Subject: [PATCH 0663/1678] netfilter: nf_tables:
 nf_tables_obj_lookup_byhandle() can be static

Fixes: 3ecbfd65f50e ("netfilter: nf_tables: allocate handle and delete objects via handle")
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8b9fe30de0cd..8cc7fc970f0c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4328,9 +4328,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
 }
 EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
 
-struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
-						 const struct nlattr *nla,
-						 u32 objtype, u8 genmask)
+static struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
+							const struct nlattr *nla,
+							u32 objtype, u8 genmask)
 {
 	struct nft_object *obj;
 
@@ -4850,7 +4850,7 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
 }
 EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
 
-struct nft_flowtable *
+static struct nft_flowtable *
 nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
 				    const struct nlattr *nla, u8 genmask)
 {

From cceae76ef3a1181242e4f7b559a7bfc904a9855c Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 11 Feb 2018 19:17:20 +0900
Subject: [PATCH 0664/1678] netfilter: nfnetlink_acct: remove useless parameter

parameter skb in nfnl_acct_overquota is not used anywhere.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_acct.h | 3 +--
 net/netfilter/nfnetlink_acct.c           | 3 +--
 net/netfilter/xt_nfacct.c                | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index b4d741195c28..beee8bffe49e 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -16,6 +16,5 @@ struct nf_acct;
 struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
-			struct nf_acct *nfacct);
+int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 88d427f9f9e6..b9505bcd3827 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -467,8 +467,7 @@ static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
 			  GFP_ATOMIC);
 }
 
-int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
-			struct nf_acct *nfacct)
+int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct)
 {
 	u64 now;
 	u64 *quota;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index c8674deed4eb..6b56f4170860 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -28,7 +28,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct);
+	overquota = nfnl_acct_overquota(xt_net(par), info->nfacct);
 
 	return overquota == NFACCT_UNDERQUOTA ? false : true;
 }

From 580c7d9e4cc69802189b872ad2df0d704c649441 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 11 Feb 2018 22:57:29 +0900
Subject: [PATCH 0665/1678] netfilter: xt_cluster: get rid of
 xt_cluster_ipv6_is_multicast

If use the ipv6_addr_is_multicast instead of xt_cluster_ipv6_is_multicast,
then we can reduce code size.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_cluster.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 0068688995c8..dfbdbb2fc0ed 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -59,13 +59,6 @@ xt_cluster_hash(const struct nf_conn *ct,
 	return reciprocal_scale(hash, info->total_nodes);
 }
 
-static inline bool
-xt_cluster_ipv6_is_multicast(const struct in6_addr *addr)
-{
-	__be32 st = addr->s6_addr32[0];
-	return ((st & htonl(0xFF000000)) == htonl(0xFF000000));
-}
-
 static inline bool
 xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
 {
@@ -76,8 +69,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
 		is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr);
 		break;
 	case NFPROTO_IPV6:
-		is_multicast =
-			xt_cluster_ipv6_is_multicast(&ipv6_hdr(skb)->daddr);
+		is_multicast = ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr);
 		break;
 	default:
 		WARN_ON(1);

From 433029ecc62788296cacca50ceb24db90c17a4a2 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 11 Feb 2018 23:28:18 +0900
Subject: [PATCH 0666/1678] netfilter: nf_conntrack_broadcast: remove useless
 parameter

parameter protoff in nf_conntrack_broadcast_help is not used anywhere.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 3 +--
 net/netfilter/nf_conntrack_broadcast.c      | 1 -
 net/netfilter/nf_conntrack_netbios_ns.c     | 5 +++--
 net/netfilter/nf_conntrack_snmp.c           | 5 +++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index fc39bbaf107c..32c2a94a219d 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -132,8 +132,7 @@ void nf_conntrack_helper_pernet_fini(struct net *net);
 int nf_conntrack_helper_init(void);
 void nf_conntrack_helper_fini(void);
 
-int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int protoff,
-				struct nf_conn *ct,
+int nf_conntrack_broadcast_help(struct sk_buff *skb, struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
 				unsigned int timeout);
 
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index ecc3ab784633..a1086bdec242 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -20,7 +20,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 
 int nf_conntrack_broadcast_help(struct sk_buff *skb,
-				unsigned int protoff,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
 				unsigned int timeout)
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 496ce173f0c1..a4a59dc7cf17 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -41,9 +41,10 @@ static struct nf_conntrack_expect_policy exp_policy = {
 };
 
 static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
-		   struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+			   struct nf_conn *ct,
+			   enum ip_conntrack_info ctinfo)
 {
-	return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+	return nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
 }
 
 static struct nf_conntrack_helper helper __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index 87b95a2c270c..2d0f8e010821 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -36,11 +36,12 @@ int (*nf_nat_snmp_hook)(struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
 
 static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
-		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+			       struct nf_conn *ct,
+			       enum ip_conntrack_info ctinfo)
 {
 	typeof(nf_nat_snmp_hook) nf_nat_snmp;
 
-	nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+	nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout);
 
 	nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
 	if (nf_nat_snmp && ct->status & IPS_NAT_MASK)

From 2db3fec507bd822ed81c031221b97701238949dc Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Tue, 13 Feb 2018 08:25:57 -0600
Subject: [PATCH 0667/1678] netfilter: ipt_ah: return boolean instead of
 integer

Return statements in functions returning bool should use
true/false instead of 1/0.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_ah.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index a787d07f6cb7..7c6c20eaf4db 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -47,7 +47,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 */
 		pr_debug("Dropping evil AH tinygram.\n");
 		par->hotdrop = true;
-		return 0;
+		return false;
 	}
 
 	return spi_match(ahinfo->spis[0], ahinfo->spis[1],

From f31e5f1a891f989f107e8caa6b49dd4df0e12265 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 16 Feb 2018 18:04:56 +0800
Subject: [PATCH 0668/1678] netfilter: unlock xt_table earlier in __do_replace

Now it's doing cleanup_entry for oldinfo under the xt_table lock,
but it's not really necessary. After the replacement job is done
in xt_replace_table, oldinfo is not used elsewhere any more, and
it can be freed without xt_table lock safely.

The important thing is that rtnl_lock is called in some xt_target
destroy, which means rtnl_lock, a big lock is used in xt_table
lock, a smaller one. It usually could be the reason why a dead
lock may happen.

Besides, all xt_target/match checkentry is called out of xt_table
lock. It's better also to move all cleanup_entry calling out of
xt_table lock, just as do_replace_finish does for ebtables.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 3 ++-
 net/ipv4/netfilter/ip_tables.c  | 3 ++-
 net/ipv6/netfilter/ip6_tables.c | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c36ffce3c812..a0c7ce76879c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -925,6 +925,8 @@ static int __do_replace(struct net *net, const char *name,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
+	xt_table_unlock(t);
+
 	get_old_counters(oldinfo, counters);
 
 	/* Decrease module usage counts and free resource */
@@ -939,7 +941,6 @@ static int __do_replace(struct net *net, const char *name,
 		net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
 	}
 	vfree(counters);
-	xt_table_unlock(t);
 	return ret;
 
  put_module:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d4f7584d2dbe..4f7153e25e0b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1087,6 +1087,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
+	xt_table_unlock(t);
+
 	get_old_counters(oldinfo, counters);
 
 	/* Decrease module usage counts and free resource */
@@ -1100,7 +1102,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 		net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
 	}
 	vfree(counters);
-	xt_table_unlock(t);
 	return ret;
 
  put_module:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4de8ac1e5af4..6c44033decab 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1105,6 +1105,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
+	xt_table_unlock(t);
+
 	get_old_counters(oldinfo, counters);
 
 	/* Decrease module usage counts and free resource */
@@ -1118,7 +1120,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 		net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
 	}
 	vfree(counters);
-	xt_table_unlock(t);
 	return ret;
 
  put_module:

From 07a9da51b4b6aece8bc71e0b1b601fc4c3eb8b64 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:27 +0100
Subject: [PATCH 0669/1678] netfilter: x_tables: check standard verdicts in
 core

Userspace must provide a valid verdict to the standard target.

The verdict can be either a jump (signed int > 0), or a return code.

Allowed return codes are either RETURN (pop from stack), NF_ACCEPT, DROP
and QUEUE (latter is allowed for legacy reasons).

Jump offsets (verdict > 0) are checked in more detail later on when
loop-detection is performed.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c |  5 ----
 net/ipv4/netfilter/ip_tables.c  |  5 ----
 net/ipv6/netfilter/ip6_tables.c |  5 ----
 net/netfilter/x_tables.c        | 49 +++++++++++++++++++++++++++++----
 4 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a0c7ce76879c..c9ffa884a4ee 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -334,11 +334,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 			     t->verdict < 0) || visited) {
 				unsigned int oldpos, size;
 
-				if ((strcmp(t->target.u.user.name,
-					    XT_STANDARD_TARGET) == 0) &&
-				    t->verdict < -NF_MAX_VERDICT - 1)
-					return 0;
-
 				/* Return: backtrack through the last
 				 * big jump.
 				 */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4f7153e25e0b..c9b57a6bf96a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -402,11 +402,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 			     t->verdict < 0) || visited) {
 				unsigned int oldpos, size;
 
-				if ((strcmp(t->target.u.user.name,
-					    XT_STANDARD_TARGET) == 0) &&
-				    t->verdict < -NF_MAX_VERDICT - 1)
-					return 0;
-
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6c44033decab..f46954221933 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -420,11 +420,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 			     t->verdict < 0) || visited) {
 				unsigned int oldpos, size;
 
-				if ((strcmp(t->target.u.user.name,
-					    XT_STANDARD_TARGET) == 0) &&
-				    t->verdict < -NF_MAX_VERDICT - 1)
-					return 0;
-
 				/* Return: backtrack through the last
 				   big jump. */
 				do {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d9deebe599ec..2e4d423e58e6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -654,6 +654,31 @@ struct compat_xt_standard_target {
 	compat_uint_t verdict;
 };
 
+static bool verdict_ok(int verdict)
+{
+	if (verdict > 0)
+		return true;
+
+	if (verdict < 0) {
+		int v = -verdict - 1;
+
+		if (verdict == XT_RETURN)
+			return true;
+
+		switch (v) {
+		case NF_ACCEPT: return true;
+		case NF_DROP: return true;
+		case NF_QUEUE: return true;
+		default:
+			break;
+		}
+
+		return false;
+	}
+
+	return false;
+}
+
 int xt_compat_check_entry_offsets(const void *base, const char *elems,
 				  unsigned int target_offset,
 				  unsigned int next_offset)
@@ -675,9 +700,15 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
 	if (target_offset + t->u.target_size > next_offset)
 		return -EINVAL;
 
-	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
-	    COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
-		return -EINVAL;
+	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
+		const struct compat_xt_standard_target *st = (const void *)t;
+
+		if (COMPAT_XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
+			return -EINVAL;
+
+		if (!verdict_ok(st->verdict))
+			return -EINVAL;
+	}
 
 	/* compat_xt_entry match has less strict alignment requirements,
 	 * otherwise they are identical.  In case of padding differences
@@ -757,9 +788,15 @@ int xt_check_entry_offsets(const void *base,
 	if (target_offset + t->u.target_size > next_offset)
 		return -EINVAL;
 
-	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
-	    XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
-		return -EINVAL;
+	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0) {
+		const struct xt_standard_target *st = (const void *)t;
+
+		if (XT_ALIGN(target_offset + sizeof(*st)) != next_offset)
+			return -EINVAL;
+
+		if (!verdict_ok(st->verdict))
+			return -EINVAL;
+	}
 
 	return xt_check_entry_match(elems, base + target_offset,
 				    __alignof__(struct xt_entry_match));

From 472ebdcd15ebdb8ebe20474ef1ce09abcb241e7d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:28 +0100
Subject: [PATCH 0670/1678] netfilter: x_tables: check error target size too

Check that userspace ERROR target (custom user-defined chains) match
expected format, and the chain name is null terminated.

This is irrelevant for kernel, but iptables itself relies on sane input
when it dumps rules from kernel.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2e4d423e58e6..f045bb4f7063 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -654,6 +654,11 @@ struct compat_xt_standard_target {
 	compat_uint_t verdict;
 };
 
+struct compat_xt_error_target {
+	struct compat_xt_entry_target t;
+	char errorname[XT_FUNCTION_MAXNAMELEN];
+};
+
 static bool verdict_ok(int verdict)
 {
 	if (verdict > 0)
@@ -679,6 +684,12 @@ static bool verdict_ok(int verdict)
 	return false;
 }
 
+static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
+			const char *msg, unsigned int msglen)
+{
+	return usersize == kernsize && strnlen(msg, msglen) < msglen;
+}
+
 int xt_compat_check_entry_offsets(const void *base, const char *elems,
 				  unsigned int target_offset,
 				  unsigned int next_offset)
@@ -708,6 +719,12 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
 
 		if (!verdict_ok(st->verdict))
 			return -EINVAL;
+	} else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
+		const struct compat_xt_error_target *et = (const void *)t;
+
+		if (!error_tg_ok(t->u.target_size, sizeof(*et),
+				 et->errorname, sizeof(et->errorname)))
+			return -EINVAL;
 	}
 
 	/* compat_xt_entry match has less strict alignment requirements,
@@ -796,6 +813,12 @@ int xt_check_entry_offsets(const void *base,
 
 		if (!verdict_ok(st->verdict))
 			return -EINVAL;
+	} else if (strcmp(t->u.user.name, XT_ERROR_TARGET) == 0) {
+		const struct xt_error_target *et = (const void *)t;
+
+		if (!error_tg_ok(t->u.target_size, sizeof(*et),
+				 et->errorname, sizeof(et->errorname)))
+			return -EINVAL;
 	}
 
 	return xt_check_entry_match(elems, base + target_offset,

From 1b293e30f759b03f246baae862bdf35e57b2c39e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:29 +0100
Subject: [PATCH 0671/1678] netfilter: x_tables: move hook entry checks into
 core

Allow followup patch to change on location instead of three.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  2 ++
 net/ipv4/netfilter/arp_tables.c    | 13 +++----------
 net/ipv4/netfilter/ip_tables.c     | 13 +++----------
 net/ipv6/netfilter/ip6_tables.c    | 13 +++----------
 net/netfilter/x_tables.c           | 29 +++++++++++++++++++++++++++++
 5 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 1313b35c3ab7..fa0c19c328f1 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -281,6 +281,8 @@ int xt_check_entry_offsets(const void *base, const char *elems,
 			   unsigned int target_offset,
 			   unsigned int next_offset);
 
+int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks);
+
 unsigned int *xt_alloc_entry_offsets(unsigned int size);
 bool xt_find_jump_offset(const unsigned int *offsets,
 			 unsigned int target, unsigned int size);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c9ffa884a4ee..be5821215ea0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -555,16 +555,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 	if (i != repl->num_entries)
 		goto out_free;
 
-	/* Check hooks all assigned */
-	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
-		/* Only hooks which are valid */
-		if (!(repl->valid_hooks & (1 << i)))
-			continue;
-		if (newinfo->hook_entry[i] == 0xFFFFFFFF)
-			goto out_free;
-		if (newinfo->underflow[i] == 0xFFFFFFFF)
-			goto out_free;
-	}
+	ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+	if (ret)
+		goto out_free;
 
 	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
 		ret = -ELOOP;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c9b57a6bf96a..29bda9484a33 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -702,16 +702,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 	if (i != repl->num_entries)
 		goto out_free;
 
-	/* Check hooks all assigned */
-	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
-		/* Only hooks which are valid */
-		if (!(repl->valid_hooks & (1 << i)))
-			continue;
-		if (newinfo->hook_entry[i] == 0xFFFFFFFF)
-			goto out_free;
-		if (newinfo->underflow[i] == 0xFFFFFFFF)
-			goto out_free;
-	}
+	ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+	if (ret)
+		goto out_free;
 
 	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
 		ret = -ELOOP;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f46954221933..ba3776a4d305 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -720,16 +720,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 	if (i != repl->num_entries)
 		goto out_free;
 
-	/* Check hooks all assigned */
-	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
-		/* Only hooks which are valid */
-		if (!(repl->valid_hooks & (1 << i)))
-			continue;
-		if (newinfo->hook_entry[i] == 0xFFFFFFFF)
-			goto out_free;
-		if (newinfo->underflow[i] == 0xFFFFFFFF)
-			goto out_free;
-	}
+	ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+	if (ret)
+		goto out_free;
 
 	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
 		ret = -ELOOP;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f045bb4f7063..5d8ba89a8da8 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -518,6 +518,35 @@ static int xt_check_entry_match(const char *match, const char *target,
 	return 0;
 }
 
+/** xt_check_table_hooks - check hook entry points are sane
+ *
+ * @info xt_table_info to check
+ * @valid_hooks - hook entry points that we can enter from
+ *
+ * Validates that the hook entry and underflows points are set up.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks)
+{
+	unsigned int i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow));
+
+	for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) {
+		if (!(valid_hooks & (1 << i)))
+			continue;
+
+		if (info->hook_entry[i] == 0xFFFFFFFF)
+			return -EINVAL;
+		if (info->underflow[i] == 0xFFFFFFFF)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(xt_check_table_hooks);
+
 #ifdef CONFIG_COMPAT
 int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {

From e816a2ce49e49e3906f614009c919334a0c6ba6a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:30 +0100
Subject: [PATCH 0672/1678] netfilter: x_tables: enforce unique and ascending
 entry points

Harmless from kernel point of view, but iptables assumes that this is
true when decoding a ruleset.

iptables walks the dumped blob from kernel, and, for each entry that
creates a new chain it prints out rule/chain information.
Base chains (hook entry points) are thus only shown when they appear
in the rule blob.  One base chain that is referenced multiple times
in hook blob is then only printed once.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 5d8ba89a8da8..4e6cbb38e616 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -529,10 +529,15 @@ static int xt_check_entry_match(const char *match, const char *target,
  */
 int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_hooks)
 {
-	unsigned int i;
+	const char *err = "unsorted underflow";
+	unsigned int i, max_uflow, max_entry;
+	bool check_hooks = false;
 
 	BUILD_BUG_ON(ARRAY_SIZE(info->hook_entry) != ARRAY_SIZE(info->underflow));
 
+	max_entry = 0;
+	max_uflow = 0;
+
 	for (i = 0; i < ARRAY_SIZE(info->hook_entry); i++) {
 		if (!(valid_hooks & (1 << i)))
 			continue;
@@ -541,9 +546,33 @@ int xt_check_table_hooks(const struct xt_table_info *info, unsigned int valid_ho
 			return -EINVAL;
 		if (info->underflow[i] == 0xFFFFFFFF)
 			return -EINVAL;
+
+		if (check_hooks) {
+			if (max_uflow > info->underflow[i])
+				goto error;
+
+			if (max_uflow == info->underflow[i]) {
+				err = "duplicate underflow";
+				goto error;
+			}
+			if (max_entry > info->hook_entry[i]) {
+				err = "unsorted entry";
+				goto error;
+			}
+			if (max_entry == info->hook_entry[i]) {
+				err = "duplicate entry";
+				goto error;
+			}
+		}
+		max_entry = info->hook_entry[i];
+		max_uflow = info->underflow[i];
+		check_hooks = true;
 	}
 
 	return 0;
+error:
+	pr_err_ratelimited("%s at hook %d\n", err, i);
+	return -EINVAL;
 }
 EXPORT_SYMBOL(xt_check_table_hooks);
 

From 19926968ea86a286aa6fbea16ee3f2e7442f10f0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:31 +0100
Subject: [PATCH 0673/1678] netfilter: x_tables: cap allocations at 512 mbyte

Arbitrary limit, however, this still allows huge rulesets
(> 1 million rules).  This helps with automated fuzzer as it prevents
oom-killer invocation.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 4e6cbb38e616..dc68ac49614a 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 
 #define XT_PCPU_BLOCK_SIZE 4096
+#define XT_MAX_TABLE_SIZE	(512 * 1024 * 1024)
 
 struct compat_delta {
 	unsigned int offset; /* offset in kernel */
@@ -1117,7 +1118,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
 	struct xt_table_info *info = NULL;
 	size_t sz = sizeof(*info) + size;
 
-	if (sz < sizeof(*info))
+	if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)
 		return NULL;
 
 	/* __GFP_NORETRY is not fully supported by kvmalloc but it should

From 9d5c12a7c08f67999772065afd50fb222072114e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:32 +0100
Subject: [PATCH 0674/1678] netfilter: x_tables: limit allocation requests for
 blob rule heads

This is a very conservative limit (134217728 rules), but good
enough to not trigger frequent oom from syzkaller.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index dc68ac49614a..01f8e122e74e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -894,6 +894,9 @@ EXPORT_SYMBOL(xt_check_entry_offsets);
  */
 unsigned int *xt_alloc_entry_offsets(unsigned int size)
 {
+	if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int))
+		return NULL;
+
 	return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO);
 
 }

From c84ca954ac9fa67a6ce27f91f01e4451c74fd8f6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:33 +0100
Subject: [PATCH 0675/1678] netfilter: x_tables: add counters allocation
 wrapper

allows to have size checks in a single spot.
This is supposed to reduce oom situations when fuzz-testing xtables.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  1 +
 net/ipv4/netfilter/arp_tables.c    |  2 +-
 net/ipv4/netfilter/ip_tables.c     |  2 +-
 net/ipv6/netfilter/ip6_tables.c    |  2 +-
 net/netfilter/x_tables.c           | 15 +++++++++++++++
 5 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index fa0c19c328f1..0bd93c589a8c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -301,6 +301,7 @@ int xt_data_to_user(void __user *dst, const void *src,
 
 void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 				 struct xt_counters_info *info, bool compat);
+struct xt_counters *xt_counters_alloc(unsigned int counters);
 
 struct xt_table *xt_register_table(struct net *net,
 				   const struct xt_table *table,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index be5821215ea0..82ba09b50fdb 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -883,7 +883,7 @@ static int __do_replace(struct net *net, const char *name,
 	struct arpt_entry *iter;
 
 	ret = 0;
-	counters = vzalloc(num_counters * sizeof(struct xt_counters));
+	counters = xt_counters_alloc(num_counters);
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 29bda9484a33..4901ca6c3e09 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1045,7 +1045,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ipt_entry *iter;
 
 	ret = 0;
-	counters = vzalloc(num_counters * sizeof(struct xt_counters));
+	counters = xt_counters_alloc(num_counters);
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index ba3776a4d305..e84cec49b60f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1063,7 +1063,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ip6t_entry *iter;
 
 	ret = 0;
-	counters = vzalloc(num_counters * sizeof(struct xt_counters));
+	counters = xt_counters_alloc(num_counters);
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 01f8e122e74e..82b1f8f52ac6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1290,6 +1290,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
 	return 0;
 }
 
+struct xt_counters *xt_counters_alloc(unsigned int counters)
+{
+	struct xt_counters *mem;
+
+	if (counters == 0 || counters > INT_MAX / sizeof(*mem))
+		return NULL;
+
+	counters *= sizeof(*mem);
+	if (counters > XT_MAX_TABLE_SIZE)
+		return NULL;
+
+	return vzalloc(counters);
+}
+EXPORT_SYMBOL(xt_counters_alloc);
+
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
 	      unsigned int num_counters,

From 9782a11efc072faaf91d4aa60e9d23553f918029 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:34 +0100
Subject: [PATCH 0676/1678] netfilter: compat: prepare xt_compat_init_offsets
 to return errors

should have no impact, function still always returns 0.
This patch is only to ease review.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  2 +-
 net/bridge/netfilter/ebtables.c    | 10 ++++++++--
 net/ipv4/netfilter/arp_tables.c    | 10 +++++++---
 net/ipv4/netfilter/ip_tables.c     |  8 ++++++--
 net/ipv6/netfilter/ip6_tables.c    | 10 +++++++---
 net/netfilter/x_tables.c           |  4 +++-
 6 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 0bd93c589a8c..7bd896dc78df 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -510,7 +510,7 @@ void xt_compat_unlock(u_int8_t af);
 
 int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
 void xt_compat_flush_offsets(u_int8_t af);
-void xt_compat_init_offsets(u_int8_t af, unsigned int number);
+int xt_compat_init_offsets(u8 af, unsigned int number);
 int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
 
 int xt_compat_match_offset(const struct xt_match *match);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 02c4b409d317..217aa79f7b2a 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1819,10 +1819,14 @@ static int compat_table_info(const struct ebt_table_info *info,
 {
 	unsigned int size = info->entries_size;
 	const void *entries = info->entries;
+	int ret;
 
 	newinfo->entries_size = size;
 
-	xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+	ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
+	if (ret)
+		return ret;
+
 	return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
 							entries, newinfo);
 }
@@ -2245,7 +2249,9 @@ static int compat_do_replace(struct net *net, void __user *user,
 
 	xt_compat_lock(NFPROTO_BRIDGE);
 
-	xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+	ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
+	if (ret < 0)
+		goto out_unlock;
 	ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
 	if (ret < 0)
 		goto out_unlock;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 82ba09b50fdb..aaafdbd15ad3 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -769,7 +769,9 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries;
-	xt_compat_init_offsets(NFPROTO_ARP, info->number);
+	ret = xt_compat_init_offsets(NFPROTO_ARP, info->number);
+	if (ret)
+		return ret;
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1156,7 +1158,7 @@ static int translate_compat_table(struct xt_table_info **pinfo,
 	struct compat_arpt_entry *iter0;
 	struct arpt_replace repl;
 	unsigned int size;
-	int ret = 0;
+	int ret;
 
 	info = *pinfo;
 	entry0 = *pentry0;
@@ -1165,7 +1167,9 @@ static int translate_compat_table(struct xt_table_info **pinfo,
 
 	j = 0;
 	xt_compat_lock(NFPROTO_ARP);
-	xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+	ret = xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
+	if (ret)
+		goto out_unlock;
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, compatr->size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4901ca6c3e09..f9063513f9d1 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -933,7 +933,9 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries;
-	xt_compat_init_offsets(AF_INET, info->number);
+	ret = xt_compat_init_offsets(AF_INET, info->number);
+	if (ret)
+		return ret;
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1407,7 +1409,9 @@ translate_compat_table(struct net *net,
 
 	j = 0;
 	xt_compat_lock(AF_INET);
-	xt_compat_init_offsets(AF_INET, compatr->num_entries);
+	ret = xt_compat_init_offsets(AF_INET, compatr->num_entries);
+	if (ret)
+		goto out_unlock;
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, compatr->size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e84cec49b60f..3c36a4c77f29 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -950,7 +950,9 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries;
-	xt_compat_init_offsets(AF_INET6, info->number);
+	ret = xt_compat_init_offsets(AF_INET6, info->number);
+	if (ret)
+		return ret;
 	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
 		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
 		if (ret != 0)
@@ -1414,7 +1416,7 @@ translate_compat_table(struct net *net,
 	struct compat_ip6t_entry *iter0;
 	struct ip6t_replace repl;
 	unsigned int size;
-	int ret = 0;
+	int ret;
 
 	info = *pinfo;
 	entry0 = *pentry0;
@@ -1423,7 +1425,9 @@ translate_compat_table(struct net *net,
 
 	j = 0;
 	xt_compat_lock(AF_INET6);
-	xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+	ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+	if (ret)
+		goto out_unlock;
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter0, entry0, compatr->size) {
 		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 82b1f8f52ac6..e878c85a9268 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -632,10 +632,12 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 }
 EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
-void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+int xt_compat_init_offsets(u8 af, unsigned int number)
 {
 	xt[af].number = number;
 	xt[af].cur = 0;
+
+	return 0;
 }
 EXPORT_SYMBOL(xt_compat_init_offsets);
 

From 7d7d7e02111e9a4dc9d0658597f528f815d820fd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:35 +0100
Subject: [PATCH 0677/1678] netfilter: compat: reject huge allocation requests

no need to bother even trying to allocating huge compat offset arrays,
such ruleset is rejected later on anyway becaus we refuse to allocate
overly large rule blobs.

However, compat translation happens before blob allocation, so we should
add a check there too.

This is supposed to help with fuzzing by avoiding oom-killer.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e878c85a9268..33724b08b8f0 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -582,14 +582,8 @@ int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
 	struct xt_af *xp = &xt[af];
 
-	if (!xp->compat_tab) {
-		if (!xp->number)
-			return -EINVAL;
-		xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
-		if (!xp->compat_tab)
-			return -ENOMEM;
-		xp->cur = 0;
-	}
+	if (WARN_ON(!xp->compat_tab))
+		return -ENOMEM;
 
 	if (xp->cur >= xp->number)
 		return -EINVAL;
@@ -634,6 +628,22 @@ EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
 int xt_compat_init_offsets(u8 af, unsigned int number)
 {
+	size_t mem;
+
+	if (!number || number > (INT_MAX / sizeof(struct compat_delta)))
+		return -EINVAL;
+
+	if (WARN_ON(xt[af].compat_tab))
+		return -EINVAL;
+
+	mem = sizeof(struct compat_delta) * number;
+	if (mem > XT_MAX_TABLE_SIZE)
+		return -ENOMEM;
+
+	xt[af].compat_tab = vmalloc(mem);
+	if (!xt[af].compat_tab)
+		return -ENOMEM;
+
 	xt[af].number = number;
 	xt[af].cur = 0;
 

From 89370860686a54fc0642c7ae68213cc1fc6d8e04 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:36 +0100
Subject: [PATCH 0678/1678] netfilter: x_tables: make sure compat af mutex is
 held

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 33724b08b8f0..7521e8a72c06 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -582,6 +582,8 @@ int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
 	struct xt_af *xp = &xt[af];
 
+	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
 	if (WARN_ON(!xp->compat_tab))
 		return -ENOMEM;
 
@@ -599,6 +601,8 @@ EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
 void xt_compat_flush_offsets(u_int8_t af)
 {
+	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
 	if (xt[af].compat_tab) {
 		vfree(xt[af].compat_tab);
 		xt[af].compat_tab = NULL;
@@ -630,6 +634,8 @@ int xt_compat_init_offsets(u8 af, unsigned int number)
 {
 	size_t mem;
 
+	WARN_ON(!mutex_is_locked(&xt[af].compat_mutex));
+
 	if (!number || number > (INT_MAX / sizeof(struct compat_delta)))
 		return -EINVAL;
 

From 0d7df906a0e78079a02108b06d32c3ef2238ad25 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 27 Feb 2018 19:42:37 +0100
Subject: [PATCH 0679/1678] netfilter: x_tables: ensure last rule in base chain
 matches underflow/policy

Harmless from kernel point of view, but again iptables assumes that
this is true when decoding ruleset coming from kernel.

If a (syzkaller generated) ruleset doesn't have the underflow/policy
stored as the last rule in the base chain, then iptables will abort()
because it doesn't find the chain policy.

libiptc assumes that the policy is the last rule in the basechain, which
is only true for iptables-generated rulesets.

Unfortunately this needs code duplication -- the functions need the
struct layout of the rule head, but that is different for
ip/ip6/arptables.

NB: pr_warn could be pr_debug but in case this break rulesets somehow its
useful to know why blob was rejected.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 17 ++++++++++++++++-
 net/ipv4/netfilter/ip_tables.c  | 17 ++++++++++++++++-
 net/ipv6/netfilter/ip6_tables.c | 17 ++++++++++++++++-
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index aaafdbd15ad3..f366ff1cfc19 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -309,10 +309,13 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 	for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct arpt_entry *e = entry0 + pos;
+		unsigned int last_pos, depth;
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
 
+		depth = 0;
+		last_pos = pos;
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
 
@@ -343,6 +346,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 					pos = e->counters.pcnt;
 					e->counters.pcnt = 0;
 
+					if (depth)
+						--depth;
 					/* We're at the start. */
 					if (pos == oldpos)
 						goto next;
@@ -367,6 +372,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
+
+					if (entry0 + newpos != arpt_next_entry(e))
+						++depth;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
@@ -377,8 +385,15 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
+			if (depth == 0)
+				last_pos = pos;
+		}
+next:
+		if (last_pos != newinfo->underflow[hook]) {
+			pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n",
+					   last_pos, newinfo->underflow[hook], hook);
+			return 0;
 		}
-next:		;
 	}
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f9063513f9d1..2362ca2c9e0c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -378,10 +378,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
 	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ipt_entry *e = entry0 + pos;
+		unsigned int last_pos, depth;
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
 
+		depth = 0;
+		last_pos = pos;
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
 
@@ -410,6 +413,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					pos = e->counters.pcnt;
 					e->counters.pcnt = 0;
 
+					if (depth)
+						--depth;
 					/* We're at the start. */
 					if (pos == oldpos)
 						goto next;
@@ -434,6 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
+
+					if (entry0 + newpos != ipt_next_entry(e))
+						++depth;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
@@ -444,8 +452,15 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
+			if (depth == 0)
+				last_pos = pos;
+		}
+next:
+		if (last_pos != newinfo->underflow[hook]) {
+			pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n",
+					   last_pos, newinfo->underflow[hook], hook);
+			return 0;
 		}
-next:		;
 	}
 	return 1;
 }
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 3c36a4c77f29..004508753abc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -396,10 +396,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
 	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ip6t_entry *e = entry0 + pos;
+		unsigned int last_pos, depth;
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
 
+		depth = 0;
+		last_pos = pos;
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
 
@@ -428,6 +431,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					pos = e->counters.pcnt;
 					e->counters.pcnt = 0;
 
+					if (depth)
+						--depth;
 					/* We're at the start. */
 					if (pos == oldpos)
 						goto next;
@@ -452,6 +457,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
+
+					if (entry0 + newpos != ip6t_next_entry(e))
+						++depth;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
@@ -462,8 +470,15 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
+			if (depth == 0)
+				last_pos = pos;
+		}
+next:
+		if (last_pos != newinfo->underflow[hook]) {
+			pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n",
+					   last_pos, newinfo->underflow[hook], hook);
+			return 0;
 		}
-next:		;
 	}
 	return 1;
 }

From 3427b2ab63faccafe774ea997fc2da7faf690c5a Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 1 Mar 2018 18:58:38 -0800
Subject: [PATCH 0680/1678] netfilter: make xt_rateest hash table per net

As suggested by Eric, we need to make the xt_rateest
hash table and its lock per netns to reduce lock
contentions.

Cc: Florian Westphal <fw@strlen.de>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/xt_rateest.h |  4 +-
 net/netfilter/xt_RATEEST.c         | 91 +++++++++++++++++++++---------
 net/netfilter/xt_rateest.c         | 10 ++--
 3 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index b1db13772554..832ab69efda5 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -21,7 +21,7 @@ struct xt_rateest {
 	struct net_rate_estimator __rcu *rate_est;
 };
 
-struct xt_rateest *xt_rateest_lookup(const char *name);
-void xt_rateest_put(struct xt_rateest *est);
+struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name);
+void xt_rateest_put(struct net *net, struct xt_rateest *est);
 
 #endif /* _XT_RATEEST_H */
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 141c295191f6..dec843cadf46 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -14,15 +14,21 @@
 #include <linux/slab.h>
 #include <net/gen_stats.h>
 #include <net/netlink.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_RATEEST.h>
 #include <net/netfilter/xt_rateest.h>
 
-static DEFINE_MUTEX(xt_rateest_mutex);
-
 #define RATEEST_HSIZE	16
-static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
+
+struct xt_rateest_net {
+	struct mutex hash_lock;
+	struct hlist_head hash[RATEEST_HSIZE];
+};
+
+static unsigned int xt_rateest_id;
+
 static unsigned int jhash_rnd __read_mostly;
 
 static unsigned int xt_rateest_hash(const char *name)
@@ -31,21 +37,23 @@ static unsigned int xt_rateest_hash(const char *name)
 	       (RATEEST_HSIZE - 1);
 }
 
-static void xt_rateest_hash_insert(struct xt_rateest *est)
+static void xt_rateest_hash_insert(struct xt_rateest_net *xn,
+				   struct xt_rateest *est)
 {
 	unsigned int h;
 
 	h = xt_rateest_hash(est->name);
-	hlist_add_head(&est->list, &rateest_hash[h]);
+	hlist_add_head(&est->list, &xn->hash[h]);
 }
 
-static struct xt_rateest *__xt_rateest_lookup(const char *name)
+static struct xt_rateest *__xt_rateest_lookup(struct xt_rateest_net *xn,
+					      const char *name)
 {
 	struct xt_rateest *est;
 	unsigned int h;
 
 	h = xt_rateest_hash(name);
-	hlist_for_each_entry(est, &rateest_hash[h], list) {
+	hlist_for_each_entry(est, &xn->hash[h], list) {
 		if (strcmp(est->name, name) == 0) {
 			est->refcnt++;
 			return est;
@@ -55,20 +63,23 @@ static struct xt_rateest *__xt_rateest_lookup(const char *name)
 	return NULL;
 }
 
-struct xt_rateest *xt_rateest_lookup(const char *name)
+struct xt_rateest *xt_rateest_lookup(struct net *net, const char *name)
 {
+	struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
 	struct xt_rateest *est;
 
-	mutex_lock(&xt_rateest_mutex);
-	est = __xt_rateest_lookup(name);
-	mutex_unlock(&xt_rateest_mutex);
+	mutex_lock(&xn->hash_lock);
+	est = __xt_rateest_lookup(xn, name);
+	mutex_unlock(&xn->hash_lock);
 	return est;
 }
 EXPORT_SYMBOL_GPL(xt_rateest_lookup);
 
-void xt_rateest_put(struct xt_rateest *est)
+void xt_rateest_put(struct net *net, struct xt_rateest *est)
 {
-	mutex_lock(&xt_rateest_mutex);
+	struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+
+	mutex_lock(&xn->hash_lock);
 	if (--est->refcnt == 0) {
 		hlist_del(&est->list);
 		gen_kill_estimator(&est->rate_est);
@@ -78,7 +89,7 @@ void xt_rateest_put(struct xt_rateest *est)
 		 */
 		kfree_rcu(est, rcu);
 	}
-	mutex_unlock(&xt_rateest_mutex);
+	mutex_unlock(&xn->hash_lock);
 }
 EXPORT_SYMBOL_GPL(xt_rateest_put);
 
@@ -98,6 +109,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 {
+	struct xt_rateest_net *xn = net_generic(par->net, xt_rateest_id);
 	struct xt_rateest_target_info *info = par->targinfo;
 	struct xt_rateest *est;
 	struct {
@@ -108,10 +120,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 
 	net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
 
-	mutex_lock(&xt_rateest_mutex);
-	est = __xt_rateest_lookup(info->name);
+	mutex_lock(&xn->hash_lock);
+	est = __xt_rateest_lookup(xn, info->name);
 	if (est) {
-		mutex_unlock(&xt_rateest_mutex);
+		mutex_unlock(&xn->hash_lock);
 		/*
 		 * If estimator parameters are specified, they must match the
 		 * existing estimator.
@@ -119,7 +131,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 		if ((!info->interval && !info->ewma_log) ||
 		    (info->interval != est->params.interval ||
 		     info->ewma_log != est->params.ewma_log)) {
-			xt_rateest_put(est);
+			xt_rateest_put(par->net, est);
 			return -EINVAL;
 		}
 		info->est = est;
@@ -148,14 +160,14 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 		goto err2;
 
 	info->est = est;
-	xt_rateest_hash_insert(est);
-	mutex_unlock(&xt_rateest_mutex);
+	xt_rateest_hash_insert(xn, est);
+	mutex_unlock(&xn->hash_lock);
 	return 0;
 
 err2:
 	kfree(est);
 err1:
-	mutex_unlock(&xt_rateest_mutex);
+	mutex_unlock(&xn->hash_lock);
 	return ret;
 }
 
@@ -163,7 +175,7 @@ static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
 {
 	struct xt_rateest_target_info *info = par->targinfo;
 
-	xt_rateest_put(info->est);
+	xt_rateest_put(par->net, info->est);
 }
 
 static struct xt_target xt_rateest_tg_reg __read_mostly = {
@@ -178,19 +190,46 @@ static struct xt_target xt_rateest_tg_reg __read_mostly = {
 	.me         = THIS_MODULE,
 };
 
+static __net_init int xt_rateest_net_init(struct net *net)
+{
+	struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+	int i;
+
+	mutex_init(&xn->hash_lock);
+	for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
+		INIT_HLIST_HEAD(&xn->hash[i]);
+	return 0;
+}
+
+static void __net_exit xt_rateest_net_exit(struct net *net)
+{
+	struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
+		WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
+}
+
+static struct pernet_operations xt_rateest_net_ops = {
+	.init = xt_rateest_net_init,
+	.exit = xt_rateest_net_exit,
+	.id   = &xt_rateest_id,
+	.size = sizeof(struct xt_rateest_net),
+};
+
 static int __init xt_rateest_tg_init(void)
 {
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
-		INIT_HLIST_HEAD(&rateest_hash[i]);
+	int err = register_pernet_subsys(&xt_rateest_net_ops);
 
+	if (err)
+		return err;
 	return xt_register_target(&xt_rateest_tg_reg);
 }
 
 static void __exit xt_rateest_tg_fini(void)
 {
 	xt_unregister_target(&xt_rateest_tg_reg);
+	unregister_pernet_subsys(&xt_rateest_net_ops);
 }
 
 
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 755d2f6693a2..bf77326861af 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -95,13 +95,13 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 	}
 
 	ret  = -ENOENT;
-	est1 = xt_rateest_lookup(info->name1);
+	est1 = xt_rateest_lookup(par->net, info->name1);
 	if (!est1)
 		goto err1;
 
 	est2 = NULL;
 	if (info->flags & XT_RATEEST_MATCH_REL) {
-		est2 = xt_rateest_lookup(info->name2);
+		est2 = xt_rateest_lookup(par->net, info->name2);
 		if (!est2)
 			goto err2;
 	}
@@ -111,7 +111,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 	return 0;
 
 err2:
-	xt_rateest_put(est1);
+	xt_rateest_put(par->net, est1);
 err1:
 	return ret;
 }
@@ -120,9 +120,9 @@ static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	struct xt_rateest_match_info *info = par->matchinfo;
 
-	xt_rateest_put(info->est1);
+	xt_rateest_put(par->net, info->est1);
 	if (info->est2)
-		xt_rateest_put(info->est2);
+		xt_rateest_put(par->net, info->est2);
 }
 
 static struct xt_match xt_rateest_mt_reg __read_mostly = {

From 010eacd968a73ddcb8592b14c1607e1004120ede Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 2 Mar 2018 14:59:54 +0100
Subject: [PATCH 0681/1678] netfilter: xt_limit: Spelling s/maxmum/maximum/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_limit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 55d18cd67635..9f098ecb2449 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -46,7 +46,7 @@ MODULE_ALIAS("ip6t_limit");
 
    See Alexey's formal explanation in net/sched/sch_tbf.c.
 
-   To get the maxmum range, we multiply by this factor (ie. you get N
+   To get the maximum range, we multiply by this factor (ie. you get N
    credits per jiffy).  We want to allow a rate as low as 1 per day
    (slowest userspace tool allows), which means
    CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32. ie. */

From c33251a3c63395ae8b0ffe5acfe206e4c8bb35ee Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Wed, 31 Jan 2018 15:07:33 +0200
Subject: [PATCH 0682/1678] IB/mlx5: Removed not used parameters

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 2 --
 drivers/infiniband/hw/mlx5/qp.c   | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index ee55d7d64554..23511638fbbc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4585,8 +4585,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 		goto err_free_port;
 
 	if (!mlx5_core_mp_enabled(mdev)) {
-		int i;
-
 		for (i = 1; i <= dev->num_ports; i++) {
 			err = get_port_caps(dev, i);
 			if (err)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 5663530ea5fd..0e67e3682bca 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2153,7 +2153,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
 					struct ib_qp_init_attr *attr,
 					struct mlx5_ib_create_qp *ucmd)
 {
-	struct mlx5_ib_dev *dev;
 	struct mlx5_ib_qp *qp;
 	int err = 0;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -2162,8 +2161,6 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
 	if (!attr->srq || !attr->recv_cq)
 		return ERR_PTR(-EINVAL);
 
-	dev = to_mdev(pd->device);
-
 	err = get_qp_user_index(to_mucontext(pd->uobject->context),
 				ucmd, sizeof(*ucmd), &uidx);
 	if (err)

From 46f3ee4f3a6e84c32e554c80d6533b465de7ff99 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Thu, 22 Feb 2018 17:40:52 +0200
Subject: [PATCH 0683/1678] net/mlx5: Fixed compilation issue when
 CONFIG_MLX5_ACCEL is disabled

IPSec init and cleanup functions also depends on linux/mlx5/driver.h.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index d6e20fea9554..67cda8871f5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -34,10 +34,10 @@
 #ifndef __MLX5_ACCEL_IPSEC_H__
 #define __MLX5_ACCEL_IPSEC_H__
 
-#ifdef CONFIG_MLX5_ACCEL
-
 #include <linux/mlx5/driver.h>
 
+#ifdef CONFIG_MLX5_ACCEL
+
 enum {
 	MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
 	MLX5_ACCEL_IPSEC_IPV6 = BIT(2),

From ef927a9c168722c4ab5e40838d7fe6b63f978763 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Sun, 11 Feb 2018 17:12:44 +0200
Subject: [PATCH 0684/1678] net/mlx5e: Wait for FPGA command responses with a
 timeout

Generally, FPGA IPSec commands must always complete.
We want to wait for one minute for them to complete gracefully also
when killing a process.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 35d0e33381ca..95f9c5a8619b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -39,6 +39,7 @@
 #include "fpga/core.h"
 
 #define SBU_QP_QUEUE_SIZE 8
+#define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC	(60 * 1000)
 
 enum mlx5_ipsec_response_syndrome {
 	MLX5_IPSEC_RESPONSE_SUCCESS = 0,
@@ -217,12 +218,14 @@ void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
 int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
 {
 	struct mlx5_ipsec_command_context *context = ctx;
+	unsigned long timeout =
+		msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
 	int res;
 
-	res = wait_for_completion_killable(&context->complete);
-	if (res) {
+	res = wait_for_completion_timeout(&context->complete, timeout);
+	if (!res) {
 		mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
-		return -EINTR;
+		return -ETIMEDOUT;
 	}
 
 	if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)

From dc7debec07803f52d04dbd98d8047cae04dfe8ec Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Sun, 28 Jan 2018 17:25:35 +0200
Subject: [PATCH 0685/1678] net/mlx5e: Fixed sleeping inside atomic context

We can't allocate with GFP_KERNEL inside spinlock.
Actually ida_simple doesn't require spinlock so remove it.

Fixes: 547eede070eb ("net/mlx5e: IPSec, Innova IPSec offload infrastructure")
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c    | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index bac5103efad3..710521181143 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -74,18 +74,16 @@ static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
 	ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
 	if (ret < 0)
-		goto out;
+		return ret;
 
+	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
 	sa_entry->handle = ret;
 	hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
-	ret = 0;
-
-out:
 	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
-	return ret;
+
+	return 0;
 }
 
 static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
@@ -101,13 +99,10 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
 static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
-	unsigned long flags;
 
 	/* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
 	synchronize_rcu();
-	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
 	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
-	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
 }
 
 static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)

From 1c9a10ebc77a6f123c701ba31d0c35bbf7414cde Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Mon, 29 Jan 2018 13:09:12 +0200
Subject: [PATCH 0686/1678] net/mlx5e: Removed not need synchronize_rcu

This is already done by xfrm layer between state_dev_del callback
to state_dev_free callback.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 710521181143..1b49afca65c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -100,8 +100,8 @@ static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
 
-	/* Wait for the hash_del_rcu call in sadb_rx_del to affect data path */
-	synchronize_rcu();
+	/* xfrm already doing sync rcu between del and free callbacks */
+
 	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
 }
 

From 04e87170b0964d22c67245b739f56c3fe12fda54 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Sun, 19 Nov 2017 15:51:13 +0000
Subject: [PATCH 0687/1678] net/mlx5: FPGA and IPSec initialization to be
 before flow steering

Some flow steering namespace initialization (i.e. egress namespace)
might depend on FPGA capabilities. Changing the initialization order
such that the FPGA will be initialized before flow steering.

Flow steering fs cmds initialization might depend on
IPSec capabilities. Changing the initialization order such
that the IPSec will be initialized before flow steering as well.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/main.c    | 39 ++++++++++---------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8cc22bf80c87..03972eed02cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1173,6 +1173,18 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_affinity_hints;
 	}
 
+	err = mlx5_fpga_device_start(dev);
+	if (err) {
+		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
+		goto err_fpga_start;
+	}
+
+	err = mlx5_accel_ipsec_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+		goto err_ipsec_start;
+	}
+
 	err = mlx5_init_fs(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1191,17 +1203,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_sriov;
 	}
 
-	err = mlx5_fpga_device_start(dev);
-	if (err) {
-		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
-		goto err_fpga_start;
-	}
-	err = mlx5_accel_ipsec_init(dev);
-	if (err) {
-		dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
-		goto err_ipsec_start;
-	}
-
 	if (mlx5_device_registered(dev)) {
 		mlx5_attach_device(dev);
 	} else {
@@ -1219,17 +1220,18 @@ out:
 	return 0;
 
 err_reg_dev:
-	mlx5_accel_ipsec_cleanup(dev);
-err_ipsec_start:
-	mlx5_fpga_device_stop(dev);
-
-err_fpga_start:
 	mlx5_sriov_detach(dev);
 
 err_sriov:
 	mlx5_cleanup_fs(dev);
 
 err_fs:
+	mlx5_accel_ipsec_cleanup(dev);
+
+err_ipsec_start:
+	mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
 	mlx5_irq_clear_affinity_hints(dev);
 
 err_affinity_hints:
@@ -1296,11 +1298,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (mlx5_device_registered(dev))
 		mlx5_detach_device(dev);
 
-	mlx5_accel_ipsec_cleanup(dev);
-	mlx5_fpga_device_stop(dev);
-
 	mlx5_sriov_detach(dev);
 	mlx5_cleanup_fs(dev);
+	mlx5_accel_ipsec_cleanup(dev);
+	mlx5_fpga_device_stop(dev);
 	mlx5_irq_clear_affinity_hints(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);

From 075572d4b7aec1fcd6e1bd66ee8e0e06501f719f Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Wed, 16 Aug 2017 09:33:30 +0300
Subject: [PATCH 0688/1678] IB/mlx5: Pass mlx5_flow_act struct instead of
 multiple arguments

Group and pass all function arguments of parse_flow_attr call in one
common struct mlx5_flow_act.

This patch passes all the action arguments of parse_flow_attr in one common
struct mlx5_flow_act. It allows us to scale the number of actions without adding
new arguments to the function.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 23511638fbbc..1b305367a817 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2316,7 +2316,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 #define IPV6_VERSION 6
 static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			   u32 *match_v, const union ib_flow_spec *ib_spec,
-			   u32 *tag_id, bool *is_drop)
+			   struct mlx5_flow_act *action)
 {
 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   misc_parameters);
@@ -2534,13 +2534,13 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		if (ib_spec->flow_tag.tag_id >= BIT(24))
 			return -EINVAL;
 
-		*tag_id = ib_spec->flow_tag.tag_id;
+		action->flow_tag = ib_spec->flow_tag.tag_id;
 		break;
 	case IB_FLOW_SPEC_ACTION_DROP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
 					 LAST_DROP_FIELD))
 			return -EOPNOTSUPP;
-		*is_drop = true;
+		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
 		break;
 	default:
 		return -EINVAL;
@@ -2793,13 +2793,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 {
 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
 	struct mlx5_ib_flow_handler *handler;
-	struct mlx5_flow_act flow_act = {0};
+	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_destination *rule_dst = dst;
 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 	unsigned int spec_index;
-	u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
-	bool is_drop = false;
 	int err = 0;
 	int dest_num = 1;
 
@@ -2818,7 +2816,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 		err = parse_flow_attr(dev->mdev, spec->match_criteria,
 				      spec->match_value,
-				      ib_flow, &flow_tag, &is_drop);
+				      ib_flow, &flow_act);
 		if (err < 0)
 			goto free;
 
@@ -2841,8 +2839,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	}
 
 	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-	if (is_drop) {
-		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
 		rule_dst = NULL;
 		dest_num = 0;
 	} else {
@@ -2850,15 +2847,14 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 		    MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	}
 
-	if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+	if (flow_act.flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
 		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
-			     flow_tag, flow_attr->type);
+			     flow_act.flow_tag, flow_attr->type);
 		err = -EINVAL;
 		goto free;
 	}
-	flow_act.flow_tag = flow_tag;
 	handler->rule = mlx5_add_flow_rules(ft, spec,
 					    &flow_act,
 					    rule_dst, dest_num);

From a9db0ecf1578894ea3405f3eb5a441508840d479 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Wed, 16 Aug 2017 09:43:48 +0300
Subject: [PATCH 0689/1678] {net,IB}/mlx5: Add has_tag to mlx5_flow_act

The has_tag member will indicate whether a tag action was specified
in flow specification.

A flow tag 0 = MLX5_FS_DEFAULT_FLOW_TAG is assumed a valid flow tag
that is currently used by mlx5 RDMA driver, whereas in HW flow_tag = 0
means that the user doesn't care about flow_tag.  HW always provide
a flow_tag = 0 if all flow tags requested on a specific flow are 0.

So we need a way (in the driver) to differentiate between a user really
requesting flow_tag = 0 and a user who does not care, in order to be
able to report conflicting flow tags on a specific flow.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Reviewed-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                 | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +-
 include/linux/mlx5/fs.h                           | 1 +
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 1b305367a817..d50ace805995 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2535,6 +2535,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			return -EINVAL;
 
 		action->flow_tag = ib_spec->flow_tag.tag_id;
+		action->has_flow_tag = true;
 		break;
 	case IB_FLOW_SPEC_ACTION_DROP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -2847,7 +2848,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 		    MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	}
 
-	if (flow_act.flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+	if (flow_act.has_flow_tag &&
 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
 		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index fd98b0dc610f..eeff1fac77ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -675,6 +675,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	struct mlx5_flow_destination dest[2] = {};
 	struct mlx5_flow_act flow_act = {
 		.action = attr->action,
+		.has_flow_tag = true,
 		.flow_tag = attr->flow_tag,
 		.encap_id = 0,
 	};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index c025c98700e4..d81da6920be8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1443,7 +1443,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
 		return -EEXIST;
 	}
 
-	if (fte->flow_tag != flow_act->flow_tag) {
+	if (flow_act->has_flow_tag && fte->flow_tag != flow_act->flow_tag) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "FTE flow tag %u already exists with different flow tag %u\n",
 			       fte->flow_tag,
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index a0b48afcb422..f580bc4c2443 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -141,6 +141,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
 
 struct mlx5_flow_act {
 	u32 action;
+	bool has_flow_tag;
 	u32 flow_tag;
 	u32 encap_id;
 	u32 modify_id;

From af76c50198d1db136bbf293f3700c80722116831 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Sun, 20 Aug 2017 15:46:51 +0300
Subject: [PATCH 0690/1678] net/mlx5: Add shim layer between fs and cmd

The shim layer allows each namespace to define possibly different
functionality for add/delete/update commands. The shim layer
introduced here, will be used to support flow steering with the FPGA.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 191 +++++++++++++++---
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.h  |  72 ++++---
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |  84 ++++----
 .../net/ethernet/mellanox/mlx5/core/fs_core.h |   1 +
 4 files changed, 248 insertions(+), 100 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 881e2e55840c..c3eaddb43e57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -39,9 +39,81 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft, u32 underlay_qpn,
-			    bool disconnect)
+static int mlx5_cmd_stub_update_root_ft(struct mlx5_core_dev *dev,
+					struct mlx5_flow_table *ft,
+					u32 underlay_qpn,
+					bool disconnect)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_table(struct mlx5_core_dev *dev,
+					   u16 vport,
+					   enum fs_flow_table_op_mod op_mod,
+					   enum fs_flow_table_type type,
+					   unsigned int level,
+					   unsigned int log_size,
+					   struct mlx5_flow_table *next_ft,
+					   unsigned int *table_id, u32 flags)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_table(struct mlx5_core_dev *dev,
+					    struct mlx5_flow_table *ft)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_modify_flow_table(struct mlx5_core_dev *dev,
+					   struct mlx5_flow_table *ft,
+					   struct mlx5_flow_table *next_ft)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_create_flow_group(struct mlx5_core_dev *dev,
+					   struct mlx5_flow_table *ft,
+					   u32 *in,
+					   unsigned int *group_id)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_destroy_flow_group(struct mlx5_core_dev *dev,
+					    struct mlx5_flow_table *ft,
+					    unsigned int group_id)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_create_fte(struct mlx5_core_dev *dev,
+				    struct mlx5_flow_table *ft,
+				    struct mlx5_flow_group *group,
+				    struct fs_fte *fte)
+{
+	return 0;
+}
+
+static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
+				    struct mlx5_flow_table *ft,
+				    unsigned int group_id,
+				    int modify_mask,
+				    struct fs_fte *fte)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
+				    struct mlx5_flow_table *ft,
+				    unsigned int index)
+{
+	return 0;
+}
+
+static int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+				   struct mlx5_flow_table *ft, u32 underlay_qpn,
+				   bool disconnect)
 {
 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
@@ -71,12 +143,14 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
-			       u16 vport,
-			       enum fs_flow_table_op_mod op_mod,
-			       enum fs_flow_table_type type, unsigned int level,
-			       unsigned int log_size, struct mlx5_flow_table
-			       *next_ft, unsigned int *table_id, u32 flags)
+static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
+				      u16 vport,
+				      enum fs_flow_table_op_mod op_mod,
+				      enum fs_flow_table_type type,
+				      unsigned int level,
+				      unsigned int log_size,
+				      struct mlx5_flow_table *next_ft,
+				      unsigned int *table_id, u32 flags)
 {
 	int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
@@ -125,8 +199,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 	return err;
 }
 
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
-				struct mlx5_flow_table *ft)
+static int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
+				       struct mlx5_flow_table *ft)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
@@ -143,9 +217,9 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
-			       struct mlx5_flow_table *ft,
-			       struct mlx5_flow_table *next_ft)
+static int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
+				      struct mlx5_flow_table *ft,
+				      struct mlx5_flow_table *next_ft)
 {
 	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
@@ -188,10 +262,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
-			       struct mlx5_flow_table *ft,
-			       u32 *in,
-			       unsigned int *group_id)
+static int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
+				      struct mlx5_flow_table *ft,
+				      u32 *in,
+				      unsigned int *group_id)
 {
 	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -213,9 +287,9 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
 	return err;
 }
 
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
-				struct mlx5_flow_table *ft,
-				unsigned int group_id)
+static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
+				       struct mlx5_flow_table *ft,
+				       unsigned int group_id)
 {
 	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
@@ -332,19 +406,21 @@ err_out:
 	return err;
 }
 
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
-			struct mlx5_flow_table *ft,
-			unsigned group_id,
-			struct fs_fte *fte)
+static int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
+			       struct mlx5_flow_table *ft,
+			       struct mlx5_flow_group *group,
+			       struct fs_fte *fte)
 {
+	unsigned int group_id = group->id;
+
 	return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
 }
 
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
-			struct mlx5_flow_table *ft,
-			unsigned group_id,
-			int modify_mask,
-			struct fs_fte *fte)
+static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
+			       struct mlx5_flow_table *ft,
+			       unsigned int group_id,
+			       int modify_mask,
+			       struct fs_fte *fte)
 {
 	int opmod;
 	int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
@@ -357,9 +433,9 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
 	return	mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte);
 }
 
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
-			struct mlx5_flow_table *ft,
-			unsigned int index)
+static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
+			       struct mlx5_flow_table *ft,
+			       unsigned int index)
 {
 	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
@@ -610,3 +686,52 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds = {
+	.create_flow_table = mlx5_cmd_create_flow_table,
+	.destroy_flow_table = mlx5_cmd_destroy_flow_table,
+	.modify_flow_table = mlx5_cmd_modify_flow_table,
+	.create_flow_group = mlx5_cmd_create_flow_group,
+	.destroy_flow_group = mlx5_cmd_destroy_flow_group,
+	.create_fte = mlx5_cmd_create_fte,
+	.update_fte = mlx5_cmd_update_fte,
+	.delete_fte = mlx5_cmd_delete_fte,
+	.update_root_ft = mlx5_cmd_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
+	.create_flow_table = mlx5_cmd_stub_create_flow_table,
+	.destroy_flow_table = mlx5_cmd_stub_destroy_flow_table,
+	.modify_flow_table = mlx5_cmd_stub_modify_flow_table,
+	.create_flow_group = mlx5_cmd_stub_create_flow_group,
+	.destroy_flow_group = mlx5_cmd_stub_destroy_flow_group,
+	.create_fte = mlx5_cmd_stub_create_fte,
+	.update_fte = mlx5_cmd_stub_update_fte,
+	.delete_fte = mlx5_cmd_stub_delete_fte,
+	.update_root_ft = mlx5_cmd_stub_update_root_ft,
+};
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
+{
+	return &mlx5_flow_cmds;
+}
+
+static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_stub_cmds(void)
+{
+	return &mlx5_flow_cmd_stubs;
+}
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type)
+{
+	switch (type) {
+	case FS_FT_NIC_RX:
+	case FS_FT_ESW_EGRESS_ACL:
+	case FS_FT_ESW_INGRESS_ACL:
+	case FS_FT_FDB:
+	case FS_FT_SNIFFER_RX:
+	case FS_FT_SNIFFER_TX:
+		return mlx5_fs_cmd_get_fw_cmds();
+	default:
+		return mlx5_fs_cmd_get_stub_cmds();
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 71e2d0f37ad9..81c82f48d93e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -33,46 +33,52 @@
 #ifndef _MLX5_FS_CMD_
 #define _MLX5_FS_CMD_
 
-int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
-			       u16 vport,
-			       enum fs_flow_table_op_mod op_mod,
-			       enum fs_flow_table_type type, unsigned int level,
-			       unsigned int log_size, struct mlx5_flow_table
-			       *next_ft, unsigned int *table_id, u32 flags);
+#include "fs_core.h"
 
-int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
-				struct mlx5_flow_table *ft);
+struct mlx5_flow_cmds {
+	int (*create_flow_table)(struct mlx5_core_dev *dev,
+				 u16 vport,
+				 enum fs_flow_table_op_mod op_mod,
+				 enum fs_flow_table_type type,
+				 unsigned int level, unsigned int log_size,
+				 struct mlx5_flow_table *next_ft,
+				 unsigned int *table_id, u32 flags);
+	int (*destroy_flow_table)(struct mlx5_core_dev *dev,
+				  struct mlx5_flow_table *ft);
 
-int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
-			       struct mlx5_flow_table *ft,
-			       struct mlx5_flow_table *next_ft);
+	int (*modify_flow_table)(struct mlx5_core_dev *dev,
+				 struct mlx5_flow_table *ft,
+				 struct mlx5_flow_table *next_ft);
 
-int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
-			       struct mlx5_flow_table *ft,
-			       u32 *in, unsigned int *group_id);
+	int (*create_flow_group)(struct mlx5_core_dev *dev,
+				 struct mlx5_flow_table *ft,
+				 u32 *in,
+				 unsigned int *group_id);
 
-int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
-				struct mlx5_flow_table *ft,
-				unsigned int group_id);
+	int (*destroy_flow_group)(struct mlx5_core_dev *dev,
+				  struct mlx5_flow_table *ft,
+				  unsigned int group_id);
 
-int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
-			struct mlx5_flow_table *ft,
-			unsigned group_id,
-			struct fs_fte *fte);
+	int (*create_fte)(struct mlx5_core_dev *dev,
+			  struct mlx5_flow_table *ft,
+			  struct mlx5_flow_group *fg,
+			  struct fs_fte *fte);
 
-int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
-			struct mlx5_flow_table *ft,
-			unsigned group_id,
-			int modify_mask,
-			struct fs_fte *fte);
+	int (*update_fte)(struct mlx5_core_dev *dev,
+			  struct mlx5_flow_table *ft,
+			  unsigned int group_id,
+			  int modify_mask,
+			  struct fs_fte *fte);
 
-int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
-			struct mlx5_flow_table *ft,
-			unsigned int index);
+	int (*delete_fte)(struct mlx5_core_dev *dev,
+			  struct mlx5_flow_table *ft,
+			  unsigned int index);
 
-int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
-			    struct mlx5_flow_table *ft, u32 underlay_qpn,
-			    bool disconnect);
+	int (*update_root_ft)(struct mlx5_core_dev *dev,
+			      struct mlx5_flow_table *ft,
+			      u32 underlay_qpn,
+			      bool disconnect);
+};
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
@@ -90,4 +96,6 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
 			  struct mlx5_cmd_fc_bulk *b, u32 id,
 			  u64 *packets, u64 *bytes);
 
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d81da6920be8..f3a654b96b98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -425,15 +425,17 @@ static void del_sw_prio(struct fs_node *node)
 
 static void del_hw_flow_table(struct fs_node *node)
 {
+	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_table *ft;
 	struct mlx5_core_dev *dev;
 	int err;
 
 	fs_get_obj(ft, node);
 	dev = get_dev(&ft->node);
+	root = find_root(&ft->node);
 
 	if (node->active) {
-		err = mlx5_cmd_destroy_flow_table(dev, ft);
+		err = root->cmds->destroy_flow_table(dev, ft);
 		if (err)
 			mlx5_core_warn(dev, "flow steering can't destroy ft\n");
 	}
@@ -454,6 +456,7 @@ static void del_sw_flow_table(struct fs_node *node)
 
 static void del_sw_hw_rule(struct fs_node *node)
 {
+	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_rule *rule;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
@@ -488,8 +491,9 @@ static void del_sw_hw_rule(struct fs_node *node)
 		update_fte = true;
 	}
 out:
+	root = find_root(&ft->node);
 	if (update_fte && fte->dests_size) {
-		err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
+		err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte);
 		if (err)
 			mlx5_core_warn(dev,
 				       "%s can't del rule fg id=%d fte_index=%d\n",
@@ -500,6 +504,7 @@ out:
 
 static void del_hw_fte(struct fs_node *node)
 {
+	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
 	struct mlx5_core_dev *dev;
@@ -512,9 +517,9 @@ static void del_hw_fte(struct fs_node *node)
 
 	trace_mlx5_fs_del_fte(fte);
 	dev = get_dev(&ft->node);
+	root = find_root(&ft->node);
 	if (node->active) {
-		err = mlx5_cmd_delete_fte(dev, ft,
-					  fte->index);
+		err = root->cmds->delete_fte(dev, ft, fte->index);
 		if (err)
 			mlx5_core_warn(dev,
 				       "flow steering can't delete fte in index %d of flow group id %d\n",
@@ -542,6 +547,7 @@ static void del_sw_fte(struct fs_node *node)
 
 static void del_hw_flow_group(struct fs_node *node)
 {
+	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_group *fg;
 	struct mlx5_flow_table *ft;
 	struct mlx5_core_dev *dev;
@@ -551,7 +557,8 @@ static void del_hw_flow_group(struct fs_node *node)
 	dev = get_dev(&ft->node);
 	trace_mlx5_fs_del_fg(fg);
 
-	if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
+	root = find_root(&ft->node);
+	if (fg->node.active && root->cmds->destroy_flow_group(dev, ft, fg->id))
 		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
 			       fg->id, ft->id);
 }
@@ -797,15 +804,14 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
 			       struct fs_prio *prio,
 			       struct mlx5_flow_table *ft)
 {
+	struct mlx5_flow_root_namespace *root = find_root(&prio->node);
 	struct mlx5_flow_table *iter;
 	int i = 0;
 	int err;
 
 	fs_for_each_ft(iter, prio) {
 		i++;
-		err = mlx5_cmd_modify_flow_table(dev,
-						 iter,
-						 ft);
+		err = root->cmds->modify_flow_table(dev, iter, ft);
 		if (err) {
 			mlx5_core_warn(dev, "Failed to modify flow table %d\n",
 				       iter->id);
@@ -853,12 +859,12 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	if (list_empty(&root->underlay_qpns)) {
 		/* Don't set any QPN (zero) in case QPN list is empty */
 		qpn = 0;
-		err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false);
+		err = root->cmds->update_root_ft(root->dev, ft, qpn, false);
 	} else {
 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
 			qpn = uqp->qpn;
-			err = mlx5_cmd_update_root_ft(root->dev, ft, qpn,
-						      false);
+			err = root->cmds->update_root_ft(root->dev, ft,
+							 qpn, false);
 			if (err)
 				break;
 		}
@@ -877,6 +883,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 					 struct mlx5_flow_destination *dest)
 {
+	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
 	struct fs_fte *fte;
@@ -891,10 +898,9 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 	fs_get_obj(ft, fg->node.parent);
 
 	memcpy(&rule->dest_attr, dest, sizeof(*dest));
-	err = mlx5_cmd_update_fte(get_dev(&ft->node),
-				  ft, fg->id,
-				  modify_mask,
-				  fte);
+	root = find_root(&ft->node);
+	err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+				     modify_mask, fte);
 	up_write_ref_node(&fte->node);
 
 	return err;
@@ -1035,9 +1041,9 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
-	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
-					 ft->level, log_table_sz, next_ft, &ft->id,
-					 ft->flags);
+	err = root->cmds->create_flow_table(root->dev, ft->vport, ft->op_mod,
+					    ft->type, ft->level, log_table_sz,
+					    next_ft, &ft->id, ft->flags);
 	if (err)
 		goto free_ft;
 
@@ -1053,7 +1059,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	mutex_unlock(&root->chain_lock);
 	return ft;
 destroy_ft:
-	mlx5_cmd_destroy_flow_table(root->dev, ft);
+	root->cmds->destroy_flow_table(root->dev, ft);
 free_ft:
 	kfree(ft);
 unlock_root:
@@ -1125,6 +1131,7 @@ EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
 struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
 					       u32 *fg_in)
 {
+	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
 					    fg_in, match_criteria);
 	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
@@ -1152,7 +1159,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
 	if (IS_ERR(fg))
 		return fg;
 
-	err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+	err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id);
 	if (err) {
 		tree_put_node(&fg->node);
 		return ERR_PTR(err);
@@ -1275,6 +1282,7 @@ add_rule_fte(struct fs_fte *fte,
 	     int dest_num,
 	     bool update_action)
 {
+	struct mlx5_flow_root_namespace *root;
 	struct mlx5_flow_handle *handle;
 	struct mlx5_flow_table *ft;
 	int modify_mask = 0;
@@ -1290,12 +1298,13 @@ add_rule_fte(struct fs_fte *fte,
 		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
 
 	fs_get_obj(ft, fg->node.parent);
+	root = find_root(&fg->node);
 	if (!(fte->status & FS_FTE_STATUS_EXISTING))
-		err = mlx5_cmd_create_fte(get_dev(&ft->node),
-					  ft, fg->id, fte);
+		err = root->cmds->create_fte(get_dev(&ft->node),
+					     ft, fg, fte);
 	else
-		err = mlx5_cmd_update_fte(get_dev(&ft->node),
-					  ft, fg->id, modify_mask, fte);
+		err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id,
+						     modify_mask, fte);
 	if (err)
 		goto free_handle;
 
@@ -1360,6 +1369,7 @@ out:
 static int create_auto_flow_group(struct mlx5_flow_table *ft,
 				  struct mlx5_flow_group *fg)
 {
+	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_core_dev *dev = get_dev(&ft->node);
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	void *match_criteria_addr;
@@ -1380,7 +1390,7 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
 	memcpy(match_criteria_addr, fg->mask.match_criteria,
 	       sizeof(fg->mask.match_criteria));
 
-	err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id);
+	err = root->cmds->create_flow_group(dev, ft, in, &fg->id);
 	if (!err) {
 		fg->node.active = true;
 		trace_mlx5_fs_add_fg(fg);
@@ -1912,7 +1922,6 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 		return 0;
 
 	new_root_ft = find_next_ft(ft);
-
 	if (!new_root_ft) {
 		root->root_ft = NULL;
 		return 0;
@@ -1921,13 +1930,14 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 	if (list_empty(&root->underlay_qpns)) {
 		/* Don't set any QPN (zero) in case QPN list is empty */
 		qpn = 0;
-		err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn,
-					      false);
+		err = root->cmds->update_root_ft(root->dev, new_root_ft,
+						 qpn, false);
 	} else {
 		list_for_each_entry(uqp, &root->underlay_qpns, list) {
 			qpn = uqp->qpn;
-			err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
-						      qpn, false);
+			err = root->cmds->update_root_ft(root->dev,
+							 new_root_ft, qpn,
+							 false);
 			if (err)
 				break;
 		}
@@ -2229,10 +2239,11 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
 	return 0;
 }
 
-static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering,
-						       enum fs_flow_table_type
-						       table_type)
+static struct mlx5_flow_root_namespace
+*create_root_ns(struct mlx5_flow_steering *steering,
+		enum fs_flow_table_type table_type)
 {
+	const struct mlx5_flow_cmds *cmds = mlx5_fs_cmd_get_default(table_type);
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_namespace *ns;
 
@@ -2243,6 +2254,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
 
 	root_ns->dev = steering->dev;
 	root_ns->table_type = table_type;
+	root_ns->cmds = cmds;
 
 	INIT_LIST_HEAD(&root_ns->underlay_qpns);
 
@@ -2634,7 +2646,8 @@ int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
 		goto update_ft_fail;
 	}
 
-	err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false);
+	err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+					 false);
 	if (err) {
 		mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
 			       underlay_qpn, err);
@@ -2677,7 +2690,8 @@ int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
 		goto out;
 	}
 
-	err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true);
+	err = root->cmds->update_root_ft(dev, root->root_ft, underlay_qpn,
+					 true);
 	if (err)
 		mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
 			       underlay_qpn, err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 05262708f14b..45791c792296 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -224,6 +224,7 @@ struct mlx5_flow_root_namespace {
 	/* Should be held when chaining flow tables */
 	struct mutex			chain_lock;
 	struct list_head		underlay_qpns;
+	const struct mlx5_flow_cmds	*cmds;
 };
 
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev);

From 5f4183781a303da5ab6731b8c19328c5b9df89fa Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Sun, 18 Feb 2018 13:17:17 +0200
Subject: [PATCH 0691/1678] net/mlx5: Add empty egress namespace to flow
 steering core

Currently, we don't support egress flow steering namespace in mlx5
flow steering core implementation. However, when we want to encrypt
a packet, we model it as a flow steering rule in the egress path.
To overcome this, we add an empty egress namespace to flow steering.
This namespace is initialized only when ipsec support exists.
In the future, this will grow to a full blown full steering
implementation, resembling the ingress path.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/diag/fs_tracepoint.c   |  3 ++
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  1 +
 .../net/ethernet/mellanox/mlx5/core/fs_core.c | 28 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/fs_core.h |  2 ++
 include/linux/mlx5/fs.h                       |  1 +
 include/linux/mlx5/mlx5_ifc.h                 |  1 +
 6 files changed, 36 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 0be4575b58a2..3816b4506561 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -246,6 +246,9 @@ const char *parse_fs_dst(struct trace_seq *p,
 	case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
 		trace_seq_printf(p, "counter_id=%u\n", counter_id);
 		break;
+	case MLX5_FLOW_DESTINATION_TYPE_PORT:
+		trace_seq_printf(p, "port\n");
+		break;
 	}
 
 	trace_seq_putc(p, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index c3eaddb43e57..ed3ea80a24be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -731,6 +731,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
 	case FS_FT_SNIFFER_RX:
 	case FS_FT_SNIFFER_TX:
 		return mlx5_fs_cmd_get_fw_cmds();
+	case FS_FT_NIC_TX:
 	default:
 		return mlx5_fs_cmd_get_stub_cmds();
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f3a654b96b98..5c111186d103 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -37,6 +37,7 @@
 #include "fs_core.h"
 #include "fs_cmd.h"
 #include "diag/fs_tracepoint.h"
+#include "accel/ipsec.h"
 
 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
 					 sizeof(struct init_tree_node))
@@ -2049,6 +2050,11 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 			return &steering->sniffer_tx_root_ns->ns;
 		else
 			return NULL;
+	case MLX5_FLOW_NAMESPACE_EGRESS:
+		if (steering->egress_root_ns)
+			return &steering->egress_root_ns->ns;
+		else
+			return NULL;
 	default:
 		return NULL;
 	}
@@ -2413,6 +2419,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 	cleanup_root_ns(steering->fdb_root_ns);
 	cleanup_root_ns(steering->sniffer_rx_root_ns);
 	cleanup_root_ns(steering->sniffer_tx_root_ns);
+	cleanup_root_ns(steering->egress_root_ns);
 	mlx5_cleanup_fc_stats(dev);
 	kmem_cache_destroy(steering->ftes_cache);
 	kmem_cache_destroy(steering->fgs_cache);
@@ -2558,6 +2565,20 @@ cleanup_root_ns:
 	return err;
 }
 
+static int init_egress_root_ns(struct mlx5_flow_steering *steering)
+{
+	struct fs_prio *prio;
+
+	steering->egress_root_ns = create_root_ns(steering,
+						  FS_FT_NIC_TX);
+	if (!steering->egress_root_ns)
+		return -ENOMEM;
+
+	/* create 1 prio*/
+	prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
+	return PTR_ERR_OR_ZERO(prio);
+}
+
 int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_flow_steering *steering;
@@ -2623,6 +2644,13 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
+	if (mlx5_accel_ipsec_device_caps(steering->dev) &
+	    MLX5_ACCEL_IPSEC_DEVICE) {
+		err = init_egress_root_ns(steering);
+		if (err)
+			goto err;
+	}
+
 	return 0;
 err:
 	mlx5_cleanup_fs(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 45791c792296..8586af9ce514 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -48,6 +48,7 @@ enum fs_node_type {
 
 enum fs_flow_table_type {
 	FS_FT_NIC_RX          = 0x0,
+	FS_FT_NIC_TX          = 0x1,
 	FS_FT_ESW_EGRESS_ACL  = 0x2,
 	FS_FT_ESW_INGRESS_ACL = 0x3,
 	FS_FT_FDB             = 0X4,
@@ -75,6 +76,7 @@ struct mlx5_flow_steering {
 	struct mlx5_flow_root_namespace **esw_ingress_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_tx_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_rx_root_ns;
+	struct mlx5_flow_root_namespace	*egress_root_ns;
 };
 
 struct fs_node {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index f580bc4c2443..744ea228acea 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -69,6 +69,7 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_ESW_INGRESS,
 	MLX5_FLOW_NAMESPACE_SNIFFER_RX,
 	MLX5_FLOW_NAMESPACE_SNIFFER_TX,
+	MLX5_FLOW_NAMESPACE_EGRESS,
 };
 
 struct mlx5_flow_table;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f4e417686f62..9bc4ea0cf5a9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1091,6 +1091,7 @@ enum mlx5_flow_destination_type {
 	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE   = 0x1,
 	MLX5_FLOW_DESTINATION_TYPE_TIR          = 0x2,
 
+	MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
 	MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
 };
 

From d2ec6a35e834f953790cfea8a4f8a1415d19a431 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 9 Nov 2017 12:12:15 +0000
Subject: [PATCH 0692/1678] net/mlx5: Embed mlx5_flow_act into fs_fte

fte objects contain the match value and action. Currently, extending
the actions require in adding them both to the API and fs_fte.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/diag/fs_tracepoint.h   |  4 ++--
 .../net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 13 +++++-----
 .../net/ethernet/mellanox/mlx5/core/fs_core.c | 24 +++++++++----------
 .../net/ethernet/mellanox/mlx5/core/fs_core.h |  5 +---
 4 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 80eef4163f52..a6ba57fbb414 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -163,9 +163,9 @@ TRACE_EVENT(mlx5_fs_set_fte,
 			   fs_get_obj(__entry->fg, fte->node.parent);
 			   __entry->group_index = __entry->fg->id;
 			   __entry->index = fte->index;
-			   __entry->action = fte->action;
+			   __entry->action = fte->action.action;
 			   __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
-			   __entry->flow_tag = fte->flow_tag;
+			   __entry->flow_tag = fte->action.flow_tag;
 			   memcpy(__entry->mask_outer,
 				  MLX5_ADDR_OF(fte_match_param,
 					       &__entry->fg->mask.match_criteria,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index ed3ea80a24be..d9d7dd439bb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -340,16 +340,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
 	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
-	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
-	MLX5_SET(flow_context, in_flow_context, action, fte->action);
-	MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
-	MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id);
+	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+	MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
+	MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+	MLX5_SET(flow_context, in_flow_context, modify_header_id,
+		 fte->action.modify_id);
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
 	memcpy(in_match_value, &fte->val, sizeof(fte->val));
 
 	in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
-	if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		int list_size = 0;
 
 		list_for_each_entry(dst, &fte->node.children, node.list) {
@@ -375,7 +376,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 			 list_size);
 	}
 
-	if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+	if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
 					log_max_flow_counter,
 					ft->type));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 5c111186d103..2e4a1d4e0cea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -481,12 +481,12 @@ static void del_sw_hw_rule(struct fs_node *node)
 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
 	    --fte->dests_size) {
 		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
-		fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		update_fte = true;
 		goto out;
 	}
 
-	if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+	if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 	    --fte->dests_size) {
 		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
 		update_fte = true;
@@ -623,10 +623,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
 
 	memcpy(fte->val, match_value, sizeof(fte->val));
 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
-	fte->flow_tag = flow_act->flow_tag;
-	fte->action = flow_act->action;
-	fte->encap_id = flow_act->encap_id;
-	fte->modify_id = flow_act->modify_id;
+	fte->action = *flow_act;
 
 	tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
 
@@ -892,7 +889,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 	int err = 0;
 
 	fs_get_obj(fte, rule->node.parent);
-	if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+	if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return -EINVAL;
 	down_write_ref_node(&fte->node);
 	fs_get_obj(fg, fte->node.parent);
@@ -1448,16 +1445,17 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 
 static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
 {
-	if (check_conflicting_actions(flow_act->action, fte->action)) {
+	if (check_conflicting_actions(flow_act->action, fte->action.action)) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "Found two FTEs with conflicting actions\n");
 		return -EEXIST;
 	}
 
-	if (flow_act->has_flow_tag && fte->flow_tag != flow_act->flow_tag) {
+	if (flow_act->has_flow_tag &&
+	    fte->action.flow_tag != flow_act->flow_tag) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "FTE flow tag %u already exists with different flow tag %u\n",
-			       fte->flow_tag,
+			       fte->action.flow_tag,
 			       flow_act->flow_tag);
 		return -EEXIST;
 	}
@@ -1481,12 +1479,12 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	if (ret)
 		return ERR_PTR(ret);
 
-	old_action = fte->action;
-	fte->action |= flow_act->action;
+	old_action = fte->action.action;
+	fte->action.action |= flow_act->action;
 	handle = add_rule_fte(fte, fg, dest, dest_num,
 			      old_action != flow_act->action);
 	if (IS_ERR(handle)) {
-		fte->action = old_action;
+		fte->action.action = old_action;
 		return handle;
 	}
 	trace_mlx5_fs_set_fte(fte, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 8586af9ce514..e26d3e9d5f9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -176,11 +176,8 @@ struct fs_fte {
 	struct fs_node			node;
 	u32				val[MLX5_ST_SZ_DW_MATCH_PARAM];
 	u32				dests_size;
-	u32				flow_tag;
 	u32				index;
-	u32				action;
-	u32				encap_id;
-	u32				modify_id;
+	struct mlx5_flow_act		action;
 	enum fs_fte_status		status;
 	struct mlx5_fc			*counter;
 	struct rhash_head		hash;

From 3346c4873733a109bea29467308a754038b886a9 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Sun, 20 Aug 2017 15:13:08 +0300
Subject: [PATCH 0693/1678] {net,IB}/mlx5: Add flow steering helpers

Add helper functions that check if a protocol is
part of a flow steering match criteria.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c |   7 +-
 include/linux/mlx5/fs_helpers.h   | 134 ++++++++++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h     |   8 +-
 3 files changed, 143 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/mlx5/fs_helpers.h

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d50ace805995..d9474b95d8e5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -59,6 +59,7 @@
 #include "mlx5_ib.h"
 #include "ib_rep.h"
 #include "cmd.h"
+#include <linux/mlx5/fs_helpers.h>
 
 #define DRIVER_NAME "mlx5_ib"
 #define DRIVER_VERSION "5.0-0"
@@ -2312,8 +2313,6 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 		   offsetof(typeof(filter), field) -\
 		   sizeof(filter.field))
 
-#define IPV4_VERSION 4
-#define IPV6_VERSION 6
 static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			   u32 *match_v, const union ib_flow_spec *ib_spec,
 			   struct mlx5_flow_act *action)
@@ -2399,7 +2398,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 ip_version, 0xf);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 ip_version, IPV4_VERSION);
+				 ip_version, MLX5_FS_IPV4_VERSION);
 		} else {
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 ethertype, 0xffff);
@@ -2438,7 +2437,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 ip_version, 0xf);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 ip_version, IPV6_VERSION);
+				 ip_version, MLX5_FS_IPV6_VERSION);
 		} else {
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 ethertype, 0xffff);
diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h
new file mode 100644
index 000000000000..7b476bbae731
--- /dev/null
+++ b/include/linux/mlx5/fs_helpers.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MLX5_FS_HELPERS_
+#define _MLX5_FS_HELPERS_
+
+#include <linux/mlx5/mlx5_ifc.h>
+
+#define MLX5_FS_IPV4_VERSION 4
+#define MLX5_FS_IPV6_VERSION 6
+
+static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c,
+						  const u32 *match_v, u8 match)
+{
+	const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					     outer_headers);
+	const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					     outer_headers);
+
+	return MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_protocol) == 0xff &&
+		MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol) == match;
+}
+
+static inline bool mlx5_fs_is_outer_tcp_flow(const u32 *match_c,
+					     const u32 *match_v)
+{
+	return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_TCP);
+}
+
+static inline bool mlx5_fs_is_outer_udp_flow(const u32 *match_c,
+					     const u32 *match_v)
+{
+	return _mlx5_fs_is_outer_ipproto_flow(match_c, match_v, IPPROTO_UDP);
+}
+
+static inline bool mlx5_fs_is_vxlan_flow(const u32 *match_c)
+{
+	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					   misc_parameters);
+
+	return MLX5_GET(fte_match_set_misc, misc_params_c, vxlan_vni);
+}
+
+static inline bool _mlx5_fs_is_outer_ipv_flow(struct mlx5_core_dev *mdev,
+					      const u32 *match_c,
+					      const u32 *match_v, int version)
+{
+	int match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+						  ft_field_support.outer_ip_version);
+	const void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					     outer_headers);
+	const void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					     outer_headers);
+
+	if (!match_ipv) {
+		u16 ethertype;
+
+		switch (version) {
+		case MLX5_FS_IPV4_VERSION:
+			ethertype = ETH_P_IP;
+			break;
+		case MLX5_FS_IPV6_VERSION:
+			ethertype = ETH_P_IPV6;
+			break;
+		default:
+			return false;
+		}
+
+		return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+				ethertype) == 0xffff &&
+			MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+				 ethertype) == ethertype;
+	}
+
+	return MLX5_GET(fte_match_set_lyr_2_4, headers_c,
+			ip_version) == 0xf &&
+		MLX5_GET(fte_match_set_lyr_2_4, headers_v,
+			 ip_version) == version;
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv4_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+			   const u32 *match_v)
+{
+	return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+					  MLX5_FS_IPV4_VERSION);
+}
+
+static inline bool
+mlx5_fs_is_outer_ipv6_flow(struct mlx5_core_dev *mdev, const u32 *match_c,
+			   const u32 *match_v)
+{
+	return _mlx5_fs_is_outer_ipv_flow(mdev, match_c, match_v,
+					  MLX5_FS_IPV6_VERSION);
+}
+
+static inline bool mlx5_fs_is_outer_ipsec_flow(const u32 *match_c)
+{
+	void *misc_params_c =
+			MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters);
+
+	return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+}
+
+#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 9bc4ea0cf5a9..14ad84afe8ba 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -295,7 +295,9 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         inner_tcp_dport[0x1];
 	u8         inner_tcp_flags[0x1];
 	u8         reserved_at_37[0x9];
-	u8         reserved_at_40[0x1a];
+	u8         reserved_at_40[0x17];
+	u8	   outer_esp_spi[0x1];
+	u8	   reserved_at_58[0x2];
 	u8         bth_dst_qp[0x1];
 
 	u8         reserved_at_5b[0x25];
@@ -437,7 +439,9 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 
 	u8         reserved_at_120[0x28];
 	u8         bth_dst_qp[0x18];
-	u8         reserved_at_160[0xa0];
+	u8	   reserved_at_160[0x20];
+	u8	   outer_esp_spi[0x20];
+	u8         reserved_at_1a0[0x60];
 };
 
 struct mlx5_ifc_cmd_pas_bits {

From e810bf5e96e327500cc6334f9d56c8047aaabcff Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Sun, 18 Feb 2018 15:00:54 +0200
Subject: [PATCH 0694/1678] net/mlx5: Flow steering cmd interface should get
 the fte when deleting

Previously, deleting a flow steering entry only got the index.
Since the FPGA implementation of FTE's deletion might need to dig
inside the FTE itself, we would like to get the FTE's context.
Changing the interface to pass the FTE context.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h  | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index d9d7dd439bb9..645f83cac34d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -106,7 +106,7 @@ static int mlx5_cmd_stub_update_fte(struct mlx5_core_dev *dev,
 
 static int mlx5_cmd_stub_delete_fte(struct mlx5_core_dev *dev,
 				    struct mlx5_flow_table *ft,
-				    unsigned int index)
+				    struct fs_fte *fte)
 {
 	return 0;
 }
@@ -436,7 +436,7 @@ static int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
 
 static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 			       struct mlx5_flow_table *ft,
-			       unsigned int index)
+			       struct fs_fte *fte)
 {
 	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
@@ -444,7 +444,7 @@ static int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
 	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
 	MLX5_SET(delete_fte_in, in, table_type, ft->type);
 	MLX5_SET(delete_fte_in, in, table_id, ft->id);
-	MLX5_SET(delete_fte_in, in, flow_index, index);
+	MLX5_SET(delete_fte_in, in, flow_index, fte->index);
 	if (ft->vport) {
 		MLX5_SET(delete_fte_in, in, vport_number, ft->vport);
 		MLX5_SET(delete_fte_in, in, other_vport, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 81c82f48d93e..6228ba7bfa1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -72,7 +72,7 @@ struct mlx5_flow_cmds {
 
 	int (*delete_fte)(struct mlx5_core_dev *dev,
 			  struct mlx5_flow_table *ft,
-			  unsigned int index);
+			  struct fs_fte *fte);
 
 	int (*update_root_ft)(struct mlx5_core_dev *dev,
 			      struct mlx5_flow_table *ft,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2e4a1d4e0cea..4e456c292ce4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -520,7 +520,7 @@ static void del_hw_fte(struct fs_node *node)
 	dev = get_dev(&ft->node);
 	root = find_root(&ft->node);
 	if (node->active) {
-		err = root->cmds->delete_fte(dev, ft, fte->index);
+		err = root->cmds->delete_fte(dev, ft, fte);
 		if (err)
 			mlx5_core_warn(dev,
 				       "flow steering can't delete fte in index %d of flow group id %d\n",

From 415a13296a1a49639cabf9d2fe92267810caa47a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 5 Mar 2018 15:49:59 -0600
Subject: [PATCH 0695/1678] xfrm_policy: use true and false for boolean values

Assign true or false to boolean variables instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 12bd415d349e..2b7babb66175 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1740,7 +1740,7 @@ static void xfrm_pcpu_work_fn(struct work_struct *work)
 void xfrm_policy_cache_flush(void)
 {
 	struct xfrm_dst *old;
-	bool found = 0;
+	bool found = false;
 	int cpu;
 
 	might_sleep();

From 30855ffc29b972d3316e2adcf8029c00c36fad60 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 1 Mar 2018 15:23:28 +0300
Subject: [PATCH 0696/1678] net: Make account struct net to memcg

The patch adds SLAB_ACCOUNT to flags of net_cachep cache,
which enables accounting of struct net memory to memcg kmem.
Since number of net_namespaces may be significant, user
want to know, how much there were consumed, and control.

Note, that we do not account net_generic to the same memcg,
where net was accounted, moreover, we don't do this at all (*).
We do not want the situation, when single memcg memory deficit
prevents us to register new pernet_operations.

(*)Even despite there is !current process accounting already
available in linux-next. See kmalloc_memcg() there for the details.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 690e78c6af45..c340d5cfbdec 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -882,7 +882,7 @@ static int __init net_ns_init(void)
 #ifdef CONFIG_NET_NS
 	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 					SMP_CACHE_BYTES,
-					SLAB_PANIC, NULL);
+					SLAB_PANIC|SLAB_ACCOUNT, NULL);
 
 	/* Create workqueue for cleanup */
 	netns_wq = create_singlethread_workqueue("netns");

From 5c3d0fd4b2c047cedb1cbdad6e2d6bc9abfec256 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Sat, 3 Mar 2018 23:39:54 +0100
Subject: [PATCH 0697/1678] ravb: remove erroneous comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When addressing a review comment in a early version of the offending
patch a comment where left in which should have been removed. Remove the
comment to keep it consistent with the code.

Fixes: 75efa06f457bbed3 ("ravb: add support for changing MTU")
Reported-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 54a6265da7a0..68f122140966 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -346,7 +346,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
 	int ring_size;
 	int i;
 
-	/* +16 gets room from the status from the card. */
 	priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
 		ETH_HLEN + VLAN_HLEN;
 

From ed63afb8a318f6b3558d76afba7809daee4f28e5 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 5 Mar 2018 20:44:18 +0800
Subject: [PATCH 0698/1678] sctp: add support for PR-SCTP Information for
 sendmsg

This patch is to add support for PR-SCTP Information for sendmsg,
as described in section 5.3.7 of RFC6458.

With this option, you can specify pr_policy and pr_value for user
data in sendmsg.

It's also a necessary send info for sctp_sendv.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 include/uapi/linux/sctp.h  | 15 +++++++++++++++
 net/sctp/socket.c          | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 03e92dda1813..d40a2a329888 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2112,6 +2112,7 @@ struct sctp_cmsgs {
 	struct sctp_initmsg *init;
 	struct sctp_sndrcvinfo *srinfo;
 	struct sctp_sndinfo *sinfo;
+	struct sctp_prinfo *prinfo;
 };
 
 /* Structure for tracking memory objects */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 4c4db14786bd..0dd1f82a4fa8 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -260,6 +260,19 @@ struct sctp_nxtinfo {
 	sctp_assoc_t nxt_assoc_id;
 };
 
+/* 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_PRINFO    struct sctp_prinfo
+ */
+struct sctp_prinfo {
+	__u16 pr_policy;
+	__u32 pr_value;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -293,6 +306,8 @@ typedef enum sctp_cmsg_type {
 #define SCTP_RCVINFO	SCTP_RCVINFO
 	SCTP_NXTINFO,		/* 5.3.6 SCTP Next Receive Information Structure */
 #define SCTP_NXTINFO	SCTP_NXTINFO
+	SCTP_PRINFO,		/* 5.3.7 SCTP PR-SCTP Information Structure */
+#define SCTP_PRINFO	SCTP_PRINFO
 } sctp_cmsg_t;
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7fa76031bb08..fdde697b37e7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1644,6 +1644,12 @@ static int sctp_sendmsg_parse(struct sock *sk, struct sctp_cmsgs *cmsgs,
 		srinfo->sinfo_assoc_id = cmsgs->sinfo->snd_assoc_id;
 	}
 
+	if (cmsgs->prinfo) {
+		srinfo->sinfo_timetolive = cmsgs->prinfo->pr_value;
+		SCTP_PR_SET_POLICY(srinfo->sinfo_flags,
+				   cmsgs->prinfo->pr_policy);
+	}
+
 	sflags = srinfo->sinfo_flags;
 	if (!sflags && msg_len)
 		return 0;
@@ -1901,9 +1907,12 @@ static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
 		sinfo->sinfo_ppid = asoc->default_ppid;
 		sinfo->sinfo_context = asoc->default_context;
 		sinfo->sinfo_assoc_id = sctp_assoc2id(asoc);
+
+		if (!cmsgs->prinfo)
+			sinfo->sinfo_flags = asoc->default_flags;
 	}
 
-	if (!cmsgs->srinfo)
+	if (!cmsgs->srinfo && !cmsgs->prinfo)
 		sinfo->sinfo_timetolive = asoc->default_timetolive;
 }
 
@@ -7749,6 +7758,26 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 			      SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
+		case SCTP_PRINFO:
+			/* SCTP Socket API Extension
+			 * 5.3.7 SCTP PR-SCTP Information Structure (SCTP_PRINFO)
+			 *
+			 * This cmsghdr structure specifies SCTP options for sendmsg().
+			 *
+			 * cmsg_level    cmsg_type      cmsg_data[]
+			 * ------------  ------------   ---------------------
+			 * IPPROTO_SCTP  SCTP_PRINFO    struct sctp_prinfo
+			 */
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_prinfo)))
+				return -EINVAL;
+
+			cmsgs->prinfo = CMSG_DATA(cmsg);
+			if (cmsgs->prinfo->pr_policy & ~SCTP_PR_SCTP_MASK)
+				return -EINVAL;
+
+			if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
+				cmsgs->prinfo->pr_value = 0;
+			break;
 		default:
 			return -EINVAL;
 		}

From 2c0dbaa0c43d04d8d6daf52adb724c5789676b15 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 5 Mar 2018 20:44:19 +0800
Subject: [PATCH 0699/1678] sctp: add support for SCTP_DSTADDRV4/6 Information
 for sendmsg

This patch is to add support for Destination IPv4/6 Address options
for sendmsg, as described in section 5.3.9/10 of RFC6458.

With this option, you can provide more than one destination addrs
to sendmsg when creating asoc, like sctp_connectx.

It's also a necessary send info for sctp_sendv.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 include/uapi/linux/sctp.h  |  6 +++
 net/sctp/socket.c          | 77 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index d40a2a329888..ec6e46b7e119 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2113,6 +2113,7 @@ struct sctp_cmsgs {
 	struct sctp_sndrcvinfo *srinfo;
 	struct sctp_sndinfo *sinfo;
 	struct sctp_prinfo *prinfo;
+	struct msghdr *addrs_msg;
 };
 
 /* Structure for tracking memory objects */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 0dd1f82a4fa8..a1bc35098033 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -308,6 +308,12 @@ typedef enum sctp_cmsg_type {
 #define SCTP_NXTINFO	SCTP_NXTINFO
 	SCTP_PRINFO,		/* 5.3.7 SCTP PR-SCTP Information Structure */
 #define SCTP_PRINFO	SCTP_PRINFO
+	SCTP_AUTHINFO,		/* 5.3.8 SCTP AUTH Information Structure (RESERVED) */
+#define SCTP_AUTHINFO	SCTP_AUTHINFO
+	SCTP_DSTADDRV4,		/* 5.3.9 SCTP Destination IPv4 Address Structure */
+#define SCTP_DSTADDRV4	SCTP_DSTADDRV4
+	SCTP_DSTADDRV6,		/* 5.3.10 SCTP Destination IPv6 Address Structure */
+#define SCTP_DSTADDRV6	SCTP_DSTADDRV6
 } sctp_cmsg_t;
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fdde697b37e7..067b57a330b1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1676,6 +1676,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 	struct net *net = sock_net(sk);
 	struct sctp_association *asoc;
 	enum sctp_scope scope;
+	struct cmsghdr *cmsg;
 	int err = -EINVAL;
 
 	*tp = NULL;
@@ -1741,6 +1742,67 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 		goto free;
 	}
 
+	if (!cmsgs->addrs_msg)
+		return 0;
+
+	/* sendv addr list parse */
+	for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
+		struct sctp_transport *transport;
+		struct sctp_association *old;
+		union sctp_addr _daddr;
+		int dlen;
+
+		if (cmsg->cmsg_level != IPPROTO_SCTP ||
+		    (cmsg->cmsg_type != SCTP_DSTADDRV4 &&
+		     cmsg->cmsg_type != SCTP_DSTADDRV6))
+			continue;
+
+		daddr = &_daddr;
+		memset(daddr, 0, sizeof(*daddr));
+		dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
+		if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
+			if (dlen < sizeof(struct in_addr))
+				goto free;
+
+			dlen = sizeof(struct in_addr);
+			daddr->v4.sin_family = AF_INET;
+			daddr->v4.sin_port = htons(asoc->peer.port);
+			memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
+		} else {
+			if (dlen < sizeof(struct in6_addr))
+				goto free;
+
+			dlen = sizeof(struct in6_addr);
+			daddr->v6.sin6_family = AF_INET6;
+			daddr->v6.sin6_port = htons(asoc->peer.port);
+			memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
+		}
+		err = sctp_verify_addr(sk, daddr, sizeof(*daddr));
+		if (err)
+			goto free;
+
+		old = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
+		if (old && old != asoc) {
+			if (old->state >= SCTP_STATE_ESTABLISHED)
+				err = -EISCONN;
+			else
+				err = -EALREADY;
+			goto free;
+		}
+
+		if (sctp_endpoint_is_peeled_off(ep, daddr)) {
+			err = -EADDRNOTAVAIL;
+			goto free;
+		}
+
+		transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL,
+						SCTP_UNKNOWN);
+		if (!transport) {
+			err = -ENOMEM;
+			goto free;
+		}
+	}
+
 	return 0;
 
 free:
@@ -7778,6 +7840,21 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 			if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
 				cmsgs->prinfo->pr_value = 0;
 			break;
+		case SCTP_DSTADDRV4:
+		case SCTP_DSTADDRV6:
+			/* SCTP Socket API Extension
+			 * 5.3.9/10 SCTP Destination IPv4/6 Address Structure (SCTP_DSTADDRV4/6)
+			 *
+			 * This cmsghdr structure specifies SCTP options for sendmsg().
+			 *
+			 * cmsg_level    cmsg_type         cmsg_data[]
+			 * ------------  ------------   ---------------------
+			 * IPPROTO_SCTP  SCTP_DSTADDRV4 struct in_addr
+			 * ------------  ------------   ---------------------
+			 * IPPROTO_SCTP  SCTP_DSTADDRV6 struct in6_addr
+			 */
+			cmsgs->addrs_msg = my_msg;
+			break;
 		default:
 			return -EINVAL;
 		}

From 4910280503f3af2857d5aa77e35b22d93a8960a8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 5 Mar 2018 20:44:20 +0800
Subject: [PATCH 0700/1678] sctp: add support for snd flag SCTP_SENDALL process
 in sendmsg

This patch is to add support for snd flag SCTP_SENDALL process
in sendmsg, as described in section 5.3.4 of RFC6458.

With this flag, you can send the same data to all the asocs of
this sk once.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |  2 ++
 net/sctp/socket.c         | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index a1bc35098033..e94b6d297ad9 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -284,6 +284,8 @@ enum sctp_sinfo_flags {
 	SCTP_ADDR_OVER		= (1 << 1), /* Override the primary destination. */
 	SCTP_ABORT		= (1 << 2), /* Send an ABORT message to the peer. */
 	SCTP_SACK_IMMEDIATELY	= (1 << 3), /* SACK should be sent without delay. */
+	/* 2 bits here have been used by SCTP_PR_SCTP_MASK */
+	SCTP_SENDALL		= (1 << 6),
 	SCTP_NOTIFICATION	= MSG_NOTIFICATION, /* Next message is not user msg but notification. */
 	SCTP_EOF		= MSG_FIN,  /* Initiate graceful shutdown process. */
 };
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 067b57a330b1..7d3476a4860d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1820,6 +1820,10 @@ static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
 	if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP))
 		return -EPIPE;
 
+	if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP) &&
+	    !sctp_state(asoc, ESTABLISHED))
+		return 0;
+
 	if (sflags & SCTP_EOF) {
 		pr_debug("%s: shutting down association:%p\n", __func__, asoc);
 		sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@ -2007,6 +2011,29 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 
 	lock_sock(sk);
 
+	/* SCTP_SENDALL process */
+	if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) {
+		list_for_each_entry(asoc, &ep->asocs, asocs) {
+			err = sctp_sendmsg_check_sflags(asoc, sflags, msg,
+							msg_len);
+			if (err == 0)
+				continue;
+			if (err < 0)
+				goto out_unlock;
+
+			sctp_sendmsg_update_sinfo(asoc, sinfo, &cmsgs);
+
+			err = sctp_sendmsg_to_asoc(asoc, msg, msg_len,
+						   NULL, sinfo);
+			if (err < 0)
+				goto out_unlock;
+
+			iov_iter_revert(&msg->msg_iter, err);
+		}
+
+		goto out_unlock;
+	}
+
 	/* Get and check or create asoc */
 	if (daddr) {
 		asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport);
@@ -7792,8 +7819,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 
 			if (cmsgs->srinfo->sinfo_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-			      SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
-			      SCTP_ABORT | SCTP_EOF))
+			      SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+			      SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
 
@@ -7816,8 +7843,8 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 
 			if (cmsgs->sinfo->snd_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
-			      SCTP_SACK_IMMEDIATELY | SCTP_PR_SCTP_MASK |
-			      SCTP_ABORT | SCTP_EOF))
+			      SCTP_SACK_IMMEDIATELY | SCTP_SENDALL |
+			      SCTP_PR_SCTP_MASK | SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
 		case SCTP_PRINFO:

From 23f0703c125be490f70501b6b24ed5645775c56a Mon Sep 17 00:00:00 2001
From: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Date: Mon, 5 Mar 2018 14:23:30 -0500
Subject: [PATCH 0701/1678] lan743x: Add main source files for new lan743x
 driver

Add main source files for new lan743x driver

Signed-off-by: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/Kconfig        |   10 +
 drivers/net/ethernet/microchip/Makefile       |    3 +
 drivers/net/ethernet/microchip/lan743x_main.c | 2781 +++++++++++++++++
 drivers/net/ethernet/microchip/lan743x_main.h |  597 ++++
 4 files changed, 3391 insertions(+)
 create mode 100644 drivers/net/ethernet/microchip/lan743x_main.c
 create mode 100644 drivers/net/ethernet/microchip/lan743x_main.h

diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig
index 36a09d94b368..71dca8bd51ac 100644
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -42,4 +42,14 @@ config ENCX24J600
       To compile this driver as a module, choose M here. The module will be
       called encx24j600.
 
+config LAN743X
+	tristate "LAN743x support"
+	depends on PCI
+	select PHYLIB
+	---help---
+	  Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip
+
+	  To compile this driver as a module, choose M here. The module will be
+	  called lan743x.
+
 endif # NET_VENDOR_MICROCHIP
diff --git a/drivers/net/ethernet/microchip/Makefile b/drivers/net/ethernet/microchip/Makefile
index ff78f621b59a..2e982cc249fb 100644
--- a/drivers/net/ethernet/microchip/Makefile
+++ b/drivers/net/ethernet/microchip/Makefile
@@ -4,3 +4,6 @@
 
 obj-$(CONFIG_ENC28J60) += enc28j60.o
 obj-$(CONFIG_ENCX24J600) += encx24j600.o encx24j600-regmap.o
+obj-$(CONFIG_LAN743X) += lan743x.o
+
+lan743x-objs := lan743x_main.o
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
new file mode 100644
index 000000000000..04022ec5ae1e
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -0,0 +1,2781 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/crc32.h>
+#include <linux/microchipphy.h>
+#include <linux/net_tstamp.h>
+#include <linux/phy.h>
+#include <linux/rtnetlink.h>
+#include <linux/iopoll.h>
+#include "lan743x_main.h"
+
+static void lan743x_pci_cleanup(struct lan743x_adapter *adapter)
+{
+	pci_release_selected_regions(adapter->pdev,
+				     pci_select_bars(adapter->pdev,
+						     IORESOURCE_MEM));
+	pci_disable_device(adapter->pdev);
+}
+
+static int lan743x_pci_init(struct lan743x_adapter *adapter,
+			    struct pci_dev *pdev)
+{
+	unsigned long bars = 0;
+	int ret;
+
+	adapter->pdev = pdev;
+	ret = pci_enable_device_mem(pdev);
+	if (ret)
+		goto return_error;
+
+	netif_info(adapter, probe, adapter->netdev,
+		   "PCI: Vendor ID = 0x%04X, Device ID = 0x%04X\n",
+		   pdev->vendor, pdev->device);
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	if (!test_bit(0, &bars))
+		goto disable_device;
+
+	ret = pci_request_selected_regions(pdev, bars, DRIVER_NAME);
+	if (ret)
+		goto disable_device;
+
+	pci_set_master(pdev);
+	return 0;
+
+disable_device:
+	pci_disable_device(adapter->pdev);
+
+return_error:
+	return ret;
+}
+
+u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+{
+	return ioread32(&adapter->csr.csr_address[offset]);
+}
+
+void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 data)
+{
+	iowrite32(data, &adapter->csr.csr_address[offset]);
+}
+
+#define LAN743X_CSR_READ_OP(offset)	lan743x_csr_read(adapter, offset)
+
+static int lan743x_csr_light_reset(struct lan743x_adapter *adapter)
+{
+	u32 data;
+
+	data = lan743x_csr_read(adapter, HW_CFG);
+	data |= HW_CFG_LRST_;
+	lan743x_csr_write(adapter, HW_CFG, data);
+
+	return readx_poll_timeout(LAN743X_CSR_READ_OP, HW_CFG, data,
+				  !(data & HW_CFG_LRST_), 100000, 10000000);
+}
+
+static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter,
+				    int offset, u32 bit_mask,
+				    int target_value, int usleep_min,
+				    int usleep_max, int count)
+{
+	u32 data;
+
+	return readx_poll_timeout(LAN743X_CSR_READ_OP, offset, data,
+				  target_value == ((data & bit_mask) ? 1 : 0),
+				  usleep_max, usleep_min * count);
+}
+
+static int lan743x_csr_init(struct lan743x_adapter *adapter)
+{
+	struct lan743x_csr *csr = &adapter->csr;
+	resource_size_t bar_start, bar_length;
+	int result;
+
+	bar_start = pci_resource_start(adapter->pdev, 0);
+	bar_length = pci_resource_len(adapter->pdev, 0);
+	csr->csr_address = devm_ioremap(&adapter->pdev->dev,
+					bar_start, bar_length);
+	if (!csr->csr_address) {
+		result = -ENOMEM;
+		goto clean_up;
+	}
+
+	csr->id_rev = lan743x_csr_read(adapter, ID_REV);
+	csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV);
+	netif_info(adapter, probe, adapter->netdev,
+		   "ID_REV = 0x%08X, FPGA_REV = %d.%d\n",
+		   csr->id_rev,	FPGA_REV_GET_MAJOR_(csr->fpga_rev),
+		   FPGA_REV_GET_MINOR_(csr->fpga_rev));
+	if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) {
+		result = -ENODEV;
+		goto clean_up;
+	}
+
+	csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+	switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) {
+	case ID_REV_CHIP_REV_A0_:
+		csr->flags |= LAN743X_CSR_FLAG_IS_A0;
+		csr->flags &= ~LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR;
+		break;
+	case ID_REV_CHIP_REV_B0_:
+		csr->flags |= LAN743X_CSR_FLAG_IS_B0;
+		break;
+	}
+
+	result = lan743x_csr_light_reset(adapter);
+	if (result)
+		goto clean_up;
+	return 0;
+clean_up:
+	return result;
+}
+
+static void lan743x_intr_software_isr(void *context)
+{
+	struct lan743x_adapter *adapter = context;
+	struct lan743x_intr *intr = &adapter->intr;
+	u32 int_sts;
+
+	int_sts = lan743x_csr_read(adapter, INT_STS);
+	if (int_sts & INT_BIT_SW_GP_) {
+		lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
+		intr->software_isr_flag = 1;
+	}
+}
+
+static void lan743x_tx_isr(void *context, u32 int_sts, u32 flags)
+{
+	struct lan743x_tx *tx = context;
+	struct lan743x_adapter *adapter = tx->adapter;
+	bool enable_flag = true;
+	u32 int_en = 0;
+
+	int_en = lan743x_csr_read(adapter, INT_EN_SET);
+	if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+		lan743x_csr_write(adapter, INT_EN_CLR,
+				  INT_BIT_DMA_TX_(tx->channel_number));
+	}
+
+	if (int_sts & INT_BIT_DMA_TX_(tx->channel_number)) {
+		u32 ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+		u32 dmac_int_sts;
+		u32 dmac_int_en;
+
+		if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+			dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+		else
+			dmac_int_sts = ioc_bit;
+		if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+			dmac_int_en = lan743x_csr_read(adapter,
+						       DMAC_INT_EN_SET);
+		else
+			dmac_int_en = ioc_bit;
+
+		dmac_int_en &= ioc_bit;
+		dmac_int_sts &= dmac_int_en;
+		if (dmac_int_sts & ioc_bit) {
+			napi_schedule(&tx->napi);
+			enable_flag = false;/* poll func will enable later */
+		}
+	}
+
+	if (enable_flag)
+		/* enable isr */
+		lan743x_csr_write(adapter, INT_EN_SET,
+				  INT_BIT_DMA_TX_(tx->channel_number));
+}
+
+static void lan743x_rx_isr(void *context, u32 int_sts, u32 flags)
+{
+	struct lan743x_rx *rx = context;
+	struct lan743x_adapter *adapter = rx->adapter;
+	bool enable_flag = true;
+
+	if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR) {
+		lan743x_csr_write(adapter, INT_EN_CLR,
+				  INT_BIT_DMA_RX_(rx->channel_number));
+	}
+
+	if (int_sts & INT_BIT_DMA_RX_(rx->channel_number)) {
+		u32 rx_frame_bit = DMAC_INT_BIT_RXFRM_(rx->channel_number);
+		u32 dmac_int_sts;
+		u32 dmac_int_en;
+
+		if (flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ)
+			dmac_int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+		else
+			dmac_int_sts = rx_frame_bit;
+		if (flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK)
+			dmac_int_en = lan743x_csr_read(adapter,
+						       DMAC_INT_EN_SET);
+		else
+			dmac_int_en = rx_frame_bit;
+
+		dmac_int_en &= rx_frame_bit;
+		dmac_int_sts &= dmac_int_en;
+		if (dmac_int_sts & rx_frame_bit) {
+			napi_schedule(&rx->napi);
+			enable_flag = false;/* poll funct will enable later */
+		}
+	}
+
+	if (enable_flag) {
+		/* enable isr */
+		lan743x_csr_write(adapter, INT_EN_SET,
+				  INT_BIT_DMA_RX_(rx->channel_number));
+	}
+}
+
+static void lan743x_intr_shared_isr(void *context, u32 int_sts, u32 flags)
+{
+	struct lan743x_adapter *adapter = context;
+	unsigned int channel;
+
+	if (int_sts & INT_BIT_ALL_RX_) {
+		for (channel = 0; channel < LAN743X_USED_RX_CHANNELS;
+			channel++) {
+			u32 int_bit = INT_BIT_DMA_RX_(channel);
+
+			if (int_sts & int_bit) {
+				lan743x_rx_isr(&adapter->rx[channel],
+					       int_bit, flags);
+				int_sts &= ~int_bit;
+			}
+		}
+	}
+	if (int_sts & INT_BIT_ALL_TX_) {
+		for (channel = 0; channel < LAN743X_USED_TX_CHANNELS;
+			channel++) {
+			u32 int_bit = INT_BIT_DMA_TX_(channel);
+
+			if (int_sts & int_bit) {
+				lan743x_tx_isr(&adapter->tx[channel],
+					       int_bit, flags);
+				int_sts &= ~int_bit;
+			}
+		}
+	}
+	if (int_sts & INT_BIT_ALL_OTHER_) {
+		if (int_sts & INT_BIT_SW_GP_) {
+			lan743x_intr_software_isr(adapter);
+			int_sts &= ~INT_BIT_SW_GP_;
+		}
+	}
+	if (int_sts)
+		lan743x_csr_write(adapter, INT_EN_CLR, int_sts);
+}
+
+static irqreturn_t lan743x_intr_entry_isr(int irq, void *ptr)
+{
+	struct lan743x_vector *vector = ptr;
+	struct lan743x_adapter *adapter = vector->adapter;
+	irqreturn_t result = IRQ_NONE;
+	u32 int_enables;
+	u32 int_sts;
+
+	if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ) {
+		int_sts = lan743x_csr_read(adapter, INT_STS);
+	} else if (vector->flags &
+		   (LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C |
+		   LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)) {
+		int_sts = lan743x_csr_read(adapter, INT_STS_R2C);
+	} else {
+		/* use mask as implied status */
+		int_sts = vector->int_mask | INT_BIT_MAS_;
+	}
+
+	if (!(int_sts & INT_BIT_MAS_))
+		goto irq_done;
+
+	if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR)
+		/* disable vector interrupt */
+		lan743x_csr_write(adapter,
+				  INT_VEC_EN_CLR,
+				  INT_VEC_EN_(vector->vector_index));
+
+	if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR)
+		/* disable master interrupt */
+		lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+
+	if (vector->flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK) {
+		int_enables = lan743x_csr_read(adapter, INT_EN_SET);
+	} else {
+		/*  use vector mask as implied enable mask */
+		int_enables = vector->int_mask;
+	}
+
+	int_sts &= int_enables;
+	int_sts &= vector->int_mask;
+	if (int_sts) {
+		if (vector->handler) {
+			vector->handler(vector->context,
+					int_sts, vector->flags);
+		} else {
+			/* disable interrupts on this vector */
+			lan743x_csr_write(adapter, INT_EN_CLR,
+					  vector->int_mask);
+		}
+		result = IRQ_HANDLED;
+	}
+
+	if (vector->flags & LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET)
+		/* enable master interrupt */
+		lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+
+	if (vector->flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET)
+		/* enable vector interrupt */
+		lan743x_csr_write(adapter,
+				  INT_VEC_EN_SET,
+				  INT_VEC_EN_(vector->vector_index));
+irq_done:
+	return result;
+}
+
+static int lan743x_intr_test_isr(struct lan743x_adapter *adapter)
+{
+	struct lan743x_intr *intr = &adapter->intr;
+	int result = -ENODEV;
+	int timeout = 10;
+
+	intr->software_isr_flag = 0;
+
+	/* enable interrupt */
+	lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_SW_GP_);
+
+	/* activate interrupt here */
+	lan743x_csr_write(adapter, INT_SET, INT_BIT_SW_GP_);
+	while ((timeout > 0) && (!(intr->software_isr_flag))) {
+		usleep_range(1000, 20000);
+		timeout--;
+	}
+
+	if (intr->software_isr_flag)
+		result = 0;
+
+	/* disable interrupts */
+	lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_);
+	return result;
+}
+
+static int lan743x_intr_register_isr(struct lan743x_adapter *adapter,
+				     int vector_index, u32 flags,
+				     u32 int_mask,
+				     lan743x_vector_handler handler,
+				     void *context)
+{
+	struct lan743x_vector *vector = &adapter->intr.vector_list
+					[vector_index];
+	int ret;
+
+	vector->adapter = adapter;
+	vector->flags = flags;
+	vector->vector_index = vector_index;
+	vector->int_mask = int_mask;
+	vector->handler = handler;
+	vector->context = context;
+
+	ret = request_irq(vector->irq,
+			  lan743x_intr_entry_isr,
+			  (flags & LAN743X_VECTOR_FLAG_IRQ_SHARED) ?
+			  IRQF_SHARED : 0, DRIVER_NAME, vector);
+	if (ret) {
+		vector->handler = NULL;
+		vector->context = NULL;
+		vector->int_mask = 0;
+		vector->flags = 0;
+	}
+	return ret;
+}
+
+static void lan743x_intr_unregister_isr(struct lan743x_adapter *adapter,
+					int vector_index)
+{
+	struct lan743x_vector *vector = &adapter->intr.vector_list
+					[vector_index];
+
+	free_irq(vector->irq, vector);
+	vector->handler = NULL;
+	vector->context = NULL;
+	vector->int_mask = 0;
+	vector->flags = 0;
+}
+
+static u32 lan743x_intr_get_vector_flags(struct lan743x_adapter *adapter,
+					 u32 int_mask)
+{
+	int index;
+
+	for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+		if (adapter->intr.vector_list[index].int_mask & int_mask)
+			return adapter->intr.vector_list[index].flags;
+	}
+	return 0;
+}
+
+static void lan743x_intr_close(struct lan743x_adapter *adapter)
+{
+	struct lan743x_intr *intr = &adapter->intr;
+	int index = 0;
+
+	lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_MAS_);
+	lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0x000000FF);
+
+	for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++) {
+		if (intr->flags & INTR_FLAG_IRQ_REQUESTED(index)) {
+			lan743x_intr_unregister_isr(adapter, index);
+			intr->flags &= ~INTR_FLAG_IRQ_REQUESTED(index);
+		}
+	}
+
+	if (intr->flags & INTR_FLAG_MSI_ENABLED) {
+		pci_disable_msi(adapter->pdev);
+		intr->flags &= ~INTR_FLAG_MSI_ENABLED;
+	}
+
+	if (intr->flags & INTR_FLAG_MSIX_ENABLED) {
+		pci_disable_msix(adapter->pdev);
+		intr->flags &= ~INTR_FLAG_MSIX_ENABLED;
+	}
+}
+
+static int lan743x_intr_open(struct lan743x_adapter *adapter)
+{
+	struct msix_entry msix_entries[LAN743X_MAX_VECTOR_COUNT];
+	struct lan743x_intr *intr = &adapter->intr;
+	u32 int_vec_en_auto_clr = 0;
+	u32 int_vec_map0 = 0;
+	u32 int_vec_map1 = 0;
+	int ret = -ENODEV;
+	int index = 0;
+	u32 flags = 0;
+
+	intr->number_of_vectors = 0;
+
+	/* Try to set up MSIX interrupts */
+	memset(&msix_entries[0], 0,
+	       sizeof(struct msix_entry) * LAN743X_MAX_VECTOR_COUNT);
+	for (index = 0; index < LAN743X_MAX_VECTOR_COUNT; index++)
+		msix_entries[index].entry = index;
+	ret = pci_enable_msix_range(adapter->pdev,
+				    msix_entries, 1,
+				    1 + LAN743X_USED_TX_CHANNELS +
+				    LAN743X_USED_RX_CHANNELS);
+
+	if (ret > 0) {
+		intr->flags |= INTR_FLAG_MSIX_ENABLED;
+		intr->number_of_vectors = ret;
+		intr->using_vectors = true;
+		for (index = 0; index < intr->number_of_vectors; index++)
+			intr->vector_list[index].irq = msix_entries
+						       [index].vector;
+		netif_info(adapter, ifup, adapter->netdev,
+			   "using MSIX interrupts, number of vectors = %d\n",
+			   intr->number_of_vectors);
+	}
+
+	/* If MSIX failed try to setup using MSI interrupts */
+	if (!intr->number_of_vectors) {
+		if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+			if (!pci_enable_msi(adapter->pdev)) {
+				intr->flags |= INTR_FLAG_MSI_ENABLED;
+				intr->number_of_vectors = 1;
+				intr->using_vectors = true;
+				intr->vector_list[0].irq =
+					adapter->pdev->irq;
+				netif_info(adapter, ifup, adapter->netdev,
+					   "using MSI interrupts, number of vectors = %d\n",
+					   intr->number_of_vectors);
+			}
+		}
+	}
+
+	/* If MSIX, and MSI failed, setup using legacy interrupt */
+	if (!intr->number_of_vectors) {
+		intr->number_of_vectors = 1;
+		intr->using_vectors = false;
+		intr->vector_list[0].irq = intr->irq;
+		netif_info(adapter, ifup, adapter->netdev,
+			   "using legacy interrupts\n");
+	}
+
+	/* At this point we must have at least one irq */
+	lan743x_csr_write(adapter, INT_VEC_EN_CLR, 0xFFFFFFFF);
+
+	/* map all interrupts to vector 0 */
+	lan743x_csr_write(adapter, INT_VEC_MAP0, 0x00000000);
+	lan743x_csr_write(adapter, INT_VEC_MAP1, 0x00000000);
+	lan743x_csr_write(adapter, INT_VEC_MAP2, 0x00000000);
+	flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+		LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+		LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+		LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+
+	if (intr->using_vectors) {
+		flags |= LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+			 LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+	} else {
+		flags |= LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR |
+			 LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET |
+			 LAN743X_VECTOR_FLAG_IRQ_SHARED;
+	}
+
+	if (adapter->csr.flags & LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+		flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ;
+		flags &= ~LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C;
+		flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR;
+		flags &= ~LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK;
+		flags |= LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C;
+		flags |= LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C;
+	}
+
+	ret = lan743x_intr_register_isr(adapter, 0, flags,
+					INT_BIT_ALL_RX_ | INT_BIT_ALL_TX_ |
+					INT_BIT_ALL_OTHER_,
+					lan743x_intr_shared_isr, adapter);
+	if (ret)
+		goto clean_up;
+	intr->flags |= INTR_FLAG_IRQ_REQUESTED(0);
+
+	if (intr->using_vectors)
+		lan743x_csr_write(adapter, INT_VEC_EN_SET,
+				  INT_VEC_EN_(0));
+
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+		lan743x_csr_write(adapter, INT_MOD_CFG0, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG1, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG2, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG3, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG4, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG5, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG6, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_CFG7, LAN743X_INT_MOD);
+		lan743x_csr_write(adapter, INT_MOD_MAP0, 0x00005432);
+		lan743x_csr_write(adapter, INT_MOD_MAP1, 0x00000001);
+		lan743x_csr_write(adapter, INT_MOD_MAP2, 0x00FFFFFF);
+	}
+
+	/* enable interrupts */
+	lan743x_csr_write(adapter, INT_EN_SET, INT_BIT_MAS_);
+	ret = lan743x_intr_test_isr(adapter);
+	if (ret)
+		goto clean_up;
+
+	if (intr->number_of_vectors > 1) {
+		int number_of_tx_vectors = intr->number_of_vectors - 1;
+
+		if (number_of_tx_vectors > LAN743X_USED_TX_CHANNELS)
+			number_of_tx_vectors = LAN743X_USED_TX_CHANNELS;
+		flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+			LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+			LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+			LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+			LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+			LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+		if (adapter->csr.flags &
+		   LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+			flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+				LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+				LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+				LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+				LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+		}
+
+		for (index = 0; index < number_of_tx_vectors; index++) {
+			u32 int_bit = INT_BIT_DMA_TX_(index);
+			int vector = index + 1;
+
+			/* map TX interrupt to vector */
+			int_vec_map1 |= INT_VEC_MAP1_TX_VEC_(index, vector);
+			lan743x_csr_write(adapter, INT_VEC_MAP1, int_vec_map1);
+			if (flags &
+			    LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+				int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+				lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+						  int_vec_en_auto_clr);
+			}
+
+			/* Remove TX interrupt from shared mask */
+			intr->vector_list[0].int_mask &= ~int_bit;
+			ret = lan743x_intr_register_isr(adapter, vector, flags,
+							int_bit, lan743x_tx_isr,
+							&adapter->tx[index]);
+			if (ret)
+				goto clean_up;
+			intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+			if (!(flags &
+			    LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET))
+				lan743x_csr_write(adapter, INT_VEC_EN_SET,
+						  INT_VEC_EN_(vector));
+		}
+	}
+	if ((intr->number_of_vectors - LAN743X_USED_TX_CHANNELS) > 1) {
+		int number_of_rx_vectors = intr->number_of_vectors -
+					   LAN743X_USED_TX_CHANNELS - 1;
+
+		if (number_of_rx_vectors > LAN743X_USED_RX_CHANNELS)
+			number_of_rx_vectors = LAN743X_USED_RX_CHANNELS;
+
+		flags = LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ |
+			LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C |
+			LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK |
+			LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR |
+			LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR |
+			LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET;
+
+		if (adapter->csr.flags &
+		    LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) {
+			flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR |
+				LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET |
+				LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET |
+				LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR |
+				LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR;
+		}
+		for (index = 0; index < number_of_rx_vectors; index++) {
+			int vector = index + 1 + LAN743X_USED_TX_CHANNELS;
+			u32 int_bit = INT_BIT_DMA_RX_(index);
+
+			/* map RX interrupt to vector */
+			int_vec_map0 |= INT_VEC_MAP0_RX_VEC_(index, vector);
+			lan743x_csr_write(adapter, INT_VEC_MAP0, int_vec_map0);
+			if (flags &
+			    LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) {
+				int_vec_en_auto_clr |= INT_VEC_EN_(vector);
+				lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR,
+						  int_vec_en_auto_clr);
+			}
+
+			/* Remove RX interrupt from shared mask */
+			intr->vector_list[0].int_mask &= ~int_bit;
+			ret = lan743x_intr_register_isr(adapter, vector, flags,
+							int_bit, lan743x_rx_isr,
+							&adapter->rx[index]);
+			if (ret)
+				goto clean_up;
+			intr->flags |= INTR_FLAG_IRQ_REQUESTED(vector);
+
+			lan743x_csr_write(adapter, INT_VEC_EN_SET,
+					  INT_VEC_EN_(vector));
+		}
+	}
+	return 0;
+
+clean_up:
+	lan743x_intr_close(adapter);
+	return ret;
+}
+
+static int lan743x_dp_write(struct lan743x_adapter *adapter,
+			    u32 select, u32 addr, u32 length, u32 *buf)
+{
+	int ret = -EIO;
+	u32 dp_sel;
+	int i;
+
+	mutex_lock(&adapter->dp_lock);
+	if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+				     1, 40, 100, 100))
+		goto unlock;
+	dp_sel = lan743x_csr_read(adapter, DP_SEL);
+	dp_sel &= ~DP_SEL_MASK_;
+	dp_sel |= select;
+	lan743x_csr_write(adapter, DP_SEL, dp_sel);
+
+	for (i = 0; i < length; i++) {
+		lan743x_csr_write(adapter, DP_ADDR, addr + i);
+		lan743x_csr_write(adapter, DP_DATA_0, buf[i]);
+		lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
+		if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
+					     1, 40, 100, 100))
+			goto unlock;
+	}
+	ret = 0;
+
+unlock:
+	mutex_unlock(&adapter->dp_lock);
+	return ret;
+}
+
+static u32 lan743x_mac_mii_access(u16 id, u16 index, int read)
+{
+	u32 ret;
+
+	ret = (id << MAC_MII_ACC_PHY_ADDR_SHIFT_) &
+		MAC_MII_ACC_PHY_ADDR_MASK_;
+	ret |= (index << MAC_MII_ACC_MIIRINDA_SHIFT_) &
+		MAC_MII_ACC_MIIRINDA_MASK_;
+
+	if (read)
+		ret |= MAC_MII_ACC_MII_READ_;
+	else
+		ret |= MAC_MII_ACC_MII_WRITE_;
+	ret |= MAC_MII_ACC_MII_BUSY_;
+
+	return ret;
+}
+
+static int lan743x_mac_mii_wait_till_not_busy(struct lan743x_adapter *adapter)
+{
+	u32 data;
+
+	return readx_poll_timeout(LAN743X_CSR_READ_OP, MAC_MII_ACC, data,
+				  !(data & MAC_MII_ACC_MII_BUSY_), 0, 1000000);
+}
+
+static int lan743x_mdiobus_read(struct mii_bus *bus, int phy_id, int index)
+{
+	struct lan743x_adapter *adapter = bus->priv;
+	u32 val, mii_access;
+	int ret;
+
+	/* comfirm MII not busy */
+	ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+	if (ret < 0)
+		return ret;
+
+	/* set the address, index & direction (read from PHY) */
+	mii_access = lan743x_mac_mii_access(phy_id, index, MAC_MII_READ);
+	lan743x_csr_write(adapter, MAC_MII_ACC, mii_access);
+	ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+	if (ret < 0)
+		return ret;
+
+	val = lan743x_csr_read(adapter, MAC_MII_DATA);
+	return (int)(val & 0xFFFF);
+}
+
+static int lan743x_mdiobus_write(struct mii_bus *bus,
+				 int phy_id, int index, u16 regval)
+{
+	struct lan743x_adapter *adapter = bus->priv;
+	u32 val, mii_access;
+	int ret;
+
+	/* confirm MII not busy */
+	ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+	if (ret < 0)
+		return ret;
+	val = (u32)regval;
+	lan743x_csr_write(adapter, MAC_MII_DATA, val);
+
+	/* set the address, index & direction (write to PHY) */
+	mii_access = lan743x_mac_mii_access(phy_id, index, MAC_MII_WRITE);
+	lan743x_csr_write(adapter, MAC_MII_ACC, mii_access);
+	ret = lan743x_mac_mii_wait_till_not_busy(adapter);
+	return ret;
+}
+
+static void lan743x_mac_set_address(struct lan743x_adapter *adapter,
+				    u8 *addr)
+{
+	u32 addr_lo, addr_hi;
+
+	addr_lo = addr[0] |
+		addr[1] << 8 |
+		addr[2] << 16 |
+		addr[3] << 24;
+	addr_hi = addr[4] |
+		addr[5] << 8;
+	lan743x_csr_write(adapter, MAC_RX_ADDRL, addr_lo);
+	lan743x_csr_write(adapter, MAC_RX_ADDRH, addr_hi);
+
+	ether_addr_copy(adapter->mac_address, addr);
+	netif_info(adapter, drv, adapter->netdev,
+		   "MAC address set to %pM\n", addr);
+}
+
+static int lan743x_mac_init(struct lan743x_adapter *adapter)
+{
+	bool mac_address_valid = true;
+	struct net_device *netdev;
+	u32 mac_addr_hi = 0;
+	u32 mac_addr_lo = 0;
+	u32 data;
+	int ret;
+
+	netdev = adapter->netdev;
+	lan743x_csr_write(adapter, MAC_CR, MAC_CR_RST_);
+	ret = lan743x_csr_wait_for_bit(adapter, MAC_CR, MAC_CR_RST_,
+				       0, 1000, 20000, 100);
+	if (ret)
+		return ret;
+
+	/* setup auto duplex, and speed detection */
+	data = lan743x_csr_read(adapter, MAC_CR);
+	data |= MAC_CR_ADD_ | MAC_CR_ASD_;
+	data |= MAC_CR_CNTR_RST_;
+	lan743x_csr_write(adapter, MAC_CR, data);
+
+	mac_addr_hi = lan743x_csr_read(adapter, MAC_RX_ADDRH);
+	mac_addr_lo = lan743x_csr_read(adapter, MAC_RX_ADDRL);
+	adapter->mac_address[0] = mac_addr_lo & 0xFF;
+	adapter->mac_address[1] = (mac_addr_lo >> 8) & 0xFF;
+	adapter->mac_address[2] = (mac_addr_lo >> 16) & 0xFF;
+	adapter->mac_address[3] = (mac_addr_lo >> 24) & 0xFF;
+	adapter->mac_address[4] = mac_addr_hi & 0xFF;
+	adapter->mac_address[5] = (mac_addr_hi >> 8) & 0xFF;
+
+	if (((mac_addr_hi & 0x0000FFFF) == 0x0000FFFF) &&
+	    mac_addr_lo == 0xFFFFFFFF) {
+		mac_address_valid = false;
+	} else if (!is_valid_ether_addr(adapter->mac_address)) {
+		mac_address_valid = false;
+	}
+
+	if (!mac_address_valid)
+		random_ether_addr(adapter->mac_address);
+	lan743x_mac_set_address(adapter, adapter->mac_address);
+	ether_addr_copy(netdev->dev_addr, adapter->mac_address);
+	return 0;
+}
+
+static int lan743x_mac_open(struct lan743x_adapter *adapter)
+{
+	int ret = 0;
+	u32 temp;
+
+	temp = lan743x_csr_read(adapter, MAC_RX);
+	lan743x_csr_write(adapter, MAC_RX, temp | MAC_RX_RXEN_);
+	temp = lan743x_csr_read(adapter, MAC_TX);
+	lan743x_csr_write(adapter, MAC_TX, temp | MAC_TX_TXEN_);
+	return ret;
+}
+
+static void lan743x_mac_close(struct lan743x_adapter *adapter)
+{
+	u32 temp;
+
+	temp = lan743x_csr_read(adapter, MAC_TX);
+	temp &= ~MAC_TX_TXEN_;
+	lan743x_csr_write(adapter, MAC_TX, temp);
+	lan743x_csr_wait_for_bit(adapter, MAC_TX, MAC_TX_TXD_,
+				 1, 1000, 20000, 100);
+
+	temp = lan743x_csr_read(adapter, MAC_RX);
+	temp &= ~MAC_RX_RXEN_;
+	lan743x_csr_write(adapter, MAC_RX, temp);
+	lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+				 1, 1000, 20000, 100);
+}
+
+static void lan743x_mac_flow_ctrl_set_enables(struct lan743x_adapter *adapter,
+					      bool tx_enable, bool rx_enable)
+{
+	u32 flow_setting = 0;
+
+	/* set maximum pause time because when fifo space frees
+	 * up a zero value pause frame will be sent to release the pause
+	 */
+	flow_setting = MAC_FLOW_CR_FCPT_MASK_;
+	if (tx_enable)
+		flow_setting |= MAC_FLOW_CR_TX_FCEN_;
+	if (rx_enable)
+		flow_setting |= MAC_FLOW_CR_RX_FCEN_;
+	lan743x_csr_write(adapter, MAC_FLOW, flow_setting);
+}
+
+static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu)
+{
+	int enabled = 0;
+	u32 mac_rx = 0;
+
+	mac_rx = lan743x_csr_read(adapter, MAC_RX);
+	if (mac_rx & MAC_RX_RXEN_) {
+		enabled = 1;
+		if (mac_rx & MAC_RX_RXD_) {
+			lan743x_csr_write(adapter, MAC_RX, mac_rx);
+			mac_rx &= ~MAC_RX_RXD_;
+		}
+		mac_rx &= ~MAC_RX_RXEN_;
+		lan743x_csr_write(adapter, MAC_RX, mac_rx);
+		lan743x_csr_wait_for_bit(adapter, MAC_RX, MAC_RX_RXD_,
+					 1, 1000, 20000, 100);
+		lan743x_csr_write(adapter, MAC_RX, mac_rx | MAC_RX_RXD_);
+	}
+
+	mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_);
+	mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) &
+		  MAC_RX_MAX_SIZE_MASK_);
+	lan743x_csr_write(adapter, MAC_RX, mac_rx);
+
+	if (enabled) {
+		mac_rx |= MAC_RX_RXEN_;
+		lan743x_csr_write(adapter, MAC_RX, mac_rx);
+	}
+	return 0;
+}
+
+/* PHY */
+static int lan743x_phy_reset(struct lan743x_adapter *adapter)
+{
+	u32 data;
+
+	/* Only called with in probe, and before mdiobus_register */
+
+	data = lan743x_csr_read(adapter, PMT_CTL);
+	data |= PMT_CTL_ETH_PHY_RST_;
+	lan743x_csr_write(adapter, PMT_CTL, data);
+
+	return readx_poll_timeout(LAN743X_CSR_READ_OP, PMT_CTL, data,
+				  (!(data & PMT_CTL_ETH_PHY_RST_) &&
+				  (data & PMT_CTL_READY_)),
+				  50000, 1000000);
+}
+
+static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
+					   u8 duplex, u16 local_adv,
+					   u16 remote_adv)
+{
+	struct lan743x_phy *phy = &adapter->phy;
+	u8 cap;
+
+	if (phy->fc_autoneg)
+		cap = mii_resolve_flowctrl_fdx(local_adv, remote_adv);
+	else
+		cap = phy->fc_request_control;
+
+	lan743x_mac_flow_ctrl_set_enables(adapter,
+					  cap & FLOW_CTRL_TX,
+					  cap & FLOW_CTRL_RX);
+}
+
+static int lan743x_phy_init(struct lan743x_adapter *adapter)
+{
+	struct net_device *netdev;
+	int ret;
+
+	netdev = adapter->netdev;
+	ret = lan743x_phy_reset(adapter);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void lan743x_phy_link_status_change(struct net_device *netdev)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	struct phy_device *phydev = netdev->phydev;
+
+	phy_print_status(phydev);
+	if (phydev->state == PHY_RUNNING) {
+		struct ethtool_link_ksettings ksettings;
+		struct lan743x_phy *phy = NULL;
+		int remote_advertisement = 0;
+		int local_advertisement = 0;
+
+		phy = &adapter->phy;
+		memset(&ksettings, 0, sizeof(ksettings));
+		phy_ethtool_get_link_ksettings(netdev, &ksettings);
+		local_advertisement = phy_read(phydev, MII_ADVERTISE);
+		if (local_advertisement < 0)
+			return;
+
+		remote_advertisement = phy_read(phydev, MII_LPA);
+		if (remote_advertisement < 0)
+			return;
+
+		lan743x_phy_update_flowcontrol(adapter,
+					       ksettings.base.duplex,
+					       local_advertisement,
+					       remote_advertisement);
+	}
+}
+
+static void lan743x_phy_close(struct lan743x_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	phy_stop(netdev->phydev);
+	phy_disconnect(netdev->phydev);
+	netdev->phydev = NULL;
+}
+
+static int lan743x_phy_open(struct lan743x_adapter *adapter)
+{
+	struct lan743x_phy *phy = &adapter->phy;
+	struct phy_device *phydev;
+	struct net_device *netdev;
+	int ret = -EIO;
+	u32 mii_adv;
+
+	netdev = adapter->netdev;
+	phydev = phy_find_first(adapter->mdiobus);
+	if (!phydev)
+		goto return_error;
+
+	ret = phy_connect_direct(netdev, phydev,
+				 lan743x_phy_link_status_change,
+				 PHY_INTERFACE_MODE_GMII);
+	if (ret)
+		goto return_error;
+
+	/* MAC doesn't support 1000T Half */
+	phydev->supported &= ~SUPPORTED_1000baseT_Half;
+
+	/* support both flow controls */
+	phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
+	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+	mii_adv = (u32)mii_advertise_flowctrl(phy->fc_request_control);
+	phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
+	phy->fc_autoneg = phydev->autoneg;
+
+	phy_start(phydev);
+	phy_start_aneg(phydev);
+	return 0;
+
+return_error:
+	return ret;
+}
+
+static void lan743x_rfe_update_mac_address(struct lan743x_adapter *adapter)
+{
+	u8 *mac_addr;
+	u32 mac_addr_hi = 0;
+	u32 mac_addr_lo = 0;
+
+	/* Add mac address to perfect Filter */
+	mac_addr = adapter->mac_address;
+	mac_addr_lo = ((((u32)(mac_addr[0])) << 0) |
+		      (((u32)(mac_addr[1])) << 8) |
+		      (((u32)(mac_addr[2])) << 16) |
+		      (((u32)(mac_addr[3])) << 24));
+	mac_addr_hi = ((((u32)(mac_addr[4])) << 0) |
+		      (((u32)(mac_addr[5])) << 8));
+
+	lan743x_csr_write(adapter, RFE_ADDR_FILT_LO(0), mac_addr_lo);
+	lan743x_csr_write(adapter, RFE_ADDR_FILT_HI(0),
+			  mac_addr_hi | RFE_ADDR_FILT_HI_VALID_);
+}
+
+static void lan743x_rfe_set_multicast(struct lan743x_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	u32 hash_table[DP_SEL_VHF_HASH_LEN];
+	u32 rfctl;
+	u32 data;
+
+	rfctl = lan743x_csr_read(adapter, RFE_CTL);
+	rfctl &= ~(RFE_CTL_AU_ | RFE_CTL_AM_ |
+		 RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_);
+	rfctl |= RFE_CTL_AB_;
+	if (netdev->flags & IFF_PROMISC) {
+		rfctl |= RFE_CTL_AM_ | RFE_CTL_AU_;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI)
+			rfctl |= RFE_CTL_AM_;
+	}
+
+	memset(hash_table, 0, DP_SEL_VHF_HASH_LEN * sizeof(u32));
+	if (netdev_mc_count(netdev)) {
+		struct netdev_hw_addr *ha;
+		int i;
+
+		rfctl |= RFE_CTL_DA_PERFECT_;
+		i = 1;
+		netdev_for_each_mc_addr(ha, netdev) {
+			/* set first 32 into Perfect Filter */
+			if (i < 33) {
+				lan743x_csr_write(adapter,
+						  RFE_ADDR_FILT_HI(i), 0);
+				data = ha->addr[3];
+				data = ha->addr[2] | (data << 8);
+				data = ha->addr[1] | (data << 8);
+				data = ha->addr[0] | (data << 8);
+				lan743x_csr_write(adapter,
+						  RFE_ADDR_FILT_LO(i), data);
+				data = ha->addr[5];
+				data = ha->addr[4] | (data << 8);
+				data |= RFE_ADDR_FILT_HI_VALID_;
+				lan743x_csr_write(adapter,
+						  RFE_ADDR_FILT_HI(i), data);
+			} else {
+				u32 bitnum = (ether_crc(ETH_ALEN, ha->addr) >>
+					     23) & 0x1FF;
+				hash_table[bitnum / 32] |= (1 << (bitnum % 32));
+				rfctl |= RFE_CTL_MCAST_HASH_;
+			}
+			i++;
+		}
+	}
+
+	lan743x_dp_write(adapter, DP_SEL_RFE_RAM,
+			 DP_SEL_VHF_VLAN_LEN,
+			 DP_SEL_VHF_HASH_LEN, hash_table);
+	lan743x_csr_write(adapter, RFE_CTL, rfctl);
+}
+
+static int lan743x_dmac_init(struct lan743x_adapter *adapter)
+{
+	u32 data = 0;
+
+	lan743x_csr_write(adapter, DMAC_CMD, DMAC_CMD_SWR_);
+	lan743x_csr_wait_for_bit(adapter, DMAC_CMD, DMAC_CMD_SWR_,
+				 0, 1000, 20000, 100);
+	switch (DEFAULT_DMA_DESCRIPTOR_SPACING) {
+	case DMA_DESCRIPTOR_SPACING_16:
+		data = DMAC_CFG_MAX_DSPACE_16_;
+		break;
+	case DMA_DESCRIPTOR_SPACING_32:
+		data = DMAC_CFG_MAX_DSPACE_32_;
+		break;
+	case DMA_DESCRIPTOR_SPACING_64:
+		data = DMAC_CFG_MAX_DSPACE_64_;
+		break;
+	case DMA_DESCRIPTOR_SPACING_128:
+		data = DMAC_CFG_MAX_DSPACE_128_;
+		break;
+	default:
+		return -EPERM;
+	}
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+		data |= DMAC_CFG_COAL_EN_;
+	data |= DMAC_CFG_CH_ARB_SEL_RX_HIGH_;
+	data |= DMAC_CFG_MAX_READ_REQ_SET_(6);
+	lan743x_csr_write(adapter, DMAC_CFG, data);
+	data = DMAC_COAL_CFG_TIMER_LIMIT_SET_(1);
+	data |= DMAC_COAL_CFG_TIMER_TX_START_;
+	data |= DMAC_COAL_CFG_FLUSH_INTS_;
+	data |= DMAC_COAL_CFG_INT_EXIT_COAL_;
+	data |= DMAC_COAL_CFG_CSR_EXIT_COAL_;
+	data |= DMAC_COAL_CFG_TX_THRES_SET_(0x0A);
+	data |= DMAC_COAL_CFG_RX_THRES_SET_(0x0C);
+	lan743x_csr_write(adapter, DMAC_COAL_CFG, data);
+	data = DMAC_OBFF_TX_THRES_SET_(0x08);
+	data |= DMAC_OBFF_RX_THRES_SET_(0x0A);
+	lan743x_csr_write(adapter, DMAC_OBFF_CFG, data);
+	return 0;
+}
+
+static int lan743x_dmac_tx_get_state(struct lan743x_adapter *adapter,
+				     int tx_channel)
+{
+	u32 dmac_cmd = 0;
+
+	dmac_cmd = lan743x_csr_read(adapter, DMAC_CMD);
+	return DMAC_CHANNEL_STATE_SET((dmac_cmd &
+				      DMAC_CMD_START_T_(tx_channel)),
+				      (dmac_cmd &
+				      DMAC_CMD_STOP_T_(tx_channel)));
+}
+
+static int lan743x_dmac_tx_wait_till_stopped(struct lan743x_adapter *adapter,
+					     int tx_channel)
+{
+	int timeout = 100;
+	int result = 0;
+
+	while (timeout &&
+	       ((result = lan743x_dmac_tx_get_state(adapter, tx_channel)) ==
+	       DMAC_CHANNEL_STATE_STOP_PENDING)) {
+		usleep_range(1000, 20000);
+		timeout--;
+	}
+	if (result == DMAC_CHANNEL_STATE_STOP_PENDING)
+		result = -ENODEV;
+	return result;
+}
+
+static int lan743x_dmac_rx_get_state(struct lan743x_adapter *adapter,
+				     int rx_channel)
+{
+	u32 dmac_cmd = 0;
+
+	dmac_cmd = lan743x_csr_read(adapter, DMAC_CMD);
+	return DMAC_CHANNEL_STATE_SET((dmac_cmd &
+				      DMAC_CMD_START_R_(rx_channel)),
+				      (dmac_cmd &
+				      DMAC_CMD_STOP_R_(rx_channel)));
+}
+
+static int lan743x_dmac_rx_wait_till_stopped(struct lan743x_adapter *adapter,
+					     int rx_channel)
+{
+	int timeout = 100;
+	int result = 0;
+
+	while (timeout &&
+	       ((result = lan743x_dmac_rx_get_state(adapter, rx_channel)) ==
+	       DMAC_CHANNEL_STATE_STOP_PENDING)) {
+		usleep_range(1000, 20000);
+		timeout--;
+	}
+	if (result == DMAC_CHANNEL_STATE_STOP_PENDING)
+		result = -ENODEV;
+	return result;
+}
+
+static void lan743x_tx_release_desc(struct lan743x_tx *tx,
+				    int descriptor_index, bool cleanup)
+{
+	struct lan743x_tx_buffer_info *buffer_info = NULL;
+	struct lan743x_tx_descriptor *descriptor = NULL;
+	u32 descriptor_type = 0;
+
+	descriptor = &tx->ring_cpu_ptr[descriptor_index];
+	buffer_info = &tx->buffer_info[descriptor_index];
+	if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_ACTIVE))
+		goto done;
+
+	descriptor_type = (descriptor->data0) &
+			  TX_DESC_DATA0_DTYPE_MASK_;
+	if (descriptor_type == TX_DESC_DATA0_DTYPE_DATA_)
+		goto clean_up_data_descriptor;
+	else
+		goto clear_active;
+
+clean_up_data_descriptor:
+	if (buffer_info->dma_ptr) {
+		if (buffer_info->flags &
+		    TX_BUFFER_INFO_FLAG_SKB_FRAGMENT) {
+			dma_unmap_page(&tx->adapter->pdev->dev,
+				       buffer_info->dma_ptr,
+				       buffer_info->buffer_length,
+				       DMA_TO_DEVICE);
+		} else {
+			dma_unmap_single(&tx->adapter->pdev->dev,
+					 buffer_info->dma_ptr,
+					 buffer_info->buffer_length,
+					 DMA_TO_DEVICE);
+		}
+		buffer_info->dma_ptr = 0;
+		buffer_info->buffer_length = 0;
+	}
+	if (buffer_info->skb) {
+		dev_kfree_skb(buffer_info->skb);
+		buffer_info->skb = NULL;
+	}
+
+clear_active:
+	buffer_info->flags &= ~TX_BUFFER_INFO_FLAG_ACTIVE;
+
+done:
+	memset(buffer_info, 0, sizeof(*buffer_info));
+	memset(descriptor, 0, sizeof(*descriptor));
+}
+
+static int lan743x_tx_next_index(struct lan743x_tx *tx, int index)
+{
+	return ((++index) % tx->ring_size);
+}
+
+static void lan743x_tx_release_completed_descriptors(struct lan743x_tx *tx)
+{
+	while ((*tx->head_cpu_ptr) != (tx->last_head)) {
+		lan743x_tx_release_desc(tx, tx->last_head, false);
+		tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+	}
+}
+
+static void lan743x_tx_release_all_descriptors(struct lan743x_tx *tx)
+{
+	u32 original_head = 0;
+
+	original_head = tx->last_head;
+	do {
+		lan743x_tx_release_desc(tx, tx->last_head, true);
+		tx->last_head = lan743x_tx_next_index(tx, tx->last_head);
+	} while (tx->last_head != original_head);
+	memset(tx->ring_cpu_ptr, 0,
+	       sizeof(*tx->ring_cpu_ptr) * (tx->ring_size));
+	memset(tx->buffer_info, 0,
+	       sizeof(*tx->buffer_info) * (tx->ring_size));
+}
+
+static int lan743x_tx_get_desc_cnt(struct lan743x_tx *tx,
+				   struct sk_buff *skb)
+{
+	int result = 1; /* 1 for the main skb buffer */
+	int nr_frags = 0;
+
+	if (skb_is_gso(skb))
+		result++; /* requires an extension descriptor */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	result += nr_frags; /* 1 for each fragment buffer */
+	return result;
+}
+
+static int lan743x_tx_get_avail_desc(struct lan743x_tx *tx)
+{
+	int last_head = tx->last_head;
+	int last_tail = tx->last_tail;
+
+	if (last_tail >= last_head)
+		return tx->ring_size - last_tail + last_head - 1;
+	else
+		return last_head - last_tail - 1;
+}
+
+static int lan743x_tx_frame_start(struct lan743x_tx *tx,
+				  unsigned char *first_buffer,
+				  unsigned int first_buffer_length,
+				  unsigned int frame_length,
+				  bool check_sum)
+{
+	/* called only from within lan743x_tx_xmit_frame.
+	 * assuming tx->ring_lock has already been acquired.
+	 */
+	struct lan743x_tx_descriptor *tx_descriptor = NULL;
+	struct lan743x_tx_buffer_info *buffer_info = NULL;
+	struct lan743x_adapter *adapter = tx->adapter;
+	struct device *dev = &adapter->pdev->dev;
+	dma_addr_t dma_ptr;
+
+	tx->frame_flags |= TX_FRAME_FLAG_IN_PROGRESS;
+	tx->frame_first = tx->last_tail;
+	tx->frame_tail = tx->frame_first;
+
+	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+	buffer_info = &tx->buffer_info[tx->frame_tail];
+	dma_ptr = dma_map_single(dev, first_buffer, first_buffer_length,
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_ptr))
+		return -ENOMEM;
+
+	tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr);
+	tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr);
+	tx_descriptor->data3 = (frame_length << 16) &
+		TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+	buffer_info->skb = NULL;
+	buffer_info->dma_ptr = dma_ptr;
+	buffer_info->buffer_length = first_buffer_length;
+	buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+	tx->frame_data0 = (first_buffer_length &
+		TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+		TX_DESC_DATA0_DTYPE_DATA_ |
+		TX_DESC_DATA0_FS_ |
+		TX_DESC_DATA0_FCS_;
+
+	if (check_sum)
+		tx->frame_data0 |= TX_DESC_DATA0_ICE_ |
+				   TX_DESC_DATA0_IPE_ |
+				   TX_DESC_DATA0_TPE_;
+
+	/* data0 will be programmed in one of other frame assembler functions */
+	return 0;
+}
+
+static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx,
+				     unsigned int frame_length)
+{
+	/* called only from within lan743x_tx_xmit_frame.
+	 * assuming tx->ring_lock has already been acquired.
+	 */
+	struct lan743x_tx_descriptor *tx_descriptor = NULL;
+	struct lan743x_tx_buffer_info *buffer_info = NULL;
+
+	/* wrap up previous descriptor */
+	tx->frame_data0 |= TX_DESC_DATA0_EXT_;
+	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+	tx_descriptor->data0 = tx->frame_data0;
+
+	/* move to next descriptor */
+	tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+	buffer_info = &tx->buffer_info[tx->frame_tail];
+
+	/* add extension descriptor */
+	tx_descriptor->data1 = 0;
+	tx_descriptor->data2 = 0;
+	tx_descriptor->data3 = 0;
+
+	buffer_info->skb = NULL;
+	buffer_info->dma_ptr = 0;
+	buffer_info->buffer_length = 0;
+	buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+
+	tx->frame_data0 = (frame_length & TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_) |
+			  TX_DESC_DATA0_DTYPE_EXT_ |
+			  TX_DESC_DATA0_EXT_LSO_;
+
+	/* data0 will be programmed in one of other frame assembler functions */
+}
+
+static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx,
+					 const struct skb_frag_struct *fragment,
+					 unsigned int frame_length)
+{
+	/* called only from within lan743x_tx_xmit_frame
+	 * assuming tx->ring_lock has already been acquired
+	 */
+	struct lan743x_tx_descriptor *tx_descriptor = NULL;
+	struct lan743x_tx_buffer_info *buffer_info = NULL;
+	struct lan743x_adapter *adapter = tx->adapter;
+	struct device *dev = &adapter->pdev->dev;
+	unsigned int fragment_length = 0;
+	dma_addr_t dma_ptr;
+
+	fragment_length = skb_frag_size(fragment);
+	if (!fragment_length)
+		return 0;
+
+	/* wrap up previous descriptor */
+	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+	tx_descriptor->data0 = tx->frame_data0;
+
+	/* move to next descriptor */
+	tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+	buffer_info = &tx->buffer_info[tx->frame_tail];
+	dma_ptr = skb_frag_dma_map(dev, fragment,
+				   0, fragment_length,
+				   DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_ptr)) {
+		int desc_index;
+
+		/* cleanup all previously setup descriptors */
+		desc_index = tx->frame_first;
+		while (desc_index != tx->frame_tail) {
+			lan743x_tx_release_desc(tx, desc_index, true);
+			desc_index = lan743x_tx_next_index(tx, desc_index);
+		}
+		dma_wmb();
+		tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+		tx->frame_first = 0;
+		tx->frame_data0 = 0;
+		tx->frame_tail = 0;
+		return -ENOMEM;
+	}
+
+	tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr);
+	tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr);
+	tx_descriptor->data3 = (frame_length << 16) &
+			       TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_;
+
+	buffer_info->skb = NULL;
+	buffer_info->dma_ptr = dma_ptr;
+	buffer_info->buffer_length = fragment_length;
+	buffer_info->flags |= TX_BUFFER_INFO_FLAG_ACTIVE;
+	buffer_info->flags |= TX_BUFFER_INFO_FLAG_SKB_FRAGMENT;
+
+	tx->frame_data0 = (fragment_length & TX_DESC_DATA0_BUF_LENGTH_MASK_) |
+			  TX_DESC_DATA0_DTYPE_DATA_ |
+			  TX_DESC_DATA0_FCS_;
+
+	/* data0 will be programmed in one of other frame assembler functions */
+	return 0;
+}
+
+static void lan743x_tx_frame_end(struct lan743x_tx *tx,
+				 struct sk_buff *skb,
+				 bool ignore_sync)
+{
+	/* called only from within lan743x_tx_xmit_frame
+	 * assuming tx->ring_lock has already been acquired
+	 */
+	struct lan743x_tx_descriptor *tx_descriptor = NULL;
+	struct lan743x_tx_buffer_info *buffer_info = NULL;
+	struct lan743x_adapter *adapter = tx->adapter;
+	u32 tx_tail_flags = 0;
+
+	/* wrap up previous descriptor */
+	tx->frame_data0 |= TX_DESC_DATA0_LS_;
+	tx->frame_data0 |= TX_DESC_DATA0_IOC_;
+
+	tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail];
+	buffer_info = &tx->buffer_info[tx->frame_tail];
+	buffer_info->skb = skb;
+	if (ignore_sync)
+		buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC;
+
+	tx_descriptor->data0 = tx->frame_data0;
+	tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail);
+	tx->last_tail = tx->frame_tail;
+
+	dma_wmb();
+
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+		tx_tail_flags |= TX_TAIL_SET_TOP_INT_VEC_EN_;
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET)
+		tx_tail_flags |= TX_TAIL_SET_DMAC_INT_EN_ |
+		TX_TAIL_SET_TOP_INT_EN_;
+
+	lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+			  tx_tail_flags | tx->frame_tail);
+	tx->frame_flags &= ~TX_FRAME_FLAG_IN_PROGRESS;
+}
+
+static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
+					 struct sk_buff *skb)
+{
+	int required_number_of_descriptors = 0;
+	unsigned int start_frame_length = 0;
+	unsigned int frame_length = 0;
+	unsigned int head_length = 0;
+	unsigned long irq_flags = 0;
+	bool ignore_sync = false;
+	int nr_frags = 0;
+	bool gso = false;
+	int j;
+
+	required_number_of_descriptors = lan743x_tx_get_desc_cnt(tx, skb);
+
+	spin_lock_irqsave(&tx->ring_lock, irq_flags);
+	if (required_number_of_descriptors >
+		lan743x_tx_get_avail_desc(tx)) {
+		if (required_number_of_descriptors > (tx->ring_size - 1)) {
+			dev_kfree_skb(skb);
+		} else {
+			/* save to overflow buffer */
+			tx->overflow_skb = skb;
+			netif_stop_queue(tx->adapter->netdev);
+		}
+		goto unlock;
+	}
+
+	/* space available, transmit skb  */
+	head_length = skb_headlen(skb);
+	frame_length = skb_pagelen(skb);
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	start_frame_length = frame_length;
+	gso = skb_is_gso(skb);
+	if (gso) {
+		start_frame_length = max(skb_shinfo(skb)->gso_size,
+					 (unsigned short)8);
+	}
+
+	if (lan743x_tx_frame_start(tx,
+				   skb->data, head_length,
+				   start_frame_length,
+				   skb->ip_summed == CHECKSUM_PARTIAL)) {
+		dev_kfree_skb(skb);
+		goto unlock;
+	}
+
+	if (gso)
+		lan743x_tx_frame_add_lso(tx, frame_length);
+
+	if (nr_frags <= 0)
+		goto finish;
+
+	for (j = 0; j < nr_frags; j++) {
+		const struct skb_frag_struct *frag;
+
+		frag = &(skb_shinfo(skb)->frags[j]);
+		if (lan743x_tx_frame_add_fragment(tx, frag, frame_length)) {
+			/* upon error no need to call
+			 *	lan743x_tx_frame_end
+			 * frame assembler clean up was performed inside
+			 *	lan743x_tx_frame_add_fragment
+			 */
+			dev_kfree_skb(skb);
+			goto unlock;
+		}
+	}
+
+finish:
+	lan743x_tx_frame_end(tx, skb, ignore_sync);
+
+unlock:
+	spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+	return NETDEV_TX_OK;
+}
+
+static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
+{
+	struct lan743x_tx *tx = container_of(napi, struct lan743x_tx, napi);
+	struct lan743x_adapter *adapter = tx->adapter;
+	bool start_transmitter = false;
+	unsigned long irq_flags = 0;
+	u32 ioc_bit = 0;
+	u32 int_sts = 0;
+
+	adapter = tx->adapter;
+	ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
+	int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C)
+		lan743x_csr_write(adapter, DMAC_INT_STS, ioc_bit);
+	spin_lock_irqsave(&tx->ring_lock, irq_flags);
+
+	/* clean up tx ring */
+	lan743x_tx_release_completed_descriptors(tx);
+	if (netif_queue_stopped(adapter->netdev)) {
+		if (tx->overflow_skb) {
+			if (lan743x_tx_get_desc_cnt(tx, tx->overflow_skb) <=
+				lan743x_tx_get_avail_desc(tx))
+				start_transmitter = true;
+		} else {
+			netif_wake_queue(adapter->netdev);
+		}
+	}
+	spin_unlock_irqrestore(&tx->ring_lock, irq_flags);
+
+	if (start_transmitter) {
+		/* space is now available, transmit overflow skb */
+		lan743x_tx_xmit_frame(tx, tx->overflow_skb);
+		tx->overflow_skb = NULL;
+		netif_wake_queue(adapter->netdev);
+	}
+
+	if (!napi_complete_done(napi, weight))
+		goto done;
+
+	/* enable isr */
+	lan743x_csr_write(adapter, INT_EN_SET,
+			  INT_BIT_DMA_TX_(tx->channel_number));
+	lan743x_csr_read(adapter, INT_STS);
+
+done:
+	return weight;
+}
+
+static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
+{
+	if (tx->head_cpu_ptr) {
+		pci_free_consistent(tx->adapter->pdev,
+				    sizeof(*tx->head_cpu_ptr),
+				    (void *)(tx->head_cpu_ptr),
+				    tx->head_dma_ptr);
+		tx->head_cpu_ptr = NULL;
+		tx->head_dma_ptr = 0;
+	}
+	kfree(tx->buffer_info);
+	tx->buffer_info = NULL;
+
+	if (tx->ring_cpu_ptr) {
+		pci_free_consistent(tx->adapter->pdev,
+				    tx->ring_allocation_size,
+				    tx->ring_cpu_ptr,
+				    tx->ring_dma_ptr);
+		tx->ring_allocation_size = 0;
+		tx->ring_cpu_ptr = NULL;
+		tx->ring_dma_ptr = 0;
+	}
+	tx->ring_size = 0;
+}
+
+static int lan743x_tx_ring_init(struct lan743x_tx *tx)
+{
+	size_t ring_allocation_size = 0;
+	void *cpu_ptr = NULL;
+	dma_addr_t dma_ptr;
+	int ret = -ENOMEM;
+
+	tx->ring_size = LAN743X_TX_RING_SIZE;
+	if (tx->ring_size & ~TX_CFG_B_TX_RING_LEN_MASK_) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	ring_allocation_size = ALIGN(tx->ring_size *
+				     sizeof(struct lan743x_tx_descriptor),
+				     PAGE_SIZE);
+	dma_ptr = 0;
+	cpu_ptr = pci_zalloc_consistent(tx->adapter->pdev,
+					ring_allocation_size, &dma_ptr);
+	if (!cpu_ptr) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	tx->ring_allocation_size = ring_allocation_size;
+	tx->ring_cpu_ptr = (struct lan743x_tx_descriptor *)cpu_ptr;
+	tx->ring_dma_ptr = dma_ptr;
+
+	cpu_ptr = kcalloc(tx->ring_size, sizeof(*tx->buffer_info), GFP_KERNEL);
+	if (!cpu_ptr) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+	tx->buffer_info = (struct lan743x_tx_buffer_info *)cpu_ptr;
+	dma_ptr = 0;
+	cpu_ptr = pci_zalloc_consistent(tx->adapter->pdev,
+					sizeof(*tx->head_cpu_ptr), &dma_ptr);
+	if (!cpu_ptr) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	tx->head_cpu_ptr = cpu_ptr;
+	tx->head_dma_ptr = dma_ptr;
+	if (tx->head_dma_ptr & 0x3) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	return 0;
+
+cleanup:
+	lan743x_tx_ring_cleanup(tx);
+	return ret;
+}
+
+static void lan743x_tx_close(struct lan743x_tx *tx)
+{
+	struct lan743x_adapter *adapter = tx->adapter;
+
+	lan743x_csr_write(adapter,
+			  DMAC_CMD,
+			  DMAC_CMD_STOP_T_(tx->channel_number));
+	lan743x_dmac_tx_wait_till_stopped(adapter, tx->channel_number);
+
+	lan743x_csr_write(adapter,
+			  DMAC_INT_EN_CLR,
+			  DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+	lan743x_csr_write(adapter, INT_EN_CLR,
+			  INT_BIT_DMA_TX_(tx->channel_number));
+	napi_disable(&tx->napi);
+	netif_napi_del(&tx->napi);
+
+	lan743x_csr_write(adapter, FCT_TX_CTL,
+			  FCT_TX_CTL_DIS_(tx->channel_number));
+	lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+				 FCT_TX_CTL_EN_(tx->channel_number),
+				 0, 1000, 20000, 100);
+
+	lan743x_tx_release_all_descriptors(tx);
+
+	if (tx->overflow_skb) {
+		dev_kfree_skb(tx->overflow_skb);
+		tx->overflow_skb = NULL;
+	}
+
+	lan743x_tx_ring_cleanup(tx);
+}
+
+static int lan743x_tx_open(struct lan743x_tx *tx)
+{
+	struct lan743x_adapter *adapter = NULL;
+	u32 data = 0;
+	int ret;
+
+	adapter = tx->adapter;
+	ret = lan743x_tx_ring_init(tx);
+	if (ret)
+		return ret;
+
+	/* initialize fifo */
+	lan743x_csr_write(adapter, FCT_TX_CTL,
+			  FCT_TX_CTL_RESET_(tx->channel_number));
+	lan743x_csr_wait_for_bit(adapter, FCT_TX_CTL,
+				 FCT_TX_CTL_RESET_(tx->channel_number),
+				 0, 1000, 20000, 100);
+
+	/* enable fifo */
+	lan743x_csr_write(adapter, FCT_TX_CTL,
+			  FCT_TX_CTL_EN_(tx->channel_number));
+
+	/* reset tx channel */
+	lan743x_csr_write(adapter, DMAC_CMD,
+			  DMAC_CMD_TX_SWR_(tx->channel_number));
+	lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+				 DMAC_CMD_TX_SWR_(tx->channel_number),
+				 0, 1000, 20000, 100);
+
+	/* Write TX_BASE_ADDR */
+	lan743x_csr_write(adapter,
+			  TX_BASE_ADDRH(tx->channel_number),
+			  DMA_ADDR_HIGH32(tx->ring_dma_ptr));
+	lan743x_csr_write(adapter,
+			  TX_BASE_ADDRL(tx->channel_number),
+			  DMA_ADDR_LOW32(tx->ring_dma_ptr));
+
+	/* Write TX_CFG_B */
+	data = lan743x_csr_read(adapter, TX_CFG_B(tx->channel_number));
+	data &= ~TX_CFG_B_TX_RING_LEN_MASK_;
+	data |= ((tx->ring_size) & TX_CFG_B_TX_RING_LEN_MASK_);
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+		data |= TX_CFG_B_TDMABL_512_;
+	lan743x_csr_write(adapter, TX_CFG_B(tx->channel_number), data);
+
+	/* Write TX_CFG_A */
+	data = TX_CFG_A_TX_TMR_HPWB_SEL_IOC_ | TX_CFG_A_TX_HP_WB_EN_;
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+		data |= TX_CFG_A_TX_HP_WB_ON_INT_TMR_;
+		data |= TX_CFG_A_TX_PF_THRES_SET_(0x10);
+		data |= TX_CFG_A_TX_PF_PRI_THRES_SET_(0x04);
+		data |= TX_CFG_A_TX_HP_WB_THRES_SET_(0x07);
+	}
+	lan743x_csr_write(adapter, TX_CFG_A(tx->channel_number), data);
+
+	/* Write TX_HEAD_WRITEBACK_ADDR */
+	lan743x_csr_write(adapter,
+			  TX_HEAD_WRITEBACK_ADDRH(tx->channel_number),
+			  DMA_ADDR_HIGH32(tx->head_dma_ptr));
+	lan743x_csr_write(adapter,
+			  TX_HEAD_WRITEBACK_ADDRL(tx->channel_number),
+			  DMA_ADDR_LOW32(tx->head_dma_ptr));
+
+	/* set last head */
+	tx->last_head = lan743x_csr_read(adapter, TX_HEAD(tx->channel_number));
+
+	/* write TX_TAIL */
+	tx->last_tail = 0;
+	lan743x_csr_write(adapter, TX_TAIL(tx->channel_number),
+			  (u32)(tx->last_tail));
+	tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+							 INT_BIT_DMA_TX_
+							 (tx->channel_number));
+	netif_napi_add(adapter->netdev,
+		       &tx->napi, lan743x_tx_napi_poll,
+		       tx->ring_size - 1);
+	napi_enable(&tx->napi);
+
+	data = 0;
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+		data |= TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_;
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+		data |= TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_;
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+		data |= TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_;
+	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+		data |= TX_CFG_C_TX_INT_EN_R2C_;
+	lan743x_csr_write(adapter, TX_CFG_C(tx->channel_number), data);
+
+	if (!(tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET))
+		lan743x_csr_write(adapter, INT_EN_SET,
+				  INT_BIT_DMA_TX_(tx->channel_number));
+	lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+			  DMAC_INT_BIT_TX_IOC_(tx->channel_number));
+
+	/*  start dmac channel */
+	lan743x_csr_write(adapter, DMAC_CMD,
+			  DMAC_CMD_START_T_(tx->channel_number));
+	return 0;
+}
+
+static int lan743x_rx_next_index(struct lan743x_rx *rx, int index)
+{
+	return ((++index) % rx->ring_size);
+}
+
+static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index)
+{
+	struct lan743x_rx_buffer_info *buffer_info;
+	struct lan743x_rx_descriptor *descriptor;
+	int length = 0;
+
+	length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING);
+	descriptor = &rx->ring_cpu_ptr[index];
+	buffer_info = &rx->buffer_info[index];
+	buffer_info->skb = __netdev_alloc_skb(rx->adapter->netdev,
+					      length,
+					      GFP_ATOMIC | GFP_DMA);
+	if (!(buffer_info->skb))
+		return -ENOMEM;
+	buffer_info->dma_ptr = dma_map_single(&rx->adapter->pdev->dev,
+					      buffer_info->skb->data,
+					      length,
+					      DMA_FROM_DEVICE);
+	if (dma_mapping_error(&rx->adapter->pdev->dev,
+			      buffer_info->dma_ptr)) {
+		buffer_info->dma_ptr = 0;
+		return -ENOMEM;
+	}
+
+	buffer_info->buffer_length = length;
+	descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr);
+	descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr);
+	descriptor->data3 = 0;
+	descriptor->data0 = (RX_DESC_DATA0_OWN_ |
+			    (length & RX_DESC_DATA0_BUF_LENGTH_MASK_));
+	skb_reserve(buffer_info->skb, RX_HEAD_PADDING);
+
+	return 0;
+}
+
+static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index)
+{
+	struct lan743x_rx_buffer_info *buffer_info;
+	struct lan743x_rx_descriptor *descriptor;
+
+	descriptor = &rx->ring_cpu_ptr[index];
+	buffer_info = &rx->buffer_info[index];
+
+	descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr);
+	descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr);
+	descriptor->data3 = 0;
+	descriptor->data0 = (RX_DESC_DATA0_OWN_ |
+			    ((buffer_info->buffer_length) &
+			    RX_DESC_DATA0_BUF_LENGTH_MASK_));
+}
+
+static void lan743x_rx_release_ring_element(struct lan743x_rx *rx, int index)
+{
+	struct lan743x_rx_buffer_info *buffer_info;
+	struct lan743x_rx_descriptor *descriptor;
+
+	descriptor = &rx->ring_cpu_ptr[index];
+	buffer_info = &rx->buffer_info[index];
+
+	memset(descriptor, 0, sizeof(*descriptor));
+
+	if (buffer_info->dma_ptr) {
+		dma_unmap_single(&rx->adapter->pdev->dev,
+				 buffer_info->dma_ptr,
+				 buffer_info->buffer_length,
+				 DMA_FROM_DEVICE);
+		buffer_info->dma_ptr = 0;
+	}
+
+	if (buffer_info->skb) {
+		dev_kfree_skb(buffer_info->skb);
+		buffer_info->skb = NULL;
+	}
+
+	memset(buffer_info, 0, sizeof(*buffer_info));
+}
+
+static int lan743x_rx_process_packet(struct lan743x_rx *rx)
+{
+	struct skb_shared_hwtstamps *hwtstamps = NULL;
+	int result = RX_PROCESS_RESULT_NOTHING_TO_DO;
+	struct lan743x_rx_buffer_info *buffer_info;
+	struct lan743x_rx_descriptor *descriptor;
+	int current_head_index = -1;
+	int extension_index = -1;
+	int first_index = -1;
+	int last_index = -1;
+
+	current_head_index = *rx->head_cpu_ptr;
+	if (current_head_index < 0 || current_head_index >= rx->ring_size)
+		goto done;
+
+	if (rx->last_head < 0 || rx->last_head >= rx->ring_size)
+		goto done;
+
+	if (rx->last_head != current_head_index) {
+		descriptor = &rx->ring_cpu_ptr[rx->last_head];
+		if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+			goto done;
+
+		if (!(descriptor->data0 & RX_DESC_DATA0_FS_))
+			goto done;
+
+		first_index = rx->last_head;
+		if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+			last_index = rx->last_head;
+		} else {
+			int index;
+
+			index = lan743x_rx_next_index(rx, first_index);
+			while (index != current_head_index) {
+				descriptor = &rx->ring_cpu_ptr[index];
+				if (descriptor->data0 & RX_DESC_DATA0_OWN_)
+					goto done;
+
+				if (descriptor->data0 & RX_DESC_DATA0_LS_) {
+					last_index = index;
+					break;
+				}
+				index = lan743x_rx_next_index(rx, index);
+			}
+		}
+		if (last_index >= 0) {
+			descriptor = &rx->ring_cpu_ptr[last_index];
+			if (descriptor->data0 & RX_DESC_DATA0_EXT_) {
+				/* extension is expected to follow */
+				int index = lan743x_rx_next_index(rx,
+								  last_index);
+				if (index != current_head_index) {
+					descriptor = &rx->ring_cpu_ptr[index];
+					if (descriptor->data0 &
+					    RX_DESC_DATA0_OWN_) {
+						goto done;
+					}
+					if (descriptor->data0 &
+					    RX_DESC_DATA0_EXT_) {
+						extension_index = index;
+					} else {
+						goto done;
+					}
+				} else {
+					/* extension is not yet available */
+					/* prevent processing of this packet */
+					first_index = -1;
+					last_index = -1;
+				}
+			}
+		}
+	}
+	if (first_index >= 0 && last_index >= 0) {
+		int real_last_index = last_index;
+		struct sk_buff *skb = NULL;
+		u32 ts_sec = 0;
+		u32 ts_nsec = 0;
+
+		/* packet is available */
+		if (first_index == last_index) {
+			/* single buffer packet */
+			int packet_length;
+
+			buffer_info = &rx->buffer_info[first_index];
+			skb = buffer_info->skb;
+			descriptor = &rx->ring_cpu_ptr[first_index];
+
+			/* unmap from dma */
+			if (buffer_info->dma_ptr) {
+				dma_unmap_single(&rx->adapter->pdev->dev,
+						 buffer_info->dma_ptr,
+						 buffer_info->buffer_length,
+						 DMA_FROM_DEVICE);
+				buffer_info->dma_ptr = 0;
+				buffer_info->buffer_length = 0;
+			}
+			buffer_info->skb = NULL;
+			packet_length =	RX_DESC_DATA0_FRAME_LENGTH_GET_
+					(descriptor->data0);
+			skb_put(skb, packet_length - 4);
+			skb->protocol = eth_type_trans(skb,
+						       rx->adapter->netdev);
+			lan743x_rx_allocate_ring_element(rx, first_index);
+		} else {
+			int index = first_index;
+
+			/* multi buffer packet not supported */
+			/* this should not happen since
+			 * buffers are allocated to be at least jumbo size
+			 */
+
+			/* clean up buffers */
+			if (first_index <= last_index) {
+				while ((index >= first_index) &&
+				       (index <= last_index)) {
+					lan743x_rx_release_ring_element(rx,
+									index);
+					lan743x_rx_allocate_ring_element(rx,
+									 index);
+					index = lan743x_rx_next_index(rx,
+								      index);
+				}
+			} else {
+				while ((index >= first_index) ||
+				       (index <= last_index)) {
+					lan743x_rx_release_ring_element(rx,
+									index);
+					lan743x_rx_allocate_ring_element(rx,
+									 index);
+					index = lan743x_rx_next_index(rx,
+								      index);
+				}
+			}
+		}
+
+		if (extension_index >= 0) {
+			descriptor = &rx->ring_cpu_ptr[extension_index];
+			buffer_info = &rx->buffer_info[extension_index];
+
+			ts_sec = descriptor->data1;
+			ts_nsec = (descriptor->data2 &
+				  RX_DESC_DATA2_TS_NS_MASK_);
+			lan743x_rx_reuse_ring_element(rx, extension_index);
+			real_last_index = extension_index;
+		}
+
+		if (!skb) {
+			result = RX_PROCESS_RESULT_PACKET_DROPPED;
+			goto move_forward;
+		}
+
+		if (extension_index < 0)
+			goto pass_packet_to_os;
+		hwtstamps = skb_hwtstamps(skb);
+		if (hwtstamps)
+			hwtstamps->hwtstamp = ktime_set(ts_sec, ts_nsec);
+
+pass_packet_to_os:
+		/* pass packet to OS */
+		napi_gro_receive(&rx->napi, skb);
+		result = RX_PROCESS_RESULT_PACKET_RECEIVED;
+
+move_forward:
+		/* push tail and head forward */
+		rx->last_tail = real_last_index;
+		rx->last_head = lan743x_rx_next_index(rx, real_last_index);
+	}
+done:
+	return result;
+}
+
+static int lan743x_rx_napi_poll(struct napi_struct *napi, int weight)
+{
+	struct lan743x_rx *rx = container_of(napi, struct lan743x_rx, napi);
+	struct lan743x_adapter *adapter = rx->adapter;
+	u32 rx_tail_flags = 0;
+	int count;
+
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C) {
+		/* clear int status bit before reading packet */
+		lan743x_csr_write(adapter, DMAC_INT_STS,
+				  DMAC_INT_BIT_RXFRM_(rx->channel_number));
+	}
+	count = 0;
+	while (count < weight) {
+		int rx_process_result = -1;
+
+		rx_process_result = lan743x_rx_process_packet(rx);
+		if (rx_process_result == RX_PROCESS_RESULT_PACKET_RECEIVED) {
+			count++;
+		} else if (rx_process_result ==
+			RX_PROCESS_RESULT_NOTHING_TO_DO) {
+			break;
+		} else if (rx_process_result ==
+			RX_PROCESS_RESULT_PACKET_DROPPED) {
+			continue;
+		}
+	}
+	rx->frame_count += count;
+	if (count == weight)
+		goto done;
+
+	if (!napi_complete_done(napi, count))
+		goto done;
+
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET)
+		rx_tail_flags |= RX_TAIL_SET_TOP_INT_VEC_EN_;
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET) {
+		rx_tail_flags |= RX_TAIL_SET_TOP_INT_EN_;
+	} else {
+		lan743x_csr_write(adapter, INT_EN_SET,
+				  INT_BIT_DMA_RX_(rx->channel_number));
+	}
+
+	/* update RX_TAIL */
+	lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+			  rx_tail_flags | rx->last_tail);
+done:
+	return count;
+}
+
+static void lan743x_rx_ring_cleanup(struct lan743x_rx *rx)
+{
+	if (rx->buffer_info && rx->ring_cpu_ptr) {
+		int index;
+
+		for (index = 0; index < rx->ring_size; index++)
+			lan743x_rx_release_ring_element(rx, index);
+	}
+
+	if (rx->head_cpu_ptr) {
+		pci_free_consistent(rx->adapter->pdev,
+				    sizeof(*rx->head_cpu_ptr),
+				    rx->head_cpu_ptr,
+				    rx->head_dma_ptr);
+		rx->head_cpu_ptr = NULL;
+		rx->head_dma_ptr = 0;
+	}
+
+	kfree(rx->buffer_info);
+	rx->buffer_info = NULL;
+
+	if (rx->ring_cpu_ptr) {
+		pci_free_consistent(rx->adapter->pdev,
+				    rx->ring_allocation_size,
+				    rx->ring_cpu_ptr,
+				    rx->ring_dma_ptr);
+		rx->ring_allocation_size = 0;
+		rx->ring_cpu_ptr = NULL;
+		rx->ring_dma_ptr = 0;
+	}
+
+	rx->ring_size = 0;
+	rx->last_head = 0;
+}
+
+static int lan743x_rx_ring_init(struct lan743x_rx *rx)
+{
+	size_t ring_allocation_size = 0;
+	dma_addr_t dma_ptr = 0;
+	void *cpu_ptr = NULL;
+	int ret = -ENOMEM;
+	int index = 0;
+
+	rx->ring_size = LAN743X_RX_RING_SIZE;
+	if (rx->ring_size <= 1) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	if (rx->ring_size & ~RX_CFG_B_RX_RING_LEN_MASK_) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	ring_allocation_size = ALIGN(rx->ring_size *
+				     sizeof(struct lan743x_rx_descriptor),
+				     PAGE_SIZE);
+	dma_ptr = 0;
+	cpu_ptr = pci_zalloc_consistent(rx->adapter->pdev,
+					ring_allocation_size, &dma_ptr);
+	if (!cpu_ptr) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+	rx->ring_allocation_size = ring_allocation_size;
+	rx->ring_cpu_ptr = (struct lan743x_rx_descriptor *)cpu_ptr;
+	rx->ring_dma_ptr = dma_ptr;
+
+	cpu_ptr = kcalloc(rx->ring_size, sizeof(*rx->buffer_info),
+			  GFP_KERNEL);
+	if (!cpu_ptr) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+	rx->buffer_info = (struct lan743x_rx_buffer_info *)cpu_ptr;
+	dma_ptr = 0;
+	cpu_ptr = pci_zalloc_consistent(rx->adapter->pdev,
+					sizeof(*rx->head_cpu_ptr), &dma_ptr);
+	if (!cpu_ptr) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	rx->head_cpu_ptr = cpu_ptr;
+	rx->head_dma_ptr = dma_ptr;
+	if (rx->head_dma_ptr & 0x3) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	rx->last_head = 0;
+	for (index = 0; index < rx->ring_size; index++) {
+		ret = lan743x_rx_allocate_ring_element(rx, index);
+		if (ret)
+			goto cleanup;
+	}
+	return 0;
+
+cleanup:
+	lan743x_rx_ring_cleanup(rx);
+	return ret;
+}
+
+static void lan743x_rx_close(struct lan743x_rx *rx)
+{
+	struct lan743x_adapter *adapter = rx->adapter;
+
+	lan743x_csr_write(adapter, FCT_RX_CTL,
+			  FCT_RX_CTL_DIS_(rx->channel_number));
+	lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+				 FCT_RX_CTL_EN_(rx->channel_number),
+				 0, 1000, 20000, 100);
+
+	lan743x_csr_write(adapter, DMAC_CMD,
+			  DMAC_CMD_STOP_R_(rx->channel_number));
+	lan743x_dmac_rx_wait_till_stopped(adapter, rx->channel_number);
+
+	lan743x_csr_write(adapter, DMAC_INT_EN_CLR,
+			  DMAC_INT_BIT_RXFRM_(rx->channel_number));
+	lan743x_csr_write(adapter, INT_EN_CLR,
+			  INT_BIT_DMA_RX_(rx->channel_number));
+	napi_disable(&rx->napi);
+
+	netif_napi_del(&rx->napi);
+
+	lan743x_rx_ring_cleanup(rx);
+}
+
+static int lan743x_rx_open(struct lan743x_rx *rx)
+{
+	struct lan743x_adapter *adapter = rx->adapter;
+	u32 data = 0;
+	int ret;
+
+	rx->frame_count = 0;
+	ret = lan743x_rx_ring_init(rx);
+	if (ret)
+		goto return_error;
+
+	netif_napi_add(adapter->netdev,
+		       &rx->napi, lan743x_rx_napi_poll,
+		       rx->ring_size - 1);
+
+	lan743x_csr_write(adapter, DMAC_CMD,
+			  DMAC_CMD_RX_SWR_(rx->channel_number));
+	lan743x_csr_wait_for_bit(adapter, DMAC_CMD,
+				 DMAC_CMD_RX_SWR_(rx->channel_number),
+				 0, 1000, 20000, 100);
+
+	/* set ring base address */
+	lan743x_csr_write(adapter,
+			  RX_BASE_ADDRH(rx->channel_number),
+			  DMA_ADDR_HIGH32(rx->ring_dma_ptr));
+	lan743x_csr_write(adapter,
+			  RX_BASE_ADDRL(rx->channel_number),
+			  DMA_ADDR_LOW32(rx->ring_dma_ptr));
+
+	/* set rx write back address */
+	lan743x_csr_write(adapter,
+			  RX_HEAD_WRITEBACK_ADDRH(rx->channel_number),
+			  DMA_ADDR_HIGH32(rx->head_dma_ptr));
+	lan743x_csr_write(adapter,
+			  RX_HEAD_WRITEBACK_ADDRL(rx->channel_number),
+			  DMA_ADDR_LOW32(rx->head_dma_ptr));
+	data = RX_CFG_A_RX_HP_WB_EN_;
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0)) {
+		data |= (RX_CFG_A_RX_WB_ON_INT_TMR_ |
+			RX_CFG_A_RX_WB_THRES_SET_(0x7) |
+			RX_CFG_A_RX_PF_THRES_SET_(16) |
+			RX_CFG_A_RX_PF_PRI_THRES_SET_(4));
+	}
+
+	/* set RX_CFG_A */
+	lan743x_csr_write(adapter,
+			  RX_CFG_A(rx->channel_number), data);
+
+	/* set RX_CFG_B */
+	data = lan743x_csr_read(adapter, RX_CFG_B(rx->channel_number));
+	data &= ~RX_CFG_B_RX_PAD_MASK_;
+	if (!RX_HEAD_PADDING)
+		data |= RX_CFG_B_RX_PAD_0_;
+	else
+		data |= RX_CFG_B_RX_PAD_2_;
+	data &= ~RX_CFG_B_RX_RING_LEN_MASK_;
+	data |= ((rx->ring_size) & RX_CFG_B_RX_RING_LEN_MASK_);
+	data |= RX_CFG_B_TS_ALL_RX_;
+	if (!(adapter->csr.flags & LAN743X_CSR_FLAG_IS_A0))
+		data |= RX_CFG_B_RDMABL_512_;
+
+	lan743x_csr_write(adapter, RX_CFG_B(rx->channel_number), data);
+	rx->vector_flags = lan743x_intr_get_vector_flags(adapter,
+							 INT_BIT_DMA_RX_
+							 (rx->channel_number));
+
+	/* set RX_CFG_C */
+	data = 0;
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR)
+		data |= RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_;
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR)
+		data |= RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_;
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C)
+		data |= RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_;
+	if (rx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C)
+		data |= RX_CFG_C_RX_INT_EN_R2C_;
+	lan743x_csr_write(adapter, RX_CFG_C(rx->channel_number), data);
+
+	rx->last_tail = ((u32)(rx->ring_size - 1));
+	lan743x_csr_write(adapter, RX_TAIL(rx->channel_number),
+			  rx->last_tail);
+	rx->last_head = lan743x_csr_read(adapter, RX_HEAD(rx->channel_number));
+	if (rx->last_head) {
+		ret = -EIO;
+		goto napi_delete;
+	}
+
+	napi_enable(&rx->napi);
+
+	lan743x_csr_write(adapter, INT_EN_SET,
+			  INT_BIT_DMA_RX_(rx->channel_number));
+	lan743x_csr_write(adapter, DMAC_INT_STS,
+			  DMAC_INT_BIT_RXFRM_(rx->channel_number));
+	lan743x_csr_write(adapter, DMAC_INT_EN_SET,
+			  DMAC_INT_BIT_RXFRM_(rx->channel_number));
+	lan743x_csr_write(adapter, DMAC_CMD,
+			  DMAC_CMD_START_R_(rx->channel_number));
+
+	/* initialize fifo */
+	lan743x_csr_write(adapter, FCT_RX_CTL,
+			  FCT_RX_CTL_RESET_(rx->channel_number));
+	lan743x_csr_wait_for_bit(adapter, FCT_RX_CTL,
+				 FCT_RX_CTL_RESET_(rx->channel_number),
+				 0, 1000, 20000, 100);
+	lan743x_csr_write(adapter, FCT_FLOW(rx->channel_number),
+			  FCT_FLOW_CTL_REQ_EN_ |
+			  FCT_FLOW_CTL_ON_THRESHOLD_SET_(0x2A) |
+			  FCT_FLOW_CTL_OFF_THRESHOLD_SET_(0xA));
+
+	/* enable fifo */
+	lan743x_csr_write(adapter, FCT_RX_CTL,
+			  FCT_RX_CTL_EN_(rx->channel_number));
+	return 0;
+
+napi_delete:
+	netif_napi_del(&rx->napi);
+	lan743x_rx_ring_cleanup(rx);
+
+return_error:
+	return ret;
+}
+
+static int lan743x_netdev_close(struct net_device *netdev)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	int index;
+
+	lan743x_tx_close(&adapter->tx[0]);
+
+	for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++)
+		lan743x_rx_close(&adapter->rx[index]);
+
+	lan743x_phy_close(adapter);
+
+	lan743x_mac_close(adapter);
+
+	lan743x_intr_close(adapter);
+
+	return 0;
+}
+
+static int lan743x_netdev_open(struct net_device *netdev)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	int index;
+	int ret;
+
+	ret = lan743x_intr_open(adapter);
+	if (ret)
+		goto return_error;
+
+	ret = lan743x_mac_open(adapter);
+	if (ret)
+		goto close_intr;
+
+	ret = lan743x_phy_open(adapter);
+	if (ret)
+		goto close_mac;
+
+	for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+		ret = lan743x_rx_open(&adapter->rx[index]);
+		if (ret)
+			goto close_rx;
+	}
+
+	ret = lan743x_tx_open(&adapter->tx[0]);
+	if (ret)
+		goto close_rx;
+
+	return 0;
+
+close_rx:
+	for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+		if (adapter->rx[index].ring_cpu_ptr)
+			lan743x_rx_close(&adapter->rx[index]);
+	}
+	lan743x_phy_close(adapter);
+
+close_mac:
+	lan743x_mac_close(adapter);
+
+close_intr:
+	lan743x_intr_close(adapter);
+
+return_error:
+	netif_warn(adapter, ifup, adapter->netdev,
+		   "Error opening LAN743x\n");
+	return ret;
+}
+
+static netdev_tx_t lan743x_netdev_xmit_frame(struct sk_buff *skb,
+					     struct net_device *netdev)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	return lan743x_tx_xmit_frame(&adapter->tx[0], skb);
+}
+
+static int lan743x_netdev_ioctl(struct net_device *netdev,
+				struct ifreq *ifr, int cmd)
+{
+	if (!netif_running(netdev))
+		return -EINVAL;
+	return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+}
+
+static void lan743x_netdev_set_multicast(struct net_device *netdev)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	lan743x_rfe_set_multicast(adapter);
+}
+
+static int lan743x_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+
+	ret = lan743x_mac_set_mtu(adapter, new_mtu);
+	if (!ret)
+		netdev->mtu = new_mtu;
+	return ret;
+}
+
+static void lan743x_netdev_get_stats64(struct net_device *netdev,
+				       struct rtnl_link_stats64 *stats)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	stats->rx_packets = lan743x_csr_read(adapter, STAT_RX_TOTAL_FRAMES);
+	stats->tx_packets = lan743x_csr_read(adapter, STAT_TX_TOTAL_FRAMES);
+	stats->rx_bytes = lan743x_csr_read(adapter,
+					   STAT_RX_UNICAST_BYTE_COUNT) +
+			  lan743x_csr_read(adapter,
+					   STAT_RX_BROADCAST_BYTE_COUNT) +
+			  lan743x_csr_read(adapter,
+					   STAT_RX_MULTICAST_BYTE_COUNT);
+	stats->tx_bytes = lan743x_csr_read(adapter,
+					   STAT_TX_UNICAST_BYTE_COUNT) +
+			  lan743x_csr_read(adapter,
+					   STAT_TX_BROADCAST_BYTE_COUNT) +
+			  lan743x_csr_read(adapter,
+					   STAT_TX_MULTICAST_BYTE_COUNT);
+	stats->rx_errors = lan743x_csr_read(adapter, STAT_RX_FCS_ERRORS) +
+			   lan743x_csr_read(adapter,
+					    STAT_RX_ALIGNMENT_ERRORS) +
+			   lan743x_csr_read(adapter, STAT_RX_JABBER_ERRORS) +
+			   lan743x_csr_read(adapter,
+					    STAT_RX_UNDERSIZE_FRAME_ERRORS) +
+			   lan743x_csr_read(adapter,
+					    STAT_RX_OVERSIZE_FRAME_ERRORS);
+	stats->tx_errors = lan743x_csr_read(adapter, STAT_TX_FCS_ERRORS) +
+			   lan743x_csr_read(adapter,
+					    STAT_TX_EXCESS_DEFERRAL_ERRORS) +
+			   lan743x_csr_read(adapter, STAT_TX_CARRIER_ERRORS);
+	stats->rx_dropped = lan743x_csr_read(adapter,
+					     STAT_RX_DROPPED_FRAMES);
+	stats->tx_dropped = lan743x_csr_read(adapter,
+					     STAT_TX_EXCESSIVE_COLLISION);
+	stats->multicast = lan743x_csr_read(adapter,
+					    STAT_RX_MULTICAST_FRAMES) +
+			   lan743x_csr_read(adapter,
+					    STAT_TX_MULTICAST_FRAMES);
+	stats->collisions = lan743x_csr_read(adapter,
+					     STAT_TX_SINGLE_COLLISIONS) +
+			    lan743x_csr_read(adapter,
+					     STAT_TX_MULTIPLE_COLLISIONS) +
+			    lan743x_csr_read(adapter,
+					     STAT_TX_LATE_COLLISIONS);
+}
+
+static int lan743x_netdev_set_mac_address(struct net_device *netdev,
+					  void *addr)
+{
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+	struct sockaddr *sock_addr = addr;
+	int ret;
+
+	ret = eth_prepare_mac_addr_change(netdev, sock_addr);
+	if (ret)
+		return ret;
+	ether_addr_copy(netdev->dev_addr, sock_addr->sa_data);
+	lan743x_mac_set_address(adapter, sock_addr->sa_data);
+	lan743x_rfe_update_mac_address(adapter);
+	return 0;
+}
+
+static const struct net_device_ops lan743x_netdev_ops = {
+	.ndo_open		= lan743x_netdev_open,
+	.ndo_stop		= lan743x_netdev_close,
+	.ndo_start_xmit		= lan743x_netdev_xmit_frame,
+	.ndo_do_ioctl		= lan743x_netdev_ioctl,
+	.ndo_set_rx_mode	= lan743x_netdev_set_multicast,
+	.ndo_change_mtu		= lan743x_netdev_change_mtu,
+	.ndo_get_stats64	= lan743x_netdev_get_stats64,
+	.ndo_set_mac_address	= lan743x_netdev_set_mac_address,
+};
+
+static void lan743x_hardware_cleanup(struct lan743x_adapter *adapter)
+{
+	lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+}
+
+static void lan743x_mdiobus_cleanup(struct lan743x_adapter *adapter)
+{
+	mdiobus_unregister(adapter->mdiobus);
+}
+
+static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
+{
+	unregister_netdev(adapter->netdev);
+
+	lan743x_mdiobus_cleanup(adapter);
+	lan743x_hardware_cleanup(adapter);
+	lan743x_pci_cleanup(adapter);
+}
+
+static int lan743x_hardware_init(struct lan743x_adapter *adapter,
+				 struct pci_dev *pdev)
+{
+	struct lan743x_tx *tx;
+	int index;
+	int ret;
+
+	adapter->intr.irq = adapter->pdev->irq;
+	lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
+	mutex_init(&adapter->dp_lock);
+	ret = lan743x_mac_init(adapter);
+	if (ret)
+		return ret;
+
+	ret = lan743x_phy_init(adapter);
+	if (ret)
+		return ret;
+
+	lan743x_rfe_update_mac_address(adapter);
+
+	ret = lan743x_dmac_init(adapter);
+	if (ret)
+		return ret;
+
+	for (index = 0; index < LAN743X_USED_RX_CHANNELS; index++) {
+		adapter->rx[index].adapter = adapter;
+		adapter->rx[index].channel_number = index;
+	}
+
+	tx = &adapter->tx[0];
+	tx->adapter = adapter;
+	tx->channel_number = 0;
+	spin_lock_init(&tx->ring_lock);
+	return 0;
+}
+
+static int lan743x_mdiobus_init(struct lan743x_adapter *adapter)
+{
+	int ret;
+
+	adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev);
+	if (!(adapter->mdiobus)) {
+		ret = -ENOMEM;
+		goto return_error;
+	}
+
+	adapter->mdiobus->priv = (void *)adapter;
+	adapter->mdiobus->read = lan743x_mdiobus_read;
+	adapter->mdiobus->write = lan743x_mdiobus_write;
+	adapter->mdiobus->name = "lan743x-mdiobus";
+	snprintf(adapter->mdiobus->id, MII_BUS_ID_SIZE,
+		 "pci-%s", pci_name(adapter->pdev));
+
+	/* set to internal PHY id */
+	adapter->mdiobus->phy_mask = ~(u32)BIT(1);
+
+	/* register mdiobus */
+	ret = mdiobus_register(adapter->mdiobus);
+	if (ret < 0)
+		goto return_error;
+	return 0;
+
+return_error:
+	return ret;
+}
+
+/* lan743x_pcidev_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @id: entry in lan743x_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int lan743x_pcidev_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	struct lan743x_adapter *adapter = NULL;
+	struct net_device *netdev = NULL;
+	int ret = -ENODEV;
+
+	netdev = devm_alloc_etherdev(&pdev->dev,
+				     sizeof(struct lan743x_adapter));
+	if (!netdev)
+		goto return_error;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE |
+			      NETIF_MSG_LINK | NETIF_MSG_IFUP |
+			      NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED;
+	netdev->max_mtu = LAN743X_MAX_FRAME_SIZE;
+
+	ret = lan743x_pci_init(adapter, pdev);
+	if (ret)
+		goto return_error;
+
+	ret = lan743x_csr_init(adapter);
+	if (ret)
+		goto cleanup_pci;
+
+	ret = lan743x_hardware_init(adapter, pdev);
+	if (ret)
+		goto cleanup_pci;
+
+	ret = lan743x_mdiobus_init(adapter);
+	if (ret)
+		goto cleanup_hardware;
+
+	adapter->netdev->netdev_ops = &lan743x_netdev_ops;
+	adapter->netdev->features = NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
+	adapter->netdev->hw_features = adapter->netdev->features;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	ret = register_netdev(adapter->netdev);
+	if (ret < 0)
+		goto cleanup_mdiobus;
+	return 0;
+
+cleanup_mdiobus:
+	lan743x_mdiobus_cleanup(adapter);
+
+cleanup_hardware:
+	lan743x_hardware_cleanup(adapter);
+
+cleanup_pci:
+	lan743x_pci_cleanup(adapter);
+
+return_error:
+	pr_warn("Initialization failed\n");
+	return ret;
+}
+
+/**
+ * lan743x_pcidev_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * this is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void lan743x_pcidev_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	lan743x_full_cleanup(adapter);
+}
+
+static void lan743x_pcidev_shutdown(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct lan743x_adapter *adapter = netdev_priv(netdev);
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	/* close netdev when netdev is at running state.
+	 * For instance, it is true when system goes to sleep by pm-suspend
+	 * However, it is false when system goes to sleep by suspend GUI menu
+	 */
+	if (netif_running(netdev))
+		lan743x_netdev_close(netdev);
+	rtnl_unlock();
+
+	/* clean up lan743x portion */
+	lan743x_hardware_cleanup(adapter);
+}
+
+static const struct pci_device_id lan743x_pcidev_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+	{ 0, }
+};
+
+static struct pci_driver lan743x_pcidev_driver = {
+	.name     = DRIVER_NAME,
+	.id_table = lan743x_pcidev_tbl,
+	.probe    = lan743x_pcidev_probe,
+	.remove   = lan743x_pcidev_remove,
+	.shutdown = lan743x_pcidev_shutdown,
+};
+
+module_pci_driver(lan743x_pcidev_driver);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
new file mode 100644
index 000000000000..73b463a9df61
--- /dev/null
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -0,0 +1,597 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2018 Microchip Technology Inc. */
+
+#ifndef _LAN743X_H
+#define _LAN743X_H
+
+#define DRIVER_AUTHOR   "Bryan Whitehead <Bryan.Whitehead@microchip.com>"
+#define DRIVER_DESC "LAN743x PCIe Gigabit Ethernet Driver"
+#define DRIVER_NAME "lan743x"
+
+/* Register Definitions */
+#define ID_REV				(0x00)
+#define ID_REV_IS_VALID_CHIP_ID_(id_rev)	\
+	(((id_rev) & 0xFFF00000) == 0x74300000)
+#define ID_REV_CHIP_REV_MASK_		(0x0000FFFF)
+#define ID_REV_CHIP_REV_A0_		(0x00000000)
+#define ID_REV_CHIP_REV_B0_		(0x00000010)
+
+#define FPGA_REV			(0x04)
+#define FPGA_REV_GET_MINOR_(fpga_rev)	(((fpga_rev) >> 8) & 0x000000FF)
+#define FPGA_REV_GET_MAJOR_(fpga_rev)	((fpga_rev) & 0x000000FF)
+
+#define HW_CFG					(0x010)
+#define HW_CFG_LRST_				BIT(1)
+
+#define PMT_CTL					(0x014)
+#define PMT_CTL_READY_				BIT(7)
+#define PMT_CTL_ETH_PHY_RST_			BIT(4)
+
+#define DP_SEL				(0x024)
+#define DP_SEL_DPRDY_			BIT(31)
+#define DP_SEL_MASK_			(0x0000001F)
+#define DP_SEL_RFE_RAM			(0x00000001)
+
+#define DP_SEL_VHF_HASH_LEN		(16)
+#define DP_SEL_VHF_VLAN_LEN		(128)
+
+#define DP_CMD				(0x028)
+#define DP_CMD_WRITE_			(0x00000001)
+
+#define DP_ADDR				(0x02C)
+
+#define DP_DATA_0			(0x030)
+
+#define FCT_RX_CTL			(0xAC)
+#define FCT_RX_CTL_EN_(channel)		BIT(28 + (channel))
+#define FCT_RX_CTL_DIS_(channel)	BIT(24 + (channel))
+#define FCT_RX_CTL_RESET_(channel)	BIT(20 + (channel))
+
+#define FCT_TX_CTL			(0xC4)
+#define FCT_TX_CTL_EN_(channel)		BIT(28 + (channel))
+#define FCT_TX_CTL_DIS_(channel)	BIT(24 + (channel))
+#define FCT_TX_CTL_RESET_(channel)	BIT(20 + (channel))
+
+#define FCT_FLOW(rx_channel)			(0xE0 + ((rx_channel) << 2))
+#define FCT_FLOW_CTL_OFF_THRESHOLD_		(0x00007F00)
+#define FCT_FLOW_CTL_OFF_THRESHOLD_SET_(value)	\
+	((value << 8) & FCT_FLOW_CTL_OFF_THRESHOLD_)
+#define FCT_FLOW_CTL_REQ_EN_			BIT(7)
+#define FCT_FLOW_CTL_ON_THRESHOLD_		(0x0000007F)
+#define FCT_FLOW_CTL_ON_THRESHOLD_SET_(value)	\
+	((value << 0) & FCT_FLOW_CTL_ON_THRESHOLD_)
+
+#define MAC_CR				(0x100)
+#define MAC_CR_ADD_			BIT(12)
+#define MAC_CR_ASD_			BIT(11)
+#define MAC_CR_CNTR_RST_		BIT(5)
+#define MAC_CR_RST_			BIT(0)
+
+#define MAC_RX				(0x104)
+#define MAC_RX_MAX_SIZE_SHIFT_		(16)
+#define MAC_RX_MAX_SIZE_MASK_		(0x3FFF0000)
+#define MAC_RX_RXD_			BIT(1)
+#define MAC_RX_RXEN_			BIT(0)
+
+#define MAC_TX				(0x108)
+#define MAC_TX_TXD_			BIT(1)
+#define MAC_TX_TXEN_			BIT(0)
+
+#define MAC_FLOW			(0x10C)
+#define MAC_FLOW_CR_TX_FCEN_		BIT(30)
+#define MAC_FLOW_CR_RX_FCEN_		BIT(29)
+#define MAC_FLOW_CR_FCPT_MASK_		(0x0000FFFF)
+
+#define MAC_RX_ADDRH			(0x118)
+
+#define MAC_RX_ADDRL			(0x11C)
+
+#define MAC_MII_ACC			(0x120)
+#define MAC_MII_ACC_PHY_ADDR_SHIFT_	(11)
+#define MAC_MII_ACC_PHY_ADDR_MASK_	(0x0000F800)
+#define MAC_MII_ACC_MIIRINDA_SHIFT_	(6)
+#define MAC_MII_ACC_MIIRINDA_MASK_	(0x000007C0)
+#define MAC_MII_ACC_MII_READ_		(0x00000000)
+#define MAC_MII_ACC_MII_WRITE_		(0x00000002)
+#define MAC_MII_ACC_MII_BUSY_		BIT(0)
+
+#define MAC_MII_DATA			(0x124)
+
+/* offset 0x400 - 0x500, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_HI(x)		(0x400 + (8 * (x)))
+#define RFE_ADDR_FILT_HI_VALID_		BIT(31)
+
+/* offset 0x404 - 0x504, x may range from 0 to 32, for a total of 33 entries */
+#define RFE_ADDR_FILT_LO(x)		(0x404 + (8 * (x)))
+
+#define RFE_CTL				(0x508)
+#define RFE_CTL_AB_			BIT(10)
+#define RFE_CTL_AM_			BIT(9)
+#define RFE_CTL_AU_			BIT(8)
+#define RFE_CTL_MCAST_HASH_		BIT(3)
+#define RFE_CTL_DA_PERFECT_		BIT(1)
+
+#define INT_STS				(0x780)
+#define INT_BIT_DMA_RX_(channel)	BIT(24 + (channel))
+#define INT_BIT_ALL_RX_			(0x0F000000)
+#define INT_BIT_DMA_TX_(channel)	BIT(16 + (channel))
+#define INT_BIT_ALL_TX_			(0x000F0000)
+#define INT_BIT_SW_GP_			BIT(9)
+#define INT_BIT_ALL_OTHER_		(0x00000280)
+#define INT_BIT_MAS_			BIT(0)
+
+#define INT_SET				(0x784)
+
+#define INT_EN_SET			(0x788)
+
+#define INT_EN_CLR			(0x78C)
+
+#define INT_STS_R2C			(0x790)
+
+#define INT_VEC_EN_SET			(0x794)
+#define INT_VEC_EN_CLR			(0x798)
+#define INT_VEC_EN_AUTO_CLR		(0x79C)
+#define INT_VEC_EN_(vector_index)	BIT(0 + vector_index)
+
+#define INT_VEC_MAP0			(0x7A0)
+#define INT_VEC_MAP0_RX_VEC_(channel, vector)	\
+	(((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP1			(0x7A4)
+#define INT_VEC_MAP1_TX_VEC_(channel, vector)	\
+	(((u32)(vector)) << ((channel) << 2))
+
+#define INT_VEC_MAP2			(0x7A8)
+
+#define INT_MOD_MAP0			(0x7B0)
+
+#define INT_MOD_MAP1			(0x7B4)
+
+#define INT_MOD_MAP2			(0x7B8)
+
+#define INT_MOD_CFG0			(0x7C0)
+#define INT_MOD_CFG1			(0x7C4)
+#define INT_MOD_CFG2			(0x7C8)
+#define INT_MOD_CFG3			(0x7CC)
+#define INT_MOD_CFG4			(0x7D0)
+#define INT_MOD_CFG5			(0x7D4)
+#define INT_MOD_CFG6			(0x7D8)
+#define INT_MOD_CFG7			(0x7DC)
+
+#define DMAC_CFG				(0xC00)
+#define DMAC_CFG_COAL_EN_			BIT(16)
+#define DMAC_CFG_CH_ARB_SEL_RX_HIGH_		(0x00000000)
+#define DMAC_CFG_MAX_READ_REQ_MASK_		(0x00000070)
+#define DMAC_CFG_MAX_READ_REQ_SET_(val)	\
+	((((u32)(val)) << 4) & DMAC_CFG_MAX_READ_REQ_MASK_)
+#define DMAC_CFG_MAX_DSPACE_16_			(0x00000000)
+#define DMAC_CFG_MAX_DSPACE_32_			(0x00000001)
+#define DMAC_CFG_MAX_DSPACE_64_			BIT(1)
+#define DMAC_CFG_MAX_DSPACE_128_		(0x00000003)
+
+#define DMAC_COAL_CFG				(0xC04)
+#define DMAC_COAL_CFG_TIMER_LIMIT_MASK_		(0xFFF00000)
+#define DMAC_COAL_CFG_TIMER_LIMIT_SET_(val)	\
+	((((u32)(val)) << 20) & DMAC_COAL_CFG_TIMER_LIMIT_MASK_)
+#define DMAC_COAL_CFG_TIMER_TX_START_		BIT(19)
+#define DMAC_COAL_CFG_FLUSH_INTS_		BIT(18)
+#define DMAC_COAL_CFG_INT_EXIT_COAL_		BIT(17)
+#define DMAC_COAL_CFG_CSR_EXIT_COAL_		BIT(16)
+#define DMAC_COAL_CFG_TX_THRES_MASK_		(0x0000FF00)
+#define DMAC_COAL_CFG_TX_THRES_SET_(val)	\
+	((((u32)(val)) << 8) & DMAC_COAL_CFG_TX_THRES_MASK_)
+#define DMAC_COAL_CFG_RX_THRES_MASK_		(0x000000FF)
+#define DMAC_COAL_CFG_RX_THRES_SET_(val)	\
+	(((u32)(val)) & DMAC_COAL_CFG_RX_THRES_MASK_)
+
+#define DMAC_OBFF_CFG				(0xC08)
+#define DMAC_OBFF_TX_THRES_MASK_		(0x0000FF00)
+#define DMAC_OBFF_TX_THRES_SET_(val)	\
+	((((u32)(val)) << 8) & DMAC_OBFF_TX_THRES_MASK_)
+#define DMAC_OBFF_RX_THRES_MASK_		(0x000000FF)
+#define DMAC_OBFF_RX_THRES_SET_(val)	\
+	(((u32)(val)) & DMAC_OBFF_RX_THRES_MASK_)
+
+#define DMAC_CMD				(0xC0C)
+#define DMAC_CMD_SWR_				BIT(31)
+#define DMAC_CMD_TX_SWR_(channel)		BIT(24 + (channel))
+#define DMAC_CMD_START_T_(channel)		BIT(20 + (channel))
+#define DMAC_CMD_STOP_T_(channel)		BIT(16 + (channel))
+#define DMAC_CMD_RX_SWR_(channel)		BIT(8 + (channel))
+#define DMAC_CMD_START_R_(channel)		BIT(4 + (channel))
+#define DMAC_CMD_STOP_R_(channel)		BIT(0 + (channel))
+
+#define DMAC_INT_STS				(0xC10)
+#define DMAC_INT_EN_SET				(0xC14)
+#define DMAC_INT_EN_CLR				(0xC18)
+#define DMAC_INT_BIT_RXFRM_(channel)		BIT(16 + (channel))
+#define DMAC_INT_BIT_TX_IOC_(channel)		BIT(0 + (channel))
+
+#define RX_CFG_A(channel)			(0xC40 + ((channel) << 6))
+#define RX_CFG_A_RX_WB_ON_INT_TMR_		BIT(30)
+#define RX_CFG_A_RX_WB_THRES_MASK_		(0x1F000000)
+#define RX_CFG_A_RX_WB_THRES_SET_(val)	\
+	((((u32)(val)) << 24) & RX_CFG_A_RX_WB_THRES_MASK_)
+#define RX_CFG_A_RX_PF_THRES_MASK_		(0x001F0000)
+#define RX_CFG_A_RX_PF_THRES_SET_(val)	\
+	((((u32)(val)) << 16) & RX_CFG_A_RX_PF_THRES_MASK_)
+#define RX_CFG_A_RX_PF_PRI_THRES_MASK_		(0x00001F00)
+#define RX_CFG_A_RX_PF_PRI_THRES_SET_(val)	\
+	((((u32)(val)) << 8) & RX_CFG_A_RX_PF_PRI_THRES_MASK_)
+#define RX_CFG_A_RX_HP_WB_EN_			BIT(5)
+
+#define RX_CFG_B(channel)			(0xC44 + ((channel) << 6))
+#define RX_CFG_B_TS_ALL_RX_			BIT(29)
+#define RX_CFG_B_RX_PAD_MASK_			(0x03000000)
+#define RX_CFG_B_RX_PAD_0_			(0x00000000)
+#define RX_CFG_B_RX_PAD_2_			(0x02000000)
+#define RX_CFG_B_RDMABL_512_			(0x00040000)
+#define RX_CFG_B_RX_RING_LEN_MASK_		(0x0000FFFF)
+
+#define RX_BASE_ADDRH(channel)			(0xC48 + ((channel) << 6))
+
+#define RX_BASE_ADDRL(channel)			(0xC4C + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRH(channel)	(0xC50 + ((channel) << 6))
+
+#define RX_HEAD_WRITEBACK_ADDRL(channel)	(0xC54 + ((channel) << 6))
+
+#define RX_HEAD(channel)			(0xC58 + ((channel) << 6))
+
+#define RX_TAIL(channel)			(0xC5C + ((channel) << 6))
+#define RX_TAIL_SET_TOP_INT_EN_			BIT(30)
+#define RX_TAIL_SET_TOP_INT_VEC_EN_		BIT(29)
+
+#define RX_CFG_C(channel)			(0xC64 + ((channel) << 6))
+#define RX_CFG_C_RX_TOP_INT_EN_AUTO_CLR_	BIT(6)
+#define RX_CFG_C_RX_INT_EN_R2C_			BIT(4)
+#define RX_CFG_C_RX_DMA_INT_STS_AUTO_CLR_	BIT(3)
+#define RX_CFG_C_RX_INT_STS_R2C_MODE_MASK_	(0x00000007)
+
+#define TX_CFG_A(channel)			(0xD40 + ((channel) << 6))
+#define TX_CFG_A_TX_HP_WB_ON_INT_TMR_		BIT(30)
+#define TX_CFG_A_TX_TMR_HPWB_SEL_IOC_		(0x10000000)
+#define TX_CFG_A_TX_PF_THRES_MASK_		(0x001F0000)
+#define TX_CFG_A_TX_PF_THRES_SET_(value)	\
+	((((u32)(value)) << 16) & TX_CFG_A_TX_PF_THRES_MASK_)
+#define TX_CFG_A_TX_PF_PRI_THRES_MASK_		(0x00001F00)
+#define TX_CFG_A_TX_PF_PRI_THRES_SET_(value)	\
+	((((u32)(value)) << 8) & TX_CFG_A_TX_PF_PRI_THRES_MASK_)
+#define TX_CFG_A_TX_HP_WB_EN_			BIT(5)
+#define TX_CFG_A_TX_HP_WB_THRES_MASK_		(0x0000000F)
+#define TX_CFG_A_TX_HP_WB_THRES_SET_(value)	\
+	(((u32)(value)) & TX_CFG_A_TX_HP_WB_THRES_MASK_)
+
+#define TX_CFG_B(channel)			(0xD44 + ((channel) << 6))
+#define TX_CFG_B_TDMABL_512_			(0x00040000)
+#define TX_CFG_B_TX_RING_LEN_MASK_		(0x0000FFFF)
+
+#define TX_BASE_ADDRH(channel)			(0xD48 + ((channel) << 6))
+
+#define TX_BASE_ADDRL(channel)			(0xD4C + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRH(channel)	(0xD50 + ((channel) << 6))
+
+#define TX_HEAD_WRITEBACK_ADDRL(channel)	(0xD54 + ((channel) << 6))
+
+#define TX_HEAD(channel)			(0xD58 + ((channel) << 6))
+
+#define TX_TAIL(channel)			(0xD5C + ((channel) << 6))
+#define TX_TAIL_SET_DMAC_INT_EN_		BIT(31)
+#define TX_TAIL_SET_TOP_INT_EN_			BIT(30)
+#define TX_TAIL_SET_TOP_INT_VEC_EN_		BIT(29)
+
+#define TX_CFG_C(channel)			(0xD64 + ((channel) << 6))
+#define TX_CFG_C_TX_TOP_INT_EN_AUTO_CLR_	BIT(6)
+#define TX_CFG_C_TX_DMA_INT_EN_AUTO_CLR_	BIT(5)
+#define TX_CFG_C_TX_INT_EN_R2C_			BIT(4)
+#define TX_CFG_C_TX_DMA_INT_STS_AUTO_CLR_	BIT(3)
+#define TX_CFG_C_TX_INT_STS_R2C_MODE_MASK_	(0x00000007)
+
+/* MAC statistics registers */
+#define STAT_RX_FCS_ERRORS			(0x1200)
+#define STAT_RX_ALIGNMENT_ERRORS		(0x1204)
+#define STAT_RX_JABBER_ERRORS			(0x120C)
+#define STAT_RX_UNDERSIZE_FRAME_ERRORS		(0x1210)
+#define STAT_RX_OVERSIZE_FRAME_ERRORS		(0x1214)
+#define STAT_RX_DROPPED_FRAMES			(0x1218)
+#define STAT_RX_UNICAST_BYTE_COUNT		(0x121C)
+#define STAT_RX_BROADCAST_BYTE_COUNT		(0x1220)
+#define STAT_RX_MULTICAST_BYTE_COUNT		(0x1224)
+#define STAT_RX_MULTICAST_FRAMES		(0x1230)
+#define STAT_RX_TOTAL_FRAMES			(0x1254)
+
+#define STAT_TX_FCS_ERRORS			(0x1280)
+#define STAT_TX_EXCESS_DEFERRAL_ERRORS		(0x1284)
+#define STAT_TX_CARRIER_ERRORS			(0x1288)
+#define STAT_TX_SINGLE_COLLISIONS		(0x1290)
+#define STAT_TX_MULTIPLE_COLLISIONS		(0x1294)
+#define STAT_TX_EXCESSIVE_COLLISION		(0x1298)
+#define STAT_TX_LATE_COLLISIONS			(0x129C)
+#define STAT_TX_UNICAST_BYTE_COUNT		(0x12A0)
+#define STAT_TX_BROADCAST_BYTE_COUNT		(0x12A4)
+#define STAT_TX_MULTICAST_BYTE_COUNT		(0x12A8)
+#define STAT_TX_MULTICAST_FRAMES		(0x12B4)
+#define STAT_TX_TOTAL_FRAMES			(0x12D8)
+
+/* End of Register definitions */
+
+#define LAN743X_MAX_RX_CHANNELS		(4)
+#define LAN743X_MAX_TX_CHANNELS		(1)
+struct lan743x_adapter;
+
+#define LAN743X_USED_RX_CHANNELS	(4)
+#define LAN743X_USED_TX_CHANNELS	(1)
+#define LAN743X_INT_MOD	(400)
+
+#if (LAN743X_USED_RX_CHANNELS > LAN743X_MAX_RX_CHANNELS)
+#error Invalid LAN743X_USED_RX_CHANNELS
+#endif
+#if (LAN743X_USED_TX_CHANNELS > LAN743X_MAX_TX_CHANNELS)
+#error Invalid LAN743X_USED_TX_CHANNELS
+#endif
+
+/* PCI */
+/* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */
+#define PCI_VENDOR_ID_SMSC		PCI_VENDOR_ID_EFAR
+#define PCI_DEVICE_ID_SMSC_LAN7430	(0x7430)
+
+#define PCI_CONFIG_LENGTH		(0x1000)
+
+/* CSR */
+#define CSR_LENGTH					(0x2000)
+
+#define LAN743X_CSR_FLAG_IS_A0				BIT(0)
+#define LAN743X_CSR_FLAG_IS_B0				BIT(1)
+#define LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR	BIT(8)
+
+struct lan743x_csr {
+	u32 flags;
+	u8 __iomem *csr_address;
+	u32 id_rev;
+	u32 fpga_rev;
+};
+
+/* INTERRUPTS */
+typedef void(*lan743x_vector_handler)(void *context, u32 int_sts, u32 flags);
+
+#define LAN743X_VECTOR_FLAG_IRQ_SHARED			BIT(0)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_READ		BIT(1)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_R2C		BIT(2)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C		BIT(3)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CHECK		BIT(4)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_CLEAR		BIT(5)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_R2C		BIT(6)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_CLEAR		BIT(7)
+#define LAN743X_VECTOR_FLAG_MASTER_ENABLE_SET		BIT(8)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_CLEAR	BIT(9)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_ISR_SET	BIT(10)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR	BIT(11)
+#define LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET	BIT(12)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR	BIT(13)
+#define LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET	BIT(14)
+#define LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR	BIT(15)
+
+struct lan743x_vector {
+	int			irq;
+	u32			flags;
+	struct lan743x_adapter	*adapter;
+	int			vector_index;
+	u32			int_mask;
+	lan743x_vector_handler	handler;
+	void			*context;
+};
+
+#define LAN743X_MAX_VECTOR_COUNT	(8)
+
+struct lan743x_intr {
+	int			flags;
+
+	unsigned int		irq;
+
+	struct lan743x_vector	vector_list[LAN743X_MAX_VECTOR_COUNT];
+	int			number_of_vectors;
+	bool			using_vectors;
+
+	int			software_isr_flag;
+};
+
+#define LAN743X_MAX_FRAME_SIZE			(9 * 1024)
+
+/* PHY */
+struct lan743x_phy {
+	bool	fc_autoneg;
+	u8	fc_request_control;
+};
+
+/* TX */
+struct lan743x_tx_descriptor;
+struct lan743x_tx_buffer_info;
+
+#define GPIO_QUEUE_STARTED		(0)
+#define GPIO_TX_FUNCTION		(1)
+#define GPIO_TX_COMPLETION		(2)
+#define GPIO_TX_FRAGMENT		(3)
+
+#define TX_FRAME_FLAG_IN_PROGRESS	BIT(0)
+
+struct lan743x_tx {
+	struct lan743x_adapter *adapter;
+	u32	vector_flags;
+	int	channel_number;
+
+	int	ring_size;
+	size_t	ring_allocation_size;
+	struct lan743x_tx_descriptor *ring_cpu_ptr;
+	dma_addr_t ring_dma_ptr;
+	/* ring_lock: used to prevent concurrent access to tx ring */
+	spinlock_t ring_lock;
+	u32		frame_flags;
+	u32		frame_first;
+	u32		frame_data0;
+	u32		frame_tail;
+
+	struct lan743x_tx_buffer_info *buffer_info;
+
+	u32		*head_cpu_ptr;
+	dma_addr_t	head_dma_ptr;
+	int		last_head;
+	int		last_tail;
+
+	struct napi_struct napi;
+
+	struct sk_buff *overflow_skb;
+};
+
+/* RX */
+struct lan743x_rx_descriptor;
+struct lan743x_rx_buffer_info;
+
+struct lan743x_rx {
+	struct lan743x_adapter *adapter;
+	u32	vector_flags;
+	int	channel_number;
+
+	int	ring_size;
+	size_t	ring_allocation_size;
+	struct lan743x_rx_descriptor *ring_cpu_ptr;
+	dma_addr_t ring_dma_ptr;
+
+	struct lan743x_rx_buffer_info *buffer_info;
+
+	u32		*head_cpu_ptr;
+	dma_addr_t	head_dma_ptr;
+	u32		last_head;
+	u32		last_tail;
+
+	struct napi_struct napi;
+
+	u32		frame_count;
+};
+
+struct lan743x_adapter {
+	struct net_device       *netdev;
+	struct mii_bus		*mdiobus;
+	int                     msg_enable;
+	struct pci_dev		*pdev;
+	struct lan743x_csr      csr;
+	struct lan743x_intr     intr;
+
+	/* lock, used to prevent concurrent access to data port */
+	struct mutex		dp_lock;
+
+	u8			mac_address[ETH_ALEN];
+
+	struct lan743x_phy      phy;
+	struct lan743x_tx       tx[LAN743X_MAX_TX_CHANNELS];
+	struct lan743x_rx       rx[LAN743X_MAX_RX_CHANNELS];
+};
+
+#define LAN743X_COMPONENT_FLAG_RX(channel)  BIT(20 + (channel))
+
+#define INTR_FLAG_IRQ_REQUESTED(vector_index)	BIT(0 + vector_index)
+#define INTR_FLAG_MSI_ENABLED			BIT(8)
+#define INTR_FLAG_MSIX_ENABLED			BIT(9)
+
+#define MAC_MII_READ            1
+#define MAC_MII_WRITE           0
+
+#define PHY_FLAG_OPENED     BIT(0)
+#define PHY_FLAG_ATTACHED   BIT(1)
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#define DMA_ADDR_HIGH32(dma_addr)   ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF))
+#else
+#define DMA_ADDR_HIGH32(dma_addr)   ((u32)(0))
+#endif
+#define DMA_ADDR_LOW32(dma_addr) ((u32)((dma_addr) & 0xFFFFFFFF))
+#define DMA_DESCRIPTOR_SPACING_16       (16)
+#define DMA_DESCRIPTOR_SPACING_32       (32)
+#define DMA_DESCRIPTOR_SPACING_64       (64)
+#define DMA_DESCRIPTOR_SPACING_128      (128)
+#define DEFAULT_DMA_DESCRIPTOR_SPACING  (L1_CACHE_BYTES)
+
+#define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \
+	(((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0))
+#define DMAC_CHANNEL_STATE_INITIAL      DMAC_CHANNEL_STATE_SET(0, 0)
+#define DMAC_CHANNEL_STATE_STARTED      DMAC_CHANNEL_STATE_SET(1, 0)
+#define DMAC_CHANNEL_STATE_STOP_PENDING DMAC_CHANNEL_STATE_SET(1, 1)
+#define DMAC_CHANNEL_STATE_STOPPED      DMAC_CHANNEL_STATE_SET(0, 1)
+
+/* TX Descriptor bits */
+#define TX_DESC_DATA0_DTYPE_MASK_		(0xC0000000)
+#define TX_DESC_DATA0_DTYPE_DATA_		(0x00000000)
+#define TX_DESC_DATA0_DTYPE_EXT_		(0x40000000)
+#define TX_DESC_DATA0_FS_			(0x20000000)
+#define TX_DESC_DATA0_LS_			(0x10000000)
+#define TX_DESC_DATA0_EXT_			(0x08000000)
+#define TX_DESC_DATA0_IOC_			(0x04000000)
+#define TX_DESC_DATA0_ICE_			(0x00400000)
+#define TX_DESC_DATA0_IPE_			(0x00200000)
+#define TX_DESC_DATA0_TPE_			(0x00100000)
+#define TX_DESC_DATA0_FCS_			(0x00020000)
+#define TX_DESC_DATA0_BUF_LENGTH_MASK_		(0x0000FFFF)
+#define TX_DESC_DATA0_EXT_LSO_			(0x00200000)
+#define TX_DESC_DATA0_EXT_PAY_LENGTH_MASK_	(0x000FFFFF)
+#define TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_	(0x3FFF0000)
+
+struct lan743x_tx_descriptor {
+	u32     data0;
+	u32     data1;
+	u32     data2;
+	u32     data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING);
+
+#define TX_BUFFER_INFO_FLAG_ACTIVE		BIT(0)
+#define TX_BUFFER_INFO_FLAG_IGNORE_SYNC		BIT(2)
+#define TX_BUFFER_INFO_FLAG_SKB_FRAGMENT	BIT(3)
+struct lan743x_tx_buffer_info {
+	int flags;
+	struct sk_buff *skb;
+	dma_addr_t      dma_ptr;
+	unsigned int    buffer_length;
+};
+
+#define LAN743X_TX_RING_SIZE    (50)
+
+/* OWN bit is set. ie, Descs are owned by RX DMAC */
+#define RX_DESC_DATA0_OWN_                (0x00008000)
+/* OWN bit is clear. ie, Descs are owned by host */
+#define RX_DESC_DATA0_FS_                 (0x80000000)
+#define RX_DESC_DATA0_LS_                 (0x40000000)
+#define RX_DESC_DATA0_FRAME_LENGTH_MASK_  (0x3FFF0000)
+#define RX_DESC_DATA0_FRAME_LENGTH_GET_(data0)	\
+	(((data0) & RX_DESC_DATA0_FRAME_LENGTH_MASK_) >> 16)
+#define RX_DESC_DATA0_EXT_                (0x00004000)
+#define RX_DESC_DATA0_BUF_LENGTH_MASK_    (0x00003FFF)
+#define RX_DESC_DATA2_TS_NS_MASK_         (0x3FFFFFFF)
+
+#if ((NET_IP_ALIGN != 0) && (NET_IP_ALIGN != 2))
+#error NET_IP_ALIGN must be 0 or 2
+#endif
+
+#define RX_HEAD_PADDING		NET_IP_ALIGN
+
+struct lan743x_rx_descriptor {
+	u32     data0;
+	u32     data1;
+	u32     data2;
+	u32     data3;
+} __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING);
+
+#define RX_BUFFER_INFO_FLAG_ACTIVE      BIT(0)
+struct lan743x_rx_buffer_info {
+	int flags;
+	struct sk_buff *skb;
+
+	dma_addr_t      dma_ptr;
+	unsigned int    buffer_length;
+};
+
+#define LAN743X_RX_RING_SIZE        (65)
+
+#define RX_PROCESS_RESULT_NOTHING_TO_DO     (0)
+#define RX_PROCESS_RESULT_PACKET_RECEIVED   (1)
+#define RX_PROCESS_RESULT_PACKET_DROPPED    (2)
+
+#endif /* _LAN743X_H */

From e8cf7c271bfc5f720e3f6ffdb96586fda8486b4c Mon Sep 17 00:00:00 2001
From: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Date: Mon, 5 Mar 2018 14:23:31 -0500
Subject: [PATCH 0702/1678] lan743x: Update MAINTAINERS to include lan743x
 driver

Update MAINTAINERS to include lan743x driver

Signed-off-by: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d632b83c4ce..214c9bca232a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9161,6 +9161,13 @@ F:	drivers/net/dsa/microchip/*
 F:	include/linux/platform_data/microchip-ksz.h
 F:	Documentation/devicetree/bindings/net/dsa/ksz.txt
 
+MICROCHIP LAN743X ETHERNET DRIVER
+M:	Bryan Whitehead <bryan.whitehead@microchip.com>
+M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/microchip/lan743x_*
+
 MICROCHIP USB251XB DRIVER
 M:	Richard Leitner <richard.leitner@skidata.com>
 L:	linux-usb@vger.kernel.org

From c9efb15a9981400c797665dc4844ab562ed3c424 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 5 Mar 2018 15:56:14 -0600
Subject: [PATCH 0703/1678] tipc: bcast: use true and false for boolean values

Assign true or false to boolean variables instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 37892b3909af..f3711176be45 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -574,5 +574,5 @@ void tipc_nlist_purge(struct tipc_nlist *nl)
 {
 	tipc_dest_list_purge(&nl->list);
 	nl->remote = 0;
-	nl->local = 0;
+	nl->local = false;
 }

From 22936c3e6e8cdd9641fdf1362dc9369dd35246c8 Mon Sep 17 00:00:00 2001
From: Brandon Streiff <brandon.streiff@ni.com>
Date: Mon, 5 Mar 2018 16:05:22 -0600
Subject: [PATCH 0704/1678] dt-bindings: net: dsa: marvell: describe
 compatibility string

There are two compatibility strings for mv88e6xxx, but it isn't clear
from the documentation why only those two exist when the mv88e6xxx driver
supports more than the 6085 and 6190. Briefly describe how the compatible
property is used, and provide guidance on which to use.

The model list comes from looking at port_base_addr values (0x0 vs 0x10)
in drivers/net/dsa/mv88e6xxx/chip.c.

Signed-off-by: Brandon Streiff <brandon.streiff@ni.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/marvell.txt | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 1d4d0f49c9d0..caf71e2fe24a 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -13,9 +13,18 @@ placed as a child node of an mdio device.
 The properties described here are those specific to Marvell devices.
 Additional required and optional properties can be found in dsa.txt.
 
+The compatibility string is used only to find an identification register,
+which is at a different MDIO base address in different switch families.
+- "marvell,mv88e6085"	: Switch has base address 0x10. Use with models:
+			  6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165,
+			  6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
+			  6341, 6350, 6351, 6352
+- "marvell,mv88e6190"	: Switch has base address 0x00. Use with models:
+			  6190, 6190X, 6191, 6290, 6390, 6390X
+
 Required properties:
 - compatible		: Should be one of "marvell,mv88e6085" or
-			  "marvell,mv88e6190"
+			  "marvell,mv88e6190" as indicated above
 - reg			: Address on the MII bus for the switch.
 
 Optional properties:

From 9a21ac943240d0353b726495d08e377a257ace52 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 5 Mar 2018 16:11:54 -0600
Subject: [PATCH 0705/1678] ipv6: ndisc: use true and false for boolean values

Assign true or false to boolean variables instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0a19ce3a6f7f..8af5eef464c1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
 	}
 
 	if (!dev->addr_len)
-		inc_opt = 0;
+		inc_opt = false;
 	if (inc_opt)
 		optlen += ndisc_opt_addr_space(dev,
 					       NDISC_NEIGHBOUR_ADVERTISEMENT);

From 6e61e10a8a9655424cf03c0fb44d282b40f402a2 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Tue, 6 Mar 2018 13:05:06 +0800
Subject: [PATCH 0706/1678] net: mvpp2: mvpp2_check_hw_buf_num() can be static

Fixes: effbf5f58d64 ("net: mvpp2: update the BM buffer free/destroy logic")
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index ac0a0dc8f157..338c94d44aea 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -4295,7 +4295,7 @@ static void mvpp2_bm_bufs_free(struct device *dev, struct mvpp2 *priv,
 }
 
 /* Check number of buffers in BM pool */
-int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool)
+static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_pool)
 {
 	int buf_num = 0;
 

From 4d1e46a55ea9f8d693ceabe1ba4b056e55fb6625 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Tue, 6 Mar 2018 17:31:32 +0900
Subject: [PATCH 0707/1678] selftests/net: fix in_netns.sh script

execute the subprocess in netns using 'ip netns exec'

Fixes: cc30c93fa020 ("selftests/net: ignore background traffic in psock_fanout")
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/in_netns.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
index f57a2eaad069..88795b510b32 100755
--- a/tools/testing/selftests/net/in_netns.sh
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -19,5 +19,5 @@ cleanup() {
 trap cleanup EXIT
 setup
 
-"$@"
+ip netns exec "${NETNS}" "$@"
 exit "$?"

From 4fff2d33c707938c537b7aeb2c511226f8a20313 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 5 Mar 2018 22:34:27 +0100
Subject: [PATCH 0708/1678] net: phy: remove phy_error from
 phy_disable_interrupts

All callers of phy_disable_interrupts() call phy_error() in the error
case. Therefore we don't need to do this within the function too.
This change also allows us to use phy_disable_interrupts() in code
holding phydev->lock (because phy_error() can take this lock).

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index a6f924fee584..c5aa773eacdc 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -662,19 +662,10 @@ static int phy_disable_interrupts(struct phy_device *phydev)
 	/* Disable PHY interrupts */
 	err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
 	if (err)
-		goto phy_err;
+		return err;
 
 	/* Clear the interrupt */
-	err = phy_clear_interrupt(phydev);
-	if (err)
-		goto phy_err;
-
-	return 0;
-
-phy_err:
-	phy_error(phydev);
-
-	return err;
+	return phy_clear_interrupt(phydev);
 }
 
 /**

From 70a55c32ee6f3781d294d64c75380c94f4b99ae8 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 5 Mar 2018 22:34:46 +0100
Subject: [PATCH 0709/1678] net: phy: use phy_disable_interrupts in phy_stop

Now that phy_disable_interrupts() can't take lock phydev->lock any longer,
we can use it to simplify phy_stop().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c5aa773eacdc..c2d9027be863 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -765,13 +765,8 @@ void phy_stop(struct phy_device *phydev)
 	if (PHY_HALTED == phydev->state)
 		goto out_unlock;
 
-	if (phy_interrupt_is_valid(phydev)) {
-		/* Disable PHY Interrupts */
-		phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED);
-
-		/* Clear any pending interrupts */
-		phy_clear_interrupt(phydev);
-	}
+	if (phy_interrupt_is_valid(phydev))
+		phy_disable_interrupts(phydev);
 
 	phydev->state = PHY_HALTED;
 

From 1ec54cb44e6731c3cb251bcf9251d65a4b4f6306 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 6 Mar 2018 10:56:31 +0100
Subject: [PATCH 0710/1678] net: unpollute priv_flags space

the ipvlan device driver defines and uses 2 bits inside the priv_flags
net_device field. Such bits and the related helper are used only
inside the ipvlan device driver, and the core networking does not
need to be aware of them.

This change moves netif_is_ipvlan* helper in the ipvlan driver and
re-implement them looking for ipvlan specific symbols instead of
using priv_flags.

Overall this frees two bits inside priv_flags - and move the following
ones to avoid gaps - without any intended functional change.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan.h      |  6 ++++++
 drivers/net/ipvlan/ipvlan_main.c | 10 ++++++----
 include/linux/netdevice.h        | 32 ++++++++------------------------
 3 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index a115f12bf130..c818b9bdab6e 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -177,4 +177,10 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 void ipvlan_link_delete(struct net_device *dev, struct list_head *head);
 void ipvlan_link_setup(struct net_device *dev);
 int ipvlan_link_register(struct rtnl_link_ops *ops);
+
+static inline bool netif_is_ipvlan_port(const struct net_device *dev)
+{
+	return dev->rx_handler == ipvlan_handle_frame;
+}
+
 #endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 4cbe9e27287d..23fd5ab180e8 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -129,7 +129,6 @@ static int ipvlan_port_create(struct net_device *dev)
 	if (err)
 		goto err;
 
-	dev->priv_flags |= IFF_IPVLAN_MASTER;
 	return 0;
 
 err:
@@ -142,7 +141,6 @@ static void ipvlan_port_destroy(struct net_device *dev)
 	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
 	struct sk_buff *skb;
 
-	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
 	if (port->mode == IPVLAN_MODE_L3S) {
 		dev->priv_flags &= ~IFF_L3MDEV_MASTER;
 		ipvlan_unregister_nf_hook(dev_net(dev));
@@ -423,6 +421,12 @@ static const struct header_ops ipvlan_header_ops = {
 	.cache_update	= eth_header_cache_update,
 };
 
+static bool netif_is_ipvlan(const struct net_device *dev)
+{
+	/* both ipvlan and ipvtap devices use the same netdev_ops */
+	return dev->netdev_ops == &ipvlan_netdev_ops;
+}
+
 static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
 					     struct ethtool_link_ksettings *cmd)
 {
@@ -600,8 +604,6 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 	 */
 	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
 
-	dev->priv_flags |= IFF_IPVLAN_SLAVE;
-
 	err = register_netdevice(dev);
 	if (err < 0)
 		return err;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dbe6344b727a..95a613a7cc1c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1381,8 +1381,6 @@ struct net_device_ops {
  * @IFF_MACVLAN: Macvlan device
  * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account
  *	underlying stacked devices
- * @IFF_IPVLAN_MASTER: IPvlan master device
- * @IFF_IPVLAN_SLAVE: IPvlan slave device
  * @IFF_L3MDEV_MASTER: device is an L3 master device
  * @IFF_NO_QUEUE: device can run without qdisc attached
  * @IFF_OPENVSWITCH: device is a Open vSwitch master
@@ -1412,16 +1410,14 @@ enum netdev_priv_flags {
 	IFF_LIVE_ADDR_CHANGE		= 1<<15,
 	IFF_MACVLAN			= 1<<16,
 	IFF_XMIT_DST_RELEASE_PERM	= 1<<17,
-	IFF_IPVLAN_MASTER		= 1<<18,
-	IFF_IPVLAN_SLAVE		= 1<<19,
-	IFF_L3MDEV_MASTER		= 1<<20,
-	IFF_NO_QUEUE			= 1<<21,
-	IFF_OPENVSWITCH			= 1<<22,
-	IFF_L3MDEV_SLAVE		= 1<<23,
-	IFF_TEAM			= 1<<24,
-	IFF_RXFH_CONFIGURED		= 1<<25,
-	IFF_PHONY_HEADROOM		= 1<<26,
-	IFF_MACSEC			= 1<<27,
+	IFF_L3MDEV_MASTER		= 1<<18,
+	IFF_NO_QUEUE			= 1<<19,
+	IFF_OPENVSWITCH			= 1<<20,
+	IFF_L3MDEV_SLAVE		= 1<<21,
+	IFF_TEAM			= 1<<22,
+	IFF_RXFH_CONFIGURED		= 1<<23,
+	IFF_PHONY_HEADROOM		= 1<<24,
+	IFF_MACSEC			= 1<<25,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1442,8 +1438,6 @@ enum netdev_priv_flags {
 #define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
 #define IFF_MACVLAN			IFF_MACVLAN
 #define IFF_XMIT_DST_RELEASE_PERM	IFF_XMIT_DST_RELEASE_PERM
-#define IFF_IPVLAN_MASTER		IFF_IPVLAN_MASTER
-#define IFF_IPVLAN_SLAVE		IFF_IPVLAN_SLAVE
 #define IFF_L3MDEV_MASTER		IFF_L3MDEV_MASTER
 #define IFF_NO_QUEUE			IFF_NO_QUEUE
 #define IFF_OPENVSWITCH			IFF_OPENVSWITCH
@@ -4223,16 +4217,6 @@ static inline bool netif_is_macvlan_port(const struct net_device *dev)
 	return dev->priv_flags & IFF_MACVLAN_PORT;
 }
 
-static inline bool netif_is_ipvlan(const struct net_device *dev)
-{
-	return dev->priv_flags & IFF_IPVLAN_SLAVE;
-}
-
-static inline bool netif_is_ipvlan_port(const struct net_device *dev)
-{
-	return dev->priv_flags & IFF_IPVLAN_MASTER;
-}
-
 static inline bool netif_is_bond_master(const struct net_device *dev)
 {
 	return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING;

From 630b21a26ae02bd180aebbccad248c64daf1da4b Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 6 Mar 2018 12:10:45 +0100
Subject: [PATCH 0711/1678] net: phy: mdio-mux: slience probe defer error

If we fail to register the mdio bus due to probe defer, we should not
print an error message. Just be silent in this case.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mux-mmioreg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c
index 2573ab012f16..70f6115530af 100644
--- a/drivers/net/phy/mdio-mux-mmioreg.c
+++ b/drivers/net/phy/mdio-mux-mmioreg.c
@@ -163,8 +163,9 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
 			    mdio_mux_mmioreg_switch_fn,
 			    &s->mux_handle, s, NULL);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n",
-			np);
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev,
+				"failed to register mdio-mux bus %pOF\n", np);
 		return ret;
 	}
 

From b1312b85012245923b5cb59ac324a5649eefefcb Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Tue, 6 Mar 2018 23:54:07 +0800
Subject: [PATCH 0712/1678] net: dsa: mv88e6xxx: fix boolreturn.cocci warnings

drivers/net/dsa/mv88e6xxx/serdes.c:66:9-10: WARNING: return of 0/1 in function 'mv88e6352_port_has_serdes' with return type bool

 Return statements in functions returning bool should use
 true/false instead of 1/0.
Generated by: scripts/coccinelle/misc/boolreturn.cocci

Fixes: eb755c3f6b7d ("net: dsa: mv88e6xxx: Add helper to determining if port has SERDES")
CC: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/serdes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 4756969c1546..b6166424216a 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -63,15 +63,15 @@ static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 	err = mv88e6xxx_port_get_cmode(chip, port, &cmode);
 	if (err) {
 		dev_err(chip->dev, "failed to read cmode\n");
-		return 0;
+		return false;
 	}
 
 	if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASE_X) ||
 	    (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASE_X) ||
 	    (cmode == MV88E6XXX_PORT_STS_CMODE_SGMII))
-		return 1;
+		return true;
 
-	return 0;
+	return false;
 }
 
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on)

From 7d99429afa9393968261a425b1e1b57298e48a75 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Wed, 7 Mar 2018 02:23:23 +0800
Subject: [PATCH 0713/1678] enic: fix boolreturn.cocci warnings

drivers/net/ethernet/cisco/enic/vnic_dev.c:1294:9-10: WARNING: return of 0/1 in function 'vnic_dev_capable_udp_rss' with return type bool

 Return statements in functions returning bool should use
 true/false instead of 1/0.
Generated by: scripts/coccinelle/misc/boolreturn.cocci

Fixes: 48398b6e7065 ("enic: set UDP rss flag")
CC: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/vnic_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index a2b376055b2d..76cdd4c9d11f 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -1291,7 +1291,7 @@ bool vnic_dev_capable_udp_rss(struct vnic_dev *vdev)
 
 	err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait);
 	if (err || !a0)
-		return 0;
+		return false;
 
 	rss_hash_type = (a1 >> NIC_CFG_RSS_HASH_TYPE_SHIFT) &
 			NIC_CFG_RSS_HASH_TYPE_MASK_FIELD;

From d8c13f2271ec5178c52fbde072ec7b562651ed9d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 6 Mar 2018 11:12:53 -0800
Subject: [PATCH 0714/1678] net/mlx4_en: try to use high order pages for RX
 rings

RX rings can fit most of the time in a contiguous piece of memory,
so lets use kvzalloc_node/kvfree instead of vzalloc_node/vfree

Note that kvzalloc_node() automatically falls back to another node,
there is no need to do the fallback ourselves.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c2c6bd7578fd..05787efef492 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -291,13 +291,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 
 	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
 					sizeof(struct mlx4_en_rx_alloc));
-	ring->rx_info = vzalloc_node(tmp, node);
+	ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node);
 	if (!ring->rx_info) {
-		ring->rx_info = vzalloc(tmp);
-		if (!ring->rx_info) {
-			err = -ENOMEM;
-			goto err_xdp_info;
-		}
+		err = -ENOMEM;
+		goto err_xdp_info;
 	}
 
 	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
@@ -318,7 +315,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 	return 0;
 
 err_info:
-	vfree(ring->rx_info);
+	kvfree(ring->rx_info);
 	ring->rx_info = NULL;
 err_xdp_info:
 	xdp_rxq_info_unreg(&ring->xdp_rxq);
@@ -447,7 +444,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 		bpf_prog_put(old_prog);
 	xdp_rxq_info_unreg(&ring->xdp_rxq);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
-	vfree(ring->rx_info);
+	kvfree(ring->rx_info);
 	ring->rx_info = NULL;
 	kfree(ring);
 	*pring = NULL;

From d1f1b9cbf34c667d06adc44136e448cade8e28d9 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 6 Mar 2018 22:16:27 +0100
Subject: [PATCH 0715/1678] selftests: net: Introduce first PMTU test

One single test implemented so far: test_pmtu_vti6_exception
checks that the PMTU of a route exception, caused by a tunnel
exceeding the link layer MTU, is affected by administrative
changes of the tunnel MTU. Creation of the route exception is
checked too.

Requested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile |   2 +-
 tools/testing/selftests/net/pmtu.sh  | 163 +++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/net/pmtu.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 229a038966e3..785fc18a16b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755
index 000000000000..6c19c148cef8
--- /dev/null
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -0,0 +1,163 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations
+#
+# Tests currently implemented:
+#
+# - test_pmtu_vti6_exception
+#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+#	namespaces with matching endpoints. Check that route exception is
+#	created by exceeding link layer MTU with ping to other endpoint. Then
+#	decrease and increase MTU of tunnel, checking that route exception PMTU
+#	changes accordingly
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+setup_namespaces() {
+	ip netns add ${NS_A} || return 0
+	ip netns add ${NS_B}
+
+	return 1
+}
+
+setup_veth() {
+	${ns_a} ip link add veth_a type veth peer name veth_b || return 0
+	${ns_a} ip link set veth_b netns ${NS_B}
+
+	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+	${ns_a} ip link set veth_a up
+	${ns_b} ip link set veth_b up
+
+	return 1
+}
+
+setup_vti6() {
+	${ns_a} ip link add vti_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || return 0
+	${ns_b} ip link add vti_b type vti6 local ${veth6_b_addr} remote ${veth6_a_addr} key 10
+
+	${ns_a} ip addr add ${vti6_a_addr}/${vti6_mask} dev vti_a
+	${ns_b} ip addr add ${vti6_b_addr}/${vti6_mask} dev vti_b
+
+	${ns_a} ip link set vti_a up
+	${ns_b} ip link set vti_b up
+
+	sleep 1
+
+	return 1
+}
+
+setup_xfrm() {
+	${ns_a} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 0
+	${ns_a} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_a} ip -6 xfrm policy add dir out mark 10 tmpl src ${veth6_a_addr} dst ${veth6_b_addr} proto esp mode tunnel
+	${ns_a} ip -6 xfrm policy add dir in mark 10 tmpl src ${veth6_b_addr} dst ${veth6_a_addr} proto esp mode tunnel
+
+	${ns_b} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_b} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_b} ip -6 xfrm policy add dir out mark 10 tmpl src ${veth6_b_addr} dst ${veth6_a_addr} proto esp mode tunnel
+	${ns_b} ip -6 xfrm policy add dir in mark 10 tmpl src ${veth6_a_addr} dst ${veth6_b_addr} proto esp mode tunnel
+
+	return 1
+}
+
+setup() {
+	tunnel_type="$1"
+
+	[ "$(id -u)" -ne 0 ] && echo "SKIP: need to run as root" && exit 0
+
+	setup_namespaces && echo "SKIP: namespaces not supported" && exit 0
+	setup_veth && echo "SKIP: veth not supported" && exit 0
+
+	case ${tunnel_type} in
+	"vti6")
+		setup_vti6 && echo "SKIP: vti6 not supported" && exit 0
+		setup_xfrm && echo "SKIP: xfrm not supported" && exit 0
+		;;
+	*)
+		;;
+	esac
+}
+
+cleanup() {
+	ip netns del ${NS_A} 2 > /dev/null
+	ip netns del ${NS_B} 2 > /dev/null
+}
+
+mtu() {
+	ns_cmd="${1}"
+	dev="${2}"
+	mtu="${3}"
+
+	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
+route_get_dst_exception() {
+	dst="${1}"
+
+	${ns_a} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+	dst="${1}"
+
+	exception="$(route_get_dst_exception ${dst})"
+	next=0
+	for i in ${exception}; do
+		[ ${next} -eq 1 ] && echo "${i}" && return
+		[ "${i}" = "mtu" ] && next=1
+	done
+}
+
+test_pmtu_vti6_exception() {
+	setup vti6
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}" veth_a 4000
+	mtu "${ns_b}" veth_b 4000
+	mtu "${ns_a}" vti_a 5000
+	mtu "${ns_b}" vti_b 5000
+	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+	# Check that exception was created
+	if [ "$(route_get_dst_pmtu_from_exception ${vti6_b_addr})" = "" ]; then
+		echo "FAIL: Tunnel exceeding link layer MTU didn't create route exception"
+		exit 1
+	fi
+
+	# Decrease tunnel MTU, check for PMTU decrease in route exception
+	mtu "${ns_a}" vti_a 3000
+
+	if [ "$(route_get_dst_pmtu_from_exception ${vti6_b_addr})" -ne 3000 ]; then
+		echo "FAIL: Decreasing tunnel MTU didn't decrease route exception PMTU"
+		exit 1
+	fi
+
+	# Increase tunnel MTU, check for PMTU increase in route exception
+	mtu "${ns_a}" vti_a 9000
+	if [ "$(route_get_dst_pmtu_from_exception ${vti6_b_addr})" -ne 9000 ]; then
+		echo "FAIL: Increasing tunnel MTU didn't increase route exception PMTU"
+		exit 1
+	fi
+
+	echo "PASS"
+}
+
+trap cleanup EXIT
+
+test_pmtu_vti6_exception
+
+exit 0

From 4f06717fb063ad4ef38bd5e0ec1881bc784c325b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 7 Mar 2018 10:46:57 +0100
Subject: [PATCH 0716/1678] net: kalmia: clean up bind error path

Drop bogus call to usb_driver_release_interface() from an error path in
the usbnet bind() callback, which is called during interface probe. At
this point the interface is not bound and usb_driver_release_interface()
returns early.

Also remove the bogus call to clear the interface data, which is owned
by the usbnet driver and would not even have been set by the time bind()
is called.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/kalmia.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c
index 1ec523b0e932..bd2ba3659028 100644
--- a/drivers/net/usb/kalmia.c
+++ b/drivers/net/usb/kalmia.c
@@ -150,12 +150,8 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->rx_urb_size = dev->hard_mtu * 10; // Found as optimal after testing
 
 	status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr);
-
-	if (status) {
-		usb_set_intfdata(intf, NULL);
-		usb_driver_release_interface(driver_of(intf), intf);
+	if (status)
 		return status;
-	}
 
 	memcpy(dev->net->dev_addr, ethernet_addr, ETH_ALEN);
 

From bb5441fc3cc0e11be0ff9bbc6a00a74b3d2b0153 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 7 Mar 2018 10:46:58 +0100
Subject: [PATCH 0717/1678] net: cdc_eem: clean up bind error path

Drop bogus call to usb_driver_release_interface() from an error path in
the usbnet bind() callback, which is called during interface probe. At
this point the interface is not bound and usb_driver_release_interface()
returns early.

Also remove the bogus call to clear the interface data, which is owned
by the usbnet driver and would not even have been set by the time bind()
is called.

Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_eem.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index f7180f8db39e..61ea4eaace5d 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -83,11 +83,8 @@ static int eem_bind(struct usbnet *dev, struct usb_interface *intf)
 	int status = 0;
 
 	status = usbnet_get_endpoints(dev, intf);
-	if (status < 0) {
-		usb_set_intfdata(intf, NULL);
-		usb_driver_release_interface(driver_of(intf), intf);
+	if (status < 0)
 		return status;
-	}
 
 	/* no jumbogram (16K) support for now */
 

From 0c17db05ec982607121b8f01cf4ce03513964d9c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 7 Mar 2018 13:57:59 +0100
Subject: [PATCH 0718/1678] selftests: forwarding: fix "ok" action test

Fix the "ok" action test so it checks that packet that is okayed does not
continue to be processed by other rules. Fix error message as well.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/tc_actions.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 6b18ba2d3982..ac6b6a1057d8 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -96,7 +96,10 @@ gact_drop_and_ok_test()
 		-t ip -q
 
 	tc_check_packets "dev $swp1 ingress" 101 1
-	check_err $? "Did not see trapped packet"
+	check_err $? "Did not see passed packet"
+
+	tc_check_packets "dev $swp1 ingress" 102 2
+	check_fail $? "Packet was dropped and it should not reach here"
 
 	tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
 	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower

From dff58a09d77e152056a4803ff89b6ec3fc6c9b2c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 7 Mar 2018 13:58:00 +0100
Subject: [PATCH 0719/1678] selftests: forwarding: fix flags passed to first
 drop rule in gact_drop_and_ok_test

Fix copy&paste error and pass proper flags.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/tc_actions.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index ac6b6a1057d8..3a6385ebd5d0 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -81,7 +81,7 @@ gact_drop_and_ok_test()
 	RET=0
 
 	tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
-		skip_hw dst_ip 192.0.2.2 action drop
+		$tcflags dst_ip 192.0.2.2 action drop
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q

From ce2a27c761acaba032f61f8322ff9447fd084671 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Wed, 7 Mar 2018 15:18:03 +0100
Subject: [PATCH 0720/1678] net: mvpp2: Simplify MAC filtering function
 parameters

The mvpp2_prs_mac_da_accept function takes into parameter both the
struct representing the controller and the port id. This is meaningful
when we want to create TCAM entries for non-initialized ports, but in
this case we expect the port to be initialized before starting adding or
removing MAC addresses to the per-port filter.

This commit changes the function so that it takes struct mvpp2_port as
a parameter instead.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 30 ++++++++++++++--------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 338c94d44aea..8796456c5556 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -3817,16 +3817,17 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 }
 
 /* Update parser's mac da entry */
-static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
-				   const u8 *da, bool add)
+static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
+				   bool add)
 {
-	struct mvpp2_prs_entry *pe;
-	unsigned int pmap, len, ri;
 	unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	struct mvpp2 *priv = port->priv;
+	unsigned int pmap, len, ri;
+	struct mvpp2_prs_entry *pe;
 	int tid;
 
 	/* Scan TCAM and see if entry with this <MAC DA, port> already exist */
-	pe = mvpp2_prs_mac_da_range_find(priv, (1 << port), da, mask,
+	pe = mvpp2_prs_mac_da_range_find(priv, BIT(port->id), da, mask,
 					 MVPP2_PRS_UDF_MAC_DEF);
 
 	/* No such entry */
@@ -3861,7 +3862,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
 	}
 
 	/* Update port mask */
-	mvpp2_prs_tcam_port_set(pe, port, add);
+	mvpp2_prs_tcam_port_set(pe, port->id, add);
 
 	/* Invalidate the entry if no ports are left enabled */
 	pmap = mvpp2_prs_tcam_port_map_get(pe);
@@ -3917,13 +3918,12 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
 	int err;
 
 	/* Remove old parser entry */
-	err = mvpp2_prs_mac_da_accept(port->priv, port->id, dev->dev_addr,
-				      false);
+	err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, false);
 	if (err)
 		return err;
 
 	/* Add new parser entry */
-	err = mvpp2_prs_mac_da_accept(port->priv, port->id, da, true);
+	err = mvpp2_prs_mac_da_accept(port, da, true);
 	if (err)
 		return err;
 
@@ -3959,7 +3959,8 @@ static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
 
 		if (is_multicast_ether_addr(da) && !is_broadcast_ether_addr(da))
 			/* Delete this entry */
-			mvpp2_prs_mac_da_accept(priv, port, da, false);
+			mvpp2_prs_mac_da_accept(priv->port_list[port], da,
+						false);
 	}
 }
 
@@ -7380,15 +7381,14 @@ static int mvpp2_open(struct net_device *dev)
 			0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	int err;
 
-	err = mvpp2_prs_mac_da_accept(port->priv, port->id, mac_bcast, true);
+	err = mvpp2_prs_mac_da_accept(port, mac_bcast, true);
 	if (err) {
 		netdev_err(dev, "mvpp2_prs_mac_da_accept BC failed\n");
 		return err;
 	}
-	err = mvpp2_prs_mac_da_accept(port->priv, port->id,
-				      dev->dev_addr, true);
+	err = mvpp2_prs_mac_da_accept(port, dev->dev_addr, true);
 	if (err) {
-		netdev_err(dev, "mvpp2_prs_mac_da_accept MC failed\n");
+		netdev_err(dev, "mvpp2_prs_mac_da_accept own addr failed\n");
 		return err;
 	}
 	err = mvpp2_prs_tag_mode_set(port->priv, port->id, MVPP2_TAG_TYPE_MH);
@@ -7520,7 +7520,7 @@ retry:
 
 	if (!allmulti) {
 		netdev_for_each_mc_addr(ha, dev) {
-			if (mvpp2_prs_mac_da_accept(priv, id, ha->addr, true)) {
+			if (mvpp2_prs_mac_da_accept(port, ha->addr, true)) {
 				allmulti = true;
 				goto retry;
 			}

From 10fea26ce2aaf46c4da834664fb5f8476108a783 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Wed, 7 Mar 2018 15:18:04 +0100
Subject: [PATCH 0721/1678] net: mvpp2: Add support for unicast filtering

Marvell PPv2 controller can be used to implement packet filtering based
on the destination MAC address. This is already used to implement
multicast filtering. This patch adds support for Unicast filtering.

Filtering is based on so-called "TCAM entries" to implement filtering.
Due to their limited number and the fact that these are also used for
other purposes, we reserve 80 entries for both unicast and multicast
filters. On top of the broadcast address, and each interface's own MAC
address, we reserve 25 entries per port, 4 for unicast filters, 21 for
multicast.

Whenever unicast or multicast range for one port is full, the filtering
is disabled and port goes into promiscuous mode for the given type of
addresses.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 292 +++++++++++++++------------
 1 file changed, 159 insertions(+), 133 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 8796456c5556..9bd35f2291d6 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -601,6 +601,9 @@ enum mvpp2_tag_type {
 #define MVPP2_PRS_TCAM_PROTO_MASK	0xff
 #define MVPP2_PRS_TCAM_PROTO_MASK_L	0x3f
 #define MVPP2_PRS_DBL_VLANS_MAX		100
+#define MVPP2_PRS_CAST_MASK		BIT(0)
+#define MVPP2_PRS_MCAST_VAL		BIT(0)
+#define MVPP2_PRS_UCAST_VAL		0x0
 
 /* Tcam structure:
  * - lookup ID - 4 bits
@@ -624,6 +627,19 @@ enum mvpp2_tag_type {
 
 #define MVPP2_PRS_VID_TCAM_BYTE         2
 
+/* TCAM range for unicast and multicast filtering. We have 25 entries per port,
+ * with 4 dedicated to UC filtering and the rest to multicast filtering.
+ * Additionnally we reserve one entry for the broadcast address, and one for
+ * each port's own address.
+ */
+#define MVPP2_PRS_MAC_UC_MC_FILT_MAX	25
+#define MVPP2_PRS_MAC_RANGE_SIZE	80
+
+/* Number of entries per port dedicated to UC and MC filtering */
+#define MVPP2_PRS_MAC_UC_FILT_MAX	4
+#define MVPP2_PRS_MAC_MC_FILT_MAX	(MVPP2_PRS_MAC_UC_MC_FILT_MAX - \
+					 MVPP2_PRS_MAC_UC_FILT_MAX)
+
 /* There is a TCAM range reserved for VLAN filtering entries, range size is 33
  * 10 VLAN ID filter entries per port
  * 1 default VLAN filter entry per port
@@ -639,36 +655,40 @@ enum mvpp2_tag_type {
 #define MVPP2_PE_DROP_ALL		0
 #define MVPP2_PE_FIRST_FREE_TID		1
 
+/* MAC filtering range */
+#define MVPP2_PE_MAC_RANGE_END		(MVPP2_PE_VID_FILT_RANGE_START - 1)
+#define MVPP2_PE_MAC_RANGE_START	(MVPP2_PE_MAC_RANGE_END - \
+						MVPP2_PRS_MAC_RANGE_SIZE + 1)
 /* VLAN filtering range */
 #define MVPP2_PE_VID_FILT_RANGE_END     (MVPP2_PRS_TCAM_SRAM_SIZE - 31)
 #define MVPP2_PE_VID_FILT_RANGE_START   (MVPP2_PE_VID_FILT_RANGE_END - \
 					 MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1)
 #define MVPP2_PE_LAST_FREE_TID          (MVPP2_PE_VID_FILT_RANGE_START - 1)
 #define MVPP2_PE_IP6_EXT_PROTO_UN	(MVPP2_PRS_TCAM_SRAM_SIZE - 30)
-#define MVPP2_PE_MAC_MC_IP6		(MVPP2_PRS_TCAM_SRAM_SIZE - 29)
-#define MVPP2_PE_IP6_ADDR_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 28)
-#define MVPP2_PE_IP4_ADDR_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 27)
-#define MVPP2_PE_LAST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 26)
-#define MVPP2_PE_FIRST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 21)
-#define MVPP2_PE_EDSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 20)
-#define MVPP2_PE_EDSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 19)
-#define MVPP2_PE_DSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 18)
-#define MVPP2_PE_DSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 17)
-#define MVPP2_PE_ETYPE_EDSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 16)
-#define MVPP2_PE_ETYPE_EDSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 15)
-#define MVPP2_PE_ETYPE_DSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 14)
-#define MVPP2_PE_ETYPE_DSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 13)
-#define MVPP2_PE_MH_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 12)
-#define MVPP2_PE_DSA_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 11)
-#define MVPP2_PE_IP6_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 10)
-#define MVPP2_PE_IP4_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 9)
-#define MVPP2_PE_ETH_TYPE_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 8)
-#define MVPP2_PE_VID_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 7)
-#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 6)
-#define MVPP2_PE_VLAN_DBL		(MVPP2_PRS_TCAM_SRAM_SIZE - 5)
-#define MVPP2_PE_VLAN_NONE		(MVPP2_PRS_TCAM_SRAM_SIZE - 4)
-#define MVPP2_PE_MAC_MC_ALL		(MVPP2_PRS_TCAM_SRAM_SIZE - 3)
-#define MVPP2_PE_MAC_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 2)
+#define MVPP2_PE_IP6_ADDR_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 29)
+#define MVPP2_PE_IP4_ADDR_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 28)
+#define MVPP2_PE_LAST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 27)
+#define MVPP2_PE_FIRST_DEFAULT_FLOW	(MVPP2_PRS_TCAM_SRAM_SIZE - 22)
+#define MVPP2_PE_EDSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 21)
+#define MVPP2_PE_EDSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 20)
+#define MVPP2_PE_DSA_TAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 19)
+#define MVPP2_PE_DSA_UNTAGGED		(MVPP2_PRS_TCAM_SRAM_SIZE - 18)
+#define MVPP2_PE_ETYPE_EDSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 17)
+#define MVPP2_PE_ETYPE_EDSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 16)
+#define MVPP2_PE_ETYPE_DSA_TAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 15)
+#define MVPP2_PE_ETYPE_DSA_UNTAGGED	(MVPP2_PRS_TCAM_SRAM_SIZE - 14)
+#define MVPP2_PE_MH_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 13)
+#define MVPP2_PE_DSA_DEFAULT		(MVPP2_PRS_TCAM_SRAM_SIZE - 12)
+#define MVPP2_PE_IP6_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 11)
+#define MVPP2_PE_IP4_PROTO_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 10)
+#define MVPP2_PE_ETH_TYPE_UN		(MVPP2_PRS_TCAM_SRAM_SIZE - 9)
+#define MVPP2_PE_VID_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 8)
+#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT	(MVPP2_PRS_TCAM_SRAM_SIZE - 7)
+#define MVPP2_PE_VLAN_DBL		(MVPP2_PRS_TCAM_SRAM_SIZE - 6)
+#define MVPP2_PE_VLAN_NONE		(MVPP2_PRS_TCAM_SRAM_SIZE - 5)
+/* reserved */
+#define MVPP2_PE_MAC_MC_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 3)
+#define MVPP2_PE_MAC_UC_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 2)
 #define MVPP2_PE_MAC_NON_PROMISCUOUS	(MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 
 #define MVPP2_PRS_VID_PORT_FIRST(port)	(MVPP2_PE_VID_FILT_RANGE_START + \
@@ -798,6 +818,12 @@ enum mvpp2_prs_lookup {
 	MVPP2_PRS_LU_LAST,
 };
 
+/* L2 cast enum */
+enum mvpp2_prs_l2_cast {
+	MVPP2_PRS_L2_UNI_CAST,
+	MVPP2_PRS_L2_MULTI_CAST,
+};
+
 /* L3 cast enum */
 enum mvpp2_prs_l3_cast {
 	MVPP2_PRS_L3_UNI_CAST,
@@ -1973,78 +1999,43 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
 	mvpp2_prs_hw_write(priv, &pe);
 }
 
-/* Set port to promiscuous mode */
-static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, bool add)
+/* Set port to unicast or multicast promiscuous mode */
+static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port,
+				      enum mvpp2_prs_l2_cast l2_cast, bool add)
 {
 	struct mvpp2_prs_entry pe;
+	unsigned char cast_match;
+	unsigned int ri;
+	int tid;
 
-	/* Promiscuous mode - Accept unknown packets */
-
-	if (priv->prs_shadow[MVPP2_PE_MAC_PROMISCUOUS].valid) {
-		/* Entry exist - update port only */
-		pe.index = MVPP2_PE_MAC_PROMISCUOUS;
-		mvpp2_prs_hw_read(priv, &pe);
+	if (l2_cast == MVPP2_PRS_L2_UNI_CAST) {
+		cast_match = MVPP2_PRS_UCAST_VAL;
+		tid = MVPP2_PE_MAC_UC_PROMISCUOUS;
+		ri = MVPP2_PRS_RI_L2_UCAST;
 	} else {
-		/* Entry doesn't exist - create new */
-		memset(&pe, 0, sizeof(pe));
-		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
-		pe.index = MVPP2_PE_MAC_PROMISCUOUS;
-
-		/* Continue - set next lookup */
-		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
-
-		/* Set result info bits */
-		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_UCAST,
-					 MVPP2_PRS_RI_L2_CAST_MASK);
-
-		/* Shift to ethertype */
-		mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
-					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-
-		/* Mask all ports */
-		mvpp2_prs_tcam_port_map_set(&pe, 0);
-
-		/* Update shadow table */
-		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+		cast_match = MVPP2_PRS_MCAST_VAL;
+		tid = MVPP2_PE_MAC_MC_PROMISCUOUS;
+		ri = MVPP2_PRS_RI_L2_MCAST;
 	}
 
-	/* Update port mask */
-	mvpp2_prs_tcam_port_set(&pe, port, add);
-
-	mvpp2_prs_hw_write(priv, &pe);
-}
-
-/* Accept multicast */
-static void mvpp2_prs_mac_multi_set(struct mvpp2 *priv, int port, int index,
-				    bool add)
-{
-	struct mvpp2_prs_entry pe;
-	unsigned char da_mc;
-
-	/* Ethernet multicast address first byte is
-	 * 0x01 for IPv4 and 0x33 for IPv6
-	 */
-	da_mc = (index == MVPP2_PE_MAC_MC_ALL) ? 0x01 : 0x33;
-
-	if (priv->prs_shadow[index].valid) {
-		/* Entry exist - update port only */
-		pe.index = index;
+	/* promiscuous mode - Accept unknown unicast or multicast packets */
+	if (priv->prs_shadow[tid].valid) {
+		pe.index = tid;
 		mvpp2_prs_hw_read(priv, &pe);
 	} else {
-		/* Entry doesn't exist - create new */
 		memset(&pe, 0, sizeof(pe));
 		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
-		pe.index = index;
+		pe.index = tid;
 
 		/* Continue - set next lookup */
 		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
 
 		/* Set result info bits */
-		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L2_MCAST,
-					 MVPP2_PRS_RI_L2_CAST_MASK);
+		mvpp2_prs_sram_ri_update(&pe, ri, MVPP2_PRS_RI_L2_CAST_MASK);
 
-		/* Update tcam entry data first byte */
-		mvpp2_prs_tcam_data_byte_set(&pe, 0, da_mc, 0xff);
+		/* Match UC or MC addresses */
+		mvpp2_prs_tcam_data_byte_set(&pe, 0, cast_match,
+					     MVPP2_PRS_CAST_MASK);
 
 		/* Shift to ethertype */
 		mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
@@ -2758,11 +2749,10 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv)
 	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
 	mvpp2_prs_hw_write(priv, &pe);
 
-	/* place holders only - no ports */
+	/* Create dummy entries for drop all and promiscuous modes */
 	mvpp2_prs_mac_drop_all_set(priv, 0, false);
-	mvpp2_prs_mac_promisc_set(priv, 0, false);
-	mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false);
-	mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false);
+	mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false);
+	mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false);
 }
 
 /* Set default entries for various types of dsa packets */
@@ -3794,8 +3784,8 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
 
 	/* Go through the all entires with MVPP2_PRS_LU_MAC */
-	for (tid = MVPP2_PE_FIRST_FREE_TID;
-	     tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+	for (tid = MVPP2_PE_MAC_RANGE_START;
+	     tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
 		unsigned int entry_pmap;
 
 		if (!priv->prs_shadow[tid].valid ||
@@ -3836,18 +3826,10 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
 			return 0;
 
 		/* Create new TCAM entry */
-		/* Find first range mac entry*/
-		for (tid = MVPP2_PE_FIRST_FREE_TID;
-		     tid <= MVPP2_PE_LAST_FREE_TID; tid++)
-			if (priv->prs_shadow[tid].valid &&
-			    (priv->prs_shadow[tid].lu == MVPP2_PRS_LU_MAC) &&
-			    (priv->prs_shadow[tid].udf ==
-						       MVPP2_PRS_UDF_MAC_RANGE))
-				break;
-
 		/* Go through the all entries from first to last */
-		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
-						tid - 1);
+		tid = mvpp2_prs_tcam_first_free(priv,
+						MVPP2_PE_MAC_RANGE_START,
+						MVPP2_PE_MAC_RANGE_END);
 		if (tid < 0)
 			return tid;
 
@@ -3886,12 +3868,16 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
 		mvpp2_prs_tcam_data_byte_set(pe, len, da[len], 0xff);
 
 	/* Set result info bits */
-	if (is_broadcast_ether_addr(da))
+	if (is_broadcast_ether_addr(da)) {
 		ri = MVPP2_PRS_RI_L2_BCAST;
-	else if (is_multicast_ether_addr(da))
+	} else if (is_multicast_ether_addr(da)) {
 		ri = MVPP2_PRS_RI_L2_MCAST;
-	else
-		ri = MVPP2_PRS_RI_L2_UCAST | MVPP2_PRS_RI_MAC_ME_MASK;
+	} else {
+		ri = MVPP2_PRS_RI_L2_UCAST;
+
+		if (ether_addr_equal(da, port->dev->dev_addr))
+			ri |= MVPP2_PRS_RI_MAC_ME_MASK;
+	}
 
 	mvpp2_prs_sram_ri_update(pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
 				 MVPP2_PRS_RI_MAC_ME_MASK);
@@ -3933,14 +3919,15 @@ static int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
 	return 0;
 }
 
-/* Delete all port's multicast simple (not range) entries */
-static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
+static void mvpp2_prs_mac_del_all(struct mvpp2_port *port)
 {
+	struct mvpp2 *priv = port->priv;
 	struct mvpp2_prs_entry pe;
+	unsigned long pmap;
 	int index, tid;
 
-	for (tid = MVPP2_PE_FIRST_FREE_TID;
-	     tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
+	for (tid = MVPP2_PE_MAC_RANGE_START;
+	     tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
 		unsigned char da[ETH_ALEN], da_mask[ETH_ALEN];
 
 		if (!priv->prs_shadow[tid].valid ||
@@ -3948,19 +3935,29 @@ static void mvpp2_prs_mcast_del_all(struct mvpp2 *priv, int port)
 		    (priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF))
 			continue;
 
-		/* Only simple mac entries */
 		pe.index = tid;
 		mvpp2_prs_hw_read(priv, &pe);
 
+		pmap = mvpp2_prs_tcam_port_map_get(&pe);
+
+		/* We only want entries active on this port */
+		if (!test_bit(port->id, &pmap))
+			continue;
+
 		/* Read mac addr from entry */
 		for (index = 0; index < ETH_ALEN; index++)
 			mvpp2_prs_tcam_data_byte_get(&pe, index, &da[index],
 						     &da_mask[index]);
 
-		if (is_multicast_ether_addr(da) && !is_broadcast_ether_addr(da))
-			/* Delete this entry */
-			mvpp2_prs_mac_da_accept(priv->port_list[port], da,
-						false);
+		/* Special cases : Don't remove broadcast and port's own
+		 * address
+		 */
+		if (is_broadcast_ether_addr(da) ||
+		    ether_addr_equal(da, port->dev->dev_addr))
+			continue;
+
+		/* Remove entry from TCAM */
+		mvpp2_prs_mac_da_accept(port, da, false);
 	}
 }
 
@@ -7502,36 +7499,64 @@ static int mvpp2_stop(struct net_device *dev)
 	return 0;
 }
 
+static int mvpp2_prs_mac_da_accept_list(struct mvpp2_port *port,
+					struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *ha;
+	int ret;
+
+	netdev_hw_addr_list_for_each(ha, list) {
+		ret = mvpp2_prs_mac_da_accept(port, ha->addr, true);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void mvpp2_set_rx_promisc(struct mvpp2_port *port, bool enable)
+{
+	if (!enable && (port->dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+		mvpp2_prs_vid_enable_filtering(port);
+	else
+		mvpp2_prs_vid_disable_filtering(port);
+
+	mvpp2_prs_mac_promisc_set(port->priv, port->id,
+				  MVPP2_PRS_L2_UNI_CAST, enable);
+
+	mvpp2_prs_mac_promisc_set(port->priv, port->id,
+				  MVPP2_PRS_L2_MULTI_CAST, enable);
+}
+
 static void mvpp2_set_rx_mode(struct net_device *dev)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
-	struct mvpp2 *priv = port->priv;
-	struct netdev_hw_addr *ha;
-	int id = port->id;
-	bool allmulti = dev->flags & IFF_ALLMULTI;
 
-retry:
-	mvpp2_prs_mac_promisc_set(priv, id, dev->flags & IFF_PROMISC);
-	mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_ALL, allmulti);
-	mvpp2_prs_mac_multi_set(priv, id, MVPP2_PE_MAC_MC_IP6, allmulti);
+	/* Clear the whole UC and MC list */
+	mvpp2_prs_mac_del_all(port);
 
-	/* Remove all port->id's mcast enries */
-	mvpp2_prs_mcast_del_all(priv, id);
-
-	if (!allmulti) {
-		netdev_for_each_mc_addr(ha, dev) {
-			if (mvpp2_prs_mac_da_accept(port, ha->addr, true)) {
-				allmulti = true;
-				goto retry;
-			}
-		}
+	if (dev->flags & IFF_PROMISC) {
+		mvpp2_set_rx_promisc(port, true);
+		return;
 	}
 
-	/* Disable VLAN filtering in promiscuous mode */
-	if (dev->flags & IFF_PROMISC)
-		mvpp2_prs_vid_disable_filtering(port);
-	else
-		mvpp2_prs_vid_enable_filtering(port);
+	mvpp2_set_rx_promisc(port, false);
+
+	if (netdev_uc_count(dev) > MVPP2_PRS_MAC_UC_FILT_MAX ||
+	    mvpp2_prs_mac_da_accept_list(port, &dev->uc))
+		mvpp2_prs_mac_promisc_set(port->priv, port->id,
+					  MVPP2_PRS_L2_UNI_CAST, true);
+
+	if (dev->flags & IFF_ALLMULTI) {
+		mvpp2_prs_mac_promisc_set(port->priv, port->id,
+					  MVPP2_PRS_L2_MULTI_CAST, true);
+		return;
+	}
+
+	if (netdev_mc_count(dev) > MVPP2_PRS_MAC_MC_FILT_MAX ||
+	    mvpp2_prs_mac_da_accept_list(port, &dev->mc))
+		mvpp2_prs_mac_promisc_set(port->priv, port->id,
+					  MVPP2_PRS_L2_MULTI_CAST, true);
 }
 
 static int mvpp2_set_mac_address(struct net_device *dev, void *p)
@@ -8380,6 +8405,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	dev->vlan_features |= features;
 	dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
+	dev->priv_flags |= IFF_UNICAST_FLT;
 
 	/* MTU range: 68 - 9704 */
 	dev->min_mtu = ETH_MIN_MTU;

From 334e6413134bf8335ec93a9e60213cbc61ef7a62 Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Wed, 7 Mar 2018 09:40:57 -0800
Subject: [PATCH 0722/1678] sock: Fix SO_ZEROCOPY switch case

Fix the SO_ZEROCOPY switch case on sock_setsockopt() avoiding the
ret values to be overwritten by the one set on the default case.

Fixes: 28190752c7092 ("sock: permit SO_ZEROCOPY on PF_RDS socket")
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 507d8c6c4319..27f218bba43f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1062,8 +1062,9 @@ set_rcvbuf:
 				ret = -EINVAL;
 			else
 				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
-			break;
 		}
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;

From c33b3b9fcfa4fe5350b5d13bdb87dab061351d77 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 7 Mar 2018 12:03:33 -0600
Subject: [PATCH 0723/1678] cxgb3: remove VLA usage

Remove VLA usage and change the 'len' argument to a u8 and use a 256
byte buffer on the stack. Notice that these lengths are limited by the
encoding field in the VPD structure, which is a u8 [1].

[1] https://marc.info/?l=linux-netdev&m=152044354814024&w=2

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
index a89721fad633..080918af773c 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
@@ -681,18 +681,18 @@ int t3_seeprom_wp(struct adapter *adapter, int enable)
 	return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0);
 }
 
-static int vpdstrtouint(char *s, int len, unsigned int base, unsigned int *val)
+static int vpdstrtouint(char *s, u8 len, unsigned int base, unsigned int *val)
 {
-	char tok[len + 1];
+	char tok[256];
 
 	memcpy(tok, s, len);
 	tok[len] = 0;
 	return kstrtouint(strim(tok), base, val);
 }
 
-static int vpdstrtou16(char *s, int len, unsigned int base, u16 *val)
+static int vpdstrtou16(char *s, u8 len, unsigned int base, u16 *val)
 {
-	char tok[len + 1];
+	char tok[256];
 
 	memcpy(tok, s, len);
 	tok[len] = 0;

From d40a126b16ea851f858f97b0e214d97c8355cecb Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 6 Mar 2018 07:22:33 -0800
Subject: [PATCH 0724/1678] rds: refactor zcopy code into
 rds_message_zcopy_from_user

Move the large block of code predicated on zcopy from
rds_message_copy_from_user into a new function,
rds_message_zcopy_from_user()

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 106 ++++++++++++++++++++++++++--------------------
 1 file changed, 59 insertions(+), 47 deletions(-)

diff --git a/net/rds/message.c b/net/rds/message.c
index 116cf87ccb89..c36edbb92c34 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -333,14 +333,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	return rm;
 }
 
-int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
-			       bool zcopy)
+int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 {
-	unsigned long to_copy, nbytes;
 	unsigned long sg_off;
 	struct scatterlist *sg;
 	int ret = 0;
 	int length = iov_iter_count(from);
+	int total_copied = 0;
+	struct sk_buff *skb;
 
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -350,54 +350,66 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
 	sg = rm->data.op_sg;
 	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
-	if (zcopy) {
-		int total_copied = 0;
-		struct sk_buff *skb;
+	skb = alloc_skb(0, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	BUILD_BUG_ON(sizeof(skb->cb) < max_t(int, sizeof(struct rds_znotifier),
+					     sizeof(struct rds_zcopy_cookies)));
+	rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+	if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
+				    length)) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	while (iov_iter_count(from)) {
+		struct page *pages;
+		size_t start;
+		ssize_t copied;
 
-		skb = alloc_skb(0, GFP_KERNEL);
-		if (!skb)
-			return -ENOMEM;
-		BUILD_BUG_ON(sizeof(skb->cb) <
-			     max_t(int, sizeof(struct rds_znotifier),
-				   sizeof(struct rds_zcopy_cookies)));
-		rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
-		if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
-					    length)) {
-			ret = -ENOMEM;
+		copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+					    1, &start);
+		if (copied < 0) {
+			struct mmpin *mmp;
+			int i;
+
+			for (i = 0; i < rm->data.op_nents; i++)
+				put_page(sg_page(&rm->data.op_sg[i]));
+			mmp = &rm->data.op_mmp_znotifier->z_mmp;
+			mm_unaccount_pinned_pages(mmp);
+			ret = -EFAULT;
 			goto err;
 		}
-		while (iov_iter_count(from)) {
-			struct page *pages;
-			size_t start;
-			ssize_t copied;
-
-			copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
-						    1, &start);
-			if (copied < 0) {
-				struct mmpin *mmp;
-				int i;
-
-				for (i = 0; i < rm->data.op_nents; i++)
-					put_page(sg_page(&rm->data.op_sg[i]));
-				mmp = &rm->data.op_mmp_znotifier->z_mmp;
-				mm_unaccount_pinned_pages(mmp);
-				ret = -EFAULT;
-				goto err;
-			}
-			total_copied += copied;
-			iov_iter_advance(from, copied);
-			length -= copied;
-			sg_set_page(sg, pages, copied, start);
-			rm->data.op_nents++;
-			sg++;
-		}
-		WARN_ON_ONCE(length != 0);
-		return ret;
+		total_copied += copied;
+		iov_iter_advance(from, copied);
+		length -= copied;
+		sg_set_page(sg, pages, copied, start);
+		rm->data.op_nents++;
+		sg++;
+	}
+	WARN_ON_ONCE(length != 0);
+	return ret;
 err:
-		consume_skb(skb);
-		rm->data.op_mmp_znotifier = NULL;
-		return ret;
-	} /* zcopy */
+	consume_skb(skb);
+	rm->data.op_mmp_znotifier = NULL;
+	return ret;
+}
+
+int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
+			       bool zcopy)
+{
+	unsigned long to_copy, nbytes;
+	unsigned long sg_off;
+	struct scatterlist *sg;
+	int ret = 0;
+
+	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
+
+	/* now allocate and copy in the data payload.  */
+	sg = rm->data.op_sg;
+	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
+
+	if (zcopy)
+		return rds_message_zcopy_from_user(rm, from);
 
 	while (iov_iter_count(from)) {
 		if (!sg_page(sg)) {

From 9426bbc6de99b8649d897b94e8f5916b58195643 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 6 Mar 2018 07:22:34 -0800
Subject: [PATCH 0725/1678] rds: use list structure to track information for
 zerocopy completion notification

Commit 401910db4cd4 ("rds: deliver zerocopy completion notification
with data") removes support fo r zerocopy completion notification
on the sk_error_queue, thus we no longer need to track the cookie
information in sk_buff structures.

This commit removes the struct sk_buff_head rs_zcookie_queue by
a simpler list that results in a smaller memory footprint as well
as more efficient memory_allocation time.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/af_rds.c  |  6 ++--
 net/rds/message.c | 77 ++++++++++++++++++++++++++++++-----------------
 net/rds/rds.h     | 23 ++++++++++----
 net/rds/recv.c    | 23 +++++++++-----
 4 files changed, 85 insertions(+), 44 deletions(-)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index f7126108a811..ab751a150f70 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -77,7 +77,7 @@ static int rds_release(struct socket *sock)
 	rds_send_drop_to(rs, NULL);
 	rds_rdma_drop_keys(rs);
 	rds_notify_queue_get(rs, NULL);
-	__skb_queue_purge(&rs->rs_zcookie_queue);
+	rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);
 
 	spin_lock_bh(&rds_sock_lock);
 	list_del_init(&rs->rs_item);
@@ -180,7 +180,7 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 	}
 	if (!list_empty(&rs->rs_recv_queue) ||
 	    !list_empty(&rs->rs_notify_queue) ||
-	    !skb_queue_empty(&rs->rs_zcookie_queue))
+	    !list_empty(&rs->rs_zcookie_queue.zcookie_head))
 		mask |= (EPOLLIN | EPOLLRDNORM);
 	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
 		mask |= (EPOLLOUT | EPOLLWRNORM);
@@ -515,7 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	INIT_LIST_HEAD(&rs->rs_recv_queue);
 	INIT_LIST_HEAD(&rs->rs_notify_queue);
 	INIT_LIST_HEAD(&rs->rs_cong_list);
-	skb_queue_head_init(&rs->rs_zcookie_queue);
+	rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
 	spin_lock_init(&rs->rs_rdma_lock);
 	rs->rs_rdma_keys = RB_ROOT;
 	rs->rs_rx_traces = 0;
diff --git a/net/rds/message.c b/net/rds/message.c
index c36edbb92c34..90dcdcfe9f62 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -48,7 +48,6 @@ static unsigned int	rds_exthdr_size[__RDS_EXTHDR_MAX] = {
 [RDS_EXTHDR_GEN_NUM]	= sizeof(u32),
 };
 
-
 void rds_message_addref(struct rds_message *rm)
 {
 	rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
@@ -56,9 +55,9 @@ void rds_message_addref(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_addref);
 
-static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
+static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
 {
-	struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb;
+	struct rds_zcopy_cookies *ck = &info->zcookies;
 	int ncookies = ck->num;
 
 	if (ncookies == RDS_MAX_ZCOOKIES)
@@ -68,38 +67,61 @@ static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie)
 	return true;
 }
 
+struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+{
+	return container_of(znotif, struct rds_msg_zcopy_info, znotif);
+}
+
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q)
+{
+	unsigned long flags;
+	LIST_HEAD(copy);
+	struct rds_msg_zcopy_info *info, *tmp;
+
+	spin_lock_irqsave(&q->lock, flags);
+	list_splice(&q->zcookie_head, &copy);
+	INIT_LIST_HEAD(&q->zcookie_head);
+	spin_unlock_irqrestore(&q->lock, flags);
+
+	list_for_each_entry_safe(info, tmp, &copy, rs_zcookie_next) {
+		list_del(&info->rs_zcookie_next);
+		kfree(info);
+	}
+}
+
 static void rds_rm_zerocopy_callback(struct rds_sock *rs,
 				     struct rds_znotifier *znotif)
 {
-	struct sk_buff *skb, *tail;
-	unsigned long flags;
-	struct sk_buff_head *q;
+	struct rds_msg_zcopy_info *info;
+	struct rds_msg_zcopy_queue *q;
 	u32 cookie = znotif->z_cookie;
 	struct rds_zcopy_cookies *ck;
+	struct list_head *head;
+	unsigned long flags;
 
+	mm_unaccount_pinned_pages(&znotif->z_mmp);
 	q = &rs->rs_zcookie_queue;
 	spin_lock_irqsave(&q->lock, flags);
-	tail = skb_peek_tail(q);
-
-	if (tail && skb_zcookie_add(tail, cookie)) {
-		spin_unlock_irqrestore(&q->lock, flags);
-		mm_unaccount_pinned_pages(&znotif->z_mmp);
-		consume_skb(rds_skb_from_znotifier(znotif));
-		/* caller invokes rds_wake_sk_sleep() */
-		return;
+	head = &q->zcookie_head;
+	if (!list_empty(head)) {
+		info = list_entry(head, struct rds_msg_zcopy_info,
+				  rs_zcookie_next);
+		if (info && rds_zcookie_add(info, cookie)) {
+			spin_unlock_irqrestore(&q->lock, flags);
+			kfree(rds_info_from_znotifier(znotif));
+			/* caller invokes rds_wake_sk_sleep() */
+			return;
+		}
 	}
 
-	skb = rds_skb_from_znotifier(znotif);
-	ck = (struct rds_zcopy_cookies *)skb->cb;
+	info = rds_info_from_znotifier(znotif);
+	ck = &info->zcookies;
 	memset(ck, 0, sizeof(*ck));
-	WARN_ON(!skb_zcookie_add(skb, cookie));
-
-	__skb_queue_tail(q, skb);
+	WARN_ON(!rds_zcookie_add(info, cookie));
+	list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
 
 	spin_unlock_irqrestore(&q->lock, flags);
 	/* caller invokes rds_wake_sk_sleep() */
-
-	mm_unaccount_pinned_pages(&znotif->z_mmp);
 }
 
 /*
@@ -340,7 +362,7 @@ int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 	int ret = 0;
 	int length = iov_iter_count(from);
 	int total_copied = 0;
-	struct sk_buff *skb;
+	struct rds_msg_zcopy_info *info;
 
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
 
@@ -350,12 +372,11 @@ int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 	sg = rm->data.op_sg;
 	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
-	skb = alloc_skb(0, GFP_KERNEL);
-	if (!skb)
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
 		return -ENOMEM;
-	BUILD_BUG_ON(sizeof(skb->cb) < max_t(int, sizeof(struct rds_znotifier),
-					     sizeof(struct rds_zcopy_cookies)));
-	rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
+	INIT_LIST_HEAD(&info->rs_zcookie_next);
+	rm->data.op_mmp_znotifier = &info->znotif;
 	if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
 				    length)) {
 		ret = -ENOMEM;
@@ -389,7 +410,7 @@ int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 	WARN_ON_ONCE(length != 0);
 	return ret;
 err:
-	consume_skb(skb);
+	kfree(info);
 	rm->data.op_mmp_znotifier = NULL;
 	return ret;
 }
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 33b16353d8f3..74cd27c661de 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -357,16 +357,27 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 #define RDS_MSG_FLUSH		8
 
 struct rds_znotifier {
-	struct list_head	z_list;
 	struct mmpin		z_mmp;
 	u32			z_cookie;
 };
 
-#define	RDS_ZCOPY_SKB(__skb)	((struct rds_znotifier *)&((__skb)->cb[0]))
+struct rds_msg_zcopy_info {
+	struct list_head rs_zcookie_next;
+	union {
+		struct rds_znotifier znotif;
+		struct rds_zcopy_cookies zcookies;
+	};
+};
 
-static inline struct sk_buff *rds_skb_from_znotifier(struct rds_znotifier *z)
+struct rds_msg_zcopy_queue {
+	struct list_head zcookie_head;
+	spinlock_t lock; /* protects zcookie_head queue */
+};
+
+static inline void rds_message_zcopy_queue_init(struct rds_msg_zcopy_queue *q)
 {
-	return container_of((void *)z, struct sk_buff, cb);
+	spin_lock_init(&q->lock);
+	INIT_LIST_HEAD(&q->zcookie_head);
 }
 
 struct rds_message {
@@ -603,8 +614,7 @@ struct rds_sock {
 	/* Socket receive path trace points*/
 	u8			rs_rx_traces;
 	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
-
-	struct sk_buff_head	rs_zcookie_queue;
+	struct rds_msg_zcopy_queue rs_zcookie_queue;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -803,6 +813,7 @@ void rds_message_addref(struct rds_message *rm);
 void rds_message_put(struct rds_message *rm);
 void rds_message_wait(struct rds_message *rm);
 void rds_message_unmapped(struct rds_message *rm);
+void rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *info);
 
 static inline void rds_message_make_checksum(struct rds_header *hdr)
 {
diff --git a/net/rds/recv.c b/net/rds/recv.c
index d50747725221..de50e2126e40 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -579,9 +579,10 @@ out:
 
 static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
 {
-	struct sk_buff *skb;
-	struct sk_buff_head *q = &rs->rs_zcookie_queue;
+	struct rds_msg_zcopy_queue *q = &rs->rs_zcookie_queue;
+	struct rds_msg_zcopy_info *info = NULL;
 	struct rds_zcopy_cookies *done;
+	unsigned long flags;
 
 	if (!msg->msg_control)
 		return false;
@@ -590,16 +591,24 @@ static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg)
 	    msg->msg_controllen < CMSG_SPACE(sizeof(*done)))
 		return false;
 
-	skb = skb_dequeue(q);
-	if (!skb)
+	spin_lock_irqsave(&q->lock, flags);
+	if (!list_empty(&q->zcookie_head)) {
+		info = list_entry(q->zcookie_head.next,
+				  struct rds_msg_zcopy_info, rs_zcookie_next);
+		list_del(&info->rs_zcookie_next);
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+	if (!info)
 		return false;
-	done = (struct rds_zcopy_cookies *)skb->cb;
+	done = &info->zcookies;
 	if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done),
 		     done)) {
-		skb_queue_head(q, skb);
+		spin_lock_irqsave(&q->lock, flags);
+		list_add(&info->rs_zcookie_next, &q->zcookie_head);
+		spin_unlock_irqrestore(&q->lock, flags);
 		return false;
 	}
-	consume_skb(skb);
+	kfree(info);
 	return true;
 }
 

From a366e300ae9fc466d333e6d8f2bc5d58ed248041 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 7 Mar 2018 08:43:19 -0800
Subject: [PATCH 0726/1678] ip6mr: remove synchronize_rcu() in favor of
 SOCK_RCU_FREE

Kirill found that recently added synchronize_rcu() call in
ip6mr_sk_done()
was slowing down netns dismantle and posted a patch to use it only if
the socket
was found.

I instead suggested to get rid of this call, and use instead
SOCK_RCU_FREE

We might later change IPv4 side to use the same technique and unify
both stacks. IPv4 does not use synchronize_rcu() but has a call_rcu()
that could be replaced by SOCK_RCU_FREE.

Tested:
 time for i in {1..1000}; do unshare -n /bin/false;done

 Before : real 7m18.911s
 After : real 10.187s

Fixes: 8571ab479a6e ("ip6mr: Make mroute_sk rcu-based")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Yuval Mintz <yuvalm@mellanox.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 2a38f9de45d3..7345bd6c4b7d 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1443,6 +1443,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
 		err = -EADDRINUSE;
 	} else {
 		rcu_assign_pointer(mrt->mroute_sk, sk);
+		sock_set_flag(sk, SOCK_RCU_FREE);
 		net->ipv6.devconf_all->mc_forwarding++;
 	}
 	write_unlock_bh(&mrt_lock);
@@ -1472,6 +1473,10 @@ int ip6mr_sk_done(struct sock *sk)
 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
 			write_lock_bh(&mrt_lock);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+			/* Note that mroute_sk had SOCK_RCU_FREE set,
+			 * so the RCU grace period before sk freeing
+			 * is guaranteed by sk_destruct()
+			 */
 			net->ipv6.devconf_all->mc_forwarding--;
 			write_unlock_bh(&mrt_lock);
 			inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1485,7 +1490,6 @@ int ip6mr_sk_done(struct sock *sk)
 		}
 	}
 	rtnl_unlock();
-	synchronize_rcu();
 
 	return err;
 }

From d83a69c2b1b17a8aedc7a75ba8b620bfd2057e93 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 7 Mar 2018 15:44:48 -0800
Subject: [PATCH 0727/1678] net/mlx5: Use MLX5_IPSEC_DEV macro for ipsec caps

Fix build break of mlx5_accel_ipsec_device_caps is not defined when
MLX5_ACCEL is not selected, use MLX5_IPSEC_DEV instead which handles
such case.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reported-by: Doug Ledford <dledford@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 4e456c292ce4..f836e6b76f65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2642,8 +2642,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
-	if (mlx5_accel_ipsec_device_caps(steering->dev) &
-	    MLX5_ACCEL_IPSEC_DEVICE) {
+	if (MLX5_IPSEC_DEV(dev)) {
 		err = init_egress_root_ns(steering);
 		if (err)
 			goto err;

From 581fdddee420cebe2cb781cb3c84c82676a86949 Mon Sep 17 00:00:00 2001
From: Yossi Kuperman <yossiku@mellanox.com>
Date: Sun, 22 Oct 2017 19:43:58 +0300
Subject: [PATCH 0728/1678] net/mlx5: IPSec, Generalize sandbox QP commands

The current code assume only SA QP commands.
Refactor in order to pave the way for new QP commands:
1. Generic cmd response format.
2. SA cmd checks are in dedicated functions.
3. Aligned debug prints.

Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  | 118 ++++++++++--------
 include/linux/mlx5/mlx5_ifc_fpga.h            |  16 +++
 2 files changed, 82 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 95f9c5a8619b..e0f32b025e06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -41,35 +41,23 @@
 #define SBU_QP_QUEUE_SIZE 8
 #define MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC	(60 * 1000)
 
-enum mlx5_ipsec_response_syndrome {
-	MLX5_IPSEC_RESPONSE_SUCCESS = 0,
-	MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
-	MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
-	MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
-};
-
-enum mlx5_fpga_ipsec_sacmd_status {
-	MLX5_FPGA_IPSEC_SACMD_PENDING,
-	MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
-	MLX5_FPGA_IPSEC_SACMD_COMPLETE,
+enum mlx5_fpga_ipsec_cmd_status {
+	MLX5_FPGA_IPSEC_CMD_PENDING,
+	MLX5_FPGA_IPSEC_CMD_SEND_FAIL,
+	MLX5_FPGA_IPSEC_CMD_COMPLETE,
 };
 
 struct mlx5_ipsec_command_context {
 	struct mlx5_fpga_dma_buf buf;
-	struct mlx5_accel_ipsec_sa sa;
-	enum mlx5_fpga_ipsec_sacmd_status status;
+	enum mlx5_fpga_ipsec_cmd_status status;
+	struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
 	int status_code;
 	struct completion complete;
 	struct mlx5_fpga_device *dev;
 	struct list_head list; /* Item in pending_cmds */
+	u8 command[0];
 };
 
-struct mlx5_ipsec_sadb_resp {
-	__be32 syndrome;
-	__be32 sw_sa_handle;
-	u8 reserved[24];
-} __packed;
-
 struct mlx5_fpga_ipsec {
 	struct list_head pending_cmds;
 	spinlock_t pending_cmds_lock; /* Protects pending_cmds */
@@ -105,21 +93,22 @@ static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
 				       buf);
 		mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
 			       status);
-		context->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+		context->status = MLX5_FPGA_IPSEC_CMD_SEND_FAIL;
 		complete(&context->complete);
 	}
 }
 
-static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+static inline
+int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
 {
 	switch (syndrome) {
-	case MLX5_IPSEC_RESPONSE_SUCCESS:
+	case MLX5_FPGA_IPSEC_RESPONSE_SUCCESS:
 		return 0;
-	case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+	case MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE:
 		return -EEXIST;
-	case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+	case MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST:
 		return -EINVAL;
-	case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+	case MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
 		return -EIO;
 	}
 	return -EIO;
@@ -127,9 +116,9 @@ static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
 
 static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 {
-	struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
+	struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
 	struct mlx5_ipsec_command_context *context;
-	enum mlx5_ipsec_response_syndrome syndrome;
+	enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
 	struct mlx5_fpga_device *fdev = cb_arg;
 	unsigned long flags;
 
@@ -139,8 +128,8 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 		return;
 	}
 
-	mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
-		      ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+	mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x\n",
+		      ntohl(resp->syndrome));
 
 	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
 	context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
@@ -156,51 +145,48 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 	}
 	mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
 
-	if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
-		mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
-			      ntohl(context->sa.sw_sa_handle),
-			      ntohl(resp->sw_sa_handle));
-		return;
-	}
-
 	syndrome = ntohl(resp->syndrome);
 	context->status_code = syndrome_to_errno(syndrome);
-	context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+	context->status = MLX5_FPGA_IPSEC_CMD_COMPLETE;
+	memcpy(&context->resp, resp, sizeof(*resp));
 
 	if (context->status_code)
-		mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+		mlx5_fpga_warn(fdev, "IPSec command failed with syndrome %08x\n",
 			       syndrome);
+
 	complete(&context->complete);
 }
 
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				  struct mlx5_accel_ipsec_sa *cmd)
+static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
+				      const void *cmd, int cmd_size)
 {
 	struct mlx5_ipsec_command_context *context;
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned long flags;
-	int res = 0;
+	int res;
 
-	BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
 	if (!fdev || !fdev->ipsec)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (cmd_size & 3)
+		return ERR_PTR(-EINVAL);
+
+	context = kzalloc(sizeof(*context) + cmd_size, GFP_ATOMIC);
 	if (!context)
 		return ERR_PTR(-ENOMEM);
 
-	memcpy(&context->sa, cmd, sizeof(*cmd));
-	context->buf.complete = mlx5_fpga_ipsec_send_complete;
-	context->buf.sg[0].size = sizeof(context->sa);
-	context->buf.sg[0].data = &context->sa;
-	init_completion(&context->complete);
+	context->status = MLX5_FPGA_IPSEC_CMD_PENDING;
 	context->dev = fdev;
+	context->buf.complete = mlx5_fpga_ipsec_send_complete;
+	init_completion(&context->complete);
+	memcpy(&context->command, cmd, cmd_size);
+	context->buf.sg[0].size = cmd_size;
+	context->buf.sg[0].data = &context->command;
+
 	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
 	list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
 	spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
 
-	context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
-
 	res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
 	if (res) {
 		mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
@@ -215,7 +201,7 @@ void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
 	return context;
 }
 
-int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
 {
 	struct mlx5_ipsec_command_context *context = ctx;
 	unsigned long timeout =
@@ -228,11 +214,39 @@ int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
 		return -ETIMEDOUT;
 	}
 
-	if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+	if (context->status == MLX5_FPGA_IPSEC_CMD_COMPLETE)
 		res = context->status_code;
 	else
 		res = -EIO;
 
+	return res;
+}
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+				  struct mlx5_accel_ipsec_sa *cmd)
+{
+	return mlx5_fpga_ipsec_cmd_exec(mdev, cmd, sizeof(*cmd));
+}
+
+int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+{
+	struct mlx5_ipsec_command_context *context = ctx;
+	struct mlx5_accel_ipsec_sa *sa;
+	int res;
+
+	res = mlx5_fpga_ipsec_cmd_wait(ctx);
+	if (res)
+		goto out;
+
+	sa = (struct mlx5_accel_ipsec_sa *)&context->command;
+	if (sa->sw_sa_handle != context->resp.sw_sa_handle) {
+		mlx5_fpga_err(context->dev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+			      ntohl(sa->sw_sa_handle),
+			      ntohl(context->resp.sw_sa_handle));
+		res = -EIO;
+	}
+
+out:
 	kfree(context);
 	return res;
 }
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 255a88d08078..7283fe780f93 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -429,4 +429,20 @@ struct mlx5_ifc_ipsec_counters_bits {
 	u8         dropped_cmd[0x40];
 };
 
+enum mlx5_ifc_fpga_ipsec_response_syndrome {
+	MLX5_FPGA_IPSEC_RESPONSE_SUCCESS = 0,
+	MLX5_FPGA_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+	MLX5_FPGA_IPSEC_RESPONSE_SADB_ISSUE = 2,
+	MLX5_FPGA_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_resp {
+	__be32 syndrome;
+	union {
+		__be32 sw_sa_handle;
+		__be32 flags;
+	};
+	u8 reserved[24];
+} __packed;
+
 #endif /* MLX5_IFC_FPGA_H */

From 788a8210764ce2977095010931959c87b60c2f51 Mon Sep 17 00:00:00 2001
From: Yossi Kuperman <yossiku@mellanox.com>
Date: Sun, 22 Oct 2017 19:45:45 +0300
Subject: [PATCH 0729/1678] net/mlx5e: IPSec, Add support for ESP trailer
 removal by hardware

Current hardware decrypts and authenticates incoming ESP packets.
Subsequently, the software extracts the nexthdr field, truncates the
trailer and adjusts csum accordingly.

With this patch and a capable device, the trailer is being removed
by the hardware and the nexthdr field is conveyed via PET. This way
we avoid both the need to access the trailer (cache miss) and to
compute its relative checksum, which significantly improve
the performance.

Experiment shows that trailer removal improves the performance by
2Gbps, (netperf). Both forwarding and host-to-host configurations.

Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/accel/ipsec.h |  2 +
 .../mellanox/mlx5/core/en_accel/ipsec.c       |  2 +
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  1 +
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c  | 10 +++-
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  | 54 +++++++++++++++++++
 include/linux/mlx5/mlx5_ifc_fpga.h            | 13 ++++-
 6 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 67cda8871f5a..4da9611a753d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -43,6 +43,7 @@ enum {
 	MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
 	MLX5_ACCEL_IPSEC_ESP = BIT(3),
 	MLX5_ACCEL_IPSEC_LSO = BIT(4),
+	MLX5_ACCEL_IPSEC_NO_TRAILER = BIT(5),
 };
 
 #define MLX5_IPSEC_SADB_IP_AH       BIT(7)
@@ -55,6 +56,7 @@ enum {
 enum {
 	MLX5_IPSEC_CMD_ADD_SA = 0,
 	MLX5_IPSEC_CMD_DEL_SA = 1,
+	MLX5_IPSEC_CMD_SET_CAP = 5,
 };
 
 enum mlx5_accel_ipsec_enc_mode {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 1b49afca65c0..460a613059fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -378,6 +378,8 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 	ida_init(&ipsec->halloc);
 	ipsec->en_priv = priv;
 	ipsec->en_priv->ipsec = ipsec;
+	ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
+			       MLX5_ACCEL_IPSEC_NO_TRAILER);
 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 56e00baf16cc..bffc3ed0574a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -77,6 +77,7 @@ struct mlx5e_ipsec_stats {
 struct mlx5e_ipsec {
 	struct mlx5e_priv *en_priv;
 	DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
+	bool no_trailer;
 	spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
 	struct ida halloc;
 	struct mlx5e_ipsec_sw_stats sw_stats;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 6a7c8b04447e..64c549a06678 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -42,10 +42,11 @@
 enum {
 	MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
 	MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+	MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO = 0x17,
 };
 
 struct mlx5e_ipsec_rx_metadata {
-	unsigned char   reserved;
+	unsigned char   nexthdr;
 	__be32		sa_handle;
 } __packed;
 
@@ -301,10 +302,17 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
 	switch (mdata->syndrome) {
 	case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
 		xo->status = CRYPTO_SUCCESS;
+		if (likely(priv->ipsec->no_trailer)) {
+			xo->flags |= XFRM_ESP_NO_TRAILER;
+			xo->proto = mdata->content.rx.nexthdr;
+		}
 		break;
 	case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
 		xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
 		break;
+	case MLX5E_IPSEC_RX_SYNDROME_BAD_PROTO:
+		xo->status = CRYPTO_INVALID_PROTOCOL;
+		break;
 	default:
 		atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
 		return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index e0f32b025e06..3b10d46dc821 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -273,6 +273,9 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
 		ret |= MLX5_ACCEL_IPSEC_LSO;
 
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
+		ret |= MLX5_ACCEL_IPSEC_NO_TRAILER;
+
 	return ret;
 }
 
@@ -335,6 +338,46 @@ out:
 	return ret;
 }
 
+static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
+{
+	struct mlx5_ipsec_command_context *context;
+	struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
+	int err;
+
+	cmd.cmd = htonl(MLX5_IPSEC_CMD_SET_CAP);
+	cmd.flags = htonl(flags);
+	context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
+	if (IS_ERR(context)) {
+		err = PTR_ERR(context);
+		goto out;
+	}
+
+	err = mlx5_fpga_ipsec_cmd_wait(context);
+	if (err)
+		goto out;
+
+	if ((context->resp.flags & cmd.flags) != cmd.flags) {
+		mlx5_fpga_err(context->dev, "Failed to set capabilities. cmd 0x%08x vs resp 0x%08x\n",
+			      cmd.flags,
+			      context->resp.flags);
+		err = -EIO;
+	}
+
+out:
+	return err;
+}
+
+static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
+{
+	u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
+	u32 flags = 0;
+
+	if (dev_caps & MLX5_ACCEL_IPSEC_NO_TRAILER)
+		flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
+
+	return mlx5_fpga_ipsec_set_caps(mdev, flags);
+}
+
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_conn_attr init_attr = {0};
@@ -372,8 +415,19 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 		goto error;
 	}
 	fdev->ipsec->conn = conn;
+
+	err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
+	if (err) {
+		mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
+			      err);
+		goto err_destroy_conn;
+	}
+
 	return 0;
 
+err_destroy_conn:
+	mlx5_fpga_sbu_conn_destroy(conn);
+
 error:
 	kfree(fdev->ipsec);
 	fdev->ipsec = NULL;
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 7283fe780f93..643544db180b 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -373,7 +373,8 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
 struct mlx5_ifc_ipsec_extended_cap_bits {
 	u8         encapsulation[0x20];
 
-	u8         reserved_0[0x15];
+	u8         reserved_0[0x14];
+	u8         rx_no_trailer[0x1];
 	u8         ipv4_fragment[0x1];
 	u8         ipv6[0x1];
 	u8         esn[0x1];
@@ -445,4 +446,14 @@ struct mlx5_ifc_fpga_ipsec_cmd_resp {
 	u8 reserved[24];
 } __packed;
 
+enum mlx5_ifc_fpga_ipsec_cap {
+	MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0),
+};
+
+struct mlx5_ifc_fpga_ipsec_cmd_cap {
+	__be32 cmd;
+	__be32 flags;
+	u8 reserved[24];
+} __packed;
+
 #endif /* MLX5_IFC_FPGA_H */

From 65802f480008066636a43173b12388bb3fb7bd3a Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Tue, 16 Jan 2018 16:12:22 +0200
Subject: [PATCH 0730/1678] net/mlx5: IPSec, Add command V2 support

This patch adds V2 command support.
New fpga devices support extended features (udp encap, esn etc...), this
features require new hardware sadb format therefore we have a new version
of commands to manipulate it.

Signed-off-by: Yossef Efraim <yossefe@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/accel/ipsec.c |  9 ++-
 .../ethernet/mellanox/mlx5/core/accel/ipsec.h | 21 +++++--
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 56 +++++++++----------
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  | 11 ++--
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.h  |  5 +-
 include/linux/mlx5/mlx5_ifc_fpga.h            |  4 +-
 6 files changed, 64 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 53e69edaedde..b88ae12d9066 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -40,10 +40,17 @@
 void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
 				   struct mlx5_accel_ipsec_sa *cmd)
 {
+	int cmd_size;
+
 	if (!MLX5_IPSEC_DEV(mdev))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
+	if (mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_V2_CMD)
+		cmd_size = sizeof(*cmd);
+	else
+		cmd_size = sizeof(cmd->ipsec_sa_v1);
+
+	return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd, cmd_size);
 }
 
 int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 4da9611a753d..14a2e95e82c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -44,6 +44,7 @@ enum {
 	MLX5_ACCEL_IPSEC_ESP = BIT(3),
 	MLX5_ACCEL_IPSEC_LSO = BIT(4),
 	MLX5_ACCEL_IPSEC_NO_TRAILER = BIT(5),
+	MLX5_ACCEL_IPSEC_V2_CMD = BIT(7),
 };
 
 #define MLX5_IPSEC_SADB_IP_AH       BIT(7)
@@ -56,6 +57,9 @@ enum {
 enum {
 	MLX5_IPSEC_CMD_ADD_SA = 0,
 	MLX5_IPSEC_CMD_DEL_SA = 1,
+	MLX5_IPSEC_CMD_ADD_SA_V2 = 2,
+	MLX5_IPSEC_CMD_DEL_SA_V2 = 3,
+	MLX5_IPSEC_CMD_MOD_SA_V2 = 4,
 	MLX5_IPSEC_CMD_SET_CAP = 5,
 };
 
@@ -68,7 +72,7 @@ enum mlx5_accel_ipsec_enc_mode {
 #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
 			      MLX5_ACCEL_IPSEC_DEVICE)
 
-struct mlx5_accel_ipsec_sa {
+struct mlx5_accel_ipsec_sa_v1 {
 	__be32 cmd;
 	u8 key_enc[32];
 	u8 key_auth[32];
@@ -88,10 +92,19 @@ struct mlx5_accel_ipsec_sa {
 	__be32 sw_sa_handle;
 	__be16 tfclen;
 	u8 enc_mode;
-	u8 sip_masklen;
-	u8 dip_masklen;
+	u8 reserved1[2];
 	u8 flags;
-	u8 reserved[2];
+	u8 reserved2[2];
+};
+
+struct mlx5_accel_ipsec_sa {
+	struct mlx5_accel_ipsec_sa_v1 ipsec_sa_v1;
+	__be16 udp_sp;
+	__be16 udp_dp;
+	u8 reserved1[4];
+	__be32 esn;
+	__be16 vid;     /* only 12 bits, rest is reserved */
+	__be16 reserved2;
 } __packed;
 
 /**
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 460a613059fe..a8c3fe7cff0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -133,50 +133,46 @@ static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entr
 
 	memset(hw_sa, 0, sizeof(*hw_sa));
 
-	if (op == MLX5_IPSEC_CMD_ADD_SA) {
-		crypto_data_len = (x->aead->alg_key_len + 7) / 8;
-		key_len = crypto_data_len - 4; /* 4 bytes salt at end */
-		aead = x->data;
-		geniv_ctx = crypto_aead_ctx(aead);
-		ivsize = crypto_aead_ivsize(aead);
+	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+	aead = x->data;
+	geniv_ctx = crypto_aead_ctx(aead);
+	ivsize = crypto_aead_ivsize(aead);
 
-		memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
-		/* Duplicate 128 bit key twice according to HW layout */
-		if (key_len == 16)
-			memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
-		memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
-		hw_sa->gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
-	}
+	memcpy(&hw_sa->ipsec_sa_v1.key_enc, x->aead->alg_key, key_len);
+	/* Duplicate 128 bit key twice according to HW layout */
+	if (key_len == 16)
+		memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], x->aead->alg_key, key_len);
+	memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, geniv_ctx->salt, ivsize);
+	hw_sa->ipsec_sa_v1.gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
 
-	hw_sa->cmd = htonl(op);
-	hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
+	hw_sa->ipsec_sa_v1.cmd = htonl(op);
+	hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
 	if (x->props.family == AF_INET) {
-		hw_sa->sip[3] = x->props.saddr.a4;
-		hw_sa->dip[3] = x->id.daddr.a4;
-		hw_sa->sip_masklen = 32;
-		hw_sa->dip_masklen = 32;
+		hw_sa->ipsec_sa_v1.sip[3] = x->props.saddr.a4;
+		hw_sa->ipsec_sa_v1.dip[3] = x->id.daddr.a4;
 	} else {
-		memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
-		memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
-		hw_sa->sip_masklen = 128;
-		hw_sa->dip_masklen = 128;
-		hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
+		memcpy(hw_sa->ipsec_sa_v1.sip, x->props.saddr.a6,
+		       sizeof(hw_sa->ipsec_sa_v1.sip));
+		memcpy(hw_sa->ipsec_sa_v1.dip, x->id.daddr.a6,
+		       sizeof(hw_sa->ipsec_sa_v1.dip));
+		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_IPV6;
 	}
-	hw_sa->spi = x->id.spi;
-	hw_sa->sw_sa_handle = htonl(sa_entry->handle);
+	hw_sa->ipsec_sa_v1.spi = x->id.spi;
+	hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(sa_entry->handle);
 	switch (x->id.proto) {
 	case IPPROTO_ESP:
-		hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
+		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_IP_ESP;
 		break;
 	case IPPROTO_AH:
-		hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
+		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_IP_AH;
 		break;
 	default:
 		break;
 	}
-	hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
+	hw_sa->ipsec_sa_v1.enc_mode = mlx5e_ipsec_enc_mode(x);
 	if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
-		hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_DIR_SX;
 }
 
 static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 3b10d46dc821..fa5b5a0888ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -223,9 +223,9 @@ static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
 }
 
 void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				  struct mlx5_accel_ipsec_sa *cmd)
+				  struct mlx5_accel_ipsec_sa *cmd, int cmd_size)
 {
-	return mlx5_fpga_ipsec_cmd_exec(mdev, cmd, sizeof(*cmd));
+	return mlx5_fpga_ipsec_cmd_exec(mdev, cmd, cmd_size);
 }
 
 int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
@@ -239,9 +239,9 @@ int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
 		goto out;
 
 	sa = (struct mlx5_accel_ipsec_sa *)&context->command;
-	if (sa->sw_sa_handle != context->resp.sw_sa_handle) {
+	if (sa->ipsec_sa_v1.sw_sa_handle != context->resp.sw_sa_handle) {
 		mlx5_fpga_err(context->dev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
-			      ntohl(sa->sw_sa_handle),
+			      ntohl(sa->ipsec_sa_v1.sw_sa_handle),
 			      ntohl(context->resp.sw_sa_handle));
 		res = -EIO;
 	}
@@ -276,6 +276,9 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
 		ret |= MLX5_ACCEL_IPSEC_NO_TRAILER;
 
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, v2_command))
+		ret |= MLX5_ACCEL_IPSEC_V2_CMD;
+
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 26a3e4b56972..e5ec29f56532 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -39,7 +39,7 @@
 #ifdef CONFIG_MLX5_FPGA
 
 void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				  struct mlx5_accel_ipsec_sa *cmd);
+				  struct mlx5_accel_ipsec_sa *cmd, int cmd_size);
 int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
 
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
@@ -53,7 +53,8 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
 #else
 
 static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-						struct mlx5_accel_ipsec_sa *cmd)
+						struct mlx5_accel_ipsec_sa *cmd,
+						int cmd_size)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 643544db180b..dd7e4538159c 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -373,7 +373,9 @@ struct mlx5_ifc_fpga_destroy_qp_out_bits {
 struct mlx5_ifc_ipsec_extended_cap_bits {
 	u8         encapsulation[0x20];
 
-	u8         reserved_0[0x14];
+	u8         reserved_0[0x12];
+	u8         v2_command[0x1];
+	u8         udp_encap[0x1];
 	u8         rx_no_trailer[0x1];
 	u8         ipv4_fragment[0x1];
 	u8         ipv6[0x1];

From 1d2005e2040b95af4c861e40cf806ff44cd7c883 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Mon, 29 Jan 2018 15:05:50 +0200
Subject: [PATCH 0731/1678] net/mlx5: Export ipsec capabilities

We will need that for ipsec verbs.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/accel/ipsec.c |  3 +-
 .../ethernet/mellanox/mlx5/core/accel/ipsec.h | 14 +----
 .../mellanox/mlx5/core/en_accel/ipsec.c       |  9 +--
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  | 14 ++---
 include/linux/mlx5/accel.h                    | 57 +++++++++++++++++++
 5 files changed, 73 insertions(+), 24 deletions(-)
 create mode 100644 include/linux/mlx5/accel.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index b88ae12d9066..375ba438e7cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -45,7 +45,7 @@ void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
 	if (!MLX5_IPSEC_DEV(mdev))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	if (mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_V2_CMD)
+	if (mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_V2_CMD)
 		cmd_size = sizeof(*cmd);
 	else
 		cmd_size = sizeof(cmd->ipsec_sa_v1);
@@ -62,6 +62,7 @@ u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
 	return mlx5_fpga_ipsec_device_caps(mdev);
 }
+EXPORT_SYMBOL_GPL(mlx5_accel_ipsec_device_caps);
 
 unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 14a2e95e82c3..421ed71a029b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -35,18 +35,10 @@
 #define __MLX5_ACCEL_IPSEC_H__
 
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/accel.h>
 
 #ifdef CONFIG_MLX5_ACCEL
 
-enum {
-	MLX5_ACCEL_IPSEC_DEVICE = BIT(1),
-	MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
-	MLX5_ACCEL_IPSEC_ESP = BIT(3),
-	MLX5_ACCEL_IPSEC_LSO = BIT(4),
-	MLX5_ACCEL_IPSEC_NO_TRAILER = BIT(5),
-	MLX5_ACCEL_IPSEC_V2_CMD = BIT(7),
-};
-
 #define MLX5_IPSEC_SADB_IP_AH       BIT(7)
 #define MLX5_IPSEC_SADB_IP_ESP      BIT(6)
 #define MLX5_IPSEC_SADB_SA_VALID    BIT(5)
@@ -70,7 +62,7 @@ enum mlx5_accel_ipsec_enc_mode {
 };
 
 #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
-			      MLX5_ACCEL_IPSEC_DEVICE)
+			      MLX5_ACCEL_IPSEC_CAP_DEVICE)
 
 struct mlx5_accel_ipsec_sa_v1 {
 	__be32 cmd;
@@ -126,8 +118,6 @@ void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
  */
 int mlx5_accel_ipsec_sa_cmd_wait(void *context);
 
-u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
-
 unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 				   unsigned int count);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index a8c3fe7cff0f..6f4a01620cc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -242,7 +242,8 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
 		return -EINVAL;
 	}
 	if (x->props.family == AF_INET6 &&
-	    !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+	    !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+	     MLX5_ACCEL_IPSEC_CAP_IPV6)) {
 		netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
 		return -EINVAL;
 	}
@@ -375,7 +376,7 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 	ipsec->en_priv = priv;
 	ipsec->en_priv->ipsec = ipsec;
 	ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
-			       MLX5_ACCEL_IPSEC_NO_TRAILER);
+			       MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
 	return 0;
 }
@@ -422,7 +423,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
 	if (!priv->ipsec)
 		return;
 
-	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) ||
+	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
 	    !MLX5_CAP_ETH(mdev, swp)) {
 		mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
 		return;
@@ -441,7 +442,7 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
 	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
 	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
 
-	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_LSO) ||
 	    !MLX5_CAP_ETH(mdev, swp_lso)) {
 		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index fa5b5a0888ec..e7e28277733d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -257,7 +257,7 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	u32 ret = 0;
 
 	if (mlx5_fpga_is_ipsec_device(mdev))
-		ret |= MLX5_ACCEL_IPSEC_DEVICE;
+		ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
 	else
 		return ret;
 
@@ -265,19 +265,19 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 		return ret;
 
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
-		ret |= MLX5_ACCEL_IPSEC_ESP;
+		ret |= MLX5_ACCEL_IPSEC_CAP_ESP;
 
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
-		ret |= MLX5_ACCEL_IPSEC_IPV6;
+		ret |= MLX5_ACCEL_IPSEC_CAP_IPV6;
 
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
-		ret |= MLX5_ACCEL_IPSEC_LSO;
+		ret |= MLX5_ACCEL_IPSEC_CAP_LSO;
 
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
-		ret |= MLX5_ACCEL_IPSEC_NO_TRAILER;
+		ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
 
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, v2_command))
-		ret |= MLX5_ACCEL_IPSEC_V2_CMD;
+		ret |= MLX5_ACCEL_IPSEC_CAP_V2_CMD;
 
 	return ret;
 }
@@ -375,7 +375,7 @@ static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
 	u32 dev_caps = mlx5_fpga_ipsec_device_caps(mdev);
 	u32 flags = 0;
 
-	if (dev_caps & MLX5_ACCEL_IPSEC_NO_TRAILER)
+	if (dev_caps & MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER)
 		flags |= MLX5_FPGA_IPSEC_CAP_NO_TRAILER;
 
 	return mlx5_fpga_ipsec_set_caps(mdev, flags);
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
new file mode 100644
index 000000000000..601280c782d3
--- /dev/null
+++ b/include/linux/mlx5/accel.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_H__
+#define __MLX5_ACCEL_H__
+
+#include <linux/mlx5/driver.h>
+
+enum mlx5_accel_ipsec_caps {
+	MLX5_ACCEL_IPSEC_CAP_DEVICE		= 1 << 0,
+	MLX5_ACCEL_IPSEC_CAP_ESP		= 1 << 2,
+	MLX5_ACCEL_IPSEC_CAP_IPV6		= 1 << 3,
+	MLX5_ACCEL_IPSEC_CAP_LSO		= 1 << 4,
+	MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER	= 1 << 5,
+	MLX5_ACCEL_IPSEC_CAP_V2_CMD		= 1 << 6,
+};
+
+#ifdef CONFIG_MLX5_ACCEL
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+#else
+
+static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
+
+#endif
+#endif

From af9fe19d660e333ca9b0a6e1506e684a1126b9e7 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Wed, 17 Jan 2018 11:20:33 +0200
Subject: [PATCH 0732/1678] net/mlx5: Added required metadata capability for
 ipsec

Currently our device requires additional metadata in packet
to perform ipsec crypto offload.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 6 ++++--
 include/linux/mlx5/accel.h                           | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index e7e28277733d..8de992ba7230 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -256,10 +256,12 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	u32 ret = 0;
 
-	if (mlx5_fpga_is_ipsec_device(mdev))
+	if (mlx5_fpga_is_ipsec_device(mdev)) {
 		ret |= MLX5_ACCEL_IPSEC_CAP_DEVICE;
-	else
+		ret |= MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA;
+	} else {
 		return ret;
+	}
 
 	if (!fdev->ipsec)
 		return ret;
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index 601280c782d3..b674af63689b 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -38,6 +38,7 @@
 
 enum mlx5_accel_ipsec_caps {
 	MLX5_ACCEL_IPSEC_CAP_DEVICE		= 1 << 0,
+	MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA	= 1 << 1,
 	MLX5_ACCEL_IPSEC_CAP_ESP		= 1 << 2,
 	MLX5_ACCEL_IPSEC_CAP_IPV6		= 1 << 3,
 	MLX5_ACCEL_IPSEC_CAP_LSO		= 1 << 4,

From d6c4f0298cec8c4c88d33aca17c066995e92fe91 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Thu, 18 Jan 2018 13:05:48 +0200
Subject: [PATCH 0733/1678] net/mlx5: Refactor accel IPSec code

The current code has one layer that executed FPGA commands and
the Ethernet part directly used this code. Since downstream patches
introduces support for IPSec in mlx5_ib, we need to provide some
abstractions. This patch refactors the accel code into one layer
that creates a software IPSec transformation and another one which
creates the actual hardware context.
The internal command implementation is now hidden in the FPGA
core layer. The code also adds the ability to share FPGA hardware
contexts. If two contexts are the same, only a reference count
is taken.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/accel/ipsec.c |  58 ++-
 .../ethernet/mellanox/mlx5/core/accel/ipsec.h |  97 +----
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 146 +++----
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  | 391 ++++++++++++++++--
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.h  |  55 ++-
 include/linux/mlx5/accel.h                    |  83 +++-
 include/linux/mlx5/mlx5_ifc_fpga.h            |  59 +++
 7 files changed, 666 insertions(+), 223 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index 375ba438e7cf..ab5bc82855fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -37,27 +37,6 @@
 #include "mlx5_core.h"
 #include "fpga/ipsec.h"
 
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				   struct mlx5_accel_ipsec_sa *cmd)
-{
-	int cmd_size;
-
-	if (!MLX5_IPSEC_DEV(mdev))
-		return ERR_PTR(-EOPNOTSUPP);
-
-	if (mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_V2_CMD)
-		cmd_size = sizeof(*cmd);
-	else
-		cmd_size = sizeof(cmd->ipsec_sa_v1);
-
-	return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd, cmd_size);
-}
-
-int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
-{
-	return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
-}
-
 u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
 	return mlx5_fpga_ipsec_device_caps(mdev);
@@ -75,6 +54,21 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 	return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
 }
 
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+				       struct mlx5_accel_esp_xfrm *xfrm,
+				       const __be32 saddr[4],
+				       const __be32 daddr[4],
+				       const __be32 spi, bool is_ipv6)
+{
+	return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
+					     spi, is_ipv6);
+}
+
+void mlx5_accel_esp_free_hw_context(void *context)
+{
+	mlx5_fpga_ipsec_delete_sa_ctx(context);
+}
+
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
 {
 	return mlx5_fpga_ipsec_init(mdev);
@@ -84,3 +78,25 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 	mlx5_fpga_ipsec_cleanup(mdev);
 }
+
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+			   const struct mlx5_accel_esp_xfrm_attrs *attrs,
+			   u32 flags)
+{
+	struct mlx5_accel_esp_xfrm *xfrm;
+
+	xfrm = mlx5_fpga_esp_create_xfrm(mdev, attrs, flags);
+	if (IS_ERR(xfrm))
+		return xfrm;
+
+	xfrm->mdev = mdev;
+	return xfrm;
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_create_xfrm);
+
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+	mlx5_fpga_esp_destroy_xfrm(xfrm);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 421ed71a029b..024dbd22a89b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -39,89 +39,20 @@
 
 #ifdef CONFIG_MLX5_ACCEL
 
-#define MLX5_IPSEC_SADB_IP_AH       BIT(7)
-#define MLX5_IPSEC_SADB_IP_ESP      BIT(6)
-#define MLX5_IPSEC_SADB_SA_VALID    BIT(5)
-#define MLX5_IPSEC_SADB_SPI_EN      BIT(4)
-#define MLX5_IPSEC_SADB_DIR_SX      BIT(3)
-#define MLX5_IPSEC_SADB_IPV6        BIT(2)
-
-enum {
-	MLX5_IPSEC_CMD_ADD_SA = 0,
-	MLX5_IPSEC_CMD_DEL_SA = 1,
-	MLX5_IPSEC_CMD_ADD_SA_V2 = 2,
-	MLX5_IPSEC_CMD_DEL_SA_V2 = 3,
-	MLX5_IPSEC_CMD_MOD_SA_V2 = 4,
-	MLX5_IPSEC_CMD_SET_CAP = 5,
-};
-
-enum mlx5_accel_ipsec_enc_mode {
-	MLX5_IPSEC_SADB_MODE_NONE = 0,
-	MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
-	MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
-};
-
 #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
 			      MLX5_ACCEL_IPSEC_CAP_DEVICE)
 
-struct mlx5_accel_ipsec_sa_v1 {
-	__be32 cmd;
-	u8 key_enc[32];
-	u8 key_auth[32];
-	__be32 sip[4];
-	__be32 dip[4];
-	union {
-		struct {
-			__be32 reserved;
-			u8 salt_iv[8];
-			__be32 salt;
-		} __packed gcm;
-		struct {
-			u8 salt[16];
-		} __packed cbc;
-	};
-	__be32 spi;
-	__be32 sw_sa_handle;
-	__be16 tfclen;
-	u8 enc_mode;
-	u8 reserved1[2];
-	u8 flags;
-	u8 reserved2[2];
-};
-
-struct mlx5_accel_ipsec_sa {
-	struct mlx5_accel_ipsec_sa_v1 ipsec_sa_v1;
-	__be16 udp_sp;
-	__be16 udp_dp;
-	u8 reserved1[4];
-	__be32 esn;
-	__be16 vid;     /* only 12 bits, rest is reserved */
-	__be16 reserved2;
-} __packed;
-
-/**
- * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
- * @mdev: mlx5 device
- * @cmd: command to execute
- * May be called from atomic context. Returns context pointer, or error
- * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
- * context, to cleanup the context pointer
- */
-void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				   struct mlx5_accel_ipsec_sa *cmd);
-
-/**
- * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
- * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
- * Sleeps (killable) until command execution is complete.
- * Returns the command result, or -EINTR if killed
- */
-int mlx5_accel_ipsec_sa_cmd_wait(void *context);
-
 unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 				   unsigned int count);
 
+void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+				       struct mlx5_accel_esp_xfrm *xfrm,
+				       const __be32 saddr[4],
+				       const __be32 daddr[4],
+				       const __be32 spi, bool is_ipv6);
+void mlx5_accel_esp_free_hw_context(void *context);
+
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
 void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
@@ -129,6 +60,20 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
 #define MLX5_IPSEC_DEV(mdev) false
 
+static inline void *
+mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
+				 struct mlx5_accel_esp_xfrm *xfrm,
+				 const __be32 saddr[4],
+				 const __be32 daddr[4],
+				 const __be32 spi, bool is_ipv6)
+{
+	return NULL;
+}
+
+static inline void mlx5_accel_esp_free_hw_context(void *context)
+{
+}
+
 static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
 {
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 6f4a01620cc3..59df3dbd2e65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -47,7 +47,8 @@ struct mlx5e_ipsec_sa_entry {
 	unsigned int handle; /* Handle in SADB_RX */
 	struct xfrm_state *x;
 	struct mlx5e_ipsec *ipsec;
-	void *context;
+	struct mlx5_accel_esp_xfrm *xfrm;
+	void *hw_context;
 };
 
 struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
@@ -105,74 +106,51 @@ static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
 	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
 }
 
-static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
-{
-	unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
-
-	switch (key_len) {
-	case 16:
-		return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
-	case 32:
-		return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
-	default:
-		netdev_warn(x->xso.dev, "Bad key len: %d for alg %s\n",
-			    key_len, x->aead->alg_name);
-		return -1;
-	}
-}
-
-static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
-				    struct mlx5_accel_ipsec_sa *hw_sa)
+static void
+mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
+				   struct mlx5_accel_esp_xfrm_attrs *attrs)
 {
 	struct xfrm_state *x = sa_entry->x;
+	struct aes_gcm_keymat *aes_gcm = &attrs->keymat.aes_gcm;
 	struct aead_geniv_ctx *geniv_ctx;
-	unsigned int crypto_data_len;
 	struct crypto_aead *aead;
-	unsigned int key_len;
+	unsigned int crypto_data_len, key_len;
 	int ivsize;
 
-	memset(hw_sa, 0, sizeof(*hw_sa));
+	memset(attrs, 0, sizeof(*attrs));
 
+	/* key */
 	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
 	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+
+	memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
+	aes_gcm->key_len = key_len * 8;
+
+	/* salt and seq_iv */
 	aead = x->data;
 	geniv_ctx = crypto_aead_ctx(aead);
 	ivsize = crypto_aead_ivsize(aead);
+	memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
+	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
+	       sizeof(aes_gcm->salt));
 
-	memcpy(&hw_sa->ipsec_sa_v1.key_enc, x->aead->alg_key, key_len);
-	/* Duplicate 128 bit key twice according to HW layout */
-	if (key_len == 16)
-		memcpy(&hw_sa->ipsec_sa_v1.key_enc[16], x->aead->alg_key, key_len);
-	memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, geniv_ctx->salt, ivsize);
-	hw_sa->ipsec_sa_v1.gcm.salt = *((__be32 *)(x->aead->alg_key + key_len));
+	/* iv len */
+	aes_gcm->icv_len = x->aead->alg_icv_len;
 
-	hw_sa->ipsec_sa_v1.cmd = htonl(op);
-	hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
-	if (x->props.family == AF_INET) {
-		hw_sa->ipsec_sa_v1.sip[3] = x->props.saddr.a4;
-		hw_sa->ipsec_sa_v1.dip[3] = x->id.daddr.a4;
-	} else {
-		memcpy(hw_sa->ipsec_sa_v1.sip, x->props.saddr.a6,
-		       sizeof(hw_sa->ipsec_sa_v1.sip));
-		memcpy(hw_sa->ipsec_sa_v1.dip, x->id.daddr.a6,
-		       sizeof(hw_sa->ipsec_sa_v1.dip));
-		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_IPV6;
-	}
-	hw_sa->ipsec_sa_v1.spi = x->id.spi;
-	hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(sa_entry->handle);
-	switch (x->id.proto) {
-	case IPPROTO_ESP:
-		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_IP_ESP;
-		break;
-	case IPPROTO_AH:
-		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_IP_AH;
-		break;
-	default:
-		break;
-	}
-	hw_sa->ipsec_sa_v1.enc_mode = mlx5e_ipsec_enc_mode(x);
-	if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
-		hw_sa->ipsec_sa_v1.flags |= MLX5_IPSEC_SADB_DIR_SX;
+	/* rx handle */
+	attrs->sa_handle = sa_entry->handle;
+
+	/* algo type */
+	attrs->keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+	/* action */
+	attrs->action = (!(x->xso.flags & XFRM_OFFLOAD_INBOUND)) ?
+			MLX5_ACCEL_ESP_ACTION_ENCRYPT :
+			MLX5_ACCEL_ESP_ACTION_DECRYPT;
+	/* flags */
+	attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
+			MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
+			MLX5_ACCEL_ESP_FLAGS_TUNNEL;
 }
 
 static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
@@ -254,9 +232,10 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 {
 	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
 	struct net_device *netdev = x->xso.dev;
-	struct mlx5_accel_ipsec_sa hw_sa;
+	struct mlx5_accel_esp_xfrm_attrs attrs;
 	struct mlx5e_priv *priv;
-	void *context;
+	__be32 saddr[4] = {0}, daddr[4] = {0}, spi;
+	bool is_ipv6 = false;
 	int err;
 
 	priv = netdev_priv(netdev);
@@ -285,20 +264,41 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 		}
 	}
 
-	mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
-	context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
-	if (IS_ERR(context)) {
-		err = PTR_ERR(context);
+	/* create xfrm */
+	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+	sa_entry->xfrm =
+		mlx5_accel_esp_create_xfrm(priv->mdev, &attrs,
+					   MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
+	if (IS_ERR(sa_entry->xfrm)) {
+		err = PTR_ERR(sa_entry->xfrm);
 		goto err_sadb_rx;
 	}
 
-	err = mlx5_accel_ipsec_sa_cmd_wait(context);
-	if (err)
-		goto err_sadb_rx;
+	/* create hw context */
+	if (x->props.family == AF_INET) {
+		saddr[3] = x->props.saddr.a4;
+		daddr[3] = x->id.daddr.a4;
+	} else {
+		memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
+		memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
+		is_ipv6 = true;
+	}
+	spi = x->id.spi;
+	sa_entry->hw_context =
+			mlx5_accel_esp_create_hw_context(priv->mdev,
+							 sa_entry->xfrm,
+							 saddr, daddr, spi,
+							 is_ipv6);
+	if (IS_ERR(sa_entry->hw_context)) {
+		err = PTR_ERR(sa_entry->hw_context);
+		goto err_xfrm;
+	}
 
 	x->xso.offload_handle = (unsigned long)sa_entry;
 	goto out;
 
+err_xfrm:
+	mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
 err_sadb_rx:
 	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
 		mlx5e_ipsec_sadb_rx_del(sa_entry);
@@ -313,8 +313,6 @@ out:
 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 {
 	struct mlx5e_ipsec_sa_entry *sa_entry;
-	struct mlx5_accel_ipsec_sa hw_sa;
-	void *context;
 
 	if (!x->xso.offload_handle)
 		return;
@@ -324,19 +322,11 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 
 	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
 		mlx5e_ipsec_sadb_rx_del(sa_entry);
-
-	mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
-	context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
-	if (IS_ERR(context))
-		return;
-
-	sa_entry->context = context;
 }
 
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 {
 	struct mlx5e_ipsec_sa_entry *sa_entry;
-	int res;
 
 	if (!x->xso.offload_handle)
 		return;
@@ -344,11 +334,9 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
 	WARN_ON(sa_entry->x != x);
 
-	res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
-	sa_entry->context = NULL;
-	if (res) {
-		/* Leftover object will leak */
-		return;
+	if (sa_entry->hw_context) {
+		mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
+		mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
 	}
 
 	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 8de992ba7230..daae44c937f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -31,6 +31,7 @@
  *
  */
 
+#include <linux/rhashtable.h>
 #include <linux/mlx5/driver.h>
 
 #include "mlx5_core.h"
@@ -47,7 +48,7 @@ enum mlx5_fpga_ipsec_cmd_status {
 	MLX5_FPGA_IPSEC_CMD_COMPLETE,
 };
 
-struct mlx5_ipsec_command_context {
+struct mlx5_fpga_ipsec_cmd_context {
 	struct mlx5_fpga_dma_buf buf;
 	enum mlx5_fpga_ipsec_cmd_status status;
 	struct mlx5_ifc_fpga_ipsec_cmd_resp resp;
@@ -58,11 +59,43 @@ struct mlx5_ipsec_command_context {
 	u8 command[0];
 };
 
+struct mlx5_fpga_esp_xfrm;
+
+struct mlx5_fpga_ipsec_sa_ctx {
+	struct rhash_head		hash;
+	struct mlx5_ifc_fpga_ipsec_sa	hw_sa;
+	struct mlx5_core_dev		*dev;
+	struct mlx5_fpga_esp_xfrm	*fpga_xfrm;
+};
+
+struct mlx5_fpga_esp_xfrm {
+	unsigned int			num_rules;
+	struct mlx5_fpga_ipsec_sa_ctx	*sa_ctx;
+	struct mutex			lock; /* xfrm lock */
+	struct mlx5_accel_esp_xfrm	accel_xfrm;
+};
+
+static const struct rhashtable_params rhash_sa = {
+	.key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+	.key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+	.head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
+	.automatic_shrinking = true,
+	.min_size = 1,
+};
+
 struct mlx5_fpga_ipsec {
 	struct list_head pending_cmds;
 	spinlock_t pending_cmds_lock; /* Protects pending_cmds */
 	u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
 	struct mlx5_fpga_conn *conn;
+
+	/* Map hardware SA           -->  SA context
+	 *     (mlx5_fpga_ipsec_sa)       (mlx5_fpga_ipsec_sa_ctx)
+	 * We will use this hash to avoid SAs duplication in fpga which
+	 * aren't allowed
+	 */
+	struct rhashtable sa_hash;	/* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
+	struct mutex sa_hash_lock;
 };
 
 static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
@@ -86,10 +119,10 @@ static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
 					  struct mlx5_fpga_dma_buf *buf,
 					  u8 status)
 {
-	struct mlx5_ipsec_command_context *context;
+	struct mlx5_fpga_ipsec_cmd_context *context;
 
 	if (status) {
-		context = container_of(buf, struct mlx5_ipsec_command_context,
+		context = container_of(buf, struct mlx5_fpga_ipsec_cmd_context,
 				       buf);
 		mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
 			       status);
@@ -117,7 +150,7 @@ int syndrome_to_errno(enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome)
 static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 {
 	struct mlx5_ifc_fpga_ipsec_cmd_resp *resp = buf->sg[0].data;
-	struct mlx5_ipsec_command_context *context;
+	struct mlx5_fpga_ipsec_cmd_context *context;
 	enum mlx5_ifc_fpga_ipsec_response_syndrome syndrome;
 	struct mlx5_fpga_device *fdev = cb_arg;
 	unsigned long flags;
@@ -133,7 +166,7 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 
 	spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
 	context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
-					   struct mlx5_ipsec_command_context,
+					   struct mlx5_fpga_ipsec_cmd_context,
 					   list);
 	if (context)
 		list_del(&context->list);
@@ -160,7 +193,7 @@ static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
 static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
 				      const void *cmd, int cmd_size)
 {
-	struct mlx5_ipsec_command_context *context;
+	struct mlx5_fpga_ipsec_cmd_context *context;
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned long flags;
 	int res;
@@ -203,7 +236,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(struct mlx5_core_dev *mdev,
 
 static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
 {
-	struct mlx5_ipsec_command_context *context = ctx;
+	struct mlx5_fpga_ipsec_cmd_context *context = ctx;
 	unsigned long timeout =
 		msecs_to_jiffies(MLX5_FPGA_IPSEC_CMD_TIMEOUT_MSEC);
 	int res;
@@ -222,33 +255,49 @@ static int mlx5_fpga_ipsec_cmd_wait(void *ctx)
 	return res;
 }
 
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				  struct mlx5_accel_ipsec_sa *cmd, int cmd_size)
+static inline bool is_v2_sadb_supported(struct mlx5_fpga_ipsec *fipsec)
 {
-	return mlx5_fpga_ipsec_cmd_exec(mdev, cmd, cmd_size);
+	if (MLX5_GET(ipsec_extended_cap, fipsec->caps, v2_command))
+		return true;
+	return false;
 }
 
-int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+static int mlx5_fpga_ipsec_update_hw_sa(struct mlx5_fpga_device *fdev,
+					struct mlx5_ifc_fpga_ipsec_sa *hw_sa,
+					int opcode)
 {
-	struct mlx5_ipsec_command_context *context = ctx;
-	struct mlx5_accel_ipsec_sa *sa;
-	int res;
+	struct mlx5_core_dev *dev = fdev->mdev;
+	struct mlx5_ifc_fpga_ipsec_sa *sa;
+	struct mlx5_fpga_ipsec_cmd_context *cmd_context;
+	size_t sa_cmd_size;
+	int err;
 
-	res = mlx5_fpga_ipsec_cmd_wait(ctx);
-	if (res)
+	hw_sa->ipsec_sa_v1.cmd = htonl(opcode);
+	if (is_v2_sadb_supported(fdev->ipsec))
+		sa_cmd_size = sizeof(*hw_sa);
+	else
+		sa_cmd_size = sizeof(hw_sa->ipsec_sa_v1);
+
+	cmd_context = (struct mlx5_fpga_ipsec_cmd_context *)
+			mlx5_fpga_ipsec_cmd_exec(dev, hw_sa, sa_cmd_size);
+	if (IS_ERR(cmd_context))
+		return PTR_ERR(cmd_context);
+
+	err = mlx5_fpga_ipsec_cmd_wait(cmd_context);
+	if (err)
 		goto out;
 
-	sa = (struct mlx5_accel_ipsec_sa *)&context->command;
-	if (sa->ipsec_sa_v1.sw_sa_handle != context->resp.sw_sa_handle) {
-		mlx5_fpga_err(context->dev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+	sa = (struct mlx5_ifc_fpga_ipsec_sa *)&cmd_context->command;
+	if (sa->ipsec_sa_v1.sw_sa_handle != cmd_context->resp.sw_sa_handle) {
+		mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
 			      ntohl(sa->ipsec_sa_v1.sw_sa_handle),
-			      ntohl(context->resp.sw_sa_handle));
-		res = -EIO;
+			      ntohl(cmd_context->resp.sw_sa_handle));
+		err = -EIO;
 	}
 
 out:
-	kfree(context);
-	return res;
+	kfree(cmd_context);
+	return err;
 }
 
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
@@ -278,9 +327,6 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
 		ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
 
-	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, v2_command))
-		ret |= MLX5_ACCEL_IPSEC_CAP_V2_CMD;
-
 	return ret;
 }
 
@@ -345,11 +391,11 @@ out:
 
 static int mlx5_fpga_ipsec_set_caps(struct mlx5_core_dev *mdev, u32 flags)
 {
-	struct mlx5_ipsec_command_context *context;
+	struct mlx5_fpga_ipsec_cmd_context *context;
 	struct mlx5_ifc_fpga_ipsec_cmd_cap cmd = {0};
 	int err;
 
-	cmd.cmd = htonl(MLX5_IPSEC_CMD_SET_CAP);
+	cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP);
 	cmd.flags = htonl(flags);
 	context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd));
 	if (IS_ERR(context)) {
@@ -383,6 +429,200 @@ static int mlx5_fpga_ipsec_enable_supported_caps(struct mlx5_core_dev *mdev)
 	return mlx5_fpga_ipsec_set_caps(mdev, flags);
 }
 
+static void
+mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
+			      const struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+			      struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+	const struct aes_gcm_keymat *aes_gcm = &xfrm_attrs->keymat.aes_gcm;
+
+	/* key */
+	memcpy(&hw_sa->ipsec_sa_v1.key_enc, aes_gcm->aes_key,
+	       aes_gcm->key_len / 8);
+	/* Duplicate 128 bit key twice according to HW layout */
+	if (aes_gcm->key_len == 128)
+		memcpy(&hw_sa->ipsec_sa_v1.key_enc[16],
+		       aes_gcm->aes_key, aes_gcm->key_len / 8);
+
+	/* salt and seq_iv */
+	memcpy(&hw_sa->ipsec_sa_v1.gcm.salt_iv, &aes_gcm->seq_iv,
+	       sizeof(aes_gcm->seq_iv));
+	memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
+	       sizeof(aes_gcm->salt));
+
+	/* rx handle */
+	hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
+
+	/* enc mode */
+	switch (aes_gcm->key_len) {
+	case 128:
+		hw_sa->ipsec_sa_v1.enc_mode =
+			MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128;
+		break;
+	case 256:
+		hw_sa->ipsec_sa_v1.enc_mode =
+			MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128;
+		break;
+	}
+
+	/* flags */
+	hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_SA_VALID |
+			MLX5_FPGA_IPSEC_SA_SPI_EN |
+			MLX5_FPGA_IPSEC_SA_IP_ESP;
+
+	if (xfrm_attrs->action & MLX5_ACCEL_ESP_ACTION_ENCRYPT)
+		hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_DIR_SX;
+	else
+		hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_DIR_SX;
+}
+
+static void
+mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
+			    struct mlx5_accel_esp_xfrm_attrs *xfrm_attrs,
+			    const __be32 saddr[4],
+			    const __be32 daddr[4],
+			    const __be32 spi, bool is_ipv6,
+			    struct mlx5_ifc_fpga_ipsec_sa *hw_sa)
+{
+	mlx5_fpga_ipsec_build_hw_xfrm(mdev, xfrm_attrs, hw_sa);
+
+	/* IPs */
+	memcpy(hw_sa->ipsec_sa_v1.sip, saddr, sizeof(hw_sa->ipsec_sa_v1.sip));
+	memcpy(hw_sa->ipsec_sa_v1.dip, daddr, sizeof(hw_sa->ipsec_sa_v1.dip));
+
+	/* SPI */
+	hw_sa->ipsec_sa_v1.spi = spi;
+
+	/* flags */
+	if (is_ipv6)
+		hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
+}
+
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+				    struct mlx5_accel_esp_xfrm *accel_xfrm,
+				    const __be32 saddr[4],
+				    const __be32 daddr[4],
+				    const __be32 spi, bool is_ipv6)
+{
+	struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
+	struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+			container_of(accel_xfrm, typeof(*fpga_xfrm),
+				     accel_xfrm);
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+	int opcode, err;
+	void *context;
+
+	/* alloc SA */
+	sa_ctx = kzalloc(sizeof(*sa_ctx), GFP_KERNEL);
+	if (!sa_ctx)
+		return ERR_PTR(-ENOMEM);
+
+	sa_ctx->dev = mdev;
+
+	/* build candidate SA */
+	mlx5_fpga_ipsec_build_hw_sa(mdev, &accel_xfrm->attrs,
+				    saddr, daddr, spi, is_ipv6,
+				    &sa_ctx->hw_sa);
+
+	mutex_lock(&fpga_xfrm->lock);
+
+	if (fpga_xfrm->sa_ctx) {        /* multiple rules for same accel_xfrm */
+		/* all rules must be with same IPs and SPI */
+		if (memcmp(&sa_ctx->hw_sa, &fpga_xfrm->sa_ctx->hw_sa,
+			   sizeof(sa_ctx->hw_sa))) {
+			context = ERR_PTR(-EINVAL);
+			goto exists;
+		}
+
+		++fpga_xfrm->num_rules;
+		context = fpga_xfrm->sa_ctx;
+		goto exists;
+	}
+
+	/* This is unbounded fpga_xfrm, try to add to hash */
+	mutex_lock(&fipsec->sa_hash_lock);
+
+	err = rhashtable_lookup_insert_fast(&fipsec->sa_hash, &sa_ctx->hash,
+					    rhash_sa);
+	if (err) {
+		/* Can't bound different accel_xfrm to already existing sa_ctx.
+		 * This is because we can't support multiple ketmats for
+		 * same IPs and SPI
+		 */
+		context = ERR_PTR(-EEXIST);
+		goto unlock_hash;
+	}
+
+	/* Bound accel_xfrm to sa_ctx */
+	opcode = is_v2_sadb_supported(fdev->ipsec) ?
+			MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 :
+			MLX5_FPGA_IPSEC_CMD_OP_ADD_SA;
+	err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+	sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+	if (err) {
+		context = ERR_PTR(err);
+		goto delete_hash;
+	}
+
+	mutex_unlock(&fipsec->sa_hash_lock);
+
+	++fpga_xfrm->num_rules;
+	fpga_xfrm->sa_ctx = sa_ctx;
+	sa_ctx->fpga_xfrm = fpga_xfrm;
+
+	mutex_unlock(&fpga_xfrm->lock);
+
+	return sa_ctx;
+
+delete_hash:
+	WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+				       rhash_sa));
+unlock_hash:
+	mutex_unlock(&fipsec->sa_hash_lock);
+
+exists:
+	mutex_unlock(&fpga_xfrm->lock);
+	kfree(sa_ctx);
+	return context;
+}
+
+static void
+mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
+{
+	struct mlx5_fpga_device *fdev = sa_ctx->dev->fpga;
+	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+	int opcode = is_v2_sadb_supported(fdev->ipsec) ?
+			MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 :
+			MLX5_FPGA_IPSEC_CMD_OP_DEL_SA;
+	int err;
+
+	err = mlx5_fpga_ipsec_update_hw_sa(fdev, &sa_ctx->hw_sa, opcode);
+	sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+	if (err) {
+		WARN_ON(err);
+		return;
+	}
+
+	mutex_lock(&fipsec->sa_hash_lock);
+	WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
+				       rhash_sa));
+	mutex_unlock(&fipsec->sa_hash_lock);
+}
+
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+	struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+			((struct mlx5_fpga_ipsec_sa_ctx *)context)->fpga_xfrm;
+
+	mutex_lock(&fpga_xfrm->lock);
+	if (!--fpga_xfrm->num_rules) {
+		mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx);
+		fpga_xfrm->sa_ctx = NULL;
+	}
+	mutex_unlock(&fpga_xfrm->lock);
+}
+
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_conn_attr init_attr = {0};
@@ -421,15 +661,23 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 	}
 	fdev->ipsec->conn = conn;
 
+	err = rhashtable_init(&fdev->ipsec->sa_hash, &rhash_sa);
+	if (err)
+		goto err_destroy_conn;
+	mutex_init(&fdev->ipsec->sa_hash_lock);
+
 	err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
 	if (err) {
 		mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
 			      err);
-		goto err_destroy_conn;
+		goto err_destroy_hash;
 	}
 
 	return 0;
 
+err_destroy_hash:
+	rhashtable_destroy(&fdev->ipsec->sa_hash);
+
 err_destroy_conn:
 	mlx5_fpga_sbu_conn_destroy(conn);
 
@@ -446,7 +694,92 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 	if (!mlx5_fpga_is_ipsec_device(mdev))
 		return;
 
+	rhashtable_destroy(&fdev->ipsec->sa_hash);
+
 	mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
 	kfree(fdev->ipsec);
 	fdev->ipsec = NULL;
 }
+
+static int
+mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
+				  const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+	if (attrs->tfc_pad) {
+		mlx5_core_err(mdev, "Cannot offload xfrm states with tfc padding\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (attrs->replay_type != MLX5_ACCEL_ESP_REPLAY_NONE) {
+		mlx5_core_err(mdev, "Cannot offload xfrm states with anti replay\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (attrs->keymat_type != MLX5_ACCEL_ESP_KEYMAT_AES_GCM) {
+		mlx5_core_err(mdev, "Only aes gcm keymat is supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (attrs->keymat.aes_gcm.iv_algo !=
+	    MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ) {
+		mlx5_core_err(mdev, "Only iv sequence algo is supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (attrs->keymat.aes_gcm.icv_len != 128) {
+		mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (attrs->keymat.aes_gcm.key_len != 128 &&
+	    attrs->keymat.aes_gcm.key_len != 256) {
+		mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) &&
+	    (!MLX5_GET(ipsec_extended_cap, mdev->fpga->ipsec->caps,
+		       v2_command))) {
+		mlx5_core_err(mdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+			  const struct mlx5_accel_esp_xfrm_attrs *attrs,
+			  u32 flags)
+{
+	struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+
+	if (!(flags & MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA)) {
+		mlx5_core_warn(mdev, "Tried to create an esp action without metadata\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+		mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	fpga_xfrm = kzalloc(sizeof(*fpga_xfrm), GFP_KERNEL);
+	if (!fpga_xfrm)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&fpga_xfrm->lock);
+	memcpy(&fpga_xfrm->accel_xfrm.attrs, attrs,
+	       sizeof(fpga_xfrm->accel_xfrm.attrs));
+
+	return &fpga_xfrm->accel_xfrm;
+}
+
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+	struct mlx5_fpga_esp_xfrm *fpga_xfrm =
+			container_of(xfrm, struct mlx5_fpga_esp_xfrm,
+				     accel_xfrm);
+	/* assuming no sa_ctx are connected to this xfrm_ctx */
+	kfree(fpga_xfrm);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index e5ec29f56532..7ad1e2cd3fb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -38,32 +38,29 @@
 
 #ifdef CONFIG_MLX5_FPGA
 
-void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-				  struct mlx5_accel_ipsec_sa *cmd, int cmd_size);
-int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
-
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
 unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 				  unsigned int counters_count);
 
+void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+				    struct mlx5_accel_esp_xfrm *accel_xfrm,
+				    const __be32 saddr[4],
+				    const __be32 daddr[4],
+				    const __be32 spi, bool is_ipv6);
+void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
+
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
+struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+			  const struct mlx5_accel_esp_xfrm_attrs *attrs,
+			  u32 flags);
+void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+
 #else
 
-static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
-						struct mlx5_accel_ipsec_sa *cmd,
-						int cmd_size)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 {
 	return 0;
@@ -81,6 +78,20 @@ static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
 	return 0;
 }
 
+static inline void *
+mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
+			      struct mlx5_accel_esp_xfrm *accel_xfrm,
+			      const __be32 saddr[4],
+			      const __be32 daddr[4],
+			      const __be32 spi, bool is_ipv6)
+{
+	return NULL;
+}
+
+static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
+{
+}
+
 static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 {
 	return 0;
@@ -90,6 +101,18 @@ static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 }
 
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
+			  const struct mlx5_accel_esp_xfrm_attrs *attrs,
+			  u32 flags)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
+{
+}
+
 #endif /* CONFIG_MLX5_FPGA */
 
 #endif	/* __MLX5_FPGA_SADB_H__ */
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index b674af63689b..da6de465ea6d 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -36,23 +36,102 @@
 
 #include <linux/mlx5/driver.h>
 
-enum mlx5_accel_ipsec_caps {
+enum mlx5_accel_esp_aes_gcm_keymat_iv_algo {
+	MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ,
+};
+
+enum mlx5_accel_esp_flags {
+	MLX5_ACCEL_ESP_FLAGS_TUNNEL            = 0,    /* Default */
+	MLX5_ACCEL_ESP_FLAGS_TRANSPORT         = 1UL << 0,
+	MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED     = 1UL << 1,
+	MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP = 1UL << 2,
+};
+
+enum mlx5_accel_esp_action {
+	MLX5_ACCEL_ESP_ACTION_DECRYPT,
+	MLX5_ACCEL_ESP_ACTION_ENCRYPT,
+};
+
+enum mlx5_accel_esp_keymats {
+	MLX5_ACCEL_ESP_KEYMAT_AES_NONE,
+	MLX5_ACCEL_ESP_KEYMAT_AES_GCM,
+};
+
+enum mlx5_accel_esp_replay {
+	MLX5_ACCEL_ESP_REPLAY_NONE,
+	MLX5_ACCEL_ESP_REPLAY_BMP,
+};
+
+struct aes_gcm_keymat {
+	u64   seq_iv;
+	enum mlx5_accel_esp_aes_gcm_keymat_iv_algo iv_algo;
+
+	u32   salt;
+	u32   icv_len;
+
+	u32   key_len;
+	u32   aes_key[256 / 32];
+};
+
+struct mlx5_accel_esp_xfrm_attrs {
+	enum mlx5_accel_esp_action action;
+	u32   esn;
+	u32   spi;
+	u32   seq;
+	u32   tfc_pad;
+	u32   flags;
+	u32   sa_handle;
+	enum mlx5_accel_esp_replay replay_type;
+	union {
+		struct {
+			u32 size;
+
+		} bmp;
+	} replay;
+	enum mlx5_accel_esp_keymats keymat_type;
+	union {
+		struct aes_gcm_keymat aes_gcm;
+	} keymat;
+};
+
+struct mlx5_accel_esp_xfrm {
+	struct mlx5_core_dev  *mdev;
+	struct mlx5_accel_esp_xfrm_attrs attrs;
+};
+
+enum {
+	MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA = 1UL << 0,
+};
+
+enum mlx5_accel_ipsec_cap {
 	MLX5_ACCEL_IPSEC_CAP_DEVICE		= 1 << 0,
 	MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA	= 1 << 1,
 	MLX5_ACCEL_IPSEC_CAP_ESP		= 1 << 2,
 	MLX5_ACCEL_IPSEC_CAP_IPV6		= 1 << 3,
 	MLX5_ACCEL_IPSEC_CAP_LSO		= 1 << 4,
 	MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER	= 1 << 5,
-	MLX5_ACCEL_IPSEC_CAP_V2_CMD		= 1 << 6,
 };
 
 #ifdef CONFIG_MLX5_ACCEL
 
 u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
 
+struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+			   const struct mlx5_accel_esp_xfrm_attrs *attrs,
+			   u32 flags);
+void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+
 #else
 
 static inline u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev) { return 0; }
 
+static inline struct mlx5_accel_esp_xfrm *
+mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
+			   const struct mlx5_accel_esp_xfrm_attrs *attrs,
+			   u32 flags) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {}
+
 #endif
 #endif
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index dd7e4538159c..debcc57de43a 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -448,6 +448,15 @@ struct mlx5_ifc_fpga_ipsec_cmd_resp {
 	u8 reserved[24];
 } __packed;
 
+enum mlx5_ifc_fpga_ipsec_cmd_opcode {
+	MLX5_FPGA_IPSEC_CMD_OP_ADD_SA = 0,
+	MLX5_FPGA_IPSEC_CMD_OP_DEL_SA = 1,
+	MLX5_FPGA_IPSEC_CMD_OP_ADD_SA_V2 = 2,
+	MLX5_FPGA_IPSEC_CMD_OP_DEL_SA_V2 = 3,
+	MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2 = 4,
+	MLX5_FPGA_IPSEC_CMD_OP_SET_CAP = 5,
+};
+
 enum mlx5_ifc_fpga_ipsec_cap {
 	MLX5_FPGA_IPSEC_CAP_NO_TRAILER = BIT(0),
 };
@@ -458,4 +467,54 @@ struct mlx5_ifc_fpga_ipsec_cmd_cap {
 	u8 reserved[24];
 } __packed;
 
+enum mlx5_ifc_fpga_ipsec_sa_flags {
+	MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2),
+	MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3),
+	MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4),
+	MLX5_FPGA_IPSEC_SA_SA_VALID = BIT(5),
+	MLX5_FPGA_IPSEC_SA_IP_ESP = BIT(6),
+	MLX5_FPGA_IPSEC_SA_IP_AH = BIT(7),
+};
+
+enum mlx5_ifc_fpga_ipsec_sa_enc_mode {
+	MLX5_FPGA_IPSEC_SA_ENC_MODE_NONE = 0,
+	MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_128_AUTH_128 = 1,
+	MLX5_FPGA_IPSEC_SA_ENC_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+struct mlx5_ifc_fpga_ipsec_sa_v1 {
+	__be32 cmd;
+	u8 key_enc[32];
+	u8 key_auth[32];
+	__be32 sip[4];
+	__be32 dip[4];
+	union {
+		struct {
+			__be32 reserved;
+			u8 salt_iv[8];
+			__be32 salt;
+		} __packed gcm;
+		struct {
+			u8 salt[16];
+		} __packed cbc;
+	};
+	__be32 spi;
+	__be32 sw_sa_handle;
+	__be16 tfclen;
+	u8 enc_mode;
+	u8 reserved1[2];
+	u8 flags;
+	u8 reserved2[2];
+};
+
+struct mlx5_ifc_fpga_ipsec_sa {
+	struct mlx5_ifc_fpga_ipsec_sa_v1 ipsec_sa_v1;
+	__be16 udp_sp;
+	__be16 udp_dp;
+	u8 reserved1[4];
+	__be32 esn;
+	__be16 vid;	/* only 12 bits, rest is reserved */
+	__be16 reserved2;
+} __packed;
+
 #endif /* MLX5_IFC_FPGA_H */

From 05564d0ae075b7a73339eaa05296c3034e439c32 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Sun, 18 Feb 2018 15:07:20 +0200
Subject: [PATCH 0734/1678] net/mlx5: Add flow-steering commands for FPGA IPSec
 implementation

In order to add a context to the FPGA, we need to get both the software
transform context (which includes the keys, etc) and the
source/destination IPs (which are included in the steering
rule). Therefore, we register new set of firmware like commands for
the FPGA. Each time a rule is added, the steering core infrastructure
calls the FPGA command layer. If the rule is intended for the FPGA,
it combines the IPs information with the software transformation
context and creates the respective hardware transform.
Afterwards, it calls the standard steering command layer.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/accel/ipsec.c |   7 +
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  | 724 ++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.h  |  24 +
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |   5 +
 .../net/ethernet/mellanox/mlx5/core/main.c    |   2 +
 include/linux/mlx5/accel.h                    |   5 +
 include/linux/mlx5/fs.h                       |   3 +
 7 files changed, 770 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index ab5bc82855fd..9f1b1939716a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -100,3 +100,10 @@ void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
 	mlx5_fpga_esp_destroy_xfrm(xfrm);
 }
 EXPORT_SYMBOL_GPL(mlx5_accel_esp_destroy_xfrm);
+
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+			       const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+	return mlx5_fpga_esp_modify_xfrm(xfrm, attrs);
+}
+EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index daae44c937f0..7b43fa269117 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -33,8 +33,12 @@
 
 #include <linux/rhashtable.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
 
 #include "mlx5_core.h"
+#include "fs_cmd.h"
 #include "fpga/ipsec.h"
 #include "fpga/sdk.h"
 #include "fpga/core.h"
@@ -75,6 +79,12 @@ struct mlx5_fpga_esp_xfrm {
 	struct mlx5_accel_esp_xfrm	accel_xfrm;
 };
 
+struct mlx5_fpga_ipsec_rule {
+	struct rb_node			node;
+	struct fs_fte			*fte;
+	struct mlx5_fpga_ipsec_sa_ctx	*ctx;
+};
+
 static const struct rhashtable_params rhash_sa = {
 	.key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
 	.key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
@@ -84,11 +94,15 @@ static const struct rhashtable_params rhash_sa = {
 };
 
 struct mlx5_fpga_ipsec {
+	struct mlx5_fpga_device *fdev;
 	struct list_head pending_cmds;
 	spinlock_t pending_cmds_lock; /* Protects pending_cmds */
 	u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
 	struct mlx5_fpga_conn *conn;
 
+	struct notifier_block	fs_notifier_ingress_bypass;
+	struct notifier_block	fs_notifier_egress;
+
 	/* Map hardware SA           -->  SA context
 	 *     (mlx5_fpga_ipsec_sa)       (mlx5_fpga_ipsec_sa_ctx)
 	 * We will use this hash to avoid SAs duplication in fpga which
@@ -96,6 +110,12 @@ struct mlx5_fpga_ipsec {
 	 */
 	struct rhashtable sa_hash;	/* hw_sa -> mlx5_fpga_ipsec_sa_ctx */
 	struct mutex sa_hash_lock;
+
+	/* Tree holding all rules for this fpga device
+	 * Key for searching a rule (mlx5_fpga_ipsec_rule) is (ft, id)
+	 */
+	struct rb_root rules_rb;
+	struct mutex rules_rb_lock; /* rules lock */
 };
 
 static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
@@ -498,6 +518,127 @@ mlx5_fpga_ipsec_build_hw_sa(struct mlx5_core_dev *mdev,
 		hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_IPV6;
 }
 
+static bool is_full_mask(const void *p, size_t len)
+{
+	WARN_ON(len % 4);
+
+	return !memchr_inv(p, 0xff, len);
+}
+
+static bool validate_fpga_full_mask(struct mlx5_core_dev *dev,
+				    const u32 *match_c,
+				    const u32 *match_v)
+{
+	const void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+						 match_c,
+						 misc_parameters);
+	const void *headers_c = MLX5_ADDR_OF(fte_match_param,
+					     match_c,
+					     outer_headers);
+	const void *headers_v = MLX5_ADDR_OF(fte_match_param,
+					     match_v,
+					     outer_headers);
+
+	if (mlx5_fs_is_outer_ipv4_flow(dev, headers_c, headers_v)) {
+		const void *s_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+						    headers_c,
+						    src_ipv4_src_ipv6.ipv4_layout.ipv4);
+		const void *d_ipv4_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+						    headers_c,
+						    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+		if (!is_full_mask(s_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+							      ipv4)) ||
+		    !is_full_mask(d_ipv4_c, MLX5_FLD_SZ_BYTES(ipv4_layout,
+							      ipv4)))
+			return false;
+	} else {
+		const void *s_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+						    headers_c,
+						    src_ipv4_src_ipv6.ipv6_layout.ipv6);
+		const void *d_ipv6_c = MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+						    headers_c,
+						    dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+
+		if (!is_full_mask(s_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+							      ipv6)) ||
+		    !is_full_mask(d_ipv6_c, MLX5_FLD_SZ_BYTES(ipv6_layout,
+							      ipv6)))
+			return false;
+	}
+
+	if (!is_full_mask(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+				       outer_esp_spi),
+			  MLX5_FLD_SZ_BYTES(fte_match_set_misc, outer_esp_spi)))
+		return false;
+
+	return true;
+}
+
+static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
+				    u8 match_criteria_enable,
+				    const u32 *match_c,
+				    const u32 *match_v)
+{
+	u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+	bool ipv6_flow;
+
+	ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
+
+	if (!(match_criteria_enable & MLX5_MATCH_OUTER_HEADERS) ||
+	    mlx5_fs_is_outer_udp_flow(match_c, match_v) ||
+	    mlx5_fs_is_outer_tcp_flow(match_c, match_v) ||
+	    mlx5_fs_is_vxlan_flow(match_c) ||
+	    !(mlx5_fs_is_outer_ipv4_flow(dev, match_c, match_v) ||
+	      ipv6_flow))
+		return false;
+
+	if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_DEVICE))
+		return false;
+
+	if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_ESP) &&
+	    mlx5_fs_is_outer_ipsec_flow(match_c))
+		return false;
+
+	if (!(ipsec_dev_caps & MLX5_ACCEL_IPSEC_CAP_IPV6) &&
+	    ipv6_flow)
+		return false;
+
+	if (!validate_fpga_full_mask(dev, match_c, match_v))
+		return false;
+
+	return true;
+}
+
+static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
+					   u8 match_criteria_enable,
+					   const u32 *match_c,
+					   const u32 *match_v,
+					   struct mlx5_flow_act *flow_act)
+{
+	const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					   outer_headers);
+	bool is_dmac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_47_16) ||
+			MLX5_GET(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+	bool is_smac = MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_47_16) ||
+			MLX5_GET(fte_match_set_lyr_2_4, outer_c, smac_15_0);
+	int ret;
+
+	ret = mlx5_is_fpga_ipsec_rule(dev, match_criteria_enable, match_c,
+				      match_v);
+	if (!ret)
+		return ret;
+
+	if (is_dmac || is_smac ||
+	    (match_criteria_enable &
+	     ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
+	    (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
+	     flow_act->has_flow_tag)
+		return false;
+
+	return true;
+}
+
 void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
 				    struct mlx5_accel_esp_xfrm *accel_xfrm,
 				    const __be32 saddr[4],
@@ -587,6 +728,73 @@ exists:
 	return context;
 }
 
+static void *
+mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
+				 struct fs_fte *fte,
+				 bool is_egress)
+{
+	struct mlx5_accel_esp_xfrm *accel_xfrm;
+	__be32 saddr[4], daddr[4], spi;
+	struct mlx5_flow_group *fg;
+	bool is_ipv6 = false;
+
+	fs_get_obj(fg, fte->node.parent);
+	/* validate */
+	if (is_egress &&
+	    !mlx5_is_fpga_egress_ipsec_rule(mdev,
+					    fg->mask.match_criteria_enable,
+					    fg->mask.match_criteria,
+					    fte->val,
+					    &fte->action))
+		return ERR_PTR(-EINVAL);
+	else if (!mlx5_is_fpga_ipsec_rule(mdev,
+					  fg->mask.match_criteria_enable,
+					  fg->mask.match_criteria,
+					  fte->val))
+		return ERR_PTR(-EINVAL);
+
+	/* get xfrm context */
+	accel_xfrm =
+		(struct mlx5_accel_esp_xfrm *)fte->action.esp_id;
+
+	/* IPs */
+	if (mlx5_fs_is_outer_ipv4_flow(mdev, fg->mask.match_criteria,
+				       fte->val)) {
+		memcpy(&saddr[3],
+		       MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+				    fte->val,
+				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
+				    sizeof(saddr[3]));
+		memcpy(&daddr[3],
+		       MLX5_ADDR_OF(fte_match_set_lyr_2_4,
+				    fte->val,
+				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+				    sizeof(daddr[3]));
+	} else {
+		memcpy(saddr,
+		       MLX5_ADDR_OF(fte_match_param,
+				    fte->val,
+				    outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
+				    sizeof(saddr));
+		memcpy(daddr,
+		       MLX5_ADDR_OF(fte_match_param,
+				    fte->val,
+				    outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+				    sizeof(daddr));
+		is_ipv6 = true;
+	}
+
+	/* SPI */
+	spi = MLX5_GET_BE(typeof(spi),
+			  fte_match_param, fte->val,
+			  misc_parameters.outer_esp_spi);
+
+	/* create */
+	return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
+					     saddr, daddr,
+					     spi, is_ipv6);
+}
+
 static void
 mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
 {
@@ -623,6 +831,389 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context)
 	mutex_unlock(&fpga_xfrm->lock);
 }
 
+static inline struct mlx5_fpga_ipsec_rule *
+_rule_search(struct rb_root *root, struct fs_fte *fte)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct mlx5_fpga_ipsec_rule *rule =
+				container_of(node, struct mlx5_fpga_ipsec_rule,
+					     node);
+
+		if (rule->fte < fte)
+			node = node->rb_left;
+		else if (rule->fte > fte)
+			node = node->rb_right;
+		else
+			return rule;
+	}
+	return NULL;
+}
+
+static struct mlx5_fpga_ipsec_rule *
+rule_search(struct mlx5_fpga_ipsec *ipsec_dev, struct fs_fte *fte)
+{
+	struct mlx5_fpga_ipsec_rule *rule;
+
+	mutex_lock(&ipsec_dev->rules_rb_lock);
+	rule = _rule_search(&ipsec_dev->rules_rb, fte);
+	mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+	return rule;
+}
+
+static inline int _rule_insert(struct rb_root *root,
+			       struct mlx5_fpga_ipsec_rule *rule)
+{
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct mlx5_fpga_ipsec_rule *this =
+				container_of(*new, struct mlx5_fpga_ipsec_rule,
+					     node);
+
+		parent = *new;
+		if (rule->fte < this->fte)
+			new = &((*new)->rb_left);
+		else if (rule->fte > this->fte)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&rule->node, parent, new);
+	rb_insert_color(&rule->node, root);
+
+	return 0;
+}
+
+static int rule_insert(struct mlx5_fpga_ipsec *ipsec_dev,
+		       struct mlx5_fpga_ipsec_rule *rule)
+{
+	int ret;
+
+	mutex_lock(&ipsec_dev->rules_rb_lock);
+	ret = _rule_insert(&ipsec_dev->rules_rb, rule);
+	mutex_unlock(&ipsec_dev->rules_rb_lock);
+
+	return ret;
+}
+
+static inline void _rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+				struct mlx5_fpga_ipsec_rule *rule)
+{
+	struct rb_root *root = &ipsec_dev->rules_rb;
+
+	mutex_lock(&ipsec_dev->rules_rb_lock);
+	rb_erase(&rule->node, root);
+	mutex_unlock(&ipsec_dev->rules_rb_lock);
+}
+
+static void rule_delete(struct mlx5_fpga_ipsec *ipsec_dev,
+			struct mlx5_fpga_ipsec_rule *rule)
+{
+	_rule_delete(ipsec_dev, rule);
+	kfree(rule);
+}
+
+struct mailbox_mod {
+	uintptr_t			saved_esp_id;
+	u32				saved_action;
+	u32				saved_outer_esp_spi_value;
+};
+
+static void restore_spec_mailbox(struct fs_fte *fte,
+				 struct mailbox_mod *mbox_mod)
+{
+	char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+					   fte->val,
+					   misc_parameters);
+
+	MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+		 mbox_mod->saved_outer_esp_spi_value);
+	fte->action.action |= mbox_mod->saved_action;
+	fte->action.esp_id = (uintptr_t)mbox_mod->saved_esp_id;
+}
+
+static void modify_spec_mailbox(struct mlx5_core_dev *mdev,
+				struct fs_fte *fte,
+				struct mailbox_mod *mbox_mod)
+{
+	char *misc_params_v = MLX5_ADDR_OF(fte_match_param,
+					   fte->val,
+					   misc_parameters);
+
+	mbox_mod->saved_esp_id = fte->action.esp_id;
+	mbox_mod->saved_action = fte->action.action &
+			(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+			 MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+	mbox_mod->saved_outer_esp_spi_value =
+			MLX5_GET(fte_match_set_misc, misc_params_v,
+				 outer_esp_spi);
+
+	fte->action.esp_id = 0;
+	fte->action.action &= ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+				MLX5_FLOW_CONTEXT_ACTION_DECRYPT);
+	if (!MLX5_CAP_FLOWTABLE(mdev,
+				flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+		MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, 0);
+}
+
+static enum fs_flow_table_type egress_to_fs_ft(bool egress)
+{
+	return egress ? FS_FT_NIC_TX : FS_FT_NIC_RX;
+}
+
+static int fpga_ipsec_fs_create_flow_group(struct mlx5_core_dev *dev,
+					   struct mlx5_flow_table *ft,
+					   u32 *in,
+					   unsigned int *group_id,
+					   bool is_egress)
+{
+	int (*create_flow_group)(struct mlx5_core_dev *dev,
+				 struct mlx5_flow_table *ft, u32 *in,
+				 unsigned int *group_id) =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_flow_group;
+	char *misc_params_c = MLX5_ADDR_OF(create_flow_group_in, in,
+					   match_criteria.misc_parameters);
+	u32 saved_outer_esp_spi_mask;
+	u8 match_criteria_enable;
+	int ret;
+
+	if (MLX5_CAP_FLOWTABLE(dev,
+			       flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+		return create_flow_group(dev, ft, in, group_id);
+
+	match_criteria_enable =
+		MLX5_GET(create_flow_group_in, in, match_criteria_enable);
+	saved_outer_esp_spi_mask =
+		MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+	if (!match_criteria_enable || !saved_outer_esp_spi_mask)
+		return create_flow_group(dev, ft, in, group_id);
+
+	MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, 0);
+
+	if (!(*misc_params_c) &&
+	    !memcmp(misc_params_c, misc_params_c + 1, MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
+		MLX5_SET(create_flow_group_in, in, match_criteria_enable,
+			 match_criteria_enable & ~MLX5_MATCH_MISC_PARAMETERS);
+
+	ret = create_flow_group(dev, ft, in, group_id);
+
+	MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, saved_outer_esp_spi_mask);
+	MLX5_SET(create_flow_group_in, in, match_criteria_enable, match_criteria_enable);
+
+	return ret;
+}
+
+static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
+				    struct mlx5_flow_table *ft,
+				    struct mlx5_flow_group *fg,
+				    struct fs_fte *fte,
+				    bool is_egress)
+{
+	int (*create_fte)(struct mlx5_core_dev *dev,
+			  struct mlx5_flow_table *ft,
+			  struct mlx5_flow_group *fg,
+			  struct fs_fte *fte) =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->create_fte;
+	struct mlx5_fpga_device *fdev = dev->fpga;
+	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+	struct mlx5_fpga_ipsec_rule *rule;
+	bool is_esp = fte->action.esp_id;
+	struct mailbox_mod mbox_mod;
+	int ret;
+
+	if (!is_esp ||
+	    !(fte->action.action &
+	      (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+	       MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+		return create_fte(dev, ft, fg, fte);
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return -ENOMEM;
+
+	rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
+	if (IS_ERR(rule->ctx)) {
+		kfree(rule);
+		return PTR_ERR(rule->ctx);
+	}
+
+	rule->fte = fte;
+	WARN_ON(rule_insert(fipsec, rule));
+
+	modify_spec_mailbox(dev, fte, &mbox_mod);
+	ret = create_fte(dev, ft, fg, fte);
+	restore_spec_mailbox(fte, &mbox_mod);
+	if (ret) {
+		_rule_delete(fipsec, rule);
+		mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+		kfree(rule);
+	}
+
+	return ret;
+}
+
+static int fpga_ipsec_fs_update_fte(struct mlx5_core_dev *dev,
+				    struct mlx5_flow_table *ft,
+				    unsigned int group_id,
+				    int modify_mask,
+				    struct fs_fte *fte,
+				    bool is_egress)
+{
+	int (*update_fte)(struct mlx5_core_dev *dev,
+			  struct mlx5_flow_table *ft,
+			  unsigned int group_id,
+			  int modify_mask,
+			  struct fs_fte *fte) =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->update_fte;
+	bool is_esp = fte->action.esp_id;
+	struct mailbox_mod mbox_mod;
+	int ret;
+
+	if (!is_esp ||
+	    !(fte->action.action &
+	      (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+	       MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+		return update_fte(dev, ft, group_id, modify_mask, fte);
+
+	modify_spec_mailbox(dev, fte, &mbox_mod);
+	ret = update_fte(dev, ft, group_id, modify_mask, fte);
+	restore_spec_mailbox(fte, &mbox_mod);
+
+	return ret;
+}
+
+static int fpga_ipsec_fs_delete_fte(struct mlx5_core_dev *dev,
+				    struct mlx5_flow_table *ft,
+				    struct fs_fte *fte,
+				    bool is_egress)
+{
+	int (*delete_fte)(struct mlx5_core_dev *dev,
+			  struct mlx5_flow_table *ft,
+			  struct fs_fte *fte) =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(is_egress))->delete_fte;
+	struct mlx5_fpga_device *fdev = dev->fpga;
+	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+	struct mlx5_fpga_ipsec_rule *rule;
+	bool is_esp = fte->action.esp_id;
+	struct mailbox_mod mbox_mod;
+	int ret;
+
+	if (!is_esp ||
+	    !(fte->action.action &
+	      (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+	       MLX5_FLOW_CONTEXT_ACTION_DECRYPT)))
+		return delete_fte(dev, ft, fte);
+
+	rule = rule_search(fipsec, fte);
+	if (!rule)
+		return -ENOENT;
+
+	mlx5_fpga_ipsec_delete_sa_ctx(rule->ctx);
+	rule_delete(fipsec, rule);
+
+	modify_spec_mailbox(dev, fte, &mbox_mod);
+	ret = delete_fte(dev, ft, fte);
+	restore_spec_mailbox(fte, &mbox_mod);
+
+	return ret;
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_egress(struct mlx5_core_dev *dev,
+					    struct mlx5_flow_table *ft,
+					    u32 *in,
+					    unsigned int *group_id)
+{
+	return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_egress(struct mlx5_core_dev *dev,
+				     struct mlx5_flow_table *ft,
+				     struct mlx5_flow_group *fg,
+				     struct fs_fte *fte)
+{
+	return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_egress(struct mlx5_core_dev *dev,
+				     struct mlx5_flow_table *ft,
+				     unsigned int group_id,
+				     int modify_mask,
+				     struct fs_fte *fte)
+{
+	return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+					true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_egress(struct mlx5_core_dev *dev,
+				     struct mlx5_flow_table *ft,
+				     struct fs_fte *fte)
+{
+	return fpga_ipsec_fs_delete_fte(dev, ft, fte, true);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_flow_group_ingress(struct mlx5_core_dev *dev,
+					     struct mlx5_flow_table *ft,
+					     u32 *in,
+					     unsigned int *group_id)
+{
+	return fpga_ipsec_fs_create_flow_group(dev, ft, in, group_id, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_create_fte_ingress(struct mlx5_core_dev *dev,
+				      struct mlx5_flow_table *ft,
+				      struct mlx5_flow_group *fg,
+				      struct fs_fte *fte)
+{
+	return fpga_ipsec_fs_create_fte(dev, ft, fg, fte, false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_update_fte_ingress(struct mlx5_core_dev *dev,
+				      struct mlx5_flow_table *ft,
+				      unsigned int group_id,
+				      int modify_mask,
+				      struct fs_fte *fte)
+{
+	return fpga_ipsec_fs_update_fte(dev, ft, group_id, modify_mask, fte,
+					false);
+}
+
+static int
+mlx5_fpga_ipsec_fs_delete_fte_ingress(struct mlx5_core_dev *dev,
+				      struct mlx5_flow_table *ft,
+				      struct fs_fte *fte)
+{
+	return fpga_ipsec_fs_delete_fte(dev, ft, fte, false);
+}
+
+static struct mlx5_flow_cmds fpga_ipsec_ingress;
+static struct mlx5_flow_cmds fpga_ipsec_egress;
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+	switch (type) {
+	case FS_FT_NIC_RX:
+		return &fpga_ipsec_ingress;
+	case FS_FT_NIC_TX:
+		return &fpga_ipsec_egress;
+	default:
+		WARN_ON(true);
+		return NULL;
+	}
+}
+
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_conn_attr init_attr = {0};
@@ -637,6 +1228,8 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 	if (!fdev->ipsec)
 		return -ENOMEM;
 
+	fdev->ipsec->fdev = fdev;
+
 	err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
 				     fdev->ipsec->caps);
 	if (err) {
@@ -666,6 +1259,9 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 		goto err_destroy_conn;
 	mutex_init(&fdev->ipsec->sa_hash_lock);
 
+	fdev->ipsec->rules_rb = RB_ROOT;
+	mutex_init(&fdev->ipsec->rules_rb_lock);
+
 	err = mlx5_fpga_ipsec_enable_supported_caps(mdev);
 	if (err) {
 		mlx5_fpga_err(fdev, "Failed to enable IPSec extended capabilities: %d\n",
@@ -687,6 +1283,17 @@ error:
 	return err;
 }
 
+static void destroy_rules_rb(struct rb_root *root)
+{
+	struct mlx5_fpga_ipsec_rule *r, *tmp;
+
+	rbtree_postorder_for_each_entry_safe(r, tmp, root, node) {
+		rb_erase(&r->node, root);
+		mlx5_fpga_ipsec_delete_sa_ctx(r->ctx);
+		kfree(r);
+	}
+}
+
 void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -694,6 +1301,7 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 	if (!mlx5_fpga_is_ipsec_device(mdev))
 		return;
 
+	destroy_rules_rb(&fdev->ipsec->rules_rb);
 	rhashtable_destroy(&fdev->ipsec->sa_hash);
 
 	mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
@@ -701,6 +1309,49 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 	fdev->ipsec = NULL;
 }
 
+void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+	/* ingress */
+	fpga_ipsec_ingress.create_flow_table =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->create_flow_table;
+	fpga_ipsec_ingress.destroy_flow_table =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_table;
+	fpga_ipsec_ingress.modify_flow_table =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->modify_flow_table;
+	fpga_ipsec_ingress.create_flow_group =
+		mlx5_fpga_ipsec_fs_create_flow_group_ingress;
+	fpga_ipsec_ingress.destroy_flow_group =
+		 mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->destroy_flow_group;
+	fpga_ipsec_ingress.create_fte =
+		mlx5_fpga_ipsec_fs_create_fte_ingress;
+	fpga_ipsec_ingress.update_fte =
+		mlx5_fpga_ipsec_fs_update_fte_ingress;
+	fpga_ipsec_ingress.delete_fte =
+		mlx5_fpga_ipsec_fs_delete_fte_ingress;
+	fpga_ipsec_ingress.update_root_ft =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(false))->update_root_ft;
+
+	/* egress */
+	fpga_ipsec_egress.create_flow_table =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->create_flow_table;
+	fpga_ipsec_egress.destroy_flow_table =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_table;
+	fpga_ipsec_egress.modify_flow_table =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->modify_flow_table;
+	fpga_ipsec_egress.create_flow_group =
+		mlx5_fpga_ipsec_fs_create_flow_group_egress;
+	fpga_ipsec_egress.destroy_flow_group =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->destroy_flow_group;
+	fpga_ipsec_egress.create_fte =
+		mlx5_fpga_ipsec_fs_create_fte_egress;
+	fpga_ipsec_egress.update_fte =
+		mlx5_fpga_ipsec_fs_update_fte_egress;
+	fpga_ipsec_egress.delete_fte =
+		mlx5_fpga_ipsec_fs_delete_fte_egress;
+	fpga_ipsec_egress.update_root_ft =
+		mlx5_fs_cmd_get_default(egress_to_fs_ft(true))->update_root_ft;
+}
+
 static int
 mlx5_fpga_esp_validate_xfrm_attrs(struct mlx5_core_dev *mdev,
 				  const struct mlx5_accel_esp_xfrm_attrs *attrs)
@@ -783,3 +1434,76 @@ void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
 	/* assuming no sa_ctx are connected to this xfrm_ctx */
 	kfree(fpga_xfrm);
 }
+
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+			      const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+	struct mlx5_core_dev *mdev = xfrm->mdev;
+	struct mlx5_fpga_device *fdev = mdev->fpga;
+	struct mlx5_fpga_ipsec *fipsec = fdev->ipsec;
+	struct mlx5_fpga_esp_xfrm *fpga_xfrm;
+	struct mlx5_ifc_fpga_ipsec_sa org_hw_sa;
+
+	int err = 0;
+
+	if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs)))
+		return 0;
+
+	if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) {
+		mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (is_v2_sadb_supported(fipsec)) {
+		mlx5_core_warn(mdev, "Modify esp is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	fpga_xfrm = container_of(xfrm, struct mlx5_fpga_esp_xfrm, accel_xfrm);
+
+	mutex_lock(&fpga_xfrm->lock);
+
+	if (!fpga_xfrm->sa_ctx)
+		/* Unbounded xfrm, chane only sw attrs */
+		goto change_sw_xfrm_attrs;
+
+	/* copy original hw sa */
+	memcpy(&org_hw_sa, &fpga_xfrm->sa_ctx->hw_sa, sizeof(org_hw_sa));
+	mutex_lock(&fipsec->sa_hash_lock);
+	/* remove original hw sa from hash */
+	WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+				       &fpga_xfrm->sa_ctx->hash, rhash_sa));
+	/* update hw_sa with new xfrm attrs*/
+	mlx5_fpga_ipsec_build_hw_xfrm(xfrm->mdev, attrs,
+				      &fpga_xfrm->sa_ctx->hw_sa);
+	/* try to insert new hw_sa to hash */
+	err = rhashtable_insert_fast(&fipsec->sa_hash,
+				     &fpga_xfrm->sa_ctx->hash, rhash_sa);
+	if (err)
+		goto rollback_sa;
+
+	/* modify device with new hw_sa */
+	err = mlx5_fpga_ipsec_update_hw_sa(fdev, &fpga_xfrm->sa_ctx->hw_sa,
+					   MLX5_FPGA_IPSEC_CMD_OP_MOD_SA_V2);
+	fpga_xfrm->sa_ctx->hw_sa.ipsec_sa_v1.cmd = 0;
+	if (err)
+		WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash,
+					       &fpga_xfrm->sa_ctx->hash,
+					       rhash_sa));
+rollback_sa:
+	if (err) {
+		/* return original hw_sa to hash */
+		memcpy(&fpga_xfrm->sa_ctx->hw_sa, &org_hw_sa,
+		       sizeof(org_hw_sa));
+		WARN_ON(rhashtable_insert_fast(&fipsec->sa_hash,
+					       &fpga_xfrm->sa_ctx->hash,
+					       rhash_sa));
+	}
+	mutex_unlock(&fipsec->sa_hash_lock);
+
+change_sw_xfrm_attrs:
+	if (!err)
+		memcpy(&xfrm->attrs, attrs, sizeof(xfrm->attrs));
+	mutex_unlock(&fpga_xfrm->lock);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 7ad1e2cd3fb0..2b5e63b0d4d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -35,6 +35,7 @@
 #define __MLX5_FPGA_IPSEC_H__
 
 #include "accel/ipsec.h"
+#include "fs_cmd.h"
 
 #ifdef CONFIG_MLX5_FPGA
 
@@ -52,12 +53,18 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
 
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_build_fs_cmds(void);
 
 struct mlx5_accel_esp_xfrm *
 mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
 			  const struct mlx5_accel_esp_xfrm_attrs *attrs,
 			  u32 flags);
 void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+			      const struct mlx5_accel_esp_xfrm_attrs *attrs);
+
+const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
 
 #else
 
@@ -101,6 +108,10 @@ static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 {
 }
 
+static inline void mlx5_fpga_ipsec_build_fs_cmds(void)
+{
+}
+
 static inline struct mlx5_accel_esp_xfrm *
 mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev,
 			  const struct mlx5_accel_esp_xfrm_attrs *attrs,
@@ -113,6 +124,19 @@ static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm)
 {
 }
 
+static inline int
+mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+			  const struct mlx5_accel_esp_xfrm_attrs *attrs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+	return mlx5_fs_cmd_get_default(type);
+}
+
 #endif /* CONFIG_MLX5_FPGA */
 
 #endif	/* __MLX5_FPGA_SADB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f836e6b76f65..09aad58c8e06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -38,6 +38,7 @@
 #include "fs_cmd.h"
 #include "diag/fs_tracepoint.h"
 #include "accel/ipsec.h"
+#include "fpga/ipsec.h"
 
 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
 					 sizeof(struct init_tree_node))
@@ -2251,6 +2252,10 @@ static struct mlx5_flow_root_namespace
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_namespace *ns;
 
+	if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+	    (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
+		cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
+
 	/* Create the root namespace */
 	root_ns = kvzalloc(sizeof(*root_ns), GFP_KERNEL);
 	if (!root_ns)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 03972eed02cd..08c33657677c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -58,6 +58,7 @@
 #include "eswitch.h"
 #include "lib/mlx5.h"
 #include "fpga/core.h"
+#include "fpga/ipsec.h"
 #include "accel/ipsec.h"
 #include "lib/clock.h"
 
@@ -1658,6 +1659,7 @@ static int __init init(void)
 	get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
 
 	mlx5_core_verify_params();
+	mlx5_fpga_ipsec_build_fs_cmds();
 	mlx5_register_debugfs();
 
 	err = pci_register_driver(&mlx5_core_driver);
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index da6de465ea6d..6c694709b0a2 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -121,6 +121,8 @@ mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
 			   const struct mlx5_accel_esp_xfrm_attrs *attrs,
 			   u32 flags);
 void mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm);
+int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+			       const struct mlx5_accel_esp_xfrm_attrs *attrs);
 
 #else
 
@@ -132,6 +134,9 @@ mlx5_accel_esp_create_xfrm(struct mlx5_core_dev *mdev,
 			   u32 flags) { return ERR_PTR(-EOPNOTSUPP); }
 static inline void
 mlx5_accel_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) {}
+static inline int
+mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
+			   const struct mlx5_accel_esp_xfrm_attrs *attrs) { return -EOPNOTSUPP; }
 
 #endif
 #endif
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 744ea228acea..b957e52434f8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -40,6 +40,8 @@
 
 enum {
 	MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO	= 1 << 16,
+	MLX5_FLOW_CONTEXT_ACTION_ENCRYPT	= 1 << 17,
+	MLX5_FLOW_CONTEXT_ACTION_DECRYPT	= 1 << 18,
 };
 
 enum {
@@ -146,6 +148,7 @@ struct mlx5_flow_act {
 	u32 flow_tag;
 	u32 encap_id;
 	u32 modify_id;
+	uintptr_t esp_id;
 };
 
 #define MLX5_DECLARE_FLOW_ACT(name) \

From 75ef3f551572822a392ca9b03486bf09163cc668 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Thu, 18 Jan 2018 16:31:55 +0200
Subject: [PATCH 0735/1678] net/mlx5e: Added common function for
 to_ipsec_sa_entry

New function for getting driver internal sa entry from xfrm state.
All checks are done in one function.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 59df3dbd2e65..f5b1d60f96f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -51,6 +51,21 @@ struct mlx5e_ipsec_sa_entry {
 	void *hw_context;
 };
 
+static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa;
+
+	if (!x)
+		return NULL;
+
+	sa = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+	if (!sa)
+		return NULL;
+
+	WARN_ON(sa->x != x);
+	return sa;
+}
+
 struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
 					      unsigned int handle)
 {
@@ -312,28 +327,22 @@ out:
 
 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 {
-	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
 
-	if (!x->xso.offload_handle)
+	if (!sa_entry)
 		return;
 
-	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
-	WARN_ON(sa_entry->x != x);
-
 	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
 		mlx5e_ipsec_sadb_rx_del(sa_entry);
 }
 
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 {
-	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
 
-	if (!x->xso.offload_handle)
+	if (!sa_entry)
 		return;
 
-	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
-	WARN_ON(sa_entry->x != x);
-
 	if (sa_entry->hw_context) {
 		mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
 		mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);

From cb01008390bb0645d4728c7f8825e32d4b540a30 Mon Sep 17 00:00:00 2001
From: Aviad Yehezkel <aviadye@mellanox.com>
Date: Thu, 18 Jan 2018 16:02:17 +0200
Subject: [PATCH 0736/1678] net/mlx5: IPSec, Add support for ESN

Currently ESN is not supported with IPSec device offload.

This patch adds ESN support to IPsec device offload.
Implementing new xfrm device operation to synchronize offloading device
ESN with xfrm received SN. New QP command to update SA state at the
following:

           ESN 1                    ESN 2                  ESN 3
|-----------*-----------|-----------*-----------|-----------*
^           ^           ^           ^           ^           ^

^ - marks where QP command invoked to update the SA ESN state
    machine.
| - marks the start of the ESN scope (0-2^32-1). At this point move SA
    ESN overlap bit to zero and increment ESN.
* - marks the middle of the ESN scope (2^31). At this point move SA
    ESN overlap bit to one.

Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Yossef Efraim <yossefe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec.c       | 118 ++++++++++++++++--
 .../mellanox/mlx5/core/en_accel/ipsec.h       |  23 ++++
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c  |  29 ++++-
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h  |   5 +
 .../ethernet/mellanox/mlx5/core/fpga/ipsec.c  |  22 ++++
 include/linux/mlx5/accel.h                    |   2 +
 include/linux/mlx5/mlx5_ifc_fpga.h            |   2 +
 7 files changed, 189 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index f5b1d60f96f5..cf58c9637904 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -38,18 +38,9 @@
 #include <linux/module.h>
 
 #include "en.h"
-#include "accel/ipsec.h"
 #include "en_accel/ipsec.h"
 #include "en_accel/ipsec_rxtx.h"
 
-struct mlx5e_ipsec_sa_entry {
-	struct hlist_node hlist; /* Item in SADB_RX hashtable */
-	unsigned int handle; /* Handle in SADB_RX */
-	struct xfrm_state *x;
-	struct mlx5e_ipsec *ipsec;
-	struct mlx5_accel_esp_xfrm *xfrm;
-	void *hw_context;
-};
 
 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
 {
@@ -121,6 +112,40 @@ static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
 	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
 }
 
+static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+	struct xfrm_replay_state_esn *replay_esn;
+	u32 seq_bottom;
+	u8 overlap;
+	u32 *esn;
+
+	if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) {
+		sa_entry->esn_state.trigger = 0;
+		return false;
+	}
+
+	replay_esn = sa_entry->x->replay_esn;
+	seq_bottom = replay_esn->seq - replay_esn->replay_window + 1;
+	overlap = sa_entry->esn_state.overlap;
+
+	sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x,
+						    htonl(seq_bottom));
+	esn = &sa_entry->esn_state.esn;
+
+	sa_entry->esn_state.trigger = 1;
+	if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
+		++(*esn);
+		sa_entry->esn_state.overlap = 0;
+		return true;
+	} else if (unlikely(!overlap &&
+			    (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
+		sa_entry->esn_state.overlap = 1;
+		return true;
+	}
+
+	return false;
+}
+
 static void
 mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 				   struct mlx5_accel_esp_xfrm_attrs *attrs)
@@ -152,6 +177,14 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 	/* iv len */
 	aes_gcm->icv_len = x->aead->alg_icv_len;
 
+	/* esn */
+	if (sa_entry->esn_state.trigger) {
+		attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+		attrs->esn = sa_entry->esn_state.esn;
+		if (sa_entry->esn_state.overlap)
+			attrs->flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+	}
+
 	/* rx handle */
 	attrs->sa_handle = sa_entry->handle;
 
@@ -187,7 +220,9 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
 		netdev_info(netdev, "Cannot offload compressed xfrm states\n");
 		return -EINVAL;
 	}
-	if (x->props.flags & XFRM_STATE_ESN) {
+	if (x->props.flags & XFRM_STATE_ESN &&
+	    !(mlx5_accel_ipsec_device_caps(priv->mdev) &
+	    MLX5_ACCEL_IPSEC_CAP_ESN)) {
 		netdev_info(netdev, "Cannot offload ESN xfrm states\n");
 		return -EINVAL;
 	}
@@ -277,8 +312,14 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 			netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
 			goto err_entry;
 		}
+	} else {
+		sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+				mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
 	}
 
+	/* check esn */
+	mlx5e_ipsec_update_esn_state(sa_entry);
+
 	/* create xfrm */
 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
 	sa_entry->xfrm =
@@ -344,6 +385,7 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 		return;
 
 	if (sa_entry->hw_context) {
+		flush_workqueue(sa_entry->ipsec->wq);
 		mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
 		mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
 	}
@@ -374,6 +416,12 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 	ipsec->en_priv->ipsec = ipsec;
 	ipsec->no_trailer = !!(mlx5_accel_ipsec_device_caps(priv->mdev) &
 			       MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER);
+	ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0,
+					    priv->netdev->name);
+	if (!ipsec->wq) {
+		kfree(ipsec);
+		return -ENOMEM;
+	}
 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
 	return 0;
 }
@@ -385,6 +433,9 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
 	if (!ipsec)
 		return;
 
+	drain_workqueue(ipsec->wq);
+	destroy_workqueue(ipsec->wq);
+
 	ida_destroy(&ipsec->halloc);
 	kfree(ipsec);
 	priv->ipsec = NULL;
@@ -405,11 +456,58 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 	return true;
 }
 
+struct mlx5e_ipsec_modify_state_work {
+	struct work_struct		work;
+	struct mlx5_accel_esp_xfrm_attrs attrs;
+	struct mlx5e_ipsec_sa_entry	*sa_entry;
+};
+
+static void _update_xfrm_state(struct work_struct *work)
+{
+	int ret;
+	struct mlx5e_ipsec_modify_state_work *modify_work =
+		container_of(work, struct mlx5e_ipsec_modify_state_work, work);
+	struct mlx5e_ipsec_sa_entry *sa_entry = modify_work->sa_entry;
+
+	ret = mlx5_accel_esp_modify_xfrm(sa_entry->xfrm,
+					 &modify_work->attrs);
+	if (ret)
+		netdev_warn(sa_entry->ipsec->en_priv->netdev,
+			    "Not an IPSec offload device\n");
+
+	kfree(modify_work);
+}
+
+static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
+{
+	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
+	struct mlx5e_ipsec_modify_state_work *modify_work;
+	bool need_update;
+
+	if (!sa_entry)
+		return;
+
+	need_update = mlx5e_ipsec_update_esn_state(sa_entry);
+	if (!need_update)
+		return;
+
+	modify_work = kzalloc(sizeof(*modify_work), GFP_ATOMIC);
+	if (!modify_work)
+		return;
+
+	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs);
+	modify_work->sa_entry = sa_entry;
+
+	INIT_WORK(&modify_work->work, _update_xfrm_state);
+	WARN_ON(!queue_work(sa_entry->ipsec->wq, &modify_work->work));
+}
+
 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
 	.xdo_dev_offload_ok	= mlx5e_ipsec_offload_ok,
+	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
 };
 
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index bffc3ed0574a..1198fc1eba4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -40,7 +40,11 @@
 #include <net/xfrm.h>
 #include <linux/idr.h>
 
+#include "accel/ipsec.h"
+
 #define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_IPSEC_ESN_SCOPE_MID 0x80000000L
+
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
 #define MLX5E_METADATA_ETHER_LEN 8
 
@@ -82,6 +86,25 @@ struct mlx5e_ipsec {
 	struct ida halloc;
 	struct mlx5e_ipsec_sw_stats sw_stats;
 	struct mlx5e_ipsec_stats stats;
+	struct workqueue_struct *wq;
+};
+
+struct mlx5e_ipsec_esn_state {
+	u32 esn;
+	u8 trigger: 1;
+	u8 overlap: 1;
+};
+
+struct mlx5e_ipsec_sa_entry {
+	struct hlist_node hlist; /* Item in SADB_RX hashtable */
+	struct mlx5e_ipsec_esn_state esn_state;
+	unsigned int handle; /* Handle in SADB_RX */
+	struct xfrm_state *x;
+	struct mlx5e_ipsec *ipsec;
+	struct mlx5_accel_esp_xfrm *xfrm;
+	void *hw_context;
+	void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x,
+			  struct xfrm_offload *xo);
 };
 
 void mlx5e_ipsec_build_inverse_table(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 64c549a06678..c245d8e78509 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -176,7 +176,30 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
 	}
 }
 
-static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+			    struct xfrm_offload *xo)
+{
+	struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+	__u32 oseq = replay_esn->oseq;
+	int iv_offset;
+	__be64 seqno;
+	u32 seq_hi;
+
+	if (unlikely(skb_is_gso(skb) && oseq < MLX5E_IPSEC_ESN_SCOPE_MID &&
+		     MLX5E_IPSEC_ESN_SCOPE_MID < (oseq - skb_shinfo(skb)->gso_segs))) {
+		seq_hi = xo->seq.hi - 1;
+	} else {
+		seq_hi = xo->seq.hi;
+	}
+
+	/* Place the SN in the IV field */
+	seqno = cpu_to_be64(xo->seq.low + ((u64)seq_hi << 32));
+	iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+	skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+			struct xfrm_offload *xo)
 {
 	int iv_offset;
 	__be64 seqno;
@@ -228,6 +251,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct mlx5e_ipsec_metadata *mdata;
+	struct mlx5e_ipsec_sa_entry *sa_entry;
 	struct xfrm_state *x;
 
 	if (!xo)
@@ -262,7 +286,8 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 		goto drop;
 	}
 	mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
-	mlx5e_ipsec_set_iv(skb, xo);
+	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+	sa_entry->set_iv_op(skb, x, xo);
 	mlx5e_ipsec_set_metadata(skb, mdata, xo);
 
 	return skb;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index e37ae2598dbb..2bfbbef1b054 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -37,6 +37,7 @@
 #ifdef CONFIG_MLX5_EN_IPSEC
 
 #include <linux/skbuff.h>
+#include <net/xfrm.h>
 #include "en.h"
 
 struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
@@ -46,6 +47,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_ipsec_inverse_table_init(void);
 bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
 			       netdev_features_t features);
+void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
+			    struct xfrm_offload *xo);
+void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
+			struct xfrm_offload *xo);
 struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 					  struct mlx5e_tx_wqe *wqe,
 					  struct sk_buff *skb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 7b43fa269117..4f1568528738 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -347,6 +347,11 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
 	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, rx_no_trailer))
 		ret |= MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER;
 
+	if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esn)) {
+		ret |= MLX5_ACCEL_IPSEC_CAP_ESN;
+		ret |= MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN;
+	}
+
 	return ret;
 }
 
@@ -470,6 +475,23 @@ mlx5_fpga_ipsec_build_hw_xfrm(struct mlx5_core_dev *mdev,
 	memcpy(&hw_sa->ipsec_sa_v1.gcm.salt, &aes_gcm->salt,
 	       sizeof(aes_gcm->salt));
 
+	/* esn */
+	if (xfrm_attrs->flags & MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED) {
+		hw_sa->ipsec_sa_v1.flags |= MLX5_FPGA_IPSEC_SA_ESN_EN;
+		hw_sa->ipsec_sa_v1.flags |=
+				(xfrm_attrs->flags &
+				 MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+					MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+		hw_sa->esn = htonl(xfrm_attrs->esn);
+	} else {
+		hw_sa->ipsec_sa_v1.flags &= ~MLX5_FPGA_IPSEC_SA_ESN_EN;
+		hw_sa->ipsec_sa_v1.flags &=
+				~(xfrm_attrs->flags &
+				  MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP) ?
+					MLX5_FPGA_IPSEC_SA_ESN_OVERLAP : 0;
+		hw_sa->esn = 0;
+	}
+
 	/* rx handle */
 	hw_sa->ipsec_sa_v1.sw_sa_handle = htonl(xfrm_attrs->sa_handle);
 
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index 6c694709b0a2..70e7e5673ce9 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -110,6 +110,8 @@ enum mlx5_accel_ipsec_cap {
 	MLX5_ACCEL_IPSEC_CAP_IPV6		= 1 << 3,
 	MLX5_ACCEL_IPSEC_CAP_LSO		= 1 << 4,
 	MLX5_ACCEL_IPSEC_CAP_RX_NO_TRAILER	= 1 << 5,
+	MLX5_ACCEL_IPSEC_CAP_ESN		= 1 << 6,
+	MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN	= 1 << 7,
 };
 
 #ifdef CONFIG_MLX5_ACCEL
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index debcc57de43a..ec052491ba3d 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -468,6 +468,8 @@ struct mlx5_ifc_fpga_ipsec_cmd_cap {
 } __packed;
 
 enum mlx5_ifc_fpga_ipsec_sa_flags {
+	MLX5_FPGA_IPSEC_SA_ESN_EN = BIT(0),
+	MLX5_FPGA_IPSEC_SA_ESN_OVERLAP = BIT(1),
 	MLX5_FPGA_IPSEC_SA_IPV6 = BIT(2),
 	MLX5_FPGA_IPSEC_SA_DIR_SX = BIT(3),
 	MLX5_FPGA_IPSEC_SA_SPI_EN = BIT(4),

From 31135eb3887daf2ed3e88fbefc36243357a9008f Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 28 Feb 2018 11:18:13 +0200
Subject: [PATCH 0737/1678] net/mlx5: Fix wrongly assigned CQ reference counter

The kernel compiled with CONFIG_REFCOUNT_FULL produces the following
error. The reason to it that initial value of refcount_t is supposed
to be more than 0, change it.

[    3.106634] ------------[ cut here ]------------
[    3.107756] refcount_t: increment on 0; use-after-free.
[    3.109130] WARNING: CPU: 0 PID: 1 at lib/refcount.c:153 refcount_inc+0x27/0x30
[    3.110085] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1-00028-gf683e04bdccc #137
[    3.110085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
[    3.110085] RIP: 0010:refcount_inc+0x27/0x30
[    3.110085] RSP: 0000:ffffaa620000fba0 EFLAGS: 00010286
[    3.110085] RAX: 0000000000000000 RBX: ffff9a6d1a1821c8 RCX: ffffffff98a50f48
[    3.110085] RDX: 0000000000000001 RSI: 0000000000000086 RDI: 0000000000000246
[    3.110085] RBP: ffff9a6d1ac800a0 R08: 0000000000000289 R09: 000000000000000a
[    3.110085] R10: fffff03bc0682840 R11: ffffffff9949856d R12: ffff9a6d1b4a4000
[    3.110085] R13: 0000000000000000 R14: ffff9a6d1a0a6c00 R15: ffffaa620000fc5c
[    3.110085] FS:  0000000000000000(0000) GS:ffff9a6d1fc00000(0000) knlGS:0000000000000000
[    3.110085] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.110085] CR2: 0000000000000000 CR3: 000000000ba0a000 CR4: 00000000000006b0
[    3.110085] Call Trace:
[    3.110085]  mlx5_core_create_cq+0xde/0x250
[    3.110085]  ? __kmalloc+0x1ce/0x1e0
[    3.110085]  mlx5e_create_cq+0x15c/0x1e0
[    3.110085]  mlx5e_open_drop_rq+0xea/0x190
[    3.110085]  mlx5e_attach_netdev+0x53/0x140
[    3.110085]  mlx5e_attach+0x3d/0x60
[    3.110085]  mlx5e_add+0x11d/0x2f0
[    3.110085]  mlx5_add_device+0x77/0x170
[    3.110085]  mlx5_register_interface+0x74/0xc0
[    3.110085]  ? set_debug_rodata+0x11/0x11
[    3.110085]  init+0x67/0x72
[    3.110085]  ? mlx4_en_init_ptys2ethtool_map+0x346/0x346
[    3.110085]  do_one_initcall+0x98/0x147
[    3.110085]  ? set_debug_rodata+0x11/0x11
[    3.110085]  kernel_init_freeable+0x164/0x1e0
[    3.110085]  ? rest_init+0xb0/0xb0
[    3.110085]  kernel_init+0xa/0x100
[    3.110085]  ret_from_fork+0x35/0x40
[    3.110085] Code: 00 00 00 00 e8 ab ff ff ff 84 c0 74 02 f3 c3 80 3d 3b c3 64 01 00 75 f5 48 c7 c7 68 0b 81 98 c6 05 2b c3 64 01 01 e8 79 d7 a3 ff <0f> ff c3 66 0f 1f 44 00 00 8b 06 83 f8 ff 74 39 31 c9 39 f8 89
[    3.110085] ---[ end trace a0068e1c68438a74 ]---

Fixes: f105b45bf77c ("net/mlx5: CQ hold/put API")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 669ed16938b3..a4179122a279 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -109,8 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
 	cq->eq         = eq;
-	refcount_set(&cq->refcount, 0);
-	mlx5_cq_hold(cq);
+	refcount_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 	if (!cq->comp)
 		cq->comp = mlx5_add_cq_to_tasklet;

From 95da0cdb723260362fc126a563285ac66a193da7 Mon Sep 17 00:00:00 2001
From: Teng Qin <qinteng@fb.com>
Date: Tue, 6 Mar 2018 10:55:01 -0800
Subject: [PATCH 0738/1678] bpf: add support to read sample address in bpf
 program

This commit adds new field "addr" to bpf_perf_event_data which could be
read and used by bpf programs attached to perf events. The value of the
field is copied from bpf_perf_event_data_kern.addr and contains the
address value recorded by specifying sample_type with PERF_SAMPLE_ADDR
when calling perf_event_open.

Signed-off-by: Teng Qin <qinteng@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf_perf_event.h |  1 +
 kernel/trace/bpf_trace.c            | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/include/uapi/linux/bpf_perf_event.h
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
 struct bpf_perf_event_data {
 	bpf_user_pt_regs_t regs;
 	__u64 sample_period;
+	__u64 addr;
 };
 
 #endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c0a9e310d715..c634e093951f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -726,8 +726,7 @@ const struct bpf_prog_ops tracepoint_prog_ops = {
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    struct bpf_insn_access_aux *info)
 {
-	const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
-					 sample_period);
+	const int size_u64 = sizeof(u64);
 
 	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
 		return false;
@@ -738,8 +737,13 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
 
 	switch (off) {
 	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
-		bpf_ctx_record_field_size(info, size_sp);
-		if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
+		bpf_ctx_record_field_size(info, size_u64);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
+			return false;
+		break;
+	case bpf_ctx_range(struct bpf_perf_event_data, addr):
+		bpf_ctx_record_field_size(info, size_u64);
+		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
 			return false;
 		break;
 	default:
@@ -766,6 +770,14 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
 				      bpf_target_off(struct perf_sample_data, period, 8,
 						     target_size));
 		break;
+	case offsetof(struct bpf_perf_event_data, addr):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+						       data), si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_perf_event_data_kern, data));
+		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct perf_sample_data, addr, 8,
+						     target_size));
+		break;
 	default:
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
 						       regs), si->dst_reg, si->src_reg,

From 12fe12253c56a26e591ceefbdf0998b391022003 Mon Sep 17 00:00:00 2001
From: Teng Qin <qinteng@fb.com>
Date: Tue, 6 Mar 2018 10:55:02 -0800
Subject: [PATCH 0739/1678] samples/bpf: add example to test reading address

This commit adds additional test in the trace_event example, by
attaching the bpf program to MEM_UOPS_RETIRED.LOCK_LOADS event with
PERF_SAMPLE_ADDR requested, and print the lock address value read from
the bpf program to trace_pipe.

Signed-off-by: Teng Qin <qinteng@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/trace_event_kern.c |  4 ++++
 samples/bpf/trace_event_user.c | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index a77a583d94d4..7068fbdde951 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -39,6 +39,7 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
 {
 	char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
 	char time_fmt2[] = "Get Time Failed, ErrCode: %d";
+	char addr_fmt[] = "Address recorded on event: %llx";
 	char fmt[] = "CPU-%d period %lld ip %llx";
 	u32 cpu = bpf_get_smp_processor_id();
 	struct bpf_perf_event_value value_buf;
@@ -64,6 +65,9 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
 	else
 	  bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
 
+	if (ctx->addr != 0)
+	  bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);
+
 	val = bpf_map_lookup_elem(&counts, &key);
 	if (val)
 		(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index bf4f1b6d9a52..56f7a259a7c9 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -215,6 +215,17 @@ static void test_bpf_perf_event(void)
 		/* Intel Instruction Retired */
 		.config = 0xc0,
 	};
+	struct perf_event_attr attr_type_raw_lock_load = {
+		.sample_freq = SAMPLE_FREQ,
+		.freq = 1,
+		.type = PERF_TYPE_RAW,
+		/* Intel MEM_UOPS_RETIRED.LOCK_LOADS */
+		.config = 0x21d0,
+		/* Request to record lock address from PEBS */
+		.sample_type = PERF_SAMPLE_ADDR,
+		/* Record address value requires precise event */
+		.precise_ip = 2,
+	};
 
 	printf("Test HW_CPU_CYCLES\n");
 	test_perf_event_all_cpu(&attr_type_hw);
@@ -236,6 +247,10 @@ static void test_bpf_perf_event(void)
 	test_perf_event_all_cpu(&attr_type_raw);
 	test_perf_event_task(&attr_type_raw);
 
+	printf("Test Lock Load\n");
+	test_perf_event_all_cpu(&attr_type_raw_lock_load);
+	test_perf_event_task(&attr_type_raw_lock_load);
+
 	printf("*** PASS ***\n");
 }
 

From ae5799dc7ec77fe0382d58cdcba9f6d1204157ae Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 8 Mar 2018 10:29:30 +0100
Subject: [PATCH 0740/1678] ipvlan: properly annotate rx_handler access

The rx_handler field is rcu-protected, but I forgot to use the
proper accessor while refactoring netif_is_ipvlan_port(). Such
function only check the rx_handler value, so it is safe, but we need
to properly read rx_handler via rcu_access_pointer() to avoid sparse
warnings.

Fixes: 1ec54cb44e67 ("net: unpollute priv_flags space")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index c818b9bdab6e..adb826f55e60 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -180,7 +180,7 @@ int ipvlan_link_register(struct rtnl_link_ops *ops);
 
 static inline bool netif_is_ipvlan_port(const struct net_device *dev)
 {
-	return dev->rx_handler == ipvlan_handle_frame;
+	return rcu_access_pointer(dev->rx_handler) == ipvlan_handle_frame;
 }
 
 #endif /* __IPVLAN_H */

From dcf1bcb6aed7785ccb098163cca5a2f9e4175663 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Thu, 8 Mar 2018 11:17:23 +0100
Subject: [PATCH 0741/1678] selftests/net: enable fragments for
 fib-onlink-tests

We miss CONFIG_* fragments so test fib-onlink-tests.sh can do:
ip li add lisa type vrf table 1101
ip li add veth1 type veth peer name veth2

And the follow message occurs if it isn't enabled:
Configuring interfaces
RTNETLINK answers: Operation not supported

This enables for NET_NRF (and friends) and VETH so we can create a vrf
table and veth.

Fixes: 153e1b84f477 ("selftests: Add FIB onlink tests")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/config | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 7177bea1fdfa..6a75a3ea44ad 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -2,3 +2,8 @@ CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
 CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y

From 67ae686b3e1422a819500f35152cdd74f6dab6ce Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Thu, 8 Mar 2018 12:52:25 +0200
Subject: [PATCH 0742/1678] devlink: Change dpipe/resource get privileges

Let dpipe/resource be retrieved by unprivileged users.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1b5bf0d1cee9..f23e5ed7c90f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2744,22 +2744,22 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
 		.doit = devlink_nl_cmd_dpipe_table_get,
 		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
 		.doit = devlink_nl_cmd_dpipe_entries_get,
 		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
 		.doit = devlink_nl_cmd_dpipe_headers_get,
 		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
@@ -2779,8 +2779,8 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.cmd = DEVLINK_CMD_RESOURCE_DUMP,
 		.doit = devlink_nl_cmd_resource_dump,
 		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
 	},
 	{
 		.cmd = DEVLINK_CMD_RELOAD,

From f5e084b82783baca0df3c0d27bda2926ceaa1caa Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 8 Mar 2018 19:41:50 +0800
Subject: [PATCH 0743/1678] net: hns3: VF should get the real rss_size instead
 of rss_size_max

VF driver should get the real rss_size which is assigned
by host PF, not rss_size_max.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index f38fc5ce9f51..31383a61d290 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -291,7 +291,7 @@ static int hclge_get_vf_queue_info(struct hclge_vport *vport,
 
 	/* get the queue related info */
 	memcpy(&resp_data[0], &vport->alloc_tqps, sizeof(u16));
-	memcpy(&resp_data[2], &hdev->rss_size_max, sizeof(u16));
+	memcpy(&resp_data[2], &vport->nic.kinfo.rss_size, sizeof(u16));
 	memcpy(&resp_data[4], &hdev->num_desc, sizeof(u16));
 	memcpy(&resp_data[6], &hdev->rx_buf_len, sizeof(u16));
 

From 090e3b5350d995c8b8d93f817dccd8c1ce9952c5 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 8 Mar 2018 19:41:51 +0800
Subject: [PATCH 0744/1678] net: hns3: set the cmdq out_vld bit to 0 after used

Driver check the out_vld bit when get a new cmdq BD, if the bit is 1,
the BD is valid. driver Should set the bit 0 after used and hw will
set the bit 1 if get a valid BD.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c   | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 31383a61d290..6d48ebfcc509 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -410,6 +410,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 				req->msg[0]);
 			break;
 		}
+		crq->desc[crq->next_to_use].flag = 0;
 		hclge_mbx_ring_ptr_move_crq(crq);
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index e39cad285fa9..18283ef4ce81 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -171,6 +171,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 				req->msg[0]);
 			break;
 		}
+		crq->desc[crq->next_to_use].flag = 0;
 		hclge_mbx_ring_ptr_move_crq(crq);
 		flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
 	}

From f18f0d4d68096ff73eea93c80b1fe5df5514b07f Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 8 Mar 2018 19:41:52 +0800
Subject: [PATCH 0745/1678] net: hns3: fix endian issue when PF get mbx message
 flag

This patch fixes the endian issue when PF get mbx message flag.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 6d48ebfcc509..f332de6004fb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -333,11 +333,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 	struct hclge_mbx_vf_to_pf_cmd *req;
 	struct hclge_vport *vport;
 	struct hclge_desc *desc;
-	int ret;
+	int ret, flag;
 
+	flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
 	/* handle all the mailbox requests in the queue */
-	while (hnae_get_bit(crq->desc[crq->next_to_use].flag,
-			    HCLGE_CMDQ_RX_OUTVLD_B)) {
+	while (hnae_get_bit(flag, HCLGE_CMDQ_RX_OUTVLD_B)) {
 		desc = &crq->desc[crq->next_to_use];
 		req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
 
@@ -412,6 +412,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		}
 		crq->desc[crq->next_to_use].flag = 0;
 		hclge_mbx_ring_ptr_move_crq(crq);
+		flag = le16_to_cpu(crq->desc[crq->next_to_use].flag);
 	}
 
 	/* Write back CMDQ_RQ header pointer, M7 need this pointer */

From 814e0274fd830c1611715ab2c1ba4a7bdb97121b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 8 Mar 2018 19:41:53 +0800
Subject: [PATCH 0746/1678] net: hns3: fix the queue id for tqp enable&&reset

Command HCLGE_OPC_CFG_COM_TQP_QUEUE should use queue id in the
function, but command HCLGE_OPC_RESET_TQP_QUEUE should use global
queue id.
This patch fixes the queue id about queue enable/disable/reset.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 50 +++++++++----------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 32bc6f68e297..39bc0e6d6ab4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3717,20 +3717,11 @@ static int hclge_ae_start(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	int i, queue_id, ret;
+	int i, ret;
 
-	for (i = 0; i < vport->alloc_tqps; i++) {
-		/* todo clear interrupt */
-		/* ring enable */
-		queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
-		if (queue_id < 0) {
-			dev_warn(&hdev->pdev->dev,
-				 "Get invalid queue id, ignore it\n");
-			continue;
-		}
+	for (i = 0; i < vport->alloc_tqps; i++)
+		hclge_tqp_enable(hdev, i, 0, true);
 
-		hclge_tqp_enable(hdev, queue_id, 0, true);
-	}
 	/* mac enable */
 	hclge_cfg_mac_mode(hdev, true);
 	clear_bit(HCLGE_STATE_DOWN, &hdev->state);
@@ -3750,19 +3741,11 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	int i, queue_id;
+	int i;
 
-	for (i = 0; i < vport->alloc_tqps; i++) {
-		/* Ring disable */
-		queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
-		if (queue_id < 0) {
-			dev_warn(&hdev->pdev->dev,
-				 "Get invalid queue id, ignore it\n");
-			continue;
-		}
+	for (i = 0; i < vport->alloc_tqps; i++)
+		hclge_tqp_enable(hdev, i, 0, false);
 
-		hclge_tqp_enable(hdev, queue_id, 0, false);
-	}
 	/* Mac disable */
 	hclge_cfg_mac_mode(hdev, false);
 
@@ -4848,21 +4831,36 @@ static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
 	return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
 }
 
+static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
+					  u16 queue_id)
+{
+	struct hnae3_queue *queue;
+	struct hclge_tqp *tqp;
+
+	queue = handle->kinfo.tqp[queue_id];
+	tqp = container_of(queue, struct hclge_tqp, q);
+
+	return tqp->index;
+}
+
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 	int reset_try_times = 0;
 	int reset_status;
+	u16 queue_gid;
 	int ret;
 
+	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
+
 	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
 	if (ret) {
 		dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
 		return;
 	}
 
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_id, true);
+	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
 	if (ret) {
 		dev_warn(&hdev->pdev->dev,
 			 "Send reset tqp cmd fail, ret = %d\n", ret);
@@ -4873,7 +4871,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
 		/* Wait for tqp hw reset */
 		msleep(20);
-		reset_status = hclge_get_reset_status(hdev, queue_id);
+		reset_status = hclge_get_reset_status(hdev, queue_gid);
 		if (reset_status)
 			break;
 	}
@@ -4883,7 +4881,7 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 		return;
 	}
 
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_id, false);
+	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
 	if (ret) {
 		dev_warn(&hdev->pdev->dev,
 			 "Deassert the soft reset fail, ret = %d\n", ret);

From 678335a123e98e5af8ec50e04f16b59643e2af81 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 8 Mar 2018 19:41:54 +0800
Subject: [PATCH 0747/1678] net: hns3: set the max ring num when alloc netdev

HNS3 driver should alloc netdev with max support ring num, as
driver support change netdev count by ethtool -L.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 601b6295d3f8..c93694510027 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -249,6 +249,16 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev)
 	return 0;
 }
 
+static u16 hns3_get_max_available_channels(struct hnae3_handle *h)
+{
+	u16 free_tqps, max_rss_size, max_tqps;
+
+	h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
+	max_tqps = h->kinfo.num_tc * max_rss_size;
+
+	return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
+}
+
 static int hns3_nic_net_up(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -3013,7 +3023,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	int ret;
 
 	netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
-				   handle->kinfo.num_tqps);
+				   hns3_get_max_available_channels(handle));
 	if (!netdev)
 		return -ENOMEM;
 
@@ -3336,17 +3346,6 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
 	return ret;
 }
 
-static u16 hns3_get_max_available_channels(struct net_device *netdev)
-{
-	struct hnae3_handle *h = hns3_get_handle(netdev);
-	u16 free_tqps, max_rss_size, max_tqps;
-
-	h->ae_algo->ops->get_tqps_and_rss_info(h, &free_tqps, &max_rss_size);
-	max_tqps = h->kinfo.num_tc * max_rss_size;
-
-	return min_t(u16, max_tqps, (free_tqps + h->kinfo.num_tqps));
-}
-
 static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -3397,12 +3396,12 @@ int hns3_set_channels(struct net_device *netdev,
 	if (ch->rx_count || ch->tx_count)
 		return -EINVAL;
 
-	if (new_tqp_num > hns3_get_max_available_channels(netdev) ||
+	if (new_tqp_num > hns3_get_max_available_channels(h) ||
 	    new_tqp_num < kinfo->num_tc) {
 		dev_err(&netdev->dev,
 			"Change tqps fail, the tqp range is from %d to %d",
 			kinfo->num_tc,
-			hns3_get_max_available_channels(netdev));
+			hns3_get_max_available_channels(h));
 		return -EINVAL;
 	}
 

From cc719218e9539720160046a49369dc44dce1889b Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Thu, 8 Mar 2018 19:41:55 +0800
Subject: [PATCH 0748/1678] net: hns3: add support for VF driver inner
 interface hclgevf_ops.get_tqps_and_rss_info

This patch adds support for VF driver inner interface
hclgevf_ops.get_tqps_and_rss_info. This interface will be
used in the initialization process.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 0d89965f7928..c9a916a3d6e8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1447,6 +1447,15 @@ static void hclgevf_get_channels(struct hnae3_handle *handle,
 	ch->combined_count = hdev->num_tqps;
 }
 
+static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle,
+					  u16 *free_tqps, u16 *max_rss_size)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	*free_tqps = 0;
+	*max_rss_size = hdev->rss_size_max;
+}
+
 static const struct hnae3_ae_ops hclgevf_ops = {
 	.init_ae_dev = hclgevf_init_ae_dev,
 	.uninit_ae_dev = hclgevf_uninit_ae_dev,
@@ -1477,6 +1486,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.get_fw_version = hclgevf_get_fw_version,
 	.set_vlan_filter = hclgevf_set_vlan_filter,
 	.get_channels = hclgevf_get_channels,
+	.get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
 };
 
 static struct hnae3_ae_algo ae_algovf = {

From 459d153d9916ea48b1550bbb6f2959dc03bff011 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Tue, 6 Mar 2018 18:11:14 +0100
Subject: [PATCH 0749/1678] net/sched: cls_flower: Add support to handle first
 frag as match field

Allow setting firstfrag as matching option in tc flower classifier.

 # tc filter add dev eth0 protocol ip parent ffff: \
     flower indev eth0 \
        ip_flags firstfrag
     action mirred egress redirect dev eth1

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h | 1 +
 net/sched/cls_flower.c       | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7cafb26df555..be05e66c167b 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -475,6 +475,7 @@ enum {
 
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
 };
 
 /* Match-all classifier */
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7d0ce2c40f93..d964e60c730e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -511,6 +511,9 @@ static int fl_set_key_flags(struct nlattr **tb,
 
 	fl_set_key_flag(key, mask, flags_key, flags_mask,
 			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+	fl_set_key_flag(key, mask, flags_key, flags_mask,
+			TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+			FLOW_DIS_FIRST_FRAG);
 
 	return 0;
 }
@@ -1130,6 +1133,9 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 
 	fl_get_key_flag(flags_key, flags_mask, &key, &mask,
 			TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+	fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+			TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
+			FLOW_DIS_FIRST_FRAG);
 
 	_key = cpu_to_be32(key);
 	_mask = cpu_to_be32(mask);

From 997266a4a02de882de11c7abecad0a3a42ac84b3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:39:06 +0300
Subject: [PATCH 0750/1678] net: Convert ip6 tables pernet_operations

The pernet_operations:

    ip6table_filter_net_ops
    ip6table_mangle_net_ops
    ip6table_nat_net_ops
    ip6table_raw_net_ops
    ip6table_security_net_ops

have exit methods, which call ip6t_unregister_table().
ip6table_filter_net_ops has init method registering
filter table.

Since there must not be in-flight ipv6 packets at the time
of pernet_operations execution and since pernet_operations
don't send ipv6 packets each other, these pernet_operations
are safe to be async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6table_filter.c   | 1 +
 net/ipv6/netfilter/ip6table_mangle.c   | 1 +
 net/ipv6/netfilter/ip6table_nat.c      | 1 +
 net/ipv6/netfilter/ip6table_raw.c      | 1 +
 net/ipv6/netfilter/ip6table_security.c | 1 +
 5 files changed, 5 insertions(+)

diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 1343077dde93..06561c84c0bc 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -87,6 +87,7 @@ static void __net_exit ip6table_filter_net_exit(struct net *net)
 static struct pernet_operations ip6table_filter_net_ops = {
 	.init = ip6table_filter_net_init,
 	.exit = ip6table_filter_net_exit,
+	.async = true,
 };
 
 static int __init ip6table_filter_init(void)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b0524b18c4fb..a11e25936b45 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -107,6 +107,7 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_mangle_net_ops = {
 	.exit = ip6table_mangle_net_exit,
+	.async = true,
 };
 
 static int __init ip6table_mangle_init(void)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 47306e45a80a..4475fd300bb6 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -131,6 +131,7 @@ static void __net_exit ip6table_nat_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_nat_net_ops = {
 	.exit	= ip6table_nat_net_exit,
+	.async	= true,
 };
 
 static int __init ip6table_nat_init(void)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 710fa0806c37..a88f3b1995b1 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -75,6 +75,7 @@ static void __net_exit ip6table_raw_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_raw_net_ops = {
 	.exit = ip6table_raw_net_exit,
+	.async = true,
 };
 
 static int __init ip6table_raw_init(void)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index cf26ccb04056..320048c008dc 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -74,6 +74,7 @@ static void __net_exit ip6table_security_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_security_net_ops = {
 	.exit = ip6table_security_net_exit,
+	.async = true,
 };
 
 static int __init ip6table_security_init(void)

From 649b9826cc733fe410207d28d94984354e023b21 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:39:14 +0300
Subject: [PATCH 0751/1678] net: Convert xfrm_user_net_ops

These pernet_operations create and destroy net::xfrm::nlsk
socket of NETLINK_XFRM. There is only entry point, where
it's dereferenced, it's xfrm_user_rcv_msg(). There is no
in-kernel senders to this socket.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7f52b8eb177d..aff2e84ec761 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3258,6 +3258,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
 static struct pernet_operations xfrm_user_net_ops = {
 	.init	    = xfrm_user_net_init,
 	.exit_batch = xfrm_user_net_exit,
+	.async	    = true,
 };
 
 static int __init xfrm_user_init(void)

From c7c5e435e44e585f84472f88b323553c794844c4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:39:23 +0300
Subject: [PATCH 0752/1678] net: Convert nf_tables_net_ops

These pernet_operations looks nicely separated per-net.
Exit method unregisters net's nf tables objects.
We allow them be executed in parallel.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_tables_api.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 558593e6a0a3..8e19c86d1aa6 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6596,6 +6596,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 static struct pernet_operations nf_tables_net_ops = {
 	.init	= nf_tables_init_net,
 	.exit	= nf_tables_exit_net,
+	.async	= true,
 };
 
 static int __init nf_tables_module_init(void)

From 5a8e9be69d163fcd3442c4ed8fbaaaf0c7f464e6 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:39:33 +0300
Subject: [PATCH 0753/1678] net: Convert nfnetlink_net_ops

These pernet_operations create and destroy net::nfnl
socket of NETLINK_NETFILTER code. There are no other
places, where such type the socket is created, except
these pernet_operations. It seem other pernet_operations
depending on CONFIG_NETFILTER_NETLINK send messages
to this socket. So, we mark it async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 03ead8a9e90c..84fc4954862d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -566,6 +566,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 static struct pernet_operations nfnetlink_net_ops = {
 	.init		= nfnetlink_net_init,
 	.exit_batch	= nfnetlink_net_exit_batch,
+	.async		= true,
 };
 
 static int __init nfnetlink_init(void)

From cf51503a03f7ff89d8152a3132843330be90729e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:39:42 +0300
Subject: [PATCH 0754/1678] net: Convert nfnl_acct_ops

These pernet_operations look closed in themself,
and there are no other users of net::nfnl_acct_list
outside. They are safe to be executed for several
net namespaces in parallel.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_acct.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 88d427f9f9e6..8d9f18bb8840 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -515,6 +515,7 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
 static struct pernet_operations nfnl_acct_ops = {
         .init   = nfnl_acct_net_init,
         .exit   = nfnl_acct_net_exit,
+	.async	= true,
 };
 
 static int __init nfnl_acct_init(void)

From ffdf72bc1eed9a56aa266bde2d425f85b4d4ca18 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:39:51 +0300
Subject: [PATCH 0755/1678] net: Convert cttimeout_ops

These pernet_operations also look closed in themself.
Exit method touch only per-net structures, so it's
safe to execute them for several net namespaces in parallel.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_cttimeout.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 95b04702a655..6819300f7fb7 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -586,6 +586,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 static struct pernet_operations cttimeout_ops = {
 	.init	= cttimeout_net_init,
 	.exit	= cttimeout_net_exit,
+	.async	= true,
 };
 
 static int __init cttimeout_init(void)

From 74f26bbf505a8e1bb9a6ad9726d5bbe625607783 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:00 +0300
Subject: [PATCH 0756/1678] net: Convert nfnl_log_net_ops

These pernet_operations create and destroy /proc entries.
Also, exit method unsets nfulnl_logger. The logger is not
set by default, and it becomes bound via userspace request.
So, they look safe to be made async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 7b46aa4c478d..b21ef79849a1 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1108,6 +1108,7 @@ static struct pernet_operations nfnl_log_net_ops = {
 	.exit	= nfnl_log_net_exit,
 	.id	= &nfnl_log_net_id,
 	.size	= sizeof(struct nfnl_log_net),
+	.async	= true,
 };
 
 static int __init nfnetlink_log_init(void)

From bd54dce0796522b669f5be45b96e02fa94738de9 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:09 +0300
Subject: [PATCH 0757/1678] net: Convert nfnl_queue_net_ops

These pernet_operations register and unregister net::nf::queue_handler
and /proc entry. The handler is accessed only under RCU, so this looks
safe to convert them.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_queue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8bba23160a68..59e2833c17f1 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1528,6 +1528,7 @@ static struct pernet_operations nfnl_queue_net_ops = {
 	.exit_batch	= nfnl_queue_net_exit_batch,
 	.id		= &nfnl_queue_net_id,
 	.size		= sizeof(struct nfnl_queue_net),
+	.async		= true,
 };
 
 static int __init nfnetlink_queue_init(void)

From 59d269731e2bcfb72d29f2e0281d6631aef1ff8e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:19 +0300
Subject: [PATCH 0758/1678] net: Convert pg_net_ops

These pernet_operations create per-net pktgen threads
and /proc entries. These pernet subsys looks closed
in itself, and there are no pernet_operations outside
this file, which are interested in the threads.
Init and/or exit methods look safe to be executed
in parallel.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b8ab5c829511..d81bddd1bb80 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3851,6 +3851,7 @@ static struct pernet_operations pg_net_ops = {
 	.exit = pg_net_exit,
 	.id   = &pg_net_id,
 	.size = sizeof(struct pktgen_net),
+	.async = true,
 };
 
 static int __init pg_init(void)

From 93623f2b00297f0eb4437fc2e47ad0d4fd58a3ad Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:28 +0300
Subject: [PATCH 0759/1678] net: Convert arptable_filter_net_ops

These pernet_operations unregister net::ipv4::arptable_filter.
Another net/pernet_operations do not send arp packets to foreign
net namespaces. So, we mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arptable_filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 8f8713b4388f..49c2490193ae 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -65,6 +65,7 @@ static void __net_exit arptable_filter_net_exit(struct net *net)
 
 static struct pernet_operations arptable_filter_net_ops = {
 	.exit = arptable_filter_net_exit,
+	.async = true,
 };
 
 static int __init arptable_filter_init(void)

From 7ba81869d1f6f985e64031e161b4c009cc15be99 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:36 +0300
Subject: [PATCH 0760/1678] net: Convert iptable_mangle_net_ops

These pernet_operations unregister net::ipv4::iptable_mangle table.
Another net/pernet_operations do not send ipv4 packets to foreign
net namespaces. So, we mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_mangle.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index dea138ca8925..f6074059531a 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -113,6 +113,7 @@ static void __net_exit iptable_mangle_net_exit(struct net *net)
 
 static struct pernet_operations iptable_mangle_net_ops = {
 	.exit = iptable_mangle_net_exit,
+	.async = true,
 };
 
 static int __init iptable_mangle_init(void)

From 06a8a67b5dac661a0f0b865926ceab2e62d512cd Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:45 +0300
Subject: [PATCH 0761/1678] net: Convert iptable_nat_net_ops

These pernet_operations unregister net::ipv4::nat_table table.
Another net/pernet_operations do not send ipv4 packets to foreign
net namespaces. So, we mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_nat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 0f7255cc65ee..b771af74be79 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -129,6 +129,7 @@ static void __net_exit iptable_nat_net_exit(struct net *net)
 
 static struct pernet_operations iptable_nat_net_ops = {
 	.exit	= iptable_nat_net_exit,
+	.async	= true,
 };
 
 static int __init iptable_nat_init(void)

From 65f828c35261c46d848e9d789ccf29c2fe7127a8 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:40:58 +0300
Subject: [PATCH 0762/1678] net: Convert iptable_raw_net_ops

These pernet_operations unregister net::ipv4::iptable_raw table.
Another net/pernet_operations do not send ipv4 packets to foreign
net namespaces. So, we mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_raw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 960625aabf04..963753e50842 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -76,6 +76,7 @@ static void __net_exit iptable_raw_net_exit(struct net *net)
 
 static struct pernet_operations iptable_raw_net_ops = {
 	.exit = iptable_raw_net_exit,
+	.async = true,
 };
 
 static int __init iptable_raw_init(void)

From 8dbc6e2eaeccccecd23888bd14c10a2c384c280f Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:41:07 +0300
Subject: [PATCH 0763/1678] net: Convert iptable_security_net_ops

These pernet_operations unregister net::ipv4::iptable_security table.
Another net/pernet_operations do not send ipv4 packets to foreign
net namespaces. So, we mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/iptable_security.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index e5379fe57b64..c40d6b3d8b6a 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -76,6 +76,7 @@ static void __net_exit iptable_security_net_exit(struct net *net)
 
 static struct pernet_operations iptable_security_net_ops = {
 	.exit = iptable_security_net_exit,
+	.async = true,
 };
 
 static int __init iptable_security_init(void)

From e8a95ad46378162f22c9293bde276d7c4030f31e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:41:16 +0300
Subject: [PATCH 0764/1678] net: Convert ipv4_net_ops

These pernet_operations register and unregister bunch
of nf_conntrack_l4proto. Exit method unregisters related
sysctl, init method calls init_net and get_net_proto.
The whole builtin_l4proto4 array has pretty simple
init_net and get_net_proto methods. The first one register
sysctl table, the second one is just RO memory dereference.
So, these pernet_operations are safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index b50721d9d30e..6531f69db010 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -399,6 +399,7 @@ static struct pernet_operations ipv4_net_ops = {
 	.exit = ipv4_net_exit,
 	.id = &conntrack4_net_id,
 	.size = sizeof(struct conntrack4_net),
+	.async = true,
 };
 
 static int __init nf_conntrack_l3proto_ipv4_init(void)

From 1fd2c55705aebac31bf8f759c0750dc3c796cf47 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 7 Mar 2018 12:41:23 +0300
Subject: [PATCH 0765/1678] net: Convet ipv6_net_ops

These pernet_operations are similar to ipv4_net_ops.
They are safe to be async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 663827ee3cf8..ba54bb3bd1e4 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,6 +401,7 @@ static struct pernet_operations ipv6_net_ops = {
 	.exit = ipv6_net_exit,
 	.id = &conntrack6_net_id,
 	.size = sizeof(struct conntrack6_net),
+	.async = true,
 };
 
 static int __init nf_conntrack_l3proto_ipv6_init(void)

From 46e371f0e78a82186a83cbcb4f4b8850417c7dd5 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Wed, 7 Mar 2018 15:38:48 -0800
Subject: [PATCH 0766/1678] openvswitch: fix vport packet length check.

When sending a packet to a tunnel device, the dev's hard_header_len
could be larger than the skb->len in function packet_length().
In the case of ip6gretap/erspan, hard_header_len = LL_MAX_HEADER + t_hlen,
which is around 180, and an ARP packet sent to this tunnel has
skb->len = 42.  This causes the 'unsign int length' to become super
large because it is negative value, causing the later ovs_vport_send
to drop it due to over-mtu size.  The patch fixes it by setting it to 0.

Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/vport.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index b6c8524032a0..f81c1d0ddff4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -464,10 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 	return 0;
 }
 
-static unsigned int packet_length(const struct sk_buff *skb,
-				  struct net_device *dev)
+static int packet_length(const struct sk_buff *skb,
+			 struct net_device *dev)
 {
-	unsigned int length = skb->len - dev->hard_header_len;
+	int length = skb->len - dev->hard_header_len;
 
 	if (!skb_vlan_tag_present(skb) &&
 	    eth_type_vlan(skb->protocol))
@@ -478,7 +478,7 @@ static unsigned int packet_length(const struct sk_buff *skb,
 	 * account for 802.1ad. e.g. is_skb_forwardable().
 	 */
 
-	return length;
+	return length > 0 ? length : 0;
 }
 
 void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)

From f873866a05cc4ca718e4f2fb6f2601a7869f3e12 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Wed, 7 Mar 2018 17:51:45 -0600
Subject: [PATCH 0767/1678] ibmvnic: Clean up device close

Remove some dead code now that RX pools are being cleaned. This
was included to wait until any pending RX queue interrupts are
processed, but NAPI polling should be disabled by this point.

Another minor change is to use the net device parameter for any
print functions instead of accessing it from the adapter structure.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 765407179fdd..fca0533d517f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1162,7 +1162,7 @@ static int __ibmvnic_close(struct net_device *netdev)
 	if (adapter->tx_scrq) {
 		for (i = 0; i < adapter->req_tx_queues; i++)
 			if (adapter->tx_scrq[i]->irq) {
-				netdev_dbg(adapter->netdev,
+				netdev_dbg(netdev,
 					   "Disabling tx_scrq[%d] irq\n", i);
 				disable_irq(adapter->tx_scrq[i]->irq);
 			}
@@ -1174,18 +1174,8 @@ static int __ibmvnic_close(struct net_device *netdev)
 
 	if (adapter->rx_scrq) {
 		for (i = 0; i < adapter->req_rx_queues; i++) {
-			int retries = 10;
-
-			while (pending_scrq(adapter, adapter->rx_scrq[i])) {
-				retries--;
-				mdelay(100);
-
-				if (retries == 0)
-					break;
-			}
-
 			if (adapter->rx_scrq[i]->irq) {
-				netdev_dbg(adapter->netdev,
+				netdev_dbg(netdev,
 					   "Disabling rx_scrq[%d] irq\n", i);
 				disable_irq(adapter->rx_scrq[i]->irq);
 			}

From 01d9bd792d1603761dcc2644ccf4cc35a4658ab6 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Wed, 7 Mar 2018 17:51:46 -0600
Subject: [PATCH 0768/1678] ibmvnic: Reorganize device close

Introduce a function to halt network operations and clean up any
unused or outstanding socket buffers. Then, during device close,
disable backing adapter before halting all queues and performing
cleanup. This ensures all backing device operations will be
stopped before the driver cleans up shared resources.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index fca0533d517f..d93f28658a2c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1143,14 +1143,11 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 	}
 }
 
-static int __ibmvnic_close(struct net_device *netdev)
+static void ibmvnic_cleanup(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	int rc = 0;
 	int i;
 
-	adapter->state = VNIC_CLOSING;
-
 	/* ensure that transmissions are stopped if called by do_reset */
 	if (adapter->resetting)
 		netif_tx_disable(netdev);
@@ -1168,10 +1165,6 @@ static int __ibmvnic_close(struct net_device *netdev)
 			}
 	}
 
-	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
-	if (rc)
-		return rc;
-
 	if (adapter->rx_scrq) {
 		for (i = 0; i < adapter->req_rx_queues; i++) {
 			if (adapter->rx_scrq[i]->irq) {
@@ -1183,8 +1176,20 @@ static int __ibmvnic_close(struct net_device *netdev)
 	}
 	clean_rx_pools(adapter);
 	clean_tx_pools(adapter);
+}
+
+static int __ibmvnic_close(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	adapter->state = VNIC_CLOSING;
+	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
+	if (rc)
+		return rc;
+	ibmvnic_cleanup(netdev);
 	adapter->state = VNIC_CLOSED;
-	return rc;
+	return 0;
 }
 
 static int ibmvnic_close(struct net_device *netdev)

From 18b8d6bbad47efd9bd1595c57271fbacc5eb0d46 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Wed, 7 Mar 2018 17:51:47 -0600
Subject: [PATCH 0769/1678] ibmvnic: Do not disable device during failover or
 partition migration

During a device failover or partition migration reset, it is not
necessary to disable the backing adapter since it should not be
running yet and its Command-Response Queue is closed. Sending
device commands during this time could result in an error or
timeout disrupting the reset process. In these cases, just halt
transmissions, clean up resources, and continue with reset.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index d93f28658a2c..7be4b06f69d2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1653,12 +1653,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		rc = ibmvnic_reenable_crq_queue(adapter);
 		if (rc)
 			return 0;
+		ibmvnic_cleanup(netdev);
+	} else if (rwi->reset_reason == VNIC_RESET_FAILOVER) {
+		ibmvnic_cleanup(netdev);
+	} else {
+		rc = __ibmvnic_close(netdev);
+		if (rc)
+			return rc;
 	}
 
-	rc = __ibmvnic_close(netdev);
-	if (rc)
-		return rc;
-
 	if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM ||
 	    adapter->wait_for_reset) {
 		release_resources(adapter);

From fcbedd0f29692c0aea3a32d41c8739de7420f140 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Wed, 7 Mar 2018 22:12:24 -0800
Subject: [PATCH 0770/1678] liquidio: Resolved mbox read issue while reading
 more than one 64bit data

Corrected length check when data received in the mbox is more than one
64 bit data value

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
index 57af7df74ced..28e74ee23ff8 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -87,7 +87,7 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
 	}
 
 	if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
-		if (mbox->mbox_req.recv_len < msg.s.len) {
+		if (mbox->mbox_req.recv_len < mbox->mbox_req.msg.s.len) {
 			ret = 0;
 		} else {
 			mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVING;
@@ -96,7 +96,8 @@ int octeon_mbox_read(struct octeon_mbox *mbox)
 		}
 	} else {
 		if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
-			if (mbox->mbox_resp.recv_len < msg.s.len) {
+			if (mbox->mbox_resp.recv_len <
+			    mbox->mbox_resp.msg.s.len) {
 				ret = 0;
 			} else {
 				mbox->state &=

From cecd8d81ac5a7551d70c331b7f4ef7eb2609f336 Mon Sep 17 00:00:00 2001
From: Prasad Kanneganti <prasad.kanneganti@cavium.com>
Date: Wed, 7 Mar 2018 22:23:32 -0800
Subject: [PATCH 0771/1678] liquidio: avoid doing useless work

Avoid doing useless work by making sure that the response_list is not empty
before scheduling work to process it.

Signed-off-by: Prasad Kanneganti <prasad.kanneganti@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/request_manager.c  | 5 +++++
 drivers/net/ethernet/cavium/liquidio/response_manager.c | 6 ++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index e07d2093b971..2766af05b89e 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -366,6 +366,7 @@ int
 lio_process_iq_request_list(struct octeon_device *oct,
 			    struct octeon_instr_queue *iq, u32 napi_budget)
 {
+	struct cavium_wq *cwq = &oct->dma_comp_wq;
 	int reqtype;
 	void *buf;
 	u32 old = iq->flush_index;
@@ -450,6 +451,10 @@ lio_process_iq_request_list(struct octeon_device *oct,
 						   bytes_compl);
 	iq->flush_index = old;
 
+	if (atomic_read(&oct->response_list
+			[OCTEON_ORDERED_SC_LIST].pending_req_count))
+		queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
+
 	return inst_count;
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 3d691c69f74d..fe5b53700576 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -49,7 +49,6 @@ int octeon_setup_response_list(struct octeon_device *oct)
 	INIT_DELAYED_WORK(&cwq->wk.work, oct_poll_req_completion);
 	cwq->wk.ctxptr = oct;
 	oct->cmd_resp_state = OCT_DRV_ONLINE;
-	queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
 
 	return ret;
 }
@@ -164,5 +163,8 @@ static void oct_poll_req_completion(struct work_struct *work)
 	struct cavium_wq *cwq = &oct->dma_comp_wq;
 
 	lio_process_ordered_list(oct, 0);
-	queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50));
+
+	if (atomic_read(&oct->response_list
+			[OCTEON_ORDERED_SC_LIST].pending_req_count))
+		queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(1));
 }

From 50db64b090d0f457a3266fe80e2c841eba621b9d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 8 Mar 2018 12:36:04 +0300
Subject: [PATCH 0772/1678] net/ncsi: use kfree_skb() instead of kfree()

We're supposed to use kfree_skb() to free these sk_buffs.

Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ncsi/ncsi-netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index d4201665a580..b73239b76349 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -183,7 +183,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
 	hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
 			  &ncsi_genl_family, 0, NCSI_CMD_PKG_INFO);
 	if (!hdr) {
-		kfree(skb);
+		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
 
@@ -204,7 +204,7 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
 
 err:
 	genlmsg_cancel(skb, hdr);
-	kfree(skb);
+	kfree_skb(skb);
 	return rc;
 }
 

From 054f34da60dfa46457ca39dd111bb2b393575dab Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 8 Mar 2018 12:36:28 +0300
Subject: [PATCH 0773/1678] net/ncsi: unlock on error in
 ncsi_set_interface_nl()

There are two error paths which are missing unlocks in this function.

Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ncsi/ncsi-netlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index b73239b76349..05fcfb4fbe1d 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -299,6 +299,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
 			package = np;
 	if (!package) {
 		/* The user has set a package that does not exist */
+		spin_unlock_irqrestore(&ndp->lock, flags);
 		return -ERANGE;
 	}
 
@@ -317,6 +318,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
 		/* The user has set a channel that does not exist on this
 		 * package
 		 */
+		spin_unlock_irqrestore(&ndp->lock, flags);
 		netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
 			    channel_id);
 		return -ERANGE;

From 496c7f3caed5c56ba4ab6767a9cf7e7aa8bd8f41 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Thu, 8 Mar 2018 18:56:14 +0800
Subject: [PATCH 0774/1678] rds: rds_message_zcopy_from_user() can be static

Fixes: d40a126b16ea ("rds: refactor zcopy code into rds_message_zcopy_from_user")
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rds/message.c b/net/rds/message.c
index 90dcdcfe9f62..3c610d5fe7ae 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -355,7 +355,7 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	return rm;
 }
 
-int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
+static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 {
 	unsigned long sg_off;
 	struct scatterlist *sg;

From 571e6776add4f499661e761e03e46ec0f6d66243 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Thu, 8 Mar 2018 19:37:30 +0800
Subject: [PATCH 0775/1678] rds: rds_info_from_znotifier() can be static

Fixes: 9426bbc6de99 ("rds: use list structure to track information for zerocopy completion notification")
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rds/message.c b/net/rds/message.c
index 3c610d5fe7ae..4462e4c9bd7d 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -67,7 +67,7 @@ static inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie)
 	return true;
 }
 
-struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
+static struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif)
 {
 	return container_of(znotif, struct rds_msg_zcopy_info, znotif);
 }

From 84a1d9c4820080bebcbd413a845076dcb62f45fa Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Thu, 8 Mar 2018 15:45:03 +0000
Subject: [PATCH 0776/1678] net: ethtool: extend RXNFC API to support RSS
 spreading of filter matches

We use a two-step process to configure a filter with RSS spreading.  First,
 the RSS context is allocated and configured using ETHTOOL_SRSSH; this
 returns an identifier (rss_context) which can then be passed to subsequent
 invocations of ETHTOOL_SRXCLSRLINS to specify that the offset from the RSS
 indirection table lookup should be added to the queue number (ring_cookie)
 when delivering the packet.  Drivers for devices which can only use the
 indirection table entry directly (not add it to a base queue number)
 should reject rule insertions combining RSS with a nonzero ring_cookie.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  5 +++
 include/uapi/linux/ethtool.h | 32 ++++++++++++++----
 net/core/ethtool.c           | 64 +++++++++++++++++++++++++++---------
 3 files changed, 80 insertions(+), 21 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2ec41a7eb54f..ebe41811ed34 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -371,6 +371,11 @@ struct ethtool_ops {
 			    u8 *hfunc);
 	int	(*set_rxfh)(struct net_device *, const u32 *indir,
 			    const u8 *key, const u8 hfunc);
+	int	(*get_rxfh_context)(struct net_device *, u32 *indir, u8 *key,
+				    u8 *hfunc, u32 rss_context);
+	int	(*set_rxfh_context)(struct net_device *, const u32 *indir,
+				    const u8 *key, const u8 hfunc,
+				    u32 *rss_context, bool delete);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 44a0b675a6bc..20da156aaf64 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -914,12 +914,15 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * @flow_type: Type of flow to be affected, e.g. %TCP_V4_FLOW
  * @data: Command-dependent value
  * @fs: Flow classification rule
+ * @rss_context: RSS context to be affected
  * @rule_cnt: Number of rules to be affected
  * @rule_locs: Array of used rule locations
  *
  * For %ETHTOOL_GRXFH and %ETHTOOL_SRXFH, @data is a bitmask indicating
  * the fields included in the flow hash, e.g. %RXH_IP_SRC.  The following
- * structure fields must not be used.
+ * structure fields must not be used, except that if @flow_type includes
+ * the %FLOW_RSS flag, then @rss_context determines which RSS context to
+ * act on.
  *
  * For %ETHTOOL_GRXRINGS, @data is set to the number of RX rings/queues
  * on return.
@@ -931,7 +934,9 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * set in @data then special location values should not be used.
  *
  * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
- * existing rule on entry and @fs contains the rule on return.
+ * existing rule on entry and @fs contains the rule on return; if
+ * @fs.@flow_type includes the %FLOW_RSS flag, then @rss_context is
+ * filled with the RSS context ID associated with the rule.
  *
  * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the
  * user buffer for @rule_locs on entry.  On return, @data is the size
@@ -942,7 +947,11 @@ static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
  * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
  * @fs.@location either specifies the location to use or is a special
  * location value with %RX_CLS_LOC_SPECIAL flag set.  On return,
- * @fs.@location is the actual rule location.
+ * @fs.@location is the actual rule location.  If @fs.@flow_type
+ * includes the %FLOW_RSS flag, @rss_context is the RSS context ID to
+ * use for flow spreading traffic which matches this rule.  The value
+ * from the rxfh indirection table will be added to @fs.@ring_cookie
+ * to choose which ring to deliver to.
  *
  * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
  * existing rule on entry.
@@ -963,7 +972,10 @@ struct ethtool_rxnfc {
 	__u32				flow_type;
 	__u64				data;
 	struct ethtool_rx_flow_spec	fs;
-	__u32				rule_cnt;
+	union {
+		__u32			rule_cnt;
+		__u32			rss_context;
+	};
 	__u32				rule_locs[0];
 };
 
@@ -990,7 +1002,11 @@ struct ethtool_rxfh_indir {
 /**
  * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
  * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
- * @rss_context: RSS context identifier.
+ * @rss_context: RSS context identifier.  Context 0 is the default for normal
+ *	traffic; other contexts can be referenced as the destination for RX flow
+ *	classification rules.  %ETH_RXFH_CONTEXT_ALLOC is used with command
+ *	%ETHTOOL_SRSSH to allocate a new RSS context; on return this field will
+ *	contain the ID of the newly allocated context.
  * @indir_size: On entry, the array size of the user buffer for the
  *	indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
  *	%ETH_RXFH_INDIR_NO_CHANGE.  On return from %ETHTOOL_GRSSH,
@@ -1009,7 +1025,8 @@ struct ethtool_rxfh_indir {
  * size should be returned.  For %ETHTOOL_SRSSH, an @indir_size of
  * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
  * and a @indir_size of zero means the indir table should be reset to default
- * values. An hfunc of zero means that hash function setting is not requested.
+ * values (if @rss_context == 0) or that the RSS context should be deleted.
+ * An hfunc of zero means that hash function setting is not requested.
  */
 struct ethtool_rxfh {
 	__u32   cmd;
@@ -1021,6 +1038,7 @@ struct ethtool_rxfh {
 	__u32	rsvd32;
 	__u32   rss_config[0];
 };
+#define ETH_RXFH_CONTEXT_ALLOC		0xffffffff
 #define ETH_RXFH_INDIR_NO_CHANGE	0xffffffff
 
 /**
@@ -1635,6 +1653,8 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 /* Flag to enable additional fields in struct ethtool_rx_flow_spec */
 #define	FLOW_EXT	0x80000000
 #define	FLOW_MAC_EXT	0x40000000
+/* Flag to enable RSS spreading of traffic matching rule (nfc only) */
+#define	FLOW_RSS	0x20000000
 
 /* L3-L4 network traffic flow hash options */
 #define	RXH_L2DA	(1 << 1)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3f89c76d5c24..157cd9efa4be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1022,6 +1022,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
+	/* If FLOW_RSS was requested then user-space must be using the
+	 * new definition, as FLOW_RSS is newer.
+	 */
+	if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
+		info_size = sizeof(info);
+		if (copy_from_user(&info, useraddr, info_size))
+			return -EFAULT;
+	}
+
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
 		if (info.rule_cnt > 0) {
 			if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
@@ -1251,9 +1260,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
 	user_key_size = rxfh.key_size;
 
 	/* Check that reserved fields are 0 for now */
-	if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
-	    rxfh.rsvd8[2] || rxfh.rsvd32)
+	if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
 		return -EINVAL;
+	/* Most drivers don't handle rss_context, check it's 0 as well */
+	if (rxfh.rss_context && !ops->get_rxfh_context)
+		return -EOPNOTSUPP;
 
 	rxfh.indir_size = dev_indir_size;
 	rxfh.key_size = dev_key_size;
@@ -1276,7 +1287,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
 	if (user_key_size)
 		hkey = rss_config + indir_bytes;
 
-	ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+	if (rxfh.rss_context)
+		ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
+							 &dev_hfunc,
+							 rxfh.rss_context);
+	else
+		ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
 	if (ret)
 		goto out;
 
@@ -1306,6 +1322,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 	u8 *hkey = NULL;
 	u8 *rss_config;
 	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+	bool delete = false;
 
 	if (!ops->get_rxnfc || !ops->set_rxfh)
 		return -EOPNOTSUPP;
@@ -1319,9 +1336,11 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		return -EFAULT;
 
 	/* Check that reserved fields are 0 for now */
-	if (rxfh.rss_context || rxfh.rsvd8[0] || rxfh.rsvd8[1] ||
-	    rxfh.rsvd8[2] || rxfh.rsvd32)
+	if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
 		return -EINVAL;
+	/* Most drivers don't handle rss_context, check it's 0 as well */
+	if (rxfh.rss_context && !ops->set_rxfh_context)
+		return -EOPNOTSUPP;
 
 	/* If either indir, hash key or function is valid, proceed further.
 	 * Must request at least one change: indir size, hash key or function.
@@ -1346,7 +1365,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 	if (ret)
 		goto out;
 
-	/* rxfh.indir_size == 0 means reset the indir table to default.
+	/* rxfh.indir_size == 0 means reset the indir table to default (master
+	 * context) or delete the context (other RSS contexts).
 	 * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
 	 */
 	if (rxfh.indir_size &&
@@ -1359,9 +1379,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		if (ret)
 			goto out;
 	} else if (rxfh.indir_size == 0) {
-		indir = (u32 *)rss_config;
-		for (i = 0; i < dev_indir_size; i++)
-			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+		if (rxfh.rss_context == 0) {
+			indir = (u32 *)rss_config;
+			for (i = 0; i < dev_indir_size; i++)
+				indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+		} else {
+			delete = true;
+		}
 	}
 
 	if (rxfh.key_size) {
@@ -1374,15 +1398,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		}
 	}
 
-	ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+	if (rxfh.rss_context)
+		ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
+					    &rxfh.rss_context, delete);
+	else
+		ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
 	if (ret)
 		goto out;
 
-	/* indicate whether rxfh was set to default */
-	if (rxfh.indir_size == 0)
-		dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
-	else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
-		dev->priv_flags |= IFF_RXFH_CONFIGURED;
+	if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
+			 &rxfh.rss_context, sizeof(rxfh.rss_context)))
+		ret = -EFAULT;
+
+	if (!rxfh.rss_context) {
+		/* indicate whether rxfh was set to default */
+		if (rxfh.indir_size == 0)
+			dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+		else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
+			dev->priv_flags |= IFF_RXFH_CONFIGURED;
+	}
 
 out:
 	kfree(rss_config);

From 42356d9a137bc83268e3988e9f1fdd48dbeef2ef Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Thu, 8 Mar 2018 15:45:17 +0000
Subject: [PATCH 0777/1678] sfc: support RSS spreading of ethtool ntuple
 filters

Use a linked list to associate user-facing context IDs with FW-facing
 context IDs (since the latter can change after an MC reset).

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 273 +++++++++++++++++---------
 drivers/net/ethernet/sfc/efx.c        |  65 +++++-
 drivers/net/ethernet/sfc/efx.h        |  12 +-
 drivers/net/ethernet/sfc/ethtool.c    | 153 +++++++++++++--
 drivers/net/ethernet/sfc/farch.c      |  11 +-
 drivers/net/ethernet/sfc/filter.h     |   7 +-
 drivers/net/ethernet/sfc/net_driver.h |  44 ++++-
 drivers/net/ethernet/sfc/nic.h        |   2 -
 drivers/net/ethernet/sfc/siena.c      |  26 +--
 9 files changed, 443 insertions(+), 150 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 75fbf58e421c..e100273b623d 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -28,9 +28,6 @@ enum {
 	EFX_EF10_TEST = 1,
 	EFX_EF10_REFILL,
 };
-
-/* The reserved RSS context value */
-#define EFX_EF10_RSS_CONTEXT_INVALID	0xffffffff
 /* The maximum size of a shared RSS context */
 /* TODO: this should really be from the mcdi protocol export */
 #define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
@@ -697,7 +694,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
 	}
 	nic_data->warm_boot_count = rc;
 
-	nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+	efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 
 	nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
 
@@ -1489,8 +1486,8 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
 	}
 
 	/* don't fail init if RSS setup doesn't work */
-	rc = efx->type->rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
-	efx->rss_active = (rc == 0);
+	rc = efx->type->rx_push_rss_config(efx, false,
+					   efx->rss_context.rx_indir_table, NULL);
 
 	return 0;
 }
@@ -1507,7 +1504,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
 	nic_data->must_restore_filters = true;
 	nic_data->must_restore_piobufs = true;
 	efx_ef10_forget_old_piobufs(efx);
-	nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+	efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 
 	/* Driver-created vswitches and vports must be re-created */
 	nic_data->must_probe_vswitching = true;
@@ -2703,27 +2700,30 @@ static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
  * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
  * just need to set the UDP ports flags (for both IP versions).
  */
-static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+static void efx_ef10_set_rss_flags(struct efx_nic *efx,
+				   struct efx_rss_context *ctx)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
 	u32 flags;
 
 	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
 
-	if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+	if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0)
 		return;
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
+		       ctx->context_id);
 	flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
 	flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
 	if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
 			  NULL, 0, NULL))
 		/* Succeeded, so UDP 4-tuple is now enabled */
-		efx->rx_hash_udp_4tuple = true;
+		ctx->rx_hash_udp_4tuple = true;
 }
 
-static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
-				      bool exclusive, unsigned *context_size)
+static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
+				      struct efx_rss_context *ctx,
+				      unsigned *context_size)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
@@ -2739,7 +2739,7 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
 				    EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
 
 	if (!exclusive && rss_spread == 1) {
-		*context = EFX_EF10_RSS_CONTEXT_INVALID;
+		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 		if (context_size)
 			*context_size = 1;
 		return 0;
@@ -2762,29 +2762,26 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
 	if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
 		return -EIO;
 
-	*context = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
+	ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
 
 	if (context_size)
 		*context_size = rss_spread;
 
 	if (nic_data->datapath_caps &
 	    1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
-		efx_ef10_set_rss_flags(efx, *context);
+		efx_ef10_set_rss_flags(efx, ctx);
 
 	return 0;
 }
 
-static void efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
+static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
-	int rc;
 
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
 		       context);
-
-	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
+	return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
 			    NULL, 0, NULL);
-	WARN_ON(rc != 0);
 }
 
 static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
@@ -2796,15 +2793,15 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
 
 	MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
 		       context);
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
 		     MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
 
-	/* This iterates over the length of efx->rx_indir_table, but copies
-	 * bytes from rx_indir_table.  That's because the latter is a pointer
-	 * rather than an array, but should have the same length.
-	 * The efx->rx_hash_key loop below is similar.
+	/* This iterates over the length of efx->rss_context.rx_indir_table, but
+	 * copies bytes from rx_indir_table.  That's because the latter is a
+	 * pointer rather than an array, but should have the same length.
+	 * The efx->rss_context.rx_hash_key loop below is similar.
 	 */
-	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); ++i)
+	for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
 		MCDI_PTR(tablebuf,
 			 RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
 				(u8) rx_indir_table[i];
@@ -2816,9 +2813,9 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
 
 	MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
 		       context);
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
 		     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
-	for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
+	for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
 		MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
 
 	return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
@@ -2827,27 +2824,27 @@ static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
 
 static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	int rc;
 
-	if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
-		efx_ef10_free_rss_context(efx, nic_data->rx_rss_context);
-	nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID;
+	if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
+		rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
+		WARN_ON(rc != 0);
+	}
+	efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 }
 
 static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
 					      unsigned *context_size)
 {
-	u32 new_rx_rss_context;
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	int rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
-					    false, context_size);
+	int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context,
+					    context_size);
 
 	if (rc != 0)
 		return rc;
 
-	nic_data->rx_rss_context = new_rx_rss_context;
 	nic_data->rx_rss_context_exclusive = false;
-	efx_set_default_rx_indir_table(efx);
+	efx_set_default_rx_indir_table(efx, &efx->rss_context);
 	return 0;
 }
 
@@ -2855,50 +2852,79 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
 						 const u32 *rx_indir_table,
 						 const u8 *key)
 {
+	u32 old_rx_rss_context = efx->rss_context.context_id;
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	int rc;
-	u32 new_rx_rss_context;
 
-	if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID ||
+	if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
 	    !nic_data->rx_rss_context_exclusive) {
-		rc = efx_ef10_alloc_rss_context(efx, &new_rx_rss_context,
-						true, NULL);
+		rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
+						NULL);
 		if (rc == -EOPNOTSUPP)
 			return rc;
 		else if (rc != 0)
 			goto fail1;
-	} else {
-		new_rx_rss_context = nic_data->rx_rss_context;
 	}
 
-	rc = efx_ef10_populate_rss_table(efx, new_rx_rss_context,
+	rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id,
 					 rx_indir_table, key);
 	if (rc != 0)
 		goto fail2;
 
-	if (nic_data->rx_rss_context != new_rx_rss_context)
-		efx_ef10_rx_free_indir_table(efx);
-	nic_data->rx_rss_context = new_rx_rss_context;
+	if (efx->rss_context.context_id != old_rx_rss_context &&
+	    old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+		WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
 	nic_data->rx_rss_context_exclusive = true;
-	if (rx_indir_table != efx->rx_indir_table)
-		memcpy(efx->rx_indir_table, rx_indir_table,
-		       sizeof(efx->rx_indir_table));
-	if (key != efx->rx_hash_key)
-		memcpy(efx->rx_hash_key, key, efx->type->rx_hash_key_size);
+	if (rx_indir_table != efx->rss_context.rx_indir_table)
+		memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+		       sizeof(efx->rss_context.rx_indir_table));
+	if (key != efx->rss_context.rx_hash_key)
+		memcpy(efx->rss_context.rx_hash_key, key,
+		       efx->type->rx_hash_key_size);
 
 	return 0;
 
 fail2:
-	if (new_rx_rss_context != nic_data->rx_rss_context)
-		efx_ef10_free_rss_context(efx, new_rx_rss_context);
+	if (old_rx_rss_context != efx->rss_context.context_id) {
+		WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0);
+		efx->rss_context.context_id = old_rx_rss_context;
+	}
 fail1:
 	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
 	return rc;
 }
 
-static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
+					       struct efx_rss_context *ctx,
+					       const u32 *rx_indir_table,
+					       const u8 *key)
+{
+	int rc;
+
+	if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+		rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
+		if (rc)
+			return rc;
+	}
+
+	if (!rx_indir_table) /* Delete this context */
+		return efx_ef10_free_rss_context(efx, ctx->context_id);
+
+	rc = efx_ef10_populate_rss_table(efx, ctx->context_id,
+					 rx_indir_table, key);
+	if (rc)
+		return rc;
+
+	memcpy(ctx->rx_indir_table, rx_indir_table,
+	       sizeof(efx->rss_context.rx_indir_table));
+	memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
+
+	return 0;
+}
+
+static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
+					       struct efx_rss_context *ctx)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
 	MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
 	MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
@@ -2908,12 +2934,12 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
 	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
 		     MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
 
-	if (nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
+	if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
 		return -ENOENT;
 
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
-		       nic_data->rx_rss_context);
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+		       ctx->context_id);
+	BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
 		     MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
 	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
 			  tablebuf, sizeof(tablebuf), &outlen);
@@ -2923,13 +2949,13 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
 	if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
 		return -EIO;
 
-	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-		efx->rx_indir_table[i] = MCDI_PTR(tablebuf,
+	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+		ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
 				RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
 
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
-		       nic_data->rx_rss_context);
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_hash_key) !=
+		       ctx->context_id);
+	BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
 		     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
 	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
 			  keybuf, sizeof(keybuf), &outlen);
@@ -2939,13 +2965,38 @@ static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
 	if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
 		return -EIO;
 
-	for (i = 0; i < ARRAY_SIZE(efx->rx_hash_key); ++i)
-		efx->rx_hash_key[i] = MCDI_PTR(
+	for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
+		ctx->rx_hash_key[i] = MCDI_PTR(
 				keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
 
 	return 0;
 }
 
+static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
+{
+	return efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
+}
+
+static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
+{
+	struct efx_rss_context *ctx;
+	int rc;
+
+	list_for_each_entry(ctx, &efx->rss_context.list, list) {
+		/* previous NIC RSS context is gone */
+		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+		/* so try to allocate a new one */
+		rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
+							 ctx->rx_indir_table,
+							 ctx->rx_hash_key);
+		if (rc)
+			netif_warn(efx, probe, efx->net_dev,
+				   "failed to restore RSS context %u, rc=%d"
+				   "; RSS filters may fail to be applied\n",
+				   ctx->user_id, rc);
+	}
+}
+
 static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
 					  const u32 *rx_indir_table,
 					  const u8 *key)
@@ -2956,7 +3007,7 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
 		return 0;
 
 	if (!key)
-		key = efx->rx_hash_key;
+		key = efx->rss_context.rx_hash_key;
 
 	rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
 
@@ -2965,7 +3016,8 @@ static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
 		bool mismatch = false;
 		size_t i;
 
-		for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table) && !mismatch;
+		for (i = 0;
+		     i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
 		     i++)
 			mismatch = rx_indir_table[i] !=
 				ethtool_rxfh_indir_default(i, efx->rss_spread);
@@ -3000,11 +3052,9 @@ static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
 					  const u8 *key
 					  __attribute__ ((unused)))
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
 	if (user)
 		return -EOPNOTSUPP;
-	if (nic_data->rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+	if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
 		return 0;
 	return efx_ef10_rx_push_shared_rss_config(efx, NULL);
 }
@@ -4109,6 +4159,7 @@ efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx,
 static void efx_ef10_filter_push_prep(struct efx_nic *efx,
 				      const struct efx_filter_spec *spec,
 				      efx_dword_t *inbuf, u64 handle,
+				      struct efx_rss_context *ctx,
 				      bool replacing)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
@@ -4116,11 +4167,16 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
 
 	memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
 
-	/* Remove RSS flag if we don't have an RSS context. */
-	if (flags & EFX_FILTER_FLAG_RX_RSS &&
-	    spec->rss_context == EFX_FILTER_RSS_CONTEXT_DEFAULT &&
-	    nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID)
-		flags &= ~EFX_FILTER_FLAG_RX_RSS;
+	/* If RSS filter, caller better have given us an RSS context */
+	if (flags & EFX_FILTER_FLAG_RX_RSS) {
+		/* We don't have the ability to return an error, so we'll just
+		 * log a warning and disable RSS for the filter.
+		 */
+		if (WARN_ON_ONCE(!ctx))
+			flags &= ~EFX_FILTER_FLAG_RX_RSS;
+		else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
+			flags &= ~EFX_FILTER_FLAG_RX_RSS;
+	}
 
 	if (replacing) {
 		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
@@ -4146,21 +4202,18 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
 		       MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
 		       MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
 	if (flags & EFX_FILTER_FLAG_RX_RSS)
-		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT,
-			       spec->rss_context !=
-			       EFX_FILTER_RSS_CONTEXT_DEFAULT ?
-			       spec->rss_context : nic_data->rx_rss_context);
+		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
 }
 
 static int efx_ef10_filter_push(struct efx_nic *efx,
-				const struct efx_filter_spec *spec,
-				u64 *handle, bool replacing)
+				const struct efx_filter_spec *spec, u64 *handle,
+				struct efx_rss_context *ctx, bool replacing)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
 	int rc;
 
-	efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, replacing);
+	efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
 	rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
 			  outbuf, sizeof(outbuf), NULL);
 	if (rc == 0)
@@ -4252,6 +4305,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
 	struct efx_filter_spec *saved_spec;
+	struct efx_rss_context *ctx = NULL;
 	unsigned int match_pri, hash;
 	unsigned int priv_flags;
 	bool replacing = false;
@@ -4275,6 +4329,18 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 	if (is_mc_recip)
 		bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
 
+	if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+		if (spec->rss_context)
+			ctx = efx_find_rss_context_entry(spec->rss_context,
+							 &efx->rss_context.list);
+		else
+			ctx = &efx->rss_context;
+		if (!ctx)
+			return -ENOENT;
+		if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
+			return -EOPNOTSUPP;
+	}
+
 	/* Find any existing filters with the same match tuple or
 	 * else a free slot to insert at.  If any of them are busy,
 	 * we have to wait and retry.
@@ -4390,7 +4456,7 @@ found:
 	spin_unlock_bh(&efx->filter_lock);
 
 	rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
-				  replacing);
+				  ctx, replacing);
 
 	/* Finalise the software table entry */
 	spin_lock_bh(&efx->filter_lock);
@@ -4534,12 +4600,13 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 
 		new_spec.priority = EFX_FILTER_PRI_AUTO;
 		new_spec.flags = (EFX_FILTER_FLAG_RX |
-				  (efx_rss_enabled(efx) ?
+				  (efx_rss_active(&efx->rss_context) ?
 				   EFX_FILTER_FLAG_RX_RSS : 0));
 		new_spec.dmaq_id = 0;
-		new_spec.rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+		new_spec.rss_context = 0;
 		rc = efx_ef10_filter_push(efx, &new_spec,
 					  &table->entry[filter_idx].handle,
+					  &efx->rss_context,
 					  true);
 
 		spin_lock_bh(&efx->filter_lock);
@@ -4783,7 +4850,8 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
 	cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
 
 	efx_ef10_filter_push_prep(efx, spec, inbuf,
-				  table->entry[ins_index].handle, replacing);
+				  table->entry[ins_index].handle, NULL,
+				  replacing);
 	efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
 			   MC_CMD_FILTER_OP_OUT_LEN,
 			   efx_ef10_filter_rfs_insert_complete, cookie);
@@ -5104,6 +5172,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 	unsigned int invalid_filters = 0, failed = 0;
 	struct efx_ef10_filter_vlan *vlan;
 	struct efx_filter_spec *spec;
+	struct efx_rss_context *ctx;
 	unsigned int filter_idx;
 	u32 mcdi_flags;
 	int match_pri;
@@ -5133,17 +5202,34 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 			invalid_filters++;
 			goto not_restored;
 		}
-		if (spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT &&
-		    spec->rss_context != nic_data->rx_rss_context)
-			netif_warn(efx, drv, efx->net_dev,
-				   "Warning: unable to restore a filter with specific RSS context.\n");
+		if (spec->rss_context)
+			ctx = efx_find_rss_context_entry(spec->rss_context,
+							 &efx->rss_context.list);
+		else
+			ctx = &efx->rss_context;
+		if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+			if (!ctx) {
+				netif_warn(efx, drv, efx->net_dev,
+					   "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
+					   spec->rss_context);
+				invalid_filters++;
+				goto not_restored;
+			}
+			if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+				netif_warn(efx, drv, efx->net_dev,
+					   "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
+					   spec->rss_context);
+				invalid_filters++;
+				goto not_restored;
+			}
+		}
 
 		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
 		spin_unlock_bh(&efx->filter_lock);
 
 		rc = efx_ef10_filter_push(efx, spec,
 					  &table->entry[filter_idx].handle,
-					  false);
+					  ctx, false);
 		if (rc)
 			failed++;
 		spin_lock_bh(&efx->filter_lock);
@@ -6784,6 +6870,9 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.tx_limit_len = efx_ef10_tx_limit_len,
 	.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
 	.rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
+	.rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
+	.rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
+	.rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
 	.rx_probe = efx_ef10_rx_probe,
 	.rx_init = efx_ef10_rx_init,
 	.rx_remove = efx_ef10_rx_remove,
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 16757cfc5b29..7321a4cf6f4d 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1353,12 +1353,13 @@ static void efx_fini_io(struct efx_nic *efx)
 		pci_disable_device(efx->pci_dev);
 }
 
-void efx_set_default_rx_indir_table(struct efx_nic *efx)
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+				    struct efx_rss_context *ctx)
 {
 	size_t i;
 
-	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-		efx->rx_indir_table[i] =
+	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+		ctx->rx_indir_table[i] =
 			ethtool_rxfh_indir_default(i, efx->rss_spread);
 }
 
@@ -1739,9 +1740,9 @@ static int efx_probe_nic(struct efx_nic *efx)
 	} while (rc == -EAGAIN);
 
 	if (efx->n_channels > 1)
-		netdev_rss_key_fill(&efx->rx_hash_key,
-				    sizeof(efx->rx_hash_key));
-	efx_set_default_rx_indir_table(efx);
+		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
+				    sizeof(efx->rss_context.rx_hash_key));
+	efx_set_default_rx_indir_table(efx, &efx->rss_context);
 
 	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
 	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
@@ -2700,6 +2701,8 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
 			   " VFs may not function\n", rc);
 #endif
 
+	if (efx->type->rx_restore_rss_contexts)
+		efx->type->rx_restore_rss_contexts(efx);
 	down_read(&efx->filter_sem);
 	efx_restore_filters(efx);
 	up_read(&efx->filter_sem);
@@ -3003,6 +3006,7 @@ static int efx_init_struct(struct efx_nic *efx,
 		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
 	efx->rx_packet_ts_offset =
 		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+	INIT_LIST_HEAD(&efx->rss_context.list);
 	spin_lock_init(&efx->stats_lock);
 	efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
 	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
@@ -3072,6 +3076,55 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
 	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
 }
 
+/* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
+{
+	struct efx_rss_context *ctx, *new;
+	u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+	/* Search for first gap in the numbering */
+	list_for_each_entry(ctx, head, list) {
+		if (ctx->user_id != id)
+			break;
+		id++;
+		/* Check for wrap.  If this happens, we have nearly 2^32
+		 * allocated RSS contexts, which seems unlikely.
+		 */
+		if (WARN_ON_ONCE(!id))
+			return NULL;
+	}
+
+	/* Create the new entry */
+	new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
+	if (!new)
+		return NULL;
+	new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+	new->rx_hash_udp_4tuple = false;
+
+	/* Insert the new entry into the gap */
+	new->user_id = id;
+	list_add_tail(&new->list, &ctx->list);
+	return new;
+}
+
+struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *head)
+{
+	struct efx_rss_context *ctx;
+
+	list_for_each_entry(ctx, head, list)
+		if (ctx->user_id == id)
+			return ctx;
+	return NULL;
+}
+
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+	list_del(&ctx->list);
+	kfree(ctx);
+}
+
 /**************************************************************************
  *
  * PCI interface
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 0cddc5ad77b1..3429ae3f3b08 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -34,7 +34,8 @@ extern unsigned int efx_piobuf_size;
 extern bool efx_separate_tx_channels;
 
 /* RX */
-void efx_set_default_rx_indir_table(struct efx_nic *efx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+				    struct efx_rss_context *ctx);
 void efx_rx_config_page_split(struct efx_nic *efx);
 int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
 void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
@@ -182,6 +183,15 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
 #endif
 bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
 
+/* RSS contexts */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *list);
+struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *list);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+static inline bool efx_rss_active(struct efx_rss_context *ctx)
+{
+	return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID;
+}
+
 /* Channels */
 int efx_channel_dummy_op_int(struct efx_channel *channel);
 void efx_channel_dummy_op_void(struct efx_channel *channel);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 4db2dc2bf52f..64049e71e6e7 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -808,7 +808,8 @@ static inline void ip6_fill_mask(__be32 *mask)
 }
 
 static int efx_ethtool_get_class_rule(struct efx_nic *efx,
-				      struct ethtool_rx_flow_spec *rule)
+				      struct ethtool_rx_flow_spec *rule,
+				      u32 *rss_context)
 {
 	struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
 	struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -964,6 +965,11 @@ static int efx_ethtool_get_class_rule(struct efx_nic *efx,
 		rule->m_ext.vlan_tci = htons(0xfff);
 	}
 
+	if (spec.flags & EFX_FILTER_FLAG_RX_RSS) {
+		rule->flow_type |= FLOW_RSS;
+		*rss_context = spec.rss_context;
+	}
+
 	return rc;
 }
 
@@ -972,6 +978,8 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 		      struct ethtool_rxnfc *info, u32 *rule_locs)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
+	u32 rss_context = 0;
+	s32 rc;
 
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
@@ -979,12 +987,20 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 		return 0;
 
 	case ETHTOOL_GRXFH: {
+		struct efx_rss_context *ctx = &efx->rss_context;
+
+		if (info->flow_type & FLOW_RSS && info->rss_context) {
+			ctx = efx_find_rss_context_entry(info->rss_context,
+							 &efx->rss_context.list);
+			if (!ctx)
+				return -ENOENT;
+		}
 		info->data = 0;
-		if (!efx->rss_active) /* No RSS */
+		if (!efx_rss_active(ctx)) /* No RSS */
 			return 0;
-		switch (info->flow_type) {
+		switch (info->flow_type & ~FLOW_RSS) {
 		case UDP_V4_FLOW:
-			if (efx->rx_hash_udp_4tuple)
+			if (ctx->rx_hash_udp_4tuple)
 				/* fall through */
 		case TCP_V4_FLOW:
 				info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -995,7 +1011,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 			info->data |= RXH_IP_SRC | RXH_IP_DST;
 			break;
 		case UDP_V6_FLOW:
-			if (efx->rx_hash_udp_4tuple)
+			if (ctx->rx_hash_udp_4tuple)
 				/* fall through */
 		case TCP_V6_FLOW:
 				info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
@@ -1023,10 +1039,14 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 	case ETHTOOL_GRXCLSRULE:
 		if (efx_filter_get_rx_id_limit(efx) == 0)
 			return -EOPNOTSUPP;
-		return efx_ethtool_get_class_rule(efx, &info->fs);
+		rc = efx_ethtool_get_class_rule(efx, &info->fs, &rss_context);
+		if (rc < 0)
+			return rc;
+		if (info->fs.flow_type & FLOW_RSS)
+			info->rss_context = rss_context;
+		return 0;
 
-	case ETHTOOL_GRXCLSRLALL: {
-		s32 rc;
+	case ETHTOOL_GRXCLSRLALL:
 		info->data = efx_filter_get_rx_id_limit(efx);
 		if (info->data == 0)
 			return -EOPNOTSUPP;
@@ -1036,7 +1056,6 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 			return rc;
 		info->rule_cnt = rc;
 		return 0;
-	}
 
 	default:
 		return -EOPNOTSUPP;
@@ -1054,7 +1073,8 @@ static inline bool ip6_mask_is_empty(__be32 mask[4])
 }
 
 static int efx_ethtool_set_class_rule(struct efx_nic *efx,
-				      struct ethtool_rx_flow_spec *rule)
+				      struct ethtool_rx_flow_spec *rule,
+				      u32 rss_context)
 {
 	struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
 	struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
@@ -1066,6 +1086,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 	struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
 	struct ethhdr *mac_entry = &rule->h_u.ether_spec;
 	struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+	enum efx_filter_flags flags = 0;
 	struct efx_filter_spec spec;
 	int rc;
 
@@ -1084,12 +1105,19 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 	     rule->m_ext.data[1]))
 		return -EINVAL;
 
-	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
-			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
+	if (efx->rx_scatter)
+		flags |= EFX_FILTER_FLAG_RX_SCATTER;
+	if (rule->flow_type & FLOW_RSS)
+		flags |= EFX_FILTER_FLAG_RX_RSS;
+
+	efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, flags,
 			   (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
 			   EFX_FILTER_RX_DMAQ_ID_DROP : rule->ring_cookie);
 
-	switch (rule->flow_type & ~FLOW_EXT) {
+	if (rule->flow_type & FLOW_RSS)
+		spec.rss_context = rss_context;
+
+	switch (rule->flow_type & ~(FLOW_EXT | FLOW_RSS)) {
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
 		spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
@@ -1265,7 +1293,8 @@ static int efx_ethtool_set_rxnfc(struct net_device *net_dev,
 
 	switch (info->cmd) {
 	case ETHTOOL_SRXCLSRLINS:
-		return efx_ethtool_set_class_rule(efx, &info->fs);
+		return efx_ethtool_set_class_rule(efx, &info->fs,
+						  info->rss_context);
 
 	case ETHTOOL_SRXCLSRLDEL:
 		return efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_MANUAL,
@@ -1280,7 +1309,9 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
-	return (efx->n_rx_channels == 1) ? 0 : ARRAY_SIZE(efx->rx_indir_table);
+	if (efx->n_rx_channels == 1)
+		return 0;
+	return ARRAY_SIZE(efx->rss_context.rx_indir_table);
 }
 
 static u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev)
@@ -1303,9 +1334,11 @@ static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
 	if (indir)
-		memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
+		memcpy(indir, efx->rss_context.rx_indir_table,
+		       sizeof(efx->rss_context.rx_indir_table));
 	if (key)
-		memcpy(key, efx->rx_hash_key, efx->type->rx_hash_key_size);
+		memcpy(key, efx->rss_context.rx_hash_key,
+		       efx->type->rx_hash_key_size);
 	return 0;
 }
 
@@ -1321,13 +1354,93 @@ static int efx_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir,
 		return 0;
 
 	if (!key)
-		key = efx->rx_hash_key;
+		key = efx->rss_context.rx_hash_key;
 	if (!indir)
-		indir = efx->rx_indir_table;
+		indir = efx->rss_context.rx_indir_table;
 
 	return efx->type->rx_push_rss_config(efx, true, indir, key);
 }
 
+static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
+					u8 *key, u8 *hfunc, u32 rss_context)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
+	struct efx_rss_context *ctx;
+	int rc;
+
+	if (!efx->type->rx_pull_rss_context_config)
+		return -EOPNOTSUPP;
+	ctx = efx_find_rss_context_entry(rss_context, &efx->rss_context.list);
+	if (!ctx)
+		return -ENOENT;
+	rc = efx->type->rx_pull_rss_context_config(efx, ctx);
+	if (rc)
+		return rc;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (indir)
+		memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table));
+	if (key)
+		memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size);
+	return 0;
+}
+
+static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
+					const u32 *indir, const u8 *key,
+					const u8 hfunc, u32 *rss_context,
+					bool delete)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
+	struct efx_rss_context *ctx;
+	bool allocated = false;
+	int rc;
+
+	if (!efx->type->rx_push_rss_context_config)
+		return -EOPNOTSUPP;
+	/* Hash function is Toeplitz, cannot be changed */
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+	if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+		if (delete)
+			/* alloc + delete == Nothing to do */
+			return -EINVAL;
+		ctx = efx_alloc_rss_context_entry(&efx->rss_context.list);
+		if (!ctx)
+			return -ENOMEM;
+		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+		/* Initialise indir table and key to defaults */
+		efx_set_default_rx_indir_table(efx, ctx);
+		netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
+		allocated = true;
+	} else {
+		ctx = efx_find_rss_context_entry(*rss_context,
+						 &efx->rss_context.list);
+		if (!ctx)
+			return -ENOENT;
+	}
+
+	if (delete) {
+		/* delete this context */
+		rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
+		if (!rc)
+			efx_free_rss_context_entry(ctx);
+		return rc;
+	}
+
+	if (!key)
+		key = ctx->rx_hash_key;
+	if (!indir)
+		indir = ctx->rx_indir_table;
+
+	rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key);
+	if (rc && allocated)
+		efx_free_rss_context_entry(ctx);
+	else
+		*rss_context = ctx->user_id;
+	return rc;
+}
+
 static int efx_ethtool_get_ts_info(struct net_device *net_dev,
 				   struct ethtool_ts_info *ts_info)
 {
@@ -1403,6 +1516,8 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.get_rxfh_key_size	= efx_ethtool_get_rxfh_key_size,
 	.get_rxfh		= efx_ethtool_get_rxfh,
 	.set_rxfh		= efx_ethtool_set_rxfh,
+	.get_rxfh_context	= efx_ethtool_get_rxfh_context,
+	.set_rxfh_context	= efx_ethtool_set_rxfh_context,
 	.get_ts_info		= efx_ethtool_get_ts_info,
 	.get_module_info	= efx_ethtool_get_module_info,
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 266b9bee1f3a..ad001e77d554 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1630,12 +1630,12 @@ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
 	size_t i = 0;
 	efx_dword_t dword;
 
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
 		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
 
 	for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
 		EFX_POPULATE_DWORD_1(dword, FRF_BZ_IT_QUEUE,
-				     efx->rx_indir_table[i]);
+				     efx->rss_context.rx_indir_table[i]);
 		efx_writed(efx, &dword,
 			   FR_BZ_RX_INDIRECTION_TBL +
 			   FR_BZ_RX_INDIRECTION_TBL_STEP * i);
@@ -1647,14 +1647,14 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
 	size_t i = 0;
 	efx_dword_t dword;
 
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
 		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
 
 	for (i = 0; i < FR_BZ_RX_INDIRECTION_TBL_ROWS; i++) {
 		efx_readd(efx, &dword,
 			   FR_BZ_RX_INDIRECTION_TBL +
 			   FR_BZ_RX_INDIRECTION_TBL_STEP * i);
-		efx->rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
+		efx->rss_context.rx_indir_table[i] = EFX_DWORD_FIELD(dword, FRF_BZ_IT_QUEUE);
 	}
 }
 
@@ -2032,8 +2032,7 @@ efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
 {
 	bool is_full = false;
 
-	if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) &&
-	    gen_spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT)
+	if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) && gen_spec->rss_context)
 		return -EINVAL;
 
 	spec->priority = gen_spec->priority;
diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h
index 8189a1cd973f..59021ad6d98d 100644
--- a/drivers/net/ethernet/sfc/filter.h
+++ b/drivers/net/ethernet/sfc/filter.h
@@ -125,7 +125,9 @@ enum efx_encap_type {
  * @match_flags: Match type flags, from &enum efx_filter_match_flags
  * @priority: Priority of the filter, from &enum efx_filter_priority
  * @flags: Miscellaneous flags, from &enum efx_filter_flags
- * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set
+ * @rss_context: RSS context to use, if %EFX_FILTER_FLAG_RX_RSS is set.  This
+ *	is a user_id (with 0 meaning the driver/default RSS context), not an
+ *	MCFW context_id.
  * @dmaq_id: Source/target queue index, or %EFX_FILTER_RX_DMAQ_ID_DROP for
  *	an RX drop filter
  * @outer_vid: Outer VLAN ID to match, if %EFX_FILTER_MATCH_OUTER_VID is set
@@ -173,7 +175,6 @@ struct efx_filter_spec {
 };
 
 enum {
-	EFX_FILTER_RSS_CONTEXT_DEFAULT = 0xffffffff,
 	EFX_FILTER_RX_DMAQ_ID_DROP = 0xfff
 };
 
@@ -185,7 +186,7 @@ static inline void efx_filter_init_rx(struct efx_filter_spec *spec,
 	memset(spec, 0, sizeof(*spec));
 	spec->priority = priority;
 	spec->flags = EFX_FILTER_FLAG_RX | flags;
-	spec->rss_context = EFX_FILTER_RSS_CONTEXT_DEFAULT;
+	spec->rss_context = 0;
 	spec->dmaq_id = rxq_id;
 }
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index d20a8660ee48..203d64c88de5 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -704,6 +704,28 @@ union efx_multicast_hash {
 
 struct vfdi_status;
 
+/* The reserved RSS context value */
+#define EFX_EF10_RSS_CONTEXT_INVALID	0xffffffff
+/**
+ * struct efx_rss_context - A user-defined RSS context for filtering
+ * @list: node of linked list on which this struct is stored
+ * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
+ *	%EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ *	For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID.
+ * @user_id: the rss_context ID exposed to userspace over ethtool.
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rx_hash_key: Toeplitz hash key for this RSS context
+ * @indir_table: Indirection table for this RSS context
+ */
+struct efx_rss_context {
+	struct list_head list;
+	u32 context_id;
+	u32 user_id;
+	bool rx_hash_udp_4tuple;
+	u8 rx_hash_key[40];
+	u32 rx_indir_table[128];
+};
+
 /**
  * struct efx_nic - an Efx NIC
  * @name: Device name (net device name or bus id before net device registered)
@@ -764,11 +786,9 @@ struct vfdi_status;
  *	(valid only for NICs that set %EFX_RX_PKT_PREFIX_LEN; always negative)
  * @rx_packet_ts_offset: Offset of timestamp from start of packet data
  *	(valid only if channel->sync_timestamps_enabled; always negative)
- * @rx_hash_key: Toeplitz hash key for RSS
- * @rx_indir_table: Indirection table for RSS
  * @rx_scatter: Scatter mode enabled for receives
- * @rss_active: RSS enabled on hardware
- * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
+ * @rss_context: Main RSS context.  Its @list member is the head of the list of
+ *	RSS contexts created by user requests
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -909,11 +929,8 @@ struct efx_nic {
 	int rx_packet_hash_offset;
 	int rx_packet_len_offset;
 	int rx_packet_ts_offset;
-	u8 rx_hash_key[40];
-	u32 rx_indir_table[128];
 	bool rx_scatter;
-	bool rss_active;
-	bool rx_hash_udp_4tuple;
+	struct efx_rss_context rss_context;
 
 	unsigned int_error_count;
 	unsigned long int_error_expire;
@@ -1099,6 +1116,10 @@ struct efx_udp_tunnel {
  * @tx_write: Write TX descriptors and doorbell
  * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
  * @rx_pull_rss_config: Read RSS hash key and indirection table back from the NIC
+ * @rx_push_rss_context_config: Write RSS hash key and indirection table for
+ *	user RSS context to the NIC
+ * @rx_pull_rss_context_config: Read RSS hash key and indirection table for user
+ *	RSS context back from the NIC
  * @rx_probe: Allocate resources for RX queue
  * @rx_init: Initialise RX queue on the NIC
  * @rx_remove: Free resources for RX queue
@@ -1237,6 +1258,13 @@ struct efx_nic_type {
 	int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
 				  const u32 *rx_indir_table, const u8 *key);
 	int (*rx_pull_rss_config)(struct efx_nic *efx);
+	int (*rx_push_rss_context_config)(struct efx_nic *efx,
+					  struct efx_rss_context *ctx,
+					  const u32 *rx_indir_table,
+					  const u8 *key);
+	int (*rx_pull_rss_context_config)(struct efx_nic *efx,
+					  struct efx_rss_context *ctx);
+	void (*rx_restore_rss_contexts)(struct efx_nic *efx);
 	int (*rx_probe)(struct efx_rx_queue *rx_queue);
 	void (*rx_init)(struct efx_rx_queue *rx_queue);
 	void (*rx_remove)(struct efx_rx_queue *rx_queue);
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 6549fc685a48..d080a414e8f2 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -374,7 +374,6 @@ enum {
  * @piobuf_size: size of a single PIO buffer
  * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
  *	reboot
- * @rx_rss_context: Firmware handle for our RSS context
  * @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared
  * @stats: Hardware statistics
  * @workaround_35388: Flag: firmware supports workaround for bug 35388
@@ -415,7 +414,6 @@ struct efx_ef10_nic_data {
 	unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
 	u16 piobuf_size;
 	bool must_restore_piobufs;
-	u32 rx_rss_context;
 	bool rx_rss_context_exclusive;
 	u64 stats[EF10_STAT_COUNT];
 	bool workaround_35388;
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index ae8645ae4492..18aab25234ba 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -350,11 +350,11 @@ static int siena_rx_pull_rss_config(struct efx_nic *efx)
 	 * siena_rx_push_rss_config, below)
 	 */
 	efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
-	memcpy(efx->rx_hash_key, &temp, sizeof(temp));
+	memcpy(efx->rss_context.rx_hash_key, &temp, sizeof(temp));
 	efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
-	memcpy(efx->rx_hash_key + sizeof(temp), &temp, sizeof(temp));
+	memcpy(efx->rss_context.rx_hash_key + sizeof(temp), &temp, sizeof(temp));
 	efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
-	memcpy(efx->rx_hash_key + 2 * sizeof(temp), &temp,
+	memcpy(efx->rss_context.rx_hash_key + 2 * sizeof(temp), &temp,
 	       FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
 	efx_farch_rx_pull_indir_table(efx);
 	return 0;
@@ -367,26 +367,26 @@ static int siena_rx_push_rss_config(struct efx_nic *efx, bool user,
 
 	/* Set hash key for IPv4 */
 	if (key)
-		memcpy(efx->rx_hash_key, key, sizeof(temp));
-	memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+		memcpy(efx->rss_context.rx_hash_key, key, sizeof(temp));
+	memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
 	efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
 
 	/* Enable IPv6 RSS */
-	BUILD_BUG_ON(sizeof(efx->rx_hash_key) <
+	BUILD_BUG_ON(sizeof(efx->rss_context.rx_hash_key) <
 		     2 * sizeof(temp) + FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8 ||
 		     FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN != 0);
-	memcpy(&temp, efx->rx_hash_key, sizeof(temp));
+	memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
 	efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
-	memcpy(&temp, efx->rx_hash_key + sizeof(temp), sizeof(temp));
+	memcpy(&temp, efx->rss_context.rx_hash_key + sizeof(temp), sizeof(temp));
 	efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
 	EFX_POPULATE_OWORD_2(temp, FRF_CZ_RX_RSS_IPV6_THASH_ENABLE, 1,
 			     FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE, 1);
-	memcpy(&temp, efx->rx_hash_key + 2 * sizeof(temp),
+	memcpy(&temp, efx->rss_context.rx_hash_key + 2 * sizeof(temp),
 	       FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
 	efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
 
-	memcpy(efx->rx_indir_table, rx_indir_table,
-	       sizeof(efx->rx_indir_table));
+	memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+	       sizeof(efx->rss_context.rx_indir_table));
 	efx_farch_rx_push_indir_table(efx);
 
 	return 0;
@@ -432,8 +432,8 @@ static int siena_init_nic(struct efx_nic *efx)
 			    EFX_RX_USR_BUF_SIZE >> 5);
 	efx_writeo(efx, &temp, FR_AZ_RX_CFG);
 
-	siena_rx_push_rss_config(efx, false, efx->rx_indir_table, NULL);
-	efx->rss_active = true;
+	siena_rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL);
+	efx->rss_context.context_id = 0; /* indicates RSS is active */
 
 	/* Enable event logging */
 	rc = efx_mcdi_log_ctrl(efx, true, false, 0);

From 84c4df40029fa8a4d126bba69bdea3b2f1bf6a70 Mon Sep 17 00:00:00 2001
From: Alexander Kurz <akurz@blala.de>
Date: Thu, 8 Mar 2018 11:19:53 +0000
Subject: [PATCH 0778/1678] net: usb: asix88179_178a: set permanent address
 once only

The permanent address of asix88179_178a devices is read at probe time
and should not be overwritten later. Otherwise it may be overwritten
unintentionally with a configured address.

Signed-off-by: Alexander Kurz <akurz@blala.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index f32261ecd215..a6ef75907ae9 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1556,7 +1556,6 @@ static int ax88179_reset(struct usbnet *dev)
 
 	ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN,
 			 dev->net->dev_addr);
-	memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
 
 	/* RX bulk configuration */
 	memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);

From cf29bded91f9153208c4c2145d602cd82430ad1a Mon Sep 17 00:00:00 2001
From: Alexander Kurz <akurz@blala.de>
Date: Thu, 8 Mar 2018 11:19:54 +0000
Subject: [PATCH 0779/1678] net: usb: asix88179_178a: de-duplicate code

Remove the duplicated code for asix88179_178a bind and reset methods.

Signed-off-by: Alexander Kurz <akurz@blala.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 115 +++++++++------------------------
 1 file changed, 30 insertions(+), 85 deletions(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index a6ef75907ae9..fb1b78d4b9ef 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev)
 	return 0;
 }
 
-static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
 {
 	u8 buf[5];
 	u16 *tmp16;
@@ -1231,12 +1231,11 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 	struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
 	struct ethtool_eee eee_data;
 
-	usbnet_get_endpoints(dev, intf);
-
 	tmp16 = (u16 *)buf;
 	tmp = (u8 *)buf;
 
-	memset(ax179_data, 0, sizeof(*ax179_data));
+	if (!do_reset)
+		memset(ax179_data, 0, sizeof(*ax179_data));
 
 	/* Power up ethernet PHY */
 	*tmp16 = 0;
@@ -1249,9 +1248,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
 	msleep(100);
 
+	if (do_reset)
+		ax88179_auto_detach(dev, 0);
+
 	ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
 			 ETH_ALEN, dev->net->dev_addr);
-	memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
+	if (!do_reset)
+		memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
 
 	/* RX bulk configuration */
 	memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
@@ -1266,19 +1269,21 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH,
 			  1, 1, tmp);
 
-	dev->net->netdev_ops = &ax88179_netdev_ops;
-	dev->net->ethtool_ops = &ax88179_ethtool_ops;
-	dev->net->needed_headroom = 8;
-	dev->net->max_mtu = 4088;
+	if (!do_reset) {
+		dev->net->netdev_ops = &ax88179_netdev_ops;
+		dev->net->ethtool_ops = &ax88179_ethtool_ops;
+		dev->net->needed_headroom = 8;
+		dev->net->max_mtu = 4088;
 
-	/* Initialize MII structure */
-	dev->mii.dev = dev->net;
-	dev->mii.mdio_read = ax88179_mdio_read;
-	dev->mii.mdio_write = ax88179_mdio_write;
-	dev->mii.phy_id_mask = 0xff;
-	dev->mii.reg_num_mask = 0xff;
-	dev->mii.phy_id = 0x03;
-	dev->mii.supports_gmii = 1;
+		/* Initialize MII structure */
+		dev->mii.dev = dev->net;
+		dev->mii.mdio_read = ax88179_mdio_read;
+		dev->mii.mdio_write = ax88179_mdio_write;
+		dev->mii.phy_id_mask = 0xff;
+		dev->mii.reg_num_mask = 0xff;
+		dev->mii.phy_id = 0x03;
+		dev->mii.supports_gmii = 1;
+	}
 
 	dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			      NETIF_F_RXCSUM;
@@ -1330,6 +1335,13 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 	return 0;
 }
 
+static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
+{
+	usbnet_get_endpoints(dev, intf);
+
+	return ax88179_link_bind_or_reset(dev, false);
+}
+
 static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
 	u16 tmp16;
@@ -1458,74 +1470,7 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 
 static int ax88179_link_reset(struct usbnet *dev)
 {
-	struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
-	u8 tmp[5], link_sts;
-	u16 mode, tmp16, delay = HZ / 10;
-	u32 tmp32 = 0x40000000;
-	unsigned long jtimeout;
-
-	jtimeout = jiffies + delay;
-	while (tmp32 & 0x40000000) {
-		mode = 0;
-		ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &mode);
-		ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2,
-				  &ax179_data->rxctl);
-
-		/*link up, check the usb device control TX FIFO full or empty*/
-		ax88179_read_cmd(dev, 0x81, 0x8c, 0, 4, &tmp32);
-
-		if (time_after(jiffies, jtimeout))
-			return 0;
-	}
-
-	mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
-	       AX_MEDIUM_RXFLOW_CTRLEN;
-
-	ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS,
-			 1, 1, &link_sts);
-
-	ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID,
-			 GMII_PHY_PHYSR, 2, &tmp16);
-
-	if (!(tmp16 & GMII_PHY_PHYSR_LINK)) {
-		return 0;
-	} else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
-		mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ;
-		if (dev->net->mtu > 1500)
-			mode |= AX_MEDIUM_JUMBO_EN;
-
-		if (link_sts & AX_USB_SS)
-			memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
-		else if (link_sts & AX_USB_HS)
-			memcpy(tmp, &AX88179_BULKIN_SIZE[1], 5);
-		else
-			memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
-	} else if (GMII_PHY_PHYSR_100 == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
-		mode |= AX_MEDIUM_PS;
-
-		if (link_sts & (AX_USB_SS | AX_USB_HS))
-			memcpy(tmp, &AX88179_BULKIN_SIZE[2], 5);
-		else
-			memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
-	} else {
-		memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
-	}
-
-	/* RX bulk configuration */
-	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp);
-
-	dev->rx_urb_size = (1024 * (tmp[3] + 2));
-
-	if (tmp16 & GMII_PHY_PHYSR_FULL)
-		mode |= AX_MEDIUM_FULL_DUPLEX;
-	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
-			  2, 2, &mode);
-
-	ax179_data->eee_enabled = ax88179_chk_eee(dev);
-
-	netif_carrier_on(dev->net);
-
-	return 0;
+	return ax88179_link_bind_or_reset(dev, true);
 }
 
 static int ax88179_reset(struct usbnet *dev)

From 72ab55e96ebc00d68501f5cda59478ed7cf7e484 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:35 +0100
Subject: [PATCH 0780/1678] tools: bpftool: silence 'missing initializer'
 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building bpf tool, gcc emits piles of warnings:

prog.c: In function ‘prog_fd_by_tag’:
prog.c:101:9: warning: missing initializer for field ‘type’ of ‘struct bpf_prog_info’ [-Wmissing-field-initializers]
  struct bpf_prog_info info = {};
         ^
In file included from /home/storage/jbenc/git/net-next/tools/lib/bpf/bpf.h:26:0,
                 from prog.c:47:
/home/storage/jbenc/git/net-next/tools/include/uapi/linux/bpf.h:925:8: note: ‘type’ declared here
  __u32 type;
        ^

As these warnings are not useful, switch them off.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 26901ec87361..4c2867481f5c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -38,7 +38,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
 CC = gcc
 
 CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
 LIBS = -lelf -lbfd -lopcodes $(LIBBPF)

From 5a8997f207154826c7bf1a97acf75ffb44159c50 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:36 +0100
Subject: [PATCH 0781/1678] tools: bpf: respect output directory during build

Currently, the programs under tools/bpf (with the notable exception of
bpftool) do not respect the output directory (make O=dir). Fix that.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c8ec0ae16bf0..e7b15967492e 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
 prefix = /usr
 
 CC = gcc
@@ -7,7 +9,7 @@ YACC = bison
 MAKE = make
 
 CFLAGS += -Wall -O2
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
@@ -38,32 +40,36 @@ ifeq ($(feature-disassembler-four-args), 1)
 CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
-%.yacc.c: %.y
+$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
 	$(YACC) -o $@ -d $<
 
-%.lex.c: %.l
+$(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
 	$(LEX) -o $@ $<
 
-all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
+$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
+	$(COMPILE.c) -o $@ $<
 
-bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
-bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
-bpf_jit_disasm : bpf_jit_disasm.o
+all: $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm bpftool
 
-bpf_dbg : LDLIBS = -lreadline
-bpf_dbg : bpf_dbg.o
+$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
+$(OUTPUT)bpf_jit_disasm: LDLIBS = -lopcodes -lbfd -ldl
+$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
 
-bpf_asm : LDLIBS =
-bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
-bpf_exp.lex.o : bpf_exp.yacc.c
+$(OUTPUT)bpf_dbg: LDLIBS = -lreadline
+$(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+
+$(OUTPUT)bpf_asm: LDLIBS =
+$(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.yacc.c
 
 clean: bpftool_clean
-	rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
+	rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
 
 install: bpftool_install
-	install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
-	install bpf_dbg $(prefix)/bin/bpf_dbg
-	install bpf_asm $(prefix)/bin/bpf_asm
+	install $(OUTPUT)bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
+	install $(OUTPUT)bpf_dbg $(prefix)/bin/bpf_dbg
+	install $(OUTPUT)bpf_asm $(prefix)/bin/bpf_asm
 
 bpftool:
 	$(MAKE) -C bpftool

From fde68c5becb552ce96fcf1d123624b5c11baa187 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:37 +0100
Subject: [PATCH 0782/1678] tools: bpf: consistent make bpf_install

Currently, make bpf_install in tools/ does not respect DESTDIR. Moreover, it
installs to /usr/bin/ unconditionally.

Let it respect DESTDIR and allow prefix to be specified. Also, to be more
consistent with bpftool and with the usual customs, default the prefix to
/usr/local instead of /usr.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index e7b15967492e..c42ca24a072d 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -1,12 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 include ../scripts/Makefile.include
 
-prefix = /usr
+prefix ?= /usr/local
 
 CC = gcc
 LEX = flex
 YACC = bison
 MAKE = make
+INSTALL ?= install
 
 CFLAGS += -Wall -O2
 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
@@ -67,9 +68,10 @@ clean: bpftool_clean
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
 
 install: bpftool_install
-	install $(OUTPUT)bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
-	install $(OUTPUT)bpf_dbg $(prefix)/bin/bpf_dbg
-	install $(OUTPUT)bpf_asm $(prefix)/bin/bpf_asm
+	$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+	$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+	$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+	$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
 
 bpftool:
 	$(MAKE) -C bpftool

From 6c0710084e6bf5337c9af8075e266069b79ac2c2 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:38 +0100
Subject: [PATCH 0783/1678] tools: bpf: make install should build first

Make the 'install' target depend on the 'all' target to build the binaries
first.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c42ca24a072d..e8d9e4125bf3 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -50,7 +50,9 @@ $(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
 $(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
 	$(COMPILE.c) -o $@ $<
 
-all: $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm bpftool
+PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
+
+all: $(PROGS) bpftool
 
 $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
 $(OUTPUT)bpf_jit_disasm: LDLIBS = -lopcodes -lbfd -ldl
@@ -67,7 +69,7 @@ clean: bpftool_clean
 	rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
 
-install: bpftool_install
+install: $(PROGS) bpftool_install
 	$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
 	$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
 	$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg

From 58416c37d0fe1c46aecb293283f2d558526cac19 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:39 +0100
Subject: [PATCH 0784/1678] tools: bpf: call descend in Makefile

Use the descend macro to properly propagate $(subdir) to bpftool.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index e8d9e4125bf3..daca0a4277d1 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -76,12 +76,12 @@ install: $(PROGS) bpftool_install
 	$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
 
 bpftool:
-	$(MAKE) -C bpftool
+	$(call descend,bpftool)
 
 bpftool_install:
-	$(MAKE) -C bpftool install
+	$(call descend,bpftool,install)
 
 bpftool_clean:
-	$(MAKE) -C bpftool clean
+	$(call descend,bpftool,clean)
 
 .PHONY: bpftool FORCE

From a50b7f8c618a7953cd0bef407b390b6db60d0fc3 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:40 +0100
Subject: [PATCH 0785/1678] tools: bpf: respect quiet/verbose build

Default to quiet build, with V=1 enabling verbose build as is usual.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index daca0a4277d1..757ea22c428a 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -17,6 +17,12 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
 FEATURE_USER = .bpf
 FEATURE_TESTS = libbfd disassembler-four-args
 FEATURE_DISPLAY = libbfd disassembler-four-args
@@ -42,38 +48,48 @@ CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
 $(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
-	$(YACC) -o $@ -d $<
+	$(QUIET_BISON)$(YACC) -o $@ -d $<
 
 $(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
-	$(LEX) -o $@ $<
+	$(QUIET_FLEX)$(LEX) -o $@ $<
 
 $(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
-	$(COMPILE.c) -o $@ $<
+	$(QUIET_CC)$(COMPILE.c) -o $@ $<
+
+$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
+	$(QUIET_CC)$(COMPILE.c) -o $@ $<
+$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
+	$(QUIET_CC)$(COMPILE.c) -o $@ $<
 
 PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
 
 all: $(PROGS) bpftool
 
 $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
-$(OUTPUT)bpf_jit_disasm: LDLIBS = -lopcodes -lbfd -ldl
 $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
+	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lopcodes -lbfd -ldl
 
-$(OUTPUT)bpf_dbg: LDLIBS = -lreadline
 $(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lreadline
 
-$(OUTPUT)bpf_asm: LDLIBS =
 $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
+
 $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.yacc.c
 
 clean: bpftool_clean
-	rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+	$(call QUIET_CLEAN, bpf-progs)
+	$(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
 
 install: $(PROGS) bpftool_install
-	$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
-	$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
-	$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
-	$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
+	$(call QUIET_INSTALL, bpf_jit_disasm)
+	$(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+	$(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+	$(call QUIET_INSTALL, bpf_dbg)
+	$(Q)$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+	$(call QUIET_INSTALL, bpf_asm)
+	$(Q)$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
 
 bpftool:
 	$(call descend,bpftool)

From ef8ba83b7cf6c530fbb2c83cf87f9755b8424a84 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Thu, 8 Mar 2018 23:00:41 +0100
Subject: [PATCH 0786/1678] tools: bpf: silence make by not deleting
 intermediate file

Even in quiet mode, make finishes with

rm tools/bpf/bpf_exp.lex.c

That's because it considers the file to be intermediate. Silence that by
mentioning the lex.c file instead of the lex.o file; the dependency still
stays.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 757ea22c428a..c07b4e718eeb 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -75,7 +75,7 @@ $(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
 $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
 	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
 
-$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
 
 clean: bpftool_clean
 	$(call QUIET_CLEAN, bpf-progs)

From 6d8cb045cde681e64a5ed80a2ab70be831a7f9b0 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 8 Mar 2018 23:46:33 -0800
Subject: [PATCH 0787/1678] bpf: comment why dots in filenames under BPF
 virtual FS are not allowed

When pinning a file under the BPF virtual file system (traditionally
/sys/fs/bpf), using a dot in the name of the location to pin at is not
allowed. For example, trying to pin at "/sys/fs/bpf/foo.bar" will be
rejected with -EPERM.

This check was introduced at the same time as the BPF file system
itself, with commit b2197755b263 ("bpf: add support for persistent
maps/progs"). At this time, it was checked in a function called
"bpf_dname_reserved()", which made clear that using a dot was reserved
for future extensions.

This function disappeared and the check was moved elsewhere with commit
0c93b7d85d40 ("bpf: reject invalid names right in ->lookup()"), and the
meaning of the dot ban was lost.

The present commit simply adds a comment in the source to explain to the
reader that the usage of dots is reserved for future usage.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/inode.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 81e2f6995adb..bf6da59ae0d0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -178,6 +178,9 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 static struct dentry *
 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
+	/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
+	 * extensions.
+	 */
 	if (strchr(dentry->d_name.name, '.'))
 		return ERR_PTR(-EPERM);
 

From 8edfaf7d33983309b30564e2abaede26163764f3 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 8 Mar 2018 14:56:43 -0500
Subject: [PATCH 0788/1678] tc-testing: add csum tests

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Tested-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/csum.json     | 410 ++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/csum.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644
index 000000000000..93cf8fea8ae7
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -0,0 +1,410 @@
+[
+    {
+        "id": "6d84",
+        "name": "Add csum iph action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum iph index 800",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 800",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "1862",
+        "name": "Add csum ip4h action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "15c6",
+        "name": "Add csum ipv4h action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 1122",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bf47",
+        "name": "Add csum icmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 1",
+        "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "cc1d",
+        "name": "Add csum igmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum igmp index 999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 999",
+        "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bccc",
+        "name": "Add csum foobar action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum foobar index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "3bb4",
+        "name": "Add csum tcp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 9999",
+        "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "759c",
+        "name": "Add csum udp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp index 334455",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 334455",
+        "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bdb6",
+        "name": "Add csum udp xor iph action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "c220",
+        "name": "Add csum udplite action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 3",
+        "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "8993",
+        "name": "Add csum sctp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum sctp index 777",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 777",
+        "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "b138",
+        "name": "Add csum ip & icmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 123",
+        "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "eeda",
+        "name": "Add csum ip & sctp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 2",
+        "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "0017",
+        "name": "Add csum udp or tcp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 27",
+        "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "ce92",
+        "name": "Add csum udp action with cookie",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "912f",
+        "name": "Add csum icmp action with large cookie",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 17",
+        "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "879b",
+        "name": "Add batch of 32 csum tcp actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "^[ \t]+index [0-9]* ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    }
+]

From b19e5c1515f2ed0adf5806581ac6d0c4dc3e6dff Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 8 Mar 2018 21:21:36 +0100
Subject: [PATCH 0789/1678] net: dsa: mv88e6xxx: Fix irq free'ing

Call the common irq free function, rather than going recursive and
blowing away the stack, followed by the machine.

Fixes: 294d711ee8c0 ("net: dsa: mv88e6xxx: Poll when no interrupt defined")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index cfd53632a655..bd3ee84770c7 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -360,7 +360,7 @@ static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip)
 
 static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
 {
-	mv88e6xxx_g1_irq_free(chip);
+	mv88e6xxx_g1_irq_free_common(chip);
 
 	free_irq(chip->irq, chip);
 }

From 75291f3a6b86a53f2607658de3b8b267e306bf60 Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Thu, 8 Mar 2018 15:27:44 -0500
Subject: [PATCH 0790/1678] tools: tc-testing: Can refer to $TESTID in test
 spec

When processing the commands in the test cases, substitute
the test id for $TESTID.  This helps to make more flexible
tests.  For example, the testid can be given as a command
line argument.

As an example, if we wish to save the test output to a file
named for the test case, we can write in the test case:

"cmdUnderTest": "some test command | tee -a $TESTID.out"

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 241eea37e4a4..c05b9f0f3db2 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -177,6 +177,7 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
                 '"{}" did not complete successfully'.format(prefix))
 
 def run_one_test(pm, args, index, tidx):
+    global NAMES
     result = True
     tresult = ""
     tap = ""
@@ -184,6 +185,9 @@ def run_one_test(pm, args, index, tidx):
         print("\t====================\n=====> ", end="")
     print("Test " + tidx["id"] + ": " + tidx["name"])
 
+    # populate NAMES with TESTID for this test
+    NAMES['TESTID'] = tidx['id']
+
     pm.call_pre_case(index, tidx['id'])
     prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
 
@@ -227,6 +231,8 @@ def run_one_test(pm, args, index, tidx):
 
     index += 1
 
+    # remove TESTID from NAMES
+    del(NAMES['TESTID'])
     return tap
 
 def test_runner(pm, args, filtered_tests):

From 2b3905de8b3d8511aee1d4acbf063197291cdd3f Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Thu, 8 Mar 2018 15:28:03 -0500
Subject: [PATCH 0791/1678] tools: tc-testing: Can pause just before post-suite

With option -P, the test script will pause just before
the post_suite functions are called.  This allows the tester to
inspect the system before it is torn down.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index c05b9f0f3db2..44de4a272a11 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -324,6 +324,12 @@ def test_runner(pm, args, filtered_tests):
                 count += 1
 
         tap += 'done flushing skipped test tap output\n'
+
+    if args.pause:
+        print('Want to pause\nPress enter to continue ...')
+        if input(sys.stdin):
+            print('got something on stdin')
+
     pm.call_post_suite(index)
 
     return tap
@@ -406,6 +412,9 @@ def set_args(parser):
         help='Suppress tap results for command under test')
     parser.add_argument('-d', '--device',
                         help='Execute the test case in flower category')
+    parser.add_argument(
+        '-P', '--pause', action='store_true',
+        help='Pause execution just before post-suite stage')
     return parser
 
 

From 79134e6ce2c9d1a00eab4d98cb48f975dd2474cb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 8 Mar 2018 12:51:41 -0800
Subject: [PATCH 0792/1678] net: do not create fallback tunnels for non-default
 namespaces

fallback tunnels (like tunl0, gre0, gretap0, erspan0, sit0,
ip6tnl0, ip6gre0) are automatically created when the corresponding
module is loaded.

These tunnels are also automatically created when a new network
namespace is created, at a great cost.

In many cases, netns are used for isolation purposes, and these
extra network devices are a waste of resources. We are using
thousands of netns per host, and hit the netns creation/delete
bottleneck a lot. (Many thanks to Kirill for recent work on this)

Add a new sysctl so that we can opt-out from this automatic creation.

Note that these tunnels are still created for the initial namespace,
to be the least intrusive for typical setups.

Tested:
lpk43:~# cat add_del_unshare.sh
for i in `seq 1 40`
do
 (for j in `seq 1 100` ; do  unshare -n /bin/true >/dev/null ; done) &
done
wait

lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh

real	0m37.521s
user	0m0.886s
sys	7m7.084s
lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time ./add_del_unshare.sh

real	0m4.761s
user	0m0.851s
sys	1m8.343s
lpk43:~#

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/sysctl/net.txt | 12 ++++++++++++
 include/linux/netdevice.h    |  7 +++++++
 include/net/ip_tunnels.h     |  2 ++
 net/core/sysctl_net_core.c   | 12 ++++++++++++
 net/ipv4/ip_tunnel.c         | 20 ++++++++++++--------
 net/ipv6/ip6_gre.c           |  4 +++-
 net/ipv6/ip6_tunnel.c        |  2 ++
 net/ipv6/sit.c               |  5 ++++-
 8 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 35c62f522754..5992602469d8 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -270,6 +270,18 @@ optmem_max
 Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
 of struct cmsghdr structures with appended data.
 
+fb_tunnels_only_for_init_net
+----------------------------
+
+Controls if fallback tunnels (like tunl0, gre0, gretap0, erspan0,
+sit0, ip6tnl0, ip6gre0) are automatically created when a new
+network namespace is created, if corresponding tunnel is present
+in initial network namespace.
+If set to 1, these devices are not automatically created, and
+user space is responsible for creating them if needed.
+
+Default : 0  (for compatibility reasons)
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 -------------------------------------------------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 95a613a7cc1c..9711108c3916 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -585,6 +585,13 @@ struct netdev_queue {
 #endif
 } ____cacheline_aligned_in_smp;
 
+extern int sysctl_fb_tunnels_only_for_init_net;
+
+static inline bool net_has_fallback_tunnels(const struct net *net)
+{
+	return net == &init_net || !sysctl_fb_tunnels_only_for_init_net;
+}
+
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
 {
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index cbe5addb9293..540a4b4417bf 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -180,8 +180,10 @@ struct tnl_ptk_info {
 
 struct ip_tunnel_net {
 	struct net_device *fb_tunnel_dev;
+	struct rtnl_link_ops *rtnl_link_ops;
 	struct hlist_head tunnels[IP_TNL_HASH_SIZE];
 	struct ip_tunnel __rcu *collect_md_tun;
+	int type;
 };
 
 static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d714f65782b7..4f47f92459cc 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -32,6 +32,9 @@ static int max_skb_frags = MAX_SKB_FRAGS;
 
 static int net_msg_warn;	/* Unused, but still a sysctl */
 
+int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
+EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -513,6 +516,15 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 	},
+	{
+		.procname	= "fb_tunnels_only_for_init_net",
+		.data		= &sysctl_fb_tunnels_only_for_init_net,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 602597dfc395..5fcb17cb426b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -347,8 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
 	struct net_device *dev;
 	int t_hlen;
 
-	BUG_ON(!itn->fb_tunnel_dev);
-	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
+	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
 	if (IS_ERR(dev))
 		return ERR_CAST(dev);
 
@@ -822,7 +821,6 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 	struct net *net = t->net;
 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
 
-	BUG_ON(!itn->fb_tunnel_dev);
 	switch (cmd) {
 	case SIOCGETTUNNEL:
 		if (dev == itn->fb_tunnel_dev) {
@@ -847,7 +845,7 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 				p->o_key = 0;
 		}
 
-		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
+		t = ip_tunnel_find(itn, p, itn->type);
 
 		if (cmd == SIOCADDTUNNEL) {
 			if (!t) {
@@ -991,10 +989,15 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 	struct ip_tunnel_parm parms;
 	unsigned int i;
 
+	itn->rtnl_link_ops = ops;
 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&itn->tunnels[i]);
 
-	if (!ops) {
+	if (!ops || !net_has_fallback_tunnels(net)) {
+		struct ip_tunnel_net *it_init_net;
+
+		it_init_net = net_generic(&init_net, ip_tnl_net_id);
+		itn->type = it_init_net->type;
 		itn->fb_tunnel_dev = NULL;
 		return 0;
 	}
@@ -1012,6 +1015,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+		itn->type = itn->fb_tunnel_dev->type;
 	}
 	rtnl_unlock();
 
@@ -1019,10 +1023,10 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
 
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
+			      struct list_head *head,
 			      struct rtnl_link_ops *ops)
 {
-	struct net *net = dev_net(itn->fb_tunnel_dev);
 	struct net_device *dev, *aux;
 	int h;
 
@@ -1054,7 +1058,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
 	rtnl_lock();
 	list_for_each_entry(net, net_list, exit_list) {
 		itn = net_generic(net, id);
-		ip_tunnel_destroy(itn, &list, ops);
+		ip_tunnel_destroy(net, itn, &list, ops);
 	}
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3dfbd0300..7d8775c9570d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -236,7 +236,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 		return t;
 
 	dev = ign->fb_tunnel_dev;
-	if (dev->flags & IFF_UP)
+	if (dev && dev->flags & IFF_UP)
 		return netdev_priv(dev);
 
 	return NULL;
@@ -1472,6 +1472,8 @@ static int __net_init ip6gre_init_net(struct net *net)
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 	int err;
 
+	if (!net_has_fallback_tunnels(net))
+		return 0;
 	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
 					  NET_NAME_UNKNOWN,
 					  ip6gre_tunnel_setup);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 56c4967f1868..5c045fa407da 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2205,6 +2205,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	ip6n->tnls[0] = ip6n->tnls_wc;
 	ip6n->tnls[1] = ip6n->tnls_r_l;
 
+	if (!net_has_fallback_tunnels(net))
+		return 0;
 	err = -ENOMEM;
 	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
 					NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a9c4ac6efe22..8a4f8fddd812 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
 #ifdef CONFIG_IPV6_SIT_6RD
 	struct ip_tunnel *t = netdev_priv(dev);
 
-	if (dev == sitn->fb_tunnel_dev) {
+	if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
 		ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
 		t->ip6rd.relay_prefix = 0;
 		t->ip6rd.prefixlen = 16;
@@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
 	sitn->tunnels[2] = sitn->tunnels_r;
 	sitn->tunnels[3] = sitn->tunnels_r_l;
 
+	if (!net_has_fallback_tunnels(net))
+		return 0;
+
 	sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
 					   NET_NAME_UNKNOWN,
 					   ipip6_tunnel_setup);

From d04e6990c948a3315ea8eca5979ebea48cda56f4 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 8 Mar 2018 16:59:17 -0500
Subject: [PATCH 0793/1678] net sched actions: update Add/Delete action API
 with new argument

Introduce a new function argument to carry total attributes size for
correct allocation of skb in event messages.

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  3 ++-
 net/sched/act_api.c   | 21 +++++++++++++--------
 net/sched/cls_api.c   |  3 ++-
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 9c2f22695025..88c1f99bae46 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -166,7 +166,8 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, struct netlink_ext_ack *extack);
+		    struct list_head *actions, size_t *attr_size,
+		    struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a54fa7b8c217..3de0e0610200 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -741,7 +741,8 @@ static void cleanup_a(struct list_head *actions, int ovr)
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, struct netlink_ext_ack *extack)
+		    struct list_head *actions, size_t *attr_size,
+		    struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -994,12 +995,13 @@ err_out:
 
 static int
 tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid, struct netlink_ext_ack *extack)
+	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
 	int ret;
 	struct sk_buff *skb;
 
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+			GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
@@ -1032,6 +1034,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
+	size_t attr_size = 0;
 	LIST_HEAD(actions);
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
@@ -1059,7 +1062,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	if (event == RTM_GETACTION)
 		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
 	else { /* delete */
-		ret = tcf_del_notify(net, n, &actions, portid, extack);
+		ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
 		if (ret)
 			goto err;
 		return ret;
@@ -1072,12 +1075,13 @@ err:
 
 static int
 tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid, struct netlink_ext_ack *extack)
+	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	int err = 0;
 
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
+			GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
@@ -1099,15 +1103,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 			  struct nlmsghdr *n, u32 portid, int ovr,
 			  struct netlink_ext_ack *extack)
 {
+	size_t attr_size = 0;
 	int ret = 0;
 	LIST_HEAD(actions);
 
 	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-			      extack);
+			      &attr_size, extack);
 	if (ret)
 		return ret;
 
-	return tcf_add_notify(net, n, &actions, portid, extack);
+	return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 19f9f421d5b7..ec5fe8ec0c3e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1433,6 +1433,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 #ifdef CONFIG_NET_CLS_ACT
 	{
 		struct tc_action *act;
+		size_t attr_size = 0;
 
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
@@ -1450,7 +1451,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions, extack);
+					      &actions, &attr_size, extack);
 			if (err)
 				return err;
 			list_for_each_entry(act, &actions, list)

From a03b91b17684023c45d39b836c85579d5e535983 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 8 Mar 2018 16:59:18 -0500
Subject: [PATCH 0794/1678] net sched actions: add new tc_action_ops callback

Add a new callback in tc_action_ops, it will be needed by the tc actions
to compute its size when a ADD/DELETE notification message is constructed.
This routine has to take into account optional/variable size TLVs specific
per action.

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 88c1f99bae46..e0a9c2003b24 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -97,6 +97,7 @@ struct tc_action_ops {
 			const struct tc_action_ops *,
 			struct netlink_ext_ack *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
+	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 };
 

From 4e76e75d6aba83f35d55b50f66a6768af1657563 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 8 Mar 2018 16:59:19 -0500
Subject: [PATCH 0795/1678] net sched actions: calculate add/delete event
 message size

Introduce routines to calculate size of the shared tc netlink attributes
and the full message size including netlink header and tc service header.

Update add/delete action logic to have the size for event messages,
the size is passed to tcf_add_notify() and tcf_del_notify() where the
notification message is being allocated and constructed.

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3de0e0610200..57cf37145282 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -109,6 +109,42 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 }
 EXPORT_SYMBOL(__tcf_idr_release);
 
+static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+{
+	u32 cookie_len = 0;
+
+	if (act->act_cookie)
+		cookie_len = nla_total_size(act->act_cookie->len);
+
+	return  nla_total_size(0) /* action number nested */
+		+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+		+ cookie_len /* TCA_ACT_COOKIE */
+		+ nla_total_size(0) /* TCA_ACT_STATS nested */
+		/* TCA_STATS_BASIC */
+		+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
+		/* TCA_STATS_QUEUE */
+		+ nla_total_size_64bit(sizeof(struct gnet_stats_queue))
+		+ nla_total_size(0) /* TCA_OPTIONS nested */
+		+ nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
+}
+
+static size_t tcf_action_full_attrs_size(size_t sz)
+{
+	return NLMSG_HDRLEN                     /* struct nlmsghdr */
+		+ sizeof(struct tcamsg)
+		+ nla_total_size(0)             /* TCA_ACT_TAB nested */
+		+ sz;
+}
+
+static size_t tcf_action_fill_size(const struct tc_action *act)
+{
+	size_t sz = tcf_action_shared_attrs_size(act);
+
+	if (act->ops->get_fill_size)
+		return act->ops->get_fill_size(act) + sz;
+	return sz;
+}
+
 static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
@@ -746,6 +782,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
+	size_t sz = 0;
 	int err;
 	int i;
 
@@ -761,11 +798,14 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 			goto err;
 		}
 		act->order = i;
+		sz += tcf_action_fill_size(act);
 		if (ovr)
 			act->tcfa_refcnt++;
 		list_add_tail(&act->list, actions);
 	}
 
+	*attr_size = tcf_action_full_attrs_size(sz);
+
 	/* Remove the temp refcnt which was necessary to protect against
 	 * destroying an existing action which was being replaced
 	 */
@@ -1056,9 +1096,12 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 			goto err;
 		}
 		act->order = i;
+		attr_size += tcf_action_fill_size(act);
 		list_add_tail(&act->list, &actions);
 	}
 
+	attr_size = tcf_action_full_attrs_size(attr_size);
+
 	if (event == RTM_GETACTION)
 		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
 	else { /* delete */

From 9c5c9c573700a0a4a4afa431da3e7d21f2992627 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 8 Mar 2018 16:59:20 -0500
Subject: [PATCH 0796/1678] net sched actions: implement get_fill_size routine
 in act_gact

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_gact.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 74563254e676..88fbb8403565 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -217,6 +217,19 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static size_t tcf_gact_get_fill_size(const struct tc_action *act)
+{
+	size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */
+
+#ifdef CONFIG_GACT_PROB
+	if (to_gact(act)->tcfg_ptype)
+		/* TCA_GACT_PROB */
+		sz += nla_total_size(sizeof(struct tc_gact_p));
+#endif
+
+	return sz;
+}
+
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
 	.type		=	TCA_ACT_GACT,
@@ -227,6 +240,7 @@ static struct tc_action_ops act_gact_ops = {
 	.init		=	tcf_gact_init,
 	.walk		=	tcf_gact_walker,
 	.lookup		=	tcf_gact_search,
+	.get_fill_size	=	tcf_gact_get_fill_size,
 	.size		=	sizeof(struct tcf_gact),
 };
 

From 89523cfaa5e316ef3eec8d2c6ca44f9b5b9458d4 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:36:56 +0800
Subject: [PATCH 0797/1678] net: hns3: refactor the hclge_get/set_rss function

This patch refactors the hclge_get/set_rss function in
order to fix the rss configuration loss problem during
reset process.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 39 ++++---------------
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  2 +
 2 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 39bc0e6d6ab4..cd5b040b337e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2979,31 +2979,6 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
 	return HCLGE_RSS_IND_TBL_SIZE;
 }
 
-static int hclge_get_rss_algo(struct hclge_dev *hdev)
-{
-	struct hclge_rss_config_cmd *req;
-	struct hclge_desc desc;
-	int rss_hash_algo;
-	int ret;
-
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, true);
-
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Get link status error, status =%d\n", ret);
-		return ret;
-	}
-
-	req = (struct hclge_rss_config_cmd *)desc.data;
-	rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
-
-	if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
-		return ETH_RSS_HASH_TOP;
-
-	return -EINVAL;
-}
-
 static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
 				  const u8 hfunc, const u8 *key)
 {
@@ -3042,7 +3017,7 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
 	return 0;
 }
 
-static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
+static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u8 *indir)
 {
 	struct hclge_rss_indirection_table_cmd *req;
 	struct hclge_desc desc;
@@ -3138,12 +3113,11 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
 			 u8 *key, u8 *hfunc)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
 	int i;
 
 	/* Get hash algorithm */
 	if (hfunc)
-		*hfunc = hclge_get_rss_algo(hdev);
+		*hfunc = vport->rss_algo;
 
 	/* Get the RSS Key required by the user */
 	if (key)
@@ -3167,8 +3141,6 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 
 	/* Set the RSS Hash Key if specififed by the user */
 	if (key) {
-		/* Update the shadow RSS key with user specified qids */
-		memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
 
 		if (hfunc == ETH_RSS_HASH_TOP ||
 		    hfunc == ETH_RSS_HASH_NO_CHANGE)
@@ -3178,6 +3150,10 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 		ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
 		if (ret)
 			return ret;
+
+		/* Update the shadow RSS key with user specified qids */
+		memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
+		vport->rss_algo = hash_algo;
 	}
 
 	/* Update the shadow RSS table with user specified qids */
@@ -3185,8 +3161,7 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
 		vport->rss_indirection_tbl[i] = indir[i];
 
 	/* Update the hardware */
-	ret = hclge_set_rss_indir_table(hdev, indir);
-	return ret;
+	return hclge_set_rss_indir_table(hdev, vport->rss_indirection_tbl);
 }
 
 static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index d99a76a9557c..7e762c4d4d81 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -579,6 +579,8 @@ struct hclge_vport {
 	u8  rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
 	/* User configured lookup table entries */
 	u8  rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+	int rss_algo;		/* User configured hash algorithm */
+
 	u16 alloc_rss_size;
 
 	u16 qs_offset;

From 6f2af4295553348953e91dfc0bd77749de961804 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:36:57 +0800
Subject: [PATCH 0798/1678] net: hns3: refactor the hclge_get/set_rss_tuple
 function

This patch refactors the hclge_get/set_rss_tuple function
in order to fix the rss configuration loss problem during
reset process.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 91 +++++++++++--------
 .../hisilicon/hns3/hns3pf/hclge_main.h        | 13 +++
 2 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index cd5b040b337e..747cc8f00116 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3091,14 +3091,16 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
 
 	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
-	req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-	req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-	req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
-	req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-	req->ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-	req->ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
-	req->ipv6_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
-	req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+	/* Get the tuple cfg from pf */
+	req->ipv4_tcp_en = hdev->vport[0].rss_tuple_sets.ipv4_tcp_en;
+	req->ipv4_udp_en = hdev->vport[0].rss_tuple_sets.ipv4_udp_en;
+	req->ipv4_sctp_en = hdev->vport[0].rss_tuple_sets.ipv4_sctp_en;
+	req->ipv4_fragment_en = hdev->vport[0].rss_tuple_sets.ipv4_fragment_en;
+	req->ipv6_tcp_en = hdev->vport[0].rss_tuple_sets.ipv6_tcp_en;
+	req->ipv6_udp_en = hdev->vport[0].rss_tuple_sets.ipv6_udp_en;
+	req->ipv6_sctp_en = hdev->vport[0].rss_tuple_sets.ipv6_sctp_en;
+	req->ipv6_fragment_en = hdev->vport[0].rss_tuple_sets.ipv6_fragment_en;
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
@@ -3204,15 +3206,16 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
 		return -EINVAL;
 
 	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Read rss tuple fail, status = %d\n", ret);
-		return ret;
-	}
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
 
-	hclge_cmd_reuse_desc(&desc, false);
+	req->ipv4_tcp_en = vport->rss_tuple_sets.ipv4_tcp_en;
+	req->ipv4_udp_en = vport->rss_tuple_sets.ipv4_udp_en;
+	req->ipv4_sctp_en = vport->rss_tuple_sets.ipv4_sctp_en;
+	req->ipv4_fragment_en = vport->rss_tuple_sets.ipv4_fragment_en;
+	req->ipv6_tcp_en = vport->rss_tuple_sets.ipv6_tcp_en;
+	req->ipv6_udp_en = vport->rss_tuple_sets.ipv6_udp_en;
+	req->ipv6_sctp_en = vport->rss_tuple_sets.ipv6_sctp_en;
+	req->ipv6_fragment_en = vport->rss_tuple_sets.ipv6_fragment_en;
 
 	tuple_sets = hclge_get_rss_hash_bits(nfc);
 	switch (nfc->flow_type) {
@@ -3249,52 +3252,49 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle,
 	}
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
+	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Set rss tuple fail, status = %d\n", ret);
+		return ret;
+	}
 
-	return ret;
+	vport->rss_tuple_sets.ipv4_tcp_en = req->ipv4_tcp_en;
+	vport->rss_tuple_sets.ipv4_udp_en = req->ipv4_udp_en;
+	vport->rss_tuple_sets.ipv4_sctp_en = req->ipv4_sctp_en;
+	vport->rss_tuple_sets.ipv4_fragment_en = req->ipv4_fragment_en;
+	vport->rss_tuple_sets.ipv6_tcp_en = req->ipv6_tcp_en;
+	vport->rss_tuple_sets.ipv6_udp_en = req->ipv6_udp_en;
+	vport->rss_tuple_sets.ipv6_sctp_en = req->ipv6_sctp_en;
+	vport->rss_tuple_sets.ipv6_fragment_en = req->ipv6_fragment_en;
+	return 0;
 }
 
 static int hclge_get_rss_tuple(struct hnae3_handle *handle,
 			       struct ethtool_rxnfc *nfc)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-	struct hclge_rss_input_tuple_cmd *req;
-	struct hclge_desc desc;
 	u8 tuple_sets;
-	int ret;
 
 	nfc->data = 0;
 
-	req = (struct hclge_rss_input_tuple_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Read rss tuple fail, status = %d\n", ret);
-		return ret;
-	}
-
 	switch (nfc->flow_type) {
 	case TCP_V4_FLOW:
-		tuple_sets = req->ipv4_tcp_en;
+		tuple_sets = vport->rss_tuple_sets.ipv4_tcp_en;
 		break;
 	case UDP_V4_FLOW:
-		tuple_sets = req->ipv4_udp_en;
+		tuple_sets = vport->rss_tuple_sets.ipv4_udp_en;
 		break;
 	case TCP_V6_FLOW:
-		tuple_sets = req->ipv6_tcp_en;
+		tuple_sets = vport->rss_tuple_sets.ipv6_tcp_en;
 		break;
 	case UDP_V6_FLOW:
-		tuple_sets = req->ipv6_udp_en;
+		tuple_sets = vport->rss_tuple_sets.ipv6_udp_en;
 		break;
 	case SCTP_V4_FLOW:
-		tuple_sets = req->ipv4_sctp_en;
+		tuple_sets = vport->rss_tuple_sets.ipv4_sctp_en;
 		break;
 	case SCTP_V6_FLOW:
-		tuple_sets = req->ipv6_sctp_en;
+		tuple_sets = vport->rss_tuple_sets.ipv6_sctp_en;
 		break;
 	case IPV4_FLOW:
 	case IPV6_FLOW:
@@ -3349,6 +3349,23 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
 
 	/* Initialize RSS indirect table for each vport */
 	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+		vport[j].rss_tuple_sets.ipv4_tcp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[j].rss_tuple_sets.ipv4_udp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[j].rss_tuple_sets.ipv4_sctp_en =
+			HCLGE_RSS_INPUT_TUPLE_SCTP;
+		vport[j].rss_tuple_sets.ipv4_fragment_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[j].rss_tuple_sets.ipv6_tcp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[j].rss_tuple_sets.ipv6_udp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[j].rss_tuple_sets.ipv6_sctp_en =
+			HCLGE_RSS_INPUT_TUPLE_SCTP;
+		vport[j].rss_tuple_sets.ipv6_fragment_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+
 		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
 			vport[j].rss_indirection_tbl[i] =
 				i % vport[j].alloc_rss_size;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7e762c4d4d81..13d399931d93 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -573,6 +573,17 @@ struct hclge_rx_vtag_cfg {
 	bool vlan2_vlan_prionly;/* Outer VLAN Tag up to descriptor Enable */
 };
 
+struct hclge_rss_tuple_cfg {
+	u8 ipv4_tcp_en;
+	u8 ipv4_udp_en;
+	u8 ipv4_sctp_en;
+	u8 ipv4_fragment_en;
+	u8 ipv6_tcp_en;
+	u8 ipv6_udp_en;
+	u8 ipv6_sctp_en;
+	u8 ipv6_fragment_en;
+};
+
 struct hclge_vport {
 	u16 alloc_tqps;	/* Allocated Tx/Rx queues */
 
@@ -580,6 +591,8 @@ struct hclge_vport {
 	/* User configured lookup table entries */
 	u8  rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
 	int rss_algo;		/* User configured hash algorithm */
+	/* User configured rss tuple sets */
+	struct hclge_rss_tuple_cfg rss_tuple_sets;
 
 	u16 alloc_rss_size;
 

From 268f5dfade2a873cef516ce94ebfc7129af245da Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:36:58 +0800
Subject: [PATCH 0799/1678] net: hns3: fix for RSS configuration loss problem
 during reset

RSS configuration will be set to default value by hclge_rss_init_hw
during reset, which causes the RSS configuration loss problem.

This patch fixes it by setting the default value in
hclge_rss_init_cfg function, which will not be called in the reset
process.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_dcb.c         |   2 +
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 107 +++++++++---------
 .../hisilicon/hns3/hns3pf/hclge_main.h        |   1 +
 3 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 5018d6633133..407cfabe6d21 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -144,6 +144,8 @@ static int hclge_map_update(struct hnae3_handle *h)
 	if (ret)
 		return ret;
 
+	hclge_rss_indir_init_cfg(hdev);
+
 	return hclge_rss_init_hw(hdev);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 747cc8f00116..34a09057e7c8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3329,67 +3329,28 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
 
 int hclge_rss_init_hw(struct hclge_dev *hdev)
 {
-	const  u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
 	struct hclge_vport *vport = hdev->vport;
+	u8 *rss_indir = vport[0].rss_indirection_tbl;
+	u16 rss_size = vport[0].alloc_rss_size;
+	u8 *key = vport[0].rss_hash_key;
+	u8 hfunc = vport[0].rss_algo;
 	u16 tc_offset[HCLGE_MAX_TC_NUM];
-	u8 rss_key[HCLGE_RSS_KEY_SIZE];
 	u16 tc_valid[HCLGE_MAX_TC_NUM];
 	u16 tc_size[HCLGE_MAX_TC_NUM];
-	u32 *rss_indir = NULL;
-	u16 rss_size = 0, roundup_size;
-	const u8 *key;
-	int i, ret, j;
+	u16 roundup_size;
+	int i, ret;
 
-	rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
-	if (!rss_indir)
-		return -ENOMEM;
-
-	/* Get default RSS key */
-	netdev_rss_key_fill(rss_key, HCLGE_RSS_KEY_SIZE);
-
-	/* Initialize RSS indirect table for each vport */
-	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
-		vport[j].rss_tuple_sets.ipv4_tcp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[j].rss_tuple_sets.ipv4_udp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[j].rss_tuple_sets.ipv4_sctp_en =
-			HCLGE_RSS_INPUT_TUPLE_SCTP;
-		vport[j].rss_tuple_sets.ipv4_fragment_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[j].rss_tuple_sets.ipv6_tcp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[j].rss_tuple_sets.ipv6_udp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[j].rss_tuple_sets.ipv6_sctp_en =
-			HCLGE_RSS_INPUT_TUPLE_SCTP;
-		vport[j].rss_tuple_sets.ipv6_fragment_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-
-		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
-			vport[j].rss_indirection_tbl[i] =
-				i % vport[j].alloc_rss_size;
-
-			/* vport 0 is for PF */
-			if (j != 0)
-				continue;
-
-			rss_size = vport[j].alloc_rss_size;
-			rss_indir[i] = vport[j].rss_indirection_tbl[i];
-		}
-	}
 	ret = hclge_set_rss_indir_table(hdev, rss_indir);
 	if (ret)
-		goto err;
+		return ret;
 
-	key = rss_key;
 	ret = hclge_set_rss_algo_key(hdev, hfunc, key);
 	if (ret)
-		goto err;
+		return ret;
 
 	ret = hclge_set_rss_input_tuple(hdev);
 	if (ret)
-		goto err;
+		return ret;
 
 	/* Each TC have the same queue size, and tc_size set to hardware is
 	 * the log2 of roundup power of two of rss_size, the acutal queue
@@ -3399,8 +3360,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
 		dev_err(&hdev->pdev->dev,
 			"Configure rss tc size failed, invalid TC_SIZE = %d\n",
 			rss_size);
-		ret = -EINVAL;
-		goto err;
+		return -EINVAL;
 	}
 
 	roundup_size = roundup_pow_of_two(rss_size);
@@ -3417,12 +3377,50 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
 		tc_offset[i] = rss_size * i;
 	}
 
-	ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+	return hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+}
 
-err:
-	kfree(rss_indir);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int i, j;
 
-	return ret;
+	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+		for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+			vport[j].rss_indirection_tbl[i] =
+				i % vport[j].alloc_rss_size;
+	}
+}
+
+static void hclge_rss_init_cfg(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = hdev->vport;
+	int i;
+
+	netdev_rss_key_fill(vport->rss_hash_key, HCLGE_RSS_KEY_SIZE);
+
+	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+		vport[i].rss_tuple_sets.ipv4_tcp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[i].rss_tuple_sets.ipv4_udp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[i].rss_tuple_sets.ipv4_sctp_en =
+			HCLGE_RSS_INPUT_TUPLE_SCTP;
+		vport[i].rss_tuple_sets.ipv4_fragment_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[i].rss_tuple_sets.ipv6_tcp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[i].rss_tuple_sets.ipv6_udp_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+		vport[i].rss_tuple_sets.ipv6_sctp_en =
+			HCLGE_RSS_INPUT_TUPLE_SCTP;
+		vport[i].rss_tuple_sets.ipv6_fragment_en =
+			HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+		vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+	}
+
+	hclge_rss_indir_init_cfg(hdev);
 }
 
 int hclge_bind_ring_with_vector(struct hclge_vport *vport,
@@ -5390,6 +5388,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	hclge_rss_init_cfg(hdev);
 	ret = hclge_rss_init_hw(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 13d399931d93..7bff6efe3e6d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -642,6 +642,7 @@ int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
 
 int hclge_buffer_alloc(struct hclge_dev *hdev);
 int hclge_rss_init_hw(struct hclge_dev *hdev);
+void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);

From f31c1ba6687ea00a4149d6557820e02470e1dcb8 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:36:59 +0800
Subject: [PATCH 0800/1678] net: hns3: fix for pause configuration lost during
 reset

Pause configuration will be set to default value by hclge_tm_schd_init
during reset, which causes the RSS configuration loss problem.

This patch fixes it by calling hclge_tm_init_hw during reset process
, which will set the pause configuration to default value.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 34a09057e7c8..109155de86ee 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5493,9 +5493,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	ret = hclge_tm_schd_init(hdev);
+	ret = hclge_tm_init_hw(hdev);
 	if (ret) {
-		dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+		dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret);
 		return ret;
 	}
 

From ec77789032c0d5ab77c32abd762f5a8e65198e1b Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:37:00 +0800
Subject: [PATCH 0801/1678] net: hns3: fix for use-after-free when setting ring
 parameter

In hns3_set_ringparam, hns3_uninit_all_ring frees the
memory pointed by priv->ring_data[i].ring, and
hns3_change_all_ring_bd_num use that pointer without mallocing,
which will cause a use-after-free problem.

The patch fixes it by not freeing the memory in
hns3_uninit_all_ring, and uses hns3_put_ring_config to free it
when necessary.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c93694510027..e0ab161d809e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2967,13 +2967,8 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 			h->ae_algo->ops->reset_queue(h, i);
 
 		hns3_fini_ring(priv->ring_data[i].ring);
-		devm_kfree(priv->dev, priv->ring_data[i].ring);
 		hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
-		devm_kfree(priv->dev,
-			   priv->ring_data[i + h->kinfo.num_tqps].ring);
 	}
-	devm_kfree(priv->dev, priv->ring_data);
-
 	return 0;
 }
 
@@ -3111,6 +3106,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 	if (ret)
 		netdev_err(netdev, "uninit ring error\n");
 
+	hns3_put_ring_config(priv);
+
 	priv->ring_data = NULL;
 
 	free_netdev(netdev);
@@ -3316,6 +3313,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
 	if (ret)
 		netdev_err(netdev, "uninit ring error\n");
 
+	hns3_put_ring_config(priv);
+
 	priv->ring_data = NULL;
 
 	return ret;
@@ -3422,6 +3421,7 @@ int hns3_set_channels(struct net_device *netdev,
 	}
 
 	hns3_uninit_all_ring(priv);
+	hns3_put_ring_config(priv);
 
 	org_tqp_num = h->kinfo.num_tqps;
 	ret = hns3_modify_tqp_num(netdev, new_tqp_num);

From 0d3e6631de9a56a56e06435f3a2c6196014a059d Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:37:01 +0800
Subject: [PATCH 0802/1678] net: hns3: refactor the get/put_vector function

There is a get_vector function, which allocate the vectors
for a client, but there is not a put_vector to free the
vector.

This patch introduces the put_vector function in order to
fix the coalesce configuration lost problem during reset
process.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  3 ++
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   |  4 +++
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 28 ++++++++++++++-----
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 12 ++++++--
 4 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index fd06bc78c58e..a07204837a9b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -265,6 +265,8 @@ struct hnae3_ae_dev {
  *   Get tc size of handle
  * get_vector()
  *   Get vector number and vector information
+ * put_vector()
+ *   Put the vector in hdev
  * map_ring_to_vector()
  *   Map rings to vector
  * unmap_ring_from_vector()
@@ -375,6 +377,7 @@ struct hnae3_ae_ops {
 
 	int (*get_vector)(struct hnae3_handle *handle, u16 vector_num,
 			  struct hnae3_vector_info *vector_info);
+	int (*put_vector)(struct hnae3_handle *handle, int vector_num);
 	int (*map_ring_to_vector)(struct hnae3_handle *handle,
 				  int vector_num,
 				  struct hnae3_ring_chain_node *vr_chain);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index e0ab161d809e..bbaa21f67d36 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2721,6 +2721,10 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 		if (ret)
 			return ret;
 
+		ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+		if (ret)
+			return ret;
+
 		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
 		if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 109155de86ee..a31877302fc2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2969,6 +2969,24 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
 	return -EINVAL;
 }
 
+static int hclge_put_vector(struct hnae3_handle *handle, int vector)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int vector_id;
+
+	vector_id = hclge_get_vector_index(hdev, vector);
+	if (vector_id < 0) {
+		dev_err(&hdev->pdev->dev,
+			"Get vector index fail. vector_id =%d\n", vector_id);
+		return vector_id;
+	}
+
+	hclge_free_vector(hdev, vector_id);
+
+	return 0;
+}
+
 static u32 hclge_get_rss_key_size(struct hnae3_handle *handle)
 {
 	return HCLGE_RSS_KEY_SIZE;
@@ -3523,18 +3541,13 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
 	}
 
 	ret = hclge_bind_ring_with_vector(vport, vector_id, false, ring_chain);
-	if (ret) {
+	if (ret)
 		dev_err(&handle->pdev->dev,
 			"Unmap ring from vector fail. vectorid=%d, ret =%d\n",
 			vector_id,
 			ret);
-		return ret;
-	}
 
-	/* Free this MSIX or MSI vector */
-	hclge_free_vector(hdev, vector_id);
-
-	return 0;
+	return ret;
 }
 
 int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
@@ -5994,6 +6007,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.map_ring_to_vector = hclge_map_ring_to_vector,
 	.unmap_ring_from_vector = hclge_unmap_ring_frm_vector,
 	.get_vector = hclge_get_vector,
+	.put_vector = hclge_put_vector,
 	.set_promisc_mode = hclge_set_promisc_mode,
 	.set_loopback = hclge_set_loopback,
 	.start = hclge_ae_start,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index c9a916a3d6e8..09a2a66975a6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -627,13 +627,18 @@ static int hclgevf_unmap_ring_from_vector(
 	}
 
 	ret = hclgevf_bind_ring_to_vector(handle, false, vector, ring_chain);
-	if (ret) {
+	if (ret)
 		dev_err(&handle->pdev->dev,
 			"Unmap ring from vector fail. vector=%d, ret =%d\n",
 			vector_id,
 			ret);
-		return ret;
-	}
+
+	return ret;
+}
+
+static int hclgevf_put_vector(struct hnae3_handle *handle, int vector)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
 	hclgevf_free_vector(hdev, vector);
 
@@ -1466,6 +1471,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.map_ring_to_vector = hclgevf_map_ring_to_vector,
 	.unmap_ring_from_vector = hclgevf_unmap_ring_from_vector,
 	.get_vector = hclgevf_get_vector,
+	.put_vector = hclgevf_put_vector,
 	.reset_queue = hclgevf_reset_tqp,
 	.set_promisc_mode = hclgevf_set_promisc_mode,
 	.get_mac_addr = hclgevf_get_mac_addr,

From dd38c72604dc8c49e6057010675651f3567dd3bf Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:37:02 +0800
Subject: [PATCH 0803/1678] net: hns3: fix for coalesce configuration lost
 during reset

Coalesce configuration will be set to default value by
hns3_nic_init_vector_data during reset, which causes the
coalesce configuration loss problem.

This patch fixes it by setting the default value in
hns3_nic_alloc_vector_data, which will not be called in the
reset process.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 178 ++++++++++++------
 1 file changed, 125 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index bbaa21f67d36..07dbebbdcb1f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -211,17 +211,23 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
 	tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
 	tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
 
+	/* Default: disable RL */
+	h->kinfo.int_rl_setting = 0;
+
+	tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
+	tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+}
+
+static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector,
+				      struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+
 	hns3_set_vector_coalesce_tx_gl(tqp_vector,
 				       tqp_vector->tx_group.int_gl);
 	hns3_set_vector_coalesce_rx_gl(tqp_vector,
 				       tqp_vector->rx_group.int_gl);
-
-	/* Default: disable RL */
-	h->kinfo.int_rl_setting = 0;
 	hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
-
-	tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
-	tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
 }
 
 static int hns3_nic_set_real_num_queue(struct net_device *netdev)
@@ -2623,6 +2629,65 @@ static void hns3_add_ring_to_group(struct hns3_enet_ring_group *group,
 static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_ring_chain_node vector_ring_chain;
+	struct hnae3_handle *h = priv->ae_handle;
+	struct hns3_enet_tqp_vector *tqp_vector;
+	int ret = 0;
+	u16 i;
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vector = &priv->tqp_vector[i];
+		hns3_vector_gl_rl_init_hw(tqp_vector, priv);
+		tqp_vector->num_tqps = 0;
+	}
+
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
+		u16 vector_i = i % priv->vector_num;
+		u16 tqp_num = h->kinfo.num_tqps;
+
+		tqp_vector = &priv->tqp_vector[vector_i];
+
+		hns3_add_ring_to_group(&tqp_vector->tx_group,
+				       priv->ring_data[i].ring);
+
+		hns3_add_ring_to_group(&tqp_vector->rx_group,
+				       priv->ring_data[i + tqp_num].ring);
+
+		priv->ring_data[i].ring->tqp_vector = tqp_vector;
+		priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+		tqp_vector->num_tqps++;
+	}
+
+	for (i = 0; i < priv->vector_num; i++) {
+		tqp_vector = &priv->tqp_vector[i];
+
+		tqp_vector->rx_group.total_bytes = 0;
+		tqp_vector->rx_group.total_packets = 0;
+		tqp_vector->tx_group.total_bytes = 0;
+		tqp_vector->tx_group.total_packets = 0;
+		tqp_vector->handle = h;
+
+		ret = hns3_get_vector_ring_chain(tqp_vector,
+						 &vector_ring_chain);
+		if (ret)
+			return ret;
+
+		ret = h->ae_algo->ops->map_ring_to_vector(h,
+			tqp_vector->vector_irq, &vector_ring_chain);
+
+		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+
+		if (ret)
+			return ret;
+
+		netif_napi_add(priv->netdev, &tqp_vector->napi,
+			       hns3_nic_common_poll, NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+}
+
+static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
+{
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	struct hnae3_vector_info *vector;
@@ -2646,53 +2711,17 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 	priv->tqp_vector = (struct hns3_enet_tqp_vector *)
 		devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
 			     GFP_KERNEL);
-	if (!priv->tqp_vector)
-		return -ENOMEM;
-
-	for (i = 0; i < tqp_num; i++) {
-		u16 vector_i = i % vector_num;
-
-		tqp_vector = &priv->tqp_vector[vector_i];
-
-		hns3_add_ring_to_group(&tqp_vector->tx_group,
-				       priv->ring_data[i].ring);
-
-		hns3_add_ring_to_group(&tqp_vector->rx_group,
-				       priv->ring_data[i + tqp_num].ring);
-
-		tqp_vector->idx = vector_i;
-		tqp_vector->mask_addr = vector[vector_i].io_addr;
-		tqp_vector->vector_irq = vector[vector_i].vector;
-		tqp_vector->num_tqps++;
-
-		priv->ring_data[i].ring->tqp_vector = tqp_vector;
-		priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+	if (!priv->tqp_vector) {
+		ret = -ENOMEM;
+		goto out;
 	}
 
-	for (i = 0; i < vector_num; i++) {
+	for (i = 0; i < priv->vector_num; i++) {
 		tqp_vector = &priv->tqp_vector[i];
-
-		tqp_vector->rx_group.total_bytes = 0;
-		tqp_vector->rx_group.total_packets = 0;
-		tqp_vector->tx_group.total_bytes = 0;
-		tqp_vector->tx_group.total_packets = 0;
+		tqp_vector->idx = i;
+		tqp_vector->mask_addr = vector[i].io_addr;
+		tqp_vector->vector_irq = vector[i].vector;
 		hns3_vector_gl_rl_init(tqp_vector, priv);
-		tqp_vector->handle = h;
-
-		ret = hns3_get_vector_ring_chain(tqp_vector,
-						 &vector_ring_chain);
-		if (ret)
-			goto out;
-
-		ret = h->ae_algo->ops->map_ring_to_vector(h,
-			tqp_vector->vector_irq, &vector_ring_chain);
-		if (ret)
-			goto out;
-
-		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
-
-		netif_napi_add(priv->netdev, &tqp_vector->napi,
-			       hns3_nic_common_poll, NAPI_POLL_WEIGHT);
 	}
 
 out:
@@ -2700,12 +2729,17 @@ out:
 	return ret;
 }
 
+static void hns3_clear_ring_group(struct hns3_enet_ring_group *group)
+{
+	group->ring = NULL;
+	group->count = 0;
+}
+
 static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_ring_chain_node vector_ring_chain;
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
-	struct pci_dev *pdev = h->pdev;
 	int i, ret;
 
 	for (i = 0; i < priv->vector_num; i++) {
@@ -2736,12 +2770,30 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
 		}
 
 		priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
-
+		hns3_clear_ring_group(&tqp_vector->rx_group);
+		hns3_clear_ring_group(&tqp_vector->tx_group);
 		netif_napi_del(&priv->tqp_vector[i].napi);
 	}
 
-	devm_kfree(&pdev->dev, priv->tqp_vector);
+	return 0;
+}
 
+static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
+{
+	struct hnae3_handle *h = priv->ae_handle;
+	struct pci_dev *pdev = h->pdev;
+	int i, ret;
+
+	for (i = 0; i < priv->vector_num; i++) {
+		struct hns3_enet_tqp_vector *tqp_vector;
+
+		tqp_vector = &priv->tqp_vector[i];
+		ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
+		if (ret)
+			return ret;
+	}
+
+	devm_kfree(&pdev->dev, priv->tqp_vector);
 	return 0;
 }
 
@@ -3057,6 +3109,12 @@ static int hns3_client_init(struct hnae3_handle *handle)
 		goto out_get_ring_cfg;
 	}
 
+	ret = hns3_nic_alloc_vector_data(priv);
+	if (ret) {
+		ret = -ENOMEM;
+		goto out_alloc_vector_data;
+	}
+
 	ret = hns3_nic_init_vector_data(priv);
 	if (ret) {
 		ret = -ENOMEM;
@@ -3085,8 +3143,10 @@ static int hns3_client_init(struct hnae3_handle *handle)
 out_reg_netdev_fail:
 out_init_ring_data:
 	(void)hns3_nic_uninit_vector_data(priv);
-	priv->ring_data = NULL;
 out_init_vector_data:
+	hns3_nic_dealloc_vector_data(priv);
+out_alloc_vector_data:
+	priv->ring_data = NULL;
 out_get_ring_cfg:
 	priv->ae_handle = NULL;
 	free_netdev(netdev);
@@ -3106,6 +3166,10 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 	if (ret)
 		netdev_err(netdev, "uninit vector error\n");
 
+	ret = hns3_nic_dealloc_vector_data(priv);
+	if (ret)
+		netdev_err(netdev, "dealloc vector error\n");
+
 	ret = hns3_uninit_all_ring(priv);
 	if (ret)
 		netdev_err(netdev, "uninit ring error\n");
@@ -3363,6 +3427,10 @@ static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
 	if (ret)
 		return ret;
 
+	ret = hns3_nic_alloc_vector_data(priv);
+	if (ret)
+		goto err_alloc_vector;
+
 	ret = hns3_nic_init_vector_data(priv);
 	if (ret)
 		goto err_uninit_vector;
@@ -3377,6 +3445,8 @@ err_put_ring:
 	hns3_put_ring_config(priv);
 err_uninit_vector:
 	hns3_nic_uninit_vector_data(priv);
+err_alloc_vector:
+	hns3_nic_dealloc_vector_data(priv);
 	return ret;
 }
 
@@ -3424,6 +3494,8 @@ int hns3_set_channels(struct net_device *netdev,
 		goto open_netdev;
 	}
 
+	hns3_nic_dealloc_vector_data(priv);
+
 	hns3_uninit_all_ring(priv);
 	hns3_put_ring_config(priv);
 

From 9bc727a9d5e5c47015db118db407cbada1081a1e Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:37:03 +0800
Subject: [PATCH 0804/1678] net: hns3: refactor the coalesce related struct

This patch refoctors the coalesce related struct by introducing
the hns3_enet_coalesce struct, in order to fix the coalesce
configuation lost problem when changing the channel number.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 46 +++++++++----------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   | 10 ++--
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    | 26 +++++++----
 3 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 07dbebbdcb1f..f466f608a47d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -168,8 +168,8 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
 	 * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing
 	 */
 
-	if (rl_reg > 0 && !tqp_vector->tx_group.gl_adapt_enable &&
-	    !tqp_vector->rx_group.gl_adapt_enable)
+	if (rl_reg > 0 && !tqp_vector->tx_group.coal.gl_adapt_enable &&
+	    !tqp_vector->rx_group.coal.gl_adapt_enable)
 		/* According to the hardware, the range of rl_reg is
 		 * 0-59 and the unit is 4.
 		 */
@@ -205,17 +205,17 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
 	 */
 
 	/* Default: enable interrupt coalescing self-adaptive and GL */
-	tqp_vector->tx_group.gl_adapt_enable = 1;
-	tqp_vector->rx_group.gl_adapt_enable = 1;
+	tqp_vector->tx_group.coal.gl_adapt_enable = 1;
+	tqp_vector->rx_group.coal.gl_adapt_enable = 1;
 
-	tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
-	tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
+	tqp_vector->tx_group.coal.int_gl = HNS3_INT_GL_50K;
+	tqp_vector->rx_group.coal.int_gl = HNS3_INT_GL_50K;
 
 	/* Default: disable RL */
 	h->kinfo.int_rl_setting = 0;
 
-	tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
-	tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+	tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
+	tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
 }
 
 static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector,
@@ -224,9 +224,9 @@ static void hns3_vector_gl_rl_init_hw(struct hns3_enet_tqp_vector *tqp_vector,
 	struct hnae3_handle *h = priv->ae_handle;
 
 	hns3_set_vector_coalesce_tx_gl(tqp_vector,
-				       tqp_vector->tx_group.int_gl);
+				       tqp_vector->tx_group.coal.int_gl);
 	hns3_set_vector_coalesce_rx_gl(tqp_vector,
-				       tqp_vector->rx_group.int_gl);
+				       tqp_vector->rx_group.coal.int_gl);
 	hns3_set_vector_coalesce_rl(tqp_vector, h->kinfo.int_rl_setting);
 }
 
@@ -2393,12 +2393,12 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 	u16 new_int_gl;
 	int usecs;
 
-	if (!ring_group->int_gl)
+	if (!ring_group->coal.int_gl)
 		return false;
 
 	if (ring_group->total_packets == 0) {
-		ring_group->int_gl = HNS3_INT_GL_50K;
-		ring_group->flow_level = HNS3_FLOW_LOW;
+		ring_group->coal.int_gl = HNS3_INT_GL_50K;
+		ring_group->coal.flow_level = HNS3_FLOW_LOW;
 		return true;
 	}
 
@@ -2408,10 +2408,10 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 	 * 20-1249MB/s high      (18000 ints/s)
 	 * > 40000pps  ultra     (8000 ints/s)
 	 */
-	new_flow_level = ring_group->flow_level;
-	new_int_gl = ring_group->int_gl;
+	new_flow_level = ring_group->coal.flow_level;
+	new_int_gl = ring_group->coal.int_gl;
 	tqp_vector = ring_group->ring->tqp_vector;
-	usecs = (ring_group->int_gl << 1);
+	usecs = (ring_group->coal.int_gl << 1);
 	bytes_per_usecs = ring_group->total_bytes / usecs;
 	/* 1000000 microseconds */
 	packets_per_secs = ring_group->total_packets * 1000000 / usecs;
@@ -2458,9 +2458,9 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 
 	ring_group->total_bytes = 0;
 	ring_group->total_packets = 0;
-	ring_group->flow_level = new_flow_level;
-	if (new_int_gl != ring_group->int_gl) {
-		ring_group->int_gl = new_int_gl;
+	ring_group->coal.flow_level = new_flow_level;
+	if (new_int_gl != ring_group->coal.int_gl) {
+		ring_group->coal.int_gl = new_int_gl;
 		return true;
 	}
 	return false;
@@ -2472,18 +2472,18 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
 	struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
 	bool rx_update, tx_update;
 
-	if (rx_group->gl_adapt_enable) {
+	if (rx_group->coal.gl_adapt_enable) {
 		rx_update = hns3_get_new_int_gl(rx_group);
 		if (rx_update)
 			hns3_set_vector_coalesce_rx_gl(tqp_vector,
-						       rx_group->int_gl);
+						       rx_group->coal.int_gl);
 	}
 
-	if (tx_group->gl_adapt_enable) {
+	if (tx_group->coal.gl_adapt_enable) {
 		tx_update = hns3_get_new_int_gl(&tqp_vector->tx_group);
 		if (tx_update)
 			hns3_set_vector_coalesce_tx_gl(tqp_vector,
-						       tx_group->int_gl);
+						       tx_group->coal.int_gl);
 	}
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 213f501b30bb..a5f4550483d9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -460,15 +460,19 @@ enum hns3_link_mode_bits {
 #define HNS3_INT_RL_MAX			0x00EC
 #define HNS3_INT_RL_ENABLE_MASK		0x40
 
+struct hns3_enet_coalesce {
+	u16 int_gl;
+	u8 gl_adapt_enable;
+	enum hns3_flow_level_range flow_level;
+};
+
 struct hns3_enet_ring_group {
 	/* array of pointers to rings */
 	struct hns3_enet_ring *ring;
 	u64 total_bytes;	/* total bytes processed this group */
 	u64 total_packets;	/* total packets processed this group */
 	u16 count;
-	enum hns3_flow_level_range flow_level;
-	u16 int_gl;
-	u8 gl_adapt_enable;
+	struct hns3_enet_coalesce coal;
 };
 
 struct hns3_enet_tqp_vector {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index b034c7f24eda..26274bca8b3d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -905,11 +905,13 @@ static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
 	tx_vector = priv->ring_data[queue].ring->tqp_vector;
 	rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
 
-	cmd->use_adaptive_tx_coalesce = tx_vector->tx_group.gl_adapt_enable;
-	cmd->use_adaptive_rx_coalesce = rx_vector->rx_group.gl_adapt_enable;
+	cmd->use_adaptive_tx_coalesce =
+			tx_vector->tx_group.coal.gl_adapt_enable;
+	cmd->use_adaptive_rx_coalesce =
+			rx_vector->rx_group.coal.gl_adapt_enable;
 
-	cmd->tx_coalesce_usecs = tx_vector->tx_group.int_gl;
-	cmd->rx_coalesce_usecs = rx_vector->rx_group.int_gl;
+	cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl;
+	cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl;
 
 	cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting;
 	cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting;
@@ -1029,14 +1031,18 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
 	tx_vector = priv->ring_data[queue].ring->tqp_vector;
 	rx_vector = priv->ring_data[queue_num + queue].ring->tqp_vector;
 
-	tx_vector->tx_group.gl_adapt_enable = cmd->use_adaptive_tx_coalesce;
-	rx_vector->rx_group.gl_adapt_enable = cmd->use_adaptive_rx_coalesce;
+	tx_vector->tx_group.coal.gl_adapt_enable =
+				cmd->use_adaptive_tx_coalesce;
+	rx_vector->rx_group.coal.gl_adapt_enable =
+				cmd->use_adaptive_rx_coalesce;
 
-	tx_vector->tx_group.int_gl = cmd->tx_coalesce_usecs;
-	rx_vector->rx_group.int_gl = cmd->rx_coalesce_usecs;
+	tx_vector->tx_group.coal.int_gl = cmd->tx_coalesce_usecs;
+	rx_vector->rx_group.coal.int_gl = cmd->rx_coalesce_usecs;
 
-	hns3_set_vector_coalesce_tx_gl(tx_vector, tx_vector->tx_group.int_gl);
-	hns3_set_vector_coalesce_rx_gl(rx_vector, rx_vector->rx_group.int_gl);
+	hns3_set_vector_coalesce_tx_gl(tx_vector,
+				       tx_vector->tx_group.coal.int_gl);
+	hns3_set_vector_coalesce_rx_gl(rx_vector,
+				       rx_vector->rx_group.coal.int_gl);
 
 	hns3_set_vector_coalesce_rl(tx_vector, h->kinfo.int_rl_setting);
 	hns3_set_vector_coalesce_rl(rx_vector, h->kinfo.int_rl_setting);

From 7a242b232a6435a6f7ca3ce71efc56a7b3c451ba Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 9 Mar 2018 10:37:04 +0800
Subject: [PATCH 0805/1678] net: hns3: fix for coal configuation lost when
 setting the channel

This patch fixes the coalesce configuation lost problem when
setting the channel number by restoring all vectors's coalesce
configuation to vector 0's, because all vectors belonging to
the same netdev have the same coalesce configuation for now.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 37 +++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f466f608a47d..0a8c427b407e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3413,7 +3413,24 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
 	return ret;
 }
 
-static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
+static void hns3_restore_coal(struct hns3_nic_priv *priv,
+			      struct hns3_enet_coalesce *tx,
+			      struct hns3_enet_coalesce *rx)
+{
+	u16 vector_num = priv->vector_num;
+	int i;
+
+	for (i = 0; i < vector_num; i++) {
+		memcpy(&priv->tqp_vector[i].tx_group.coal, tx,
+		       sizeof(struct hns3_enet_coalesce));
+		memcpy(&priv->tqp_vector[i].rx_group.coal, rx,
+		       sizeof(struct hns3_enet_coalesce));
+	}
+}
+
+static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num,
+			       struct hns3_enet_coalesce *tx,
+			       struct hns3_enet_coalesce *rx)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -3431,6 +3448,8 @@ static int hns3_modify_tqp_num(struct net_device *netdev, u16 new_tqp_num)
 	if (ret)
 		goto err_alloc_vector;
 
+	hns3_restore_coal(priv, tx, rx);
+
 	ret = hns3_nic_init_vector_data(priv);
 	if (ret)
 		goto err_uninit_vector;
@@ -3461,6 +3480,7 @@ int hns3_set_channels(struct net_device *netdev,
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	struct hnae3_knic_private_info *kinfo = &h->kinfo;
+	struct hns3_enet_coalesce tx_coal, rx_coal;
 	bool if_running = netif_running(netdev);
 	u32 new_tqp_num = ch->combined_count;
 	u16 org_tqp_num;
@@ -3494,15 +3514,26 @@ int hns3_set_channels(struct net_device *netdev,
 		goto open_netdev;
 	}
 
+	/* Changing the tqp num may also change the vector num,
+	 * ethtool only support setting and querying one coal
+	 * configuation for now, so save the vector 0' coal
+	 * configuation here in order to restore it.
+	 */
+	memcpy(&tx_coal, &priv->tqp_vector[0].tx_group.coal,
+	       sizeof(struct hns3_enet_coalesce));
+	memcpy(&rx_coal, &priv->tqp_vector[0].rx_group.coal,
+	       sizeof(struct hns3_enet_coalesce));
+
 	hns3_nic_dealloc_vector_data(priv);
 
 	hns3_uninit_all_ring(priv);
 	hns3_put_ring_config(priv);
 
 	org_tqp_num = h->kinfo.num_tqps;
-	ret = hns3_modify_tqp_num(netdev, new_tqp_num);
+	ret = hns3_modify_tqp_num(netdev, new_tqp_num, &tx_coal, &rx_coal);
 	if (ret) {
-		ret = hns3_modify_tqp_num(netdev, org_tqp_num);
+		ret = hns3_modify_tqp_num(netdev, org_tqp_num,
+					  &tx_coal, &rx_coal);
 		if (ret) {
 			/* If revert to old tqp failed, fatal error occurred */
 			dev_err(&netdev->dev,

From ff3c1b1a817d72e092fded369596dbd84e719ab8 Mon Sep 17 00:00:00 2001
From: Vaibhav Murkute <vaibhavmurkute88@gmail.com>
Date: Fri, 9 Mar 2018 08:26:03 +0530
Subject: [PATCH 0806/1678] drivers: vhost: vsock: fixed a brace coding style
 issue

Fixed a coding style issue.

Signed-off-by: Vaibhav Murkute <vaibhavmurkute88@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vsock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0d14e2ff19f1..0898dbdbf955 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -61,9 +61,9 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
 		if (other_cid == 0)
 			continue;
 
-		if (other_cid == guest_cid) {
+		if (other_cid == guest_cid)
 			return vsock;
-		}
+
 	}
 
 	return NULL;

From 35951393bbffa1ce351438bee264b48347c8cbb7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Thu, 8 Mar 2018 23:43:40 -0600
Subject: [PATCH 0807/1678] pktgen: Remove VLA usage

In preparation to enabling -Wvla, remove VLA usage and replace it
with a fixed-length array instead.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d81bddd1bb80..e2d6ae3b290b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -907,7 +907,7 @@ static ssize_t pktgen_if_write(struct file *file,
 
 	if (debug) {
 		size_t copy = min_t(size_t, count, 1023);
-		char tb[copy + 1];
+		char tb[1024];
 		if (copy_from_user(tb, user_buffer, copy))
 			return -EFAULT;
 		tb[copy] = 0;

From d185efc1da089b968dfe1825329c51820d661755 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 9 Mar 2018 13:00:52 +0530
Subject: [PATCH 0808/1678] cxgb4: increase max tx rate limit to 100 Gbps

T6 cards can support up to 100 G speeds. So, increase
max programmable tx rate limit to 100 Gbps.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 6 +++---
 drivers/net/ethernet/chelsio/cxgb4/sched.h      | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 7b452e85de2a..1b44652e4966 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2870,11 +2870,11 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	/* Convert from Mbps to Kbps */
 	req_rate = rate << 10;
 
-	/* Max rate is 10 Gbps */
+	/* Max rate is 100 Gbps */
 	if (req_rate >= SCHED_MAX_RATE_KBPS) {
 		dev_err(adap->pdev_dev,
-			"Invalid rate %u Mbps, Max rate is %u Gbps\n",
-			rate, SCHED_MAX_RATE_KBPS);
+			"Invalid rate %u Mbps, Max rate is %u Mbps\n",
+			rate, SCHED_MAX_RATE_KBPS >> 10);
 		return -ERANGE;
 	}
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
index 77b2b3fd9021..3a49e00a38a1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -42,8 +42,8 @@
 
 #define FW_SCHED_CLS_NONE 0xffffffff
 
-/* Max rate that can be set to a scheduling class is 10 Gbps */
-#define SCHED_MAX_RATE_KBPS 10000000U
+/* Max rate that can be set to a scheduling class is 100 Gbps */
+#define SCHED_MAX_RATE_KBPS 100000000U
 
 enum {
 	SCHED_STATE_ACTIVE,

From f5426250a6ecfd1e9b2d5e0daf07565f664aa67d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 9 Mar 2018 10:39:24 +0100
Subject: [PATCH 0809/1678] net: introduce IFF_NO_RX_HANDLER

Some network devices - notably ipvlan slave - are not compatible with
any kind of rx_handler. Currently the hook can be installed but any
configuration (bridge, bond, macsec, ...) is nonfunctional.

This change allocates a priv_flag bit to mark such devices and explicitly
forbid installing a rx_handler if such bit is set. The new bit is used
by ipvlan slave device.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_main.c | 2 ++
 include/linux/netdevice.h        | 3 +++
 net/core/dev.c                   | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 23fd5ab180e8..743d37fb034a 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -604,6 +604,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 	 */
 	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
 
+	dev->priv_flags |= IFF_NO_RX_HANDLER;
+
 	err = register_netdevice(dev);
 	if (err < 0)
 		return err;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9711108c3916..5fbb9f1da7fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1397,6 +1397,7 @@ struct net_device_ops {
  * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
  *	entity (i.e. the master device for bridged veth)
  * @IFF_MACSEC: device is a MACsec device
+ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1425,6 +1426,7 @@ enum netdev_priv_flags {
 	IFF_RXFH_CONFIGURED		= 1<<23,
 	IFF_PHONY_HEADROOM		= 1<<24,
 	IFF_MACSEC			= 1<<25,
+	IFF_NO_RX_HANDLER		= 1<<26,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1452,6 +1454,7 @@ enum netdev_priv_flags {
 #define IFF_TEAM			IFF_TEAM
 #define IFF_RXFH_CONFIGURED		IFF_RXFH_CONFIGURED
 #define IFF_MACSEC			IFF_MACSEC
+#define IFF_NO_RX_HANDLER		IFF_NO_RX_HANDLER
 
 /**
  *	struct net_device - The DEVICE structure.
diff --git a/net/core/dev.c b/net/core/dev.c
index e5b8d42b6410..259abb1515d0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4351,6 +4351,9 @@ int netdev_rx_handler_register(struct net_device *dev,
 	if (netdev_is_rx_handler_busy(dev))
 		return -EBUSY;
 
+	if (dev->priv_flags & IFF_NO_RX_HANDLER)
+		return -EINVAL;
+
 	/* Note: rx_handler_data must be set before rx_handler */
 	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
 	rcu_assign_pointer(dev->rx_handler, rx_handler);

From ff5caa7a28c2c6dc1776eef36e8c9fbadfc53c1d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:52 +0100
Subject: [PATCH 0810/1678] s390/qeth: use __ipa_cmd() for casting an IPA cmd
 buffer

"s390/qeth: fix SETIP command handling" introduced a new helper, apply
it driver-wide.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 24 +++++++++++-------------
 drivers/s390/net/qeth_l2_main.c   | 14 ++++++--------
 drivers/s390/net/qeth_l3_main.c   | 14 +++++++-------
 3 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index c8b308cfabf1..d4cb0eea06b6 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2951,12 +2951,10 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
 		enum qeth_ipa_cmds ipacmd, enum qeth_prot_versions prot)
 {
 	struct qeth_cmd_buffer *iob;
-	struct qeth_ipa_cmd *cmd;
 
 	iob = qeth_get_buffer(&card->write);
 	if (iob) {
-		cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
-		qeth_fill_ipacmd_header(card, cmd, ipacmd, prot);
+		qeth_fill_ipacmd_header(card, __ipa_cmd(iob), ipacmd, prot);
 	} else {
 		dev_warn(&card->gdev->dev,
 			 "The qeth driver ran out of channel command buffers\n");
@@ -3067,7 +3065,7 @@ static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
 	iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
 				     QETH_PROT_IPV4);
 	if (iob) {
-		cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+		cmd = __ipa_cmd(iob);
 		cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
 		cmd->data.setadapterparms.hdr.command_code = command;
 		cmd->data.setadapterparms.hdr.used_total = 1;
@@ -3209,7 +3207,7 @@ static int qeth_query_setdiagass(struct qeth_card *card)
 	iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.diagass.subcmd_len = 16;
 	cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
 	return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
@@ -3262,7 +3260,7 @@ int qeth_hw_trap(struct qeth_card *card, enum qeth_diags_trap_action action)
 	iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.diagass.subcmd_len = 80;
 	cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
 	cmd->data.diagass.type = 1;
@@ -4240,7 +4238,7 @@ void qeth_setadp_promisc_mode(struct qeth_card *card)
 			sizeof(struct qeth_ipacmd_setadpparms_hdr) + 8);
 	if (!iob)
 		return;
-	cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.setadapterparms.data.mode = mode;
 	qeth_send_ipa_cmd(card, iob, qeth_setadp_promisc_mode_cb, NULL);
 }
@@ -4307,7 +4305,7 @@ int qeth_setadpparms_change_macaddr(struct qeth_card *card)
 				   sizeof(struct qeth_change_addr));
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.setadapterparms.data.change_addr.cmd = CHANGE_ADDR_READ_MAC;
 	cmd->data.setadapterparms.data.change_addr.addr_size = ETH_ALEN;
 	ether_addr_copy(cmd->data.setadapterparms.data.change_addr.addr,
@@ -4422,7 +4420,7 @@ static int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
 				   sizeof(struct qeth_set_access_ctrl));
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl;
 	access_ctrl_req->subcmd_code = isolation;
 
@@ -4668,7 +4666,7 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
 		rc = -ENOMEM;
 		goto out;
 	}
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
 	rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
 				    qeth_snmp_command_cb, (void *)&qinfo);
@@ -4753,7 +4751,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
 		rc = -ENOMEM;
 		goto out_free;
 	}
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	oat_req = &cmd->data.setadapterparms.data.query_oat;
 	oat_req->subcmd_code = oat_data.command;
 
@@ -5494,7 +5492,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
 	iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETASSPARMS, prot);
 
 	if (iob) {
-		cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+		cmd = __ipa_cmd(iob);
 		cmd->data.setassparms.hdr.assist_no = ipa_func;
 		cmd->data.setassparms.hdr.length = 8 + len;
 		cmd->data.setassparms.hdr.command_code = cmd_code;
@@ -5517,7 +5515,7 @@ int qeth_send_setassparms(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 4, "sendassp");
 
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	if (len <= sizeof(__u32))
 		cmd->data.setassparms.data.flags_32bit = (__u32) data;
 	else   /* (len > sizeof(__u32)) */
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7f236440483f..1cd583b69eaa 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -108,7 +108,7 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
 	iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.setdelmac.mac_length = ETH_ALEN;
 	ether_addr_copy(cmd->data.setdelmac.mac, mac);
 	return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
@@ -305,7 +305,7 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
 	iob = qeth_get_ipacmd_buffer(card, ipacmd, QETH_PROT_IPV4);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.setdelvlan.vlan_id = i;
 	return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
 					    qeth_l2_send_setdelvlan_cb, NULL));
@@ -1374,7 +1374,6 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
 {
 	struct qeth_cmd_buffer *iob;
 	struct qeth_card *card;
-	int rc;
 
 	if (!dev)
 		return -ENODEV;
@@ -1385,9 +1384,8 @@ int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
 	if (!qeth_card_hw_is_reachable(card))
 		return -ENODEV;
 	iob = qeth_wait_for_buffer(&card->write);
-	memcpy(iob->data+IPA_PDU_HEADER_SIZE, data, data_len);
-	rc = qeth_osn_send_ipa_cmd(card, iob, data_len);
-	return rc;
+	memcpy(__ipa_cmd(iob), data, data_len);
+	return qeth_osn_send_ipa_cmd(card, iob, data_len);
 }
 EXPORT_SYMBOL(qeth_osn_assist);
 
@@ -1764,7 +1762,7 @@ static struct qeth_cmd_buffer *qeth_sbp_build_cmd(struct qeth_card *card,
 	iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
 	if (!iob)
 		return iob;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
 				      cmd_length;
 	cmd->data.sbp.hdr.command_code = sbp_cmd;
@@ -2129,7 +2127,7 @@ static int qeth_l2_vnicc_request(struct qeth_card *card,
 		return -ENOMEM;
 
 	/* create header for request */
-	cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	req = &cmd->data.vnicc;
 
 	/* create sub command header for request */
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 962a04b68dd2..e5f11cd64fec 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -352,7 +352,7 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
 	iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	ether_addr_copy(cmd->data.setdelipm.mac, addr->mac);
 	if (addr->proto == QETH_PROT_IPV6)
 		memcpy(cmd->data.setdelipm.ip6, &addr->u.a6.addr,
@@ -393,7 +393,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
 	iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	if (addr->proto == QETH_PROT_IPV6) {
 		memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
 		       sizeof(struct in6_addr));
@@ -423,7 +423,7 @@ static int qeth_l3_send_setrouting(struct qeth_card *card,
 	iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETRTG, prot);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.setrtg.type = (type);
 	rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
 
@@ -1072,7 +1072,7 @@ static int qeth_l3_iqd_read_initial_mac(struct qeth_card *card)
 				     QETH_PROT_IPV6);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
 			card->info.unique_id;
 
@@ -1117,7 +1117,7 @@ static int qeth_l3_get_unique_id(struct qeth_card *card)
 				     QETH_PROT_IPV6);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	*((__u16 *) &cmd->data.create_destroy_addr.unique_id[6]) =
 			card->info.unique_id;
 
@@ -1193,7 +1193,7 @@ qeth_diags_trace(struct qeth_card *card, enum qeth_diags_trace_cmds diags_cmd)
 	iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.diagass.subcmd_len = 16;
 	cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRACE;
 	cmd->data.diagass.type = QETH_DIAGS_TYPE_HIPERSOCKET;
@@ -2004,7 +2004,7 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
 				       prot);
 	if (!iob)
 		return -ENOMEM;
-	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+	cmd = __ipa_cmd(iob);
 	cmd->data.setassparms.data.query_arp.request_bits = 0x000F;
 	cmd->data.setassparms.data.query_arp.reply_bits = 0;
 	cmd->data.setassparms.data.query_arp.no_entries = 0;

From d857e11193a24d6623bb562e9b26cde582bd877f Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:53 +0100
Subject: [PATCH 0811/1678] s390/qeth: remove outdated portname debug msg

The 'portname' attribute is deprecated and setting it has no effect.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index d4cb0eea06b6..d884b399f92a 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -708,11 +708,8 @@ static int qeth_check_idx_response(struct qeth_card *card,
 
 	QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
 	if ((buffer[2] & 0xc0) == 0xc0) {
-		QETH_DBF_MESSAGE(2, "received an IDX TERMINATE "
-			   "with cause code 0x%02x%s\n",
-			   buffer[4],
-			   ((buffer[4] == 0x22) ?
-			    " -- try another portname" : ""));
+		QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+				 buffer[4]);
 		QETH_CARD_TEXT(card, 2, "ckidxres");
 		QETH_CARD_TEXT(card, 2, " idxterm");
 		QETH_CARD_TEXT_(card, 2, "  rc%d", -EIO);

From 0f34294527ed17fd0966142d6b5e754ba97f65b5 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:54 +0100
Subject: [PATCH 0812/1678] s390/qeth: support SG for more device types

NETIF_F_SG support is currently limited to OSA (and for L2 even OSD)
devices. Advertise it for some more device types (OSM, L2 OSX, z/VM OSA)
that share the same code paths. For now, keep it switched off by
default on these devices.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 16 +++++++++-------
 drivers/s390/net/qeth_l3_main.c | 14 ++++++++------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 1cd583b69eaa..ea607e5fb5dc 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -991,9 +991,16 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 		card->dev->features |= NETIF_F_VLAN_CHALLENGED;
 	else
 		card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	if (card->info.type != QETH_CARD_TYPE_OSN &&
+	    card->info.type != QETH_CARD_TYPE_IQD) {
+		card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+		card->dev->needed_headroom = sizeof(struct qeth_hdr);
+		card->dev->hw_features |= NETIF_F_SG;
+		card->dev->vlan_features |= NETIF_F_SG;
+	}
+
 	if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
-		card->dev->hw_features = NETIF_F_SG;
-		card->dev->vlan_features = NETIF_F_SG;
 		card->dev->features |= NETIF_F_SG;
 		/* OSA 3S and earlier has no RX/TX support */
 		if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
@@ -1005,11 +1012,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 			card->dev->vlan_features |= NETIF_F_RXCSUM;
 		}
 	}
-	if (card->info.type != QETH_CARD_TYPE_OSN &&
-	    card->info.type != QETH_CARD_TYPE_IQD) {
-		card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-		card->dev->needed_headroom = sizeof(struct qeth_hdr);
-	}
 
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e5f11cd64fec..e933a273d543 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2785,14 +2785,16 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 			if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
 				card->dev->dev_id = card->info.unique_id &
 							 0xffff;
+
+			card->dev->hw_features |= NETIF_F_SG;
+			card->dev->vlan_features |= NETIF_F_SG;
+
 			if (!card->info.guestlan) {
-				card->dev->hw_features = NETIF_F_SG |
-					NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
-					NETIF_F_TSO;
-				card->dev->vlan_features = NETIF_F_SG |
-					NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
-					NETIF_F_TSO;
 				card->dev->features |= NETIF_F_SG;
+				card->dev->hw_features |= NETIF_F_TSO |
+					NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
+				card->dev->vlan_features |= NETIF_F_TSO |
+					NETIF_F_RXCSUM | NETIF_F_IP_CSUM;
 			}
 		}
 	} else if (card->info.type == QETH_CARD_TYPE_IQD) {

From d3aacac4e47477ce26fe474c216f90baadcb2604 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:55 +0100
Subject: [PATCH 0813/1678] s390/qeth: advertise IFF_UNICAST_FLT

qeth implements HW-based Unicast Filtering (via SETVMAC) on L2 devices.
Tell the stack, so it knows that receiving traffic for secondary
addresses doesn't require full-blown promiscuous mode.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index ea607e5fb5dc..c7bc80b3a979 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -975,6 +975,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 		return -ENODEV;
 
 	card->dev->ml_priv = card;
+	card->dev->priv_flags |= IFF_UNICAST_FLT;
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
 	card->dev->min_mtu = 64;

From f0ea8bfbc1bcd0ca32011c1335f633f00d9199cf Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:56 +0100
Subject: [PATCH 0814/1678] s390/qeth: pass correct length to
 header_ops->create()

We need to pass the *payload* length, not the L2 address length.
For qeth (using eth_header()) this is merely a cosmetic change:
the parameter only matters when building headers for ETH_P_802_2
or ETH_P_802_3, whereas our fake headers are built with
ETH_P_IP / ETH_P_IPV6 / 0.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e933a273d543..8e8818ea33e2 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1522,10 +1522,10 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
 		if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
 			card->dev->header_ops->create(skb, card->dev, prot,
 				tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
-				card->dev->addr_len);
+				skb->len);
 		else
 			card->dev->header_ops->create(skb, card->dev, prot,
-				tg_addr, "FAKELL", card->dev->addr_len);
+				tg_addr, "FAKELL", skb->len);
 	}
 
 	skb->protocol = eth_type_trans(skb, card->dev);
@@ -1584,8 +1584,7 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 				skb->dev = card->dev;
 				len = skb->len;
 				card->dev->header_ops->create(skb, card->dev, 0,
-					card->dev->dev_addr, "FAKELL",
-					card->dev->addr_len);
+					card->dev->dev_addr, "FAKELL", len);
 				netif_receive_skb(skb);
 			} else {
 				qeth_l3_rebuild_skb(card, skb, hdr);

From 37cf05d2fff52a8ad1ce977c2a485e91fe793162 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:57 +0100
Subject: [PATCH 0815/1678] s390/qeth: allocate skb from NAPI cache

napi_alloc_skb() doesn't need to disable IRQs during the allocation,
and thus may save us a few cycles.
Doing so requires a small fix-up in the HiperTransport path, which
currently assumes a fixed NET_SKB_PAD headroom padding. napi_alloc_skb()
adds an additional NET_IP_ALIGN padding, so use the proper helper for
setting up the mac_header offset.

Use this opportunity to convert the non-NAPI path to netdev_alloc_skb(),
which means that skb->dev is now always set-up during allocation and
doesn't need to be assigned manually.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 5 +++--
 drivers/s390/net/qeth_l2_main.c   | 1 -
 drivers/s390/net/qeth_l3_main.c   | 4 +---
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index d884b399f92a..8f427621e656 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2835,7 +2835,8 @@ static int qeth_init_input_buffer(struct qeth_card *card,
 	int i;
 
 	if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
-		buf->rx_skb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
+		buf->rx_skb = netdev_alloc_skb(card->dev,
+					       QETH_RX_PULL_LEN + ETH_HLEN);
 		if (!buf->rx_skb)
 			return 1;
 	}
@@ -5325,7 +5326,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
 	} else {
 		unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
 
-		skb = dev_alloc_skb(linear + headroom);
+		skb = napi_alloc_skb(&card->napi, linear + headroom);
 	}
 	if (!skb)
 		goto no_mem;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index c7bc80b3a979..95bf452fc51b 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -437,7 +437,6 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
 			*done = 1;
 			break;
 		}
-		skb->dev = card->dev;
 		switch (hdr->hdr.l2.id) {
 		case QETH_HEADER_TYPE_LAYER2:
 			skb->pkt_type = PACKET_HOST;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 8e8818ea33e2..283cbdcc5d6b 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1572,7 +1572,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 			*done = 1;
 			break;
 		}
-		skb->dev = card->dev;
 		switch (hdr->hdr.l3.id) {
 		case QETH_HEADER_TYPE_LAYER3:
 			magic = *(__u16 *)skb->data;
@@ -1580,11 +1579,10 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 			    (magic == ETH_P_AF_IUCV)) {
 				skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
 				skb->pkt_type = PACKET_HOST;
-				skb->mac_header = NET_SKB_PAD;
-				skb->dev = card->dev;
 				len = skb->len;
 				card->dev->header_ops->create(skb, card->dev, 0,
 					card->dev->dev_addr, "FAKELL", len);
+				skb_reset_mac_header(skb);
 				netif_receive_skb(skb);
 			} else {
 				qeth_l3_rebuild_skb(card, skb, hdr);

From 04f673983bdb9c117b4710b5bcc712fe63486c34 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:58 +0100
Subject: [PATCH 0816/1678] s390/qeth: reduce RX skb setup

Newly-allocated skbs default to PACKET_HOST, and eth_type_trans() is
smart enough to determine any other packet type from the frame's
destination address.
So except for the IQD sniffer case, there is no need to set up
skb->pkt_type manually.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c |  1 -
 drivers/s390/net/qeth_l3_main.c | 10 +---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 95bf452fc51b..09768677222b 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -439,7 +439,6 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
 		}
 		switch (hdr->hdr.l2.id) {
 		case QETH_HEADER_TYPE_LAYER2:
-			skb->pkt_type = PACKET_HOST;
 			skb->protocol = eth_type_trans(skb, skb->dev);
 			if ((card->dev->features & NETIF_F_RXCSUM)
 			   && ((hdr->hdr.l2.flags[1] &
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 283cbdcc5d6b..c302274bab22 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1502,23 +1502,17 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
 				ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
 
 			card->stats.multicast++;
-			skb->pkt_type = PACKET_MULTICAST;
 			break;
 		case QETH_CAST_BROADCAST:
 			ether_addr_copy(tg_addr, card->dev->broadcast);
 			card->stats.multicast++;
-			skb->pkt_type = PACKET_BROADCAST;
 			break;
-		case QETH_CAST_UNICAST:
-		case QETH_CAST_ANYCAST:
-		case QETH_CAST_NOCAST:
 		default:
 			if (card->options.sniffer)
 				skb->pkt_type = PACKET_OTHERHOST;
-			else
-				skb->pkt_type = PACKET_HOST;
 			ether_addr_copy(tg_addr, card->dev->dev_addr);
 		}
+
 		if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
 			card->dev->header_ops->create(skb, card->dev, prot,
 				tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
@@ -1578,7 +1572,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 			if ((card->info.type == QETH_CARD_TYPE_IQD) &&
 			    (magic == ETH_P_AF_IUCV)) {
 				skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
-				skb->pkt_type = PACKET_HOST;
 				len = skb->len;
 				card->dev->header_ops->create(skb, card->dev, 0,
 					card->dev->dev_addr, "FAKELL", len);
@@ -1591,7 +1584,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 			}
 			break;
 		case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
-			skb->pkt_type = PACKET_HOST;
 			skb->protocol = eth_type_trans(skb, skb->dev);
 			len = skb->len;
 			netif_receive_skb(skb);

From 1b45c80be08d11e3f48c514a63d8c1214829fc19 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:12:59 +0100
Subject: [PATCH 0817/1678] s390/qeth: reset NAPI context during queue init

init_qdio_queues() resets the Input Queue's overall QDIO state, and
positions the buffer cursor back to 0. So this is the obvious place to
also reset the queue's NAPI context (in contrast to doing it rather
randomly in the middle of the big set_online() path).
No functional change.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 4 ++--
 drivers/s390/net/qeth_l2_main.c   | 1 -
 drivers/s390/net/qeth_l3_main.c   | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 8f427621e656..8c97ce2516bb 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2873,8 +2873,8 @@ int qeth_init_qdio_queues(struct qeth_card *card)
 	QETH_DBF_TEXT(SETUP, 2, "initqdqs");
 
 	/* inbound queue */
-	qdio_reset_buffers(card->qdio.in_q->qdio_bufs,
-			   QDIO_MAX_BUFFERS_PER_Q);
+	qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+	memset(&card->rx, 0, sizeof(struct qeth_rx));
 	qeth_initialize_working_pool_list(card);
 	/*give only as many buffers to hardware as we have buffer pool entries*/
 	for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i)
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 09768677222b..8f5babdccb42 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1087,7 +1087,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	qeth_l2_setup_bridgeport_attrs(card);
 
 	card->state = CARD_STATE_HARDSETUP;
-	memset(&card->rx, 0, sizeof(struct qeth_rx));
 	qeth_print_status_message(card);
 
 	/* softsetup */
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index c302274bab22..17daf2047f7b 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2898,7 +2898,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		card->info.hwtrap = 0;
 
 	card->state = CARD_STATE_HARDSETUP;
-	memset(&card->rx, 0, sizeof(struct qeth_rx));
 	qeth_print_status_message(card);
 
 	/* softsetup */

From d66b1c0df3f19812a6edb69fa35a1db01a2251d5 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:13:00 +0100
Subject: [PATCH 0818/1678] s390/qeth: restructure IP notification handlers

Extract a helper that does the actual work & returns the right NOTIFY_*
responses, and start putting the temporary ipaddr container objects
on the stack rather than kmalloc'ing them. They are small, and this
reduces the confusion of which objects actually get added to qeth's
IP tables.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3.h      |  10 ++++
 drivers/s390/net/qeth_l3_main.c | 102 ++++++++++++--------------------
 2 files changed, 49 insertions(+), 63 deletions(-)

diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 498fe9af2cdb..cf834cdd0d2c 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -42,6 +42,16 @@ struct qeth_ipaddr {
 	} u;
 };
 
+static inline void qeth_l3_init_ipaddr(struct qeth_ipaddr *addr,
+				       enum qeth_ip_types type,
+				       enum qeth_prot_versions proto)
+{
+	memset(addr, 0, sizeof(*addr));
+	addr->type = type;
+	addr->proto = proto;
+	addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
+}
+
 static inline bool qeth_l3_addr_match_ip(struct qeth_ipaddr *a1,
 					 struct qeth_ipaddr *a2)
 {
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 17daf2047f7b..2ff8dd6d0a3e 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -67,6 +67,15 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
 		qeth_l3_ipaddr6_to_string(addr, buf);
 }
 
+struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
+{
+	struct qeth_ipaddr *addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+
+	if (addr)
+		qeth_l3_init_ipaddr(addr, QETH_IP_TYPE_NORMAL, prot);
+	return addr;
+}
+
 static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
 						   struct qeth_ipaddr *query)
 {
@@ -251,23 +260,6 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 	return rc;
 }
 
-
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(
-				enum qeth_prot_versions prot)
-{
-	struct qeth_ipaddr *addr;
-
-	addr = kzalloc(sizeof(struct qeth_ipaddr), GFP_ATOMIC);
-	if (!addr)
-		return NULL;
-
-	addr->type = QETH_IP_TYPE_NORMAL;
-	addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
-	addr->proto = prot;
-
-	return addr;
-}
-
 static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
 {
 	struct qeth_ipaddr *addr;
@@ -3120,13 +3112,33 @@ struct qeth_discipline qeth_l3_discipline = {
 };
 EXPORT_SYMBOL_GPL(qeth_l3_discipline);
 
+static int qeth_l3_handle_ip_event(struct qeth_card *card,
+				   struct qeth_ipaddr *addr,
+				   unsigned long event)
+{
+	switch (event) {
+	case NETDEV_UP:
+		spin_lock_bh(&card->ip_lock);
+		qeth_l3_add_ip(card, addr);
+		spin_unlock_bh(&card->ip_lock);
+		return NOTIFY_OK;
+	case NETDEV_DOWN:
+		spin_lock_bh(&card->ip_lock);
+		qeth_l3_delete_ip(card, addr);
+		spin_unlock_bh(&card->ip_lock);
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int qeth_l3_ip_event(struct notifier_block *this,
 			    unsigned long event, void *ptr)
 {
 
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
 	struct net_device *dev = (struct net_device *)ifa->ifa_dev->dev;
-	struct qeth_ipaddr *addr;
+	struct qeth_ipaddr addr;
 	struct qeth_card *card;
 
 	if (dev_net(dev) != &init_net)
@@ -3137,29 +3149,11 @@ static int qeth_l3_ip_event(struct notifier_block *this,
 		return NOTIFY_DONE;
 	QETH_CARD_TEXT(card, 3, "ipevent");
 
-	addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
-	if (addr) {
-		addr->u.a4.addr = be32_to_cpu(ifa->ifa_address);
-		addr->u.a4.mask = be32_to_cpu(ifa->ifa_mask);
-		addr->type = QETH_IP_TYPE_NORMAL;
-	} else
-		return NOTIFY_DONE;
+	qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
+	addr.u.a4.addr = be32_to_cpu(ifa->ifa_address);
+	addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
 
-	switch (event) {
-	case NETDEV_UP:
-		spin_lock_bh(&card->ip_lock);
-		qeth_l3_add_ip(card, addr);
-		spin_unlock_bh(&card->ip_lock);
-		break;
-	case NETDEV_DOWN:
-		spin_lock_bh(&card->ip_lock);
-		qeth_l3_delete_ip(card, addr);
-		spin_unlock_bh(&card->ip_lock);
-		break;
-	}
-
-	kfree(addr);
-	return NOTIFY_DONE;
+	return qeth_l3_handle_ip_event(card, &addr, event);
 }
 
 static struct notifier_block qeth_l3_ip_notifier = {
@@ -3172,7 +3166,7 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
 {
 	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
 	struct net_device *dev = (struct net_device *)ifa->idev->dev;
-	struct qeth_ipaddr *addr;
+	struct qeth_ipaddr addr;
 	struct qeth_card *card;
 
 	card = qeth_l3_get_card_from_dev(dev);
@@ -3182,29 +3176,11 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
 	if (!qeth_is_supported(card, IPA_IPV6))
 		return NOTIFY_DONE;
 
-	addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-	if (addr) {
-		memcpy(&addr->u.a6.addr, &ifa->addr, sizeof(struct in6_addr));
-		addr->u.a6.pfxlen = ifa->prefix_len;
-		addr->type = QETH_IP_TYPE_NORMAL;
-	} else
-		return NOTIFY_DONE;
+	qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+	addr.u.a6.addr = ifa->addr;
+	addr.u.a6.pfxlen = ifa->prefix_len;
 
-	switch (event) {
-	case NETDEV_UP:
-		spin_lock_bh(&card->ip_lock);
-		qeth_l3_add_ip(card, addr);
-		spin_unlock_bh(&card->ip_lock);
-		break;
-	case NETDEV_DOWN:
-		spin_lock_bh(&card->ip_lock);
-		qeth_l3_delete_ip(card, addr);
-		spin_unlock_bh(&card->ip_lock);
-		break;
-	}
-
-	kfree(addr);
-	return NOTIFY_DONE;
+	return qeth_l3_handle_ip_event(card, &addr, event);
 }
 
 static struct notifier_block qeth_l3_ip6_notifier = {

From b9caa98c5135774dae232843ecef3c487c36a7a0 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:13:01 +0100
Subject: [PATCH 0819/1678] s390/qeth: simplify card look-up on IP notification

On an IP event, current code tries to determine if the netdev belongs
to a L3 card by walking all qeth cards in the system, and then all of
their VLAN devices too. Short-cut the whole thing by identifying a L3
device through its netdev_ops.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h    |  2 -
 drivers/s390/net/qeth_l3_main.c | 77 +++++----------------------------
 2 files changed, 12 insertions(+), 67 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 959c65cf75d9..459ae3758f30 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -233,8 +233,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
 #define QETH_IDX_FUNC_LEVEL_OSD		 0x0101
 #define QETH_IDX_FUNC_LEVEL_IQD		 0x4108
 
-#define QETH_REAL_CARD		1
-#define QETH_VLAN_CARD		2
 #define QETH_BUFSIZE		4096
 
 /**
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 2ff8dd6d0a3e..ccf22e749105 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1594,69 +1594,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 	return work_done;
 }
 
-static int qeth_l3_verify_vlan_dev(struct net_device *dev,
-			struct qeth_card *card)
-{
-	int rc = 0;
-	u16 vid;
-
-	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
-		struct net_device *netdev;
-
-		rcu_read_lock();
-		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
-					      vid);
-		rcu_read_unlock();
-		if (netdev == dev) {
-			rc = QETH_VLAN_CARD;
-			break;
-		}
-	}
-
-	if (rc && !(vlan_dev_real_dev(dev)->ml_priv == (void *)card))
-		return 0;
-
-	return rc;
-}
-
-static int qeth_l3_verify_dev(struct net_device *dev)
-{
-	struct qeth_card *card;
-	int rc = 0;
-	unsigned long flags;
-
-	read_lock_irqsave(&qeth_core_card_list.rwlock, flags);
-	list_for_each_entry(card, &qeth_core_card_list.list, list) {
-		if (card->dev == dev) {
-			rc = QETH_REAL_CARD;
-			break;
-		}
-		rc = qeth_l3_verify_vlan_dev(dev, card);
-		if (rc)
-			break;
-	}
-	read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
-
-	return rc;
-}
-
-static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
-{
-	struct qeth_card *card = NULL;
-	int rc;
-
-	rc = qeth_l3_verify_dev(dev);
-	if (rc == QETH_REAL_CARD)
-		card = dev->ml_priv;
-	else if (rc == QETH_VLAN_CARD)
-		card = vlan_dev_real_dev(dev)->ml_priv;
-	if (card && card->options.layer2)
-		card = NULL;
-	if (card)
-		QETH_CARD_TEXT_(card, 4, "%d", rc);
-	return card ;
-}
-
 static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode)
 {
 	QETH_DBF_TEXT(SETUP, 2, "stopcard");
@@ -3132,12 +3069,22 @@ static int qeth_l3_handle_ip_event(struct qeth_card *card,
 	}
 }
 
+static struct qeth_card *qeth_l3_get_card_from_dev(struct net_device *dev)
+{
+	if (is_vlan_dev(dev))
+		dev = vlan_dev_real_dev(dev);
+	if (dev->netdev_ops == &qeth_l3_osa_netdev_ops ||
+	    dev->netdev_ops == &qeth_l3_netdev_ops)
+		return (struct qeth_card *) dev->ml_priv;
+	return NULL;
+}
+
 static int qeth_l3_ip_event(struct notifier_block *this,
 			    unsigned long event, void *ptr)
 {
 
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
-	struct net_device *dev = (struct net_device *)ifa->ifa_dev->dev;
+	struct net_device *dev = ifa->ifa_dev->dev;
 	struct qeth_ipaddr addr;
 	struct qeth_card *card;
 
@@ -3165,7 +3112,7 @@ static int qeth_l3_ip6_event(struct notifier_block *this,
 			     unsigned long event, void *ptr)
 {
 	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
-	struct net_device *dev = (struct net_device *)ifa->idev->dev;
+	struct net_device *dev = ifa->idev->dev;
 	struct qeth_ipaddr addr;
 	struct qeth_card *card;
 

From 1617dae25e4478e0128da1bf467a18371a16e237 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:13:02 +0100
Subject: [PATCH 0820/1678] s390/qeth: extract helpers for managing special IPs

Reduce code duplication.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3.h      |  13 +--
 drivers/s390/net/qeth_l3_main.c | 158 +++++++++-----------------------
 drivers/s390/net/qeth_l3_sys.c  |  51 +++--------
 3 files changed, 57 insertions(+), 165 deletions(-)

diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index cf834cdd0d2c..3fdf7fdf30c2 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -119,15 +119,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *, struct qeth_ipato_entry *);
 int qeth_l3_del_ipato_entry(struct qeth_card *card,
 			    enum qeth_prot_versions proto, u8 *addr,
 			    int mask_bits);
-int qeth_l3_add_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
-		     const u8 *addr);
-int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
-		     const u8 *addr);
 void qeth_l3_update_ipato(struct qeth_card *card);
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
-int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
-int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add);
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+			     enum qeth_ip_types type,
+			     enum qeth_prot_versions proto);
 
 #endif /* __QETH_L3_H__ */
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ccf22e749105..0dfa16a03ea2 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -67,7 +67,7 @@ void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
 		qeth_l3_ipaddr6_to_string(addr, buf);
 }
 
-struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
+static struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions prot)
 {
 	struct qeth_ipaddr *addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
 
@@ -147,12 +147,18 @@ static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
 	return rc;
 }
 
-int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_delete_ip(struct qeth_card *card,
+			     struct qeth_ipaddr *tmp_addr)
 {
 	int rc = 0;
 	struct qeth_ipaddr *addr;
 
-	QETH_CARD_TEXT(card, 4, "delip");
+	if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+		QETH_CARD_TEXT(card, 2, "delrxip");
+	else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+		QETH_CARD_TEXT(card, 2, "delvipa");
+	else
+		QETH_CARD_TEXT(card, 2, "delip");
 
 	if (tmp_addr->proto == QETH_PROT_IPV4)
 		QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -180,13 +186,18 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 	return rc;
 }
 
-int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
+static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 {
 	int rc = 0;
 	struct qeth_ipaddr *addr;
 	char buf[40];
 
-	QETH_CARD_TEXT(card, 4, "addip");
+	if (tmp_addr->type == QETH_IP_TYPE_RXIP)
+		QETH_CARD_TEXT(card, 2, "addrxip");
+	else if (tmp_addr->type == QETH_IP_TYPE_VIPA)
+		QETH_CARD_TEXT(card, 2, "addvipa");
+	else
+		QETH_CARD_TEXT(card, 2, "addip");
 
 	if (tmp_addr->proto == QETH_PROT_IPV4)
 		QETH_CARD_HEX(card, 4, &tmp_addr->u.a4.addr, 4);
@@ -598,132 +609,45 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
 	return rc;
 }
 
-/*
- * VIPA related functions
- */
-int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
-	      const u8 *addr)
+int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
+			     enum qeth_ip_types type,
+			     enum qeth_prot_versions proto)
 {
-	struct qeth_ipaddr *ipaddr;
+	struct qeth_ipaddr addr;
 	int rc;
 
-	ipaddr = qeth_l3_get_addr_buffer(proto);
-	if (ipaddr) {
-		if (proto == QETH_PROT_IPV4) {
-			QETH_CARD_TEXT(card, 2, "addvipa4");
-			memcpy(&ipaddr->u.a4.addr, addr, 4);
-			ipaddr->u.a4.mask = 0;
-		} else if (proto == QETH_PROT_IPV6) {
-			QETH_CARD_TEXT(card, 2, "addvipa6");
-			memcpy(&ipaddr->u.a6.addr, addr, 16);
-			ipaddr->u.a6.pfxlen = 0;
-		}
-		ipaddr->type = QETH_IP_TYPE_VIPA;
-		ipaddr->set_flags = QETH_IPA_SETIP_VIPA_FLAG;
-		ipaddr->del_flags = QETH_IPA_DELIP_VIPA_FLAG;
-	} else
-		return -ENOMEM;
+	qeth_l3_init_ipaddr(&addr, type, proto);
+	if (proto == QETH_PROT_IPV4)
+		memcpy(&addr.u.a4.addr, ip, 4);
+	else
+		memcpy(&addr.u.a6.addr, ip, 16);
+	if (type == QETH_IP_TYPE_RXIP) {
+		addr.set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
+	} else {
+		addr.set_flags = QETH_IPA_SETIP_VIPA_FLAG;
+		addr.del_flags = QETH_IPA_DELIP_VIPA_FLAG;
+	}
 
 	spin_lock_bh(&card->ip_lock);
-	rc = qeth_l3_add_ip(card, ipaddr);
+	rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
 	spin_unlock_bh(&card->ip_lock);
-
-	kfree(ipaddr);
-
 	return rc;
 }
 
-int qeth_l3_del_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
-		     const u8 *addr)
+int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
 {
-	struct qeth_ipaddr *ipaddr;
-	int rc;
+	struct qeth_ipaddr addr;
+	int rc, i;
 
-	ipaddr = qeth_l3_get_addr_buffer(proto);
-	if (ipaddr) {
-		if (proto == QETH_PROT_IPV4) {
-			QETH_CARD_TEXT(card, 2, "delvipa4");
-			memcpy(&ipaddr->u.a4.addr, addr, 4);
-			ipaddr->u.a4.mask = 0;
-		} else if (proto == QETH_PROT_IPV6) {
-			QETH_CARD_TEXT(card, 2, "delvipa6");
-			memcpy(&ipaddr->u.a6.addr, addr, 16);
-			ipaddr->u.a6.pfxlen = 0;
-		}
-		ipaddr->type = QETH_IP_TYPE_VIPA;
-	} else
-		return -ENOMEM;
+	qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV6);
+	addr.u.a6.addr.s6_addr[0] = 0xfe;
+	addr.u.a6.addr.s6_addr[1] = 0x80;
+	for (i = 0; i < 8; i++)
+		addr.u.a6.addr.s6_addr[8+i] = card->options.hsuid[i];
 
 	spin_lock_bh(&card->ip_lock);
-	rc = qeth_l3_delete_ip(card, ipaddr);
+	rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
 	spin_unlock_bh(&card->ip_lock);
-
-	kfree(ipaddr);
-	return rc;
-}
-
-/*
- * proxy ARP related functions
- */
-int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
-	      const u8 *addr)
-{
-	struct qeth_ipaddr *ipaddr;
-	int rc;
-
-	ipaddr = qeth_l3_get_addr_buffer(proto);
-	if (ipaddr) {
-		if (proto == QETH_PROT_IPV4) {
-			QETH_CARD_TEXT(card, 2, "addrxip4");
-			memcpy(&ipaddr->u.a4.addr, addr, 4);
-			ipaddr->u.a4.mask = 0;
-		} else if (proto == QETH_PROT_IPV6) {
-			QETH_CARD_TEXT(card, 2, "addrxip6");
-			memcpy(&ipaddr->u.a6.addr, addr, 16);
-			ipaddr->u.a6.pfxlen = 0;
-		}
-
-		ipaddr->type = QETH_IP_TYPE_RXIP;
-		ipaddr->set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
-		ipaddr->del_flags = 0;
-	} else
-		return -ENOMEM;
-
-	spin_lock_bh(&card->ip_lock);
-	rc = qeth_l3_add_ip(card, ipaddr);
-	spin_unlock_bh(&card->ip_lock);
-
-	kfree(ipaddr);
-
-	return rc;
-}
-
-int qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
-		     const u8 *addr)
-{
-	struct qeth_ipaddr *ipaddr;
-	int rc;
-
-	ipaddr = qeth_l3_get_addr_buffer(proto);
-	if (ipaddr) {
-		if (proto == QETH_PROT_IPV4) {
-			QETH_CARD_TEXT(card, 2, "delrxip4");
-			memcpy(&ipaddr->u.a4.addr, addr, 4);
-			ipaddr->u.a4.mask = 0;
-		} else if (proto == QETH_PROT_IPV6) {
-			QETH_CARD_TEXT(card, 2, "delrxip6");
-			memcpy(&ipaddr->u.a6.addr, addr, 16);
-			ipaddr->u.a6.pfxlen = 0;
-		}
-		ipaddr->type = QETH_IP_TYPE_RXIP;
-	} else
-		return -ENOMEM;
-
-	spin_lock_bh(&card->ip_lock);
-	rc = qeth_l3_delete_ip(card, ipaddr);
-	spin_unlock_bh(&card->ip_lock);
-
-	kfree(ipaddr);
 	return rc;
 }
 
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index a645cfe66ddf..f61192a048f4 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -272,9 +272,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
-	struct qeth_ipaddr *addr;
 	char *tmp;
-	int rc, i;
+	int rc;
 
 	if (!card)
 		return -EINVAL;
@@ -293,25 +292,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
 	if (strlen(tmp) > 8)
 		return -EINVAL;
 
-	if (card->options.hsuid[0]) {
+	if (card->options.hsuid[0])
 		/* delete old ip address */
-		addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-		if (!addr)
-			return -ENOMEM;
-
-		addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
-		addr->u.a6.addr.s6_addr32[1] = 0x00000000;
-		for (i = 8; i < 16; i++)
-			addr->u.a6.addr.s6_addr[i] =
-				card->options.hsuid[i - 8];
-		addr->u.a6.pfxlen = 0;
-		addr->type = QETH_IP_TYPE_NORMAL;
-
-		spin_lock_bh(&card->ip_lock);
-		qeth_l3_delete_ip(card, addr);
-		spin_unlock_bh(&card->ip_lock);
-		kfree(addr);
-	}
+		qeth_l3_modify_hsuid(card, false);
 
 	if (strlen(tmp) == 0) {
 		/* delete ip address only */
@@ -331,21 +314,7 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
 	if (card->dev)
 		memcpy(card->dev->perm_addr, card->options.hsuid, 9);
 
-	addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
-	if (addr != NULL) {
-		addr->u.a6.addr.s6_addr32[0] = cpu_to_be32(0xfe800000);
-		addr->u.a6.addr.s6_addr32[1] = 0x00000000;
-		for (i = 8; i < 16; i++)
-			addr->u.a6.addr.s6_addr[i] = card->options.hsuid[i - 8];
-		addr->u.a6.pfxlen = 0;
-		addr->type = QETH_IP_TYPE_NORMAL;
-	} else
-		return -ENOMEM;
-
-	spin_lock_bh(&card->ip_lock);
-	rc = qeth_l3_add_ip(card, addr);
-	spin_unlock_bh(&card->ip_lock);
-	kfree(addr);
+	rc = qeth_l3_modify_hsuid(card, true);
 
 	return rc ? rc : count;
 }
@@ -767,7 +736,8 @@ static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count,
 	mutex_lock(&card->conf_mutex);
 	rc = qeth_l3_parse_vipae(buf, proto, addr);
 	if (!rc)
-		rc = qeth_l3_add_vipa(card, proto, addr);
+		rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+					      QETH_IP_TYPE_VIPA, proto);
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }
@@ -796,7 +766,8 @@ static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count,
 	mutex_lock(&card->conf_mutex);
 	rc = qeth_l3_parse_vipae(buf, proto, addr);
 	if (!rc)
-		rc = qeth_l3_del_vipa(card, proto, addr);
+		rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+					      QETH_IP_TYPE_VIPA, proto);
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }
@@ -908,7 +879,8 @@ static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count,
 	mutex_lock(&card->conf_mutex);
 	rc = qeth_l3_parse_rxipe(buf, proto, addr);
 	if (!rc)
-		rc = qeth_l3_add_rxip(card, proto, addr);
+		rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+					      QETH_IP_TYPE_RXIP, proto);
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }
@@ -937,7 +909,8 @@ static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count,
 	mutex_lock(&card->conf_mutex);
 	rc = qeth_l3_parse_rxipe(buf, proto, addr);
 	if (!rc)
-		rc = qeth_l3_del_rxip(card, proto, addr);
+		rc = qeth_l3_modify_rxip_vipa(card, false, addr,
+					      QETH_IP_TYPE_RXIP, proto);
 	mutex_unlock(&card->conf_mutex);
 	return rc ? rc : count;
 }

From b1d5e36b418ed55eeb6d7b61bcc18a14c81475ed Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 9 Mar 2018 18:13:03 +0100
Subject: [PATCH 0821/1678] s390/qeth: shrink qeth_ipaddr struct

Using up 8 bytes in every ipaddr object to store SETIP/DELIP flags is
rather wasteful. Except for takeover eligibility, the flag values all
just depend on the address type, so determine them on demand.

While at it reorder the struct to fill an alignment hole.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h    |  6 -----
 drivers/s390/net/qeth_l3.h      | 11 +++++---
 drivers/s390/net/qeth_l3_main.c | 48 ++++++++++++++++++---------------
 3 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 459ae3758f30..4326715dc13e 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -554,12 +554,6 @@ enum qeth_prot_versions {
 	QETH_PROT_IPV6 = 0x0006,
 };
 
-enum qeth_ip_types {
-	QETH_IP_TYPE_NORMAL,
-	QETH_IP_TYPE_VIPA,
-	QETH_IP_TYPE_RXIP,
-};
-
 enum qeth_cmd_buffer_state {
 	BUF_STATE_FREE,
 	BUF_STATE_LOCKED,
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 3fdf7fdf30c2..87659cfc9066 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -15,21 +15,26 @@
 
 #define QETH_SNIFF_AVAIL	0x0008
 
+enum qeth_ip_types {
+	QETH_IP_TYPE_NORMAL,
+	QETH_IP_TYPE_VIPA,
+	QETH_IP_TYPE_RXIP,
+};
+
 struct qeth_ipaddr {
 	struct hlist_node hnode;
 	enum qeth_ip_types type;
-	enum qeth_ipa_setdelip_flags set_flags;
-	enum qeth_ipa_setdelip_flags del_flags;
+	unsigned char mac[ETH_ALEN];
 	u8 is_multicast:1;
 	u8 in_progress:1;
 	u8 disp_flag:2;
+	u8 ipato:1;			/* ucast only */
 
 	/* is changed only for normal ip addresses
 	 * for non-normal addresses it always is  1
 	 */
 	int  ref_counter;
 	enum qeth_prot_versions proto;
-	unsigned char mac[ETH_ALEN];
 	union {
 		struct {
 			unsigned int addr;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 0dfa16a03ea2..ef3f548b7d35 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -229,7 +229,7 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
 
 		if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
 			QETH_CARD_TEXT(card, 2, "tkovaddr");
-			addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
+			addr->ipato = 1;
 		}
 		hash_add(card->ip_htable, &addr->hnode,
 				qeth_l3_ipaddr_hash(addr));
@@ -382,21 +382,38 @@ static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len)
 	}
 }
 
-static int qeth_l3_send_setdelip(struct qeth_card *card,
-		struct qeth_ipaddr *addr, int ipacmd, unsigned int flags)
+static u32 qeth_l3_get_setdelip_flags(struct qeth_ipaddr *addr, bool set)
+{
+	switch (addr->type) {
+	case QETH_IP_TYPE_RXIP:
+		return (set) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+	case QETH_IP_TYPE_VIPA:
+		return (set) ? QETH_IPA_SETIP_VIPA_FLAG :
+			       QETH_IPA_DELIP_VIPA_FLAG;
+	default:
+		return (set && addr->ipato) ? QETH_IPA_SETIP_TAKEOVER_FLAG : 0;
+	}
+}
+
+static int qeth_l3_send_setdelip(struct qeth_card *card,
+				 struct qeth_ipaddr *addr,
+				 enum qeth_ipa_cmds ipacmd)
 {
-	int rc;
 	struct qeth_cmd_buffer *iob;
 	struct qeth_ipa_cmd *cmd;
 	__u8 netmask[16];
+	u32 flags;
 
 	QETH_CARD_TEXT(card, 4, "setdelip");
-	QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
 
 	iob = qeth_get_ipacmd_buffer(card, ipacmd, addr->proto);
 	if (!iob)
 		return -ENOMEM;
 	cmd = __ipa_cmd(iob);
+
+	flags = qeth_l3_get_setdelip_flags(addr, ipacmd == IPA_CMD_SETIP);
+	QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
+
 	if (addr->proto == QETH_PROT_IPV6) {
 		memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
 		       sizeof(struct in6_addr));
@@ -410,9 +427,7 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
 		cmd->data.setdelip4.flags = flags;
 	}
 
-	rc = qeth_send_ipa_cmd(card, iob, NULL, NULL);
-
-	return rc;
+	return qeth_send_ipa_cmd(card, iob, NULL, NULL);
 }
 
 static int qeth_l3_send_setrouting(struct qeth_card *card,
@@ -528,10 +543,7 @@ void qeth_l3_update_ipato(struct qeth_card *card)
 	hash_for_each(card->ip_htable, i, addr, hnode) {
 		if (addr->type != QETH_IP_TYPE_NORMAL)
 			continue;
-		if (qeth_l3_is_addr_covered_by_ipato(card, addr))
-			addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
-		else
-			addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
+		addr->ipato = qeth_l3_is_addr_covered_by_ipato(card, addr);
 	}
 }
 
@@ -621,12 +633,6 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
 		memcpy(&addr.u.a4.addr, ip, 4);
 	else
 		memcpy(&addr.u.a6.addr, ip, 16);
-	if (type == QETH_IP_TYPE_RXIP) {
-		addr.set_flags = QETH_IPA_SETIP_TAKEOVER_FLAG;
-	} else {
-		addr.set_flags = QETH_IPA_SETIP_VIPA_FLAG;
-		addr.del_flags = QETH_IPA_DELIP_VIPA_FLAG;
-	}
 
 	spin_lock_bh(&card->ip_lock);
 	rc = add ? qeth_l3_add_ip(card, &addr) : qeth_l3_delete_ip(card, &addr);
@@ -674,8 +680,7 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
 		if (addr->is_multicast)
 			rc =  qeth_l3_send_setdelmc(card, addr, IPA_CMD_SETIPM);
 		else
-			rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP,
-					addr->set_flags);
+			rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_SETIP);
 		if (rc)
 			QETH_CARD_TEXT(card, 2, "failed");
 	} while ((--cnt > 0) && rc);
@@ -707,8 +712,7 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
 	if (addr->is_multicast)
 		rc = qeth_l3_send_setdelmc(card, addr, IPA_CMD_DELIPM);
 	else
-		rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP,
-					addr->del_flags);
+		rc = qeth_l3_send_setdelip(card, addr, IPA_CMD_DELIP);
 	if (rc)
 		QETH_CARD_TEXT(card, 2, "failed");
 

From 3a021ab56468db57868a9bc6721b7dfbec672a15 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Mar 2018 09:57:22 +0200
Subject: [PATCH 0822/1678] selftests: forwarding: Add a test for VLAN-unaware
 bridge

Similar to the VLAN-aware bridge test, test the VLAN-unaware bridge and
make sure that ping, FDB learning and flooding work as expected.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/bridge_vlan_unaware.sh     | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh

diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755
index 000000000000..1cddf06f691d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 Seconds ageing time.
+	ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+ping_test $h1 192.0.2.2
+ping6_test $h1 2001:db8:1::2
+learning_test "br0" $swp1 $h1 $h2
+flood_test $swp2 $h1 $h2
+
+exit $EXIT_STATUS

From ff0162af9e7b2e33d4d40f41130c65ba416ba059 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Mar 2018 09:57:23 +0200
Subject: [PATCH 0823/1678] selftests: forwarding: Exit with error when missing
 dependencies

We already return an error when some dependencies (e.g., 'jq') are
missing so lets be consistent and do that for all.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 273511ef2b43..6ac8a98fa270 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -50,7 +50,7 @@ fi
 
 if [[ ! -x "$(command -v $MZ)" ]]; then
 	echo "SKIP: $MZ not installed"
-	exit 0
+	exit 1
 fi
 
 if [[ ! -v NUM_NETIFS ]]; then

From 231b85abaaeef119811a9f036245c94bd5483415 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Mar 2018 09:57:24 +0200
Subject: [PATCH 0824/1678] selftests: forwarding: Exit with error when missing
 interfaces

Returning 0 gives a false sense of success when the required modules did
not even manage to be initialized and register the required net devices.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 6ac8a98fa270..e989d8a1d4fb 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -55,7 +55,7 @@ fi
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
-	exit 0
+	exit 1
 fi
 
 ##############################################################################
@@ -115,7 +115,7 @@ for i in $(eval echo {1..$NUM_NETIFS}); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: could not find all required interfaces"
-		exit 0
+		exit 1
 	fi
 done
 

From 59be45c375e6decce4216dc85fcc8ddf46d38c68 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Mar 2018 09:57:25 +0200
Subject: [PATCH 0825/1678] selftests: forwarding: Allow creation of interfaces
 without a config file

Some users want to be able to run the tests without a configuration file
which is useful when one needs to test both virtual and physical
interfaces on the same machine.

Move the defines that set the type of interface to create and whether to
create it away from the optional configuration file to the library like
the rest of the defines.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/forwarding.config.sample    | 9 ++++-----
 tools/testing/selftests/net/forwarding/lib.sh            | 2 ++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index df54c9eb5100..e819d049d9ce 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -14,11 +14,6 @@ NETIFS[p6]=veth5
 NETIFS[p7]=veth6
 NETIFS[p8]=veth7
 
-NETIF_TYPE=veth
-
-# only virtual interfaces (veth) can be created by test infra
-#NETIF_CREATE=yes
-
 ##############################################################################
 # Defines
 
@@ -34,3 +29,7 @@ WAIT_TIME=5
 PAUSE_ON_FAIL=no
 # Whether to pause on cleanup or not.
 PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e989d8a1d4fb..1ac6c62271f3 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -11,6 +11,8 @@ MZ=${MZ:=mausezahn}
 WAIT_TIME=${WAIT_TIME:=5}
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
 
 if [[ -f forwarding.config ]]; then
 	source forwarding.config

From 590980558bd0f49ff598909464661298aad74819 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 10 Mar 2018 11:29:22 +0800
Subject: [PATCH 0826/1678] net: hns3: add existence check when remove old uc
 mac address

When driver is in initial state, the mac_vlan table table is empty.
So the delete operation for mac address must fail. Existence check
is needed here. Otherwise, the error message will make user confused.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h     |  3 ++-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c |  4 ++--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 17 +++++++----------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c  |  2 ++
 .../hisilicon/hns3/hns3vf/hclgevf_main.c        | 10 +++++++---
 5 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a07204837a9b..70441d281c27 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -338,7 +338,8 @@ struct hnae3_ae_ops {
 				   u32 *tx_usecs_high, u32 *rx_usecs_high);
 
 	void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
-	int (*set_mac_addr)(struct hnae3_handle *handle, void *p);
+	int (*set_mac_addr)(struct hnae3_handle *handle, void *p,
+			    bool is_first);
 	int (*add_uc_addr)(struct hnae3_handle *handle,
 			   const unsigned char *addr);
 	int (*rm_uc_addr)(struct hnae3_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0a8c427b407e..8113d2236edd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1120,7 +1120,7 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
 	if (!mac_addr || !is_valid_ether_addr((const u8 *)mac_addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data);
+	ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data, false);
 	if (ret) {
 		netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
 		return ret;
@@ -3048,7 +3048,7 @@ static void hns3_init_mac_addr(struct net_device *netdev)
 	}
 
 	if (h->ae_algo->ops->set_mac_addr)
-		h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+		h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
 
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a31877302fc2..632672c04d44 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4378,7 +4378,8 @@ static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 	ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
-static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+			      bool is_first)
 {
 	const unsigned char *new_addr = (const unsigned char *)p;
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -4395,11 +4396,9 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
 		return -EINVAL;
 	}
 
-	ret = hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr);
-	if (ret)
+	if (!is_first && hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
 		dev_warn(&hdev->pdev->dev,
-			 "remove old uc mac address fail, ret =%d.\n",
-			 ret);
+			 "remove old uc mac address fail.\n");
 
 	ret = hclge_add_uc_addr(handle, new_addr);
 	if (ret) {
@@ -4407,12 +4406,10 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
 			"add uc mac address fail, ret =%d.\n",
 			ret);
 
-		ret = hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr);
-		if (ret) {
+		if (!is_first &&
+		    hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr))
 			dev_err(&hdev->pdev->dev,
-				"restore uc mac address fail, ret =%d.\n",
-				ret);
-		}
+				"restore uc mac address fail.\n");
 
 		return -EIO;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index f332de6004fb..ed34ca3c3ea4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -196,6 +196,8 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
 
 		hclge_rm_uc_addr_common(vport, old_addr);
 		status = hclge_add_uc_addr_common(vport, mac_addr);
+		if (status)
+			hclge_add_uc_addr_common(vport, old_addr);
 	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_ADD) {
 		status = hclge_add_uc_addr_common(vport, mac_addr);
 	} else if (mbx_req->msg[1] == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 09a2a66975a6..ba3fed1a2996 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -734,20 +734,24 @@ static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 	ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
-static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p)
+static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
+				bool is_first)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	u8 *old_mac_addr = (u8 *)hdev->hw.mac.mac_addr;
 	u8 *new_mac_addr = (u8 *)p;
 	u8 msg_data[ETH_ALEN * 2];
+	u16 subcode;
 	int status;
 
 	ether_addr_copy(msg_data, new_mac_addr);
 	ether_addr_copy(&msg_data[ETH_ALEN], old_mac_addr);
 
+	subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD :
+			HCLGE_MBX_MAC_VLAN_UC_MODIFY;
+
 	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
-				      HCLGE_MBX_MAC_VLAN_UC_MODIFY,
-				      msg_data, ETH_ALEN * 2,
+				      subcode, msg_data, ETH_ALEN * 2,
 				      false, NULL, 0);
 	if (!status)
 		ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);

From 8cc6c1f77ba024fac4fb0cecc359a7c6a0df443c Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 10 Mar 2018 11:29:23 +0800
Subject: [PATCH 0827/1678] net: hns3: fix for netdev not running problem after
 calling net_stop and net_open

The link status update function is called by timer every second. But
net_stop and net_open may be called with very short intervals. The link
status update function can not detect the link state has changed. It
causes the netdev not running problem.

This patch fixes it by updating the link state in ae_stop function.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c   | 3 +++
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 632672c04d44..cc604077ba03 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3756,6 +3756,9 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
 	/* reset tqp stats */
 	hclge_reset_tqp_stats(handle);
+	del_timer_sync(&hdev->service_timer);
+	cancel_work_sync(&hdev->service_task);
+	hclge_update_link_status(hdev);
 }
 
 static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index ba3fed1a2996..c80a604a545f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1066,6 +1066,9 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle)
 
 	/* reset tqp stats */
 	hclgevf_reset_tqp_stats(handle);
+	del_timer_sync(&hdev->service_timer);
+	cancel_work_sync(&hdev->service_task);
+	hclgevf_update_link_status(hdev, 0);
 }
 
 static void hclgevf_state_init(struct hclgevf_dev *hdev)

From 20e4bf982b72e5716233542d73b1e516485b71fb Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 10 Mar 2018 11:29:24 +0800
Subject: [PATCH 0828/1678] net: hns3: fix for ipv6 address loss problem after
 setting channels

The function of dev_close and dev_open is just likes ifconfig <netif> down
and ifconfig <netif> up. The ipv6 address will be lost after dev_close and
dev_open are called. This patch uses hns3_nic_net_stop to replace dev_close
and uses hns3_nic_net_open to replace dev_open.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 8113d2236edd..4dba97498b63 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3503,7 +3503,7 @@ int hns3_set_channels(struct net_device *netdev,
 		return 0;
 
 	if (if_running)
-		dev_close(netdev);
+		hns3_nic_net_stop(netdev);
 
 	hns3_clear_all_ring(h);
 
@@ -3546,7 +3546,7 @@ int hns3_set_channels(struct net_device *netdev,
 
 open_netdev:
 	if (if_running)
-		dev_open(netdev);
+		hns3_nic_net_open(netdev);
 
 	return ret;
 }

From e98d7183f609ba48cfe5c5132b99f4c6ccab31c6 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 10 Mar 2018 11:29:25 +0800
Subject: [PATCH 0829/1678] net: hns3: unify the pause params setup function

Since the firmware cmd to setup mac pause params is the same as the
firmware cmd to pfc pause params, this patch unifies the pause params
setup function.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  2 +-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 23 +++++++++----------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index cc604077ba03..ea33cc5714c6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4417,7 +4417,7 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
 		return -EIO;
 	}
 
-	ret = hclge_mac_pause_addr_cfg(hdev, new_addr);
+	ret = hclge_pause_addr_cfg(hdev, new_addr);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"configure mac pause address fail, ret =%d.\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 36bd79a77940..4134a829c894 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -138,8 +138,8 @@ static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
-				     u8 pause_trans_gap, u16 pause_trans_time)
+static int hclge_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
+				 u8 pause_trans_gap, u16 pause_trans_time)
 {
 	struct hclge_cfg_pause_param_cmd *pause_param;
 	struct hclge_desc desc;
@@ -155,7 +155,7 @@ static int hclge_mac_pause_param_cfg(struct hclge_dev *hdev, const u8 *addr,
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
 
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
 {
 	struct hclge_cfg_pause_param_cmd *pause_param;
 	struct hclge_desc desc;
@@ -174,7 +174,7 @@ int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr)
 	trans_gap = pause_param->pause_trans_gap;
 	trans_time = le16_to_cpu(pause_param->pause_trans_time);
 
-	return hclge_mac_pause_param_cfg(hdev, mac_addr, trans_gap,
+	return hclge_pause_param_cfg(hdev, mac_addr, trans_gap,
 					 trans_time);
 }
 
@@ -1096,11 +1096,11 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
 	return hclge_tm_schd_mode_hw(hdev);
 }
 
-static int hclge_mac_pause_param_setup_hw(struct hclge_dev *hdev)
+static int hclge_pause_param_setup_hw(struct hclge_dev *hdev)
 {
 	struct hclge_mac *mac = &hdev->hw.mac;
 
-	return hclge_mac_pause_param_cfg(hdev, mac->mac_addr,
+	return hclge_pause_param_cfg(hdev, mac->mac_addr,
 					 HCLGE_DEFAULT_PAUSE_TRANS_GAP,
 					 HCLGE_DEFAULT_PAUSE_TRANS_TIME);
 }
@@ -1151,13 +1151,12 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev)
 	int ret;
 	u8 i;
 
-	if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
-		ret = hclge_mac_pause_setup_hw(hdev);
-		if (ret)
-			return ret;
+	ret = hclge_pause_param_setup_hw(hdev);
+	if (ret)
+		return ret;
 
-		return hclge_mac_pause_param_setup_hw(hdev);
-	}
+	if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+		return hclge_mac_pause_setup_hw(hdev);
 
 	/* Only DCB-supported dev supports qset back pressure and pfc cmd */
 	if (!hnae3_dev_dcb_supported(hdev))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 5401e7559437..c30c85b54ffc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -129,5 +129,5 @@ int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
 int hclge_tm_map_cfg(struct hclge_dev *hdev);
 int hclge_tm_init_hw(struct hclge_dev *hdev);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
-int hclge_mac_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
 #endif

From f8d291f00bb33c97b8c9dd643277d6300d05559d Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 10 Mar 2018 11:29:26 +0800
Subject: [PATCH 0830/1678] net: hns3: fix rx path skb->truesize reporting bug

Original skb->truesize reports the received packet size,
not the actual buffer size NIC driver allocated(1 Page).
The linux net protocol will misjudge the true size of rx queue.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 4dba97498b63..94f0b92ead38 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2080,15 +2080,13 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 	desc = &ring->desc[ring->next_to_clean];
 	size = le16_to_cpu(desc->rx.size);
 
-	if (twobufs) {
-		truesize = hnae_buf_size(ring);
-	} else {
-		truesize = ALIGN(size, L1_CACHE_BYTES);
+	truesize = hnae_buf_size(ring);
+
+	if (!twobufs)
 		last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
-	}
 
 	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
-			size - pull_len, truesize - pull_len);
+			size - pull_len, truesize);
 
 	 /* Avoid re-using remote pages,flag default unreuse */
 	if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))

From 64fd2300fcc15f5660e72754c2207d25cb6a0cba Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 10 Mar 2018 11:29:27 +0800
Subject: [PATCH 0831/1678] net: hns3: add support for querying pfc puase
 packets statistic

This patch add support for querying pfc puase packets statistic
in hclge_ieee_getpfc, which is used to tell user how many pfc
puase packets have been sent and received by this mac port.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_dcb.c         | 14 +++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 53 +++++++++++++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h |  6 +++
 3 files changed, 73 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index 407cfabe6d21..955f0e3d5c95 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -205,9 +205,11 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
 
 static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
 {
+	u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC];
 	struct hclge_vport *vport = hclge_get_vport(h);
 	struct hclge_dev *hdev = vport->back;
 	u8 i, j, pfc_map, *prio_tc;
+	int ret;
 
 	memset(pfc, 0, sizeof(*pfc));
 	pfc->pfc_cap = hdev->pfc_max;
@@ -222,6 +224,18 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
 		}
 	}
 
+	ret = hclge_pfc_tx_stats_get(hdev, requests);
+	if (ret)
+		return ret;
+
+	ret = hclge_pfc_rx_stats_get(hdev, indications);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+		pfc->requests[i] = requests[i];
+		pfc->indications[i] = indications[i];
+	}
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 4134a829c894..885f25cd7be4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -23,6 +23,9 @@ enum hclge_shaper_level {
 	HCLGE_SHAPER_LVL_PF	= 1,
 };
 
+#define HCLGE_TM_PFC_PKT_GET_CMD_NUM	3
+#define HCLGE_TM_PFC_NUM_GET_PER_CMD	3
+
 #define HCLGE_SHAPER_BS_U_DEF	5
 #define HCLGE_SHAPER_BS_S_DEF	20
 
@@ -112,6 +115,56 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
 	return 0;
 }
 
+static int hclge_pfc_stats_get(struct hclge_dev *hdev,
+			       enum hclge_opcode_type opcode, u64 *stats)
+{
+	struct hclge_desc desc[HCLGE_TM_PFC_PKT_GET_CMD_NUM];
+	int ret, i, j;
+
+	if (!(opcode == HCLGE_OPC_QUERY_PFC_RX_PKT_CNT ||
+	      opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT))
+		return -EINVAL;
+
+	for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+		hclge_cmd_setup_basic_desc(&desc[i], opcode, true);
+		if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1))
+			desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+		else
+			desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	}
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Get pfc pause stats fail, ret = %d.\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) {
+		struct hclge_pfc_stats_cmd *pfc_stats =
+				(struct hclge_pfc_stats_cmd *)desc[i].data;
+
+		for (j = 0; j < HCLGE_TM_PFC_NUM_GET_PER_CMD; j++) {
+			u32 index = i * HCLGE_TM_PFC_PKT_GET_CMD_NUM + j;
+
+			if (index < HCLGE_MAX_TC_NUM)
+				stats[index] =
+					le64_to_cpu(pfc_stats->pkt_num[j]);
+		}
+	}
+	return 0;
+}
+
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+	return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_RX_PKT_CNT, stats);
+}
+
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats)
+{
+	return hclge_pfc_stats_get(hdev, HCLGE_OPC_QUERY_PFC_TX_PKT_CNT, stats);
+}
+
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
 {
 	struct hclge_desc desc;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index c30c85b54ffc..2dbe177581e9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -109,6 +109,10 @@ struct hclge_cfg_pause_param_cmd {
 	__le16 pause_trans_time;
 };
 
+struct hclge_pfc_stats_cmd {
+	__le64 pkt_num[3];
+};
+
 struct hclge_port_shapping_cmd {
 	__le32 port_shapping_para;
 };
@@ -130,4 +134,6 @@ int hclge_tm_map_cfg(struct hclge_dev *hdev);
 int hclge_tm_init_hw(struct hclge_dev *hdev);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
+int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
 #endif

From f96818a7cc864ac612c157b5c5e8c57c4b3e0136 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Sat, 10 Mar 2018 11:29:28 +0800
Subject: [PATCH 0832/1678] net: hns3: fix for loopback failure when vlan
 filter is enable

When vlan ctag filter is enabled, the loopback selftest fails because
loopback selftest does not support vlan.

This patch fixes it by disabling the vlan ctag filter when runnig
loopback selftest.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_ethtool.c   | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 26274bca8b3d..2db127c7e463 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -309,6 +309,9 @@ static void hns3_self_test(struct net_device *ndev,
 	struct hnae3_handle *h = priv->ae_handle;
 	int st_param[HNS3_SELF_TEST_TPYE_NUM][2];
 	bool if_running = netif_running(ndev);
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	bool dis_vlan_filter;
+#endif
 	int test_index = 0;
 	u32 i;
 
@@ -323,6 +326,14 @@ static void hns3_self_test(struct net_device *ndev,
 	if (if_running)
 		dev_close(ndev);
 
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	/* Disable the vlan filter for selftest does not support it */
+	dis_vlan_filter = (ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+				h->ae_algo->ops->enable_vlan_filter;
+	if (dis_vlan_filter)
+		h->ae_algo->ops->enable_vlan_filter(h, false);
+#endif
+
 	set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
 
 	for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) {
@@ -345,6 +356,11 @@ static void hns3_self_test(struct net_device *ndev,
 
 	clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
 
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	if (dis_vlan_filter)
+		h->ae_algo->ops->enable_vlan_filter(h, true);
+#endif
+
 	if (if_running)
 		dev_open(ndev);
 }

From 5d02a58dae60bf71a32625ec510b116ee22faebc Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Sat, 10 Mar 2018 11:29:29 +0800
Subject: [PATCH 0833/1678] net: hns3: fix for buffer overflow smatch warning

This patch fixes the buffer overflow warning by refactoring
hclgevf_bind_ring_to_vector and hclge_get_ring_chain_from_mbx.

Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support")
Fixes: dde1a86e93ca ("net: hns3: Add mailbox support to PF driver")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hclge_mbx.h   |  2 +
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 19 +++---
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 60 ++++++++-----------
 3 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 3e9203ea42a6..e6e1d221b5c8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -57,6 +57,8 @@ enum hclge_mbx_vlan_cfg_subcode {
 
 #define HCLGE_MBX_MAX_MSG_SIZE	16
 #define HCLGE_MBX_MAX_RESP_DATA_SIZE	8
+#define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM	3
+#define HCLGE_MBX_RING_NODE_VARIABLE_NUM	3
 
 struct hclgevf_mbx_resp_status {
 	struct mutex mbx_mutex; /* protects against contending sync cmd resp */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index ed34ca3c3ea4..e3e4ded1e98f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -105,14 +105,17 @@ static int hclge_get_ring_chain_from_mbx(
 			struct hnae3_ring_chain_node *ring_chain,
 			struct hclge_vport *vport)
 {
-#define HCLGE_RING_NODE_VARIABLE_NUM		3
-#define HCLGE_RING_MAP_MBX_BASIC_MSG_NUM	3
 	struct hnae3_ring_chain_node *cur_chain, *new_chain;
 	int ring_num;
 	int i;
 
 	ring_num = req->msg[2];
 
+	if (ring_num > ((HCLGE_MBX_VF_MSG_DATA_NUM -
+		HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+		HCLGE_MBX_RING_NODE_VARIABLE_NUM))
+		return -ENOMEM;
+
 	hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
 	ring_chain->tqp_index =
 			hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
@@ -128,18 +131,18 @@ static int hclge_get_ring_chain_from_mbx(
 			goto err;
 
 		hnae_set_bit(new_chain->flag, HNAE3_RING_TYPE_B,
-			     req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
-			     HCLGE_RING_MAP_MBX_BASIC_MSG_NUM]);
+			     req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+			     HCLGE_MBX_RING_MAP_BASIC_MSG_NUM]);
 
 		new_chain->tqp_index =
 		hclge_get_queue_id(vport->nic.kinfo.tqp
-			[req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
-			HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+			[req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+			HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 1]]);
 
 		hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
 			       HCLGE_INT_GL_IDX_S,
-			       req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
-			       HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 2]);
+			       req->msg[HCLGE_MBX_RING_NODE_VARIABLE_NUM * i +
+			       HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + 2]);
 
 		cur_chain->next = new_chain;
 		cur_chain = new_chain;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index c80a604a545f..6c240d646307 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -533,13 +533,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
 				       int vector,
 				       struct hnae3_ring_chain_node *ring_chain)
 {
-#define HCLGEVF_RING_NODE_VARIABLE_NUM		3
-#define HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM	3
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hnae3_ring_chain_node *node;
 	struct hclge_mbx_vf_to_pf_cmd *req;
 	struct hclgevf_desc desc;
-	int i, vector_id;
+	int i = 0, vector_id;
 	int status;
 	u8 type;
 
@@ -551,28 +549,33 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
 		return vector_id;
 	}
 
-	hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false);
-	type = en ?
-		HCLGE_MBX_MAP_RING_TO_VECTOR : HCLGE_MBX_UNMAP_RING_TO_VECTOR;
-	req->msg[0] = type;
-	req->msg[1] = vector_id; /* vector_id should be id in VF */
-
-	i = 0;
 	for (node = ring_chain; node; node = node->next) {
-		i++;
-		/* msg[2] is cause num */
-		req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i] =
-				hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
-		req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
-				node->tqp_index;
-		req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 2] =
-				hnae_get_field(node->int_gl_idx,
-					       HNAE3_RING_GL_IDX_M,
-					       HNAE3_RING_GL_IDX_S);
+		int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM +
+					HCLGE_MBX_RING_NODE_VARIABLE_NUM * i;
 
-		if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
-		    HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
-		    HCLGEVF_RING_NODE_VARIABLE_NUM) {
+		if (i == 0) {
+			hclgevf_cmd_setup_basic_desc(&desc,
+						     HCLGEVF_OPC_MBX_VF_TO_PF,
+						     false);
+			type = en ?
+				HCLGE_MBX_MAP_RING_TO_VECTOR :
+				HCLGE_MBX_UNMAP_RING_TO_VECTOR;
+			req->msg[0] = type;
+			req->msg[1] = vector_id;
+		}
+
+		req->msg[idx_offset] =
+				hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
+		req->msg[idx_offset + 1] = node->tqp_index;
+		req->msg[idx_offset + 2] = hnae_get_field(node->int_gl_idx,
+							  HNAE3_RING_GL_IDX_M,
+							  HNAE3_RING_GL_IDX_S);
+
+		i++;
+		if ((i == (HCLGE_MBX_VF_MSG_DATA_NUM -
+		     HCLGE_MBX_RING_MAP_BASIC_MSG_NUM) /
+		     HCLGE_MBX_RING_NODE_VARIABLE_NUM) ||
+		    !node->next) {
 			req->msg[2] = i;
 
 			status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
@@ -591,17 +594,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
 		}
 	}
 
-	if (i > 0) {
-		req->msg[2] = i;
-
-		status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
-		if (status) {
-			dev_err(&hdev->pdev->dev,
-				"Map TQP fail, status is %d.\n", status);
-			return status;
-		}
-	}
-
 	return 0;
 }
 

From aa7a795eecca4cd02557d2fd3cbc128ecc1e93c9 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 10 Mar 2018 11:29:30 +0800
Subject: [PATCH 0834/1678] net: hns3: fix error type definition of return
 value

An enum type variable was used to store an "int" type return value.
This patch fixes it.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c  | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index ea33cc5714c6..ba84281a36bb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4107,8 +4107,8 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 {
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_mac_vlan_tbl_entry_cmd req;
-	enum hclge_cmd_status status;
 	u16 egress_port = 0;
+	int ret;
 
 	/* mac addr check */
 	if (is_zero_ether_addr(addr) ||
@@ -4140,9 +4140,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 
 	hclge_prepare_mac_addr(&req, addr);
 
-	status = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+	ret = hclge_add_mac_vlan_tbl(vport, &req, NULL);
 
-	return status;
+	return ret;
 }
 
 static int hclge_rm_uc_addr(struct hnae3_handle *handle,
@@ -4158,7 +4158,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
 {
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_mac_vlan_tbl_entry_cmd req;
-	enum hclge_cmd_status status;
+	int ret;
 
 	/* mac addr check */
 	if (is_zero_ether_addr(addr) ||
@@ -4174,9 +4174,9 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
 	hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
 	hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
 	hclge_prepare_mac_addr(&req, addr);
-	status = hclge_remove_mac_vlan_tbl(vport, &req);
+	ret = hclge_remove_mac_vlan_tbl(vport, &req);
 
-	return status;
+	return ret;
 }
 
 static int hclge_add_mc_addr(struct hnae3_handle *handle,

From eefd00a5d70378feae66c073105c548075609482 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 10 Mar 2018 11:29:31 +0800
Subject: [PATCH 0835/1678] net: hns3: fix return value error of
 hclge_get_mac_vlan_cmd_status()

Error code -EIO was used to indicate mutilple errors in function
hclge_get_mac_vlan_cmd_status().This patch fixes it by using
error code depending on the error type.

For no space error, return -ENOSPC.
For entry not found, return -ENOENT.
For command send fail, return -EIO.
For invalid op code, return -EINVAL.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index ba84281a36bb..47dcc9856d4c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3779,11 +3779,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 		if ((!resp_code) || (resp_code == 1)) {
 			return_status = 0;
 		} else if (resp_code == 2) {
-			return_status = -EIO;
+			return_status = -ENOSPC;
 			dev_err(&hdev->pdev->dev,
 				"add mac addr failed for uc_overflow.\n");
 		} else if (resp_code == 3) {
-			return_status = -EIO;
+			return_status = -ENOSPC;
 			dev_err(&hdev->pdev->dev,
 				"add mac addr failed for mc_overflow.\n");
 		} else {
@@ -3795,7 +3795,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 		if (!resp_code) {
 			return_status = 0;
 		} else if (resp_code == 1) {
-			return_status = -EIO;
+			return_status = -ENOENT;
 			dev_dbg(&hdev->pdev->dev,
 				"remove mac addr failed for miss.\n");
 		} else {
@@ -3807,7 +3807,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 		if (!resp_code) {
 			return_status = 0;
 		} else if (resp_code == 1) {
-			return_status = -EIO;
+			return_status = -ENOENT;
 			dev_dbg(&hdev->pdev->dev,
 				"lookup mac addr failed for miss.\n");
 		} else {
@@ -3816,7 +3816,7 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 				resp_code);
 		}
 	} else {
-		return_status = -EIO;
+		return_status = -EINVAL;
 		dev_err(&hdev->pdev->dev,
 			"unknown opcode for get_mac_vlan_cmd_status,opcode=%d.\n",
 			op);

From d07b6bb4350040e10be8483640ae1b2bf37a3e5e Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 10 Mar 2018 11:29:32 +0800
Subject: [PATCH 0836/1678] net: hns3: add existence checking before adding
 unicast mac address

It's not allowed to add two same unicast mac address entries to the
mac_vlan table. When modify the uc mac address of a VF device to the
same value with the PF device's, the PF device will lose its entry of
the mac_vlan table.

Lookup the mac address in the mac_vlan table, and add it if the entry
is inexistent.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 47dcc9856d4c..d70619b5ff15 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4107,6 +4107,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 {
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_mac_vlan_tbl_entry_cmd req;
+	struct hclge_desc desc;
 	u16 egress_port = 0;
 	int ret;
 
@@ -4140,7 +4141,21 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 
 	hclge_prepare_mac_addr(&req, addr);
 
-	ret = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+	/* Lookup the mac address in the mac_vlan table, and add
+	 * it if the entry is inexistent. Repeated unicast entry
+	 * is not allowed in the mac vlan table.
+	 */
+	ret = hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false);
+	if (ret == -ENOENT)
+		return hclge_add_mac_vlan_tbl(vport, &req, NULL);
+
+	/* check if we just hit the duplicate */
+	if (!ret)
+		ret = -EINVAL;
+
+	dev_err(&hdev->pdev->dev,
+		"PF failed to add unicast entry(%pM) in the MAC table\n",
+		addr);
 
 	return ret;
 }

From 2097fdefa5c2c22c3165b3c9020ddb8ed610c371 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Sat, 10 Mar 2018 11:29:33 +0800
Subject: [PATCH 0837/1678] net: hns3: add result checking for VF when modify
 unicast mac address

VF changes unicast mac address by sending mailbox msg to PF, then PF
completes the mac address modification. It may fail when the target
uc mac address is already in the mac_vlan table. VF should be aware
of it by reading the message result.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c    | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c  | 4 ++++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index e3e4ded1e98f..4a49a6b2f4c3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -365,7 +365,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 					ret);
 			break;
 		case HCLGE_MBX_SET_UNICAST:
-			ret = hclge_set_vf_uc_mac_addr(vport, req, false);
+			ret = hclge_set_vf_uc_mac_addr(vport, req, true);
 			if (ret)
 				dev_err(&hdev->pdev->dev,
 					"PF fail(%d) to set VF UC MAC Addr\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 6c240d646307..23370258aaeb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -744,7 +744,7 @@ static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
 
 	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
 				      subcode, msg_data, ETH_ALEN * 2,
-				      false, NULL, 0);
+				      true, NULL, 0);
 	if (!status)
 		ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 18283ef4ce81..9768f71f5b18 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -54,6 +54,10 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
 	mbx_resp = &hdev->mbx_resp;
 	r_code0 = (u16)(mbx_resp->origin_mbx_msg >> 16);
 	r_code1 = (u16)(mbx_resp->origin_mbx_msg & 0xff);
+
+	if (mbx_resp->resp_status)
+		return mbx_resp->resp_status;
+
 	if (resp_data)
 		memcpy(resp_data, &mbx_resp->additional_info[0], resp_len);
 

From 87a7c4b3c3570feed2edb9478c3bdd474f43a239 Mon Sep 17 00:00:00 2001
From: Veerasenareddy Burru <veerasenareddy.burru@cavium.com>
Date: Sat, 10 Mar 2018 00:17:35 -0800
Subject: [PATCH 0838/1678] liquidio: fix ndo_change_mtu to always return
 correct status to the caller

In a scenario where the command queued to firmware get dropped or times
out, MTU change from host will not propagate to firmware. So, it is
required for host driver to wait for response from firmware or timeout
and then return correct status to caller of ndo_change_mtu.

Also moved the common code for MTU change from PF and VF driver files to
common file lio_core.c

Signed-off-by: Veerasenareddy Burru <veerasenareddy.burru@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_core.c   | 94 +++++++++++++++++--
 .../net/ethernet/cavium/liquidio/lio_main.c   | 70 +++++---------
 .../ethernet/cavium/liquidio/lio_vf_main.c    | 67 +++----------
 .../cavium/liquidio/liquidio_common.h         |  3 +-
 .../ethernet/cavium/liquidio/octeon_network.h | 20 ++++
 5 files changed, 141 insertions(+), 113 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 8b1ee83134e3..e7b6eb87ab14 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -164,15 +164,6 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 		}
 		break;
 
-	case OCTNET_CMD_CHANGE_MTU:
-		/* If command is successful, change the MTU. */
-		netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
-			   netdev->mtu, nctrl->ncmd.s.param1);
-		netdev->mtu = nctrl->ncmd.s.param1;
-		queue_delayed_work(lio->link_status_wq.wq,
-				   &lio->link_status_wq.wk.work, 0);
-		break;
-
 	case OCTNET_CMD_GPIO_ACCESS:
 		netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
 
@@ -1081,3 +1072,88 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
 	}
 	return 0;
 }
+
+static void liquidio_change_mtu_completion(struct octeon_device *oct,
+					   u32 status, void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct liquidio_if_cfg_context *ctx;
+
+	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+	if (status) {
+		dev_err(&oct->pci_dev->dev, "MTU change failed. Status: %llx\n",
+			CVM_CAST64(status));
+		WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_FAIL);
+	} else {
+		WRITE_ONCE(ctx->cond, LIO_CHANGE_MTU_SUCCESS);
+	}
+
+	/* This barrier is required to be sure that the response has been
+	 * written fully before waking up the handler
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct liquidio_if_cfg_context *ctx;
+	struct octeon_soft_command *sc;
+	union octnet_cmd *ncmd;
+	int ctx_size;
+	int ret = 0;
+
+	ctx_size = sizeof(struct liquidio_if_cfg_context);
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, ctx_size);
+
+	ncmd = (union octnet_cmd *)sc->virtdptr;
+	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+	WRITE_ONCE(ctx->cond, 0);
+	ctx->octeon_id = lio_get_device_id(oct);
+	init_waitqueue_head(&ctx->wc);
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = OCTNET_CMD_CHANGE_MTU;
+	ncmd->s.param1 = new_mtu;
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_CMD, 0, 0, 0);
+
+	sc->callback = liquidio_change_mtu_completion;
+	sc->callback_arg = sc;
+	sc->wait_time = 100;
+
+	ret = octeon_send_soft_command(oct, sc);
+	if (ret == IQ_SEND_FAILED) {
+		netif_info(lio, rx_err, lio->netdev, "Failed to change MTU\n");
+		return -EINVAL;
+	}
+	/* Sleep on a wait queue till the cond flag indicates that the
+	 * response arrived or timed-out.
+	 */
+	if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR ||
+	    ctx->cond == LIO_CHANGE_MTU_FAIL) {
+		octeon_free_soft_command(oct, sc);
+		return -EINVAL;
+	}
+
+	netdev->mtu = new_mtu;
+	lio->mtu = new_mtu;
+
+	octeon_free_soft_command(oct, sc);
+	return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index a5eecd895a82..140085ba48cd 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -91,20 +91,6 @@ static int octeon_console_debug_enabled(u32 console)
  */
 #define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000
 
-struct liquidio_if_cfg_context {
-	int octeon_id;
-
-	wait_queue_head_t wc;
-
-	int cond;
-};
-
-struct liquidio_if_cfg_resp {
-	u64 rh;
-	struct liquidio_if_cfg_info cfg_info;
-	u64 status;
-};
-
 struct liquidio_rx_ctl_context {
 	int octeon_id;
 
@@ -841,8 +827,12 @@ static void octnet_link_status_change(struct work_struct *work)
 	struct cavium_wk *wk = (struct cavium_wk *)work;
 	struct lio *lio = (struct lio *)wk->ctxptr;
 
+	/* lio->linfo.link.s.mtu always contains max MTU of the lio interface.
+	 * this API is invoked only when new max-MTU of the interface is
+	 * less than current MTU.
+	 */
 	rtnl_lock();
-	call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+	dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
 	rtnl_unlock();
 }
 
@@ -891,7 +881,11 @@ static inline void update_link_status(struct net_device *netdev,
 {
 	struct lio *lio = GET_LIO(netdev);
 	int changed = (lio->linfo.link.u64 != ls->u64);
+	int current_max_mtu = lio->linfo.link.s.mtu;
+	struct octeon_device *oct = lio->oct_dev;
 
+	dev_dbg(&oct->pci_dev->dev, "%s: lio->linfo.link.u64=%llx, ls->u64=%llx\n",
+		__func__, lio->linfo.link.u64, ls->u64);
 	lio->linfo.link.u64 = ls->u64;
 
 	if ((lio->intf_open) && (changed)) {
@@ -899,12 +893,26 @@ static inline void update_link_status(struct net_device *netdev,
 		lio->link_changes++;
 
 		if (lio->linfo.link.s.link_up) {
+			dev_dbg(&oct->pci_dev->dev, "%s: link_up", __func__);
 			netif_carrier_on(netdev);
 			txqs_wake(netdev);
 		} else {
+			dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
 			netif_carrier_off(netdev);
 			stop_txq(netdev);
 		}
+		if (lio->linfo.link.s.mtu != current_max_mtu) {
+			netif_info(lio, probe, lio->netdev, "Max MTU changed from %d to %d\n",
+				   current_max_mtu, lio->linfo.link.s.mtu);
+			netdev->max_mtu = lio->linfo.link.s.mtu;
+		}
+		if (lio->linfo.link.s.mtu < netdev->mtu) {
+			dev_warn(&oct->pci_dev->dev,
+				 "Current MTU is higher than new max MTU; Reducing the current mtu from %d to %d\n",
+				     netdev->mtu, lio->linfo.link.s.mtu);
+			queue_delayed_work(lio->link_status_wq.wq,
+					   &lio->link_status_wq.wk.work, 0);
+		}
 	}
 }
 
@@ -2448,38 +2456,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
 	return stats;
 }
 
-/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	struct lio *lio = GET_LIO(netdev);
-	struct octeon_device *oct = lio->oct_dev;
-	struct octnic_ctrl_pkt nctrl;
-	int ret = 0;
-
-	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-	nctrl.ncmd.u64 = 0;
-	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
-	nctrl.ncmd.s.param1 = new_mtu;
-	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
-	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
-		return -1;
-	}
-
-	lio->mtu = new_mtu;
-
-	return 0;
-}
-
 /**
  * \brief Handler for SIOCSHWTSTAMP ioctl
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index fd70a4844e2d..3342d64b7081 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -40,20 +40,6 @@ MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-struct liquidio_if_cfg_context {
-	int octeon_id;
-
-	wait_queue_head_t wc;
-
-	int cond;
-};
-
-struct liquidio_if_cfg_resp {
-	u64 rh;
-	struct liquidio_if_cfg_info cfg_info;
-	u64 status;
-};
-
 struct liquidio_rx_ctl_context {
 	int octeon_id;
 
@@ -564,8 +550,12 @@ static void octnet_link_status_change(struct work_struct *work)
 	struct cavium_wk *wk = (struct cavium_wk *)work;
 	struct lio *lio = (struct lio *)wk->ctxptr;
 
+	/* lio->linfo.link.s.mtu always contains max MTU of the lio interface.
+	 * this API is invoked only when new max-MTU of the interface is
+	 * less than current MTU.
+	 */
 	rtnl_lock();
-	call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+	dev_set_mtu(lio->netdev, lio->linfo.link.s.mtu);
 	rtnl_unlock();
 }
 
@@ -613,6 +603,7 @@ static void update_link_status(struct net_device *netdev,
 			       union oct_link_status *ls)
 {
 	struct lio *lio = GET_LIO(netdev);
+	int current_max_mtu = lio->linfo.link.s.mtu;
 	struct octeon_device *oct = lio->oct_dev;
 
 	if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
@@ -629,18 +620,17 @@ static void update_link_status(struct net_device *netdev,
 			txqs_stop(netdev);
 		}
 
-		if (lio->linfo.link.s.mtu != netdev->max_mtu) {
-			dev_info(&oct->pci_dev->dev, "Max MTU Changed from %d to %d\n",
-				 netdev->max_mtu, lio->linfo.link.s.mtu);
+		if (lio->linfo.link.s.mtu != current_max_mtu) {
+			dev_info(&oct->pci_dev->dev,
+				 "Max MTU Changed from %d to %d\n",
+				 current_max_mtu, lio->linfo.link.s.mtu);
 			netdev->max_mtu = lio->linfo.link.s.mtu;
 		}
 
 		if (lio->linfo.link.s.mtu < netdev->mtu) {
 			dev_warn(&oct->pci_dev->dev,
-				 "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+				 "Current MTU is higher than new max MTU; Reducing the current mtu from %d to %d\n",
 				 netdev->mtu, lio->linfo.link.s.mtu);
-			lio->mtu = lio->linfo.link.s.mtu;
-			netdev->mtu = lio->linfo.link.s.mtu;
 			queue_delayed_work(lio->link_status_wq.wq,
 					   &lio->link_status_wq.wk.work, 0);
 		}
@@ -1537,41 +1527,6 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
 	return stats;
 }
 
-/**
- * \brief Net device change_mtu
- * @param netdev network device
- */
-static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	struct octnic_ctrl_pkt nctrl;
-	struct octeon_device *oct;
-	struct lio *lio;
-	int ret = 0;
-
-	lio = GET_LIO(netdev);
-	oct = lio->oct_dev;
-
-	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-	nctrl.ncmd.u64 = 0;
-	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
-	nctrl.ncmd.s.param1 = new_mtu;
-	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = LIO_CMD_WAIT_TM;
-	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
-		return -EIO;
-	}
-
-	lio->mtu = new_mtu;
-
-	return 0;
-}
-
 /**
  * \brief Handler for SIOCSHWTSTAMP ioctl
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 522dcc4dcff7..4f72ed1ad15a 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -192,7 +192,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 
 #define   OCTNET_MAX_FRM_SIZE        (16000 + OCTNET_FRM_HEADER_SIZE)
 
-#define   OCTNET_DEFAULT_FRM_SIZE    (1500 + OCTNET_FRM_HEADER_SIZE)
+#define   OCTNET_DEFAULT_MTU         (1500)
+#define   OCTNET_DEFAULT_FRM_SIZE  (OCTNET_DEFAULT_MTU + OCTNET_FRM_HEADER_SIZE)
 
 /** NIC Commands are sent using this Octeon Input Queue */
 #define   OCTNET_CMD_Q                0
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index f2d1a076a038..76803a569794 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -35,6 +35,18 @@
 #define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
 #define   LIO_IFSTATE_RESETTING		   0x10
 
+struct liquidio_if_cfg_context {
+	u32 octeon_id;
+	wait_queue_head_t wc;
+	int cond;
+};
+
+struct liquidio_if_cfg_resp {
+	u64 rh;
+	struct liquidio_if_cfg_info cfg_info;
+	u64 status;
+};
+
 struct oct_nic_stats_resp {
 	u64     rh;
 	struct oct_link_stats stats;
@@ -184,6 +196,14 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
  */
 void liquidio_set_ethtool_ops(struct net_device *netdev);
 
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ */
+int liquidio_change_mtu(struct net_device *netdev, int new_mtu);
+#define LIO_CHANGE_MTU_SUCCESS 1
+#define LIO_CHANGE_MTU_FAIL    2
+
 #define SKB_ADJ_MASK  0x3F
 #define SKB_ADJ       (SKB_ADJ_MASK + 1)
 

From 7cbe543cae9b57cf4f9e710d1cec8dc3dd3c6dda Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Sat, 10 Mar 2018 17:34:50 +0530
Subject: [PATCH 0839/1678] cxgb4: do not display 50Gbps as unsupported speed

50Gbps is a supported speed, Stop reporting it as
unsupported speed.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 1b44652e4966..78f5506a5c4d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -210,6 +210,9 @@ static void link_report(struct net_device *dev)
 		case 40000:
 			s = "40Gbps";
 			break;
+		case 50000:
+			s = "50Gbps";
+			break;
 		case 100000:
 			s = "100Gbps";
 			break;

From 129cf5f7f19658db343edbdfbda5b95d2756cb2a Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Sat, 10 Mar 2018 19:27:52 +0530
Subject: [PATCH 0840/1678] cxgb4/cxgb4vf: check fw caps to set link mode mask

check firmware capabilities before setting ethtool
link mode mask, also add few missing speeds.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c    | 24 +++++++-----
 .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c   | 37 ++++++++++++-------
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 7852d98bad75..59d04d73c672 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -597,22 +597,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
 	case FW_PORT_TYPE_KR:
 		SET_LMM(Backplane);
-		SET_LMM(10000baseKR_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
 		break;
 
 	case FW_PORT_TYPE_BP_AP:
 		SET_LMM(Backplane);
-		SET_LMM(10000baseR_FEC);
-		SET_LMM(10000baseKR_Full);
-		SET_LMM(1000baseKX_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
 		break;
 
 	case FW_PORT_TYPE_BP4_AP:
 		SET_LMM(Backplane);
-		SET_LMM(10000baseR_FEC);
-		SET_LMM(10000baseKR_Full);
-		SET_LMM(1000baseKX_Full);
-		SET_LMM(10000baseKX4_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
 		break;
 
 	case FW_PORT_TYPE_FIBER_XFI:
@@ -628,7 +628,9 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 	case FW_PORT_TYPE_BP40_BA:
 	case FW_PORT_TYPE_QSFP:
 		SET_LMM(FIBRE);
-		SET_LMM(40000baseSR4_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
 		break;
 
 	case FW_PORT_TYPE_CR_QSFP:
@@ -655,12 +657,14 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
 	case FW_PORT_TYPE_CR2_QSFP:
 		SET_LMM(FIBRE);
-		SET_LMM(50000baseSR2_Full);
+		FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
 		break;
 
 	case FW_PORT_TYPE_KR4_100G:
 	case FW_PORT_TYPE_CR4_QSFP:
 		SET_LMM(FIBRE);
+		FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 361de86b65b9..7bd8497fd9be 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1289,22 +1289,22 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
 	case FW_PORT_TYPE_KR:
 		SET_LMM(Backplane);
-		SET_LMM(10000baseKR_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
 		break;
 
 	case FW_PORT_TYPE_BP_AP:
 		SET_LMM(Backplane);
-		SET_LMM(10000baseR_FEC);
-		SET_LMM(10000baseKR_Full);
-		SET_LMM(1000baseKX_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
 		break;
 
 	case FW_PORT_TYPE_BP4_AP:
 		SET_LMM(Backplane);
-		SET_LMM(10000baseR_FEC);
-		SET_LMM(10000baseKR_Full);
-		SET_LMM(1000baseKX_Full);
-		SET_LMM(10000baseKX4_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
 		break;
 
 	case FW_PORT_TYPE_FIBER_XFI:
@@ -1320,18 +1320,24 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 	case FW_PORT_TYPE_BP40_BA:
 	case FW_PORT_TYPE_QSFP:
 		SET_LMM(FIBRE);
-		SET_LMM(40000baseSR4_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
 		break;
 
 	case FW_PORT_TYPE_CR_QSFP:
 	case FW_PORT_TYPE_SFP28:
 		SET_LMM(FIBRE);
-		SET_LMM(25000baseCR_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
 		break;
 
 	case FW_PORT_TYPE_KR_SFP28:
 		SET_LMM(Backplane);
-		SET_LMM(25000baseKR_Full);
+		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
+		FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
 		break;
 
 	case FW_PORT_TYPE_KR_XLAUI:
@@ -1343,13 +1349,18 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
 
 	case FW_PORT_TYPE_CR2_QSFP:
 		SET_LMM(FIBRE);
-		SET_LMM(50000baseSR2_Full);
+		FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
 		break;
 
 	case FW_PORT_TYPE_KR4_100G:
 	case FW_PORT_TYPE_CR4_QSFP:
 		SET_LMM(FIBRE);
-		SET_LMM(100000baseCR4_Full);
+		FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
+		FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full);
+		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
+		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
+		FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
+		FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
 		break;
 
 	default:

From 918ee5073b0e253649083d731a88588b5c1723a3 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sun, 11 Mar 2018 09:45:47 +0200
Subject: [PATCH 0841/1678] net: ipv6: Introduce ip6_multipath_hash_policy()

In order to abstract away access to the
ipv6.sysctl.multipath_hash_policy variable, which is not available on
systems compiled without IPv6 support, introduce a wrapper function
ip6_multipath_hash_policy() that falls back to 0 on non-IPv6 systems.

Use this wrapper from mlxsw/spectrum_router instead of a direct
reference.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c |  2 +-
 include/net/ipv6.h                                    | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index a8a578610a7b..921bd1075edf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -7031,7 +7031,7 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
 
 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
 {
-	bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy;
+	bool only_l3 = !ip6_multipath_hash_policy(&init_net);
 
 	mlxsw_sp_mp_hash_header_set(recr2_pl,
 				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index cabd3cdd4015..50a6f0ddb878 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -888,6 +888,17 @@ static inline int ip6_default_np_autolabel(struct net *net)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+	return net->ipv6.sysctl.multipath_hash_policy;
+}
+#else
+static inline int ip6_multipath_hash_policy(const struct net *net)
+{
+	return 0;
+}
+#endif
 
 /*
  *	Header manipulation

From 36a1c3bd4a1503ee5561d7c2714b6f0fce049089 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sun, 11 Mar 2018 09:45:48 +0200
Subject: [PATCH 0842/1678] mlxsw: spectrum: Move mlxsw_sp_span_gretap4_route()

Move the function next to the rest of gretap4 functions. Thus the
generic functions shared between gretap4 and gretap6 are in one block at
the beginning, followed by a gretap4 block, followed by a gretap6 block.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 66 +++++++++----------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index f537e1de11d9..e82f5f4d66aa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -133,39 +133,6 @@ struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
 	.deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
 };
 
-static struct net_device *
-mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
-			    __be32 *saddrp, __be32 *daddrp)
-{
-	struct ip_tunnel *tun = netdev_priv(to_dev);
-	struct net_device *dev = NULL;
-	struct ip_tunnel_parm parms;
-	struct rtable *rt = NULL;
-	struct flowi4 fl4;
-
-	/* We assume "dev" stays valid after rt is put. */
-	ASSERT_RTNL();
-
-	parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
-	ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
-			    0, 0, parms.link, tun->fwmark);
-
-	rt = ip_route_output_key(tun->net, &fl4);
-	if (IS_ERR(rt))
-		return NULL;
-
-	if (rt->rt_type != RTN_UNICAST)
-		goto out;
-
-	dev = rt->dst.dev;
-	*saddrp = fl4.saddr;
-	*daddrp = rt->rt_gateway;
-
-out:
-	ip_rt_put(rt);
-	return dev;
-}
-
 static int mlxsw_sp_span_dmac(struct neigh_table *tbl,
 			      const void *pkey,
 			      struct net_device *l3edev,
@@ -227,6 +194,39 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
 	return 0;
 }
 
+static struct net_device *
+mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
+			    __be32 *saddrp, __be32 *daddrp)
+{
+	struct ip_tunnel *tun = netdev_priv(to_dev);
+	struct net_device *dev = NULL;
+	struct ip_tunnel_parm parms;
+	struct rtable *rt = NULL;
+	struct flowi4 fl4;
+
+	/* We assume "dev" stays valid after rt is put. */
+	ASSERT_RTNL();
+
+	parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
+	ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
+			    0, 0, parms.link, tun->fwmark);
+
+	rt = ip_route_output_key(tun->net, &fl4);
+	if (IS_ERR(rt))
+		return NULL;
+
+	if (rt->rt_type != RTN_UNICAST)
+		goto out;
+
+	dev = rt->dst.dev;
+	*saddrp = fl4.saddr;
+	*daddrp = rt->rt_gateway;
+
+out:
+	ip_rt_put(rt);
+	return dev;
+}
+
 static int
 mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev,
 				  struct mlxsw_sp_span_parms *sparmsp)

From 99db5229db81e2b12d213ff7533fcaee1206e2f0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sun, 11 Mar 2018 09:45:49 +0200
Subject: [PATCH 0843/1678] mlxsw: spectrum: Don't depend on ip_gre and ip6_gre

mlxsw_spectrum supports offloading of a tc action mirred egress mirror
to a gretap or an ip6gretap netdevice, which necessitates calls to
functions defined in ip_gre, ip6_gre and ip6_tunnel modules. Previously
this was enabled by introducing a hard dependency of MLXSW_SPECTRUM on
NET_IPGRE and IPV6_GRE. However the rest of mlxsw is careful about
picking which modules are absolutely required, and therefore the better
approach is to make mlxsw_spectrum tolerant of absence of one or both of
the GRE flavors.

Hence rework the NET_IPGRE and IPV6_GRE dependencies to just guard
matching modularity, and hide the corresponding code in spectrum_span.c
in an #if IS_ENABLED. Mark mlxsw_sp_span_entry_tunnel_parms_common as
maybe unused, to muffle warnings if neither GRE flavor is selected,
which seems cleaner than introducing a composite #if.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig         |  6 ++----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 10 +++++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 93d97b4676eb..f4d9c9975ac3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -76,12 +76,10 @@ config MLXSW_SPECTRUM
 	depends on PSAMPLE || PSAMPLE=n
 	depends on BRIDGE || BRIDGE=n
 	depends on IPV6 || IPV6=n
+	depends on NET_IPGRE || NET_IPGRE=n
+	depends on IPV6_GRE || IPV6_GRE=n
 	select PARMAN
 	select MLXFW
-	depends on NET_IPGRE
-	depends on !(MLXSW_CORE=y && NET_IPGRE=m)
-	depends on IPV6_GRE
-	depends on !(MLXSW_CORE=y && IPV6_GRE=m)
 	default m
 	---help---
 	  This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index e82f5f4d66aa..ae22a3daffbf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -167,7 +167,7 @@ mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp)
 	return 0;
 }
 
-static int
+static __maybe_unused int
 mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
 					union mlxsw_sp_l3addr saddr,
 					union mlxsw_sp_l3addr daddr,
@@ -194,6 +194,7 @@ mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev,
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_NET_IPGRE)
 static struct net_device *
 mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
 			    __be32 *saddrp, __be32 *daddrp)
@@ -291,7 +292,9 @@ static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
 	.configure = mlxsw_sp_span_entry_gretap4_configure,
 	.deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
 };
+#endif
 
+#if IS_ENABLED(CONFIG_IPV6_GRE)
 static struct net_device *
 mlxsw_sp_span_gretap6_route(const struct net_device *to_dev,
 			    struct in6_addr *saddrp,
@@ -389,12 +392,17 @@ struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
 	.configure = mlxsw_sp_span_entry_gretap6_configure,
 	.deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
 };
+#endif
 
 static const
 struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = {
 	&mlxsw_sp_span_entry_ops_phys,
+#if IS_ENABLED(CONFIG_NET_IPGRE)
 	&mlxsw_sp_span_entry_ops_gretap4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6_GRE)
 	&mlxsw_sp_span_entry_ops_gretap6,
+#endif
 };
 
 static int

From bbfa047a25c379e467536c5ed2ba00c0cb602ca1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 Mar 2018 11:09:33 -0400
Subject: [PATCH 0844/1678] ipv6: Use ip6_multipath_hash_policy() in
 rt6_multipath_hash().

Make use of the new helper.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0ae58424c45..81711e3e2604 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1846,7 +1846,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 	struct flow_keys hash_keys;
 	u32 mhash;
 
-	switch (net->ipv6.sysctl.multipath_hash_policy) {
+	switch (ip6_multipath_hash_policy(net)) {
 	case 0:
 		memset(&hash_keys, 0, sizeof(hash_keys));
 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;

From bdf08fc5412045f7648a49791d98cd04f72c1c1f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 11 Mar 2018 17:27:56 +0100
Subject: [PATCH 0845/1678] rds: remove redundant variable 'sg_off'

Variable sg_off is assigned a value but it is never read, hence it is
redundant and can be removed.

Cleans up clang warning:
net/rds/message.c:373:2: warning: Value stored to 'sg_off' is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/message.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/rds/message.c b/net/rds/message.c
index 4462e4c9bd7d..a35f76971984 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -357,7 +357,6 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 
 static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from)
 {
-	unsigned long sg_off;
 	struct scatterlist *sg;
 	int ret = 0;
 	int length = iov_iter_count(from);
@@ -370,7 +369,6 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
 	 * now allocate and copy in the data payload.
 	 */
 	sg = rm->data.op_sg;
-	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)

From 8e8af97a3fa43e1cd2e9e714d3b79536ff98d8a9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 11 Mar 2018 17:42:33 +0100
Subject: [PATCH 0846/1678] lan743x: remove some redundant variables and
 assignments

Function lan743x_phy_init assigns pointer 'netdev' but this is never read
and hence it can be removed. The return error code handling can also be
cleaned up to remove the variable 'ret'.

Function lan743x_phy_link_status_change assigns pointer 'phy' twice and
this is never read, so it also can be removed.

Finally, function lan743x_tx_napi_poll initializes pointer 'adapter'
and then re-assigns the same value into this pointer a little later on
so this second assignment is redundant and can be also removed.

Cleans up clang warnings:
drivers/net/ethernet/microchip/lan743x_main.c:951:2: warning: Value
stored to 'netdev' is never read
drivers/net/ethernet/microchip/lan743x_main.c:971:3: warning: Value
stored to 'phy' is never read
drivers/net/ethernet/microchip/lan743x_main.c:1583:26: warning: Value
stored to 'adapter' during its initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 04022ec5ae1e..73ce3ee6a520 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -945,15 +945,7 @@ static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
 
 static int lan743x_phy_init(struct lan743x_adapter *adapter)
 {
-	struct net_device *netdev;
-	int ret;
-
-	netdev = adapter->netdev;
-	ret = lan743x_phy_reset(adapter);
-	if (ret)
-		return ret;
-
-	return 0;
+	return lan743x_phy_reset(adapter);
 }
 
 static void lan743x_phy_link_status_change(struct net_device *netdev)
@@ -964,11 +956,9 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
 	phy_print_status(phydev);
 	if (phydev->state == PHY_RUNNING) {
 		struct ethtool_link_ksettings ksettings;
-		struct lan743x_phy *phy = NULL;
 		int remote_advertisement = 0;
 		int local_advertisement = 0;
 
-		phy = &adapter->phy;
 		memset(&ksettings, 0, sizeof(ksettings));
 		phy_ethtool_get_link_ksettings(netdev, &ksettings);
 		local_advertisement = phy_read(phydev, MII_ADVERTISE);
@@ -1586,7 +1576,6 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
 	u32 ioc_bit = 0;
 	u32 int_sts = 0;
 
-	adapter = tx->adapter;
 	ioc_bit = DMAC_INT_BIT_TX_IOC_(tx->channel_number);
 	int_sts = lan743x_csr_read(adapter, DMAC_INT_STS);
 	if (tx->vector_flags & LAN743X_VECTOR_FLAG_SOURCE_STATUS_W2C)

From c16b1a9ccf70630004f3700532313ca0bcf3e663 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 11 Mar 2018 17:55:47 +0100
Subject: [PATCH 0847/1678] lan743x: make functions lan743x_csr_read and
 lan743x_csr_read static

Functions lan743x_csr_read and lan743x_csr_read are local to the source
and do not need to be in global scope, so make them static.

Cleans up sparse warning:
drivers/net/ethernet/microchip/lan743x_main.c:56:5: warning: symbol
lan743x_csr_read' was not declared. Should it be static?
drivers/net/ethernet/microchip/lan743x_main.c:61:6: warning: symbol
'lan743x_csr_write' was not declared. Should it be static?

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 73ce3ee6a520..dd947e4dd3ce 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -53,12 +53,13 @@ return_error:
 	return ret;
 }
 
-u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
+static u32 lan743x_csr_read(struct lan743x_adapter *adapter, int offset)
 {
 	return ioread32(&adapter->csr.csr_address[offset]);
 }
 
-void lan743x_csr_write(struct lan743x_adapter *adapter, int offset, u32 data)
+static void lan743x_csr_write(struct lan743x_adapter *adapter, int offset,
+			      u32 data)
 {
 	iowrite32(data, &adapter->csr.csr_address[offset]);
 }

From 678f4bda35483473ae7ad674ece4c7c43a000888 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Sun, 11 Mar 2018 22:12:04 +0100
Subject: [PATCH 0848/1678] net: llc: drop VLA in llc_sap_mcast()

Avoid a VLA[1] by using a real constant expression instead of a variable.
The compiler should be able to optimize the original code and avoid using
an actual VLA. Anyway this change is useful because it will avoid a false
positive with -Wvla, it might also help the compiler generating better
code.

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_sap.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d90928f50226..a7f7b8ff4729 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -394,8 +394,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
 			  const struct llc_addr *laddr,
 			  struct sk_buff *skb)
 {
-	int i = 0, count = 256 / sizeof(struct sock *);
-	struct sock *sk, *stack[count];
+	int i = 0;
+	struct sock *sk;
+	struct sock *stack[256 / sizeof(struct sock *)];
 	struct llc_sock *llc;
 	struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
 
@@ -408,7 +409,7 @@ static void llc_sap_mcast(struct llc_sap *sap,
 			continue;
 
 		sock_hold(sk);
-		if (i < count)
+		if (i < ARRAY_SIZE(stack))
 			stack[i++] = sk;
 		else {
 			llc_do_mcast(sap, skb, stack, i);

From de8d5ab2ff6edc8e26822965a30b6aa4e9332025 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 12 Mar 2018 11:48:49 +0200
Subject: [PATCH 0849/1678] net: Make RX-FCS and HW GRO mutually exclusive

Same as LRO, hardware GRO cannot be enabled with RX-FCS.
When both are requested, hardware GRO will be dropped.

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 259abb1515d0..12a9aad0b057 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7549,10 +7549,17 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		}
 	}
 
-	/* LRO feature cannot be combined with RX-FCS */
-	if ((features & NETIF_F_LRO) && (features & NETIF_F_RXFCS)) {
-		netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
-		features &= ~NETIF_F_LRO;
+	/* LRO/HW-GRO features cannot be combined with RX-FCS */
+	if (features & NETIF_F_RXFCS) {
+		if (features & NETIF_F_LRO) {
+			netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
+			features &= ~NETIF_F_LRO;
+		}
+
+		if (features & NETIF_F_GRO_HW) {
+			netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
+			features &= ~NETIF_F_GRO_HW;
+		}
 	}
 
 	return features;

From 3ed898e8cd0588152d9f0d7f472387c4af7f640e Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 12 Mar 2018 12:25:24 +0000
Subject: [PATCH 0850/1678] mlxsw: spectrum_kvdl: Make some functions static

Fixes the following sparse warnings:

drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c:371:5: warning:
 symbol 'mlxsw_sp_kvdl_single_occ_get' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c:384:5: warning:
 symbol 'mlxsw_sp_kvdl_chunks_occ_get' was not declared. Should it be static?
drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c:397:5: warning:
 symbol 'mlxsw_sp_kvdl_large_chunks_occ_get' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 059eb3214328..4c9bff2fa055 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -368,7 +368,7 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
 	return occ;
 }
 
-u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -381,7 +381,7 @@ u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
-u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
@@ -394,7 +394,7 @@ u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
-u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);

From 68c1fb2d30209757f9a9f6e5ea587ea3bb8724c0 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Thu, 22 Feb 2018 11:09:55 -0800
Subject: [PATCH 0851/1678] ixgbe: check for 128-bit authentication

Make sure the Security Association is using
a 128-bit authentication, since that's the only
size that the hardware offload supports.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 16 +++++++++++-----
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h |  1 +
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 93eacddb6704..8b7dbc8057eb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -423,15 +423,21 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
 	const char aes_gcm_name[] = "rfc4106(gcm(aes))";
 	int key_len;
 
-	if (xs->aead) {
-		key_data = &xs->aead->alg_key[0];
-		key_len = xs->aead->alg_key_len;
-		alg_name = xs->aead->alg_name;
-	} else {
+	if (!xs->aead) {
 		netdev_err(dev, "Unsupported IPsec algorithm\n");
 		return -EINVAL;
 	}
 
+	if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+		netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+			   IXGBE_IPSEC_AUTH_BITS);
+		return -EINVAL;
+	}
+
+	key_data = &xs->aead->alg_key[0];
+	key_len = xs->aead->alg_key_len;
+	alg_name = xs->aead->alg_name;
+
 	if (strcmp(alg_name, aes_gcm_name)) {
 		netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
 			   aes_gcm_name);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index da3ce7849e85..87d2800b94ab 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -32,6 +32,7 @@
 #define IXGBE_IPSEC_MAX_RX_IP_COUNT	128
 #define IXGBE_IPSEC_BASE_RX_INDEX	0
 #define IXGBE_IPSEC_BASE_TX_INDEX	IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS		128
 
 #define IXGBE_RXTXIDX_IPS_EN		0x00000001
 #define IXGBE_RXIDX_TBL_SHIFT		1

From 0ae418e6e25baadf3b75c7572d5b4fa5b749e1e3 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Thu, 22 Feb 2018 11:09:56 -0800
Subject: [PATCH 0852/1678] ixgbe: fix ipsec trailer length

Fix up the Tx trailer length calculation.  We can't believe the
trailer len from the xstate information because it was calculated
before the packet was put together and padding added.  This bit
of code finds the padding value in the trailer, adds it to the
authentication length, and saves it so later we can put it into
the Tx descriptor to tell the device where to stop the checksum
calculation.

Fixes: 592594704761 ("ixgbe: process the Tx ipsec offload")
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c    | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 8b7dbc8057eb..8623013e517e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -789,11 +789,33 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 
 	itd->flags = 0;
 	if (xs->id.proto == IPPROTO_ESP) {
+		struct sk_buff *skb = first->skb;
+		int ret, authlen, trailerlen;
+		u8 padlen;
+
 		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
 			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
 		if (first->protocol == htons(ETH_P_IP))
 			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
-		itd->trailer_len = xs->props.trailer_len;
+
+		/* The actual trailer length is authlen (16 bytes) plus
+		 * 2 bytes for the proto and the padlen values, plus
+		 * padlen bytes of padding.  This ends up not the same
+		 * as the static value found in xs->props.trailer_len (21).
+		 *
+		 * The "correct" way to get the auth length would be to use
+		 *    authlen = crypto_aead_authsize(xs->data);
+		 * but since we know we only have one size to worry about
+		 * we can let the compiler use the constant and save us a
+		 * few CPU cycles.
+		 */
+		authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+
+		ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1);
+		if (unlikely(ret))
+			return 0;
+		trailerlen = authlen + 2 + padlen;
+		itd->trailer_len = trailerlen;
 	}
 	if (tsa->encrypt)
 		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;

From 5c4aa458634f545e75845b5a20276ffe1c43459e Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Thu, 22 Feb 2018 11:09:57 -0800
Subject: [PATCH 0853/1678] ixgbe: remove unneeded ipsec state free callback

With commit 7f05b467a735 ("xfrm: check for xdo_dev_state_free")
we no longer need to add an empty callback function
to the driver, so now let's remove the useless code.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 8623013e517e..f2254528dcfc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -724,23 +724,10 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
 	return true;
 }
 
-/**
- * ixgbe_ipsec_free - called by xfrm garbage collections
- * @xs: pointer to transformer state struct
- *
- * We don't have any garbage to collect, so we shouldn't bother
- * implementing this function, but the XFRM code doesn't check for
- * existence before calling the API callback.
- **/
-static void ixgbe_ipsec_free(struct xfrm_state *xs)
-{
-}
-
 static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
 	.xdo_dev_state_add = ixgbe_ipsec_add_sa,
 	.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
 	.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
-	.xdo_dev_state_free = ixgbe_ipsec_free,
 };
 
 /**

From e656805c1b7888c3e79cf26b040b83e6cfd102d6 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Wed, 28 Feb 2018 03:59:09 -0800
Subject: [PATCH 0854/1678] ixgbe: Add receive length error counter

ixgbe enabled rlec counter and the rx_error used it.
We can export the counter directly via ethtool -S ethX.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 89edb9fae6f0..c0e6ab42e0e1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -97,6 +97,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"tx_heartbeat_errors", IXGBE_NETDEV_STAT(tx_heartbeat_errors)},
 	{"tx_timeout_count", IXGBE_STAT(tx_timeout_count)},
 	{"tx_restart_queue", IXGBE_STAT(restart_queue)},
+	{"rx_length_errors", IXGBE_STAT(stats.rlec)},
 	{"rx_long_length_errors", IXGBE_STAT(stats.roc)},
 	{"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
 	{"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},

From 954b54dea0ebfd2ead48e6bb12a165025227f4b3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 1 Mar 2018 00:17:36 +0100
Subject: [PATCH 0855/1678] ixgbevf: fix unused variable warning

The new ixgbevf_set_rx_buffer_len() function causes a harmless warnings
in configurations with large page size:

drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c: In function 'ixgbevf_set_rx_buffer_len':
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c:1758:15: error: unused variable 'max_frame' [-Werror=unused-variable]

This rephrases the code so that the compiler can see the use of that
variable, making it slightly easier to read in the process.

Fixes: f15c5ba5b6cd ("ixgbevf: add support for using order 1 pages to receive large frames")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index f37307131eb6..4da449e0a4ba 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1766,12 +1766,12 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
 
 	set_ring_build_skb_enabled(rx_ring);
 
-#if (PAGE_SIZE < 8192)
-	if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
-		return;
+	if (PAGE_SIZE < 8192) {
+		if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB)
+			return;
 
-	set_ring_uses_large_buffer(rx_ring);
-#endif
+		set_ring_uses_large_buffer(rx_ring);
+	}
 }
 
 /**

From f1cb9d68b4d8789d7f48bd293772095430860d86 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Sun, 11 Mar 2018 22:07:49 +0100
Subject: [PATCH 0856/1678] net: rds: drop VLA in rds_for_each_conn_info()

Avoid VLA[1] by using an already allocated buffer passed
by the caller.

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 2 +-
 net/rds/ib.c         | 3 +++
 net/rds/rds.h        | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 2da3176bf792..f80792c719eb 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -540,9 +540,9 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 			  struct rds_info_iterator *iter,
 			  struct rds_info_lengths *lens,
 			  int (*visitor)(struct rds_connection *, void *),
+			  u64 *buffer,
 			  size_t item_len)
 {
-	uint64_t buffer[(item_len + 7) / 8];
 	struct hlist_head *head;
 	struct rds_connection *conn;
 	size_t i;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 50a88f3e7e39..02deee29e7f1 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -321,8 +321,11 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
 			   struct rds_info_iterator *iter,
 			   struct rds_info_lengths *lens)
 {
+	u64 buffer[(sizeof(struct rds_info_rdma_connection) + 7) / 8];
+
 	rds_for_each_conn_info(sock, len, iter, lens,
 				rds_ib_conn_info_visitor,
+				buffer,
 				sizeof(struct rds_info_rdma_connection));
 }
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 74cd27c661de..b04c333d9d1c 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -735,6 +735,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 			  struct rds_info_iterator *iter,
 			  struct rds_info_lengths *lens,
 			  int (*visitor)(struct rds_connection *, void *),
+			  u64 *buffer,
 			  size_t item_len);
 
 __printf(2, 3)

From b2c9272ae7d366e85ab277f528c3dddb7f268b12 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Sun, 11 Mar 2018 22:07:50 +0100
Subject: [PATCH 0857/1678] net: rds: drop VLA in rds_walk_conn_path_info()

Avoid VLA[1] by using an already allocated buffer passed
by the caller.

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index f80792c719eb..abef75da89a7 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -578,9 +578,9 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
 				    struct rds_info_iterator *iter,
 				    struct rds_info_lengths *lens,
 				    int (*visitor)(struct rds_conn_path *, void *),
+				    u64 *buffer,
 				    size_t item_len)
 {
-	u64  buffer[(item_len + 7) / 8];
 	struct hlist_head *head;
 	struct rds_connection *conn;
 	size_t i;
@@ -649,8 +649,11 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
 			  struct rds_info_iterator *iter,
 			  struct rds_info_lengths *lens)
 {
+	u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8];
+
 	rds_walk_conn_path_info(sock, len, iter, lens,
 				rds_conn_info_visitor,
+				buffer,
 				sizeof(struct rds_info_connection));
 }
 

From b03254d797b6b5b7f50a94be0805510114b4d6d1 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Thu, 8 Mar 2018 07:26:08 -0500
Subject: [PATCH 0858/1678] ixgbe: fix disabling hide VLAN on VF reset

If port VLAN is enabled, set PFQDE.HIDE_VLAN during VF reset.

Setting only PFQDE.PFQDE during VF reset was clearing PFQDE.HIDE_VLAN.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 27a70a52f3c9..008aa073a679 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -831,7 +831,11 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg);
 
 	/* force drop enable for all VF Rx queues */
-	ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE);
+	reg = IXGBE_QDE_ENABLE;
+	if (adapter->vfinfo[vf].pf_vlan)
+		reg |= IXGBE_QDE_HIDE_VLAN;
+
+	ixgbe_write_qde(adapter, vf, reg);
 
 	/* enable receive for vf */
 	reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));

From c69851e940bcdaf202d20ca7099145d9b7ec7368 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 11 Mar 2018 15:00:37 +0100
Subject: [PATCH 0859/1678] net: phy: set link state to down when creating the
 phy_device

Currently the link state is initialized to "up" when the phy_device is
being created. This is not consistent with the phy state being
initialized to PHY_DOWN.

Usually this doen't do any harm because the link state is updated
once the PHY reaches state PHY_AN. However e.g. if a LAN port isn't
used and the PHY remains down this inconsistency remains and calls
to functions like phy_print_status() give false results.
Therefore change the initialization to link being down.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 478405e544cc..b285323327c4 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -374,7 +374,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
 	dev->duplex = -1;
 	dev->pause = 0;
 	dev->asym_pause = 0;
-	dev->link = 1;
+	dev->link = 0;
 	dev->interface = PHY_INTERFACE_MODE_GMII;
 
 	dev->autoneg = AUTONEG_ENABLE;

From 08486add3a8d22cd3e41cb59028fc397abdcb279 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Mon, 12 Mar 2018 16:06:37 -0400
Subject: [PATCH 0860/1678] tc-testing: add TC vlan action tests

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/vlan.json     | 410 ++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644
index 000000000000..4510ddfa6e54
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -0,0 +1,410 @@
+[
+    {
+        "id": "6f5a",
+        "name": "Add vlan pop action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "ee6f",
+        "name": "Add vlan pop action with large index",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "b6b9",
+        "name": "Add vlan pop action with jump opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "87c3",
+        "name": "Add vlan pop action with trap opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "2b91",
+        "name": "Add vlan invalid action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan bad_mode",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "57fc",
+        "name": "Add vlan action with invalid protocol type",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "3989",
+        "name": "Add vlan push action with default protocol and priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 18",
+        "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "79dc",
+        "name": "Add vlan push action with protocol 802.1Q and priority 3",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 734",
+        "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "4d73",
+        "name": "Add vlan push action with protocol 802.1AD",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 10000",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "1f7b",
+        "name": "Add vlan push action with invalid vlan ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "5d02",
+        "name": "Add vlan push action with invalid IEEE 802.1p priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "6812",
+        "name": "Add vlan modify action for protocol 802.1Q",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 100",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "5a31",
+        "name": "Add vlan modify action for protocol 802.1AD",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 12",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "83a4",
+        "name": "Delete vlan pop action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan pop index 44"
+        ],
+        "cmdUnderTest": "$TC actions del action vlan index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ed1e",
+        "name": "Delete vlan push action for protocol 802.1Q",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+        ],
+        "cmdUnderTest": "$TC actions del action vlan index 999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a2a3",
+        "name": "Flush vlan actions",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+        ],
+        "cmdUnderTest": "$TC actions flush action vlan",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "1d78",
+        "name": "Add vlan action with cookie",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    }
+]

From 9ba32046fc2d19697e1a208ce81663239e78e41f Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Mon, 12 Mar 2018 16:07:02 -0400
Subject: [PATCH 0861/1678] tc-testing: updated gact tests with batch test
 cases

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/gact.json     | 73 ++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index e2187b6e0b7a..ae96d0350d7e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -465,5 +465,76 @@
         "teardown": [
             "$TC actions flush action gact"
         ]
+    },
+    {
+        "id": "1021",
+        "name": "Add batch of 32 gact pass actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action pass index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "da7a",
+        "name": "Add batch of 32 gact continue actions with cookie",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action continue index $i cookie aabbccddeeff112233445566778800a1 \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "8aa3",
+        "name": "Delete batch of 32 gact continue actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "for i in `seq 1 32`; do cmd=\"action continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args"
+        ],
+        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action gact index $i \"; args=\"$args$cmd\"; done && $TC actions del $args",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
     }
-]
+]
\ No newline at end of file

From da75e3b6a0292fc926f4773f58c5ff6b8db5a179 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 12 Mar 2018 11:51:02 -0500
Subject: [PATCH 0862/1678] ibmvnic: Account for VLAN tag in L2 Header
 descriptor

If a VLAN tag is present in the Ethernet header, account
for that when providing the L2 header to firmware.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7be4b06f69d2..ea9f351eb343 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1221,7 +1221,10 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb,
 	int len = 0;
 	u8 *hdr;
 
-	hdr_len[0] = sizeof(struct ethhdr);
+	if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb))
+		hdr_len[0] = sizeof(struct vlan_ethhdr);
+	else
+		hdr_len[0] = sizeof(struct ethhdr);
 
 	if (skb->protocol == htons(ETH_P_IP)) {
 		hdr_len[1] = ip_hdr(skb)->ihl * 4;

From 8dff66cc4195bfaeb8764e7e1b62f2d13f311773 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 12 Mar 2018 11:51:03 -0500
Subject: [PATCH 0863/1678] ibmvnic: Account for VLAN header length in TX
 buffers

The extra four bytes of a VLAN packet was throwing off
TX buffer entry values used by the driver. Account for those
bytes when in buffer size and buffer entry calculations

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ea9f351eb343..14f00810db33 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -659,7 +659,7 @@ static int init_tx_pools(struct net_device *netdev)
 
 		if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
 					 adapter->req_tx_entries_per_subcrq *
-					 adapter->req_mtu)) {
+					 (adapter->req_mtu + VLAN_HLEN))) {
 			release_tx_pools(adapter);
 			return -1;
 		}
@@ -1394,9 +1394,9 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
 			tx_pool->tso_index = 0;
 	} else {
-		offset = index * adapter->req_mtu;
+		offset = index * (adapter->req_mtu + VLAN_HLEN);
 		dst = tx_pool->long_term_buff.buff + offset;
-		memset(dst, 0, adapter->req_mtu);
+		memset(dst, 0, adapter->req_mtu + VLAN_HLEN);
 		data_dma_addr = tx_pool->long_term_buff.addr + offset;
 	}
 

From 1f247a6f9e5d1b5a1e57d66829ee52dbafac256f Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 12 Mar 2018 11:51:04 -0500
Subject: [PATCH 0864/1678] ibmvnic: Pad small packets to minimum MTU size

Some backing devices cannot handle small packets well,
so pad any small packets to avoid that. It was recommended
that the VNIC driver should not send packets smaller than the
minimum MTU value provided by firmware, so pad small packets
to be at least that long.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 14f00810db33..7ed87fbb156b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1340,6 +1340,19 @@ static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
 			 txbuff->indir_arr + 1);
 }
 
+static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	/* For some backing devices, mishandling of small packets
+	 * can result in a loss of connection or TX stall. Device
+	 * architects recommend that no packet should be smaller
+	 * than the minimum MTU value provided to the driver, so
+	 * pad any packets to that length
+	 */
+	if (skb->len < netdev->min_mtu)
+		return skb_put_padto(skb, netdev->min_mtu);
+}
+
 static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -1377,6 +1390,13 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto out;
 	}
 
+	if (ibmvnic_xmit_workarounds(skb, adapter)) {
+		tx_dropped++;
+		tx_send_failed++;
+		ret = NETDEV_TX_OK;
+		goto out;
+	}
+
 	tx_pool = &adapter->tx_pool[queue_num];
 	tx_scrq = adapter->tx_scrq[queue_num];
 	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));

From f10b09ef687f3134c26027d2a38a9d4cd3bb9cb7 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 12 Mar 2018 11:51:05 -0500
Subject: [PATCH 0865/1678] ibmvnic: Handle TSO backing device errata

TSO packets with one segment or with an MSS less than 224 can
cause errors on some backing devices, so disable GSO in those cases.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7ed87fbb156b..e02d3b9aaf15 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2049,6 +2049,23 @@ static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
 	return wait_for_reset(adapter);
 }
 
+static netdev_features_t ibmvnic_features_check(struct sk_buff *skb,
+						struct net_device *dev,
+						netdev_features_t features)
+{
+	/* Some backing hardware adapters can not
+	 * handle packets with a MSS less than 224
+	 * or with only one segment.
+	 */
+	if (skb_is_gso(skb)) {
+		if (skb_shinfo(skb)->gso_size < 224 ||
+		    skb_shinfo(skb)->gso_segs == 1)
+			features &= ~NETIF_F_GSO_MASK;
+	}
+
+	return features;
+}
+
 static const struct net_device_ops ibmvnic_netdev_ops = {
 	.ndo_open		= ibmvnic_open,
 	.ndo_stop		= ibmvnic_close,
@@ -2061,6 +2078,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
 	.ndo_poll_controller	= ibmvnic_netpoll_controller,
 #endif
 	.ndo_change_mtu		= ibmvnic_change_mtu,
+	.ndo_features_check     = ibmvnic_features_check,
 };
 
 /* ethtool functions */

From 7083a45ad0df78546b3a735f0090b075efdf44e3 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Mon, 12 Mar 2018 21:05:26 -0500
Subject: [PATCH 0866/1678] ibmvnic: Fix recent errata commit

Sorry, one of the patches I sent in an earlier series
has some dumb mistakes. One was that I had changed the
parameter for the errata workaround function but forgot
to make that change in the code that called it.

The second mistake was a forgotten return value at the end
of the function in case the workaround was not needed.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e02d3b9aaf15..6ff43d7e5a72 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1351,6 +1351,8 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
 	 */
 	if (skb->len < netdev->min_mtu)
 		return skb_put_padto(skb, netdev->min_mtu);
+
+	return 0;
 }
 
 static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
@@ -1390,7 +1392,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto out;
 	}
 
-	if (ibmvnic_xmit_workarounds(skb, adapter)) {
+	if (ibmvnic_xmit_workarounds(skb, netdev)) {
 		tx_dropped++;
 		tx_send_failed++;
 		ret = NETDEV_TX_OK;

From d98985dd6c2dc69e2ad5f5482a5237fb9487ed99 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 13 Mar 2018 03:03:30 +0000
Subject: [PATCH 0867/1678] sctp: fix error return code in
 sctp_sendmsg_new_asoc()

Return error code -EINVAL in the address len check error handling
case since 'err' can be overwrite to 0 by 'err = sctp_verify_addr()'
in the for loop.

Fixes: 2c0dbaa0c43d ("sctp: add support for SCTP_DSTADDRV4/6 Information for sendmsg")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7d3476a4860d..af5cf29b0c65 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1677,7 +1677,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 	struct sctp_association *asoc;
 	enum sctp_scope scope;
 	struct cmsghdr *cmsg;
-	int err = -EINVAL;
+	int err;
 
 	*tp = NULL;
 
@@ -1761,16 +1761,20 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 		memset(daddr, 0, sizeof(*daddr));
 		dlen = cmsg->cmsg_len - sizeof(struct cmsghdr);
 		if (cmsg->cmsg_type == SCTP_DSTADDRV4) {
-			if (dlen < sizeof(struct in_addr))
+			if (dlen < sizeof(struct in_addr)) {
+				err = -EINVAL;
 				goto free;
+			}
 
 			dlen = sizeof(struct in_addr);
 			daddr->v4.sin_family = AF_INET;
 			daddr->v4.sin_port = htons(asoc->peer.port);
 			memcpy(&daddr->v4.sin_addr, CMSG_DATA(cmsg), dlen);
 		} else {
-			if (dlen < sizeof(struct in6_addr))
+			if (dlen < sizeof(struct in6_addr)) {
+				err = -EINVAL;
 				goto free;
+			}
 
 			dlen = sizeof(struct in6_addr);
 			daddr->v6.sin6_family = AF_INET6;

From 2e01ae0ef2db96c530249563383f479f8d9ca183 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Mar 2018 13:36:51 +0300
Subject: [PATCH 0868/1678] net: Convert sctp_defaults_ops

These pernet_operations have a deal with sysctl, /proc
entries and statistics. Also, there are freeing of
net::sctp::addr_waitq queue and net::sctp::local_addr_list
in exit method. All of them look pernet-divided, and it
seems these items are only interesting for sctp_defaults_ops,
which are safe to be executed in parallel.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91813e686c67..32be52304f98 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1330,6 +1330,7 @@ static void __net_exit sctp_defaults_exit(struct net *net)
 static struct pernet_operations sctp_defaults_ops = {
 	.init = sctp_defaults_init,
 	.exit = sctp_defaults_exit,
+	.async = true,
 };
 
 static int __net_init sctp_ctrlsock_init(struct net *net)

From bfdfa38ff0e26463e418235c0be03c0f0eca4569 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Mar 2018 13:37:02 +0300
Subject: [PATCH 0869/1678] net: Convert sctp_ctrlsock_ops

These pernet_operations create and destroy net::sctp::ctl_sock.
Since pernet_operations do not send sctp packets each other,
they look safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 32be52304f98..606361ee9e4a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1354,6 +1354,7 @@ static void __net_init sctp_ctrlsock_exit(struct net *net)
 static struct pernet_operations sctp_ctrlsock_ops = {
 	.init = sctp_ctrlsock_init,
 	.exit = sctp_ctrlsock_exit,
+	.async = true,
 };
 
 /* Initialize the universe into something sensible.  */

From afbbc374ab1281f2e5c18278a62a47fb906f0fa4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Mar 2018 13:37:11 +0300
Subject: [PATCH 0870/1678] net: Convert tipc_net_ops

TIPC looks concentrated in itself, and other pernet_operations
seem not touching its entities.

tipc_net_ops look pernet-divided, and they should be safe to
be executed in parallel for several net the same time.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 0b982d048fb9..04fd91bb11d7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -105,6 +105,7 @@ static struct pernet_operations tipc_net_ops = {
 	.exit = tipc_exit_net,
 	.id   = &tipc_net_id,
 	.size = sizeof(struct tipc_net),
+	.async = true,
 };
 
 static int __init tipc_init(void)

From c939a5e4d597d635dee4eab2f27308d50fa606f7 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Mar 2018 13:37:21 +0300
Subject: [PATCH 0871/1678] net: Convert rds_tcp_net_ops

These pernet_operations create and destroy sysctl table
and listen socket. Also, exit method flushes global
workqueue and work. Everything looks per-net safe,
so we can mark them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08230a145042..eb04e7fa2467 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -515,6 +515,7 @@ static struct pernet_operations rds_tcp_net_ops = {
 	.exit = rds_tcp_exit_net,
 	.id = &rds_tcp_netid,
 	.size = sizeof(struct rds_tcp_net),
+	.async = true,
 };
 
 static void rds_tcp_kill_sock(struct net *net)

From 8b4e6b3ca2ed63917e18303005fdd34a586e86db Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Tue, 13 Mar 2018 16:24:45 +0530
Subject: [PATCH 0872/1678] cxgb4: Add HMA support

HMA(Host Memory Access) maps a part of host memory for T6-SO memfree cards.

This commit does the following:
- Query FW to check if we have HMA support. If yes, the params will
  return HMA size configured in FW. We will dma map memory based
  on this size.
- Also contains changes to get HMA memory information via debugfs.

Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Santosh Rastapur <santosh@chelsio.com>
Signed-off-by: Michael Werner <werner@chelsio.com>
Signed-off-by: Ganesh GR <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h    |  13 +
 .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c    |  10 +-
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   | 228 +++++++++++++++++-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c    |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h |  56 +++++
 5 files changed, 303 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index d3fa53db61ee..b2df0ffb7c94 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -831,6 +831,16 @@ struct vf_info {
 	u16 vlan;
 };
 
+enum {
+	HMA_DMA_MAPPED_FLAG = 1
+};
+
+struct hma_data {
+	unsigned char flags;
+	struct sg_table *sgt;
+	dma_addr_t *phy_addr;	/* physical address of the page */
+};
+
 struct mbox_list {
 	struct list_head list;
 };
@@ -946,6 +956,9 @@ struct adapter {
 
 	/* Ethtool Dump */
 	struct ethtool_dump eth_dump;
+
+	/* HMA */
+	struct hma_data hma;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 2822bbff73e8..de2ba86eccfd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2617,7 +2617,7 @@ int mem_open(struct inode *inode, struct file *file)
 
 	file->private_data = inode->i_private;
 
-	mem = (uintptr_t)file->private_data & 0x3;
+	mem = (uintptr_t)file->private_data & 0x7;
 	adap = file->private_data - mem;
 
 	(void)t4_fwcache(adap, FW_PARAM_DEV_FWCACHE_FLUSH);
@@ -2630,7 +2630,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
 {
 	loff_t pos = *ppos;
 	loff_t avail = file_inode(file)->i_size;
-	unsigned int mem = (uintptr_t)file->private_data & 3;
+	unsigned int mem = (uintptr_t)file->private_data & 0x7;
 	struct adapter *adap = file->private_data - mem;
 	__be32 *data;
 	int ret;
@@ -3042,6 +3042,12 @@ int t4_setup_debugfs(struct adapter *adap)
 			add_debugfs_mem(adap, "mc", MEM_MC,
 					EXT_MEM_SIZE_G(size));
 		}
+
+		if (i & HMA_MUX_F) {
+			size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+			add_debugfs_mem(adap, "hma", MEM_HMA,
+					EXT_MEM1_SIZE_G(size));
+		}
 	}
 
 	de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 78f5506a5c4d..5a349e1576cb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1736,10 +1736,11 @@ EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
 
 int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
 {
-	struct adapter *adap;
-	u32 offset, memtype, memaddr;
 	u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
 	u32 edc0_end, edc1_end, mc0_end, mc1_end;
+	u32 offset, memtype, memaddr;
+	struct adapter *adap;
+	u32 hma_size = 0;
 	int ret;
 
 	adap = netdev2adap(dev);
@@ -1759,6 +1760,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
 	size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
 	mc0_size = EXT_MEM0_SIZE_G(size) << 20;
 
+	if (t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A) & HMA_MUX_F) {
+		size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
+		hma_size = EXT_MEM1_SIZE_G(size) << 20;
+	}
 	edc0_end = edc0_size;
 	edc1_end = edc0_end + edc1_size;
 	mc0_end = edc1_end + mc0_size;
@@ -1770,7 +1775,10 @@ int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
 		memtype = MEM_EDC1;
 		memaddr = offset - edc0_end;
 	} else {
-		if (offset < mc0_end) {
+		if (hma_size && (offset < (edc1_end + hma_size))) {
+			memtype = MEM_HMA;
+			memaddr = offset - edc1_end;
+		} else if (offset < mc0_end) {
 			memtype = MEM_MC0;
 			memaddr = offset - edc1_end;
 		} else if (is_t5(adap->params.chip)) {
@@ -3301,6 +3309,206 @@ static void setup_memwin_rdma(struct adapter *adap)
 	}
 }
 
+/* HMA Definitions */
+
+/* The maximum number of address that can be send in a single FW cmd */
+#define HMA_MAX_ADDR_IN_CMD	5
+
+#define HMA_PAGE_SIZE		PAGE_SIZE
+
+#define HMA_MAX_NO_FW_ADDRESS	(16 << 10)  /* FW supports 16K addresses */
+
+#define HMA_PAGE_ORDER					\
+	((HMA_PAGE_SIZE < HMA_MAX_NO_FW_ADDRESS) ?	\
+	ilog2(HMA_MAX_NO_FW_ADDRESS / HMA_PAGE_SIZE) : 0)
+
+/* The minimum and maximum possible HMA sizes that can be specified in the FW
+ * configuration(in units of MB).
+ */
+#define HMA_MIN_TOTAL_SIZE	1
+#define HMA_MAX_TOTAL_SIZE				\
+	(((HMA_PAGE_SIZE << HMA_PAGE_ORDER) *		\
+	  HMA_MAX_NO_FW_ADDRESS) >> 20)
+
+static void adap_free_hma_mem(struct adapter *adapter)
+{
+	struct scatterlist *iter;
+	struct page *page;
+	int i;
+
+	if (!adapter->hma.sgt)
+		return;
+
+	if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
+		dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
+			     adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
+		adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
+	}
+
+	for_each_sg(adapter->hma.sgt->sgl, iter,
+		    adapter->hma.sgt->orig_nents, i) {
+		page = sg_page(iter);
+		if (page)
+			__free_pages(page, HMA_PAGE_ORDER);
+	}
+
+	kfree(adapter->hma.phy_addr);
+	sg_free_table(adapter->hma.sgt);
+	kfree(adapter->hma.sgt);
+	adapter->hma.sgt = NULL;
+}
+
+static int adap_config_hma(struct adapter *adapter)
+{
+	struct scatterlist *sgl, *iter;
+	struct sg_table *sgt;
+	struct page *newpage;
+	unsigned int i, j, k;
+	u32 param, hma_size;
+	unsigned int ncmds;
+	size_t page_size;
+	u32 page_order;
+	int node, ret;
+
+	/* HMA is supported only for T6+ cards.
+	 * Avoid initializing HMA in kdump kernels.
+	 */
+	if (is_kdump_kernel() ||
+	    CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
+		return 0;
+
+	/* Get the HMA region size required by fw */
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HMA_SIZE));
+	ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+			      1, &param, &hma_size);
+	/* An error means card has its own memory or HMA is not supported by
+	 * the firmware. Return without any errors.
+	 */
+	if (ret || !hma_size)
+		return 0;
+
+	if (hma_size < HMA_MIN_TOTAL_SIZE ||
+	    hma_size > HMA_MAX_TOTAL_SIZE) {
+		dev_err(adapter->pdev_dev,
+			"HMA size %uMB beyond bounds(%u-%lu)MB\n",
+			hma_size, HMA_MIN_TOTAL_SIZE, HMA_MAX_TOTAL_SIZE);
+		return -EINVAL;
+	}
+
+	page_size = HMA_PAGE_SIZE;
+	page_order = HMA_PAGE_ORDER;
+	adapter->hma.sgt = kzalloc(sizeof(*adapter->hma.sgt), GFP_KERNEL);
+	if (unlikely(!adapter->hma.sgt)) {
+		dev_err(adapter->pdev_dev, "HMA SG table allocation failed\n");
+		return -ENOMEM;
+	}
+	sgt = adapter->hma.sgt;
+	/* FW returned value will be in MB's
+	 */
+	sgt->orig_nents = (hma_size << 20) / (page_size << page_order);
+	if (sg_alloc_table(sgt, sgt->orig_nents, GFP_KERNEL)) {
+		dev_err(adapter->pdev_dev, "HMA SGL allocation failed\n");
+		kfree(adapter->hma.sgt);
+		adapter->hma.sgt = NULL;
+		return -ENOMEM;
+	}
+
+	sgl = adapter->hma.sgt->sgl;
+	node = dev_to_node(adapter->pdev_dev);
+	for_each_sg(sgl, iter, sgt->orig_nents, i) {
+		newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL,
+					   page_order);
+		if (!newpage) {
+			dev_err(adapter->pdev_dev,
+				"Not enough memory for HMA page allocation\n");
+			ret = -ENOMEM;
+			goto free_hma;
+		}
+		sg_set_page(iter, newpage, page_size << page_order, 0);
+	}
+
+	sgt->nents = dma_map_sg(adapter->pdev_dev, sgl, sgt->orig_nents,
+				DMA_BIDIRECTIONAL);
+	if (!sgt->nents) {
+		dev_err(adapter->pdev_dev,
+			"Not enough memory for HMA DMA mapping");
+		ret = -ENOMEM;
+		goto free_hma;
+	}
+	adapter->hma.flags |= HMA_DMA_MAPPED_FLAG;
+
+	adapter->hma.phy_addr = kcalloc(sgt->nents, sizeof(dma_addr_t),
+					GFP_KERNEL);
+	if (unlikely(!adapter->hma.phy_addr))
+		goto free_hma;
+
+	for_each_sg(sgl, iter, sgt->nents, i) {
+		newpage = sg_page(iter);
+		adapter->hma.phy_addr[i] = sg_dma_address(iter);
+	}
+
+	ncmds = DIV_ROUND_UP(sgt->nents, HMA_MAX_ADDR_IN_CMD);
+	/* Pass on the addresses to firmware */
+	for (i = 0, k = 0; i < ncmds; i++, k += HMA_MAX_ADDR_IN_CMD) {
+		struct fw_hma_cmd hma_cmd;
+		u8 naddr = HMA_MAX_ADDR_IN_CMD;
+		u8 soc = 0, eoc = 0;
+		u8 hma_mode = 1; /* Presently we support only Page table mode */
+
+		soc = (i == 0) ? 1 : 0;
+		eoc = (i == ncmds - 1) ? 1 : 0;
+
+		/* For last cmd, set naddr corresponding to remaining
+		 * addresses
+		 */
+		if (i == ncmds - 1) {
+			naddr = sgt->nents % HMA_MAX_ADDR_IN_CMD;
+			naddr = naddr ? naddr : HMA_MAX_ADDR_IN_CMD;
+		}
+		memset(&hma_cmd, 0, sizeof(hma_cmd));
+		hma_cmd.op_pkd = htonl(FW_CMD_OP_V(FW_HMA_CMD) |
+				       FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
+		hma_cmd.retval_len16 = htonl(FW_LEN16(hma_cmd));
+
+		hma_cmd.mode_to_pcie_params =
+			htonl(FW_HMA_CMD_MODE_V(hma_mode) |
+			      FW_HMA_CMD_SOC_V(soc) | FW_HMA_CMD_EOC_V(eoc));
+
+		/* HMA cmd size specified in MB's */
+		hma_cmd.naddr_size =
+			htonl(FW_HMA_CMD_SIZE_V(hma_size) |
+			      FW_HMA_CMD_NADDR_V(naddr));
+
+		/* Total Page size specified in units of 4K */
+		hma_cmd.addr_size_pkd =
+			htonl(FW_HMA_CMD_ADDR_SIZE_V
+				((page_size << page_order) >> 12));
+
+		/* Fill the 5 addresses */
+		for (j = 0; j < naddr; j++) {
+			hma_cmd.phy_address[j] =
+				cpu_to_be64(adapter->hma.phy_addr[j + k]);
+		}
+		ret = t4_wr_mbox(adapter, adapter->mbox, &hma_cmd,
+				 sizeof(hma_cmd), &hma_cmd);
+		if (ret) {
+			dev_err(adapter->pdev_dev,
+				"HMA FW command failed with err %d\n", ret);
+			goto free_hma;
+		}
+	}
+
+	if (!ret)
+		dev_info(adapter->pdev_dev,
+			 "Reserved %uMB host memory for HMA\n", hma_size);
+	return ret;
+
+free_hma:
+	adap_free_hma_mem(adapter);
+	return ret;
+}
+
 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
 {
 	u32 v;
@@ -3754,6 +3962,12 @@ static int adap_init0_config(struct adapter *adapter, int reset)
 	if (ret < 0)
 		goto bye;
 
+	/* We will proceed even if HMA init fails. */
+	ret = adap_config_hma(adapter);
+	if (ret)
+		dev_err(adapter->pdev_dev,
+			"HMA configuration failed with error %d\n", ret);
+
 	/*
 	 * And finally tell the firmware to initialize itself using the
 	 * parameters from the Configuration File.
@@ -3960,6 +4174,11 @@ static int adap_init0(struct adapter *adap)
 	 * effect. Otherwise, it's time to try initializing the adapter.
 	 */
 	if (state == DEV_STATE_INIT) {
+		ret = adap_config_hma(adap);
+		if (ret)
+			dev_err(adap->pdev_dev,
+				"HMA configuration failed with error %d\n",
+				ret);
 		dev_info(adap->pdev_dev, "Coming up as %s: "\
 			 "Adapter already initialized\n",
 			 adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
@@ -4349,6 +4568,7 @@ static int adap_init0(struct adapter *adap)
 	 * happened to HW/FW, stop issuing commands.
 	 */
 bye:
+	adap_free_hma_mem(adap);
 	kfree(adap->sge.egr_map);
 	kfree(adap->sge.ingr_map);
 	kfree(adap->sge.starving_fl);
@@ -5576,6 +5796,8 @@ static void remove_one(struct pci_dev *pdev)
 			t4_uld_clean_up(adapter);
 		}
 
+		adap_free_hma_mem(adapter);
+
 		disable_interrupts(adapter);
 
 		for_each_port(adapter, i)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 2c889efc78ea..38e38dcfff91 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -487,7 +487,7 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
  * t4_memory_rw_init - Get memory window relative offset, base, and size.
  * @adap: the adapter
  * @win: PCI-E Memory Window to use
- * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1, MEM_HMA or MEM_MC
  * @mem_off: memory relative offset with respect to @mtype.
  * @mem_base: configured memory base address.
  * @mem_aperture: configured memory window aperture.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 0d83b4064a78..e40217a1c9e6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -766,6 +766,7 @@ enum fw_cmd_opcodes {
 	FW_DEVLOG_CMD                  = 0x25,
 	FW_CLIP_CMD                    = 0x28,
 	FW_PTP_CMD                     = 0x3e,
+	FW_HMA_CMD                     = 0x3f,
 	FW_LASTC2E_CMD                 = 0x40,
 	FW_ERROR_CMD                   = 0x80,
 	FW_DEBUG_CMD                   = 0x81,
@@ -1132,6 +1133,7 @@ enum fw_memtype_cf {
 	FW_MEMTYPE_CF_FLASH		= 0x4,
 	FW_MEMTYPE_CF_INTERNAL		= 0x5,
 	FW_MEMTYPE_CF_EXTMEM1           = 0x6,
+	FW_MEMTYPE_CF_HMA		= 0x7,
 };
 
 struct fw_caps_config_cmd {
@@ -1210,6 +1212,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR	= 0x1C,
 	FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
+	FW_PARAMS_PARAM_DEV_HMA_SIZE	= 0x20,
 };
 
 /*
@@ -3435,6 +3438,59 @@ struct fw_debug_cmd {
 #define FW_DEBUG_CMD_TYPE_G(x)	\
 	(((x) >> FW_DEBUG_CMD_TYPE_S) & FW_DEBUG_CMD_TYPE_M)
 
+struct fw_hma_cmd {
+	__be32 op_pkd;
+	__be32 retval_len16;
+	__be32 mode_to_pcie_params;
+	__be32 naddr_size;
+	__be32 addr_size_pkd;
+	__be32 r6;
+	__be64 phy_address[5];
+};
+
+#define FW_HMA_CMD_MODE_S	31
+#define FW_HMA_CMD_MODE_M	0x1
+#define FW_HMA_CMD_MODE_V(x)	((x) << FW_HMA_CMD_MODE_S)
+#define FW_HMA_CMD_MODE_G(x)	\
+	(((x) >> FW_HMA_CMD_MODE_S) & FW_HMA_CMD_MODE_M)
+#define FW_HMA_CMD_MODE_F	FW_HMA_CMD_MODE_V(1U)
+
+#define FW_HMA_CMD_SOC_S	30
+#define FW_HMA_CMD_SOC_M	0x1
+#define FW_HMA_CMD_SOC_V(x)	((x) << FW_HMA_CMD_SOC_S)
+#define FW_HMA_CMD_SOC_G(x)	(((x) >> FW_HMA_CMD_SOC_S) & FW_HMA_CMD_SOC_M)
+#define FW_HMA_CMD_SOC_F	FW_HMA_CMD_SOC_V(1U)
+
+#define FW_HMA_CMD_EOC_S	29
+#define FW_HMA_CMD_EOC_M	0x1
+#define FW_HMA_CMD_EOC_V(x)	((x) << FW_HMA_CMD_EOC_S)
+#define FW_HMA_CMD_EOC_G(x)	(((x) >> FW_HMA_CMD_EOC_S) & FW_HMA_CMD_EOC_M)
+#define FW_HMA_CMD_EOC_F	FW_HMA_CMD_EOC_V(1U)
+
+#define FW_HMA_CMD_PCIE_PARAMS_S	0
+#define FW_HMA_CMD_PCIE_PARAMS_M	0x7ffffff
+#define FW_HMA_CMD_PCIE_PARAMS_V(x)	((x) << FW_HMA_CMD_PCIE_PARAMS_S)
+#define FW_HMA_CMD_PCIE_PARAMS_G(x)	\
+	(((x) >> FW_HMA_CMD_PCIE_PARAMS_S) & FW_HMA_CMD_PCIE_PARAMS_M)
+
+#define FW_HMA_CMD_NADDR_S	12
+#define FW_HMA_CMD_NADDR_M	0x3f
+#define FW_HMA_CMD_NADDR_V(x)	((x) << FW_HMA_CMD_NADDR_S)
+#define FW_HMA_CMD_NADDR_G(x)	\
+	(((x) >> FW_HMA_CMD_NADDR_S) & FW_HMA_CMD_NADDR_M)
+
+#define FW_HMA_CMD_SIZE_S	0
+#define FW_HMA_CMD_SIZE_M	0xfff
+#define FW_HMA_CMD_SIZE_V(x)	((x) << FW_HMA_CMD_SIZE_S)
+#define FW_HMA_CMD_SIZE_G(x)	\
+	(((x) >> FW_HMA_CMD_SIZE_S) & FW_HMA_CMD_SIZE_M)
+
+#define FW_HMA_CMD_ADDR_SIZE_S		11
+#define FW_HMA_CMD_ADDR_SIZE_M		0x1fffff
+#define FW_HMA_CMD_ADDR_SIZE_V(x)	((x) << FW_HMA_CMD_ADDR_SIZE_S)
+#define FW_HMA_CMD_ADDR_SIZE_G(x)	\
+	(((x) >> FW_HMA_CMD_ADDR_SIZE_S) & FW_HMA_CMD_ADDR_SIZE_M)
+
 enum pcie_fw_eval {
 	PCIE_FW_EVAL_CRASH = 0,
 };

From 6056415d3a513846f774e7bbee0de0460b1c15df Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 13 Mar 2018 13:55:55 +0300
Subject: [PATCH 0873/1678] net: Add comment about pernet_operations methods
 and synchronization

Make locking scheme be visible for users, and provide
a comment what for we are need exit_batch() methods,
and when it should be used.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index d4417495773a..71abc8d79178 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -312,6 +312,20 @@ struct net *get_net_ns_by_id(struct net *net, int id);
 
 struct pernet_operations {
 	struct list_head list;
+	/*
+	 * Below methods are called without any exclusive locks.
+	 * More than one net may be constructed and destructed
+	 * in parallel on several cpus. Every pernet_operations
+	 * have to keep in mind all other pernet_operations and
+	 * to introduce a locking, if they share common resources.
+	 *
+	 * Exit methods using blocking RCU primitives, such as
+	 * synchronize_rcu(), should be implemented via exit_batch.
+	 * Then, destruction of a group of net requires single
+	 * synchronize_rcu() related to these pernet_operations,
+	 * instead of separate synchronize_rcu() for every net.
+	 * Please, avoid synchronize_rcu() at all, where it's possible.
+	 */
 	int (*init)(struct net *net);
 	void (*exit)(struct net *net);
 	void (*exit_batch)(struct list_head *net_exit_list);

From 72597135cdd2fe524f9a185d7f954c2c3980f3ee Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 6 Mar 2018 08:26:00 +0100
Subject: [PATCH 0874/1678] netfilter: x_tables: fix build with CONFIG_COMPAT=n

I placed the helpers within CONFIG_COMPAT section, move them
outside.

Fixes: 472ebdcd15ebdb ("netfilter: x_tables: check error target size too")
Fixes: 07a9da51b4b6ae ("netfilter: x_tables: check standard verdicts in core")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 62 ++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 7521e8a72c06..bac932f1c582 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -577,6 +577,37 @@ error:
 }
 EXPORT_SYMBOL(xt_check_table_hooks);
 
+static bool verdict_ok(int verdict)
+{
+	if (verdict > 0)
+		return true;
+
+	if (verdict < 0) {
+		int v = -verdict - 1;
+
+		if (verdict == XT_RETURN)
+			return true;
+
+		switch (v) {
+		case NF_ACCEPT: return true;
+		case NF_DROP: return true;
+		case NF_QUEUE: return true;
+		default:
+			break;
+		}
+
+		return false;
+	}
+
+	return false;
+}
+
+static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
+			const char *msg, unsigned int msglen)
+{
+	return usersize == kernsize && strnlen(msg, msglen) < msglen;
+}
+
 #ifdef CONFIG_COMPAT
 int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
@@ -736,37 +767,6 @@ struct compat_xt_error_target {
 	char errorname[XT_FUNCTION_MAXNAMELEN];
 };
 
-static bool verdict_ok(int verdict)
-{
-	if (verdict > 0)
-		return true;
-
-	if (verdict < 0) {
-		int v = -verdict - 1;
-
-		if (verdict == XT_RETURN)
-			return true;
-
-		switch (v) {
-		case NF_ACCEPT: return true;
-		case NF_DROP: return true;
-		case NF_QUEUE: return true;
-		default:
-			break;
-		}
-
-		return false;
-	}
-
-	return false;
-}
-
-static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
-			const char *msg, unsigned int msglen)
-{
-	return usersize == kernsize && strnlen(msg, msglen) < msglen;
-}
-
 int xt_compat_check_entry_offsets(const void *base, const char *elems,
 				  unsigned int target_offset,
 				  unsigned int next_offset)

From a55efe1d416c345a73bef38848e1ac7109560e12 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Mon, 5 Mar 2018 15:35:57 -0600
Subject: [PATCH 0875/1678] ipvs: use true and false for boolean values

Assign true or false to boolean variables instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_lblc.c  | 4 ++--
 net/netfilter/ipvs/ip_vs_lblcr.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 6a340c94c4b8..942e835caf7f 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -238,7 +238,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
 	int i;
 
 	spin_lock_bh(&svc->sched_lock);
-	tbl->dead = 1;
+	tbl->dead = true;
 	for (i = 0; i < IP_VS_LBLC_TAB_SIZE; i++) {
 		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
 			ip_vs_lblc_del(en);
@@ -369,7 +369,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
 	tbl->rover = 0;
 	tbl->counter = 1;
-	tbl->dead = 0;
+	tbl->dead = false;
 	tbl->svc = svc;
 
 	/*
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0627881128da..a5acab25c36b 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -404,7 +404,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
 	struct hlist_node *next;
 
 	spin_lock_bh(&svc->sched_lock);
-	tbl->dead = 1;
+	tbl->dead = true;
 	for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
 		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
 			ip_vs_lblcr_free(en);
@@ -532,7 +532,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
 	tbl->rover = 0;
 	tbl->counter = 1;
-	tbl->dead = 0;
+	tbl->dead = false;
 	tbl->svc = svc;
 
 	/*

From 24114a5f9470f75b47bdd6d40f4fc784326a9b58 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 24 Feb 2018 16:54:12 +0100
Subject: [PATCH 0876/1678] mt76: initialize available_antennas_{tx,rx} info

Initialize available_antennas related info in wiphy data structure
according to antenna_mask field; antenna_mask info is initialized
in device specific code and will be used in mac80211 {set,get}_antenna
callbacks

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c    | 3 +++
 drivers/net/wireless/mediatek/mt76/mt76.h        | 2 ++
 drivers/net/wireless/mediatek/mt76/mt76x2_init.c | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 85f8d324ebf8..ec94b86a37fd 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -262,6 +262,9 @@ int mt76_register_device(struct mt76_dev *dev, bool vht,
 
 	wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
 
+	wiphy->available_antennas_tx = dev->antenna_mask;
+	wiphy->available_antennas_rx = dev->antenna_mask;
+
 	hw->txq_data_size = sizeof(struct mt76_txq);
 	hw->max_tx_fragments = 16;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index d2ce15093edd..15009d5b0531 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -253,6 +253,8 @@ struct mt76_dev {
 	u32 rev;
 	unsigned long state;
 
+	u8 antenna_mask;
+
 	struct mt76_sband sband_2g;
 	struct mt76_sband sband_5g;
 	struct debugfs_blob_wrapper eeprom;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2_init.c
index 9dbf94947324..934c331d995e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_init.c
@@ -857,6 +857,9 @@ int mt76x2_register_device(struct mt76x2_dev *dev)
 	dev->mt76.led_cdev.brightness_set = mt76x2_led_set_brightness;
 	dev->mt76.led_cdev.blink_set = mt76x2_led_set_blink;
 
+	/* init antenna configuration */
+	dev->mt76.antenna_mask = 3;
+
 	ret = mt76_register_device(&dev->mt76, true, mt76x2_rates,
 				   ARRAY_SIZE(mt76x2_rates));
 	if (ret)

From 551e1ef4d29134ff1bb01869d522dbae0438032e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 24 Feb 2018 16:54:13 +0100
Subject: [PATCH 0877/1678] mt76: add mt76_init_stream_cap routine

Add mt76_init_stream_cap utility routine to set ht/vht sta capabilities
related to number of streams currently used by the device
(it will be used by mac80211 set_antenna callback)

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 56 +++++++++++++------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index ec94b86a37fd..a2a382a13466 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -119,6 +119,43 @@ static int mt76_led_init(struct mt76_dev *dev)
 	return devm_led_classdev_register(dev->dev, &dev->led_cdev);
 }
 
+static void mt76_init_stream_cap(struct mt76_dev *dev,
+				 struct ieee80211_supported_band *sband,
+				 bool vht)
+{
+	struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap;
+	int i, nstream = __sw_hweight8(dev->antenna_mask);
+	struct ieee80211_sta_vht_cap *vht_cap;
+	u16 mcs_map = 0;
+
+	if (nstream > 1)
+		ht_cap->cap |= IEEE80211_HT_CAP_TX_STBC;
+	else
+		ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
+
+	for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
+		ht_cap->mcs.rx_mask[i] = i < nstream ? 0xff : 0;
+
+	if (!vht)
+		return;
+
+	vht_cap = &sband->vht_cap;
+	if (nstream > 1)
+		vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC;
+	else
+		vht_cap->cap &= ~IEEE80211_VHT_CAP_TXSTBC;
+
+	for (i = 0; i < 8; i++) {
+		if (i < nstream)
+			mcs_map |= (IEEE80211_VHT_MCS_SUPPORT_0_9 << (i * 2));
+		else
+			mcs_map |=
+				(IEEE80211_VHT_MCS_NOT_SUPPORTED << (i * 2));
+	}
+	vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
+	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
+}
+
 static int
 mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
 		const struct ieee80211_channel *chan, int n_chan,
@@ -128,7 +165,6 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
 	void *chanlist;
-	u16 mcs_map;
 	int size;
 
 	size = n_chan * sizeof(*chan);
@@ -153,34 +189,20 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
 		       IEEE80211_HT_CAP_GRN_FLD |
 		       IEEE80211_HT_CAP_SGI_20 |
 		       IEEE80211_HT_CAP_SGI_40 |
-		       IEEE80211_HT_CAP_TX_STBC |
 		       (1 << IEEE80211_HT_CAP_RX_STBC_SHIFT);
 
-	ht_cap->mcs.rx_mask[0] = 0xff;
-	ht_cap->mcs.rx_mask[1] = 0xff;
 	ht_cap->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 	ht_cap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
 	ht_cap->ampdu_density = IEEE80211_HT_MPDU_DENSITY_4;
 
+	mt76_init_stream_cap(dev, sband, vht);
+
 	if (!vht)
 		return 0;
 
 	vht_cap = &sband->vht_cap;
 	vht_cap->vht_supported = true;
-
-	mcs_map = (IEEE80211_VHT_MCS_SUPPORT_0_9 << (0 * 2)) |
-		  (IEEE80211_VHT_MCS_SUPPORT_0_9 << (1 * 2)) |
-		  (IEEE80211_VHT_MCS_NOT_SUPPORTED << (2 * 2)) |
-		  (IEEE80211_VHT_MCS_NOT_SUPPORTED << (3 * 2)) |
-		  (IEEE80211_VHT_MCS_NOT_SUPPORTED << (4 * 2)) |
-		  (IEEE80211_VHT_MCS_NOT_SUPPORTED << (5 * 2)) |
-		  (IEEE80211_VHT_MCS_NOT_SUPPORTED << (6 * 2)) |
-		  (IEEE80211_VHT_MCS_NOT_SUPPORTED << (7 * 2));
-
-	vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
-	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
 	vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC |
-			IEEE80211_VHT_CAP_TXSTBC |
 			IEEE80211_VHT_CAP_RXSTBC_1 |
 			IEEE80211_VHT_CAP_SHORT_GI_80;
 

From 5ebdc3e0692fdea0c61b41a4e05f3b6790b24e8e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 24 Feb 2018 16:54:14 +0100
Subject: [PATCH 0878/1678] mt76x2: add mac80211 {set,get}_antenna callbacks

Add capability to select tx/rx antennas. Possible values are:
 - 1: to use only the first antenna
 - 2: to use only the second antenna
 - 3: to use both of them

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c |  9 +++
 drivers/net/wireless/mediatek/mt76/mt76.h     |  1 +
 drivers/net/wireless/mediatek/mt76/mt76x2.h   |  1 +
 .../net/wireless/mediatek/mt76/mt76x2_main.c  | 36 +++++++++++
 .../net/wireless/mediatek/mt76/mt76x2_phy.c   | 59 +++++++++++++------
 .../net/wireless/mediatek/mt76/mt76x2_regs.h  |  2 +
 6 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index a2a382a13466..4f30cdcd2b53 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -156,6 +156,15 @@ static void mt76_init_stream_cap(struct mt76_dev *dev,
 	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
 }
 
+void mt76_set_stream_caps(struct mt76_dev *dev, bool vht)
+{
+	if (dev->cap.has_2ghz)
+		mt76_init_stream_cap(dev, &dev->sband_2g.sband, false);
+	if (dev->cap.has_5ghz)
+		mt76_init_stream_cap(dev, &dev->sband_5g.sband, vht);
+}
+EXPORT_SYMBOL_GPL(mt76_set_stream_caps);
+
 static int
 mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
 		const struct ieee80211_channel *chan, int n_chan,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 15009d5b0531..065ff78059c3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -425,6 +425,7 @@ void mt76_release_buffered_frames(struct ieee80211_hw *hw,
 void mt76_set_channel(struct mt76_dev *dev);
 int mt76_get_survey(struct ieee80211_hw *hw, int idx,
 		    struct survey_info *survey);
+void mt76_set_stream_caps(struct mt76_dev *dev, bool vht);
 
 int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tid,
 		       u16 ssn, u8 size);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2.h b/drivers/net/wireless/mediatek/mt76/mt76x2.h
index e62131b88102..783b8122ec3c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2.h
@@ -180,6 +180,7 @@ int mt76x2_eeprom_init(struct mt76x2_dev *dev);
 int mt76x2_apply_calibration_data(struct mt76x2_dev *dev, int channel);
 void mt76x2_set_tx_ackto(struct mt76x2_dev *dev);
 
+void mt76x2_phy_set_antenna(struct mt76x2_dev *dev);
 int mt76x2_phy_start(struct mt76x2_dev *dev);
 int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
 			 struct cfg80211_chan_def *chandef);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
index 205043b470b2..25f4cebef26d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
@@ -549,6 +549,40 @@ mt76x2_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set)
 	return 0;
 }
 
+static int mt76x2_set_antenna(struct ieee80211_hw *hw, u32 tx_ant,
+			      u32 rx_ant)
+{
+	struct mt76x2_dev *dev = hw->priv;
+
+	if (!tx_ant || tx_ant > 3 || tx_ant != rx_ant)
+		return -EINVAL;
+
+	mutex_lock(&dev->mutex);
+
+	dev->chainmask = (tx_ant == 3) ? 0x202 : 0x101;
+	dev->mt76.antenna_mask = tx_ant;
+
+	mt76_set_stream_caps(&dev->mt76, true);
+	mt76x2_phy_set_antenna(dev);
+
+	mutex_unlock(&dev->mutex);
+
+	return 0;
+}
+
+static int mt76x2_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant,
+			      u32 *rx_ant)
+{
+	struct mt76x2_dev *dev = hw->priv;
+
+	mutex_lock(&dev->mutex);
+	*tx_ant = dev->mt76.antenna_mask;
+	*rx_ant = dev->mt76.antenna_mask;
+	mutex_unlock(&dev->mutex);
+
+	return 0;
+}
+
 const struct ieee80211_ops mt76x2_ops = {
 	.tx = mt76x2_tx,
 	.start = mt76x2_start,
@@ -573,5 +607,7 @@ const struct ieee80211_ops mt76x2_ops = {
 	.set_coverage_class = mt76x2_set_coverage_class,
 	.get_survey = mt76_get_survey,
 	.set_tim = mt76x2_set_tim,
+	.set_antenna = mt76x2_set_antenna,
+	.get_antenna = mt76x2_get_antenna,
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
index 5b742749d5de..fcc37eb7ce0b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_phy.c
@@ -361,27 +361,50 @@ mt76x2_phy_set_band(struct mt76x2_dev *dev, int band, bool primary_upper)
 		       primary_upper);
 }
 
-static void
-mt76x2_set_rx_chains(struct mt76x2_dev *dev)
+void mt76x2_phy_set_antenna(struct mt76x2_dev *dev)
 {
 	u32 val;
 
 	val = mt76_rr(dev, MT_BBP(AGC, 0));
-	val &= ~(BIT(3) | BIT(4));
-
-	if (dev->chainmask & BIT(1))
-		val |= BIT(3);
-
-	mt76_wr(dev, MT_BBP(AGC, 0), val);
-}
-
-static void
-mt76x2_set_tx_dac(struct mt76x2_dev *dev)
-{
-	if (dev->chainmask & BIT(1))
-		mt76_set(dev, MT_BBP(TXBE, 5), 3);
-	else
+	val &= ~(BIT(4) | BIT(1));
+	switch (dev->mt76.antenna_mask) {
+	case 1:
+		/* disable mac DAC control */
+		mt76_clear(dev, MT_BBP(IBI, 9), BIT(11));
 		mt76_clear(dev, MT_BBP(TXBE, 5), 3);
+		mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0x3);
+		mt76_rmw_field(dev, MT_BBP(CORE, 32), GENMASK(21, 20), 2);
+		/* disable DAC 1 */
+		mt76_rmw_field(dev, MT_BBP(CORE, 33), GENMASK(12, 9), 4);
+
+		val &= ~(BIT(3) | BIT(0));
+		break;
+	case 2:
+		/* disable mac DAC control */
+		mt76_clear(dev, MT_BBP(IBI, 9), BIT(11));
+		mt76_rmw_field(dev, MT_BBP(TXBE, 5), 3, 1);
+		mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0xc);
+		mt76_rmw_field(dev, MT_BBP(CORE, 32), GENMASK(21, 20), 1);
+		/* disable DAC 0 */
+		mt76_rmw_field(dev, MT_BBP(CORE, 33), GENMASK(12, 9), 1);
+
+		val &= ~BIT(3);
+		val |= BIT(0);
+		break;
+	case 3:
+	default:
+		/* enable mac DAC control */
+		mt76_set(dev, MT_BBP(IBI, 9), BIT(11));
+		mt76_set(dev, MT_BBP(TXBE, 5), 3);
+		mt76_rmw_field(dev, MT_TX_PIN_CFG, MT_TX_PIN_CFG_TXANT, 0xf);
+		mt76_clear(dev, MT_BBP(CORE, 32), GENMASK(21, 20));
+		mt76_clear(dev, MT_BBP(CORE, 33), GENMASK(12, 9));
+
+		val &= ~BIT(0);
+		val |= BIT(3);
+		break;
+	}
+	mt76_wr(dev, MT_BBP(AGC, 0), val);
 }
 
 static void
@@ -585,10 +608,8 @@ int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
 	mt76x2_configure_tx_delay(dev, band, bw);
 	mt76x2_phy_set_txpower(dev);
 
-	mt76x2_set_rx_chains(dev);
 	mt76x2_phy_set_band(dev, chan->band, ch_group_index & 1);
 	mt76x2_phy_set_bw(dev, chandef->width, ch_group_index);
-	mt76x2_set_tx_dac(dev);
 
 	mt76_rmw(dev, MT_EXT_CCA_CFG,
 		 (MT_EXT_CCA_CFG_CCA0 |
@@ -604,6 +625,8 @@ int mt76x2_phy_set_channel(struct mt76x2_dev *dev,
 
 	mt76x2_mcu_init_gain(dev, channel, dev->cal.rx.mcu_gain, true);
 
+	mt76x2_phy_set_antenna(dev);
+
 	/* Enable LDPC Rx */
 	if (mt76xx_rev(dev) >= MT76XX_REV_E3)
 		mt76_set(dev, MT_BBP(RXO, 13), BIT(10));
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h b/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
index ce3ab85c8b0f..b9c334d9e5b8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_regs.h
@@ -321,6 +321,8 @@
 #define MT_TX_PWR_CFG_2			0x131c
 #define MT_TX_PWR_CFG_3			0x1320
 #define MT_TX_PWR_CFG_4			0x1324
+#define MT_TX_PIN_CFG			0x1328
+#define MT_TX_PIN_CFG_TXANT		GENMASK(3, 0)
 
 #define MT_TX_BAND_CFG			0x132c
 #define MT_TX_BAND_CFG_UPPER_40M	BIT(0)

From 3fb2f6a4db98037890c52f12efa5c351a7bf6c77 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Wed, 28 Feb 2018 15:26:57 +0100
Subject: [PATCH 0879/1678] mt7601u: remove a warning in
 mt7601u_efuse_physical_size_check()

Fix the following sparse warning in mt7601u_efuse_physical_size_check:
- drivers/net/wireless/mediatek/mt7601u/eeprom.c:77:27: warning:
  Variable length array is used

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Acked-by: Jakub Kicinski <kubakici@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt7601u/eeprom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
index 05d729be0c65..76117b402880 100644
--- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
@@ -75,7 +75,7 @@ static int
 mt7601u_efuse_physical_size_check(struct mt7601u_dev *dev)
 {
 	const int map_reads = DIV_ROUND_UP(MT_EFUSE_USAGE_MAP_SIZE, 16);
-	u8 data[map_reads * 16];
+	u8 data[round_up(MT_EFUSE_USAGE_MAP_SIZE, 16)];
 	int ret, i;
 	u32 start = 0, end = 0, cnt_free;
 

From fbae9c7490b76a84356dcbd9bca566209271c49e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Wed, 28 Feb 2018 23:20:12 +0100
Subject: [PATCH 0880/1678] mt76x2: remove unnecessary len variable in
 mt76x2_eeprom_load()

Substitute unnecessary len variable in mt76x2_eeprom_load() with
MT7662_EEPROM_SIZE macro since len is used just to store eeprom
default size.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
index 9c9bf3e785ba..5bb50027c1e8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_eeprom.c
@@ -222,11 +222,10 @@ static int
 mt76x2_eeprom_load(struct mt76x2_dev *dev)
 {
 	void *efuse;
-	int len = MT7662_EEPROM_SIZE;
 	bool found;
 	int ret;
 
-	ret = mt76_eeprom_init(&dev->mt76, len);
+	ret = mt76_eeprom_init(&dev->mt76, MT7662_EEPROM_SIZE);
 	if (ret < 0)
 		return ret;
 
@@ -234,14 +233,15 @@ mt76x2_eeprom_load(struct mt76x2_dev *dev)
 	if (found)
 		found = !mt76x2_check_eeprom(dev);
 
-	dev->mt76.otp.data = devm_kzalloc(dev->mt76.dev, len, GFP_KERNEL);
-	dev->mt76.otp.size = len;
+	dev->mt76.otp.data = devm_kzalloc(dev->mt76.dev, MT7662_EEPROM_SIZE,
+					  GFP_KERNEL);
+	dev->mt76.otp.size = MT7662_EEPROM_SIZE;
 	if (!dev->mt76.otp.data)
 		return -ENOMEM;
 
 	efuse = dev->mt76.otp.data;
 
-	if (mt76x2_get_efuse_data(dev, efuse, len))
+	if (mt76x2_get_efuse_data(dev, efuse, MT7662_EEPROM_SIZE))
 		goto out;
 
 	if (found) {
@@ -249,7 +249,7 @@ mt76x2_eeprom_load(struct mt76x2_dev *dev)
 	} else {
 		/* FIXME: check if efuse data is complete */
 		found = true;
-		memcpy(dev->mt76.eeprom.data, efuse, len);
+		memcpy(dev->mt76.eeprom.data, efuse, MT7662_EEPROM_SIZE);
 	}
 
 out:

From 2f04652f891a1911981988f6ce0c8a83098e9002 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Wed, 7 Mar 2018 10:25:50 +0100
Subject: [PATCH 0881/1678] mt7601u: simplify mt7601u_mcu_msg_alloc signature

Remove mt7601u_dev parameter from mt7601u_mcu_msg_alloc signature since
dev pointer is never used in routine body

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Acked-by: Jakub Kicinski <kubakici@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt7601u/mcu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c
index 65a8004418ea..d9d6fd7eff5e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mcu.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c
@@ -58,8 +58,7 @@ static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev,
 	trace_mt_mcu_msg_send(dev, skb, csum, need_resp);
 }
 
-static struct sk_buff *
-mt7601u_mcu_msg_alloc(struct mt7601u_dev *dev, const void *data, int len)
+static struct sk_buff *mt7601u_mcu_msg_alloc(const void *data, int len)
 {
 	struct sk_buff *skb;
 
@@ -171,7 +170,7 @@ static int mt7601u_mcu_function_select(struct mt7601u_dev *dev,
 		.value = cpu_to_le32(val),
 	};
 
-	skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+	skb = mt7601u_mcu_msg_alloc(&msg, sizeof(msg));
 	if (!skb)
 		return -ENOMEM;
 	return mt7601u_mcu_msg_send(dev, skb, CMD_FUN_SET_OP, func == 5);
@@ -208,7 +207,7 @@ mt7601u_mcu_calibrate(struct mt7601u_dev *dev, enum mcu_calibrate cal, u32 val)
 		.value = cpu_to_le32(val),
 	};
 
-	skb = mt7601u_mcu_msg_alloc(dev, &msg, sizeof(msg));
+	skb = mt7601u_mcu_msg_alloc(&msg, sizeof(msg));
 	if (!skb)
 		return -ENOMEM;
 	return mt7601u_mcu_msg_send(dev, skb, CMD_CALIBRATION_OP, true);

From a9eab62d41646cbf5d8b3408c9d4617bb9678e71 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 9 Mar 2018 10:41:41 +0100
Subject: [PATCH 0882/1678] mt7601u: let mac80211 validate rx CCMP PN

Apparently the hardware does not perform CCMP PN validation so
let mac80211 take care of possible replay attacks in sw.
Moreover indicate ICV and MIC had been stripped setting corresponding
bits in ieee80211_rx_status.
The fix has been validated using 4.2.1 and 4.1.3 tests from the WiFi
Alliance vulnerability detection tool.

Fixes: c869f77d6abb ("add mt7601u driver")
Acked-by: Jakub Kicinski <kubakici@wp.pl>
Tested-by: David Park <david.park@hitemengineering.com>
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt7601u/initvals.h |  1 +
 drivers/net/wireless/mediatek/mt7601u/mac.c      | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/initvals.h b/drivers/net/wireless/mediatek/mt7601u/initvals.h
index ec11ff66969d..2dc6b68e7fb9 100644
--- a/drivers/net/wireless/mediatek/mt7601u/initvals.h
+++ b/drivers/net/wireless/mediatek/mt7601u/initvals.h
@@ -139,6 +139,7 @@ static const struct mt76_reg_pair mac_common_vals[] = {
 	{ MT_TXOP_HLDR_ET,		0x00000002 },
 	{ MT_XIFS_TIME_CFG,		0x33a41010 },
 	{ MT_PWR_PIN_CFG,		0x00000000 },
+	{ MT_PN_PAD_MODE,		0x00000001 },
 };
 
 static const struct mt76_reg_pair mac_chip_vals[] = {
diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c
index 4d3077941138..d55d7040a56d 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mac.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mac.c
@@ -480,8 +480,16 @@ u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
 
 	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
 		status->flag |= RX_FLAG_DECRYPTED;
-		status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
+		status->flag |= RX_FLAG_MMIC_STRIPPED;
+		status->flag |= RX_FLAG_MIC_STRIPPED;
+		status->flag |= RX_FLAG_ICV_STRIPPED;
+		status->flag |= RX_FLAG_IV_STRIPPED;
 	}
+	/* let mac80211 take care of PN validation since apparently
+	 * the hardware does not support it
+	 */
+	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_PN_LEN))
+		status->flag &= ~RX_FLAG_IV_STRIPPED;
 
 	status->chains = BIT(0);
 	rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);

From 1100f81bbcd1ace918ce63fa3be1b854bca6baf2 Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Tue, 27 Feb 2018 19:56:11 +0530
Subject: [PATCH 0883/1678] rsi: add rx control block to handle rx packets in
 USB

Rx bluetooth endpoint shall be added in further patches. Rx control
block is introduced here to handle Rx packets properly. Separate
function is written to initialize the RX control blocks.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_main.c     |  4 +-
 drivers/net/wireless/rsi/rsi_91x_sdio_ops.c |  2 +-
 drivers/net/wireless/rsi/rsi_91x_usb.c      | 75 ++++++++++++++++-----
 drivers/net/wireless/rsi/rsi_91x_usb_ops.c  | 35 ++++++----
 drivers/net/wireless/rsi/rsi_common.h       |  2 +-
 drivers/net/wireless/rsi/rsi_main.h         |  2 +-
 drivers/net/wireless/rsi/rsi_usb.h          | 10 ++-
 7 files changed, 96 insertions(+), 34 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index 0cb8e68bab58..0413af88cd25 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -137,7 +137,7 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common,
  *
  * Return: 0 on success, -1 on failure.
  */
-int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
+int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
 {
 	u8 *frame_desc = NULL, extended_desc = 0;
 	u32 index, length = 0, queueno = 0;
@@ -146,7 +146,7 @@ int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len)
 
 	index = 0;
 	do {
-		frame_desc = &common->rx_data_pkt[index];
+		frame_desc = &rx_pkt[index];
 		actual_length = *(u16 *)&frame_desc[0];
 		offset = *(u16 *)&frame_desc[2];
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 8e2a95c486b0..9fbc0ef82559 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -118,7 +118,7 @@ static int rsi_process_pkt(struct rsi_common *common)
 		goto fail;
 	}
 
-	status = rsi_read_pkt(common, rcv_pkt_len);
+	status = rsi_read_pkt(common, common->rx_data_pkt, rcv_pkt_len);
 
 fail:
 	kfree(common->rx_data_pkt);
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 8f8443833348..bbce809d3b47 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -247,12 +247,13 @@ static int rsi_usb_reg_write(struct usb_device *usbdev,
  */
 static void rsi_rx_done_handler(struct urb *urb)
 {
-	struct rsi_hw *adapter = urb->context;
-	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+	struct rx_usb_ctrl_block *rx_cb = urb->context;
+	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data;
 
 	if (urb->status)
 		return;
 
+	rx_cb->pend = 1;
 	rsi_set_event(&dev->rx_thread.event);
 }
 
@@ -262,10 +263,11 @@ static void rsi_rx_done_handler(struct urb *urb)
  *
  * Return: 0 on success, a negative error code on failure.
  */
-static int rsi_rx_urb_submit(struct rsi_hw *adapter)
+static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num)
 {
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
-	struct urb *urb = dev->rx_usb_urb[0];
+	struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1];
+	struct urb *urb = rx_cb->rx_urb;
 	int status;
 
 	usb_fill_bulk_urb(urb,
@@ -275,7 +277,7 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter)
 			  urb->transfer_buffer,
 			  3000,
 			  rsi_rx_done_handler,
-			  adapter);
+			  rx_cb);
 
 	status = usb_submit_urb(urb, GFP_KERNEL);
 	if (status)
@@ -484,14 +486,54 @@ static struct rsi_host_intf_ops usb_host_intf_ops = {
  */
 static void rsi_deinit_usb_interface(struct rsi_hw *adapter)
 {
+	u8 idx;
+
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
 
 	rsi_kill_thread(&dev->rx_thread);
-	usb_free_urb(dev->rx_usb_urb[0]);
+
+	for (idx = 0; idx < MAX_RX_URBS; idx++) {
+		usb_free_urb(dev->rx_cb[idx].rx_urb);
+		kfree(dev->rx_cb[idx].rx_buffer);
+	}
+
 	kfree(adapter->priv->rx_data_pkt);
 	kfree(dev->tx_buffer);
 }
 
+static int rsi_usb_init_rx(struct rsi_hw *adapter)
+{
+	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+	struct rx_usb_ctrl_block *rx_cb;
+	u8 idx;
+
+	for (idx = 0; idx < MAX_RX_URBS; idx++) {
+		rx_cb = &dev->rx_cb[idx];
+
+		rx_cb->rx_buffer = kzalloc(RSI_USB_BUF_SIZE * 2,
+					   GFP_KERNEL);
+		if (!rx_cb->rx_buffer)
+			goto err;
+
+		rx_cb->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (!rx_cb->rx_urb) {
+			rsi_dbg(ERR_ZONE, "Failed alloc rx urb[%d]\n", idx);
+			goto err;
+		}
+		rx_cb->rx_urb->transfer_buffer = rx_cb->rx_buffer;
+		rx_cb->ep_num = idx + 1;
+		rx_cb->data = (void *)dev;
+	}
+	return 0;
+
+err:
+	for (idx = 0; idx < MAX_RX_URBS; idx++) {
+		kfree(dev->rx_cb[idx].rx_buffer);
+		kfree(dev->rx_cb[idx].rx_urb);
+	}
+	return -1;
+}
+
 /**
  * rsi_init_usb_interface() - This function initializes the usb interface.
  * @adapter: Pointer to the adapter structure.
@@ -504,7 +546,7 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 {
 	struct rsi_91x_usbdev *rsi_dev;
 	struct rsi_common *common = adapter->priv;
-	int status;
+	int status, i;
 
 	rsi_dev = kzalloc(sizeof(*rsi_dev), GFP_KERNEL);
 	if (!rsi_dev)
@@ -531,12 +573,12 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 		status = -ENOMEM;
 		goto fail_tx;
 	}
-	rsi_dev->rx_usb_urb[0] = usb_alloc_urb(0, GFP_KERNEL);
-	if (!rsi_dev->rx_usb_urb[0]) {
-		status = -ENOMEM;
-		goto fail_rx;
+
+	if (rsi_usb_init_rx(adapter)) {
+		rsi_dbg(ERR_ZONE, "Failed to init RX handle\n");
+		return -ENOMEM;
 	}
-	rsi_dev->rx_usb_urb[0]->transfer_buffer = adapter->priv->rx_data_pkt;
+
 	rsi_dev->tx_blk_size = 252;
 	adapter->block_size = rsi_dev->tx_blk_size;
 
@@ -564,9 +606,10 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 	return 0;
 
 fail_thread:
-	usb_free_urb(rsi_dev->rx_usb_urb[0]);
-fail_rx:
-	kfree(rsi_dev->tx_buffer);
+	for (i = 0; i < MAX_RX_URBS; i++) {
+		kfree(rsi_dev->rx_cb[i].rx_buffer);
+		kfree(rsi_dev->rx_cb[i].rx_urb);
+	}
 fail_tx:
 	kfree(common->rx_data_pkt);
 	return status;
@@ -698,7 +741,7 @@ static int rsi_probe(struct usb_interface *pfunction,
 		rsi_dbg(INIT_ZONE, "%s: Device Init Done\n", __func__);
 	}
 
-	status = rsi_rx_urb_submit(adapter);
+	status = rsi_rx_urb_submit(adapter, WLAN_EP);
 	if (status)
 		goto err1;
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index 465692b3c351..d0650eaeec23 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -29,7 +29,8 @@ void rsi_usb_rx_thread(struct rsi_common *common)
 {
 	struct rsi_hw *adapter = common->priv;
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
-	int status;
+	struct rx_usb_ctrl_block *rx_cb;
+	int status, idx;
 
 	do {
 		rsi_wait_event(&dev->rx_thread.event, EVENT_WAIT_FOREVER);
@@ -37,20 +38,30 @@ void rsi_usb_rx_thread(struct rsi_common *common)
 		if (atomic_read(&dev->rx_thread.thread_done))
 			goto out;
 
-		mutex_lock(&common->rx_lock);
-		status = rsi_read_pkt(common, 0);
-		if (status) {
-			rsi_dbg(ERR_ZONE, "%s: Failed To read data", __func__);
+		for (idx = 0; idx < MAX_RX_URBS; idx++) {
+			rx_cb = &dev->rx_cb[idx];
+			if (!rx_cb->pend)
+				continue;
+
+			mutex_lock(&common->rx_lock);
+			status = rsi_read_pkt(common, rx_cb->rx_buffer, 0);
+			if (status) {
+				rsi_dbg(ERR_ZONE, "%s: Failed To read data",
+					__func__);
+				mutex_unlock(&common->rx_lock);
+				break;
+			}
+			rx_cb->pend = 0;
 			mutex_unlock(&common->rx_lock);
-			return;
+
+			if (adapter->rx_urb_submit(adapter, rx_cb->ep_num)) {
+				rsi_dbg(ERR_ZONE,
+					"%s: Failed in urb submission",
+					__func__);
+				return;
+			}
 		}
-		mutex_unlock(&common->rx_lock);
 		rsi_reset_event(&dev->rx_thread.event);
-		if (adapter->rx_urb_submit(adapter)) {
-			rsi_dbg(ERR_ZONE,
-				"%s: Failed in urb submission", __func__);
-			return;
-		}
 	} while (1);
 
 out:
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index d07dbba61727..1d8af419fb1f 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -82,7 +82,7 @@ void rsi_mac80211_detach(struct rsi_hw *hw);
 u16 rsi_get_connected_channel(struct ieee80211_vif *vif);
 struct rsi_hw *rsi_91x_init(void);
 void rsi_91x_deinit(struct rsi_hw *adapter);
-int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len);
+int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len);
 #ifdef CONFIG_PM
 int rsi_config_wowlan(struct rsi_hw *adapter, struct cfg80211_wowlan *wowlan);
 #endif
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index 8cab630af4a5..ee469dc66562 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -343,7 +343,7 @@ struct rsi_hw {
 	void *rsi_dev;
 	struct rsi_host_intf_ops *host_intf_ops;
 	int (*check_hw_queue_status)(struct rsi_hw *adapter, u8 q_num);
-	int (*rx_urb_submit)(struct rsi_hw *adapter);
+	int (*rx_urb_submit)(struct rsi_hw *adapter, u8 ep_num);
 	int (*determine_event_timeout)(struct rsi_hw *adapter);
 };
 
diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
index 891daea2d932..7e781d55bad8 100644
--- a/drivers/net/wireless/rsi/rsi_usb.h
+++ b/drivers/net/wireless/rsi/rsi_usb.h
@@ -39,12 +39,20 @@
 #define RSI_USB_BUF_SIZE	     4096
 #define RSI_USB_CTRL_BUF_SIZE	     0x04
 
+struct rx_usb_ctrl_block {
+	u8 *data;
+	struct urb *rx_urb;
+	u8 *rx_buffer;
+	u8 ep_num;
+	u8 pend;
+};
+
 struct rsi_91x_usbdev {
 	struct rsi_thread rx_thread;
 	u8 endpoint;
 	struct usb_device *usbdev;
 	struct usb_interface *pfunction;
-	struct urb *rx_usb_urb[MAX_RX_URBS];
+	struct rx_usb_ctrl_block rx_cb[MAX_RX_URBS];
 	u8 *tx_buffer;
 	__le16 bulkin_size;
 	u8 bulkin_endpoint_addr;

From a4302bff28e2f35ea2471804978c7a601aa05999 Mon Sep 17 00:00:00 2001
From: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Date: Tue, 27 Feb 2018 19:56:12 +0530
Subject: [PATCH 0884/1678] rsi: add bluetooth rx endpoint

USB endpoint 1 is used for WLAN which is presently in use.
USB endpoint 2 is introduced for BT Rx traffic. Enumeration
of Rx BT endpoint and submitting Rx BT URB are added.

>From /sys/kernel/debug/usb/devices,
Ad=81(I) is for WLAN EP IN and Ad=01(O) is for Wlan EP OUT.
Ad=82(I) is for BT EP IN and Ad=02(O) is for BT EP OUT.

T:  Bus=01 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#=  4 Spd=480  MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=1618 ProdID=9113 Rev= 0.02
S:  Manufacturer=Redpine Signals, Inc.
S:  Product=Wireless USB Network Module
S:  SerialNumber=000000000001
C:* #Ifs= 1 Cfg#= 1 Atr=c0 MxPwr=  2mA
I:* If#= 0 Alt= 0 #EPs= 6 Cls=00(>ifc ) Sub=00 Prot=00 Driver=(none)
E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms

Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_usb.c | 37 +++++++++++++++-----------
 drivers/net/wireless/rsi/rsi_usb.h     |  6 ++---
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index bbce809d3b47..9ab86fb1da28 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -103,41 +103,42 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface,
 	struct usb_host_interface *iface_desc;
 	struct usb_endpoint_descriptor *endpoint;
 	__le16 buffer_size;
-	int ii, bep_found = 0;
+	int ii, bin_found = 0, bout_found = 0;
 
 	iface_desc = &(interface->altsetting[0]);
 
 	for (ii = 0; ii < iface_desc->desc.bNumEndpoints; ++ii) {
 		endpoint = &(iface_desc->endpoint[ii].desc);
 
-		if ((!(dev->bulkin_endpoint_addr)) &&
+		if (!dev->bulkin_endpoint_addr[bin_found] &&
 		    (endpoint->bEndpointAddress & USB_DIR_IN) &&
-		    ((endpoint->bmAttributes &
-		    USB_ENDPOINT_XFERTYPE_MASK) ==
+		    ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
 		    USB_ENDPOINT_XFER_BULK)) {
 			buffer_size = endpoint->wMaxPacketSize;
-			dev->bulkin_size = buffer_size;
-			dev->bulkin_endpoint_addr =
+			dev->bulkin_size[bin_found] = buffer_size;
+			dev->bulkin_endpoint_addr[bin_found] =
 				endpoint->bEndpointAddress;
+			bin_found++;
 		}
 
-		if (!dev->bulkout_endpoint_addr[bep_found] &&
+		if (!dev->bulkout_endpoint_addr[bout_found] &&
 		    !(endpoint->bEndpointAddress & USB_DIR_IN) &&
 		    ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
-		      USB_ENDPOINT_XFER_BULK)) {
-			dev->bulkout_endpoint_addr[bep_found] =
+		    USB_ENDPOINT_XFER_BULK)) {
+			buffer_size = endpoint->wMaxPacketSize;
+			dev->bulkout_endpoint_addr[bout_found] =
 				endpoint->bEndpointAddress;
 			buffer_size = endpoint->wMaxPacketSize;
-			dev->bulkout_size[bep_found] = buffer_size;
-			bep_found++;
+			dev->bulkout_size[bout_found] = buffer_size;
+			bout_found++;
 		}
 
-		if (bep_found >= MAX_BULK_EP)
+		if (bin_found >= MAX_BULK_EP || bout_found >= MAX_BULK_EP)
 			break;
 	}
 
-	if (!(dev->bulkin_endpoint_addr) &&
-	    (dev->bulkout_endpoint_addr[0]))
+	if (!(dev->bulkin_endpoint_addr[0]) &&
+	    dev->bulkout_endpoint_addr[0])
 		return -EINVAL;
 
 	return 0;
@@ -273,7 +274,7 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num)
 	usb_fill_bulk_urb(urb,
 			  dev->usbdev,
 			  usb_rcvbulkpipe(dev->usbdev,
-				dev->bulkin_endpoint_addr),
+			  dev->bulkin_endpoint_addr[ep_num - 1]),
 			  urb->transfer_buffer,
 			  3000,
 			  rsi_rx_done_handler,
@@ -745,6 +746,12 @@ static int rsi_probe(struct usb_interface *pfunction,
 	if (status)
 		goto err1;
 
+	if (adapter->priv->coex_mode > 1) {
+		status = rsi_rx_urb_submit(adapter, BT_EP);
+		if (status)
+			goto err1;
+	}
+
 	return 0;
 err1:
 	rsi_deinit_usb_interface(adapter);
diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
index 7e781d55bad8..0fda14ce693e 100644
--- a/drivers/net/wireless/rsi/rsi_usb.h
+++ b/drivers/net/wireless/rsi/rsi_usb.h
@@ -31,7 +31,7 @@
 #define USB_VENDOR_REGISTER_WRITE    0x16
 #define RSI_USB_TX_HEAD_ROOM         128
 
-#define MAX_RX_URBS                  1
+#define MAX_RX_URBS                  2
 #define MAX_BULK_EP                  8
 #define WLAN_EP                      1
 #define BT_EP                        2
@@ -54,8 +54,8 @@ struct rsi_91x_usbdev {
 	struct usb_interface *pfunction;
 	struct rx_usb_ctrl_block rx_cb[MAX_RX_URBS];
 	u8 *tx_buffer;
-	__le16 bulkin_size;
-	u8 bulkin_endpoint_addr;
+	__le16 bulkin_size[MAX_BULK_EP];
+	u8 bulkin_endpoint_addr[MAX_BULK_EP];
 	__le16 bulkout_size[MAX_BULK_EP];
 	u8 bulkout_endpoint_addr[MAX_BULK_EP];
 	u32 tx_blk_size;

From 4c10d56a76bb1d40ea6bede579d1522cbcdc438e Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Tue, 27 Feb 2018 19:56:13 +0530
Subject: [PATCH 0885/1678] rsi: add header file rsi_91x

The common parameters used by wlan and bt modules are add
to a new header file "rsi_91x.h" defined in 'include/net'

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_main.h | 12 ++--------
 include/net/rsi_91x.h               | 34 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 10 deletions(-)
 create mode 100644 include/net/rsi_91x.h

diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index ee469dc66562..b0f4e2cce0ec 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/skbuff.h>
 #include <net/mac80211.h>
+#include <net/rsi_91x.h>
 
 struct rsi_sta {
 	struct ieee80211_sta *sta;
@@ -85,10 +86,6 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
 #define MGMT_HW_Q			10
 #define BEACON_HW_Q			11
 
-/* Queue information */
-#define RSI_COEX_Q			0x0
-#define RSI_WIFI_MGMT_Q                 0x4
-#define RSI_WIFI_DATA_Q                 0x5
 #define IEEE80211_MGMT_FRAME            0x00
 #define IEEE80211_CTL_FRAME             0x04
 
@@ -293,11 +290,6 @@ struct rsi_common {
 	struct ieee80211_vif *roc_vif;
 };
 
-enum host_intf {
-	RSI_HOST_INTF_SDIO = 0,
-	RSI_HOST_INTF_USB
-};
-
 struct eepromrw_info {
 	u32 offset;
 	u32 length;
@@ -322,7 +314,7 @@ struct rsi_hw {
 	struct device *device;
 	u8 sc_nvifs;
 
-	enum host_intf rsi_host_intf;
+	enum rsi_host_intf rsi_host_intf;
 	u16 block_size;
 	enum ps_state ps_state;
 	struct rsi_ps_info ps_info;
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
new file mode 100644
index 000000000000..16a447b46119
--- /dev/null
+++ b/include/net/rsi_91x.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_HEADER_H__
+#define __RSI_HEADER_H__
+
+/* HAL queue information */
+#define RSI_COEX_Q			0x0
+#define RSI_BT_Q			0x2
+#define RSI_WLAN_Q                      0x3
+#define RSI_WIFI_MGMT_Q                 0x4
+#define RSI_WIFI_DATA_Q                 0x5
+#define RSI_BT_MGMT_Q			0x6
+#define RSI_BT_DATA_Q			0x7
+
+enum rsi_host_intf {
+	RSI_HOST_INTF_SDIO = 0,
+	RSI_HOST_INTF_USB
+};
+
+#endif

From 2108df3c4b1856588ca2e7f641900c2bbf38467e Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Tue, 27 Feb 2018 19:56:14 +0530
Subject: [PATCH 0886/1678] rsi: add coex support

With BT support, driver has to handle two streams of data
(i.e. wlan and BT). Actual coex implementation is in firmware.
Coex module just schedule the packets to firmware by taking them
from the corresponding paths.

Structures for module and protocol operations are introduced for
this purpose. Protocol operations structure is global structure
which can be shared among different modules. Move initialization
of coex and operating mode values to rsi_91x_init().

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/Kconfig        |   9 ++
 drivers/net/wireless/rsi/Makefile       |   1 +
 drivers/net/wireless/rsi/rsi_91x_coex.c | 177 ++++++++++++++++++++++++
 drivers/net/wireless/rsi/rsi_91x_hal.c  |  17 +--
 drivers/net/wireless/rsi/rsi_91x_main.c |  38 ++++-
 drivers/net/wireless/rsi/rsi_91x_mgmt.c |   2 +-
 drivers/net/wireless/rsi/rsi_91x_sdio.c |   1 +
 drivers/net/wireless/rsi/rsi_91x_usb.c  |   2 +
 drivers/net/wireless/rsi/rsi_coex.h     |  37 +++++
 drivers/net/wireless/rsi/rsi_main.h     |   6 +
 drivers/net/wireless/rsi/rsi_mgmt.h     |   3 +
 include/net/rsi_91x.h                   |  20 +++
 12 files changed, 303 insertions(+), 10 deletions(-)
 create mode 100644 drivers/net/wireless/rsi/rsi_91x_coex.c
 create mode 100644 drivers/net/wireless/rsi/rsi_coex.h

diff --git a/drivers/net/wireless/rsi/Kconfig b/drivers/net/wireless/rsi/Kconfig
index 7c5e4ca4e3d0..e6135ee35213 100644
--- a/drivers/net/wireless/rsi/Kconfig
+++ b/drivers/net/wireless/rsi/Kconfig
@@ -42,4 +42,13 @@ config RSI_USB
 	  This option enables the USB bus support in rsi drivers.
 	  Select M (recommended), if you have a RSI 1x1 wireless module.
 
+config RSI_COEX
+	bool "Redpine Signals WLAN BT Coexistence support"
+	depends on BT_HCIRSI && RSI_91X
+	default y
+	---help---
+	  This option enables the WLAN BT coex support in rsi drivers.
+	  Select M (recommended), if you have want to use this feature
+	  and you have RS9113 module.
+
 endif # WLAN_VENDOR_RSI
diff --git a/drivers/net/wireless/rsi/Makefile b/drivers/net/wireless/rsi/Makefile
index 47c45908d894..ff87121a5928 100644
--- a/drivers/net/wireless/rsi/Makefile
+++ b/drivers/net/wireless/rsi/Makefile
@@ -5,6 +5,7 @@ rsi_91x-y			+= rsi_91x_mac80211.o
 rsi_91x-y			+= rsi_91x_mgmt.o
 rsi_91x-y			+= rsi_91x_hal.o
 rsi_91x-y			+= rsi_91x_ps.o
+rsi_91x-$(CONFIG_RSI_COEX)	+= rsi_91x_coex.o
 rsi_91x-$(CONFIG_RSI_DEBUGFS)	+= rsi_91x_debugfs.o
 
 rsi_usb-y			+= rsi_91x_usb.o rsi_91x_usb_ops.o
diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
new file mode 100644
index 000000000000..c07e839017ea
--- /dev/null
+++ b/drivers/net/wireless/rsi/rsi_91x_coex.c
@@ -0,0 +1,177 @@
+/**
+ * Copyright (c) 2018 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "rsi_main.h"
+#include "rsi_coex.h"
+#include "rsi_mgmt.h"
+#include "rsi_hal.h"
+
+static enum rsi_coex_queues rsi_coex_determine_coex_q
+			(struct rsi_coex_ctrl_block *coex_cb)
+{
+	enum rsi_coex_queues q_num = RSI_COEX_Q_INVALID;
+
+	if (skb_queue_len(&coex_cb->coex_tx_qs[RSI_COEX_Q_COMMON]) > 0)
+		q_num = RSI_COEX_Q_COMMON;
+	if (skb_queue_len(&coex_cb->coex_tx_qs[RSI_COEX_Q_BT]) > 0)
+		q_num = RSI_COEX_Q_BT;
+	if (skb_queue_len(&coex_cb->coex_tx_qs[RSI_COEX_Q_WLAN]) > 0)
+		q_num = RSI_COEX_Q_WLAN;
+
+	return q_num;
+}
+
+static void rsi_coex_sched_tx_pkts(struct rsi_coex_ctrl_block *coex_cb)
+{
+	enum rsi_coex_queues coex_q = RSI_COEX_Q_INVALID;
+	struct sk_buff *skb;
+
+	do {
+		coex_q = rsi_coex_determine_coex_q(coex_cb);
+		rsi_dbg(INFO_ZONE, "queue = %d\n", coex_q);
+
+		if (coex_q == RSI_COEX_Q_BT)
+			skb = skb_dequeue(&coex_cb->coex_tx_qs[RSI_COEX_Q_BT]);
+	} while (coex_q != RSI_COEX_Q_INVALID);
+}
+
+static void rsi_coex_scheduler_thread(struct rsi_common *common)
+{
+	struct rsi_coex_ctrl_block *coex_cb =
+		(struct rsi_coex_ctrl_block *)common->coex_cb;
+	u32 timeout = EVENT_WAIT_FOREVER;
+
+	do {
+		rsi_wait_event(&coex_cb->coex_tx_thread.event, timeout);
+		rsi_reset_event(&coex_cb->coex_tx_thread.event);
+
+		rsi_coex_sched_tx_pkts(coex_cb);
+	} while (atomic_read(&coex_cb->coex_tx_thread.thread_done) == 0);
+
+	complete_and_exit(&coex_cb->coex_tx_thread.completion, 0);
+}
+
+int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg)
+{
+	u8 msg_type = msg[RSI_RX_DESC_MSG_TYPE_OFFSET];
+
+	switch (msg_type) {
+	case COMMON_CARD_READY_IND:
+		rsi_dbg(INFO_ZONE, "common card ready received\n");
+		rsi_handle_card_ready(common, msg);
+		break;
+	case SLEEP_NOTIFY_IND:
+		rsi_dbg(INFO_ZONE, "sleep notify received\n");
+		rsi_mgmt_pkt_recv(common, msg);
+		break;
+	}
+
+	return 0;
+}
+
+static inline int rsi_map_coex_q(u8 hal_queue)
+{
+	switch (hal_queue) {
+	case RSI_COEX_Q:
+		return RSI_COEX_Q_COMMON;
+	case RSI_WLAN_Q:
+		return RSI_COEX_Q_WLAN;
+	case RSI_BT_Q:
+		return RSI_COEX_Q_BT;
+	}
+	return RSI_COEX_Q_INVALID;
+}
+
+int rsi_coex_send_pkt(void *priv, struct sk_buff *skb, u8 hal_queue)
+{
+	struct rsi_common *common = (struct rsi_common *)priv;
+	struct rsi_coex_ctrl_block *coex_cb =
+		(struct rsi_coex_ctrl_block *)common->coex_cb;
+	struct skb_info *tx_params = NULL;
+	enum rsi_coex_queues coex_q;
+	int status;
+
+	coex_q = rsi_map_coex_q(hal_queue);
+	if (coex_q == RSI_COEX_Q_INVALID) {
+		rsi_dbg(ERR_ZONE, "Invalid coex queue\n");
+		return -EINVAL;
+	}
+	if (coex_q != RSI_COEX_Q_COMMON &&
+	    coex_q != RSI_COEX_Q_WLAN) {
+		skb_queue_tail(&coex_cb->coex_tx_qs[coex_q], skb);
+		rsi_set_event(&coex_cb->coex_tx_thread.event);
+		return 0;
+	}
+	if (common->iface_down) {
+		tx_params =
+			(struct skb_info *)&IEEE80211_SKB_CB(skb)->driver_data;
+
+		if (!(tx_params->flags & INTERNAL_MGMT_PKT)) {
+			rsi_indicate_tx_status(common->priv, skb, -EINVAL);
+			return 0;
+		}
+	}
+
+	/* Send packet to hal */
+	if (skb->priority == MGMT_SOFT_Q)
+		status = rsi_send_mgmt_pkt(common, skb);
+	else
+		status = rsi_send_data_pkt(common, skb);
+
+	return status;
+}
+
+int rsi_coex_attach(struct rsi_common *common)
+{
+	struct rsi_coex_ctrl_block *coex_cb;
+	int cnt;
+
+	coex_cb = kzalloc(sizeof(*coex_cb), GFP_KERNEL);
+	if (!coex_cb)
+		return -ENOMEM;
+
+	common->coex_cb = (void *)coex_cb;
+	coex_cb->priv = common;
+
+	/* Initialize co-ex queues */
+	for (cnt = 0; cnt < NUM_COEX_TX_QUEUES; cnt++)
+		skb_queue_head_init(&coex_cb->coex_tx_qs[cnt]);
+	rsi_init_event(&coex_cb->coex_tx_thread.event);
+
+	/* Initialize co-ex thread */
+	if (rsi_create_kthread(common,
+			       &coex_cb->coex_tx_thread,
+			       rsi_coex_scheduler_thread,
+			       "Coex-Tx-Thread")) {
+		rsi_dbg(ERR_ZONE, "%s: Unable to init tx thrd\n", __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void rsi_coex_detach(struct rsi_common *common)
+{
+	struct rsi_coex_ctrl_block *coex_cb =
+		(struct rsi_coex_ctrl_block *)common->coex_cb;
+	int cnt;
+
+	rsi_kill_thread(&coex_cb->coex_tx_thread);
+
+	for (cnt = 0; cnt < NUM_COEX_TX_QUEUES; cnt++)
+		skb_queue_purge(&coex_cb->coex_tx_qs[cnt]);
+
+	kfree(coex_cb);
+}
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index 1176de646942..151d228a6167 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -31,8 +31,15 @@ int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
 	struct rsi_hw *adapter = common->priv;
 	int status;
 
+	if (common->coex_mode > 1)
+		mutex_lock(&common->tx_bus_mutex);
+
 	status = adapter->host_intf_ops->write_pkt(common->priv,
 						   skb->data, skb->len);
+
+	if (common->coex_mode > 1)
+		mutex_unlock(&common->tx_bus_mutex);
+
 	return status;
 }
 
@@ -296,8 +303,7 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
 	if (status)
 		goto err;
 
-	status = adapter->host_intf_ops->write_pkt(common->priv, skb->data,
-						   skb->len);
+	status = rsi_send_pkt_to_bus(common, skb);
 	if (status)
 		rsi_dbg(ERR_ZONE, "%s: Failed to write pkt\n", __func__);
 
@@ -342,8 +348,7 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
 		goto err;
 
 	rsi_prepare_mgmt_desc(common, skb);
-	status = adapter->host_intf_ops->write_pkt(common->priv,
-						   (u8 *)skb->data, skb->len);
+	status = rsi_send_pkt_to_bus(common, skb);
 	if (status)
 		rsi_dbg(ERR_ZONE, "%s: Failed to write the packet\n", __func__);
 
@@ -926,10 +931,6 @@ int rsi_hal_device_init(struct rsi_hw *adapter)
 {
 	struct rsi_common *common = adapter->priv;
 
-	common->coex_mode = RSI_DEV_COEX_MODE_WIFI_ALONE;
-	common->oper_mode = RSI_DEV_OPMODE_WIFI_ALONE;
-	adapter->device_model = RSI_DEV_9113;
-
 	switch (adapter->device_model) {
 	case RSI_DEV_9113:
 		if (rsi_load_firmware(adapter)) {
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index 0413af88cd25..641c388b5666 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -20,6 +20,7 @@
 #include <linux/firmware.h>
 #include "rsi_mgmt.h"
 #include "rsi_common.h"
+#include "rsi_coex.h"
 #include "rsi_hal.h"
 
 u32 rsi_zone_enabled = /* INFO_ZONE |
@@ -160,8 +161,15 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
 
 		switch (queueno) {
 		case RSI_COEX_Q:
-			rsi_mgmt_pkt_recv(common, (frame_desc + offset));
+#ifdef CONFIG_RSI_COEX
+			if (common->coex_mode > 1)
+				rsi_coex_recv_pkt(common, frame_desc + offset);
+			else
+#endif
+				rsi_mgmt_pkt_recv(common,
+						  (frame_desc + offset));
 			break;
+
 		case RSI_WIFI_DATA_Q:
 			skb = rsi_prepare_skb(common,
 					      (frame_desc + offset),
@@ -217,6 +225,15 @@ static void rsi_tx_scheduler_thread(struct rsi_common *common)
 	complete_and_exit(&common->tx_thread.completion, 0);
 }
 
+#ifdef CONFIG_RSI_COEX
+enum rsi_host_intf rsi_get_host_intf(void *priv)
+{
+	struct rsi_common *common = (struct rsi_common *)priv;
+
+	return common->priv->rsi_host_intf;
+}
+#endif
+
 /**
  * rsi_91x_init() - This function initializes os interface operations.
  * @void: Void.
@@ -251,6 +268,7 @@ struct rsi_hw *rsi_91x_init(void)
 	mutex_init(&common->mutex);
 	mutex_init(&common->tx_lock);
 	mutex_init(&common->rx_lock);
+	mutex_init(&common->tx_bus_mutex);
 
 	if (rsi_create_kthread(common,
 			       &common->tx_thread,
@@ -265,6 +283,19 @@ struct rsi_hw *rsi_91x_init(void)
 	timer_setup(&common->roc_timer, rsi_roc_timeout, 0);
 	init_completion(&common->wlan_init_completion);
 	common->init_done = true;
+
+	common->coex_mode = RSI_DEV_COEX_MODE_WIFI_ALONE;
+	common->oper_mode = RSI_DEV_OPMODE_WIFI_ALONE;
+	adapter->device_model = RSI_DEV_9113;
+#ifdef CONFIG_RSI_COEX
+	if (common->coex_mode > 1) {
+		if (rsi_coex_attach(common)) {
+			rsi_dbg(ERR_ZONE, "Failed to init coex module\n");
+			goto err;
+		}
+	}
+#endif
+
 	return adapter;
 
 err:
@@ -294,6 +325,11 @@ void rsi_91x_deinit(struct rsi_hw *adapter)
 
 	common->init_done = false;
 
+#ifdef CONFIG_RSI_COEX
+	if (common->coex_mode > 1)
+		rsi_coex_detach(common);
+#endif
+
 	kfree(common);
 	kfree(adapter->rsi_dev);
 	kfree(adapter);
diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index 46c9d5470dfb..c21fca750fd4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -1791,7 +1791,7 @@ out:
 	return -EINVAL;
 }
 
-static int rsi_handle_card_ready(struct rsi_common *common, u8 *msg)
+int rsi_handle_card_ready(struct rsi_common *common, u8 *msg)
 {
 	switch (common->fsm_state) {
 	case FSM_CARD_NOT_READY:
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index b0cf41195051..ba38c6d00128 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include "rsi_sdio.h"
 #include "rsi_common.h"
+#include "rsi_coex.h"
 #include "rsi_hal.h"
 
 /**
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 9ab86fb1da28..b33a05f057ba 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -16,8 +16,10 @@
  */
 
 #include <linux/module.h>
+#include <net/rsi_91x.h>
 #include "rsi_usb.h"
 #include "rsi_hal.h"
+#include "rsi_coex.h"
 
 /**
  * rsi_usb_card_write() - This function writes to the USB Card.
diff --git a/drivers/net/wireless/rsi/rsi_coex.h b/drivers/net/wireless/rsi/rsi_coex.h
new file mode 100644
index 000000000000..0fdc67f37a56
--- /dev/null
+++ b/drivers/net/wireless/rsi/rsi_coex.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2018 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_COEX_H__
+#define __RSI_COEX_H__
+
+#include "rsi_common.h"
+
+#ifdef CONFIG_RSI_COEX
+#define COMMON_CARD_READY_IND           0
+#define NUM_COEX_TX_QUEUES              5
+
+struct rsi_coex_ctrl_block {
+	struct rsi_common *priv;
+	struct sk_buff_head coex_tx_qs[NUM_COEX_TX_QUEUES];
+	struct rsi_thread coex_tx_thread;
+};
+
+int rsi_coex_attach(struct rsi_common *common);
+void rsi_coex_detach(struct rsi_common *common);
+int rsi_coex_send_pkt(void *priv, struct sk_buff *skb, u8 proto_type);
+int rsi_coex_recv_pkt(struct rsi_common *common, u8 *msg);
+#endif
+#endif
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index b0f4e2cce0ec..99a00a3ccaa4 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -206,6 +206,7 @@ struct rsi_common {
 	struct rsi_hw *priv;
 	struct vif_priv vif_info[RSI_MAX_VIFS];
 
+	void *coex_cb;
 	bool mgmt_q_block;
 	struct version_info lmac_ver;
 
@@ -270,6 +271,8 @@ struct rsi_common {
 	u8 obm_ant_sel_val;
 	int tx_power;
 	u8 ant_in_use;
+	/* Mutex used for writing packet to bus */
+	struct mutex tx_bus_mutex;
 	bool hibernate_resume;
 	bool reinit_hw;
 	u8 wow_flags;
@@ -359,4 +362,7 @@ struct rsi_host_intf_ops {
 				      u8 *fw);
 	int (*reinit_device)(struct rsi_hw *adapter);
 };
+
+enum rsi_host_intf rsi_get_host_intf(void *priv);
+
 #endif
diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h
index 389094a3f91c..cf6567ae5bbe 100644
--- a/drivers/net/wireless/rsi/rsi_mgmt.h
+++ b/drivers/net/wireless/rsi/rsi_mgmt.h
@@ -57,12 +57,14 @@
 #define WOW_PATTERN_SIZE 256
 
 /* Receive Frame Types */
+#define RSI_RX_DESC_MSG_TYPE_OFFSET	2
 #define TA_CONFIRM_TYPE                 0x01
 #define RX_DOT11_MGMT                   0x02
 #define TX_STATUS_IND                   0x04
 #define BEACON_EVENT_IND		0x08
 #define PROBEREQ_CONFIRM                2
 #define CARD_READY_IND                  0x00
+#define SLEEP_NOTIFY_IND                0x06
 
 #define RSI_DELETE_PEER                 0x0
 #define RSI_ADD_PEER                    0x1
@@ -638,6 +640,7 @@ static inline void rsi_set_len_qno(__le16 *addr, u16 len, u8 qno)
 	*addr = cpu_to_le16(len | ((qno & 7) << 12));
 }
 
+int rsi_handle_card_ready(struct rsi_common *common, u8 *msg);
 int rsi_mgmt_pkt_recv(struct rsi_common *common, u8 *msg);
 int rsi_set_vap_capabilities(struct rsi_common *common, enum opmode mode,
 			     u8 *mac_addr, u8 vap_id, u8 vap_status);
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
index 16a447b46119..737ab4e01e3b 100644
--- a/include/net/rsi_91x.h
+++ b/include/net/rsi_91x.h
@@ -17,6 +17,8 @@
 #ifndef __RSI_HEADER_H__
 #define __RSI_HEADER_H__
 
+#include <linux/skbuff.h>
+
 /* HAL queue information */
 #define RSI_COEX_Q			0x0
 #define RSI_BT_Q			0x2
@@ -26,9 +28,27 @@
 #define RSI_BT_MGMT_Q			0x6
 #define RSI_BT_DATA_Q			0x7
 
+enum rsi_coex_queues {
+	RSI_COEX_Q_INVALID = -1,
+	RSI_COEX_Q_COMMON = 0,
+	RSI_COEX_Q_BT,
+	RSI_COEX_Q_WLAN
+};
+
 enum rsi_host_intf {
 	RSI_HOST_INTF_SDIO = 0,
 	RSI_HOST_INTF_USB
 };
 
+struct rsi_proto_ops {
+	int (*coex_send_pkt)(void *priv, struct sk_buff *skb, u8 hal_queue);
+	enum rsi_host_intf (*get_host_intf)(void *priv);
+	void (*set_bt_context)(void *priv, void *context);
+};
+
+struct rsi_mod_ops {
+	int (*attach)(void *priv, struct rsi_proto_ops *ops);
+	void (*detach)(void *priv);
+	int (*recv_pkt)(void *priv, u8 *msg);
+};
 #endif

From 38aa4da504837ba8b9c04941e843642f129661eb Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Tue, 27 Feb 2018 19:56:15 +0530
Subject: [PATCH 0887/1678] Bluetooth: btrsi: add new rsi bluetooth driver

Redpine bluetooth driver is a thin driver which depends on
'rsi_91x' driver for transmitting and receiving packets
to/from device. It creates hci interface when attach() is
called from 'rsi_91x' module.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bluetooth/Kconfig  |  12 +++
 drivers/bluetooth/Makefile |   2 +
 drivers/bluetooth/btrsi.c  | 188 +++++++++++++++++++++++++++++++++++++
 include/net/rsi_91x.h      |   4 +-
 4 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 drivers/bluetooth/btrsi.c

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 07e55cd8f8c8..d8bbd661dbdb 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -392,4 +392,16 @@ config BT_QCOMSMD
 	  Say Y here to compile support for HCI over Qualcomm SMD into the
 	  kernel or say M to compile as a module.
 
+config BT_HCIRSI
+	tristate "Redpine HCI support"
+	default n
+	select RSI_COEX
+	help
+	  Redpine BT driver.
+	  This driver handles BT traffic from upper layers and pass
+	  to the RSI_91x coex module for further scheduling to device
+
+	  Say Y here to compile support for HCI over Redpine into the
+	  kernel or say M to compile as a module.
+
 endmenu
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 4e4e44d09796..03cfc1b20c4a 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -28,6 +28,8 @@ obj-$(CONFIG_BT_QCA)		+= btqca.o
 
 obj-$(CONFIG_BT_HCIUART_NOKIA)	+= hci_nokia.o
 
+obj-$(CONFIG_BT_HCIRSI)		+= btrsi.o
+
 btmrvl-y			:= btmrvl_main.o
 btmrvl-$(CONFIG_DEBUG_FS)	+= btmrvl_debugfs.o
 
diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c
new file mode 100644
index 000000000000..5034325e417c
--- /dev/null
+++ b/drivers/bluetooth/btrsi.c
@@ -0,0 +1,188 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <asm/unaligned.h>
+#include <net/rsi_91x.h>
+#include <net/genetlink.h>
+
+#define RSI_HEADROOM_FOR_BT_HAL	16
+#define RSI_FRAME_DESC_SIZE	16
+
+struct rsi_hci_adapter {
+	void *priv;
+	struct rsi_proto_ops *proto_ops;
+	struct hci_dev *hdev;
+};
+
+static int rsi_hci_open(struct hci_dev *hdev)
+{
+	return 0;
+}
+
+static int rsi_hci_close(struct hci_dev *hdev)
+{
+	return 0;
+}
+
+static int rsi_hci_flush(struct hci_dev *hdev)
+{
+	return 0;
+}
+
+static int rsi_hci_send_pkt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct rsi_hci_adapter *h_adapter = hci_get_drvdata(hdev);
+	struct sk_buff *new_skb = NULL;
+
+	switch (hci_skb_pkt_type(skb)) {
+	case HCI_COMMAND_PKT:
+		hdev->stat.cmd_tx++;
+		break;
+	case HCI_ACLDATA_PKT:
+		hdev->stat.acl_tx++;
+		break;
+	case HCI_SCODATA_PKT:
+		hdev->stat.sco_tx++;
+		break;
+	}
+
+	if (skb_headroom(skb) < RSI_HEADROOM_FOR_BT_HAL) {
+		/* Insufficient skb headroom - allocate a new skb */
+		new_skb = skb_realloc_headroom(skb, RSI_HEADROOM_FOR_BT_HAL);
+		if (unlikely(!new_skb))
+			return -ENOMEM;
+		bt_cb(new_skb)->pkt_type = hci_skb_pkt_type(skb);
+		kfree_skb(skb);
+		skb = new_skb;
+	}
+
+	return h_adapter->proto_ops->coex_send_pkt(h_adapter->priv, skb,
+						   RSI_BT_Q);
+}
+
+static int rsi_hci_recv_pkt(void *priv, const u8 *pkt)
+{
+	struct rsi_hci_adapter *h_adapter = priv;
+	struct hci_dev *hdev = h_adapter->hdev;
+	struct sk_buff *skb;
+	int pkt_len = get_unaligned_le16(pkt) & 0x0fff;
+
+	skb = dev_alloc_skb(pkt_len);
+	if (!skb)
+		return -ENOMEM;
+
+	memcpy(skb->data, pkt + RSI_FRAME_DESC_SIZE, pkt_len);
+	skb_put(skb, pkt_len);
+	h_adapter->hdev->stat.byte_rx += skb->len;
+
+	hci_skb_pkt_type(skb) = pkt[14];
+
+	return hci_recv_frame(hdev, skb);
+}
+
+static int rsi_hci_attach(void *priv, struct rsi_proto_ops *ops)
+{
+	struct rsi_hci_adapter *h_adapter = NULL;
+	struct hci_dev *hdev;
+	int err = 0;
+
+	h_adapter = kzalloc(sizeof(*h_adapter), GFP_KERNEL);
+	if (!h_adapter)
+		return -ENOMEM;
+
+	h_adapter->priv = priv;
+	ops->set_bt_context(priv, h_adapter);
+	h_adapter->proto_ops = ops;
+
+	hdev = hci_alloc_dev();
+	if (!hdev) {
+		BT_ERR("Failed to alloc HCI device");
+		goto err;
+	}
+
+	h_adapter->hdev = hdev;
+
+	if (ops->get_host_intf(priv) == RSI_HOST_INTF_SDIO)
+		hdev->bus = HCI_SDIO;
+	else
+		hdev->bus = HCI_USB;
+
+	hci_set_drvdata(hdev, h_adapter);
+	hdev->dev_type = HCI_PRIMARY;
+	hdev->open = rsi_hci_open;
+	hdev->close = rsi_hci_close;
+	hdev->flush = rsi_hci_flush;
+	hdev->send = rsi_hci_send_pkt;
+
+	err = hci_register_dev(hdev);
+	if (err < 0) {
+		BT_ERR("HCI registration failed with errcode %d", err);
+		hci_free_dev(hdev);
+		goto err;
+	}
+
+	return 0;
+err:
+	h_adapter->hdev = NULL;
+	kfree(h_adapter);
+	return -EINVAL;
+}
+
+static void rsi_hci_detach(void *priv)
+{
+	struct rsi_hci_adapter *h_adapter = priv;
+	struct hci_dev *hdev;
+
+	if (!h_adapter)
+		return;
+
+	hdev = h_adapter->hdev;
+	if (hdev) {
+		hci_unregister_dev(hdev);
+		hci_free_dev(hdev);
+		h_adapter->hdev = NULL;
+	}
+
+	kfree(h_adapter);
+}
+
+const struct rsi_mod_ops rsi_bt_ops = {
+	.attach	= rsi_hci_attach,
+	.detach	= rsi_hci_detach,
+	.recv_pkt = rsi_hci_recv_pkt,
+};
+EXPORT_SYMBOL(rsi_bt_ops);
+
+static int rsi_91x_bt_module_init(void)
+{
+	return 0;
+}
+
+static void rsi_91x_bt_module_exit(void)
+{
+	return;
+}
+
+module_init(rsi_91x_bt_module_init);
+module_exit(rsi_91x_bt_module_exit);
+MODULE_AUTHOR("Redpine Signals Inc");
+MODULE_DESCRIPTION("RSI BT driver");
+MODULE_SUPPORTED_DEVICE("RSI-BT");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
index 737ab4e01e3b..040f07b47f1f 100644
--- a/include/net/rsi_91x.h
+++ b/include/net/rsi_91x.h
@@ -49,6 +49,8 @@ struct rsi_proto_ops {
 struct rsi_mod_ops {
 	int (*attach)(void *priv, struct rsi_proto_ops *ops);
 	void (*detach)(void *priv);
-	int (*recv_pkt)(void *priv, u8 *msg);
+	int (*recv_pkt)(void *priv, const u8 *msg);
 };
+
+extern const struct rsi_mod_ops rsi_bt_ops;
 #endif

From 716b840c76417e608af3a8d354028604045ec46f Mon Sep 17 00:00:00 2001
From: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Date: Tue, 27 Feb 2018 19:56:16 +0530
Subject: [PATCH 0888/1678] rsi: handle BT traffic in driver

BT frames are passed through coex and hal modules to BUS.
After firmware is loaded, based on the operating mode CARD
READY frame comes for each protocol. When BT card ready is
received, BT attach is called.
Protocol operations are exchanged between the modules
at initialization time.
Build flag CONFIG_RSI_COEX is introduced to enable coex support
if CONFIG_BT_RSIHCI is enabled. Coex operations are valid if
coex mode is greater than 1 only.

Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_coex.c     |  4 +-
 drivers/net/wireless/rsi/rsi_91x_core.c     | 20 ++++++---
 drivers/net/wireless/rsi/rsi_91x_hal.c      | 39 ++++++++++++++++
 drivers/net/wireless/rsi/rsi_91x_main.c     | 49 +++++++++++++++++++--
 drivers/net/wireless/rsi/rsi_91x_sdio_ops.c |  1 +
 drivers/net/wireless/rsi/rsi_common.h       |  1 +
 drivers/net/wireless/rsi/rsi_hal.h          | 10 +++++
 drivers/net/wireless/rsi/rsi_main.h         |  3 ++
 8 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_coex.c b/drivers/net/wireless/rsi/rsi_91x_coex.c
index c07e839017ea..d055099dadf1 100644
--- a/drivers/net/wireless/rsi/rsi_91x_coex.c
+++ b/drivers/net/wireless/rsi/rsi_91x_coex.c
@@ -43,8 +43,10 @@ static void rsi_coex_sched_tx_pkts(struct rsi_coex_ctrl_block *coex_cb)
 		coex_q = rsi_coex_determine_coex_q(coex_cb);
 		rsi_dbg(INFO_ZONE, "queue = %d\n", coex_q);
 
-		if (coex_q == RSI_COEX_Q_BT)
+		if (coex_q == RSI_COEX_Q_BT) {
 			skb = skb_dequeue(&coex_cb->coex_tx_qs[RSI_COEX_Q_BT]);
+			rsi_send_bt_pkt(coex_cb->priv, skb);
+		}
 	} while (coex_q != RSI_COEX_Q_INVALID);
 }
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c
index d0d2201830e8..5dafd2e1306c 100644
--- a/drivers/net/wireless/rsi/rsi_91x_core.c
+++ b/drivers/net/wireless/rsi/rsi_91x_core.c
@@ -17,6 +17,7 @@
 #include "rsi_mgmt.h"
 #include "rsi_common.h"
 #include "rsi_hal.h"
+#include "rsi_coex.h"
 
 /**
  * rsi_determine_min_weight_queue() - This function determines the queue with
@@ -301,14 +302,23 @@ void rsi_core_qos_processor(struct rsi_common *common)
 			mutex_unlock(&common->tx_lock);
 			break;
 		}
-
-		if (q_num == MGMT_SOFT_Q) {
-			status = rsi_send_mgmt_pkt(common, skb);
-		} else if (q_num == MGMT_BEACON_Q) {
+		if (q_num == MGMT_BEACON_Q) {
 			status = rsi_send_pkt_to_bus(common, skb);
 			dev_kfree_skb(skb);
 		} else {
-			status = rsi_send_data_pkt(common, skb);
+#ifdef CONFIG_RSI_COEX
+			if (common->coex_mode > 1) {
+				status = rsi_coex_send_pkt(common, skb,
+							   RSI_WLAN_Q);
+			} else {
+#endif
+				if (q_num == MGMT_SOFT_Q)
+					status = rsi_send_mgmt_pkt(common, skb);
+				else
+					status = rsi_send_data_pkt(common, skb);
+#ifdef CONFIG_RSI_COEX
+			}
+#endif
 		}
 
 		if (status) {
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index 151d228a6167..de608ae365a4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/firmware.h>
+#include <net/bluetooth/bluetooth.h>
 #include "rsi_mgmt.h"
 #include "rsi_hal.h"
 #include "rsi_sdio.h"
@@ -24,6 +25,7 @@
 static struct ta_metadata metadata_flash_content[] = {
 	{"flash_content", 0x00010000},
 	{"rsi/rs9113_wlan_qspi.rps", 0x00010000},
+	{"rsi/rs9113_wlan_bt_dual_mode.rps", 0x00010000},
 };
 
 int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
@@ -357,6 +359,43 @@ err:
 	return status;
 }
 
+int rsi_send_bt_pkt(struct rsi_common *common, struct sk_buff *skb)
+{
+	int status = -EINVAL;
+	u8 header_size = 0;
+	struct rsi_bt_desc *bt_desc;
+	u8 queueno = ((skb->data[1] >> 4) & 0xf);
+
+	if (queueno == RSI_BT_MGMT_Q) {
+		status = rsi_send_pkt_to_bus(common, skb);
+		if (status)
+			rsi_dbg(ERR_ZONE, "%s: Failed to write bt mgmt pkt\n",
+				__func__);
+		goto out;
+	}
+	header_size = FRAME_DESC_SZ;
+	if (header_size > skb_headroom(skb)) {
+		rsi_dbg(ERR_ZONE, "%s: Not enough headroom\n", __func__);
+		status = -ENOSPC;
+		goto out;
+	}
+	skb_push(skb, header_size);
+	memset(skb->data, 0, header_size);
+	bt_desc = (struct rsi_bt_desc *)skb->data;
+
+	rsi_set_len_qno(&bt_desc->len_qno, (skb->len - FRAME_DESC_SZ),
+			RSI_BT_DATA_Q);
+	bt_desc->bt_pkt_type = cpu_to_le16(bt_cb(skb)->pkt_type);
+
+	status = rsi_send_pkt_to_bus(common, skb);
+	if (status)
+		rsi_dbg(ERR_ZONE, "%s: Failed to write bt pkt\n", __func__);
+
+out:
+	dev_kfree_skb(skb);
+	return status;
+}
+
 int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb)
 {
 	struct rsi_hw *adapter = (struct rsi_hw *)common->priv;
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index 641c388b5666..b3e7d75dafae 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -18,6 +18,7 @@
 
 #include <linux/module.h>
 #include <linux/firmware.h>
+#include <net/rsi_91x.h>
 #include "rsi_mgmt.h"
 #include "rsi_common.h"
 #include "rsi_coex.h"
@@ -35,6 +36,14 @@ u32 rsi_zone_enabled = /* INFO_ZONE |
 			0;
 EXPORT_SYMBOL_GPL(rsi_zone_enabled);
 
+#ifdef CONFIG_RSI_COEX
+static struct rsi_proto_ops g_proto_ops = {
+	.coex_send_pkt = rsi_coex_send_pkt,
+	.get_host_intf = rsi_get_host_intf,
+	.set_bt_context = rsi_set_bt_context,
+};
+#endif
+
 /**
  * rsi_dbg() - This function outputs informational messages.
  * @zone: Zone of interest for output message.
@@ -144,6 +153,9 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
 	u32 index, length = 0, queueno = 0;
 	u16 actual_length = 0, offset;
 	struct sk_buff *skb = NULL;
+#ifdef CONFIG_RSI_COEX
+	u8 bt_pkt_type;
+#endif
 
 	index = 0;
 	do {
@@ -185,6 +197,25 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len)
 			rsi_mgmt_pkt_recv(common, (frame_desc + offset));
 			break;
 
+#ifdef CONFIG_RSI_COEX
+		case RSI_BT_MGMT_Q:
+		case RSI_BT_DATA_Q:
+#define BT_RX_PKT_TYPE_OFST	14
+#define BT_CARD_READY_IND	0x89
+			bt_pkt_type = frame_desc[offset + BT_RX_PKT_TYPE_OFST];
+			if (bt_pkt_type == BT_CARD_READY_IND) {
+				rsi_dbg(INFO_ZONE, "BT Card ready recvd\n");
+				if (rsi_bt_ops.attach(common, &g_proto_ops))
+					rsi_dbg(ERR_ZONE,
+						"Failed to attach BT module\n");
+			} else {
+				if (common->bt_adapter)
+					rsi_bt_ops.recv_pkt(common->bt_adapter,
+							frame_desc + offset);
+			}
+			break;
+#endif
+
 		default:
 			rsi_dbg(ERR_ZONE, "%s: pkt from invalid queue: %d\n",
 				__func__,   queueno);
@@ -232,6 +263,13 @@ enum rsi_host_intf rsi_get_host_intf(void *priv)
 
 	return common->priv->rsi_host_intf;
 }
+
+void rsi_set_bt_context(void *priv, void *bt_context)
+{
+	struct rsi_common *common = (struct rsi_common *)priv;
+
+	common->bt_adapter = bt_context;
+}
 #endif
 
 /**
@@ -323,13 +361,18 @@ void rsi_91x_deinit(struct rsi_hw *adapter)
 	for (ii = 0; ii < NUM_SOFT_QUEUES; ii++)
 		skb_queue_purge(&common->tx_queue[ii]);
 
-	common->init_done = false;
-
 #ifdef CONFIG_RSI_COEX
-	if (common->coex_mode > 1)
+	if (common->coex_mode > 1) {
+		if (common->bt_adapter) {
+			rsi_bt_ops.detach(common->bt_adapter);
+			common->bt_adapter = NULL;
+		}
 		rsi_coex_detach(common);
+	}
 #endif
 
+	common->init_done = false;
+
 	kfree(common);
 	kfree(adapter->rsi_dev);
 	kfree(adapter);
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 9fbc0ef82559..169c98125202 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -16,6 +16,7 @@
  */
 
 #include <linux/firmware.h>
+#include <net/rsi_91x.h>
 #include "rsi_sdio.h"
 #include "rsi_common.h"
 
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index 1d8af419fb1f..461658522d0f 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -62,6 +62,7 @@ static inline int rsi_create_kthread(struct rsi_common *common,
 				     u8 *name)
 {
 	init_completion(&thread->completion);
+	atomic_set(&thread->thread_done, 0);
 	thread->task = kthread_run(func_ptr, common, "%s", name);
 	if (IS_ERR(thread->task))
 		return (int)PTR_ERR(thread->task);
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index a09d36b6b765..e7122239f969 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -145,8 +145,18 @@ struct rsi_data_desc {
 	u8 sta_id;
 } __packed;
 
+struct rsi_bt_desc {
+	__le16 len_qno;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 reserved3;
+	__le16 reserved4;
+	__le16 bt_pkt_type;
+} __packed;
+
 int rsi_hal_device_init(struct rsi_hw *adapter);
 int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb);
 int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb);
+int rsi_send_bt_pkt(struct rsi_common *common, struct sk_buff *skb);
 
 #endif
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index 99a00a3ccaa4..47af0cb9de1d 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -291,6 +291,8 @@ struct rsi_common {
 	bool p2p_enabled;
 	struct timer_list roc_timer;
 	struct ieee80211_vif *roc_vif;
+
+	void *bt_adapter;
 };
 
 struct eepromrw_info {
@@ -364,5 +366,6 @@ struct rsi_host_intf_ops {
 };
 
 enum rsi_host_intf rsi_get_host_intf(void *priv);
+void rsi_set_bt_context(void *priv, void *bt_context);
 
 #endif

From 898b255339310944965309a465309317ed5538ae Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Tue, 27 Feb 2018 19:56:17 +0530
Subject: [PATCH 0889/1678] rsi: add module parameter operating mode

Operating mode determines the support for other protocols.
This is made as module parameter for better usage.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Siva Rebbagondla <siva.rebbagondla@redpinesignals.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_main.c | 48 ++++++++++++++++++++++---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 10 +++++-
 drivers/net/wireless/rsi/rsi_91x_usb.c  | 10 +++++-
 drivers/net/wireless/rsi/rsi_common.h   |  2 +-
 drivers/net/wireless/rsi/rsi_hal.h      | 11 ++++++
 5 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index b3e7d75dafae..1485a0c89df2 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -70,8 +70,24 @@ EXPORT_SYMBOL_GPL(rsi_dbg);
 static char *opmode_str(int oper_mode)
 {
 	switch (oper_mode) {
-	case RSI_DEV_OPMODE_WIFI_ALONE:
+	case DEV_OPMODE_WIFI_ALONE:
 		return "Wi-Fi alone";
+	case DEV_OPMODE_BT_ALONE:
+		return "BT EDR alone";
+	case DEV_OPMODE_BT_LE_ALONE:
+		return "BT LE alone";
+	case DEV_OPMODE_BT_DUAL:
+		return "BT Dual";
+	case DEV_OPMODE_STA_BT:
+		return "Wi-Fi STA + BT EDR";
+	case DEV_OPMODE_STA_BT_LE:
+		return "Wi-Fi STA + BT LE";
+	case DEV_OPMODE_STA_BT_DUAL:
+		return "Wi-Fi STA + BT DUAL";
+	case DEV_OPMODE_AP_BT:
+		return "Wi-Fi AP + BT EDR";
+	case DEV_OPMODE_AP_BT_DUAL:
+		return "Wi-Fi AP + BT DUAL";
 	}
 
 	return "Unknown";
@@ -278,7 +294,7 @@ void rsi_set_bt_context(void *priv, void *bt_context)
  *
  * Return: Pointer to the adapter structure on success, NULL on failure .
  */
-struct rsi_hw *rsi_91x_init(void)
+struct rsi_hw *rsi_91x_init(u16 oper_mode)
 {
 	struct rsi_hw *adapter = NULL;
 	struct rsi_common *common = NULL;
@@ -321,9 +337,33 @@ struct rsi_hw *rsi_91x_init(void)
 	timer_setup(&common->roc_timer, rsi_roc_timeout, 0);
 	init_completion(&common->wlan_init_completion);
 	common->init_done = true;
+	adapter->device_model = RSI_DEV_9113;
+	common->oper_mode = oper_mode;
+
+	/* Determine coex mode */
+	switch (common->oper_mode) {
+	case DEV_OPMODE_STA_BT_DUAL:
+	case DEV_OPMODE_STA_BT:
+	case DEV_OPMODE_STA_BT_LE:
+	case DEV_OPMODE_BT_ALONE:
+	case DEV_OPMODE_BT_LE_ALONE:
+	case DEV_OPMODE_BT_DUAL:
+		common->coex_mode = 2;
+		break;
+	case DEV_OPMODE_AP_BT_DUAL:
+	case DEV_OPMODE_AP_BT:
+		common->coex_mode = 4;
+		break;
+	case DEV_OPMODE_WIFI_ALONE:
+		common->coex_mode = 1;
+		break;
+	default:
+		common->oper_mode = 1;
+		common->coex_mode = 1;
+	}
+	rsi_dbg(INFO_ZONE, "%s: oper_mode = %d, coex_mode = %d\n",
+		__func__, common->oper_mode, common->coex_mode);
 
-	common->coex_mode = RSI_DEV_COEX_MODE_WIFI_ALONE;
-	common->oper_mode = RSI_DEV_OPMODE_WIFI_ALONE;
 	adapter->device_model = RSI_DEV_9113;
 #ifdef CONFIG_RSI_COEX
 	if (common->coex_mode > 1) {
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index ba38c6d00128..5722736d5557 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -21,6 +21,14 @@
 #include "rsi_coex.h"
 #include "rsi_hal.h"
 
+/* Default operating mode is wlan STA + BT */
+static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+module_param(dev_oper_mode, ushort, 0444);
+MODULE_PARM_DESC(dev_oper_mode,
+		 "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n"
+		 "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
+		 "6[AP + BT classic], 14[AP + BT classic + BT LE]");
+
 /**
  * rsi_sdio_set_cmd52_arg() - This function prepares cmd 52 read/write arg.
  * @rw: Read/write
@@ -926,7 +934,7 @@ static int rsi_probe(struct sdio_func *pfunction,
 
 	rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
 
-	adapter = rsi_91x_init();
+	adapter = rsi_91x_init(dev_oper_mode);
 	if (!adapter) {
 		rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
 			__func__);
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index b33a05f057ba..0a50cff4e48f 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -21,6 +21,14 @@
 #include "rsi_hal.h"
 #include "rsi_coex.h"
 
+/* Default operating mode is wlan STA + BT */
+static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL;
+module_param(dev_oper_mode, ushort, 0444);
+MODULE_PARM_DESC(dev_oper_mode,
+		 "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n"
+		 "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
+		 "6[AP + BT classic], 14[AP + BT classic + BT LE]");
+
 /**
  * rsi_usb_card_write() - This function writes to the USB Card.
  * @adapter: Pointer to the adapter structure.
@@ -708,7 +716,7 @@ static int rsi_probe(struct usb_interface *pfunction,
 
 	rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
 
-	adapter = rsi_91x_init();
+	adapter = rsi_91x_init(dev_oper_mode);
 	if (!adapter) {
 		rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
 			__func__);
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index 461658522d0f..d9ff3b8be86e 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -81,7 +81,7 @@ static inline int rsi_kill_thread(struct rsi_thread *handle)
 
 void rsi_mac80211_detach(struct rsi_hw *hw);
 u16 rsi_get_connected_channel(struct ieee80211_vif *vif);
-struct rsi_hw *rsi_91x_init(void);
+struct rsi_hw *rsi_91x_init(u16 oper_mode);
 void rsi_91x_deinit(struct rsi_hw *adapter);
 int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len);
 #ifdef CONFIG_PM
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index e7122239f969..a7d3011c5193 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -17,6 +17,17 @@
 #ifndef __RSI_HAL_H__
 #define __RSI_HAL_H__
 
+/* Device Operating modes */
+#define DEV_OPMODE_WIFI_ALONE		1
+#define DEV_OPMODE_BT_ALONE		4
+#define DEV_OPMODE_BT_LE_ALONE		8
+#define DEV_OPMODE_BT_DUAL		12
+#define DEV_OPMODE_STA_BT		5
+#define DEV_OPMODE_STA_BT_LE		9
+#define DEV_OPMODE_STA_BT_DUAL		13
+#define DEV_OPMODE_AP_BT		6
+#define DEV_OPMODE_AP_BT_DUAL		14
+
 #define FLASH_WRITE_CHUNK_SIZE		(4 * 1024)
 #define FLASH_SECTOR_SIZE		(4 * 1024)
 

From 681805b1401be32b12956a891be276061c24aecb Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Tue, 27 Feb 2018 19:56:18 +0530
Subject: [PATCH 0890/1678] rsi: sdio changes to support BT

Queue number is correctly updated for BT traffic. Also, kzalloc
instead of kmalloc is used for Rx packet allocation.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c     | 2 ++
 drivers/net/wireless/rsi/rsi_91x_sdio_ops.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 5722736d5557..beb18d0d0e07 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -763,6 +763,8 @@ static int rsi_sdio_host_intf_write_pkt(struct rsi_hw *adapter,
 	int status;
 
 	queueno = ((pkt[1] >> 4) & 0xf);
+	if (queueno == RSI_BT_MGMT_Q || queueno == RSI_BT_DATA_Q)
+		queueno = RSI_BT_Q;
 
 	num_blocks = len / block_size;
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 169c98125202..6e74261186c6 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -103,7 +103,7 @@ static int rsi_process_pkt(struct rsi_common *common)
 
 	rcv_pkt_len = (num_blks * 256);
 
-	common->rx_data_pkt = kmalloc(rcv_pkt_len, GFP_KERNEL);
+	common->rx_data_pkt = kzalloc(rcv_pkt_len, GFP_KERNEL);
 	if (!common->rx_data_pkt) {
 		rsi_dbg(ERR_ZONE, "%s: Failed in memory allocation\n",
 			__func__);

From 50117605770c9ce94b8f395d7a774c6b029475dc Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Wed, 28 Feb 2018 13:08:26 +0530
Subject: [PATCH 0891/1678] rsi: improve RX handling in SDIO interface

Currently, RX packets are handled in interrupt context in SDIO
interface. To improve the efficiency of processing RX packets,
RX thread and RX skb queues are introduced.
When the packet is read from device, driver prepares skb, add to
RX queue and trigger RX thread event. RX thread processes the
packets from RX queue.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c     | 45 +++++++++++----
 drivers/net/wireless/rsi/rsi_91x_sdio_ops.c | 64 ++++++++++++++++-----
 drivers/net/wireless/rsi/rsi_main.h         |  1 +
 drivers/net/wireless/rsi/rsi_sdio.h         |  8 +++
 4 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index beb18d0d0e07..98c7d1dae18e 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -933,6 +933,8 @@ static int rsi_probe(struct sdio_func *pfunction,
 		     const struct sdio_device_id *id)
 {
 	struct rsi_hw *adapter;
+	struct rsi_91x_sdiodev *sdev;
+	int status;
 
 	rsi_dbg(INIT_ZONE, "%s: Init function called\n", __func__);
 
@@ -940,7 +942,7 @@ static int rsi_probe(struct sdio_func *pfunction,
 	if (!adapter) {
 		rsi_dbg(ERR_ZONE, "%s: Failed to init os intf ops\n",
 			__func__);
-		return 1;
+		return -EINVAL;
 	}
 	adapter->rsi_host_intf = RSI_HOST_INTF_SDIO;
 	adapter->host_intf_ops = &sdio_host_intf_ops;
@@ -948,39 +950,58 @@ static int rsi_probe(struct sdio_func *pfunction,
 	if (rsi_init_sdio_interface(adapter, pfunction)) {
 		rsi_dbg(ERR_ZONE, "%s: Failed to init sdio interface\n",
 			__func__);
-		goto fail;
+		status = -EIO;
+		goto fail_free_adapter;
 	}
+	sdev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+	rsi_init_event(&sdev->rx_thread.event);
+	status = rsi_create_kthread(adapter->priv, &sdev->rx_thread,
+				    rsi_sdio_rx_thread, "SDIO-RX-Thread");
+	if (status) {
+		rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
+		goto fail_free_adapter;
+	}
+	skb_queue_head_init(&sdev->rx_q.head);
+	sdev->rx_q.num_rx_pkts = 0;
+
 	sdio_claim_host(pfunction);
 	if (sdio_claim_irq(pfunction, rsi_handle_interrupt)) {
 		rsi_dbg(ERR_ZONE, "%s: Failed to request IRQ\n", __func__);
 		sdio_release_host(pfunction);
-		goto fail;
+		status = -EIO;
+		goto fail_kill_thread;
 	}
 	sdio_release_host(pfunction);
 	rsi_dbg(INIT_ZONE, "%s: Registered Interrupt handler\n", __func__);
 
 	if (rsi_hal_device_init(adapter)) {
 		rsi_dbg(ERR_ZONE, "%s: Failed in device init\n", __func__);
-		sdio_claim_host(pfunction);
-		sdio_release_irq(pfunction);
-		sdio_disable_func(pfunction);
-		sdio_release_host(pfunction);
-		goto fail;
+		status = -EINVAL;
+		goto fail_kill_thread;
 	}
 	rsi_dbg(INFO_ZONE, "===> RSI Device Init Done <===\n");
 
 	if (rsi_sdio_master_access_msword(adapter, MISC_CFG_BASE_ADDR)) {
 		rsi_dbg(ERR_ZONE, "%s: Unable to set ms word reg\n", __func__);
-		return -EIO;
+		status = -EIO;
+		goto fail_dev_init;
 	}
 
 	adapter->priv->hibernate_resume = false;
 	adapter->priv->reinit_hw = false;
 	return 0;
-fail:
+
+fail_dev_init:
+	sdio_claim_host(pfunction);
+	sdio_release_irq(pfunction);
+	sdio_disable_func(pfunction);
+	sdio_release_host(pfunction);
+fail_kill_thread:
+	rsi_kill_thread(&sdev->rx_thread);
+fail_free_adapter:
 	rsi_91x_deinit(adapter);
 	rsi_dbg(ERR_ZONE, "%s: Failed in probe...Exiting\n", __func__);
-	return 1;
+	return status;
 }
 
 static void ulp_read_write(struct rsi_hw *adapter, u16 addr, u32 data,
@@ -1076,6 +1097,8 @@ static void rsi_disconnect(struct sdio_func *pfunction)
 		return;
 
 	dev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+
+	rsi_kill_thread(&dev->rx_thread);
 	sdio_claim_host(pfunction);
 	sdio_release_irq(pfunction);
 	sdio_release_host(pfunction);
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index 6e74261186c6..612c211e21a1 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -60,6 +60,43 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word)
 	return status;
 }
 
+void rsi_sdio_rx_thread(struct rsi_common *common)
+{
+	struct rsi_hw *adapter = common->priv;
+	struct rsi_91x_sdiodev *sdev = adapter->rsi_dev;
+	struct sk_buff *skb;
+	int status;
+
+	do {
+		rsi_wait_event(&sdev->rx_thread.event, EVENT_WAIT_FOREVER);
+		rsi_reset_event(&sdev->rx_thread.event);
+
+		while (true) {
+			if (atomic_read(&sdev->rx_thread.thread_done))
+				goto out;
+
+			skb = skb_dequeue(&sdev->rx_q.head);
+			if (!skb)
+				break;
+			if (sdev->rx_q.num_rx_pkts > 0)
+				sdev->rx_q.num_rx_pkts--;
+			status = rsi_read_pkt(common, skb->data, skb->len);
+			if (status) {
+				rsi_dbg(ERR_ZONE, "Failed to read the packet\n");
+				dev_kfree_skb(skb);
+				break;
+			}
+			dev_kfree_skb(skb);
+		}
+	} while (1);
+
+out:
+	rsi_dbg(INFO_ZONE, "%s: Terminated SDIO RX thread\n", __func__);
+	skb_queue_purge(&sdev->rx_q.head);
+	atomic_inc(&sdev->rx_thread.thread_done);
+	complete_and_exit(&sdev->rx_thread.completion, 0);
+}
+
 /**
  * rsi_process_pkt() - This Function reads rx_blocks register and figures out
  *		       the size of the rx pkt.
@@ -76,6 +113,10 @@ static int rsi_process_pkt(struct rsi_common *common)
 	u32 rcv_pkt_len = 0;
 	int status = 0;
 	u8 value = 0;
+	struct sk_buff *skb;
+
+	if (dev->rx_q.num_rx_pkts >= RSI_MAX_RX_PKTS)
+		return 0;
 
 	num_blks = ((adapter->interrupt_status & 1) |
 			((adapter->interrupt_status >> RECV_NUM_BLOCKS) << 1));
@@ -103,27 +144,24 @@ static int rsi_process_pkt(struct rsi_common *common)
 
 	rcv_pkt_len = (num_blks * 256);
 
-	common->rx_data_pkt = kzalloc(rcv_pkt_len, GFP_KERNEL);
-	if (!common->rx_data_pkt) {
-		rsi_dbg(ERR_ZONE, "%s: Failed in memory allocation\n",
-			__func__);
+	skb = dev_alloc_skb(rcv_pkt_len);
+	if (!skb)
 		return -ENOMEM;
-	}
 
-	status = rsi_sdio_host_intf_read_pkt(adapter,
-					     common->rx_data_pkt,
-					     rcv_pkt_len);
+	status = rsi_sdio_host_intf_read_pkt(adapter, skb->data, rcv_pkt_len);
 	if (status) {
 		rsi_dbg(ERR_ZONE, "%s: Failed to read packet from card\n",
 			__func__);
-		goto fail;
+		dev_kfree_skb(skb);
+		return status;
 	}
+	skb_put(skb, rcv_pkt_len);
+	skb_queue_tail(&dev->rx_q.head, skb);
+	dev->rx_q.num_rx_pkts++;
 
-	status = rsi_read_pkt(common, common->rx_data_pkt, rcv_pkt_len);
+	rsi_set_event(&dev->rx_thread.event);
 
-fail:
-	kfree(common->rx_data_pkt);
-	return status;
+	return 0;
 }
 
 /**
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index 47af0cb9de1d..c91d62522f9b 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -112,6 +112,7 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
 #define RSI_WOW_NO_CONNECTION		BIT(1)
 
 #define RSI_DEV_9113		1
+#define RSI_MAX_RX_PKTS		64
 
 struct version_info {
 	u16 major;
diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h
index 49c549ba6682..ba649be284af 100644
--- a/drivers/net/wireless/rsi/rsi_sdio.h
+++ b/drivers/net/wireless/rsi/rsi_sdio.h
@@ -105,6 +105,11 @@ struct receive_info {
 	u32 buf_available_counter;
 };
 
+struct rsi_sdio_rx_q {
+	u8 num_rx_pkts;
+	struct sk_buff_head head;
+};
+
 struct rsi_91x_sdiodev {
 	struct sdio_func *pfunction;
 	struct task_struct *sdio_irq_task;
@@ -117,6 +122,8 @@ struct rsi_91x_sdiodev {
 	u16 tx_blk_size;
 	u8 write_fail;
 	bool buff_status_updated;
+	struct rsi_sdio_rx_q rx_q;
+	struct rsi_thread rx_thread;
 };
 
 void rsi_interrupt_handler(struct rsi_hw *adapter);
@@ -131,4 +138,5 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word);
 void rsi_sdio_ack_intr(struct rsi_hw *adapter, u8 int_bit);
 int rsi_sdio_determine_event_timeout(struct rsi_hw *adapter);
 int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num);
+void rsi_sdio_rx_thread(struct rsi_common *common);
 #endif

From 8809f08cdc0b57765c190abe0853a394513f6d2f Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Wed, 28 Feb 2018 13:08:27 +0530
Subject: [PATCH 0892/1678] rsi: use dynamic RX control blocks instead of
 MAX_RX_URB

Currently 2 RX control blocks are allocated by default.
If wifi alone mode is used rx control block 2 is unusable.
So, changes are done accordingly in all places to use
RX control blocks dynamically based on coex mode check.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_usb.c     | 34 +++++++++++++---------
 drivers/net/wireless/rsi/rsi_91x_usb_ops.c |  5 ++--
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 0a50cff4e48f..64781a3629f0 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -497,15 +497,15 @@ static struct rsi_host_intf_ops usb_host_intf_ops = {
  */
 static void rsi_deinit_usb_interface(struct rsi_hw *adapter)
 {
-	u8 idx;
-
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
 
 	rsi_kill_thread(&dev->rx_thread);
 
-	for (idx = 0; idx < MAX_RX_URBS; idx++) {
-		usb_free_urb(dev->rx_cb[idx].rx_urb);
-		kfree(dev->rx_cb[idx].rx_buffer);
+	usb_free_urb(dev->rx_cb[0].rx_urb);
+	kfree(dev->rx_cb[0].rx_buffer);
+	if (adapter->priv->coex_mode > 1) {
+		usb_free_urb(dev->rx_cb[1].rx_urb);
+		kfree(dev->rx_cb[1].rx_buffer);
 	}
 
 	kfree(adapter->priv->rx_data_pkt);
@@ -516,9 +516,11 @@ static int rsi_usb_init_rx(struct rsi_hw *adapter)
 {
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
 	struct rx_usb_ctrl_block *rx_cb;
-	u8 idx;
+	u8 idx, num_rx_cb;
 
-	for (idx = 0; idx < MAX_RX_URBS; idx++) {
+	num_rx_cb = (adapter->priv->coex_mode > 1 ? 2 : 1);
+
+	for (idx = 0; idx < num_rx_cb; idx++) {
 		rx_cb = &dev->rx_cb[idx];
 
 		rx_cb->rx_buffer = kzalloc(RSI_USB_BUF_SIZE * 2,
@@ -538,9 +540,11 @@ static int rsi_usb_init_rx(struct rsi_hw *adapter)
 	return 0;
 
 err:
-	for (idx = 0; idx < MAX_RX_URBS; idx++) {
-		kfree(dev->rx_cb[idx].rx_buffer);
-		kfree(dev->rx_cb[idx].rx_urb);
+	kfree(dev->rx_cb[0].rx_buffer);
+	usb_free_urb(dev->rx_cb[0].rx_urb);
+	if (adapter->priv->coex_mode > 1) {
+		kfree(dev->rx_cb[1].rx_buffer);
+		usb_free_urb(dev->rx_cb[1].rx_urb);
 	}
 	return -1;
 }
@@ -557,7 +561,7 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 {
 	struct rsi_91x_usbdev *rsi_dev;
 	struct rsi_common *common = adapter->priv;
-	int status, i;
+	int status;
 
 	rsi_dev = kzalloc(sizeof(*rsi_dev), GFP_KERNEL);
 	if (!rsi_dev)
@@ -617,9 +621,11 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 	return 0;
 
 fail_thread:
-	for (i = 0; i < MAX_RX_URBS; i++) {
-		kfree(rsi_dev->rx_cb[i].rx_buffer);
-		kfree(rsi_dev->rx_cb[i].rx_urb);
+	kfree(rsi_dev->rx_cb[0].rx_buffer);
+	usb_free_urb(rsi_dev->rx_cb[0].rx_urb);
+	if (common->coex_mode > 1) {
+		kfree(rsi_dev->rx_cb[1].rx_buffer);
+		usb_free_urb(rsi_dev->rx_cb[1].rx_urb);
 	}
 fail_tx:
 	kfree(common->rx_data_pkt);
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index d0650eaeec23..fc25b1b3c8cd 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -30,15 +30,16 @@ void rsi_usb_rx_thread(struct rsi_common *common)
 	struct rsi_hw *adapter = common->priv;
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
 	struct rx_usb_ctrl_block *rx_cb;
-	int status, idx;
+	int status, idx, num_rx_cb;
 
+	num_rx_cb = (adapter->priv->coex_mode > 1 ? 2 : 1);
 	do {
 		rsi_wait_event(&dev->rx_thread.event, EVENT_WAIT_FOREVER);
 
 		if (atomic_read(&dev->rx_thread.thread_done))
 			goto out;
 
-		for (idx = 0; idx < MAX_RX_URBS; idx++) {
+		for (idx = 0; idx < num_rx_cb; idx++) {
 			rx_cb = &dev->rx_cb[idx];
 			if (!rx_cb->pend)
 				continue;

From a1854fae1414dd8edfff4857fd26c3e355d43e19 Mon Sep 17 00:00:00 2001
From: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Date: Wed, 28 Feb 2018 13:08:28 +0530
Subject: [PATCH 0893/1678] rsi: improve RX packet handling in USB interface

Curretly, RX packet processing is done sequencially. To improve
the efficiency, RX skb queue is introduced.
Here, while preparing RX URB skb is allocated and used
for RX buffer. When rx done handler is called, enqueue the skb
to rx_q and set the thread event.
RX thread is modified to dequeue packets from skb queue and
process further.

Signed-off-by: Prameela Rani Garnepudi <prameela.j04cs@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_usb.c     | 110 ++++++++++++---------
 drivers/net/wireless/rsi/rsi_91x_usb_ops.c |  35 +++----
 drivers/net/wireless/rsi/rsi_hal.h         |   1 +
 drivers/net/wireless/rsi/rsi_main.h        |   1 -
 drivers/net/wireless/rsi/rsi_usb.h         |   5 +-
 5 files changed, 78 insertions(+), 74 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 64781a3629f0..be8236f404b5 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -29,6 +29,8 @@ MODULE_PARM_DESC(dev_oper_mode,
 		 "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n"
 		 "6[AP + BT classic], 14[AP + BT classic + BT LE]");
 
+static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num);
+
 /**
  * rsi_usb_card_write() - This function writes to the USB Card.
  * @adapter: Pointer to the adapter structure.
@@ -260,12 +262,31 @@ static void rsi_rx_done_handler(struct urb *urb)
 {
 	struct rx_usb_ctrl_block *rx_cb = urb->context;
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data;
+	int status = -EINVAL;
 
 	if (urb->status)
-		return;
+		goto out;
+
+	if (urb->actual_length <= 0) {
+		rsi_dbg(INFO_ZONE, "%s: Zero length packet\n", __func__);
+		goto out;
+	}
+	if (skb_queue_len(&dev->rx_q) >= RSI_MAX_RX_PKTS) {
+		rsi_dbg(INFO_ZONE, "Max RX packets reached\n");
+		goto out;
+	}
+	skb_put(rx_cb->rx_skb, urb->actual_length);
+	skb_queue_tail(&dev->rx_q, rx_cb->rx_skb);
 
-	rx_cb->pend = 1;
 	rsi_set_event(&dev->rx_thread.event);
+	status = 0;
+
+out:
+	if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num))
+		rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__);
+
+	if (status)
+		dev_kfree_skb(rx_cb->rx_skb);
 }
 
 /**
@@ -280,13 +301,26 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num)
 	struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1];
 	struct urb *urb = rx_cb->rx_urb;
 	int status;
+	struct sk_buff *skb;
+	u8 dword_align_bytes = 0;
+
+#define RSI_MAX_RX_USB_PKT_SIZE	3000
+	skb = dev_alloc_skb(RSI_MAX_RX_USB_PKT_SIZE);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, MAX_DWORD_ALIGN_BYTES);
+	dword_align_bytes = (unsigned long)skb->data & 0x3f;
+	if (dword_align_bytes > 0)
+		skb_push(skb, dword_align_bytes);
+	urb->transfer_buffer = skb->data;
+	rx_cb->rx_skb = skb;
 
 	usb_fill_bulk_urb(urb,
 			  dev->usbdev,
 			  usb_rcvbulkpipe(dev->usbdev,
 			  dev->bulkin_endpoint_addr[ep_num - 1]),
 			  urb->transfer_buffer,
-			  3000,
+			  RSI_MAX_RX_USB_PKT_SIZE,
 			  rsi_rx_done_handler,
 			  rx_cb);
 
@@ -502,13 +536,9 @@ static void rsi_deinit_usb_interface(struct rsi_hw *adapter)
 	rsi_kill_thread(&dev->rx_thread);
 
 	usb_free_urb(dev->rx_cb[0].rx_urb);
-	kfree(dev->rx_cb[0].rx_buffer);
-	if (adapter->priv->coex_mode > 1) {
+	if (adapter->priv->coex_mode > 1)
 		usb_free_urb(dev->rx_cb[1].rx_urb);
-		kfree(dev->rx_cb[1].rx_buffer);
-	}
 
-	kfree(adapter->priv->rx_data_pkt);
 	kfree(dev->tx_buffer);
 }
 
@@ -523,29 +553,29 @@ static int rsi_usb_init_rx(struct rsi_hw *adapter)
 	for (idx = 0; idx < num_rx_cb; idx++) {
 		rx_cb = &dev->rx_cb[idx];
 
-		rx_cb->rx_buffer = kzalloc(RSI_USB_BUF_SIZE * 2,
-					   GFP_KERNEL);
-		if (!rx_cb->rx_buffer)
-			goto err;
-
 		rx_cb->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!rx_cb->rx_urb) {
 			rsi_dbg(ERR_ZONE, "Failed alloc rx urb[%d]\n", idx);
 			goto err;
 		}
-		rx_cb->rx_urb->transfer_buffer = rx_cb->rx_buffer;
 		rx_cb->ep_num = idx + 1;
 		rx_cb->data = (void *)dev;
 	}
+	skb_queue_head_init(&dev->rx_q);
+	rsi_init_event(&dev->rx_thread.event);
+	if (rsi_create_kthread(adapter->priv, &dev->rx_thread,
+			       rsi_usb_rx_thread, "RX-Thread")) {
+		rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
+		goto err;
+	}
+
 	return 0;
 
 err:
-	kfree(dev->rx_cb[0].rx_buffer);
 	usb_free_urb(dev->rx_cb[0].rx_urb);
-	if (adapter->priv->coex_mode > 1) {
-		kfree(dev->rx_cb[1].rx_buffer);
+	if (adapter->priv->coex_mode > 1)
 		usb_free_urb(dev->rx_cb[1].rx_urb);
-	}
+
 	return -1;
 }
 
@@ -560,7 +590,6 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 				  struct usb_interface *pfunction)
 {
 	struct rsi_91x_usbdev *rsi_dev;
-	struct rsi_common *common = adapter->priv;
 	int status;
 
 	rsi_dev = kzalloc(sizeof(*rsi_dev), GFP_KERNEL);
@@ -569,49 +598,37 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 
 	adapter->rsi_dev = rsi_dev;
 	rsi_dev->usbdev = interface_to_usbdev(pfunction);
+	rsi_dev->priv = (void *)adapter;
 
-	if (rsi_find_bulk_in_and_out_endpoints(pfunction, adapter))
-		return -EINVAL;
+	if (rsi_find_bulk_in_and_out_endpoints(pfunction, adapter)) {
+		status = -EINVAL;
+		goto fail_eps;
+	}
 
 	adapter->device = &pfunction->dev;
 	usb_set_intfdata(pfunction, adapter);
 
-	common->rx_data_pkt = kmalloc(2048, GFP_KERNEL);
-	if (!common->rx_data_pkt) {
-		rsi_dbg(ERR_ZONE, "%s: Failed to allocate memory\n",
-			__func__);
-		return -ENOMEM;
-	}
-
 	rsi_dev->tx_buffer = kmalloc(2048, GFP_KERNEL);
 	if (!rsi_dev->tx_buffer) {
 		status = -ENOMEM;
-		goto fail_tx;
+		goto fail_eps;
 	}
 
 	if (rsi_usb_init_rx(adapter)) {
 		rsi_dbg(ERR_ZONE, "Failed to init RX handle\n");
-		return -ENOMEM;
+		status = -ENOMEM;
+		goto fail_rx;
 	}
 
 	rsi_dev->tx_blk_size = 252;
 	adapter->block_size = rsi_dev->tx_blk_size;
 
 	/* Initializing function callbacks */
-	adapter->rx_urb_submit = rsi_rx_urb_submit;
 	adapter->check_hw_queue_status = rsi_usb_check_queue_status;
 	adapter->determine_event_timeout = rsi_usb_event_timeout;
 	adapter->rsi_host_intf = RSI_HOST_INTF_USB;
 	adapter->host_intf_ops = &usb_host_intf_ops;
 
-	rsi_init_event(&rsi_dev->rx_thread.event);
-	status = rsi_create_kthread(common, &rsi_dev->rx_thread,
-				    rsi_usb_rx_thread, "RX-Thread");
-	if (status) {
-		rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
-		goto fail_thread;
-	}
-
 #ifdef CONFIG_RSI_DEBUGFS
 	/* In USB, one less than the MAX_DEBUGFS_ENTRIES entries is required */
 	adapter->num_debugfs_entries = (MAX_DEBUGFS_ENTRIES - 1);
@@ -620,15 +637,12 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter,
 	rsi_dbg(INIT_ZONE, "%s: Enabled the interface\n", __func__);
 	return 0;
 
-fail_thread:
-	kfree(rsi_dev->rx_cb[0].rx_buffer);
-	usb_free_urb(rsi_dev->rx_cb[0].rx_urb);
-	if (common->coex_mode > 1) {
-		kfree(rsi_dev->rx_cb[1].rx_buffer);
-		usb_free_urb(rsi_dev->rx_cb[1].rx_urb);
-	}
-fail_tx:
-	kfree(common->rx_data_pkt);
+fail_rx:
+	kfree(rsi_dev->tx_buffer);
+
+fail_eps:
+	kfree(rsi_dev);
+
 	return status;
 }
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index fc25b1b3c8cd..b1687d22f73f 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -29,44 +29,33 @@ void rsi_usb_rx_thread(struct rsi_common *common)
 {
 	struct rsi_hw *adapter = common->priv;
 	struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
-	struct rx_usb_ctrl_block *rx_cb;
-	int status, idx, num_rx_cb;
+	int status;
+	struct sk_buff *skb;
 
-	num_rx_cb = (adapter->priv->coex_mode > 1 ? 2 : 1);
 	do {
 		rsi_wait_event(&dev->rx_thread.event, EVENT_WAIT_FOREVER);
+		rsi_reset_event(&dev->rx_thread.event);
 
-		if (atomic_read(&dev->rx_thread.thread_done))
-			goto out;
+		while (true) {
+			if (atomic_read(&dev->rx_thread.thread_done))
+				goto out;
 
-		for (idx = 0; idx < num_rx_cb; idx++) {
-			rx_cb = &dev->rx_cb[idx];
-			if (!rx_cb->pend)
-				continue;
-
-			mutex_lock(&common->rx_lock);
-			status = rsi_read_pkt(common, rx_cb->rx_buffer, 0);
+			skb = skb_dequeue(&dev->rx_q);
+			if (!skb)
+				break;
+			status = rsi_read_pkt(common, skb->data, 0);
 			if (status) {
 				rsi_dbg(ERR_ZONE, "%s: Failed To read data",
 					__func__);
-				mutex_unlock(&common->rx_lock);
 				break;
 			}
-			rx_cb->pend = 0;
-			mutex_unlock(&common->rx_lock);
-
-			if (adapter->rx_urb_submit(adapter, rx_cb->ep_num)) {
-				rsi_dbg(ERR_ZONE,
-					"%s: Failed in urb submission",
-					__func__);
-				return;
-			}
+			dev_kfree_skb(skb);
 		}
-		rsi_reset_event(&dev->rx_thread.event);
 	} while (1);
 
 out:
 	rsi_dbg(INFO_ZONE, "%s: Terminated thread\n", __func__);
+	skb_queue_purge(&dev->rx_q);
 	complete_and_exit(&dev->rx_thread.completion, 0);
 }
 
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index a7d3011c5193..786dccd0b732 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -114,6 +114,7 @@
 
 #define FW_FLASH_OFFSET			0x820
 #define LMAC_VER_OFFSET			(FW_FLASH_OFFSET + 0x200)
+#define MAX_DWORD_ALIGN_BYTES		64
 
 struct bl_header {
 	__le32 flags;
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index c91d62522f9b..ef4fa323694b 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -341,7 +341,6 @@ struct rsi_hw {
 	void *rsi_dev;
 	struct rsi_host_intf_ops *host_intf_ops;
 	int (*check_hw_queue_status)(struct rsi_hw *adapter, u8 q_num);
-	int (*rx_urb_submit)(struct rsi_hw *adapter, u8 ep_num);
 	int (*determine_event_timeout)(struct rsi_hw *adapter);
 };
 
diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
index 0fda14ce693e..a88d59295a98 100644
--- a/drivers/net/wireless/rsi/rsi_usb.h
+++ b/drivers/net/wireless/rsi/rsi_usb.h
@@ -42,12 +42,12 @@
 struct rx_usb_ctrl_block {
 	u8 *data;
 	struct urb *rx_urb;
-	u8 *rx_buffer;
+	struct sk_buff *rx_skb;
 	u8 ep_num;
-	u8 pend;
 };
 
 struct rsi_91x_usbdev {
+	void *priv;
 	struct rsi_thread rx_thread;
 	u8 endpoint;
 	struct usb_device *usbdev;
@@ -60,6 +60,7 @@ struct rsi_91x_usbdev {
 	u8 bulkout_endpoint_addr[MAX_BULK_EP];
 	u32 tx_blk_size;
 	u8 write_fail;
+	struct sk_buff_head rx_q;
 };
 
 static inline int rsi_usb_check_queue_status(struct rsi_hw *adapter, u8 q_num)

From 79ca239a68f8f006ed872a023d97fbadf9d1577d Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Thu, 1 Mar 2018 09:58:12 +0000
Subject: [PATCH 0894/1678] bcma: Prevent build of PCI host features in module

Attempting to build bcma.ko with BCMA_DRIVER_PCI_HOSTMODE=y results in
a build error due to use of symbols not exported from vmlinux:

ERROR: "pcibios_enable_device" [drivers/bcma/bcma.ko] undefined!
ERROR: "register_pci_controller" [drivers/bcma/bcma.ko] undefined!
make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1

To prevent this, don't allow the host mode feature to be built if
CONFIG_BCMA=m

Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index ba8acca036df..cb0f1aad20b7 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI
 
 config BCMA_DRIVER_PCI_HOSTMODE
 	bool "Driver for PCI core working in hostmode"
-	depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY
+	depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY && BCMA = y
 	help
 	  PCI core hostmode operation (external PCI bus).
 

From a24853aab59184ebd19c5e078c7b29e1c316e3a1 Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Thu, 8 Mar 2018 11:53:24 +0530
Subject: [PATCH 0895/1678] ssb: use put_device() if device_register fail

Never directly free @dev after calling device_register(), even
if it returned an error! Always use put_device() to give up the
reference initialized.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/ssb/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index fdb89b6bfb40..116594413f66 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -522,7 +522,7 @@ static int ssb_devices_register(struct ssb_bus *bus)
 			/* Set dev to NULL to not unregister
 			 * dev on error unwinding. */
 			sdev->dev = NULL;
-			kfree(devwrap);
+			put_device(dev);
 			goto error;
 		}
 		dev_idx++;

From a6cf02e6485a80ec0f708e1f72ee95abc3426b84 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 2 Mar 2018 18:03:30 -0800
Subject: [PATCH 0896/1678] net/wireless: fix spaces and grammar copy/paste in
 vendor Kconfig help text

Lots of the wireless driver vendor Kconfig symol help text says
"questions about  cards." (2 spaces between "about" and "cards")

Besides dropping one of those spaces, it also needs some other word
inserted there. Instead of putting each vendor's name there, I chose
to say "these" cards in all of the Kconfig help text.

Cc: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/admtek/Kconfig    | 4 ++--
 drivers/net/wireless/ath/Kconfig       | 4 ++--
 drivers/net/wireless/atmel/Kconfig     | 4 ++--
 drivers/net/wireless/broadcom/Kconfig  | 4 ++--
 drivers/net/wireless/cisco/Kconfig     | 4 ++--
 drivers/net/wireless/intel/Kconfig     | 4 ++--
 drivers/net/wireless/intersil/Kconfig  | 4 ++--
 drivers/net/wireless/marvell/Kconfig   | 4 ++--
 drivers/net/wireless/mediatek/Kconfig  | 4 ++--
 drivers/net/wireless/quantenna/Kconfig | 4 ++--
 drivers/net/wireless/ralink/Kconfig    | 4 ++--
 drivers/net/wireless/realtek/Kconfig   | 4 ++--
 drivers/net/wireless/rsi/Kconfig       | 4 ++--
 drivers/net/wireless/st/Kconfig        | 4 ++--
 drivers/net/wireless/ti/Kconfig        | 4 ++--
 drivers/net/wireless/zydas/Kconfig     | 4 ++--
 16 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/admtek/Kconfig b/drivers/net/wireless/admtek/Kconfig
index d5a2dc728078..9317367e37f0 100644
--- a/drivers/net/wireless/admtek/Kconfig
+++ b/drivers/net/wireless/admtek/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ADMTEK
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_ADMTEK
diff --git a/drivers/net/wireless/ath/Kconfig b/drivers/net/wireless/ath/Kconfig
index 44b2470af81d..82ab7c33cf97 100644
--- a/drivers/net/wireless/ath/Kconfig
+++ b/drivers/net/wireless/ath/Kconfig
@@ -8,8 +8,8 @@ config WLAN_VENDOR_ATH
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 	  For more information and documentation on this module you can visit:
diff --git a/drivers/net/wireless/atmel/Kconfig b/drivers/net/wireless/atmel/Kconfig
index a43cfd163254..3e684f8c1f93 100644
--- a/drivers/net/wireless/atmel/Kconfig
+++ b/drivers/net/wireless/atmel/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ATMEL
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_ATMEL
diff --git a/drivers/net/wireless/broadcom/Kconfig b/drivers/net/wireless/broadcom/Kconfig
index d3651ceb5046..eebe2864835f 100644
--- a/drivers/net/wireless/broadcom/Kconfig
+++ b/drivers/net/wireless/broadcom/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_BROADCOM
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_BROADCOM
diff --git a/drivers/net/wireless/cisco/Kconfig b/drivers/net/wireless/cisco/Kconfig
index b22567dff893..26eb8b0c2104 100644
--- a/drivers/net/wireless/cisco/Kconfig
+++ b/drivers/net/wireless/cisco/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_CISCO
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_CISCO
diff --git a/drivers/net/wireless/intel/Kconfig b/drivers/net/wireless/intel/Kconfig
index 5b14f2f64a8a..6fdc14b08b8e 100644
--- a/drivers/net/wireless/intel/Kconfig
+++ b/drivers/net/wireless/intel/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_INTEL
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_INTEL
diff --git a/drivers/net/wireless/intersil/Kconfig b/drivers/net/wireless/intersil/Kconfig
index 9da136049955..e89fce1d4f27 100644
--- a/drivers/net/wireless/intersil/Kconfig
+++ b/drivers/net/wireless/intersil/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_INTERSIL
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_INTERSIL
diff --git a/drivers/net/wireless/marvell/Kconfig b/drivers/net/wireless/marvell/Kconfig
index 4938c7ec0009..27038901d3ee 100644
--- a/drivers/net/wireless/marvell/Kconfig
+++ b/drivers/net/wireless/marvell/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_MARVELL
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_MARVELL
diff --git a/drivers/net/wireless/mediatek/Kconfig b/drivers/net/wireless/mediatek/Kconfig
index 92ce4062f307..ff5fc8987b0a 100644
--- a/drivers/net/wireless/mediatek/Kconfig
+++ b/drivers/net/wireless/mediatek/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_MEDIATEK
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_MEDIATEK
diff --git a/drivers/net/wireless/quantenna/Kconfig b/drivers/net/wireless/quantenna/Kconfig
index 30943656e989..de84ce125c26 100644
--- a/drivers/net/wireless/quantenna/Kconfig
+++ b/drivers/net/wireless/quantenna/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_QUANTENNA
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_QUANTENNA
diff --git a/drivers/net/wireless/ralink/Kconfig b/drivers/net/wireless/ralink/Kconfig
index 41dbf3130e2b..9b79e59ee97b 100644
--- a/drivers/net/wireless/ralink/Kconfig
+++ b/drivers/net/wireless/ralink/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_RALINK
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_RALINK
diff --git a/drivers/net/wireless/realtek/Kconfig b/drivers/net/wireless/realtek/Kconfig
index 8a8ba2003964..3db988e689d7 100644
--- a/drivers/net/wireless/realtek/Kconfig
+++ b/drivers/net/wireless/realtek/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_REALTEK
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_REALTEK
diff --git a/drivers/net/wireless/rsi/Kconfig b/drivers/net/wireless/rsi/Kconfig
index e6135ee35213..f004be33fcfa 100644
--- a/drivers/net/wireless/rsi/Kconfig
+++ b/drivers/net/wireless/rsi/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_RSI
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_RSI
diff --git a/drivers/net/wireless/st/Kconfig b/drivers/net/wireless/st/Kconfig
index 969b4f6e53b5..ff69a80a9633 100644
--- a/drivers/net/wireless/st/Kconfig
+++ b/drivers/net/wireless/st/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ST
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_ST
diff --git a/drivers/net/wireless/ti/Kconfig b/drivers/net/wireless/ti/Kconfig
index 92fbd6597e34..366c687445ad 100644
--- a/drivers/net/wireless/ti/Kconfig
+++ b/drivers/net/wireless/ti/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_TI
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_TI
diff --git a/drivers/net/wireless/zydas/Kconfig b/drivers/net/wireless/zydas/Kconfig
index a58c0f65e376..b327f86f05be 100644
--- a/drivers/net/wireless/zydas/Kconfig
+++ b/drivers/net/wireless/zydas/Kconfig
@@ -5,8 +5,8 @@ config WLAN_VENDOR_ZYDAS
 	  If you have a wireless card belonging to this class, say Y.
 
 	  Note that the answer to this question doesn't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about  cards. If you say Y, you will be asked for
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about these cards. If you say Y, you will be asked for
 	  your specific card in the following questions.
 
 if WLAN_VENDOR_ZYDAS

From eaab43e505d01ed19f63e08e52252cbc1c69b9b9 Mon Sep 17 00:00:00 2001
From: Xinming Hu <huxm@marvell.com>
Date: Thu, 8 Mar 2018 13:54:02 +0800
Subject: [PATCH 0897/1678] mwifiex: correct antenna number with high bits
 reserved

High bits of antenna number are reserved in hardware spec,
using low 4 bits represent supported antenna.

Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Xinming Hu <huxm@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/cmdevt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 874660052055..7014f440e6f8 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -1529,7 +1529,8 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv,
 
 	adapter->fw_release_number = le32_to_cpu(hw_spec->fw_release_number);
 	adapter->fw_api_ver = (adapter->fw_release_number >> 16) & 0xff;
-	adapter->number_of_antenna = le16_to_cpu(hw_spec->number_of_antenna);
+	adapter->number_of_antenna =
+			le16_to_cpu(hw_spec->number_of_antenna) & 0xf;
 
 	if (le32_to_cpu(hw_spec->dot_11ac_dev_cap)) {
 		adapter->is_hw_11ac_capable = true;

From 53a7094204b7f2b9016648b5c4b5ece8e810a461 Mon Sep 17 00:00:00 2001
From: Ganapathi Bhat <gbhat@marvell.com>
Date: Fri, 9 Mar 2018 23:45:24 +0530
Subject: [PATCH 0898/1678] Revert "mwifiex: fix incorrect ht capability
 problem"

This reverts commit bcc920e8f08336cbbdcdba7c4449c27137e6b4b9.

Drivers gets hardware info and updates ht_cap field of
wiphy->bands during initialization. Once updated during init,
ht_cap must not be modified as it reflects the capability
supported by hardwawre. Above patch tries to modify the ht_cap
field and this results in wrongly advertising capabilities during
association.

Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/marvell/mwifiex/sta_ioctl.c  | 44 +------------------
 1 file changed, 1 insertion(+), 43 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index a6077ab3efc3..ed66f12d3301 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -146,7 +146,6 @@ int mwifiex_fill_new_bss_desc(struct mwifiex_private *priv,
 	size_t beacon_ie_len;
 	struct mwifiex_bss_priv *bss_priv = (void *)bss->priv;
 	const struct cfg80211_bss_ies *ies;
-	int ret;
 
 	rcu_read_lock();
 	ies = rcu_dereference(bss->ies);
@@ -190,48 +189,7 @@ int mwifiex_fill_new_bss_desc(struct mwifiex_private *priv,
 	if (bss_desc->cap_info_bitmap & WLAN_CAPABILITY_SPECTRUM_MGMT)
 		bss_desc->sensed_11h = true;
 
-	ret = mwifiex_update_bss_desc_with_ie(priv->adapter, bss_desc);
-	if (ret)
-		return ret;
-
-	/* Update HT40 capability based on current channel information */
-	if (bss_desc->bcn_ht_oper && bss_desc->bcn_ht_cap) {
-		u8 ht_param = bss_desc->bcn_ht_oper->ht_param;
-		u8 radio = mwifiex_band_to_radio_type(bss_desc->bss_band);
-		struct ieee80211_supported_band *sband =
-						priv->wdev.wiphy->bands[radio];
-		int freq = ieee80211_channel_to_frequency(bss_desc->channel,
-							  radio);
-		struct ieee80211_channel *chan =
-			ieee80211_get_channel(priv->adapter->wiphy, freq);
-
-		switch (ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
-		case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
-			if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
-				sband->ht_cap.cap &=
-					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-				sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
-			} else {
-				sband->ht_cap.cap |=
-					IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
-					IEEE80211_HT_CAP_SGI_40;
-			}
-			break;
-		case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
-			if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
-				sband->ht_cap.cap &=
-					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
-				sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40;
-			} else {
-				sband->ht_cap.cap |=
-					IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
-					IEEE80211_HT_CAP_SGI_40;
-			}
-			break;
-		}
-	}
-
-	return 0;
+	return mwifiex_update_bss_desc_with_ie(priv->adapter, bss_desc);
 }
 
 void mwifiex_dnld_txpwr_table(struct mwifiex_private *priv)

From 77423fa739274a4c7b0e7ad90ca52ef22bdbe84e Mon Sep 17 00:00:00 2001
From: Ganapathi Bhat <gbhat@marvell.com>
Date: Fri, 9 Mar 2018 23:45:25 +0530
Subject: [PATCH 0899/1678] mwifiex: fix incorrect ht capability problem

IEEE80211_CHAN_NO_HT40PLUS and IEEE80211_CHAN_NO_HT40PLUS channel
flags tell if HT40 operation is allowed on a channel or not.

This patch ensures ht_capability information is modified
accordingly so that we don't end up creating a HT40 connection
when it's not allowed for current regulatory domain.

Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/11n.c | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index 8772e3949327..feebfdcf025a 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -341,6 +341,36 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
 		       le16_to_cpu(ht_cap->header.len));
 
 		mwifiex_fill_cap_info(priv, radio_type, &ht_cap->ht_cap);
+		/* Update HT40 capability from current channel information */
+		if (bss_desc->bcn_ht_oper) {
+			u8 ht_param = bss_desc->bcn_ht_oper->ht_param;
+			u8 radio =
+			mwifiex_band_to_radio_type(bss_desc->bss_band);
+			int freq =
+			ieee80211_channel_to_frequency(bss_desc->channel,
+						       radio);
+			struct ieee80211_channel *chan =
+			ieee80211_get_channel(priv->adapter->wiphy, freq);
+
+			switch (ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
+			case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+				if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
+					ht_cap->ht_cap.cap_info &=
+					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+					ht_cap->ht_cap.cap_info &=
+					~IEEE80211_HT_CAP_SGI_40;
+				}
+				break;
+			case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+				if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
+					ht_cap->ht_cap.cap_info &=
+					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+					ht_cap->ht_cap.cap_info &=
+					~IEEE80211_HT_CAP_SGI_40;
+				}
+				break;
+			}
+		}
 
 		*buffer += sizeof(struct mwifiex_ie_types_htcap);
 		ret_len += sizeof(struct mwifiex_ie_types_htcap);

From 28bf8312a983a7873997bf3faf4c2b4e62e4abc0 Mon Sep 17 00:00:00 2001
From: Ganapathi Bhat <gbhat@marvell.com>
Date: Fri, 9 Mar 2018 23:45:26 +0530
Subject: [PATCH 0900/1678] mwifiex: get_channel from firmware

At present driver gets chan_type by referring to
IEEE80211_HT_PARAM_CHA_SEC_OFFSET, in ASSOC response. Sometimes
AP shows IEEE80211_HT_PARAM_CHA_SEC_OFFSET as above/below in
assoc response, even if the association is done on HT20 channel
only. So, it will be accurate to get econdary channel offset from
firmware.

Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/marvell/mwifiex/cfg80211.c   | 40 ++++++++++++-------
 drivers/net/wireless/marvell/mwifiex/decl.h   | 17 ++++++++
 drivers/net/wireless/marvell/mwifiex/fw.h     |  7 ++++
 drivers/net/wireless/marvell/mwifiex/main.h   | 16 +++++++-
 .../net/wireless/marvell/mwifiex/sta_cmd.c    | 22 ++++++++++
 .../wireless/marvell/mwifiex/sta_cmdresp.c    | 19 +++++++++
 .../net/wireless/marvell/mwifiex/sta_ioctl.c  | 12 ++++++
 7 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index ce4432c535f0..7f7e9de2db1c 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -95,18 +95,32 @@ u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type)
 
 /* This function maps IEEE HT secondary channel type to NL80211 channel type
  */
-u8 mwifiex_sec_chan_offset_to_chan_type(u8 second_chan_offset)
+u8 mwifiex_get_chan_type(struct mwifiex_private *priv)
 {
-	switch (second_chan_offset) {
-	case IEEE80211_HT_PARAM_CHA_SEC_NONE:
-		return NL80211_CHAN_HT20;
-	case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
-		return NL80211_CHAN_HT40PLUS;
-	case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
-		return NL80211_CHAN_HT40MINUS;
-	default:
-		return NL80211_CHAN_HT20;
+	struct mwifiex_channel_band channel_band;
+	int ret;
+
+	ret = mwifiex_get_chan_info(priv, &channel_band);
+
+	if (!ret) {
+		switch (channel_band.band_config.chan_width) {
+		case CHAN_BW_20MHZ:
+			if (IS_11N_ENABLED(priv))
+				return NL80211_CHAN_HT20;
+			else
+				return NL80211_CHAN_NO_HT;
+		case CHAN_BW_40MHZ:
+			if (channel_band.band_config.chan2_offset ==
+			    SEC_CHAN_ABOVE)
+				return NL80211_CHAN_HT40PLUS;
+			else
+				return NL80211_CHAN_HT40MINUS;
+		default:
+			return NL80211_CHAN_HT20;
+		}
 	}
+
+	return NL80211_CHAN_HT20;
 }
 
 /*
@@ -3937,7 +3951,6 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy,
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev);
 	struct mwifiex_bssdescriptor *curr_bss;
 	struct ieee80211_channel *chan;
-	u8 second_chan_offset;
 	enum nl80211_channel_type chan_type;
 	enum nl80211_band band;
 	int freq;
@@ -3954,10 +3967,7 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy,
 		chan = ieee80211_get_channel(wiphy, freq);
 
 		if (priv->ht_param_present) {
-			second_chan_offset = priv->assoc_resp_ht_param &
-					IEEE80211_HT_PARAM_CHA_SEC_OFFSET;
-			chan_type = mwifiex_sec_chan_offset_to_chan_type
-							(second_chan_offset);
+			chan_type = mwifiex_get_chan_type(priv);
 			cfg80211_chandef_create(chandef, chan, chan_type);
 		} else {
 			cfg80211_chandef_create(chandef, chan,
diff --git a/drivers/net/wireless/marvell/mwifiex/decl.h b/drivers/net/wireless/marvell/mwifiex/decl.h
index 188e4c370836..46696ea0b23e 100644
--- a/drivers/net/wireless/marvell/mwifiex/decl.h
+++ b/drivers/net/wireless/marvell/mwifiex/decl.h
@@ -294,4 +294,21 @@ enum rdwr_status {
 	RDWR_STATUS_DONE = 2
 };
 
+enum mwifiex_chan_width {
+	CHAN_BW_20MHZ = 0,
+	CHAN_BW_10MHZ,
+	CHAN_BW_40MHZ,
+	CHAN_BW_80MHZ,
+	CHAN_BW_8080MHZ,
+	CHAN_BW_160MHZ,
+	CHAN_BW_5MHZ,
+};
+
+enum mwifiex_chan_offset {
+	SEC_CHAN_NONE = 0,
+	SEC_CHAN_ABOVE = 1,
+	SEC_CHAN_5MHZ = 2,
+	SEC_CHAN_BELOW = 3
+};
+
 #endif /* !_MWIFIEX_DECL_H_ */
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 9c2cdef54074..c5dc518f768b 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -411,6 +411,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER {
 #define HostCmd_CMD_TDLS_OPER                         0x0122
 #define HostCmd_CMD_FW_DUMP_EVENT		      0x0125
 #define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG               0x0223
+#define HostCmd_CMD_STA_CONFIGURE		      0x023f
 #define HostCmd_CMD_CHAN_REGION_CFG		      0x0242
 #define HostCmd_CMD_PACKET_AGGR_CTRL		      0x0251
 
@@ -2285,6 +2286,11 @@ struct host_cmd_ds_pkt_aggr_ctrl {
 	__le16 tx_aggr_align;
 } __packed;
 
+struct host_cmd_ds_sta_configure {
+	__le16 action;
+	u8 tlv_buffer[0];
+} __packed;
+
 struct host_cmd_ds_command {
 	__le16 command;
 	__le16 size;
@@ -2361,6 +2367,7 @@ struct host_cmd_ds_command {
 		struct host_cmd_ds_gtk_rekey_params rekey;
 		struct host_cmd_ds_chan_region_cfg reg_cfg;
 		struct host_cmd_ds_pkt_aggr_ctrl pkt_aggr_ctrl;
+		struct host_cmd_ds_sta_configure sta_cfg;
 	} params;
 } __packed;
 
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 58e6ae5415e7..9bde181700dc 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -517,6 +517,18 @@ enum mwifiex_iface_work_flags {
 	MWIFIEX_IFACE_WORK_CARD_RESET,
 };
 
+struct mwifiex_band_config {
+	u8 chan_band:2;
+	u8 chan_width:2;
+	u8 chan2_offset:2;
+	u8 scan_mode:2;
+} __packed;
+
+struct mwifiex_channel_band {
+	struct mwifiex_band_config band_config;
+	u8 channel;
+};
+
 struct mwifiex_private {
 	struct mwifiex_adapter *adapter;
 	u8 bss_type;
@@ -1557,7 +1569,7 @@ int mwifiex_check_network_compatibility(struct mwifiex_private *priv,
 					struct mwifiex_bssdescriptor *bss_desc);
 
 u8 mwifiex_chan_type_to_sec_chan_offset(enum nl80211_channel_type chan_type);
-u8 mwifiex_sec_chan_offset_to_chan_type(u8 second_chan_offset);
+u8 mwifiex_get_chan_type(struct mwifiex_private *priv);
 
 struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
 					      const char *name,
@@ -1683,6 +1695,8 @@ void mwifiex_queue_main_work(struct mwifiex_adapter *adapter);
 int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action,
 			      int cmd_type,
 			      struct mwifiex_ds_wakeup_reason *wakeup_reason);
+int mwifiex_get_chan_info(struct mwifiex_private *priv,
+			  struct mwifiex_channel_band *channel_band);
 int mwifiex_ret_wakeup_reason(struct mwifiex_private *priv,
 			      struct host_cmd_ds_command *resp,
 			      struct host_cmd_ds_wakeup_reason *wakeup_reason);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
index 211e47d8b318..4ed10cf82f9a 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
@@ -1898,6 +1898,25 @@ static int mwifiex_cmd_get_wakeup_reason(struct mwifiex_private *priv,
 	return 0;
 }
 
+static int mwifiex_cmd_get_chan_info(struct host_cmd_ds_command *cmd,
+				     u16 cmd_action)
+{
+	struct host_cmd_ds_sta_configure *sta_cfg_cmd = &cmd->params.sta_cfg;
+	struct host_cmd_tlv_channel_band *tlv_band_channel =
+	(struct host_cmd_tlv_channel_band *)sta_cfg_cmd->tlv_buffer;
+
+	cmd->command = cpu_to_le16(HostCmd_CMD_STA_CONFIGURE);
+	cmd->size = cpu_to_le16(sizeof(*sta_cfg_cmd) +
+				sizeof(*tlv_band_channel) + S_DS_GEN);
+	sta_cfg_cmd->action = cpu_to_le16(cmd_action);
+	memset(tlv_band_channel, 0, sizeof(*tlv_band_channel));
+	tlv_band_channel->header.type = cpu_to_le16(TLV_TYPE_CHANNELBANDLIST);
+	tlv_band_channel->header.len  = cpu_to_le16(sizeof(*tlv_band_channel) -
+					sizeof(struct mwifiex_ie_types_header));
+
+	return 0;
+}
+
 /* This function check if the command is supported by firmware */
 static int mwifiex_is_cmd_supported(struct mwifiex_private *priv, u16 cmd_no)
 {
@@ -2210,6 +2229,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no,
 		cmd_ptr->command = cpu_to_le16(cmd_no);
 		cmd_ptr->size = cpu_to_le16(S_DS_GEN);
 		break;
+	case HostCmd_CMD_STA_CONFIGURE:
+		ret = mwifiex_cmd_get_chan_info(cmd_ptr, cmd_action);
+		break;
 	default:
 		mwifiex_dbg(priv->adapter, ERROR,
 			    "PREP_CMD: unknown cmd- %#x\n", cmd_no);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
index 1bd4e13b8449..69e3b624adbb 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
@@ -1170,6 +1170,22 @@ static int mwifiex_ret_pkt_aggr_ctrl(struct mwifiex_private *priv,
 	return 0;
 }
 
+static int mwifiex_ret_get_chan_info(struct mwifiex_private *priv,
+				     struct host_cmd_ds_command *resp,
+				     struct mwifiex_channel_band *channel_band)
+{
+	struct host_cmd_ds_sta_configure *sta_cfg_cmd = &resp->params.sta_cfg;
+	struct host_cmd_tlv_channel_band *tlv_band_channel;
+
+	tlv_band_channel =
+	(struct host_cmd_tlv_channel_band *)sta_cfg_cmd->tlv_buffer;
+	memcpy(&channel_band->band_config, &tlv_band_channel->band_config,
+	       sizeof(struct mwifiex_band_config));
+	channel_band->channel = tlv_band_channel->channel;
+
+	return 0;
+}
+
 /*
  * This function handles the command responses.
  *
@@ -1393,6 +1409,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no,
 	case HostCmd_CMD_CHAN_REGION_CFG:
 		ret = mwifiex_ret_chan_region_cfg(priv, resp);
 		break;
+	case HostCmd_CMD_STA_CONFIGURE:
+		ret = mwifiex_ret_get_chan_info(priv, resp, data_buf);
+		break;
 	default:
 		mwifiex_dbg(adapter, ERROR,
 			    "CMD_RESP: unknown cmd response %#x\n",
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index ed66f12d3301..5414b755cf82 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -1481,3 +1481,15 @@ int mwifiex_get_wakeup_reason(struct mwifiex_private *priv, u16 action,
 
 	return status;
 }
+
+int mwifiex_get_chan_info(struct mwifiex_private *priv,
+			  struct mwifiex_channel_band *channel_band)
+{
+	int status = 0;
+
+	status = mwifiex_send_cmd(priv, HostCmd_CMD_STA_CONFIGURE,
+				  HostCmd_ACT_GEN_GET, 0, channel_band,
+				  MWIFIEX_SYNC_CMD);
+
+	return status;
+}

From be9fc0971a5c27b791608cf9705a04fe96dbd395 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 13 Mar 2018 12:44:53 +0100
Subject: [PATCH 0901/1678] net: fix sysctl_fb_tunnels_only_for_init_net link
 error

The new variable is only available when CONFIG_SYSCTL is enabled,
otherwise we get a link error:

net/ipv4/ip_tunnel.o: In function `ip_tunnel_init_net':
ip_tunnel.c:(.text+0x278b): undefined reference to `sysctl_fb_tunnels_only_for_init_net'
net/ipv6/sit.o: In function `sit_init_net':
sit.c:(.init.text+0x4c): undefined reference to `sysctl_fb_tunnels_only_for_init_net'
net/ipv6/ip6_tunnel.o: In function `ip6_tnl_init_net':
ip6_tunnel.c:(.init.text+0x39): undefined reference to `sysctl_fb_tunnels_only_for_init_net'

This adds an extra condition, keeping the traditional behavior when
CONFIG_SYSCTL is disabled.

Fixes: 79134e6ce2c9 ("net: do not create fallback tunnels for non-default namespaces")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5fbb9f1da7fd..913b1cc882cf 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -589,7 +589,9 @@ extern int sysctl_fb_tunnels_only_for_init_net;
 
 static inline bool net_has_fallback_tunnels(const struct net *net)
 {
-	return net == &init_net || !sysctl_fb_tunnels_only_for_init_net;
+	return net == &init_net ||
+	       !IS_ENABLED(CONFIG_SYSCTL) ||
+	       !sysctl_fb_tunnels_only_for_init_net;
 }
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)

From a870a02cc963de35452bbed932560ed69725c4f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 13 Mar 2018 21:58:39 +0100
Subject: [PATCH 0902/1678] pktgen: use dynamic allocation for debug print
 buffer

After the removal of the VLA, we get a harmless warning about a large
stack frame:

net/core/pktgen.c: In function 'pktgen_if_write':
net/core/pktgen.c:1710:1: error: the frame size of 1076 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]

The function was previously shown to be safe despite hitting
the 1024 bye warning level. To get rid of the annoyging warning,
while keeping it readable, this changes it to use strndup_user().

Obviously this is not a fast path, so the kmalloc() overhead
can be disregarded.

Fixes: 35951393bbff ("pktgen: Remove VLA usage")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e2d6ae3b290b..fd65761e1fed 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -906,13 +906,14 @@ static ssize_t pktgen_if_write(struct file *file,
 	i += len;
 
 	if (debug) {
-		size_t copy = min_t(size_t, count, 1023);
-		char tb[1024];
-		if (copy_from_user(tb, user_buffer, copy))
-			return -EFAULT;
-		tb[copy] = 0;
-		pr_debug("%s,%lu  buffer -:%s:-\n",
-			 name, (unsigned long)count, tb);
+		size_t copy = min_t(size_t, count + 1, 1024);
+		char *tp = strndup_user(user_buffer, copy);
+
+		if (IS_ERR(tp))
+			return PTR_ERR(tp);
+
+		pr_debug("%s,%zu  buffer -:%s:-\n", name, count, tp);
+		kfree(buf);
 	}
 
 	if (!strcmp(name, "min_pkt_size")) {

From 41aeefcc38a2643a62db46818a70a781efb40d99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Tue, 13 Mar 2018 11:41:12 +0100
Subject: [PATCH 0903/1678] batman-adv: add DAT cache netlink support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dump the list of DAT cache entries via the netlink socket.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h        |  20 ++++
 net/batman-adv/distributed-arp-table.c | 152 +++++++++++++++++++++++++
 net/batman-adv/distributed-arp-table.h |   8 ++
 net/batman-adv/netlink.c               |  76 +++++++------
 4 files changed, 223 insertions(+), 33 deletions(-)

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 56ae28934070..95ab5dbd09fa 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -272,6 +272,21 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_BLA_CRC,
 
+	/**
+	 * @BATADV_ATTR_DAT_CACHE_IP4ADDRESS: Client IPv4 address
+	 */
+	BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+
+	/**
+	 * @BATADV_ATTR_DAT_CACHE_HWADDRESS: Client MAC address
+	 */
+	BATADV_ATTR_DAT_CACHE_HWADDRESS,
+
+	/**
+	 * @BATADV_ATTR_DAT_CACHE_VID: VLAN ID
+	 */
+	BATADV_ATTR_DAT_CACHE_VID,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
@@ -361,6 +376,11 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_GET_BLA_BACKBONE,
 
+	/**
+	 * @BATADV_CMD_GET_DAT_CACHE: Query list of DAT cache entries
+	 */
+	BATADV_CMD_GET_DAT_CACHE,
+
 	/* add new commands above here */
 
 	/**
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 4469dcc1558f..75dda9454ccf 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -43,13 +44,19 @@
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "send.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
 
@@ -851,6 +858,151 @@ out:
 }
 #endif
 
+/**
+ * batadv_dat_cache_dump_entry() - dump one entry of the DAT cache table to a
+ *  netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @dat_entry: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_dat_entry *dat_entry)
+{
+	int msecs;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+	if (!hdr)
+		return -ENOBUFS;
+
+	msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
+
+	if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
+			    dat_entry->ip) ||
+	    nla_put(msg, BATADV_ATTR_DAT_CACHE_HWADDRESS, ETH_ALEN,
+		    dat_entry->mac_addr) ||
+	    nla_put_u16(msg, BATADV_ATTR_DAT_CACHE_VID, dat_entry->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+		genlmsg_cancel(msg, hdr);
+		return -EMSGSIZE;
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+}
+
+/**
+ * batadv_dat_cache_dump_bucket() - dump one bucket of the DAT cache table to
+ *  a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_dat_entry *dat_entry;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+		if (idx < *idx_skip)
+			goto skip;
+
+		if (batadv_dat_cache_dump_entry(msg, portid, seq,
+						dat_entry)) {
+			rcu_read_unlock();
+			*idx_skip = idx;
+
+			return -EMSGSIZE;
+		}
+
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * batadv_dat_cache_dump() - dump DAT cache table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->dat.hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_dat_cache_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq, head,
+						 &idx))
+			break;
+
+		bucket++;
+		idx = 0;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
 /**
  * batadv_arp_get_type() - parse an ARP packet and gets the type
  * @bat_priv: the bat priv with all the soft interface information
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index e24aa9601c52..a04596028337 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -28,6 +28,7 @@
 
 #include "originator.h"
 
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -81,6 +82,7 @@ batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
 int batadv_dat_init(struct batadv_priv *bat_priv);
 void batadv_dat_free(struct batadv_priv *bat_priv);
 int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 /**
  * batadv_dat_inc_counter() - increment the correct DAT packet counter
@@ -169,6 +171,12 @@ static inline void batadv_dat_free(struct batadv_priv *bat_priv)
 {
 }
 
+static inline int
+batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv,
 					  u8 subtype)
 {
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 129af56b944d..2b3e7c3f87fa 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -45,6 +45,7 @@
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
+#include "distributed-arp-table.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
 #include "originator.h"
@@ -64,39 +65,42 @@ static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
 };
 
 static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
-	[BATADV_ATTR_VERSION]		= { .type = NLA_STRING },
-	[BATADV_ATTR_ALGO_NAME]		= { .type = NLA_STRING },
-	[BATADV_ATTR_MESH_IFINDEX]	= { .type = NLA_U32 },
-	[BATADV_ATTR_MESH_IFNAME]	= { .type = NLA_STRING },
-	[BATADV_ATTR_MESH_ADDRESS]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_HARD_IFINDEX]	= { .type = NLA_U32 },
-	[BATADV_ATTR_HARD_IFNAME]	= { .type = NLA_STRING },
-	[BATADV_ATTR_HARD_ADDRESS]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_ORIG_ADDRESS]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_TPMETER_RESULT]	= { .type = NLA_U8 },
-	[BATADV_ATTR_TPMETER_TEST_TIME]	= { .type = NLA_U32 },
-	[BATADV_ATTR_TPMETER_BYTES]	= { .type = NLA_U64 },
-	[BATADV_ATTR_TPMETER_COOKIE]	= { .type = NLA_U32 },
-	[BATADV_ATTR_ACTIVE]		= { .type = NLA_FLAG },
-	[BATADV_ATTR_TT_ADDRESS]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_TT_TTVN]		= { .type = NLA_U8 },
-	[BATADV_ATTR_TT_LAST_TTVN]	= { .type = NLA_U8 },
-	[BATADV_ATTR_TT_CRC32]		= { .type = NLA_U32 },
-	[BATADV_ATTR_TT_VID]		= { .type = NLA_U16 },
-	[BATADV_ATTR_TT_FLAGS]		= { .type = NLA_U32 },
-	[BATADV_ATTR_FLAG_BEST]		= { .type = NLA_FLAG },
-	[BATADV_ATTR_LAST_SEEN_MSECS]	= { .type = NLA_U32 },
-	[BATADV_ATTR_NEIGH_ADDRESS]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_TQ]		= { .type = NLA_U8 },
-	[BATADV_ATTR_THROUGHPUT]	= { .type = NLA_U32 },
-	[BATADV_ATTR_BANDWIDTH_UP]	= { .type = NLA_U32 },
-	[BATADV_ATTR_BANDWIDTH_DOWN]	= { .type = NLA_U32 },
-	[BATADV_ATTR_ROUTER]		= { .len = ETH_ALEN },
-	[BATADV_ATTR_BLA_OWN]		= { .type = NLA_FLAG },
-	[BATADV_ATTR_BLA_ADDRESS]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_BLA_VID]		= { .type = NLA_U16 },
-	[BATADV_ATTR_BLA_BACKBONE]	= { .len = ETH_ALEN },
-	[BATADV_ATTR_BLA_CRC]		= { .type = NLA_U16 },
+	[BATADV_ATTR_VERSION]			= { .type = NLA_STRING },
+	[BATADV_ATTR_ALGO_NAME]			= { .type = NLA_STRING },
+	[BATADV_ATTR_MESH_IFINDEX]		= { .type = NLA_U32 },
+	[BATADV_ATTR_MESH_IFNAME]		= { .type = NLA_STRING },
+	[BATADV_ATTR_MESH_ADDRESS]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_HARD_IFINDEX]		= { .type = NLA_U32 },
+	[BATADV_ATTR_HARD_IFNAME]		= { .type = NLA_STRING },
+	[BATADV_ATTR_HARD_ADDRESS]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_ORIG_ADDRESS]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_TPMETER_RESULT]		= { .type = NLA_U8 },
+	[BATADV_ATTR_TPMETER_TEST_TIME]		= { .type = NLA_U32 },
+	[BATADV_ATTR_TPMETER_BYTES]		= { .type = NLA_U64 },
+	[BATADV_ATTR_TPMETER_COOKIE]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ACTIVE]			= { .type = NLA_FLAG },
+	[BATADV_ATTR_TT_ADDRESS]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_TT_TTVN]			= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_LAST_TTVN]		= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_CRC32]			= { .type = NLA_U32 },
+	[BATADV_ATTR_TT_VID]			= { .type = NLA_U16 },
+	[BATADV_ATTR_TT_FLAGS]			= { .type = NLA_U32 },
+	[BATADV_ATTR_FLAG_BEST]			= { .type = NLA_FLAG },
+	[BATADV_ATTR_LAST_SEEN_MSECS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_NEIGH_ADDRESS]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_TQ]			= { .type = NLA_U8 },
+	[BATADV_ATTR_THROUGHPUT]		= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_UP]		= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_DOWN]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ROUTER]			= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_OWN]			= { .type = NLA_FLAG },
+	[BATADV_ATTR_BLA_ADDRESS]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_VID]			= { .type = NLA_U16 },
+	[BATADV_ATTR_BLA_BACKBONE]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_CRC]			= { .type = NLA_U16 },
+	[BATADV_ATTR_DAT_CACHE_IP4ADDRESS]	= { .type = NLA_U32 },
+	[BATADV_ATTR_DAT_CACHE_HWADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_DAT_CACHE_VID]		= { .type = NLA_U16 },
 };
 
 /**
@@ -604,6 +608,12 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_bla_backbone_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_DAT_CACHE,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_dat_cache_dump,
+	},
 
 };
 

From 53dd9a68ba683986ec90497586f94b941bb748a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Tue, 13 Mar 2018 11:41:13 +0100
Subject: [PATCH 0904/1678] batman-adv: add multicast flags netlink support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dump the list of multicast flags entries via the netlink socket.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  62 +++++++++
 net/batman-adv/multicast.c      | 237 ++++++++++++++++++++++++++++++++
 net/batman-adv/multicast.h      |  18 +++
 net/batman-adv/netlink.c        |  12 ++
 4 files changed, 329 insertions(+)

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 95ab5dbd09fa..324a0e1143e7 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -91,6 +91,53 @@ enum batadv_tt_client_flags {
 	BATADV_TT_CLIENT_TEMP	 = (1 << 11),
 };
 
+/**
+ * enum batadv_mcast_flags_priv - Private, own multicast flags
+ *
+ * These are internal, multicast related flags. Currently they describe certain
+ * multicast related attributes of the segment this originator bridges into the
+ * mesh.
+ *
+ * Those attributes are used to determine the public multicast flags this
+ * originator is going to announce via TT.
+ *
+ * For netlink, if BATADV_MCAST_FLAGS_BRIDGED is unset then all querier
+ * related flags are undefined.
+ */
+enum batadv_mcast_flags_priv {
+	/**
+	 * @BATADV_MCAST_FLAGS_BRIDGED: There is a bridge on top of the mesh
+	 * interface.
+	 */
+	BATADV_MCAST_FLAGS_BRIDGED			= (1 << 0),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS: Whether an IGMP querier
+	 * exists in the mesh
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS		= (1 << 1),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS: Whether an MLD querier
+	 * exists in the mesh
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS		= (1 << 2),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING: If an IGMP querier
+	 * exists, whether it is potentially shadowing multicast listeners
+	 * (i.e. querier is behind our own bridge segment)
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING	= (1 << 3),
+
+	/**
+	 * @BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING: If an MLD querier
+	 * exists, whether it is potentially shadowing multicast listeners
+	 * (i.e. querier is behind our own bridge segment)
+	 */
+	BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING	= (1 << 4),
+};
+
 /**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  */
@@ -287,6 +334,16 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_DAT_CACHE_VID,
 
+	/**
+	 * @BATADV_ATTR_MCAST_FLAGS: Per originator multicast flags
+	 */
+	BATADV_ATTR_MCAST_FLAGS,
+
+	/**
+	 * @BATADV_ATTR_MCAST_FLAGS_PRIV: Private, own multicast flags
+	 */
+	BATADV_ATTR_MCAST_FLAGS_PRIV,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
@@ -381,6 +438,11 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_GET_DAT_CACHE,
 
+	/**
+	 * @BATADV_CMD_GET_MCAST_FLAGS: Query list of multicast flags
+	 */
+	BATADV_CMD_GET_MCAST_FLAGS,
+
 	/* add new commands above here */
 
 	/**
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index c7a1305ca7e7..5615b6abea6f 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -52,14 +53,20 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 #include <net/addrconf.h>
+#include <net/genetlink.h>
 #include <net/if_inet6.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 #include "tvlv.h"
 
@@ -1333,6 +1340,236 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
 }
 #endif
 
+/**
+ * batadv_mcast_mesh_info_put() - put multicast info into a netlink message
+ * @msg: buffer for the message
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 or error code.
+ */
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+			       struct batadv_priv *bat_priv)
+{
+	u32 flags = bat_priv->mcast.flags;
+	u32 flags_priv = BATADV_NO_FLAGS;
+
+	if (bat_priv->mcast.bridged) {
+		flags_priv |= BATADV_MCAST_FLAGS_BRIDGED;
+
+		if (bat_priv->mcast.querier_ipv4.exists)
+			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_EXISTS;
+		if (bat_priv->mcast.querier_ipv6.exists)
+			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_EXISTS;
+		if (bat_priv->mcast.querier_ipv4.shadowing)
+			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV4_SHADOWING;
+		if (bat_priv->mcast.querier_ipv6.shadowing)
+			flags_priv |= BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS, flags) ||
+	    nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS_PRIV, flags_priv))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_entry() - dump one entry of the multicast flags table
+ *  to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @orig_node: originator to dump the multicast flags of
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			      struct batadv_orig_node *orig_node)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig_node->orig)) {
+		genlmsg_cancel(msg, hdr);
+		return -EMSGSIZE;
+	}
+
+	if (test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+		     &orig_node->capabilities)) {
+		if (nla_put_u32(msg, BATADV_ATTR_MCAST_FLAGS,
+				orig_node->mcast_flags)) {
+			genlmsg_cancel(msg, hdr);
+			return -EMSGSIZE;
+		}
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+}
+
+/**
+ * batadv_mcast_flags_dump_bucket() - dump one bucket of the multicast flags
+ *  table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct hlist_head *head, long *idx_skip)
+{
+	struct batadv_orig_node *orig_node;
+	long idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+			      &orig_node->capa_initialized))
+			continue;
+
+		if (idx < *idx_skip)
+			goto skip;
+
+		if (batadv_mcast_flags_dump_entry(msg, portid, seq,
+						  orig_node)) {
+			rcu_read_unlock();
+			*idx_skip = idx;
+
+			return -EMSGSIZE;
+		}
+
+skip:
+		idx++;
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/**
+ * __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bucket: current bucket to dump
+ * @idx: index in current bucket to the next entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_priv *bat_priv, long *bucket, long *idx)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	long bucket_tmp = *bucket;
+	struct hlist_head *head;
+	long idx_tmp = *idx;
+
+	while (bucket_tmp < hash->size) {
+		head = &hash->table[bucket_tmp];
+
+		if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
+						   &idx_tmp))
+			break;
+
+		bucket_tmp++;
+		idx_tmp = 0;
+	}
+
+	*bucket = bucket_tmp;
+	*idx = idx_tmp;
+
+	return msg->len;
+}
+
+/**
+ * batadv_mcast_netlink_get_primary() - get primary interface from netlink
+ *  callback
+ * @cb: netlink callback structure
+ * @primary_if: the primary interface pointer to return the result in
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
+				 struct batadv_hard_iface **primary_if)
+{
+	struct batadv_hard_iface *hard_iface = NULL;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	hard_iface = batadv_primary_if_get_selected(bat_priv);
+	if (!hard_iface || hard_iface->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+out:
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	if (!ret && primary_if)
+		*primary_if = hard_iface;
+	else
+		batadv_hardif_put(hard_iface);
+
+	return ret;
+}
+
+/**
+ * batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_priv *bat_priv;
+	long *bucket = &cb->args[0];
+	long *idx = &cb->args[1];
+	int ret;
+
+	ret = batadv_mcast_netlink_get_primary(cb, &primary_if);
+	if (ret)
+		return ret;
+
+	bat_priv = netdev_priv(primary_if->soft_iface);
+	ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
+					bat_priv, bucket, idx);
+
+	batadv_hardif_put(primary_if);
+	return ret;
+}
+
 /**
  * batadv_mcast_free() - free the multicast optimizations structures
  * @bat_priv: the bat priv with all the soft interface information
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 6b8594e23da3..3b04ab13f0eb 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -21,6 +21,7 @@
 
 #include "main.h"
 
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -54,6 +55,11 @@ void batadv_mcast_init(struct batadv_priv *bat_priv);
 
 int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
 
+int batadv_mcast_mesh_info_put(struct sk_buff *msg,
+			       struct batadv_priv *bat_priv);
+
+int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb);
+
 void batadv_mcast_free(struct batadv_priv *bat_priv);
 
 void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
@@ -72,6 +78,18 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
 	return 0;
 }
 
+static inline int
+batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
+{
+	return 0;
+}
+
+static inline int batadv_mcast_flags_dump(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void batadv_mcast_free(struct batadv_priv *bat_priv)
 {
 }
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 2b3e7c3f87fa..0d9459b69bdb 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -48,6 +48,7 @@
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
 #include "hard-interface.h"
+#include "multicast.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
@@ -101,6 +102,8 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_DAT_CACHE_IP4ADDRESS]	= { .type = NLA_U32 },
 	[BATADV_ATTR_DAT_CACHE_HWADDRESS]	= { .len = ETH_ALEN },
 	[BATADV_ATTR_DAT_CACHE_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_MCAST_FLAGS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_MCAST_FLAGS_PRIV]		= { .type = NLA_U32 },
 };
 
 /**
@@ -151,6 +154,9 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
 		goto out;
 #endif
 
+	if (batadv_mcast_mesh_info_put(msg, bat_priv))
+		goto out;
+
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
 		hard_iface = primary_if->net_dev;
@@ -614,6 +620,12 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_dat_cache_dump,
 	},
+	{
+		.cmd = BATADV_CMD_GET_MCAST_FLAGS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_mcast_flags_dump,
+	},
 
 };
 

From 38a1390e02b7bfd02cabc98238e859c07d86a6d3 Mon Sep 17 00:00:00 2001
From: Rakesh Pillai <pillair@codeaurora.org>
Date: Tue, 27 Feb 2018 19:09:07 +0200
Subject: [PATCH 0905/1678] ath10k: dma unmap mgmt tx buffer if wmi cmd send
 fails

WCN3990 sends mgmt frames by reference via WMI.
The host dma maps the mgmt frame and sends the physical
address to the firmware in the wmi command. Since the
dma mapping is done in the gen_mgmt_tx and if the wmi
command send fails, the corresponding mgmt frame is
not being dma unmapped.

Fix the missing dma unmapping of mgmt tx frame when
wmi command sending fails for mgmt tx by reference
via WMI. The already exisiting mgmt tx using copy by
value does not need such dma unmapping.
Add a separate wmi-tlv op for mgmt tx via ref, which
takes care of unmapping the dma address, in case of
wmi command sending failure.

Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c     | 28 ++++++++++++++----
 drivers/net/wireless/ath/ath10k/wmi-ops.h | 36 ++++++++++++++++++-----
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 22 ++++++--------
 3 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index ebb3f1b046f3..7e02ca02b28e 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -3808,6 +3809,7 @@ void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work)
 {
 	struct ath10k *ar = container_of(work, struct ath10k, wmi_mgmt_tx_work);
 	struct sk_buff *skb;
+	dma_addr_t paddr;
 	int ret;
 
 	for (;;) {
@@ -3815,11 +3817,27 @@ void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work)
 		if (!skb)
 			break;
 
-		ret = ath10k_wmi_mgmt_tx(ar, skb);
-		if (ret) {
-			ath10k_warn(ar, "failed to transmit management frame via WMI: %d\n",
-				    ret);
-			ieee80211_free_txskb(ar->hw, skb);
+		if (test_bit(ATH10K_FW_FEATURE_MGMT_TX_BY_REF,
+			     ar->running_fw->fw_file.fw_features)) {
+			paddr = dma_map_single(ar->dev, skb->data,
+					       skb->len, DMA_TO_DEVICE);
+			if (!paddr)
+				continue;
+			ret = ath10k_wmi_mgmt_tx_send(ar, skb, paddr);
+			if (ret) {
+				ath10k_warn(ar, "failed to transmit management frame by ref via WMI: %d\n",
+					    ret);
+				dma_unmap_single(ar->dev, paddr, skb->len,
+						 DMA_FROM_DEVICE);
+				ieee80211_free_txskb(ar->hw, skb);
+			}
+		} else {
+			ret = ath10k_wmi_mgmt_tx(ar, skb);
+			if (ret) {
+				ath10k_warn(ar, "failed to transmit management frame via WMI: %d\n",
+					    ret);
+				ieee80211_free_txskb(ar->hw, skb);
+			}
 		}
 	}
 }
diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h
index 14093cfdc505..89d230bc9f6e 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-ops.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -125,6 +126,9 @@ struct wmi_ops {
 					     enum wmi_force_fw_hang_type type,
 					     u32 delay_ms);
 	struct sk_buff *(*gen_mgmt_tx)(struct ath10k *ar, struct sk_buff *skb);
+	struct sk_buff *(*gen_mgmt_tx_send)(struct ath10k *ar,
+					    struct sk_buff *skb,
+					    dma_addr_t paddr);
 	struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u64 module_enable,
 					  u32 log_level);
 	struct sk_buff *(*gen_pktlog_enable)(struct ath10k *ar, u32 filter);
@@ -371,13 +375,34 @@ ath10k_wmi_get_txbf_conf_scheme(struct ath10k *ar)
 	return ar->wmi.ops->get_txbf_conf_scheme(ar);
 }
 
+static inline int
+ath10k_wmi_mgmt_tx_send(struct ath10k *ar, struct sk_buff *msdu,
+			dma_addr_t paddr)
+{
+	struct sk_buff *skb;
+	int ret;
+
+	if (!ar->wmi.ops->gen_mgmt_tx_send)
+		return -EOPNOTSUPP;
+
+	skb = ar->wmi.ops->gen_mgmt_tx_send(ar, msdu, paddr);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	ret = ath10k_wmi_cmd_send(ar, skb,
+				  ar->wmi.cmd->mgmt_tx_send_cmdid);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static inline int
 ath10k_wmi_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu);
 	struct sk_buff *skb;
 	int ret;
-	u32 mgmt_tx_cmdid;
 
 	if (!ar->wmi.ops->gen_mgmt_tx)
 		return -EOPNOTSUPP;
@@ -386,13 +411,8 @@ ath10k_wmi_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu)
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
-	if (test_bit(ATH10K_FW_FEATURE_MGMT_TX_BY_REF,
-		     ar->running_fw->fw_file.fw_features))
-		mgmt_tx_cmdid = ar->wmi.cmd->mgmt_tx_send_cmdid;
-	else
-		mgmt_tx_cmdid = ar->wmi.cmd->mgmt_tx_cmdid;
-
-	ret = ath10k_wmi_cmd_send(ar, skb, mgmt_tx_cmdid);
+	ret = ath10k_wmi_cmd_send(ar, skb,
+				  ar->wmi.cmd->mgmt_tx_cmdid);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index ae77a007ae07..523af3f8bb62 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -2484,19 +2485,19 @@ ath10k_wmi_tlv_op_gen_request_stats(struct ath10k *ar, u32 stats_mask)
 }
 
 static struct sk_buff *
-ath10k_wmi_tlv_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu)
+ath10k_wmi_tlv_op_gen_mgmt_tx_send(struct ath10k *ar, struct sk_buff *msdu,
+				   dma_addr_t paddr)
 {
 	struct ath10k_skb_cb *cb = ATH10K_SKB_CB(msdu);
 	struct wmi_tlv_mgmt_tx_cmd *cmd;
-	struct wmi_tlv *tlv;
 	struct ieee80211_hdr *hdr;
+	struct ath10k_vif *arvif;
+	u32 buf_len = msdu->len;
+	struct wmi_tlv *tlv;
 	struct sk_buff *skb;
+	u32 vdev_id;
 	void *ptr;
 	int len;
-	u32 buf_len = msdu->len;
-	struct ath10k_vif *arvif;
-	dma_addr_t mgmt_frame_dma;
-	u32 vdev_id;
 
 	if (!cb->vif)
 		return ERR_PTR(-EINVAL);
@@ -2537,12 +2538,7 @@ ath10k_wmi_tlv_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu)
 	cmd->chanfreq = 0;
 	cmd->buf_len = __cpu_to_le32(buf_len);
 	cmd->frame_len = __cpu_to_le32(msdu->len);
-	mgmt_frame_dma = dma_map_single(arvif->ar->dev, msdu->data,
-					msdu->len, DMA_TO_DEVICE);
-	if (!mgmt_frame_dma)
-		return ERR_PTR(-ENOMEM);
-
-	cmd->paddr = __cpu_to_le64(mgmt_frame_dma);
+	cmd->paddr = __cpu_to_le64(paddr);
 
 	ptr += sizeof(*tlv);
 	ptr += sizeof(*cmd);
@@ -3701,7 +3697,7 @@ static const struct wmi_ops wmi_tlv_ops = {
 	.gen_request_stats = ath10k_wmi_tlv_op_gen_request_stats,
 	.gen_force_fw_hang = ath10k_wmi_tlv_op_gen_force_fw_hang,
 	/* .gen_mgmt_tx = not implemented; HTT is used */
-	.gen_mgmt_tx =  ath10k_wmi_tlv_op_gen_mgmt_tx,
+	.gen_mgmt_tx_send = ath10k_wmi_tlv_op_gen_mgmt_tx_send,
 	.gen_dbglog_cfg = ath10k_wmi_tlv_op_gen_dbglog_cfg,
 	.gen_pktlog_enable = ath10k_wmi_tlv_op_gen_pktlog_enable,
 	.gen_pktlog_disable = ath10k_wmi_tlv_op_gen_pktlog_disable,

From 182b1917109892ab9f26d66bfdcbc4ba6f0a0a65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
Date: Tue, 27 Feb 2018 19:09:44 +0200
Subject: [PATCH 0906/1678] ath9k: Protect queue draining by rcu_read_lock()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When ath9k was switched over to use the mac80211 intermediate queues,
node cleanup now drains the mac80211 queues. However, this call path is
not protected by rcu_read_lock() as it was previously entirely internal
to the driver which uses its own locking.

This leads to a possible rcu_dereference() without holding
rcu_read_lock(); but only if a station is cleaned up while having
packets queued on the TXQ. Fix this by adding the rcu_read_lock() to the
caller in ath9k.

Fixes: 50f08edf9809 ("ath9k: Switch to using mac80211 intermediate software queues.")
Cc: stable@vger.kernel.org
Reported-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/xmit.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 396bf05c6bf6..d8b041f48ca8 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -2892,6 +2892,8 @@ void ath_tx_node_cleanup(struct ath_softc *sc, struct ath_node *an)
 	struct ath_txq *txq;
 	int tidno;
 
+	rcu_read_lock();
+
 	for (tidno = 0; tidno < IEEE80211_NUM_TIDS; tidno++) {
 		tid = ath_node_to_tid(an, tidno);
 		txq = tid->txq;
@@ -2909,6 +2911,8 @@ void ath_tx_node_cleanup(struct ath_softc *sc, struct ath_node *an)
 		if (!an->sta)
 			break; /* just one multicast ath_atx_tid */
 	}
+
+	rcu_read_unlock();
 }
 
 #ifdef CONFIG_ATH9K_TX99

From 29d1df72ce2ac187d457990fe445a16212dcfa19 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 14 Mar 2018 03:07:27 -0500
Subject: [PATCH 0907/1678] pktgen: Fix memory leak in pktgen_if_write

_buf_ is an array and the one that must be freed is _tp_ instead.

Fixes: a870a02cc963 ("pktgen: use dynamic allocation for debug print buffer")
Reported-by: Wang Jian <jianjian.wang1@gmail.com>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fd65761e1fed..545cf08cd558 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -913,7 +913,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			return PTR_ERR(tp);
 
 		pr_debug("%s,%zu  buffer -:%s:-\n", name, count, tp);
-		kfree(buf);
+		kfree(tp);
 	}
 
 	if (!strcmp(name, "min_pkt_size")) {

From ced68234b6a244355aeab07c2681bc49f695eaed Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 14 Mar 2018 12:49:19 -0400
Subject: [PATCH 0908/1678] sock: remove zerocopy sockopt restriction on closed
 tcp state

Socket option SO_ZEROCOPY determines whether the kernel ignores or
processes flag MSG_ZEROCOPY on subsequent send calls. This to avoid
changing behavior for legacy processes.

Limiting the state change to closed sockets is annoying with passive
sockets and not necessary for correctness. Once created, zerocopy skbs
are processed based on their private state, not this socket flag.

Remove the constraint.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/msg_zerocopy.rst | 5 -----
 net/core/sock.c                           | 2 --
 2 files changed, 7 deletions(-)

diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
index 291a01264967..fe46d4867e2d 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -72,11 +72,6 @@ this flag, a process must first signal intent by setting a socket option:
 	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
 		error(1, errno, "setsockopt zerocopy");
 
-Setting the socket option only works when the socket is in its initial
-(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
-for example, will lead to an EBUSY error. In this case, the option should be set
-to the listening socket and it will be inherited by the accepted sockets.
-
 Transmission
 ------------
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 27f218bba43f..a8962d912895 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1052,8 +1052,6 @@ set_rcvbuf:
 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
 			if (sk->sk_protocol != IPPROTO_TCP)
 				ret = -ENOTSUPP;
-			else if (sk->sk_state != TCP_CLOSE)
-				ret = -EBUSY;
 		} else if (sk->sk_family != PF_RDS) {
 			ret = -ENOTSUPP;
 		}

From e676d81c8990f511d60698a1a8abaa438b3f9d3d Mon Sep 17 00:00:00 2001
From: John Allen <jallen@linux.vnet.ibm.com>
Date: Wed, 14 Mar 2018 10:41:29 -0500
Subject: [PATCH 0909/1678] ibmvnic: Fix reset return from closed state

The case in which we handle a reset from the state where the device is
closed seems to be bugged for all types of reset. For most types of reset
we currently exit the reset routine correctly, but don't set the state to
indicate that we are back in the "closed" state. For some specific cases,
we don't exit the reset routine at all and resetting will cause a closed
device to be opened.

This patch fixes the problem by unconditionally checking the reset_state
and correctly setting the adapter state before returning.

Signed-off-by: John Allen <jallen@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 6ff43d7e5a72..9c7d19c926f9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1739,12 +1739,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			rc = reset_rx_pools(adapter);
 			if (rc)
 				return rc;
-
-			if (reset_state == VNIC_CLOSED)
-				return 0;
 		}
 	}
 
+	adapter->state = VNIC_CLOSED;
+
+	if (reset_state == VNIC_CLOSED)
+		return 0;
+
 	rc = __ibmvnic_open(netdev);
 	if (rc) {
 		if (list_empty(&adapter->rwi_list))

From f215347cc0fcf76933d6bdab95e253724b823625 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 14 Mar 2018 14:21:00 +0000
Subject: [PATCH 0910/1678] sfc: update MCDI protocol headers

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/mcdi_pcol.h | 2822 ++++++++++++++++----------
 1 file changed, 1776 insertions(+), 1046 deletions(-)

diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index 869d76f8f589..3839eec783ea 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -273,7 +273,8 @@
 #define MC_CMD_ERR_NO_PRIVILEGE 0x1013
 /* Workaround 26807 could not be turned on/off because some functions
  * have already installed filters. See the comment at
- * MC_CMD_WORKAROUND_BUG26807. */
+ * MC_CMD_WORKAROUND_BUG26807.
+ * May also returned for other operations such as sub-variant switching. */
 #define MC_CMD_ERR_FILTERS_PRESENT 0x1014
 /* The clock whose frequency you've attempted to set set
  * doesn't exist on this NIC */
@@ -292,6 +293,10 @@
  * away.  This is distinct from MC_CMD_ERR_DATAPATH_DISABLED in that the
  * datapath absence may be temporary*/
 #define MC_CMD_ERR_NO_DATAPATH 0x1019
+/* The operation could not complete because some VIs are allocated */
+#define MC_CMD_ERR_VIS_PRESENT 0x101a
+/* The operation could not complete because some PIO buffers are allocated */
+#define MC_CMD_ERR_PIOBUFS_PRESENT 0x101b
 
 #define MC_CMD_ERR_CODE_OFST 0
 
@@ -312,10 +317,17 @@
 #define SIENA_MC_BOOTROM_COPYCODE_VEC (0x800 - 3 * 0x4)
 #define HUNT_MC_BOOTROM_COPYCODE_VEC (0x8000 - 3 * 0x4)
 #define MEDFORD_MC_BOOTROM_COPYCODE_VEC (0x10000 - 3 * 0x4)
-/* Points to the recovery mode entry point. */
+/* Points to the recovery mode entry point. Misnamed but kept for compatibility. */
 #define SIENA_MC_BOOTROM_NOFLASH_VEC (0x800 - 2 * 0x4)
 #define HUNT_MC_BOOTROM_NOFLASH_VEC (0x8000 - 2 * 0x4)
 #define MEDFORD_MC_BOOTROM_NOFLASH_VEC (0x10000 - 2 * 0x4)
+/* Points to the recovery mode entry point. Same as above, but the right name. */
+#define SIENA_MC_BOOTROM_RECOVERY_VEC (0x800 - 2 * 0x4)
+#define HUNT_MC_BOOTROM_RECOVERY_VEC (0x8000 - 2 * 0x4)
+#define MEDFORD_MC_BOOTROM_RECOVERY_VEC (0x10000 - 2 * 0x4)
+
+/* Points to noflash mode entry point. */
+#define MEDFORD_MC_BOOTROM_REAL_NOFLASH_VEC (0x10000 - 4 * 0x4)
 
 /* The command set exported by the boot ROM (MCDI v0) */
 #define MC_CMD_GET_VERSION_V0_SUPPORTED_FUNCS {		\
@@ -365,7 +377,7 @@
 #define       MCDI_EVENT_LEVEL_LBN 33
 #define       MCDI_EVENT_LEVEL_WIDTH 3
 /* enum: Info. */
-#define          MCDI_EVENT_LEVEL_INFO  0x0
+#define          MCDI_EVENT_LEVEL_INFO 0x0
 /* enum: Warning. */
 #define          MCDI_EVENT_LEVEL_WARN 0x1
 /* enum: Error. */
@@ -385,21 +397,21 @@
 #define        MCDI_EVENT_LINKCHANGE_SPEED_LBN 16
 #define        MCDI_EVENT_LINKCHANGE_SPEED_WIDTH 4
 /* enum: Link is down or link speed could not be determined */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN  0x0
+#define          MCDI_EVENT_LINKCHANGE_SPEED_UNKNOWN 0x0
 /* enum: 100Mbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_100M  0x1
+#define          MCDI_EVENT_LINKCHANGE_SPEED_100M 0x1
 /* enum: 1Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_1G  0x2
+#define          MCDI_EVENT_LINKCHANGE_SPEED_1G 0x2
 /* enum: 10Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_10G  0x3
+#define          MCDI_EVENT_LINKCHANGE_SPEED_10G 0x3
 /* enum: 40Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_40G  0x4
+#define          MCDI_EVENT_LINKCHANGE_SPEED_40G 0x4
 /* enum: 25Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_25G  0x5
+#define          MCDI_EVENT_LINKCHANGE_SPEED_25G 0x5
 /* enum: 50Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_50G  0x6
+#define          MCDI_EVENT_LINKCHANGE_SPEED_50G 0x6
 /* enum: 100Gbs */
-#define          MCDI_EVENT_LINKCHANGE_SPEED_100G  0x7
+#define          MCDI_EVENT_LINKCHANGE_SPEED_100G 0x7
 #define        MCDI_EVENT_LINKCHANGE_FCNTL_LBN 20
 #define        MCDI_EVENT_LINKCHANGE_FCNTL_WIDTH 4
 #define        MCDI_EVENT_LINKCHANGE_LINK_FLAGS_LBN 24
@@ -606,23 +618,23 @@
 /* enum: Transmit error */
 #define          MCDI_EVENT_CODE_TX_ERR 0xb
 /* enum: Tx flush has completed */
-#define          MCDI_EVENT_CODE_TX_FLUSH  0xc
+#define          MCDI_EVENT_CODE_TX_FLUSH 0xc
 /* enum: PTP packet received timestamp */
-#define          MCDI_EVENT_CODE_PTP_RX  0xd
+#define          MCDI_EVENT_CODE_PTP_RX 0xd
 /* enum: PTP NIC failure */
-#define          MCDI_EVENT_CODE_PTP_FAULT  0xe
+#define          MCDI_EVENT_CODE_PTP_FAULT 0xe
 /* enum: PTP PPS event */
-#define          MCDI_EVENT_CODE_PTP_PPS  0xf
+#define          MCDI_EVENT_CODE_PTP_PPS 0xf
 /* enum: Rx flush has completed */
-#define          MCDI_EVENT_CODE_RX_FLUSH  0x10
+#define          MCDI_EVENT_CODE_RX_FLUSH 0x10
 /* enum: Receive error */
 #define          MCDI_EVENT_CODE_RX_ERR 0x11
 /* enum: AOE fault */
-#define          MCDI_EVENT_CODE_AOE  0x12
+#define          MCDI_EVENT_CODE_AOE 0x12
 /* enum: Network port calibration failed (VCAL). */
-#define          MCDI_EVENT_CODE_VCAL_FAIL  0x13
+#define          MCDI_EVENT_CODE_VCAL_FAIL 0x13
 /* enum: HW PPS event */
-#define          MCDI_EVENT_CODE_HW_PPS  0x14
+#define          MCDI_EVENT_CODE_HW_PPS 0x14
 /* enum: The MC has rebooted (huntington and later, siena uses CODE_REBOOT and
  * a different format)
  */
@@ -654,7 +666,7 @@
 /* enum: Artificial event generated by host and posted via MC for test
  * purposes.
  */
-#define          MCDI_EVENT_CODE_TESTGEN  0xfa
+#define          MCDI_EVENT_CODE_TESTGEN 0xfa
 #define       MCDI_EVENT_CMDDONE_DATA_OFST 0
 #define       MCDI_EVENT_CMDDONE_DATA_LEN 4
 #define       MCDI_EVENT_CMDDONE_DATA_LBN 0
@@ -784,7 +796,7 @@
 #define       FCDI_EVENT_LEVEL_LBN 33
 #define       FCDI_EVENT_LEVEL_WIDTH 3
 /* enum: Info. */
-#define          FCDI_EVENT_LEVEL_INFO  0x0
+#define          FCDI_EVENT_LEVEL_INFO 0x0
 /* enum: Warning. */
 #define          FCDI_EVENT_LEVEL_WARN 0x1
 /* enum: Error. */
@@ -916,7 +928,7 @@
 #define       MUM_EVENT_LEVEL_LBN 33
 #define       MUM_EVENT_LEVEL_WIDTH 3
 /* enum: Info. */
-#define          MUM_EVENT_LEVEL_INFO  0x0
+#define          MUM_EVENT_LEVEL_INFO 0x0
 /* enum: Warning. */
 #define          MUM_EVENT_LEVEL_WARN 0x1
 /* enum: Error. */
@@ -1002,7 +1014,9 @@
 
 /***********************************/
 /* MC_CMD_READ32
- * Read multiple 32byte words from MC memory.
+ * Read multiple 32byte words from MC memory. Note - this command really
+ * belongs to INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
  */
 #define MC_CMD_READ32 0x1
 
@@ -1050,7 +1064,9 @@
 
 /***********************************/
 /* MC_CMD_COPYCODE
- * Copy MC code between two locations and jump.
+ * Copy MC code between two locations and jump. Note - this command really
+ * belongs to INSECURE category but is required by shmboot. The command handler
+ * has additional checks to reject insecure calls.
  */
 #define MC_CMD_COPYCODE 0x3
 
@@ -1139,7 +1155,7 @@
 #define       MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_OFST 0
 #define       MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_LEN 4
 /* enum: indicates that the MC wasn't flash booted */
-#define          MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL  0xdeadbeef
+#define          MC_CMD_GET_BOOT_STATUS_OUT_BOOT_OFFSET_NULL 0xdeadbeef
 #define       MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_OFST 4
 #define       MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_LEN 4
 #define        MC_CMD_GET_BOOT_STATUS_OUT_FLAGS_WATCHDOG_LBN 0
@@ -1555,11 +1571,10 @@
 #define       MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_OFST 8
 #define       MC_CMD_PTP_IN_MANFTEST_PACKET_TEST_ENABLE_LEN 4
 
-/* MC_CMD_PTP_IN_RESET_STATS msgrequest */
+/* MC_CMD_PTP_IN_RESET_STATS msgrequest: Reset PTP statistics */
 #define    MC_CMD_PTP_IN_RESET_STATS_LEN 8
 /*            MC_CMD_PTP_IN_CMD_OFST 0 */
 /*            MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset PTP statistics */
 /*            MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
 /*            MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
 
@@ -1710,11 +1725,10 @@
 /* enum: External. */
 #define          MC_CMD_PTP_CLK_SRC_EXTERNAL 0x1
 
-/* MC_CMD_PTP_IN_RST_CLK msgrequest */
+/* MC_CMD_PTP_IN_RST_CLK msgrequest: Reset value of Timer Reg. */
 #define    MC_CMD_PTP_IN_RST_CLK_LEN 8
 /*            MC_CMD_PTP_IN_CMD_OFST 0 */
 /*            MC_CMD_PTP_IN_CMD_LEN 4 */
-/* Reset value of Timer Reg. */
 /*            MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */
 /*            MC_CMD_PTP_IN_PERIPH_ID_LEN 4 */
 
@@ -2687,8 +2701,16 @@
 #define       MC_CMD_DRV_ATTACH_IN_NEW_STATE_LEN 4
 #define        MC_CMD_DRV_ATTACH_LBN 0
 #define        MC_CMD_DRV_ATTACH_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_ATTACH_LBN 0
+#define        MC_CMD_DRV_ATTACH_IN_ATTACH_WIDTH 1
 #define        MC_CMD_DRV_PREBOOT_LBN 1
 #define        MC_CMD_DRV_PREBOOT_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_PREBOOT_LBN 1
+#define        MC_CMD_DRV_ATTACH_IN_PREBOOT_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_LBN 2
+#define        MC_CMD_DRV_ATTACH_IN_SUBVARIANT_AWARE_WIDTH 1
+#define        MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_LBN 3
+#define        MC_CMD_DRV_ATTACH_IN_WANT_VI_SPREADING_WIDTH 1
 /* 1 to set new state, or 0 to just report the existing state */
 #define       MC_CMD_DRV_ATTACH_IN_UPDATE_OFST 4
 #define       MC_CMD_DRV_ATTACH_IN_UPDATE_LEN 4
@@ -2711,8 +2733,14 @@
  * support
  */
 #define          MC_CMD_FW_RULES_ENGINE 0x5
+/* enum: Prefer to use firmware with additional DPDK support */
+#define          MC_CMD_FW_DPDK 0x6
+/* enum: Prefer to use "l3xudp" custom datapath firmware (see SF-119495-PD and
+ * bug69716)
+ */
+#define          MC_CMD_FW_L3XUDP 0x7
 /* enum: Only this option is allowed for non-admin functions */
-#define          MC_CMD_FW_DONT_CARE  0xffffffff
+#define          MC_CMD_FW_DONT_CARE 0xffffffff
 
 /* MC_CMD_DRV_ATTACH_OUT msgresponse */
 #define    MC_CMD_DRV_ATTACH_OUT_LEN 4
@@ -2740,6 +2768,11 @@
  * refers to the Sorrento external FPGA port.
  */
 #define          MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_NO_ACTIVE_PORT 0x3
+/* enum: If set, indicates that VI spreading is currently enabled. Will always
+ * indicate the current state, regardless of the value in the WANT_VI_SPREADING
+ * input.
+ */
+#define          MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_VI_SPREADING_ENABLED 0x4
 
 
 /***********************************/
@@ -3294,83 +3327,83 @@
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_100M_LO_OFST 0
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_100M_HI_OFST 4
 /* enum: None. */
-#define          MC_CMD_LOOPBACK_NONE  0x0
+#define          MC_CMD_LOOPBACK_NONE 0x0
 /* enum: Data. */
-#define          MC_CMD_LOOPBACK_DATA  0x1
+#define          MC_CMD_LOOPBACK_DATA 0x1
 /* enum: GMAC. */
-#define          MC_CMD_LOOPBACK_GMAC  0x2
+#define          MC_CMD_LOOPBACK_GMAC 0x2
 /* enum: XGMII. */
 #define          MC_CMD_LOOPBACK_XGMII 0x3
 /* enum: XGXS. */
-#define          MC_CMD_LOOPBACK_XGXS  0x4
+#define          MC_CMD_LOOPBACK_XGXS 0x4
 /* enum: XAUI. */
-#define          MC_CMD_LOOPBACK_XAUI  0x5
+#define          MC_CMD_LOOPBACK_XAUI 0x5
 /* enum: GMII. */
-#define          MC_CMD_LOOPBACK_GMII  0x6
+#define          MC_CMD_LOOPBACK_GMII 0x6
 /* enum: SGMII. */
-#define          MC_CMD_LOOPBACK_SGMII  0x7
+#define          MC_CMD_LOOPBACK_SGMII 0x7
 /* enum: XGBR. */
-#define          MC_CMD_LOOPBACK_XGBR  0x8
+#define          MC_CMD_LOOPBACK_XGBR 0x8
 /* enum: XFI. */
-#define          MC_CMD_LOOPBACK_XFI  0x9
+#define          MC_CMD_LOOPBACK_XFI 0x9
 /* enum: XAUI Far. */
-#define          MC_CMD_LOOPBACK_XAUI_FAR  0xa
+#define          MC_CMD_LOOPBACK_XAUI_FAR 0xa
 /* enum: GMII Far. */
-#define          MC_CMD_LOOPBACK_GMII_FAR  0xb
+#define          MC_CMD_LOOPBACK_GMII_FAR 0xb
 /* enum: SGMII Far. */
-#define          MC_CMD_LOOPBACK_SGMII_FAR  0xc
+#define          MC_CMD_LOOPBACK_SGMII_FAR 0xc
 /* enum: XFI Far. */
-#define          MC_CMD_LOOPBACK_XFI_FAR  0xd
+#define          MC_CMD_LOOPBACK_XFI_FAR 0xd
 /* enum: GPhy. */
-#define          MC_CMD_LOOPBACK_GPHY  0xe
+#define          MC_CMD_LOOPBACK_GPHY 0xe
 /* enum: PhyXS. */
-#define          MC_CMD_LOOPBACK_PHYXS  0xf
+#define          MC_CMD_LOOPBACK_PHYXS 0xf
 /* enum: PCS. */
-#define          MC_CMD_LOOPBACK_PCS  0x10
+#define          MC_CMD_LOOPBACK_PCS 0x10
 /* enum: PMA-PMD. */
-#define          MC_CMD_LOOPBACK_PMAPMD  0x11
+#define          MC_CMD_LOOPBACK_PMAPMD 0x11
 /* enum: Cross-Port. */
-#define          MC_CMD_LOOPBACK_XPORT  0x12
+#define          MC_CMD_LOOPBACK_XPORT 0x12
 /* enum: XGMII-Wireside. */
-#define          MC_CMD_LOOPBACK_XGMII_WS  0x13
+#define          MC_CMD_LOOPBACK_XGMII_WS 0x13
 /* enum: XAUI Wireside. */
-#define          MC_CMD_LOOPBACK_XAUI_WS  0x14
+#define          MC_CMD_LOOPBACK_XAUI_WS 0x14
 /* enum: XAUI Wireside Far. */
-#define          MC_CMD_LOOPBACK_XAUI_WS_FAR  0x15
+#define          MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15
 /* enum: XAUI Wireside near. */
-#define          MC_CMD_LOOPBACK_XAUI_WS_NEAR  0x16
+#define          MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16
 /* enum: GMII Wireside. */
-#define          MC_CMD_LOOPBACK_GMII_WS  0x17
+#define          MC_CMD_LOOPBACK_GMII_WS 0x17
 /* enum: XFI Wireside. */
-#define          MC_CMD_LOOPBACK_XFI_WS  0x18
+#define          MC_CMD_LOOPBACK_XFI_WS 0x18
 /* enum: XFI Wireside Far. */
-#define          MC_CMD_LOOPBACK_XFI_WS_FAR  0x19
+#define          MC_CMD_LOOPBACK_XFI_WS_FAR 0x19
 /* enum: PhyXS Wireside. */
-#define          MC_CMD_LOOPBACK_PHYXS_WS  0x1a
+#define          MC_CMD_LOOPBACK_PHYXS_WS 0x1a
 /* enum: PMA lanes MAC-Serdes. */
-#define          MC_CMD_LOOPBACK_PMA_INT  0x1b
+#define          MC_CMD_LOOPBACK_PMA_INT 0x1b
 /* enum: KR Serdes Parallel (Encoder). */
-#define          MC_CMD_LOOPBACK_SD_NEAR  0x1c
+#define          MC_CMD_LOOPBACK_SD_NEAR 0x1c
 /* enum: KR Serdes Serial. */
-#define          MC_CMD_LOOPBACK_SD_FAR  0x1d
+#define          MC_CMD_LOOPBACK_SD_FAR 0x1d
 /* enum: PMA lanes MAC-Serdes Wireside. */
-#define          MC_CMD_LOOPBACK_PMA_INT_WS  0x1e
+#define          MC_CMD_LOOPBACK_PMA_INT_WS 0x1e
 /* enum: KR Serdes Parallel Wireside (Full PCS). */
-#define          MC_CMD_LOOPBACK_SD_FEP2_WS  0x1f
+#define          MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f
 /* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-#define          MC_CMD_LOOPBACK_SD_FEP1_5_WS  0x20
+#define          MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20
 /* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-#define          MC_CMD_LOOPBACK_SD_FEP_WS  0x21
+#define          MC_CMD_LOOPBACK_SD_FEP_WS 0x21
 /* enum: KR Serdes Serial Wireside. */
-#define          MC_CMD_LOOPBACK_SD_FES_WS  0x22
+#define          MC_CMD_LOOPBACK_SD_FES_WS 0x22
 /* enum: Near side of AOE Siena side port */
-#define          MC_CMD_LOOPBACK_AOE_INT_NEAR  0x23
+#define          MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23
 /* enum: Medford Wireside datapath loopback */
-#define          MC_CMD_LOOPBACK_DATA_WS  0x24
+#define          MC_CMD_LOOPBACK_DATA_WS 0x24
 /* enum: Force link up without setting up any physical loopback (snapper use
  * only)
  */
-#define          MC_CMD_LOOPBACK_FORCE_EXT_LINK  0x25
+#define          MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25
 /* Supported loopbacks. */
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_1G_OFST 8
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_1G_LEN 8
@@ -3410,83 +3443,83 @@
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_LO_OFST 0
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_100M_HI_OFST 4
 /* enum: None. */
-/*               MC_CMD_LOOPBACK_NONE  0x0 */
+/*               MC_CMD_LOOPBACK_NONE 0x0 */
 /* enum: Data. */
-/*               MC_CMD_LOOPBACK_DATA  0x1 */
+/*               MC_CMD_LOOPBACK_DATA 0x1 */
 /* enum: GMAC. */
-/*               MC_CMD_LOOPBACK_GMAC  0x2 */
+/*               MC_CMD_LOOPBACK_GMAC 0x2 */
 /* enum: XGMII. */
 /*               MC_CMD_LOOPBACK_XGMII 0x3 */
 /* enum: XGXS. */
-/*               MC_CMD_LOOPBACK_XGXS  0x4 */
+/*               MC_CMD_LOOPBACK_XGXS 0x4 */
 /* enum: XAUI. */
-/*               MC_CMD_LOOPBACK_XAUI  0x5 */
+/*               MC_CMD_LOOPBACK_XAUI 0x5 */
 /* enum: GMII. */
-/*               MC_CMD_LOOPBACK_GMII  0x6 */
+/*               MC_CMD_LOOPBACK_GMII 0x6 */
 /* enum: SGMII. */
-/*               MC_CMD_LOOPBACK_SGMII  0x7 */
+/*               MC_CMD_LOOPBACK_SGMII 0x7 */
 /* enum: XGBR. */
-/*               MC_CMD_LOOPBACK_XGBR  0x8 */
+/*               MC_CMD_LOOPBACK_XGBR 0x8 */
 /* enum: XFI. */
-/*               MC_CMD_LOOPBACK_XFI  0x9 */
+/*               MC_CMD_LOOPBACK_XFI 0x9 */
 /* enum: XAUI Far. */
-/*               MC_CMD_LOOPBACK_XAUI_FAR  0xa */
+/*               MC_CMD_LOOPBACK_XAUI_FAR 0xa */
 /* enum: GMII Far. */
-/*               MC_CMD_LOOPBACK_GMII_FAR  0xb */
+/*               MC_CMD_LOOPBACK_GMII_FAR 0xb */
 /* enum: SGMII Far. */
-/*               MC_CMD_LOOPBACK_SGMII_FAR  0xc */
+/*               MC_CMD_LOOPBACK_SGMII_FAR 0xc */
 /* enum: XFI Far. */
-/*               MC_CMD_LOOPBACK_XFI_FAR  0xd */
+/*               MC_CMD_LOOPBACK_XFI_FAR 0xd */
 /* enum: GPhy. */
-/*               MC_CMD_LOOPBACK_GPHY  0xe */
+/*               MC_CMD_LOOPBACK_GPHY 0xe */
 /* enum: PhyXS. */
-/*               MC_CMD_LOOPBACK_PHYXS  0xf */
+/*               MC_CMD_LOOPBACK_PHYXS 0xf */
 /* enum: PCS. */
-/*               MC_CMD_LOOPBACK_PCS  0x10 */
+/*               MC_CMD_LOOPBACK_PCS 0x10 */
 /* enum: PMA-PMD. */
-/*               MC_CMD_LOOPBACK_PMAPMD  0x11 */
+/*               MC_CMD_LOOPBACK_PMAPMD 0x11 */
 /* enum: Cross-Port. */
-/*               MC_CMD_LOOPBACK_XPORT  0x12 */
+/*               MC_CMD_LOOPBACK_XPORT 0x12 */
 /* enum: XGMII-Wireside. */
-/*               MC_CMD_LOOPBACK_XGMII_WS  0x13 */
+/*               MC_CMD_LOOPBACK_XGMII_WS 0x13 */
 /* enum: XAUI Wireside. */
-/*               MC_CMD_LOOPBACK_XAUI_WS  0x14 */
+/*               MC_CMD_LOOPBACK_XAUI_WS 0x14 */
 /* enum: XAUI Wireside Far. */
-/*               MC_CMD_LOOPBACK_XAUI_WS_FAR  0x15 */
+/*               MC_CMD_LOOPBACK_XAUI_WS_FAR 0x15 */
 /* enum: XAUI Wireside near. */
-/*               MC_CMD_LOOPBACK_XAUI_WS_NEAR  0x16 */
+/*               MC_CMD_LOOPBACK_XAUI_WS_NEAR 0x16 */
 /* enum: GMII Wireside. */
-/*               MC_CMD_LOOPBACK_GMII_WS  0x17 */
+/*               MC_CMD_LOOPBACK_GMII_WS 0x17 */
 /* enum: XFI Wireside. */
-/*               MC_CMD_LOOPBACK_XFI_WS  0x18 */
+/*               MC_CMD_LOOPBACK_XFI_WS 0x18 */
 /* enum: XFI Wireside Far. */
-/*               MC_CMD_LOOPBACK_XFI_WS_FAR  0x19 */
+/*               MC_CMD_LOOPBACK_XFI_WS_FAR 0x19 */
 /* enum: PhyXS Wireside. */
-/*               MC_CMD_LOOPBACK_PHYXS_WS  0x1a */
+/*               MC_CMD_LOOPBACK_PHYXS_WS 0x1a */
 /* enum: PMA lanes MAC-Serdes. */
-/*               MC_CMD_LOOPBACK_PMA_INT  0x1b */
+/*               MC_CMD_LOOPBACK_PMA_INT 0x1b */
 /* enum: KR Serdes Parallel (Encoder). */
-/*               MC_CMD_LOOPBACK_SD_NEAR  0x1c */
+/*               MC_CMD_LOOPBACK_SD_NEAR 0x1c */
 /* enum: KR Serdes Serial. */
-/*               MC_CMD_LOOPBACK_SD_FAR  0x1d */
+/*               MC_CMD_LOOPBACK_SD_FAR 0x1d */
 /* enum: PMA lanes MAC-Serdes Wireside. */
-/*               MC_CMD_LOOPBACK_PMA_INT_WS  0x1e */
+/*               MC_CMD_LOOPBACK_PMA_INT_WS 0x1e */
 /* enum: KR Serdes Parallel Wireside (Full PCS). */
-/*               MC_CMD_LOOPBACK_SD_FEP2_WS  0x1f */
+/*               MC_CMD_LOOPBACK_SD_FEP2_WS 0x1f */
 /* enum: KR Serdes Parallel Wireside (Sym Aligner to TX). */
-/*               MC_CMD_LOOPBACK_SD_FEP1_5_WS  0x20 */
+/*               MC_CMD_LOOPBACK_SD_FEP1_5_WS 0x20 */
 /* enum: KR Serdes Parallel Wireside (Deserializer to Serializer). */
-/*               MC_CMD_LOOPBACK_SD_FEP_WS  0x21 */
+/*               MC_CMD_LOOPBACK_SD_FEP_WS 0x21 */
 /* enum: KR Serdes Serial Wireside. */
-/*               MC_CMD_LOOPBACK_SD_FES_WS  0x22 */
+/*               MC_CMD_LOOPBACK_SD_FES_WS 0x22 */
 /* enum: Near side of AOE Siena side port */
-/*               MC_CMD_LOOPBACK_AOE_INT_NEAR  0x23 */
+/*               MC_CMD_LOOPBACK_AOE_INT_NEAR 0x23 */
 /* enum: Medford Wireside datapath loopback */
-/*               MC_CMD_LOOPBACK_DATA_WS  0x24 */
+/*               MC_CMD_LOOPBACK_DATA_WS 0x24 */
 /* enum: Force link up without setting up any physical loopback (snapper use
  * only)
  */
-/*               MC_CMD_LOOPBACK_FORCE_EXT_LINK  0x25 */
+/*               MC_CMD_LOOPBACK_FORCE_EXT_LINK 0x25 */
 /* Supported loopbacks. */
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_OFST 8
 #define       MC_CMD_GET_LOOPBACK_MODES_OUT_V2_1G_LEN 8
@@ -3537,6 +3570,37 @@
 /*            Enum values, see field(s): */
 /*               100M */
 
+/* AN_TYPE structuredef: Auto-negotiation types defined in IEEE802.3 */
+#define    AN_TYPE_LEN 4
+#define       AN_TYPE_TYPE_OFST 0
+#define       AN_TYPE_TYPE_LEN 4
+/* enum: None, AN disabled or not supported */
+#define          MC_CMD_AN_NONE 0x0
+/* enum: Clause 28 - BASE-T */
+#define          MC_CMD_AN_CLAUSE28 0x1
+/* enum: Clause 37 - BASE-X */
+#define          MC_CMD_AN_CLAUSE37 0x2
+/* enum: Clause 73 - BASE-R startup protocol for backplane and copper cable
+ * assemblies. Includes Clause 72/Clause 92 link-training.
+ */
+#define          MC_CMD_AN_CLAUSE73 0x3
+#define       AN_TYPE_TYPE_LBN 0
+#define       AN_TYPE_TYPE_WIDTH 32
+
+/* FEC_TYPE structuredef: Forward error correction types defined in IEEE802.3
+ */
+#define    FEC_TYPE_LEN 4
+#define       FEC_TYPE_TYPE_OFST 0
+#define       FEC_TYPE_TYPE_LEN 4
+/* enum: No FEC */
+#define          MC_CMD_FEC_NONE 0x0
+/* enum: Clause 74 BASE-R FEC (a.k.a Firecode) */
+#define          MC_CMD_FEC_BASER 0x1
+/* enum: Clause 91/Clause 108 Reed-Solomon FEC */
+#define          MC_CMD_FEC_RS 0x2
+#define       FEC_TYPE_TYPE_LBN 0
+#define       FEC_TYPE_TYPE_WIDTH 32
+
 
 /***********************************/
 /* MC_CMD_GET_LINK
@@ -3552,10 +3616,14 @@
 
 /* MC_CMD_GET_LINK_OUT msgresponse */
 #define    MC_CMD_GET_LINK_OUT_LEN 28
-/* near-side advertised capabilities */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
 #define       MC_CMD_GET_LINK_OUT_CAP_OFST 0
 #define       MC_CMD_GET_LINK_OUT_CAP_LEN 4
-/* link-partner advertised capabilities */
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
 #define       MC_CMD_GET_LINK_OUT_LP_CAP_OFST 4
 #define       MC_CMD_GET_LINK_OUT_LP_CAP_LEN 4
 /* Autonegotiated speed in mbit/s. The link may still be down even if this
@@ -3598,6 +3666,97 @@
 #define        MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3
 #define        MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1
 
+/* MC_CMD_GET_LINK_OUT_V2 msgresponse: Extended link state information */
+#define    MC_CMD_GET_LINK_OUT_V2_LEN 44
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_CAP_OFST 0
+#define       MC_CMD_GET_LINK_OUT_V2_CAP_LEN 4
+/* Link-partner advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_LP_CAP_OFST 4
+#define       MC_CMD_GET_LINK_OUT_V2_LP_CAP_LEN 4
+/* Autonegotiated speed in mbit/s. The link may still be down even if this
+ * reads non-zero.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_OFST 8
+#define       MC_CMD_GET_LINK_OUT_V2_LINK_SPEED_LEN 4
+/* Current loopback setting. */
+#define       MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_OFST 12
+#define       MC_CMD_GET_LINK_OUT_V2_LOOPBACK_MODE_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_GET_LOOPBACK_MODES/MC_CMD_GET_LOOPBACK_MODES_OUT/100M */
+#define       MC_CMD_GET_LINK_OUT_V2_FLAGS_OFST 16
+#define       MC_CMD_GET_LINK_OUT_V2_FLAGS_LEN 4
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_UP_LBN 0
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_UP_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_LBN 1
+#define        MC_CMD_GET_LINK_OUT_V2_FULL_DUPLEX_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_BPX_LINK_LBN 2
+#define        MC_CMD_GET_LINK_OUT_V2_BPX_LINK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PHY_LINK_LBN 3
+#define        MC_CMD_GET_LINK_OUT_V2_PHY_LINK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_LBN 6
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_RX_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_LBN 7
+#define        MC_CMD_GET_LINK_OUT_V2_LINK_FAULT_TX_WIDTH 1
+/* This returns the negotiated flow control value. */
+#define       MC_CMD_GET_LINK_OUT_V2_FCNTL_OFST 20
+#define       MC_CMD_GET_LINK_OUT_V2_FCNTL_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_SET_MAC/MC_CMD_SET_MAC_IN/FCNTL */
+#define       MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_OFST 24
+#define       MC_CMD_GET_LINK_OUT_V2_MAC_FAULT_LEN 4
+/*             MC_CMD_MAC_FAULT_XGMII_LOCAL_LBN 0 */
+/*             MC_CMD_MAC_FAULT_XGMII_LOCAL_WIDTH 1 */
+/*             MC_CMD_MAC_FAULT_XGMII_REMOTE_LBN 1 */
+/*             MC_CMD_MAC_FAULT_XGMII_REMOTE_WIDTH 1 */
+/*             MC_CMD_MAC_FAULT_SGMII_REMOTE_LBN 2 */
+/*             MC_CMD_MAC_FAULT_SGMII_REMOTE_WIDTH 1 */
+/*             MC_CMD_MAC_FAULT_PENDING_RECONFIG_LBN 3 */
+/*             MC_CMD_MAC_FAULT_PENDING_RECONFIG_WIDTH 1 */
+/* True local device capabilities (taking into account currently used PMD/MDI,
+ * e.g. plugged-in module). In general, subset of
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP, but may include extra _FEC_REQUEST
+ * bits, if the PMD requires FEC. 0 if unknown (e.g. module unplugged). Equal
+ * to SUPPORTED_CAP for non-pluggable PMDs. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
+#define       MC_CMD_GET_LINK_OUT_V2_LD_CAP_OFST 28
+#define       MC_CMD_GET_LINK_OUT_V2_LD_CAP_LEN 4
+/* Auto-negotiation type used on the link */
+#define       MC_CMD_GET_LINK_OUT_V2_AN_TYPE_OFST 32
+#define       MC_CMD_GET_LINK_OUT_V2_AN_TYPE_LEN 4
+/*            Enum values, see field(s): */
+/*               AN_TYPE/TYPE */
+/* Forward error correction used on the link */
+#define       MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_OFST 36
+#define       MC_CMD_GET_LINK_OUT_V2_FEC_TYPE_LEN 4
+/*            Enum values, see field(s): */
+/*               FEC_TYPE/TYPE */
+#define       MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_OFST 40
+#define       MC_CMD_GET_LINK_OUT_V2_EXT_FLAGS_LEN 4
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_LBN 0
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_MDI_CONNECTED_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_READY_LBN 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_READY_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_LBN 2
+#define        MC_CMD_GET_LINK_OUT_V2_PMD_LINK_UP_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_LBN 3
+#define        MC_CMD_GET_LINK_OUT_V2_PMA_LINK_UP_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_LBN 4
+#define        MC_CMD_GET_LINK_OUT_V2_PCS_LOCK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_LBN 5
+#define        MC_CMD_GET_LINK_OUT_V2_ALIGN_LOCK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_HI_BER_LBN 6
+#define        MC_CMD_GET_LINK_OUT_V2_HI_BER_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_LBN 7
+#define        MC_CMD_GET_LINK_OUT_V2_FEC_LOCK_WIDTH 1
+#define        MC_CMD_GET_LINK_OUT_V2_AN_DONE_LBN 8
+#define        MC_CMD_GET_LINK_OUT_V2_AN_DONE_WIDTH 1
+
 
 /***********************************/
 /* MC_CMD_SET_LINK
@@ -3610,7 +3769,9 @@
 
 /* MC_CMD_SET_LINK_IN msgrequest */
 #define    MC_CMD_SET_LINK_IN_LEN 16
-/* ??? */
+/* Near-side advertised capabilities. Refer to
+ * MC_CMD_GET_PHY_CFG_OUT/SUPPORTED_CAP for bit definitions.
+ */
 #define       MC_CMD_SET_LINK_IN_CAP_OFST 0
 #define       MC_CMD_SET_LINK_IN_CAP_LEN 4
 /* Flags */
@@ -3650,9 +3811,9 @@
 /* Set LED state. */
 #define       MC_CMD_SET_ID_LED_IN_STATE_OFST 0
 #define       MC_CMD_SET_ID_LED_IN_STATE_LEN 4
-#define          MC_CMD_LED_OFF  0x0 /* enum */
-#define          MC_CMD_LED_ON  0x1 /* enum */
-#define          MC_CMD_LED_DEFAULT  0x2 /* enum */
+#define          MC_CMD_LED_OFF 0x0 /* enum */
+#define          MC_CMD_LED_ON 0x1 /* enum */
+#define          MC_CMD_LED_DEFAULT 0x2 /* enum */
 
 /* MC_CMD_SET_ID_LED_OUT msgresponse */
 #define    MC_CMD_SET_ID_LED_OUT_LEN 0
@@ -3802,53 +3963,53 @@
 #define       MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_LEN 4
 #define       MC_CMD_PHY_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_PHY_NSTATS
 /* enum: OUI. */
-#define          MC_CMD_OUI  0x0
+#define          MC_CMD_OUI 0x0
 /* enum: PMA-PMD Link Up. */
-#define          MC_CMD_PMA_PMD_LINK_UP  0x1
+#define          MC_CMD_PMA_PMD_LINK_UP 0x1
 /* enum: PMA-PMD RX Fault. */
-#define          MC_CMD_PMA_PMD_RX_FAULT  0x2
+#define          MC_CMD_PMA_PMD_RX_FAULT 0x2
 /* enum: PMA-PMD TX Fault. */
-#define          MC_CMD_PMA_PMD_TX_FAULT  0x3
+#define          MC_CMD_PMA_PMD_TX_FAULT 0x3
 /* enum: PMA-PMD Signal */
-#define          MC_CMD_PMA_PMD_SIGNAL  0x4
+#define          MC_CMD_PMA_PMD_SIGNAL 0x4
 /* enum: PMA-PMD SNR A. */
-#define          MC_CMD_PMA_PMD_SNR_A  0x5
+#define          MC_CMD_PMA_PMD_SNR_A 0x5
 /* enum: PMA-PMD SNR B. */
-#define          MC_CMD_PMA_PMD_SNR_B  0x6
+#define          MC_CMD_PMA_PMD_SNR_B 0x6
 /* enum: PMA-PMD SNR C. */
-#define          MC_CMD_PMA_PMD_SNR_C  0x7
+#define          MC_CMD_PMA_PMD_SNR_C 0x7
 /* enum: PMA-PMD SNR D. */
-#define          MC_CMD_PMA_PMD_SNR_D  0x8
+#define          MC_CMD_PMA_PMD_SNR_D 0x8
 /* enum: PCS Link Up. */
-#define          MC_CMD_PCS_LINK_UP  0x9
+#define          MC_CMD_PCS_LINK_UP 0x9
 /* enum: PCS RX Fault. */
-#define          MC_CMD_PCS_RX_FAULT  0xa
+#define          MC_CMD_PCS_RX_FAULT 0xa
 /* enum: PCS TX Fault. */
-#define          MC_CMD_PCS_TX_FAULT  0xb
+#define          MC_CMD_PCS_TX_FAULT 0xb
 /* enum: PCS BER. */
-#define          MC_CMD_PCS_BER  0xc
+#define          MC_CMD_PCS_BER 0xc
 /* enum: PCS Block Errors. */
-#define          MC_CMD_PCS_BLOCK_ERRORS  0xd
+#define          MC_CMD_PCS_BLOCK_ERRORS 0xd
 /* enum: PhyXS Link Up. */
-#define          MC_CMD_PHYXS_LINK_UP  0xe
+#define          MC_CMD_PHYXS_LINK_UP 0xe
 /* enum: PhyXS RX Fault. */
-#define          MC_CMD_PHYXS_RX_FAULT  0xf
+#define          MC_CMD_PHYXS_RX_FAULT 0xf
 /* enum: PhyXS TX Fault. */
-#define          MC_CMD_PHYXS_TX_FAULT  0x10
+#define          MC_CMD_PHYXS_TX_FAULT 0x10
 /* enum: PhyXS Align. */
-#define          MC_CMD_PHYXS_ALIGN  0x11
+#define          MC_CMD_PHYXS_ALIGN 0x11
 /* enum: PhyXS Sync. */
-#define          MC_CMD_PHYXS_SYNC  0x12
+#define          MC_CMD_PHYXS_SYNC 0x12
 /* enum: AN link-up. */
-#define          MC_CMD_AN_LINK_UP  0x13
+#define          MC_CMD_AN_LINK_UP 0x13
 /* enum: AN Complete. */
-#define          MC_CMD_AN_COMPLETE  0x14
+#define          MC_CMD_AN_COMPLETE 0x14
 /* enum: AN 10GBaseT Status. */
-#define          MC_CMD_AN_10GBT_STATUS  0x15
+#define          MC_CMD_AN_10GBT_STATUS 0x15
 /* enum: Clause 22 Link-Up. */
-#define          MC_CMD_CL22_LINK_UP  0x16
+#define          MC_CMD_CL22_LINK_UP 0x16
 /* enum: (Last entry) */
-#define          MC_CMD_PHY_NSTATS  0x17
+#define          MC_CMD_PHY_NSTATS 0x17
 
 
 /***********************************/
@@ -3910,139 +4071,139 @@
 #define       MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_LO_OFST 0
 #define       MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_HI_OFST 4
 #define       MC_CMD_MAC_STATS_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS
-#define          MC_CMD_MAC_GENERATION_START  0x0 /* enum */
-#define          MC_CMD_MAC_DMABUF_START  0x1 /* enum */
-#define          MC_CMD_MAC_TX_PKTS  0x1 /* enum */
-#define          MC_CMD_MAC_TX_PAUSE_PKTS  0x2 /* enum */
-#define          MC_CMD_MAC_TX_CONTROL_PKTS  0x3 /* enum */
-#define          MC_CMD_MAC_TX_UNICAST_PKTS  0x4 /* enum */
-#define          MC_CMD_MAC_TX_MULTICAST_PKTS  0x5 /* enum */
-#define          MC_CMD_MAC_TX_BROADCAST_PKTS  0x6 /* enum */
-#define          MC_CMD_MAC_TX_BYTES  0x7 /* enum */
-#define          MC_CMD_MAC_TX_BAD_BYTES  0x8 /* enum */
-#define          MC_CMD_MAC_TX_LT64_PKTS  0x9 /* enum */
-#define          MC_CMD_MAC_TX_64_PKTS  0xa /* enum */
-#define          MC_CMD_MAC_TX_65_TO_127_PKTS  0xb /* enum */
-#define          MC_CMD_MAC_TX_128_TO_255_PKTS  0xc /* enum */
-#define          MC_CMD_MAC_TX_256_TO_511_PKTS  0xd /* enum */
-#define          MC_CMD_MAC_TX_512_TO_1023_PKTS  0xe /* enum */
-#define          MC_CMD_MAC_TX_1024_TO_15XX_PKTS  0xf /* enum */
-#define          MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS  0x10 /* enum */
-#define          MC_CMD_MAC_TX_GTJUMBO_PKTS  0x11 /* enum */
-#define          MC_CMD_MAC_TX_BAD_FCS_PKTS  0x12 /* enum */
-#define          MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS  0x13 /* enum */
-#define          MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS  0x14 /* enum */
-#define          MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS  0x15 /* enum */
-#define          MC_CMD_MAC_TX_LATE_COLLISION_PKTS  0x16 /* enum */
-#define          MC_CMD_MAC_TX_DEFERRED_PKTS  0x17 /* enum */
-#define          MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS  0x18 /* enum */
-#define          MC_CMD_MAC_TX_NON_TCPUDP_PKTS  0x19 /* enum */
-#define          MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS  0x1a /* enum */
-#define          MC_CMD_MAC_TX_IP_SRC_ERR_PKTS  0x1b /* enum */
-#define          MC_CMD_MAC_RX_PKTS  0x1c /* enum */
-#define          MC_CMD_MAC_RX_PAUSE_PKTS  0x1d /* enum */
-#define          MC_CMD_MAC_RX_GOOD_PKTS  0x1e /* enum */
-#define          MC_CMD_MAC_RX_CONTROL_PKTS  0x1f /* enum */
-#define          MC_CMD_MAC_RX_UNICAST_PKTS  0x20 /* enum */
-#define          MC_CMD_MAC_RX_MULTICAST_PKTS  0x21 /* enum */
-#define          MC_CMD_MAC_RX_BROADCAST_PKTS  0x22 /* enum */
-#define          MC_CMD_MAC_RX_BYTES  0x23 /* enum */
-#define          MC_CMD_MAC_RX_BAD_BYTES  0x24 /* enum */
-#define          MC_CMD_MAC_RX_64_PKTS  0x25 /* enum */
-#define          MC_CMD_MAC_RX_65_TO_127_PKTS  0x26 /* enum */
-#define          MC_CMD_MAC_RX_128_TO_255_PKTS  0x27 /* enum */
-#define          MC_CMD_MAC_RX_256_TO_511_PKTS  0x28 /* enum */
-#define          MC_CMD_MAC_RX_512_TO_1023_PKTS  0x29 /* enum */
-#define          MC_CMD_MAC_RX_1024_TO_15XX_PKTS  0x2a /* enum */
-#define          MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS  0x2b /* enum */
-#define          MC_CMD_MAC_RX_GTJUMBO_PKTS  0x2c /* enum */
-#define          MC_CMD_MAC_RX_UNDERSIZE_PKTS  0x2d /* enum */
-#define          MC_CMD_MAC_RX_BAD_FCS_PKTS  0x2e /* enum */
-#define          MC_CMD_MAC_RX_OVERFLOW_PKTS  0x2f /* enum */
-#define          MC_CMD_MAC_RX_FALSE_CARRIER_PKTS  0x30 /* enum */
-#define          MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS  0x31 /* enum */
-#define          MC_CMD_MAC_RX_ALIGN_ERROR_PKTS  0x32 /* enum */
-#define          MC_CMD_MAC_RX_LENGTH_ERROR_PKTS  0x33 /* enum */
-#define          MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS  0x34 /* enum */
-#define          MC_CMD_MAC_RX_JABBER_PKTS  0x35 /* enum */
-#define          MC_CMD_MAC_RX_NODESC_DROPS  0x36 /* enum */
-#define          MC_CMD_MAC_RX_LANES01_CHAR_ERR  0x37 /* enum */
-#define          MC_CMD_MAC_RX_LANES23_CHAR_ERR  0x38 /* enum */
-#define          MC_CMD_MAC_RX_LANES01_DISP_ERR  0x39 /* enum */
-#define          MC_CMD_MAC_RX_LANES23_DISP_ERR  0x3a /* enum */
-#define          MC_CMD_MAC_RX_MATCH_FAULT  0x3b /* enum */
+#define          MC_CMD_MAC_GENERATION_START 0x0 /* enum */
+#define          MC_CMD_MAC_DMABUF_START 0x1 /* enum */
+#define          MC_CMD_MAC_TX_PKTS 0x1 /* enum */
+#define          MC_CMD_MAC_TX_PAUSE_PKTS 0x2 /* enum */
+#define          MC_CMD_MAC_TX_CONTROL_PKTS 0x3 /* enum */
+#define          MC_CMD_MAC_TX_UNICAST_PKTS 0x4 /* enum */
+#define          MC_CMD_MAC_TX_MULTICAST_PKTS 0x5 /* enum */
+#define          MC_CMD_MAC_TX_BROADCAST_PKTS 0x6 /* enum */
+#define          MC_CMD_MAC_TX_BYTES 0x7 /* enum */
+#define          MC_CMD_MAC_TX_BAD_BYTES 0x8 /* enum */
+#define          MC_CMD_MAC_TX_LT64_PKTS 0x9 /* enum */
+#define          MC_CMD_MAC_TX_64_PKTS 0xa /* enum */
+#define          MC_CMD_MAC_TX_65_TO_127_PKTS 0xb /* enum */
+#define          MC_CMD_MAC_TX_128_TO_255_PKTS 0xc /* enum */
+#define          MC_CMD_MAC_TX_256_TO_511_PKTS 0xd /* enum */
+#define          MC_CMD_MAC_TX_512_TO_1023_PKTS 0xe /* enum */
+#define          MC_CMD_MAC_TX_1024_TO_15XX_PKTS 0xf /* enum */
+#define          MC_CMD_MAC_TX_15XX_TO_JUMBO_PKTS 0x10 /* enum */
+#define          MC_CMD_MAC_TX_GTJUMBO_PKTS 0x11 /* enum */
+#define          MC_CMD_MAC_TX_BAD_FCS_PKTS 0x12 /* enum */
+#define          MC_CMD_MAC_TX_SINGLE_COLLISION_PKTS 0x13 /* enum */
+#define          MC_CMD_MAC_TX_MULTIPLE_COLLISION_PKTS 0x14 /* enum */
+#define          MC_CMD_MAC_TX_EXCESSIVE_COLLISION_PKTS 0x15 /* enum */
+#define          MC_CMD_MAC_TX_LATE_COLLISION_PKTS 0x16 /* enum */
+#define          MC_CMD_MAC_TX_DEFERRED_PKTS 0x17 /* enum */
+#define          MC_CMD_MAC_TX_EXCESSIVE_DEFERRED_PKTS 0x18 /* enum */
+#define          MC_CMD_MAC_TX_NON_TCPUDP_PKTS 0x19 /* enum */
+#define          MC_CMD_MAC_TX_MAC_SRC_ERR_PKTS 0x1a /* enum */
+#define          MC_CMD_MAC_TX_IP_SRC_ERR_PKTS 0x1b /* enum */
+#define          MC_CMD_MAC_RX_PKTS 0x1c /* enum */
+#define          MC_CMD_MAC_RX_PAUSE_PKTS 0x1d /* enum */
+#define          MC_CMD_MAC_RX_GOOD_PKTS 0x1e /* enum */
+#define          MC_CMD_MAC_RX_CONTROL_PKTS 0x1f /* enum */
+#define          MC_CMD_MAC_RX_UNICAST_PKTS 0x20 /* enum */
+#define          MC_CMD_MAC_RX_MULTICAST_PKTS 0x21 /* enum */
+#define          MC_CMD_MAC_RX_BROADCAST_PKTS 0x22 /* enum */
+#define          MC_CMD_MAC_RX_BYTES 0x23 /* enum */
+#define          MC_CMD_MAC_RX_BAD_BYTES 0x24 /* enum */
+#define          MC_CMD_MAC_RX_64_PKTS 0x25 /* enum */
+#define          MC_CMD_MAC_RX_65_TO_127_PKTS 0x26 /* enum */
+#define          MC_CMD_MAC_RX_128_TO_255_PKTS 0x27 /* enum */
+#define          MC_CMD_MAC_RX_256_TO_511_PKTS 0x28 /* enum */
+#define          MC_CMD_MAC_RX_512_TO_1023_PKTS 0x29 /* enum */
+#define          MC_CMD_MAC_RX_1024_TO_15XX_PKTS 0x2a /* enum */
+#define          MC_CMD_MAC_RX_15XX_TO_JUMBO_PKTS 0x2b /* enum */
+#define          MC_CMD_MAC_RX_GTJUMBO_PKTS 0x2c /* enum */
+#define          MC_CMD_MAC_RX_UNDERSIZE_PKTS 0x2d /* enum */
+#define          MC_CMD_MAC_RX_BAD_FCS_PKTS 0x2e /* enum */
+#define          MC_CMD_MAC_RX_OVERFLOW_PKTS 0x2f /* enum */
+#define          MC_CMD_MAC_RX_FALSE_CARRIER_PKTS 0x30 /* enum */
+#define          MC_CMD_MAC_RX_SYMBOL_ERROR_PKTS 0x31 /* enum */
+#define          MC_CMD_MAC_RX_ALIGN_ERROR_PKTS 0x32 /* enum */
+#define          MC_CMD_MAC_RX_LENGTH_ERROR_PKTS 0x33 /* enum */
+#define          MC_CMD_MAC_RX_INTERNAL_ERROR_PKTS 0x34 /* enum */
+#define          MC_CMD_MAC_RX_JABBER_PKTS 0x35 /* enum */
+#define          MC_CMD_MAC_RX_NODESC_DROPS 0x36 /* enum */
+#define          MC_CMD_MAC_RX_LANES01_CHAR_ERR 0x37 /* enum */
+#define          MC_CMD_MAC_RX_LANES23_CHAR_ERR 0x38 /* enum */
+#define          MC_CMD_MAC_RX_LANES01_DISP_ERR 0x39 /* enum */
+#define          MC_CMD_MAC_RX_LANES23_DISP_ERR 0x3a /* enum */
+#define          MC_CMD_MAC_RX_MATCH_FAULT 0x3b /* enum */
 /* enum: PM trunc_bb_overflow counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW  0x3c
+#define          MC_CMD_MAC_PM_TRUNC_BB_OVERFLOW 0x3c
 /* enum: PM discard_bb_overflow counter. Valid for EF10 with
  * PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW  0x3d
+#define          MC_CMD_MAC_PM_DISCARD_BB_OVERFLOW 0x3d
 /* enum: PM trunc_vfifo_full counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_TRUNC_VFIFO_FULL  0x3e
+#define          MC_CMD_MAC_PM_TRUNC_VFIFO_FULL 0x3e
 /* enum: PM discard_vfifo_full counter. Valid for EF10 with
  * PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_VFIFO_FULL  0x3f
+#define          MC_CMD_MAC_PM_DISCARD_VFIFO_FULL 0x3f
 /* enum: PM trunc_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_TRUNC_QBB  0x40
+#define          MC_CMD_MAC_PM_TRUNC_QBB 0x40
 /* enum: PM discard_qbb counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_QBB  0x41
+#define          MC_CMD_MAC_PM_DISCARD_QBB 0x41
 /* enum: PM discard_mapping counter. Valid for EF10 with PM_AND_RXDP_COUNTERS
  * capability only.
  */
-#define          MC_CMD_MAC_PM_DISCARD_MAPPING  0x42
+#define          MC_CMD_MAC_PM_DISCARD_MAPPING 0x42
 /* enum: RXDP counter: Number of packets dropped due to the queue being
  * disabled. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_Q_DISABLED_PKTS  0x43
+#define          MC_CMD_MAC_RXDP_Q_DISABLED_PKTS 0x43
 /* enum: RXDP counter: Number of packets dropped by the DICPU. Valid for EF10
  * with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_DI_DROPPED_PKTS  0x45
+#define          MC_CMD_MAC_RXDP_DI_DROPPED_PKTS 0x45
 /* enum: RXDP counter: Number of non-host packets. Valid for EF10 with
  * PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_STREAMING_PKTS  0x46
+#define          MC_CMD_MAC_RXDP_STREAMING_PKTS 0x46
 /* enum: RXDP counter: Number of times an hlb descriptor fetch was performed.
  * Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS  0x47
+#define          MC_CMD_MAC_RXDP_HLB_FETCH_CONDITIONS 0x47
 /* enum: RXDP counter: Number of times the DPCPU waited for an existing
  * descriptor fetch. Valid for EF10 with PM_AND_RXDP_COUNTERS capability only.
  */
-#define          MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS  0x48
-#define          MC_CMD_MAC_VADAPTER_RX_DMABUF_START  0x4c /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS  0x4c /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES  0x4d /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS  0x4e /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES  0x4f /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS  0x50 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES  0x51 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS  0x52 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_BAD_BYTES  0x53 /* enum */
-#define          MC_CMD_MAC_VADAPTER_RX_OVERFLOW  0x54 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_DMABUF_START  0x57 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS  0x57 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES  0x58 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS  0x59 /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES  0x5a /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS  0x5b /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES  0x5c /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS  0x5d /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_BAD_BYTES  0x5e /* enum */
-#define          MC_CMD_MAC_VADAPTER_TX_OVERFLOW  0x5f /* enum */
+#define          MC_CMD_MAC_RXDP_HLB_WAIT_CONDITIONS 0x48
+#define          MC_CMD_MAC_VADAPTER_RX_DMABUF_START 0x4c /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_PACKETS 0x4c /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_UNICAST_BYTES 0x4d /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_PACKETS 0x4e /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_MULTICAST_BYTES 0x4f /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_PACKETS 0x50 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BROADCAST_BYTES 0x51 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BAD_PACKETS 0x52 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_BAD_BYTES 0x53 /* enum */
+#define          MC_CMD_MAC_VADAPTER_RX_OVERFLOW 0x54 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_DMABUF_START 0x57 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_PACKETS 0x57 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_UNICAST_BYTES 0x58 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_PACKETS 0x59 /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_MULTICAST_BYTES 0x5a /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_PACKETS 0x5b /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BROADCAST_BYTES 0x5c /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BAD_PACKETS 0x5d /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_BAD_BYTES 0x5e /* enum */
+#define          MC_CMD_MAC_VADAPTER_TX_OVERFLOW 0x5f /* enum */
 /* enum: Start of GMAC stats buffer space, for Siena only. */
-#define          MC_CMD_GMAC_DMABUF_START  0x40
+#define          MC_CMD_GMAC_DMABUF_START 0x40
 /* enum: End of GMAC stats buffer space, for Siena only. */
-#define          MC_CMD_GMAC_DMABUF_END    0x5f
+#define          MC_CMD_GMAC_DMABUF_END 0x5f
 /* enum: GENERATION_END value, used together with GENERATION_START to verify
  * consistency of DMAd data. For legacy firmware / drivers without extended
  * stats (more precisely, when DMA_LEN == MC_CMD_MAC_NSTATS *
@@ -4054,7 +4215,7 @@
  * sizeof(uint64_t). See SF-109306-TC, Section 9.2 for details.
  */
 #define          MC_CMD_MAC_GENERATION_END 0x60
-#define          MC_CMD_MAC_NSTATS  0x61 /* enum */
+#define          MC_CMD_MAC_NSTATS 0x61 /* enum */
 
 /* MC_CMD_MAC_STATS_V2_OUT_DMA msgresponse */
 #define    MC_CMD_MAC_STATS_V2_OUT_DMA_LEN 0
@@ -4067,25 +4228,25 @@
 #define       MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_HI_OFST 4
 #define       MC_CMD_MAC_STATS_V2_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V2
 /* enum: Start of FEC stats buffer space, Medford2 and up */
-#define          MC_CMD_MAC_FEC_DMABUF_START  0x61
+#define          MC_CMD_MAC_FEC_DMABUF_START 0x61
 /* enum: Number of uncorrected FEC codewords on link (RS-FEC only for Medford2)
  */
-#define          MC_CMD_MAC_FEC_UNCORRECTED_ERRORS  0x61
+#define          MC_CMD_MAC_FEC_UNCORRECTED_ERRORS 0x61
 /* enum: Number of corrected FEC codewords on link (RS-FEC only for Medford2)
  */
-#define          MC_CMD_MAC_FEC_CORRECTED_ERRORS  0x62
+#define          MC_CMD_MAC_FEC_CORRECTED_ERRORS 0x62
 /* enum: Number of corrected 10-bit symbol errors, lane 0 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0  0x63
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE0 0x63
 /* enum: Number of corrected 10-bit symbol errors, lane 1 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1  0x64
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE1 0x64
 /* enum: Number of corrected 10-bit symbol errors, lane 2 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2  0x65
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE2 0x65
 /* enum: Number of corrected 10-bit symbol errors, lane 3 (RS-FEC only) */
-#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3  0x66
+#define          MC_CMD_MAC_FEC_CORRECTED_SYMBOLS_LANE3 0x66
 /* enum: This includes the space at offset 103 which is the final
  * GENERATION_END in a MAC_STATS_V2 response and otherwise unused.
  */
-#define          MC_CMD_MAC_NSTATS_V2  0x68
+#define          MC_CMD_MAC_NSTATS_V2 0x68
 /*            Other enum values, see field(s): */
 /*               MC_CMD_MAC_STATS_OUT_NO_DMA/STATISTICS */
 
@@ -4100,66 +4261,66 @@
 #define       MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_HI_OFST 4
 #define       MC_CMD_MAC_STATS_V3_OUT_NO_DMA_STATISTICS_NUM MC_CMD_MAC_NSTATS_V3
 /* enum: Start of CTPIO stats buffer space, Medford2 and up */
-#define          MC_CMD_MAC_CTPIO_DMABUF_START  0x68
+#define          MC_CMD_MAC_CTPIO_DMABUF_START 0x68
 /* enum: Number of CTPIO fallbacks because a DMA packet was in progress on the
  * target VI
  */
-#define          MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK  0x68
+#define          MC_CMD_MAC_CTPIO_VI_BUSY_FALLBACK 0x68
 /* enum: Number of times a CTPIO send wrote beyond frame end (informational
  * only)
  */
-#define          MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS  0x69
+#define          MC_CMD_MAC_CTPIO_LONG_WRITE_SUCCESS 0x69
 /* enum: Number of CTPIO failures because the TX doorbell was written before
  * the end of the frame data
  */
-#define          MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL  0x6a
+#define          MC_CMD_MAC_CTPIO_MISSING_DBELL_FAIL 0x6a
 /* enum: Number of CTPIO failures because the internal FIFO overflowed */
-#define          MC_CMD_MAC_CTPIO_OVERFLOW_FAIL  0x6b
+#define          MC_CMD_MAC_CTPIO_OVERFLOW_FAIL 0x6b
 /* enum: Number of CTPIO failures because the host did not deliver data fast
  * enough to avoid MAC underflow
  */
-#define          MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL  0x6c
+#define          MC_CMD_MAC_CTPIO_UNDERFLOW_FAIL 0x6c
 /* enum: Number of CTPIO failures because the host did not deliver all the
  * frame data within the timeout
  */
-#define          MC_CMD_MAC_CTPIO_TIMEOUT_FAIL  0x6d
+#define          MC_CMD_MAC_CTPIO_TIMEOUT_FAIL 0x6d
 /* enum: Number of CTPIO failures because the frame data arrived out of order
  * or with gaps
  */
-#define          MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL  0x6e
+#define          MC_CMD_MAC_CTPIO_NONCONTIG_WR_FAIL 0x6e
 /* enum: Number of CTPIO failures because the host started a new frame before
  * completing the previous one
  */
-#define          MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL  0x6f
+#define          MC_CMD_MAC_CTPIO_FRM_CLOBBER_FAIL 0x6f
 /* enum: Number of CTPIO failures because a write was not a multiple of 32 bits
  * or not 32-bit aligned
  */
-#define          MC_CMD_MAC_CTPIO_INVALID_WR_FAIL  0x70
+#define          MC_CMD_MAC_CTPIO_INVALID_WR_FAIL 0x70
 /* enum: Number of CTPIO fallbacks because another VI on the same port was
  * sending a CTPIO frame
  */
-#define          MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK  0x71
+#define          MC_CMD_MAC_CTPIO_VI_CLOBBER_FALLBACK 0x71
 /* enum: Number of CTPIO fallbacks because target VI did not have CTPIO enabled
  */
-#define          MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK  0x72
+#define          MC_CMD_MAC_CTPIO_UNQUALIFIED_FALLBACK 0x72
 /* enum: Number of CTPIO fallbacks because length in header was less than 29
  * bytes
  */
-#define          MC_CMD_MAC_CTPIO_RUNT_FALLBACK  0x73
+#define          MC_CMD_MAC_CTPIO_RUNT_FALLBACK 0x73
 /* enum: Total number of successful CTPIO sends on this port */
-#define          MC_CMD_MAC_CTPIO_SUCCESS  0x74
+#define          MC_CMD_MAC_CTPIO_SUCCESS 0x74
 /* enum: Total number of CTPIO fallbacks on this port */
-#define          MC_CMD_MAC_CTPIO_FALLBACK  0x75
+#define          MC_CMD_MAC_CTPIO_FALLBACK 0x75
 /* enum: Total number of CTPIO poisoned frames on this port, whether erased or
  * not
  */
-#define          MC_CMD_MAC_CTPIO_POISON  0x76
+#define          MC_CMD_MAC_CTPIO_POISON 0x76
 /* enum: Total number of CTPIO erased frames on this port */
-#define          MC_CMD_MAC_CTPIO_ERASE  0x77
+#define          MC_CMD_MAC_CTPIO_ERASE 0x77
 /* enum: This includes the space at offset 120 which is the final
  * GENERATION_END in a MAC_STATS_V3 response and otherwise unused.
  */
-#define          MC_CMD_MAC_NSTATS_V3  0x79
+#define          MC_CMD_MAC_NSTATS_V3 0x79
 /*            Other enum values, see field(s): */
 /*               MC_CMD_MAC_STATS_V2_OUT_NO_DMA/STATISTICS */
 
@@ -4268,25 +4429,25 @@
 #define    MC_CMD_WOL_FILTER_SET_IN_LEN 192
 #define       MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_OFST 0
 #define       MC_CMD_WOL_FILTER_SET_IN_FILTER_MODE_LEN 4
-#define          MC_CMD_FILTER_MODE_SIMPLE    0x0 /* enum */
+#define          MC_CMD_FILTER_MODE_SIMPLE 0x0 /* enum */
 #define          MC_CMD_FILTER_MODE_STRUCTURED 0xffffffff /* enum */
 /* A type value of 1 is unused. */
 #define       MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_OFST 4
 #define       MC_CMD_WOL_FILTER_SET_IN_WOL_TYPE_LEN 4
 /* enum: Magic */
-#define          MC_CMD_WOL_TYPE_MAGIC      0x0
+#define          MC_CMD_WOL_TYPE_MAGIC 0x0
 /* enum: MS Windows Magic */
 #define          MC_CMD_WOL_TYPE_WIN_MAGIC 0x2
 /* enum: IPv4 Syn */
-#define          MC_CMD_WOL_TYPE_IPV4_SYN   0x3
+#define          MC_CMD_WOL_TYPE_IPV4_SYN 0x3
 /* enum: IPv6 Syn */
-#define          MC_CMD_WOL_TYPE_IPV6_SYN   0x4
+#define          MC_CMD_WOL_TYPE_IPV6_SYN 0x4
 /* enum: Bitmap */
-#define          MC_CMD_WOL_TYPE_BITMAP     0x5
+#define          MC_CMD_WOL_TYPE_BITMAP 0x5
 /* enum: Link */
-#define          MC_CMD_WOL_TYPE_LINK       0x6
+#define          MC_CMD_WOL_TYPE_LINK 0x6
 /* enum: (Above this for future use) */
-#define          MC_CMD_WOL_TYPE_MAX        0x7
+#define          MC_CMD_WOL_TYPE_MAX 0x7
 #define       MC_CMD_WOL_FILTER_SET_IN_DATA_OFST 8
 #define       MC_CMD_WOL_FILTER_SET_IN_DATA_LEN 4
 #define       MC_CMD_WOL_FILTER_SET_IN_DATA_NUM 46
@@ -4515,6 +4676,8 @@
 #define        MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_LBN 5
 #define        MC_CMD_NVRAM_INFO_OUT_READ_ONLY_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
@@ -4542,6 +4705,8 @@
 #define        MC_CMD_NVRAM_INFO_V2_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_LBN 2
+#define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_IF_TSA_BOUND_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_LBN 5
 #define        MC_CMD_NVRAM_INFO_V2_OUT_READ_ONLY_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_V2_OUT_A_B_LBN 7
@@ -4560,7 +4725,11 @@
 /* MC_CMD_NVRAM_UPDATE_START
  * Start a group of update operations on a virtual NVRAM partition. Locks
  * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type), EACCES (if
- * PHY_LOCK required and not held).
+ * PHY_LOCK required and not held). In an adapter bound to a TSA controller,
+ * MC_CMD_NVRAM_UPDATE_START can only be used on a subset of partition types
+ * i.e. static config, dynamic config and expansion ROM config. Attempting to
+ * perform this operation on a restricted partition will return the error
+ * EPERM.
  */
 #define MC_CMD_NVRAM_UPDATE_START 0x38
 
@@ -4720,8 +4889,12 @@
 /***********************************/
 /* MC_CMD_NVRAM_UPDATE_FINISH
  * Finish a group of update operations on a virtual NVRAM partition. Locks
- * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad
- * type/offset/length), EACCES (if PHY_LOCK required and not held)
+ * required: PHY_LOCK if type==*PHY*. Returns: 0, EINVAL (bad type/offset/
+ * length), EACCES (if PHY_LOCK required and not held). In an adapter bound to
+ * a TSA controller, MC_CMD_NVRAM_UPDATE_FINISH can only be used on a subset of
+ * partition types i.e. static config, dynamic config and expansion ROM config.
+ * Attempting to perform this operation on a restricted partition will return
+ * the error EPERM.
  */
 #define MC_CMD_NVRAM_UPDATE_FINISH 0x3c
 
@@ -4958,181 +5131,181 @@
 #define       MC_CMD_SENSOR_INFO_OUT_MASK_OFST 0
 #define       MC_CMD_SENSOR_INFO_OUT_MASK_LEN 4
 /* enum: Controller temperature: degC */
-#define          MC_CMD_SENSOR_CONTROLLER_TEMP  0x0
+#define          MC_CMD_SENSOR_CONTROLLER_TEMP 0x0
 /* enum: Phy common temperature: degC */
-#define          MC_CMD_SENSOR_PHY_COMMON_TEMP  0x1
+#define          MC_CMD_SENSOR_PHY_COMMON_TEMP 0x1
 /* enum: Controller cooling: bool */
-#define          MC_CMD_SENSOR_CONTROLLER_COOLING  0x2
+#define          MC_CMD_SENSOR_CONTROLLER_COOLING 0x2
 /* enum: Phy 0 temperature: degC */
-#define          MC_CMD_SENSOR_PHY0_TEMP  0x3
+#define          MC_CMD_SENSOR_PHY0_TEMP 0x3
 /* enum: Phy 0 cooling: bool */
-#define          MC_CMD_SENSOR_PHY0_COOLING  0x4
+#define          MC_CMD_SENSOR_PHY0_COOLING 0x4
 /* enum: Phy 1 temperature: degC */
-#define          MC_CMD_SENSOR_PHY1_TEMP  0x5
+#define          MC_CMD_SENSOR_PHY1_TEMP 0x5
 /* enum: Phy 1 cooling: bool */
-#define          MC_CMD_SENSOR_PHY1_COOLING  0x6
+#define          MC_CMD_SENSOR_PHY1_COOLING 0x6
 /* enum: 1.0v power: mV */
-#define          MC_CMD_SENSOR_IN_1V0  0x7
+#define          MC_CMD_SENSOR_IN_1V0 0x7
 /* enum: 1.2v power: mV */
-#define          MC_CMD_SENSOR_IN_1V2  0x8
+#define          MC_CMD_SENSOR_IN_1V2 0x8
 /* enum: 1.8v power: mV */
-#define          MC_CMD_SENSOR_IN_1V8  0x9
+#define          MC_CMD_SENSOR_IN_1V8 0x9
 /* enum: 2.5v power: mV */
-#define          MC_CMD_SENSOR_IN_2V5  0xa
+#define          MC_CMD_SENSOR_IN_2V5 0xa
 /* enum: 3.3v power: mV */
-#define          MC_CMD_SENSOR_IN_3V3  0xb
+#define          MC_CMD_SENSOR_IN_3V3 0xb
 /* enum: 12v power: mV */
-#define          MC_CMD_SENSOR_IN_12V0  0xc
+#define          MC_CMD_SENSOR_IN_12V0 0xc
 /* enum: 1.2v analogue power: mV */
-#define          MC_CMD_SENSOR_IN_1V2A  0xd
+#define          MC_CMD_SENSOR_IN_1V2A 0xd
 /* enum: reference voltage: mV */
-#define          MC_CMD_SENSOR_IN_VREF  0xe
+#define          MC_CMD_SENSOR_IN_VREF 0xe
 /* enum: AOE FPGA power: mV */
-#define          MC_CMD_SENSOR_OUT_VAOE  0xf
+#define          MC_CMD_SENSOR_OUT_VAOE 0xf
 /* enum: AOE FPGA temperature: degC */
-#define          MC_CMD_SENSOR_AOE_TEMP  0x10
+#define          MC_CMD_SENSOR_AOE_TEMP 0x10
 /* enum: AOE FPGA PSU temperature: degC */
-#define          MC_CMD_SENSOR_PSU_AOE_TEMP  0x11
+#define          MC_CMD_SENSOR_PSU_AOE_TEMP 0x11
 /* enum: AOE PSU temperature: degC */
-#define          MC_CMD_SENSOR_PSU_TEMP  0x12
+#define          MC_CMD_SENSOR_PSU_TEMP 0x12
 /* enum: Fan 0 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_0  0x13
+#define          MC_CMD_SENSOR_FAN_0 0x13
 /* enum: Fan 1 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_1  0x14
+#define          MC_CMD_SENSOR_FAN_1 0x14
 /* enum: Fan 2 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_2  0x15
+#define          MC_CMD_SENSOR_FAN_2 0x15
 /* enum: Fan 3 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_3  0x16
+#define          MC_CMD_SENSOR_FAN_3 0x16
 /* enum: Fan 4 speed: RPM */
-#define          MC_CMD_SENSOR_FAN_4  0x17
+#define          MC_CMD_SENSOR_FAN_4 0x17
 /* enum: AOE FPGA input power: mV */
-#define          MC_CMD_SENSOR_IN_VAOE  0x18
+#define          MC_CMD_SENSOR_IN_VAOE 0x18
 /* enum: AOE FPGA current: mA */
-#define          MC_CMD_SENSOR_OUT_IAOE  0x19
+#define          MC_CMD_SENSOR_OUT_IAOE 0x19
 /* enum: AOE FPGA input current: mA */
-#define          MC_CMD_SENSOR_IN_IAOE  0x1a
+#define          MC_CMD_SENSOR_IN_IAOE 0x1a
 /* enum: NIC power consumption: W */
-#define          MC_CMD_SENSOR_NIC_POWER  0x1b
+#define          MC_CMD_SENSOR_NIC_POWER 0x1b
 /* enum: 0.9v power voltage: mV */
-#define          MC_CMD_SENSOR_IN_0V9  0x1c
+#define          MC_CMD_SENSOR_IN_0V9 0x1c
 /* enum: 0.9v power current: mA */
-#define          MC_CMD_SENSOR_IN_I0V9  0x1d
+#define          MC_CMD_SENSOR_IN_I0V9 0x1d
 /* enum: 1.2v power current: mA */
-#define          MC_CMD_SENSOR_IN_I1V2  0x1e
+#define          MC_CMD_SENSOR_IN_I1V2 0x1e
 /* enum: Not a sensor: reserved for the next page flag */
-#define          MC_CMD_SENSOR_PAGE0_NEXT  0x1f
+#define          MC_CMD_SENSOR_PAGE0_NEXT 0x1f
 /* enum: 0.9v power voltage (at ADC): mV */
-#define          MC_CMD_SENSOR_IN_0V9_ADC  0x20
+#define          MC_CMD_SENSOR_IN_0V9_ADC 0x20
 /* enum: Controller temperature 2: degC */
-#define          MC_CMD_SENSOR_CONTROLLER_2_TEMP  0x21
+#define          MC_CMD_SENSOR_CONTROLLER_2_TEMP 0x21
 /* enum: Voltage regulator internal temperature: degC */
-#define          MC_CMD_SENSOR_VREG_INTERNAL_TEMP  0x22
+#define          MC_CMD_SENSOR_VREG_INTERNAL_TEMP 0x22
 /* enum: 0.9V voltage regulator temperature: degC */
-#define          MC_CMD_SENSOR_VREG_0V9_TEMP  0x23
+#define          MC_CMD_SENSOR_VREG_0V9_TEMP 0x23
 /* enum: 1.2V voltage regulator temperature: degC */
-#define          MC_CMD_SENSOR_VREG_1V2_TEMP  0x24
+#define          MC_CMD_SENSOR_VREG_1V2_TEMP 0x24
 /* enum: controller internal temperature sensor voltage (internal ADC): mV */
-#define          MC_CMD_SENSOR_CONTROLLER_VPTAT  0x25
+#define          MC_CMD_SENSOR_CONTROLLER_VPTAT 0x25
 /* enum: controller internal temperature (internal ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP  0x26
+#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP 0x26
 /* enum: controller internal temperature sensor voltage (external ADC): mV */
-#define          MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC  0x27
+#define          MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC 0x27
 /* enum: controller internal temperature (external ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC  0x28
+#define          MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC 0x28
 /* enum: ambient temperature: degC */
-#define          MC_CMD_SENSOR_AMBIENT_TEMP  0x29
+#define          MC_CMD_SENSOR_AMBIENT_TEMP 0x29
 /* enum: air flow: bool */
-#define          MC_CMD_SENSOR_AIRFLOW  0x2a
+#define          MC_CMD_SENSOR_AIRFLOW 0x2a
 /* enum: voltage between VSS08D and VSS08D at CSR: mV */
-#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR  0x2b
+#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR 0x2b
 /* enum: voltage between VSS08D and VSS08D at CSR (external ADC): mV */
-#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC  0x2c
+#define          MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC 0x2c
 /* enum: Hotpoint temperature: degC */
-#define          MC_CMD_SENSOR_HOTPOINT_TEMP  0x2d
+#define          MC_CMD_SENSOR_HOTPOINT_TEMP 0x2d
 /* enum: Port 0 PHY power switch over-current: bool */
-#define          MC_CMD_SENSOR_PHY_POWER_PORT0  0x2e
+#define          MC_CMD_SENSOR_PHY_POWER_PORT0 0x2e
 /* enum: Port 1 PHY power switch over-current: bool */
-#define          MC_CMD_SENSOR_PHY_POWER_PORT1  0x2f
-/* enum: Mop-up microcontroller reference voltage (millivolts) */
-#define          MC_CMD_SENSOR_MUM_VCC  0x30
+#define          MC_CMD_SENSOR_PHY_POWER_PORT1 0x2f
+/* enum: Mop-up microcontroller reference voltage: mV */
+#define          MC_CMD_SENSOR_MUM_VCC 0x30
 /* enum: 0.9v power phase A voltage: mV */
-#define          MC_CMD_SENSOR_IN_0V9_A  0x31
+#define          MC_CMD_SENSOR_IN_0V9_A 0x31
 /* enum: 0.9v power phase A current: mA */
-#define          MC_CMD_SENSOR_IN_I0V9_A  0x32
+#define          MC_CMD_SENSOR_IN_I0V9_A 0x32
 /* enum: 0.9V voltage regulator phase A temperature: degC */
-#define          MC_CMD_SENSOR_VREG_0V9_A_TEMP  0x33
+#define          MC_CMD_SENSOR_VREG_0V9_A_TEMP 0x33
 /* enum: 0.9v power phase B voltage: mV */
-#define          MC_CMD_SENSOR_IN_0V9_B  0x34
+#define          MC_CMD_SENSOR_IN_0V9_B 0x34
 /* enum: 0.9v power phase B current: mA */
-#define          MC_CMD_SENSOR_IN_I0V9_B  0x35
+#define          MC_CMD_SENSOR_IN_I0V9_B 0x35
 /* enum: 0.9V voltage regulator phase B temperature: degC */
-#define          MC_CMD_SENSOR_VREG_0V9_B_TEMP  0x36
+#define          MC_CMD_SENSOR_VREG_0V9_B_TEMP 0x36
 /* enum: CCOM AVREG 1v2 supply (interval ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY  0x37
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY 0x37
 /* enum: CCOM AVREG 1v2 supply (external ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC  0x38
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC 0x38
 /* enum: CCOM AVREG 1v8 supply (interval ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY  0x39
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY 0x39
 /* enum: CCOM AVREG 1v8 supply (external ADC): mV */
-#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC  0x3a
+#define          MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC 0x3a
 /* enum: CCOM RTS temperature: degC */
-#define          MC_CMD_SENSOR_CONTROLLER_RTS  0x3b
+#define          MC_CMD_SENSOR_CONTROLLER_RTS 0x3b
 /* enum: Not a sensor: reserved for the next page flag */
-#define          MC_CMD_SENSOR_PAGE1_NEXT  0x3f
+#define          MC_CMD_SENSOR_PAGE1_NEXT 0x3f
 /* enum: controller internal temperature sensor voltage on master core
  * (internal ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT  0x40
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT 0x40
 /* enum: controller internal temperature on master core (internal ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP  0x41
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP 0x41
 /* enum: controller internal temperature sensor voltage on master core
  * (external ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC  0x42
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC 0x42
 /* enum: controller internal temperature on master core (external ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC  0x43
+#define          MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC 0x43
 /* enum: controller internal temperature on slave core sensor voltage (internal
  * ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT  0x44
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT 0x44
 /* enum: controller internal temperature on slave core (internal ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP  0x45
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP 0x45
 /* enum: controller internal temperature on slave core sensor voltage (external
  * ADC): mV
  */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC  0x46
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC 0x46
 /* enum: controller internal temperature on slave core (external ADC): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC  0x47
+#define          MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC 0x47
 /* enum: Voltage supplied to the SODIMMs from their power supply: mV */
-#define          MC_CMD_SENSOR_SODIMM_VOUT  0x49
+#define          MC_CMD_SENSOR_SODIMM_VOUT 0x49
 /* enum: Temperature of SODIMM 0 (if installed): degC */
-#define          MC_CMD_SENSOR_SODIMM_0_TEMP  0x4a
+#define          MC_CMD_SENSOR_SODIMM_0_TEMP 0x4a
 /* enum: Temperature of SODIMM 1 (if installed): degC */
-#define          MC_CMD_SENSOR_SODIMM_1_TEMP  0x4b
+#define          MC_CMD_SENSOR_SODIMM_1_TEMP 0x4b
 /* enum: Voltage supplied to the QSFP #0 from their power supply: mV */
-#define          MC_CMD_SENSOR_PHY0_VCC  0x4c
+#define          MC_CMD_SENSOR_PHY0_VCC 0x4c
 /* enum: Voltage supplied to the QSFP #1 from their power supply: mV */
-#define          MC_CMD_SENSOR_PHY1_VCC  0x4d
+#define          MC_CMD_SENSOR_PHY1_VCC 0x4d
 /* enum: Controller die temperature (TDIODE): degC */
-#define          MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP  0x4e
+#define          MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP 0x4e
 /* enum: Board temperature (front): degC */
-#define          MC_CMD_SENSOR_BOARD_FRONT_TEMP  0x4f
+#define          MC_CMD_SENSOR_BOARD_FRONT_TEMP 0x4f
 /* enum: Board temperature (back): degC */
-#define          MC_CMD_SENSOR_BOARD_BACK_TEMP  0x50
+#define          MC_CMD_SENSOR_BOARD_BACK_TEMP 0x50
 /* enum: 1.8v power current: mA */
-#define          MC_CMD_SENSOR_IN_I1V8  0x51
+#define          MC_CMD_SENSOR_IN_I1V8 0x51
 /* enum: 2.5v power current: mA */
-#define          MC_CMD_SENSOR_IN_I2V5  0x52
+#define          MC_CMD_SENSOR_IN_I2V5 0x52
 /* enum: 3.3v power current: mA */
-#define          MC_CMD_SENSOR_IN_I3V3  0x53
+#define          MC_CMD_SENSOR_IN_I3V3 0x53
 /* enum: 12v power current: mA */
-#define          MC_CMD_SENSOR_IN_I12V0  0x54
+#define          MC_CMD_SENSOR_IN_I12V0 0x54
 /* enum: 1.3v power: mV */
-#define          MC_CMD_SENSOR_IN_1V3  0x55
+#define          MC_CMD_SENSOR_IN_1V3 0x55
 /* enum: 1.3v power current: mA */
-#define          MC_CMD_SENSOR_IN_I1V3  0x56
+#define          MC_CMD_SENSOR_IN_I1V3 0x56
 /* enum: Not a sensor: reserved for the next page flag */
-#define          MC_CMD_SENSOR_PAGE2_NEXT  0x5f
+#define          MC_CMD_SENSOR_PAGE2_NEXT 0x5f
 /* MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF */
 #define       MC_CMD_SENSOR_ENTRY_OFST 4
 #define       MC_CMD_SENSOR_ENTRY_LEN 8
@@ -5234,17 +5407,17 @@
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_OFST 2
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LEN 1
 /* enum: Ok. */
-#define          MC_CMD_SENSOR_STATE_OK  0x0
+#define          MC_CMD_SENSOR_STATE_OK 0x0
 /* enum: Breached warning threshold. */
-#define          MC_CMD_SENSOR_STATE_WARNING  0x1
+#define          MC_CMD_SENSOR_STATE_WARNING 0x1
 /* enum: Breached fatal threshold. */
-#define          MC_CMD_SENSOR_STATE_FATAL  0x2
+#define          MC_CMD_SENSOR_STATE_FATAL 0x2
 /* enum: Fault with sensor. */
-#define          MC_CMD_SENSOR_STATE_BROKEN  0x3
+#define          MC_CMD_SENSOR_STATE_BROKEN 0x3
 /* enum: Sensor is working but does not currently have a reading. */
-#define          MC_CMD_SENSOR_STATE_NO_READING  0x4
+#define          MC_CMD_SENSOR_STATE_NO_READING 0x4
 /* enum: Sensor initialisation failed. */
-#define          MC_CMD_SENSOR_STATE_INIT_FAILED  0x5
+#define          MC_CMD_SENSOR_STATE_INIT_FAILED 0x5
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_LBN 16
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE_WIDTH 8
 #define       MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_TYPE_OFST 3
@@ -5327,7 +5500,7 @@
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_OFST 0
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_PROTOCOL_LEN 4
 #define          MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_ARP 0x1 /* enum */
-#define          MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS  0x2 /* enum */
+#define          MC_CMD_LIGHTSOUT_OFFLOAD_PROTOCOL_NS 0x2 /* enum */
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_OFST 4
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_LEN 4
 #define       MC_CMD_ADD_LIGHTSOUT_OFFLOAD_IN_DATA_MINNUM 1
@@ -5416,17 +5589,17 @@
 /* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless
  * you're testing firmware, this is what you want.
  */
-#define          MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES  0x0
+#define          MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0
 /* enum: Assert using assert(0); */
-#define          MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE  0x1
+#define          MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1
 /* enum: Deliberately trigger a watchdog */
-#define          MC_CMD_TESTASSERT_V2_IN_WATCHDOG  0x2
+#define          MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2
 /* enum: Deliberately trigger a trap by loading from an invalid address */
-#define          MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP  0x3
+#define          MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3
 /* enum: Deliberately trigger a trap by storing to an invalid address */
-#define          MC_CMD_TESTASSERT_V2_IN_STORE_TRAP  0x4
+#define          MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4
 /* enum: Jump to an invalid address */
-#define          MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP  0x5
+#define          MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5
 
 /* MC_CMD_TESTASSERT_V2_OUT msgresponse */
 #define    MC_CMD_TESTASSERT_V2_OUT_LEN 0
@@ -5969,7 +6142,7 @@
 /*            MC_CMD_MUM_IN_CMD_LEN 4 */
 #define       MC_CMD_MUM_IN_LOG_OP_OFST 4
 #define       MC_CMD_MUM_IN_LOG_OP_LEN 4
-#define          MC_CMD_MUM_IN_LOG_OP_UART  0x1 /* enum */
+#define          MC_CMD_MUM_IN_LOG_OP_UART 0x1 /* enum */
 
 /* MC_CMD_MUM_IN_LOG_OP_UART msgrequest */
 #define    MC_CMD_MUM_IN_LOG_OP_UART_LEN 12
@@ -6464,17 +6637,17 @@
 #define       EVB_PORT_ID_PORT_ID_OFST 0
 #define       EVB_PORT_ID_PORT_ID_LEN 4
 /* enum: An invalid port handle. */
-#define          EVB_PORT_ID_NULL  0x0
+#define          EVB_PORT_ID_NULL 0x0
 /* enum: The port assigned to this function.. */
-#define          EVB_PORT_ID_ASSIGNED  0x1000000
+#define          EVB_PORT_ID_ASSIGNED 0x1000000
 /* enum: External network port 0 */
-#define          EVB_PORT_ID_MAC0  0x2000000
+#define          EVB_PORT_ID_MAC0 0x2000000
 /* enum: External network port 1 */
-#define          EVB_PORT_ID_MAC1  0x2000001
+#define          EVB_PORT_ID_MAC1 0x2000001
 /* enum: External network port 2 */
-#define          EVB_PORT_ID_MAC2  0x2000002
+#define          EVB_PORT_ID_MAC2 0x2000002
 /* enum: External network port 3 */
-#define          EVB_PORT_ID_MAC3  0x2000003
+#define          EVB_PORT_ID_MAC3 0x2000003
 #define       EVB_PORT_ID_PORT_ID_LBN 0
 #define       EVB_PORT_ID_PORT_ID_WIDTH 32
 
@@ -6486,7 +6659,7 @@
 #define       EVB_VLAN_TAG_MODE_LBN 12
 #define       EVB_VLAN_TAG_MODE_WIDTH 4
 /* enum: Insert the VLAN. */
-#define          EVB_VLAN_TAG_INSERT  0x0
+#define          EVB_VLAN_TAG_INSERT 0x0
 /* enum: Replace the VLAN if already present. */
 #define          EVB_VLAN_TAG_REPLACE 0x1
 
@@ -6515,110 +6688,110 @@
 #define       NVRAM_PARTITION_TYPE_ID_OFST 0
 #define       NVRAM_PARTITION_TYPE_ID_LEN 2
 /* enum: Primary MC firmware partition */
-#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE          0x100
+#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE 0x100
 /* enum: Secondary MC firmware partition */
-#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP   0x200
+#define          NVRAM_PARTITION_TYPE_MC_FIRMWARE_BACKUP 0x200
 /* enum: Expansion ROM partition */
-#define          NVRAM_PARTITION_TYPE_EXPANSION_ROM        0x300
+#define          NVRAM_PARTITION_TYPE_EXPANSION_ROM 0x300
 /* enum: Static configuration TLV partition */
-#define          NVRAM_PARTITION_TYPE_STATIC_CONFIG        0x400
+#define          NVRAM_PARTITION_TYPE_STATIC_CONFIG 0x400
 /* enum: Dynamic configuration TLV partition */
-#define          NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG       0x500
+#define          NVRAM_PARTITION_TYPE_DYNAMIC_CONFIG 0x500
 /* enum: Expansion ROM configuration data for port 0 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0  0x600
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT0 0x600
 /* enum: Synonym for EXPROM_CONFIG_PORT0 as used in pmap files */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG        0x600
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG 0x600
 /* enum: Expansion ROM configuration data for port 1 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1  0x601
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT1 0x601
 /* enum: Expansion ROM configuration data for port 2 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2  0x602
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT2 0x602
 /* enum: Expansion ROM configuration data for port 3 */
-#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3  0x603
+#define          NVRAM_PARTITION_TYPE_EXPROM_CONFIG_PORT3 0x603
 /* enum: Non-volatile log output partition */
-#define          NVRAM_PARTITION_TYPE_LOG                  0x700
+#define          NVRAM_PARTITION_TYPE_LOG 0x700
 /* enum: Non-volatile log output of second core on dual-core device */
-#define          NVRAM_PARTITION_TYPE_LOG_SLAVE            0x701
+#define          NVRAM_PARTITION_TYPE_LOG_SLAVE 0x701
 /* enum: Device state dump output partition */
-#define          NVRAM_PARTITION_TYPE_DUMP                 0x800
+#define          NVRAM_PARTITION_TYPE_DUMP 0x800
 /* enum: Application license key storage partition */
-#define          NVRAM_PARTITION_TYPE_LICENSE              0x900
+#define          NVRAM_PARTITION_TYPE_LICENSE 0x900
 /* enum: Start of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define          NVRAM_PARTITION_TYPE_PHY_MIN              0xa00
+#define          NVRAM_PARTITION_TYPE_PHY_MIN 0xa00
 /* enum: End of range used for PHY partitions (low 8 bits are the PHY ID) */
-#define          NVRAM_PARTITION_TYPE_PHY_MAX              0xaff
+#define          NVRAM_PARTITION_TYPE_PHY_MAX 0xaff
 /* enum: Primary FPGA partition */
-#define          NVRAM_PARTITION_TYPE_FPGA                 0xb00
+#define          NVRAM_PARTITION_TYPE_FPGA 0xb00
 /* enum: Secondary FPGA partition */
-#define          NVRAM_PARTITION_TYPE_FPGA_BACKUP          0xb01
+#define          NVRAM_PARTITION_TYPE_FPGA_BACKUP 0xb01
 /* enum: FC firmware partition */
-#define          NVRAM_PARTITION_TYPE_FC_FIRMWARE          0xb02
+#define          NVRAM_PARTITION_TYPE_FC_FIRMWARE 0xb02
 /* enum: FC License partition */
-#define          NVRAM_PARTITION_TYPE_FC_LICENSE           0xb03
+#define          NVRAM_PARTITION_TYPE_FC_LICENSE 0xb03
 /* enum: Non-volatile log output partition for FC */
-#define          NVRAM_PARTITION_TYPE_FC_LOG               0xb04
+#define          NVRAM_PARTITION_TYPE_FC_LOG 0xb04
 /* enum: MUM firmware partition */
-#define          NVRAM_PARTITION_TYPE_MUM_FIRMWARE         0xc00
+#define          NVRAM_PARTITION_TYPE_MUM_FIRMWARE 0xc00
 /* enum: SUC firmware partition (this is intentionally an alias of
  * MUM_FIRMWARE)
  */
-#define          NVRAM_PARTITION_TYPE_SUC_FIRMWARE         0xc00
+#define          NVRAM_PARTITION_TYPE_SUC_FIRMWARE 0xc00
 /* enum: MUM Non-volatile log output partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_LOG              0xc01
+#define          NVRAM_PARTITION_TYPE_MUM_LOG 0xc01
 /* enum: MUM Application table partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_APPTABLE         0xc02
+#define          NVRAM_PARTITION_TYPE_MUM_APPTABLE 0xc02
 /* enum: MUM boot rom partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_BOOT_ROM         0xc03
+#define          NVRAM_PARTITION_TYPE_MUM_BOOT_ROM 0xc03
 /* enum: MUM production signatures & calibration rom partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_PROD_ROM         0xc04
+#define          NVRAM_PARTITION_TYPE_MUM_PROD_ROM 0xc04
 /* enum: MUM user signatures & calibration rom partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_USER_ROM         0xc05
+#define          NVRAM_PARTITION_TYPE_MUM_USER_ROM 0xc05
 /* enum: MUM fuses and lockbits partition. */
-#define          NVRAM_PARTITION_TYPE_MUM_FUSELOCK         0xc06
+#define          NVRAM_PARTITION_TYPE_MUM_FUSELOCK 0xc06
 /* enum: UEFI expansion ROM if separate from PXE */
-#define          NVRAM_PARTITION_TYPE_EXPANSION_UEFI       0xd00
+#define          NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00
 /* enum: Used by the expansion ROM for logging */
-#define          NVRAM_PARTITION_TYPE_PXE_LOG              0x1000
+#define          NVRAM_PARTITION_TYPE_PXE_LOG 0x1000
 /* enum: Used for XIP code of shmbooted images */
-#define          NVRAM_PARTITION_TYPE_XIP_SCRATCH          0x1100
+#define          NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100
 /* enum: Spare partition 2 */
-#define          NVRAM_PARTITION_TYPE_SPARE_2              0x1200
+#define          NVRAM_PARTITION_TYPE_SPARE_2 0x1200
 /* enum: Manufacturing partition. Used during manufacture to pass information
  * between XJTAG and Manftest.
  */
-#define          NVRAM_PARTITION_TYPE_MANUFACTURING        0x1300
+#define          NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300
 /* enum: Spare partition 4 */
-#define          NVRAM_PARTITION_TYPE_SPARE_4              0x1400
+#define          NVRAM_PARTITION_TYPE_SPARE_4 0x1400
 /* enum: Spare partition 5 */
-#define          NVRAM_PARTITION_TYPE_SPARE_5              0x1500
+#define          NVRAM_PARTITION_TYPE_SPARE_5 0x1500
 /* enum: Partition for reporting MC status. See mc_flash_layout.h
  * medford_mc_status_hdr_t for layout on Medford.
  */
-#define          NVRAM_PARTITION_TYPE_STATUS               0x1600
+#define          NVRAM_PARTITION_TYPE_STATUS 0x1600
 /* enum: Spare partition 13 */
-#define          NVRAM_PARTITION_TYPE_SPARE_13              0x1700
+#define          NVRAM_PARTITION_TYPE_SPARE_13 0x1700
 /* enum: Spare partition 14 */
-#define          NVRAM_PARTITION_TYPE_SPARE_14              0x1800
+#define          NVRAM_PARTITION_TYPE_SPARE_14 0x1800
 /* enum: Spare partition 15 */
-#define          NVRAM_PARTITION_TYPE_SPARE_15              0x1900
+#define          NVRAM_PARTITION_TYPE_SPARE_15 0x1900
 /* enum: Spare partition 16 */
-#define          NVRAM_PARTITION_TYPE_SPARE_16              0x1a00
+#define          NVRAM_PARTITION_TYPE_SPARE_16 0x1a00
 /* enum: Factory defaults for dynamic configuration */
-#define          NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS    0x1b00
+#define          NVRAM_PARTITION_TYPE_DYNCONFIG_DEFAULTS 0x1b00
 /* enum: Factory defaults for expansion ROM configuration */
-#define          NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS    0x1c00
+#define          NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS 0x1c00
 /* enum: Field Replaceable Unit inventory information for use on IPMI
  * platforms. See SF-119124-PS. The STATIC_CONFIG partition may contain a
  * subset of the information stored in this partition.
  */
-#define          NVRAM_PARTITION_TYPE_FRU_INFORMATION       0x1d00
+#define          NVRAM_PARTITION_TYPE_FRU_INFORMATION 0x1d00
 /* enum: Start of reserved value range (firmware may use for any purpose) */
-#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN  0xff00
+#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MIN 0xff00
 /* enum: End of reserved value range (firmware may use for any purpose) */
-#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX  0xfffd
+#define          NVRAM_PARTITION_TYPE_RESERVED_VALUES_MAX 0xfffd
 /* enum: Recovery partition map (provided if real map is missing or corrupt) */
-#define          NVRAM_PARTITION_TYPE_RECOVERY_MAP         0xfffe
+#define          NVRAM_PARTITION_TYPE_RECOVERY_MAP 0xfffe
 /* enum: Partition map (real map as stored in flash) */
-#define          NVRAM_PARTITION_TYPE_PARTITION_MAP        0xffff
+#define          NVRAM_PARTITION_TYPE_PARTITION_MAP 0xffff
 #define       NVRAM_PARTITION_TYPE_ID_LBN 0
 #define       NVRAM_PARTITION_TYPE_ID_WIDTH 16
 
@@ -6627,37 +6800,37 @@
 #define       LICENSED_APP_ID_ID_OFST 0
 #define       LICENSED_APP_ID_ID_LEN 4
 /* enum: OpenOnload */
-#define          LICENSED_APP_ID_ONLOAD                  0x1
+#define          LICENSED_APP_ID_ONLOAD 0x1
 /* enum: PTP timestamping */
-#define          LICENSED_APP_ID_PTP                     0x2
+#define          LICENSED_APP_ID_PTP 0x2
 /* enum: SolarCapture Pro */
-#define          LICENSED_APP_ID_SOLARCAPTURE_PRO        0x4
+#define          LICENSED_APP_ID_SOLARCAPTURE_PRO 0x4
 /* enum: SolarSecure filter engine */
-#define          LICENSED_APP_ID_SOLARSECURE             0x8
+#define          LICENSED_APP_ID_SOLARSECURE 0x8
 /* enum: Performance monitor */
-#define          LICENSED_APP_ID_PERF_MONITOR            0x10
+#define          LICENSED_APP_ID_PERF_MONITOR 0x10
 /* enum: SolarCapture Live */
-#define          LICENSED_APP_ID_SOLARCAPTURE_LIVE       0x20
+#define          LICENSED_APP_ID_SOLARCAPTURE_LIVE 0x20
 /* enum: Capture SolarSystem */
-#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM     0x40
+#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40
 /* enum: Network Access Control */
-#define          LICENSED_APP_ID_NETWORK_ACCESS_CONTROL  0x80
+#define          LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80
 /* enum: TCP Direct */
-#define          LICENSED_APP_ID_TCP_DIRECT              0x100
+#define          LICENSED_APP_ID_TCP_DIRECT 0x100
 /* enum: Low Latency */
-#define          LICENSED_APP_ID_LOW_LATENCY             0x200
+#define          LICENSED_APP_ID_LOW_LATENCY 0x200
 /* enum: SolarCapture Tap */
-#define          LICENSED_APP_ID_SOLARCAPTURE_TAP        0x400
+#define          LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400
 /* enum: Capture SolarSystem 40G */
 #define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800
 /* enum: Capture SolarSystem 1G */
-#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G  0x1000
+#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_1G 0x1000
 /* enum: ScaleOut Onload */
-#define          LICENSED_APP_ID_SCALEOUT_ONLOAD         0x2000
+#define          LICENSED_APP_ID_SCALEOUT_ONLOAD 0x2000
 /* enum: SCS Network Analytics Dashboard */
-#define          LICENSED_APP_ID_DSHBRD                  0x4000
+#define          LICENSED_APP_ID_DSHBRD 0x4000
 /* enum: SolarCapture Trading Analytics */
-#define          LICENSED_APP_ID_SCATRD                  0x8000
+#define          LICENSED_APP_ID_SCATRD 0x8000
 #define       LICENSED_APP_ID_ID_LBN 0
 #define       LICENSED_APP_ID_ID_WIDTH 32
 
@@ -6775,23 +6948,23 @@
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_OFST 3
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_LEN 1
 /* enum: This is a TX completion event, not a timestamp */
-#define          TX_TIMESTAMP_EVENT_TX_EV_COMPLETION  0x0
+#define          TX_TIMESTAMP_EVENT_TX_EV_COMPLETION 0x0
 /* enum: This is a TX completion event for a CTPIO transmit. The event format
  * is the same as for TX_EV_COMPLETION.
  */
-#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION  0x11
+#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_COMPLETION 0x11
 /* enum: This is the low part of a TX timestamp for a CTPIO transmission. The
  * event format is the same as for TX_EV_TSTAMP_LO
  */
-#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO  0x12
+#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_LO 0x12
 /* enum: This is the high part of a TX timestamp for a CTPIO transmission. The
  * event format is the same as for TX_EV_TSTAMP_HI
  */
-#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI  0x13
+#define          TX_TIMESTAMP_EVENT_TX_EV_CTPIO_TS_HI 0x13
 /* enum: This is the low part of a TX timestamp event */
-#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO  0x51
+#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO 0x51
 /* enum: This is the high part of a TX timestamp event */
-#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI  0x52
+#define          TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI 0x52
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_LBN 24
 #define       TX_TIMESTAMP_EVENT_TX_EV_TYPE_WIDTH 8
 /* upper 16 bits of timestamp data */
@@ -7071,17 +7244,17 @@
 #define       QUEUE_CRC_MODE_MODE_LBN 0
 #define       QUEUE_CRC_MODE_MODE_WIDTH 4
 /* enum: No CRC. */
-#define          QUEUE_CRC_MODE_NONE  0x0
+#define          QUEUE_CRC_MODE_NONE 0x0
 /* enum: CRC Fiber channel over ethernet. */
-#define          QUEUE_CRC_MODE_FCOE  0x1
+#define          QUEUE_CRC_MODE_FCOE 0x1
 /* enum: CRC (digest) iSCSI header only. */
-#define          QUEUE_CRC_MODE_ISCSI_HDR  0x2
+#define          QUEUE_CRC_MODE_ISCSI_HDR 0x2
 /* enum: CRC (digest) iSCSI header and payload. */
-#define          QUEUE_CRC_MODE_ISCSI  0x3
+#define          QUEUE_CRC_MODE_ISCSI 0x3
 /* enum: CRC Fiber channel over IP over ethernet. */
-#define          QUEUE_CRC_MODE_FCOIPOE  0x4
+#define          QUEUE_CRC_MODE_FCOIPOE 0x4
 /* enum: CRC MPA. */
-#define          QUEUE_CRC_MODE_MPA  0x5
+#define          QUEUE_CRC_MODE_MPA 0x5
 #define       QUEUE_CRC_MODE_SPARE_LBN 4
 #define       QUEUE_CRC_MODE_SPARE_WIDTH 4
 
@@ -7157,11 +7330,15 @@
 /* Size, in entries */
 #define       MC_CMD_INIT_RXQ_EXT_IN_SIZE_OFST 0
 #define       MC_CMD_INIT_RXQ_EXT_IN_SIZE_LEN 4
-/* The EVQ to send events to. This is an index originally specified to INIT_EVQ
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
  */
 #define       MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_OFST 4
 #define       MC_CMD_INIT_RXQ_EXT_IN_TARGET_EVQ_LEN 4
-/* The value to put in the event data. Check hardware spec. for valid range. */
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
 #define       MC_CMD_INIT_RXQ_EXT_IN_LABEL_OFST 8
 #define       MC_CMD_INIT_RXQ_EXT_IN_LABEL_LEN 4
 /* Desired instance. Must be set to a specific instance, which is a function
@@ -7189,18 +7366,25 @@
 #define        MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_LBN 10
 #define        MC_CMD_INIT_RXQ_EXT_IN_DMA_MODE_WIDTH 4
 /* enum: One packet per descriptor (for normal networking) */
-#define          MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET  0x0
+#define          MC_CMD_INIT_RXQ_EXT_IN_SINGLE_PACKET 0x0
 /* enum: Pack multiple packets into large descriptors (for SolarCapture) */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM  0x1
+#define          MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define          MC_CMD_INIT_RXQ_EXT_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_LBN 14
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
 #define        MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
 #define        MC_CMD_INIT_RXQ_EXT_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M  0x0 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K  0x1 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K  0x2 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K  0x3 /* enum */
-#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K  0x4 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_1M 0x0 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_512K 0x1 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_256K 0x2 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_128K 0x3 /* enum */
+#define          MC_CMD_INIT_RXQ_EXT_IN_PS_BUFF_64K 0x4 /* enum */
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
 #define        MC_CMD_INIT_RXQ_EXT_IN_FLAG_FORCE_EV_MERGING_LBN 19
@@ -7221,12 +7405,122 @@
 #define       MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_OFST 540
 #define       MC_CMD_INIT_RXQ_EXT_IN_SNAPSHOT_LENGTH_LEN 4
 
+/* MC_CMD_INIT_RXQ_V3_IN msgrequest */
+#define    MC_CMD_INIT_RXQ_V3_IN_LEN 560
+/* Size, in entries */
+#define       MC_CMD_INIT_RXQ_V3_IN_SIZE_OFST 0
+#define       MC_CMD_INIT_RXQ_V3_IN_SIZE_LEN 4
+/* The EVQ to send events to. This is an index originally specified to
+ * INIT_EVQ. If DMA_MODE == PACKED_STREAM this must be equal to INSTANCE.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_OFST 4
+#define       MC_CMD_INIT_RXQ_V3_IN_TARGET_EVQ_LEN 4
+/* The value to put in the event data. Check hardware spec. for valid range.
+ * This field is ignored if DMA_MODE == EQUAL_STRIDE_PACKED_STREAM or DMA_MODE
+ * == PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_LABEL_OFST 8
+#define       MC_CMD_INIT_RXQ_V3_IN_LABEL_LEN 4
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_INSTANCE_OFST 12
+#define       MC_CMD_INIT_RXQ_V3_IN_INSTANCE_LEN 4
+/* There will be more flags here. */
+#define       MC_CMD_INIT_RXQ_V3_IN_FLAGS_OFST 16
+#define       MC_CMD_INIT_RXQ_V3_IN_FLAGS_LEN 4
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_LBN 0
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_BUFF_MODE_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_LBN 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_HDR_SPLIT_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_LBN 2
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_TIMESTAMP_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_LBN 3
+#define        MC_CMD_INIT_RXQ_V3_IN_CRC_MODE_WIDTH 4
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_LBN 7
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_CHAIN_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_LBN 8
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_PREFIX_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_LBN 9
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_DISABLE_SCATTER_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_LBN 10
+#define        MC_CMD_INIT_RXQ_V3_IN_DMA_MODE_WIDTH 4
+/* enum: One packet per descriptor (for normal networking) */
+#define          MC_CMD_INIT_RXQ_V3_IN_SINGLE_PACKET 0x0
+/* enum: Pack multiple packets into large descriptors (for SolarCapture) */
+#define          MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM 0x1
+/* enum: Pack multiple packets into large descriptors using the format designed
+ * to maximise packet rate. This mode uses 1 "bucket" per descriptor with
+ * multiple fixed-size packet buffers within each bucket. For a full
+ * description see SF-119419-TC. This mode is only supported by "dpdk" datapath
+ * firmware.
+ */
+#define          MC_CMD_INIT_RXQ_V3_IN_EQUAL_STRIDE_PACKED_STREAM 0x2
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_LBN 14
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_SNAPSHOT_MODE_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_LBN 15
+#define        MC_CMD_INIT_RXQ_V3_IN_PACKED_STREAM_BUFF_SIZE_WIDTH 3
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_1M 0x0 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_512K 0x1 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_256K 0x2 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_128K 0x3 /* enum */
+#define          MC_CMD_INIT_RXQ_V3_IN_PS_BUFF_64K 0x4 /* enum */
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_LBN 18
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_WANT_OUTER_CLASSES_WIDTH 1
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_LBN 19
+#define        MC_CMD_INIT_RXQ_V3_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+/* Owner ID to use if in buffer mode (zero if physical) */
+#define       MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_OFST 20
+#define       MC_CMD_INIT_RXQ_V3_IN_OWNER_ID_LEN 4
+/* The port ID associated with the v-adaptor which should contain this DMAQ. */
+#define       MC_CMD_INIT_RXQ_V3_IN_PORT_ID_OFST 24
+#define       MC_CMD_INIT_RXQ_V3_IN_PORT_ID_LEN 4
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_OFST 28
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LEN 8
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_LO_OFST 28
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_HI_OFST 32
+#define       MC_CMD_INIT_RXQ_V3_IN_DMA_ADDR_NUM 64
+/* Maximum length of packet to receive, if SNAPSHOT_MODE flag is set */
+#define       MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_OFST 540
+#define       MC_CMD_INIT_RXQ_V3_IN_SNAPSHOT_LENGTH_LEN 4
+/* The number of packet buffers that will be contained within each
+ * EQUAL_STRIDE_PACKED_STREAM format bucket supplied by the driver. This field
+ * is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_OFST 544
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_BUFFERS_PER_BUCKET_LEN 4
+/* The length in bytes of the area in each packet buffer that can be written to
+ * by the adapter. This is used to store the packet prefix and the packet
+ * payload. This length does not include any end padding added by the driver.
+ * This field is ignored unless DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_OFST 548
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_MAX_DMA_LEN_LEN 4
+/* The length in bytes of a single packet buffer within a
+ * EQUAL_STRIDE_PACKED_STREAM format bucket. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_OFST 552
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_PACKET_STRIDE_LEN 4
+/* The maximum time in nanoseconds that the datapath will be backpressured if
+ * there are no RX descriptors available. If the timeout is reached and there
+ * are still no descriptors then the packet will be dropped. A timeout of 0
+ * means the datapath will never be blocked. This field is ignored unless
+ * DMA_MODE == EQUAL_STRIDE_PACKED_STREAM.
+ */
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_OFST 556
+#define       MC_CMD_INIT_RXQ_V3_IN_ES_HEAD_OF_LINE_BLOCK_TIMEOUT_LEN 4
+
 /* MC_CMD_INIT_RXQ_OUT msgresponse */
 #define    MC_CMD_INIT_RXQ_OUT_LEN 0
 
 /* MC_CMD_INIT_RXQ_EXT_OUT msgresponse */
 #define    MC_CMD_INIT_RXQ_EXT_OUT_LEN 0
 
+/* MC_CMD_INIT_RXQ_V3_OUT msgresponse */
+#define    MC_CMD_INIT_RXQ_V3_OUT_LEN 0
+
 
 /***********************************/
 /* MC_CMD_INIT_TXQ
@@ -7466,7 +7760,7 @@
 #define        MC_CMD_PROXY_CMD_IN_TARGET_PF_WIDTH 16
 #define        MC_CMD_PROXY_CMD_IN_TARGET_VF_LBN 16
 #define        MC_CMD_PROXY_CMD_IN_TARGET_VF_WIDTH 16
-#define          MC_CMD_PROXY_CMD_IN_VF_NULL  0xffff /* enum */
+#define          MC_CMD_PROXY_CMD_IN_VF_NULL 0xffff /* enum */
 
 /* MC_CMD_PROXY_CMD_OUT msgresponse */
 #define    MC_CMD_PROXY_CMD_OUT_LEN 0
@@ -7479,7 +7773,7 @@
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_OFST 0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_LEN 4
 /* enum: An invalid handle. */
-#define          MC_PROXY_STATUS_BUFFER_HANDLE_INVALID  0x0
+#define          MC_PROXY_STATUS_BUFFER_HANDLE_INVALID 0x0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_LBN 0
 #define       MC_PROXY_STATUS_BUFFER_HANDLE_WIDTH 32
 /* The requesting physical function number */
@@ -7748,17 +8042,17 @@
 #define       MC_CMD_FILTER_OP_IN_OP_OFST 0
 #define       MC_CMD_FILTER_OP_IN_OP_LEN 4
 /* enum: single-recipient filter insert */
-#define          MC_CMD_FILTER_OP_IN_OP_INSERT  0x0
+#define          MC_CMD_FILTER_OP_IN_OP_INSERT 0x0
 /* enum: single-recipient filter remove */
-#define          MC_CMD_FILTER_OP_IN_OP_REMOVE  0x1
+#define          MC_CMD_FILTER_OP_IN_OP_REMOVE 0x1
 /* enum: multi-recipient filter subscribe */
-#define          MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE  0x2
+#define          MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE 0x2
 /* enum: multi-recipient filter unsubscribe */
-#define          MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE  0x3
+#define          MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE 0x3
 /* enum: replace one recipient with another (warning - the filter handle may
  * change)
  */
-#define          MC_CMD_FILTER_OP_IN_OP_REPLACE  0x4
+#define          MC_CMD_FILTER_OP_IN_OP_REPLACE 0x4
 /* filter handle (for remove / unsubscribe operations) */
 #define       MC_CMD_FILTER_OP_IN_HANDLE_OFST 4
 #define       MC_CMD_FILTER_OP_IN_HANDLE_LEN 8
@@ -7803,15 +8097,15 @@
 #define       MC_CMD_FILTER_OP_IN_RX_DEST_OFST 20
 #define       MC_CMD_FILTER_OP_IN_RX_DEST_LEN 4
 /* enum: drop packets */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_DROP  0x0
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_DROP 0x0
 /* enum: receive to host */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_HOST  0x1
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_HOST 0x1
 /* enum: receive to MC */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_MC  0x2
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_MC 0x2
 /* enum: loop back to TXDP 0 */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX0  0x3
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX0 0x3
 /* enum: loop back to TXDP 1 */
-#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX1  0x4
+#define          MC_CMD_FILTER_OP_IN_RX_DEST_TX1 0x4
 /* receive queue handle (for multiple queue modes, this is the base queue) */
 #define       MC_CMD_FILTER_OP_IN_RX_QUEUE_OFST 24
 #define       MC_CMD_FILTER_OP_IN_RX_QUEUE_LEN 4
@@ -7819,14 +8113,14 @@
 #define       MC_CMD_FILTER_OP_IN_RX_MODE_OFST 28
 #define       MC_CMD_FILTER_OP_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_RSS 0x1
 /* enum: receive to multiple queues using .1p mapping */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING  0x2
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_DOT1P_MAPPING 0x2
 /* enum: install a filter entry that will never match; for test purposes only
  */
-#define          MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH  0x80000000
+#define          MC_CMD_FILTER_OP_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
 /* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
  * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
  * MC_CMD_DOT1P_MAPPING_ALLOC.
@@ -7843,7 +8137,7 @@
 #define       MC_CMD_FILTER_OP_IN_TX_DEST_OFST 40
 #define       MC_CMD_FILTER_OP_IN_TX_DEST_LEN 4
 /* enum: request default behaviour (based on filter type) */
-#define          MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT  0xffffffff
+#define          MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT 0xffffffff
 #define        MC_CMD_FILTER_OP_IN_TX_DEST_MAC_LBN 0
 #define        MC_CMD_FILTER_OP_IN_TX_DEST_MAC_WIDTH 1
 #define        MC_CMD_FILTER_OP_IN_TX_DEST_PM_LBN 1
@@ -7971,15 +8265,15 @@
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_DEST_OFST 20
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_DEST_LEN 4
 /* enum: drop packets */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_DROP 0x0
 /* enum: receive to host */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST  0x1
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST 0x1
 /* enum: receive to MC */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC  0x2
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_MC 0x2
 /* enum: loop back to TXDP 0 */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0  0x3
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX0 0x3
 /* enum: loop back to TXDP 1 */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1  0x4
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_DEST_TX1 0x4
 /* receive queue handle (for multiple queue modes, this is the base queue) */
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_OFST 24
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_QUEUE_LEN 4
@@ -7987,14 +8281,14 @@
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_MODE_OFST 28
 #define       MC_CMD_FILTER_OP_EXT_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS 0x1
 /* enum: receive to multiple queues using .1p mapping */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING  0x2
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_DOT1P_MAPPING 0x2
 /* enum: install a filter entry that will never match; for test purposes only
  */
-#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH  0x80000000
+#define          MC_CMD_FILTER_OP_EXT_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
 /* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
  * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
  * MC_CMD_DOT1P_MAPPING_ALLOC.
@@ -8011,7 +8305,7 @@
 #define       MC_CMD_FILTER_OP_EXT_IN_TX_DEST_OFST 40
 #define       MC_CMD_FILTER_OP_EXT_IN_TX_DEST_LEN 4
 /* enum: request default behaviour (based on filter type) */
-#define          MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT  0xffffffff
+#define          MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT 0xffffffff
 #define        MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_LBN 0
 #define        MC_CMD_FILTER_OP_EXT_IN_TX_DEST_MAC_WIDTH 1
 #define        MC_CMD_FILTER_OP_EXT_IN_TX_DEST_PM_LBN 1
@@ -8054,17 +8348,17 @@
 #define        MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_LBN 24
 #define        MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_WIDTH 8
 /* enum: Match VXLAN traffic with this VNI */
-#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN 0x0
 /* enum: Match Geneve traffic with this VNI */
-#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE  0x1
+#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE 0x1
 /* enum: Reserved for experimental development use */
-#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL  0xfe
+#define          MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_EXPERIMENTAL 0xfe
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_LBN 0
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_VALUE_WIDTH 24
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_LBN 24
 #define        MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_WIDTH 8
 /* enum: Match NVGRE traffic with this VSID */
-#define          MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE  0x0
+#define          MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE 0x0
 /* source IP address to match (as bytes in network order; set last 12 bytes to
  * 0 for IPv4 address)
  */
@@ -8131,6 +8425,273 @@
 #define       MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_OFST 156
 #define       MC_CMD_FILTER_OP_EXT_IN_IFRM_DST_IP_LEN 16
 
+/* MC_CMD_FILTER_OP_V3_IN msgrequest: FILTER_OP extension to support additional
+ * filter actions for Intel's DPDK (Data Plane Development Kit, dpdk.org) via
+ * its rte_flow API. This extension is only useful with the sfc_efx driver
+ * included as part of DPDK, used in conjunction with the dpdk datapath
+ * firmware variant.
+ */
+#define    MC_CMD_FILTER_OP_V3_IN_LEN 180
+/* identifies the type of operation requested */
+#define       MC_CMD_FILTER_OP_V3_IN_OP_OFST 0
+#define       MC_CMD_FILTER_OP_V3_IN_OP_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_FILTER_OP_IN/OP */
+/* filter handle (for remove / unsubscribe operations) */
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_OFST 4
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_LEN 8
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_LO_OFST 4
+#define       MC_CMD_FILTER_OP_V3_IN_HANDLE_HI_OFST 8
+/* The port ID associated with the v-adaptor which should contain this filter.
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_PORT_ID_OFST 12
+#define       MC_CMD_FILTER_OP_V3_IN_PORT_ID_LEN 4
+/* fields to include in match criteria */
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_OFST 16
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_FIELDS_LEN 4
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_LBN 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_LBN 2
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_LBN 3
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_SRC_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_LBN 4
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_LBN 5
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_DST_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_LBN 6
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_ETHER_TYPE_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_LBN 7
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_INNER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_LBN 8
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_OUTER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_LBN 9
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IP_PROTO_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_LBN 10
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_FWDEF0_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_LBN 11
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_VNI_OR_VSID_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_LBN 12
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_LBN 13
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_IP_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_LBN 14
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_LBN 15
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_SRC_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_LBN 16
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_LBN 17
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_DST_PORT_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_LBN 18
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_ETHER_TYPE_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_LBN 19
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_INNER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_LBN 20
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_OUTER_VLAN_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_LBN 21
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_IP_PROTO_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_LBN 22
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF0_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_LBN 23
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_FWDEF1_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN 24
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN 25
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_LBN 30
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_MCAST_DST_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_LBN 31
+#define        MC_CMD_FILTER_OP_V3_IN_MATCH_UNKNOWN_UCAST_DST_WIDTH 1
+/* receive destination */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_DEST_OFST 20
+#define       MC_CMD_FILTER_OP_V3_IN_RX_DEST_LEN 4
+/* enum: drop packets */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_DROP 0x0
+/* enum: receive to host */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_HOST 0x1
+/* enum: receive to MC */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_MC 0x2
+/* enum: loop back to TXDP 0 */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX0 0x3
+/* enum: loop back to TXDP 1 */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_DEST_TX1 0x4
+/* receive queue handle (for multiple queue modes, this is the base queue) */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_OFST 24
+#define       MC_CMD_FILTER_OP_V3_IN_RX_QUEUE_LEN 4
+/* receive mode */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_MODE_OFST 28
+#define       MC_CMD_FILTER_OP_V3_IN_RX_MODE_LEN 4
+/* enum: receive to just the specified queue */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_SIMPLE 0x0
+/* enum: receive to multiple queues using RSS context */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_RSS 0x1
+/* enum: receive to multiple queues using .1p mapping */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_DOT1P_MAPPING 0x2
+/* enum: install a filter entry that will never match; for test purposes only
+ */
+#define          MC_CMD_FILTER_OP_V3_IN_RX_MODE_TEST_NEVER_MATCH 0x80000000
+/* RSS context (for RX_MODE_RSS) or .1p mapping handle (for
+ * RX_MODE_DOT1P_MAPPING), as returned by MC_CMD_RSS_CONTEXT_ALLOC or
+ * MC_CMD_DOT1P_MAPPING_ALLOC.
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_OFST 32
+#define       MC_CMD_FILTER_OP_V3_IN_RX_CONTEXT_LEN 4
+/* transmit domain (reserved; set to 0) */
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_OFST 36
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DOMAIN_LEN 4
+/* transmit destination (either set the MAC and/or PM bits for explicit
+ * control, or set this field to TX_DEST_DEFAULT for sensible default
+ * behaviour)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DEST_OFST 40
+#define       MC_CMD_FILTER_OP_V3_IN_TX_DEST_LEN 4
+/* enum: request default behaviour (based on filter type) */
+#define          MC_CMD_FILTER_OP_V3_IN_TX_DEST_DEFAULT 0xffffffff
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_MAC_WIDTH 1
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_LBN 1
+#define        MC_CMD_FILTER_OP_V3_IN_TX_DEST_PM_WIDTH 1
+/* source MAC address to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_MAC_OFST 44
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_MAC_LEN 6
+/* source port to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_PORT_OFST 50
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_PORT_LEN 2
+/* destination MAC address to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_DST_MAC_OFST 52
+#define       MC_CMD_FILTER_OP_V3_IN_DST_MAC_LEN 6
+/* destination port to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_DST_PORT_OFST 58
+#define       MC_CMD_FILTER_OP_V3_IN_DST_PORT_LEN 2
+/* Ethernet type to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_OFST 60
+#define       MC_CMD_FILTER_OP_V3_IN_ETHER_TYPE_LEN 2
+/* Inner VLAN tag to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_OFST 62
+#define       MC_CMD_FILTER_OP_V3_IN_INNER_VLAN_LEN 2
+/* Outer VLAN tag to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_OFST 64
+#define       MC_CMD_FILTER_OP_V3_IN_OUTER_VLAN_LEN 2
+/* IP protocol to match (in low byte; set high byte to 0) */
+#define       MC_CMD_FILTER_OP_V3_IN_IP_PROTO_OFST 66
+#define       MC_CMD_FILTER_OP_V3_IN_IP_PROTO_LEN 2
+/* Firmware defined register 0 to match (reserved; set to 0) */
+#define       MC_CMD_FILTER_OP_V3_IN_FWDEF0_OFST 68
+#define       MC_CMD_FILTER_OP_V3_IN_FWDEF0_LEN 4
+/* VNI (for VXLAN/Geneve, when IP protocol is UDP) or VSID (for NVGRE, when IP
+ * protocol is GRE) to match (as bytes in network order; set last byte to 0 for
+ * VXLAN/NVGRE, or 1 for Geneve)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_OFST 72
+#define       MC_CMD_FILTER_OP_V3_IN_VNI_OR_VSID_LEN 4
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_VALUE_WIDTH 24
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_LBN 24
+#define        MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_WIDTH 8
+/* enum: Match VXLAN traffic with this VNI */
+#define          MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_VXLAN 0x0
+/* enum: Match Geneve traffic with this VNI */
+#define          MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_GENEVE 0x1
+/* enum: Reserved for experimental development use */
+#define          MC_CMD_FILTER_OP_V3_IN_VNI_TYPE_EXPERIMENTAL 0xfe
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_LBN 0
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_VALUE_WIDTH 24
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_LBN 24
+#define        MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_WIDTH 8
+/* enum: Match NVGRE traffic with this VSID */
+#define          MC_CMD_FILTER_OP_V3_IN_VSID_TYPE_NVGRE 0x0
+/* source IP address to match (as bytes in network order; set last 12 bytes to
+ * 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_IP_OFST 76
+#define       MC_CMD_FILTER_OP_V3_IN_SRC_IP_LEN 16
+/* destination IP address to match (as bytes in network order; set last 12
+ * bytes to 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_DST_IP_OFST 92
+#define       MC_CMD_FILTER_OP_V3_IN_DST_IP_LEN 16
+/* VXLAN/NVGRE inner frame source MAC address to match (as bytes in network
+ * order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_OFST 108
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_MAC_LEN 6
+/* VXLAN/NVGRE inner frame source port to match (as bytes in network order) */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_OFST 114
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_PORT_LEN 2
+/* VXLAN/NVGRE inner frame destination MAC address to match (as bytes in
+ * network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_OFST 116
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_MAC_LEN 6
+/* VXLAN/NVGRE inner frame destination port to match (as bytes in network
+ * order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_OFST 122
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_PORT_LEN 2
+/* VXLAN/NVGRE inner frame Ethernet type to match (as bytes in network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_OFST 124
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_ETHER_TYPE_LEN 2
+/* VXLAN/NVGRE inner frame Inner VLAN tag to match (as bytes in network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_OFST 126
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_INNER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame Outer VLAN tag to match (as bytes in network order)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_OFST 128
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_OUTER_VLAN_LEN 2
+/* VXLAN/NVGRE inner frame IP protocol to match (in low byte; set high byte to
+ * 0)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_OFST 130
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_IP_PROTO_LEN 2
+/* VXLAN/NVGRE inner frame Firmware defined register 0 to match (reserved; set
+ * to 0)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_OFST 132
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF0_LEN 4
+/* VXLAN/NVGRE inner frame Firmware defined register 1 to match (reserved; set
+ * to 0)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_OFST 136
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_FWDEF1_LEN 4
+/* VXLAN/NVGRE inner frame source IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_OFST 140
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_SRC_IP_LEN 16
+/* VXLAN/NVGRE inner frame destination IP address to match (as bytes in network
+ * order; set last 12 bytes to 0 for IPv4 address)
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_OFST 156
+#define       MC_CMD_FILTER_OP_V3_IN_IFRM_DST_IP_LEN 16
+/* Set an action for all packets matching this filter. The DPDK driver and dpdk
+ * f/w variant use their own specific delivery structures, which are documented
+ * in the DPDK Firmware Driver Interface (SF-119419-TC). Requesting anything
+ * other than MATCH_ACTION_NONE when the NIC is running another f/w variant
+ * will cause the filter insertion to fail with ENOTSUP.
+ */
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_OFST 172
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_LEN 4
+/* enum: do nothing extra */
+#define          MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_NONE 0x0
+/* enum: Set the match flag in the packet prefix for packets matching the
+ * filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "FLAG" action.
+ */
+#define          MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_FLAG 0x1
+/* enum: Insert MATCH_MARK_VALUE into the packet prefix for packets matching
+ * the filter (only with dpdk firmware, otherwise fails with ENOTSUP). Used to
+ * support the DPDK rte_flow "MARK" action.
+ */
+#define          MC_CMD_FILTER_OP_V3_IN_MATCH_ACTION_MARK 0x2
+/* the mark value for MATCH_ACTION_MARK */
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_OFST 176
+#define       MC_CMD_FILTER_OP_V3_IN_MATCH_MARK_VALUE_LEN 4
+
 /* MC_CMD_FILTER_OP_OUT msgresponse */
 #define    MC_CMD_FILTER_OP_OUT_LEN 12
 /* identifies the type of operation requested */
@@ -8147,9 +8708,9 @@
 #define       MC_CMD_FILTER_OP_OUT_HANDLE_LO_OFST 4
 #define       MC_CMD_FILTER_OP_OUT_HANDLE_HI_OFST 8
 /* enum: guaranteed invalid filter handle (low 32 bits) */
-#define          MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID  0xffffffff
+#define          MC_CMD_FILTER_OP_OUT_HANDLE_LO_INVALID 0xffffffff
 /* enum: guaranteed invalid filter handle (high 32 bits) */
-#define          MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID  0xffffffff
+#define          MC_CMD_FILTER_OP_OUT_HANDLE_HI_INVALID 0xffffffff
 
 /* MC_CMD_FILTER_OP_EXT_OUT msgresponse */
 #define    MC_CMD_FILTER_OP_EXT_OUT_LEN 12
@@ -8184,20 +8745,20 @@
 #define       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_OFST 0
 #define       MC_CMD_GET_PARSER_DISP_INFO_IN_OP_LEN 4
 /* enum: read the list of supported RX filter matches */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES  0x1
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES 0x1
 /* enum: read flags indicating restrictions on filter insertion for the calling
  * client
  */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS  0x2
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_RESTRICTIONS 0x2
 /* enum: read properties relating to security rules (Medford-only; for use by
  * SolarSecure apps, not directly by drivers. See SF-114946-SW.)
  */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO  0x3
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SECURITY_RULE_INFO 0x3
 /* enum: read the list of supported RX filter matches for VXLAN/NVGRE
  * encapsulated frames, which follow a different match sequence to normal
  * frames (Medford only)
  */
-#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES  0x4
+#define          MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES 0x4
 
 /* MC_CMD_GET_PARSER_DISP_INFO_OUT msgresponse */
 #define    MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMIN 8
@@ -8238,7 +8799,9 @@
  * Direct read/write of parser-dispatcher state (DICPUs and LUE) for debugging.
  * Please note that this interface is only of use to debug tools which have
  * knowledge of firmware and hardware data structures; nothing here is intended
- * for use by normal driver code.
+ * for use by normal driver code. Note that although this command is in the
+ * Admin privilege group, in tamperproof adapters, only read operations are
+ * permitted.
  */
 #define MC_CMD_PARSER_DISP_RW 0xe5
 
@@ -8250,32 +8813,36 @@
 #define       MC_CMD_PARSER_DISP_RW_IN_TARGET_OFST 0
 #define       MC_CMD_PARSER_DISP_RW_IN_TARGET_LEN 4
 /* enum: RX dispatcher CPU */
-#define          MC_CMD_PARSER_DISP_RW_IN_RX_DICPU  0x0
+#define          MC_CMD_PARSER_DISP_RW_IN_RX_DICPU 0x0
 /* enum: TX dispatcher CPU */
-#define          MC_CMD_PARSER_DISP_RW_IN_TX_DICPU  0x1
+#define          MC_CMD_PARSER_DISP_RW_IN_TX_DICPU 0x1
 /* enum: Lookup engine (with original metadata format). Deprecated; used only
  * by cmdclient as a fallback for very old Huntington firmware, and not
  * supported in firmware beyond v6.4.0.1005. Use LUE_VERSIONED_METADATA
  * instead.
  */
-#define          MC_CMD_PARSER_DISP_RW_IN_LUE  0x2
+#define          MC_CMD_PARSER_DISP_RW_IN_LUE 0x2
 /* enum: Lookup engine (with requested metadata format) */
-#define          MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA  0x3
+#define          MC_CMD_PARSER_DISP_RW_IN_LUE_VERSIONED_METADATA 0x3
 /* enum: RX0 dispatcher CPU (alias for RX_DICPU; Medford has 2 RX DICPUs) */
-#define          MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU  0x0
+#define          MC_CMD_PARSER_DISP_RW_IN_RX0_DICPU 0x0
 /* enum: RX1 dispatcher CPU (only valid for Medford) */
-#define          MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU  0x4
+#define          MC_CMD_PARSER_DISP_RW_IN_RX1_DICPU 0x4
 /* enum: Miscellaneous other state (only valid for Medford) */
-#define          MC_CMD_PARSER_DISP_RW_IN_MISC_STATE  0x5
+#define          MC_CMD_PARSER_DISP_RW_IN_MISC_STATE 0x5
 /* identifies the type of operation requested */
 #define       MC_CMD_PARSER_DISP_RW_IN_OP_OFST 4
 #define       MC_CMD_PARSER_DISP_RW_IN_OP_LEN 4
 /* enum: Read a word of DICPU DMEM or a LUE entry */
-#define          MC_CMD_PARSER_DISP_RW_IN_READ  0x0
-/* enum: Write a word of DICPU DMEM or a LUE entry. */
-#define          MC_CMD_PARSER_DISP_RW_IN_WRITE  0x1
-/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). */
-#define          MC_CMD_PARSER_DISP_RW_IN_RMW  0x2
+#define          MC_CMD_PARSER_DISP_RW_IN_READ 0x0
+/* enum: Write a word of DICPU DMEM or a LUE entry. Not permitted on
+ * tamperproof adapters.
+ */
+#define          MC_CMD_PARSER_DISP_RW_IN_WRITE 0x1
+/* enum: Read-modify-write a word of DICPU DMEM (not valid for LUE). Not
+ * permitted on tamperproof adapters.
+ */
+#define          MC_CMD_PARSER_DISP_RW_IN_RMW 0x2
 /* data memory address (DICPU targets) or LUE index (LUE targets) */
 #define       MC_CMD_PARSER_DISP_RW_IN_ADDRESS_OFST 8
 #define       MC_CMD_PARSER_DISP_RW_IN_ADDRESS_LEN 4
@@ -8283,7 +8850,7 @@
 #define       MC_CMD_PARSER_DISP_RW_IN_SELECTOR_OFST 8
 #define       MC_CMD_PARSER_DISP_RW_IN_SELECTOR_LEN 4
 /* enum: Port to datapath mapping */
-#define          MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING  0x1
+#define          MC_CMD_PARSER_DISP_RW_IN_PORT_DP_MAPPING 0x1
 /* value to write (for DMEM writes) */
 #define       MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_OFST 12
 #define       MC_CMD_PARSER_DISP_RW_IN_DMEM_WRITE_VALUE_LEN 4
@@ -8317,8 +8884,8 @@
 #define       MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_OFST 0
 #define       MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_LEN 4
 #define       MC_CMD_PARSER_DISP_RW_OUT_PORT_DP_MAPPING_NUM 4
-#define          MC_CMD_PARSER_DISP_RW_OUT_DP0  0x1 /* enum */
-#define          MC_CMD_PARSER_DISP_RW_OUT_DP1  0x2 /* enum */
+#define          MC_CMD_PARSER_DISP_RW_OUT_DP0 0x1 /* enum */
+#define          MC_CMD_PARSER_DISP_RW_OUT_DP1 0x2 /* enum */
 
 
 /***********************************/
@@ -8783,13 +9350,13 @@
 #define       MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_OFST 0
 #define       MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_LEN 4
 /* enum: MISC. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC  0x0
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_MISC 0x0
 /* enum: IDO. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO  0x1
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_IDO 0x1
 /* enum: RO. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO  0x2
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_RO 0x2
 /* enum: TPH Type. */
-#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE  0x3
+#define          MC_CMD_GET_TLP_PROCESSING_GLOBALS_IN_TLP_GLOBAL_CATEGORY_TPH_TYPE 0x3
 
 /* MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT msgresponse */
 #define    MC_CMD_GET_TLP_PROCESSING_GLOBALS_OUT_LEN 8
@@ -8920,57 +9487,57 @@
  */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_OFST 0
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_LEN 4
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE     0x0 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET    0x1 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS    0x2 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS  0x3 /* enum */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY    0x4 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IDLE 0x0 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_RESET 0x1 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_IMEMS 0x2 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_VECTORS 0x3 /* enum */
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_PHASE_READY 0x4 /* enum */
 /* Target for download. (These match the blob numbers defined in
  * mc_flash_layout.h.)
  */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_OFST 4
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_LEN 4
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT  0x0
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_TEXT 0x0
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT  0x1
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_TEXT 0x1
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT  0x2
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDP_TEXT 0x2
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT  0x3
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDP_TEXT 0x3
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT  0x4
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT 0x4
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG  0x5
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_LUT_CFG 0x5
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT  0x6
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT 0x6
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG  0x7
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_LUT_CFG 0x7
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM  0x8
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_HR_PGM 0x8
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM  0x9
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXHRSL_SL_PGM 0x9
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM  0xa
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_HR_PGM 0xa
 /* enum: Valid in phase 2 (PHASE_IMEMS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM  0xb
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXHRSL_SL_PGM 0xb
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0  0xc
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL0 0xc
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0  0xd
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL0 0xd
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1  0xe
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_RXDI_VTBL1 0xe
 /* enum: Valid in phase 3 (PHASE_VECTORS) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1  0xf
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_TXDI_VTBL1 0xf
 /* enum: Valid in phases 1 (PHASE_RESET) and 4 (PHASE_READY) only */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL  0xffffffff
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_TARGET_ALL 0xffffffff
 /* Chunk ID, or CHUNK_ID_LAST or CHUNK_ID_ABORT */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_OFST 8
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LEN 4
 /* enum: Last chunk, containing checksum rather than data */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST  0xffffffff
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_LAST 0xffffffff
 /* enum: Abort download of this item */
-#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT  0xfffffffe
+#define          MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_ID_ABORT 0xfffffffe
 /* Length of this chunk in bytes */
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_OFST 12
 #define       MC_CMD_SATELLITE_DOWNLOAD_IN_CHUNK_LEN_LEN 4
@@ -8989,21 +9556,21 @@
 #define       MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_OFST 4
 #define       MC_CMD_SATELLITE_DOWNLOAD_OUT_INFO_LEN 4
 /* enum: Code download OK, completed. */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE  0x0
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_COMPLETE 0x0
 /* enum: Code download aborted as requested. */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED  0x1
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_ABORTED 0x1
 /* enum: Code download OK so far, send next chunk. */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK  0x2
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_OK_NEXT_CHUNK 0x2
 /* enum: Download phases out of sequence */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE  0x100
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_PHASE 0x100
 /* enum: Bad target for this phase */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET  0x101
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_TARGET 0x101
 /* enum: Chunk ID out of sequence */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID  0x200
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_ID 0x200
 /* enum: Chunk length zero or too large */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN  0x201
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHUNK_LEN 0x201
 /* enum: Checksum was incorrect */
-#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM  0x300
+#define          MC_CMD_SATELLITE_DOWNLOAD_OUT_ERR_BAD_CHECKSUM 0x300
 
 
 /***********************************/
@@ -9087,54 +9654,58 @@
 #define       MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9144,41 +9715,43 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9188,34 +9761,36 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_OUT_HW_CAPABILITIES_LEN 4
@@ -9293,54 +9868,58 @@
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9350,41 +9929,43 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9394,34 +9975,36 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_HW_CAPABILITIES_LEN 4
@@ -9469,6 +10052,18 @@
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TSA_BOUND_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_VI_SPREADING_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -9482,18 +10077,18 @@
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED  0xff
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff
 /* enum: PF does not exist. */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT  0xfe
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe
 /* enum: PF does exist but is not assigned to any external port. */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED  0xfd
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_ASSIGNED 0xfd
 /* enum: This value indicates that PF is assigned, but it cannot be expressed
  * in this field. It is intended for a possible future situation where a more
  * complex scheme of PFs to ports mapping is being used. The future driver
  * should look for a new field supporting the new scheme. The current/old
  * driver should treat this value as PF_NOT_ASSIGNED.
  */
-#define          MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
 /* One byte per PF containing the number of its VFs, indexed by PF number. A
  * special value indicates that a PF is not present.
  */
@@ -9501,9 +10096,9 @@
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VFS_PER_PF_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-/*               MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED  0xff */
+/*               MC_CMD_GET_CAPABILITIES_V2_OUT_ACCESS_NOT_PERMITTED 0xff */
 /* enum: PF does not exist. */
-/*               MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT  0xfe */
+/*               MC_CMD_GET_CAPABILITIES_V2_OUT_PF_NOT_PRESENT 0xfe */
 /* Number of VIs available for each external port */
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_OFST 58
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -9592,54 +10187,58 @@
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9649,41 +10248,43 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -9693,34 +10294,36 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_LEN 4
@@ -9768,6 +10371,18 @@
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TSA_BOUND_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VI_SPREADING_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -9781,18 +10396,18 @@
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff
 /* enum: PF does not exist. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe
 /* enum: PF does exist but is not assigned to any external port. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED  0xfd
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd
 /* enum: This value indicates that PF is assigned, but it cannot be expressed
  * in this field. It is intended for a possible future situation where a more
  * complex scheme of PFs to ports mapping is being used. The future driver
  * should look for a new field supporting the new scheme. The current/old
  * driver should treat this value as PF_NOT_ASSIGNED.
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
 /* One byte per PF containing the number of its VFs, indexed by PF number. A
  * special value indicates that a PF is not present.
  */
@@ -9800,9 +10415,9 @@
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */
 /* enum: PF does not exist. */
-/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */
 /* Number of VIs available for each external port */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -9833,11 +10448,11 @@
 /* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
  * CTPIO is not mapped.
  */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K   0x0
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0
 /* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K  0x1
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1
 /* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K  0x2
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2
 /* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
  * (SF-115995-SW) in the present configuration of firmware and port mode.
  */
@@ -9916,54 +10531,58 @@
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_OFST 4
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RX_DPCPU_FW_ID_LEN 2
 /* enum: Standard RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP 0x0
 /* enum: Low latency RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_LOW_LATENCY 0x1
 /* enum: Packed stream RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_PACKED_STREAM 0x2
 /* enum: Rules engine RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_RULES_ENGINE 0x5
+/* enum: DPDK RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_DPDK 0x6
 /* enum: BIST RXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST  0x10a
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_BIST 0x10a
 /* enum: RXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101
 /* enum: RXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102
 /* enum: RXDP Test firmware image 3 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103
 /* enum: RXDP Test firmware image 4 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104
 /* enum: RXDP Test firmware image 5 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE  0x105
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_BACKPRESSURE 0x105
 /* enum: RXDP Test firmware image 6 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106
 /* enum: RXDP Test firmware image 7 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107
 /* enum: RXDP Test firmware image 8 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DISABLE_DL 0x108
 /* enum: RXDP Test firmware image 9 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b
 /* enum: RXDP Test firmware image 10 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW  0x10c
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXDP_TEST_FW_SLOW 0x10c
 /* TxDPCPU firmware id. */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_OFST 6
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TX_DPCPU_FW_ID_LEN 2
 /* enum: Standard TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP 0x0
 /* enum: Low latency TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_LOW_LATENCY 0x1
 /* enum: High packet rate TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_HIGH_PACKET_RATE 0x3
 /* enum: Rules engine TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE  0x5
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_RULES_ENGINE 0x5
+/* enum: DPDK TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_DPDK 0x6
 /* enum: BIST TXDP firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST  0x12d
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_BIST 0x12d
 /* enum: TXDP Test firmware image 1 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_TSO_EDIT 0x101
 /* enum: TXDP Test firmware image 2 */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102
 /* enum: TXDP CSR bus test firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR  0x103
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXDP_TEST_FW_CSR 0x103
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_OFST 8
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_VERSION_REV_LBN 0
@@ -9973,41 +10592,43 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial RX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: RX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant RX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
 /* enum: Low latency RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5
 /* enum: Packed stream RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6
 /* enum: RX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine RX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * encapsulations (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_OFST 10
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_LEN 2
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_VERSION_REV_LBN 0
@@ -10017,34 +10638,36 @@
 /* enum: reserved value - do not use (may indicate alternative interpretation
  * of REV field in future)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED  0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED 0x0
 /* enum: Trivial TX PD firmware for early Huntington development (Huntington
  * development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1
 /* enum: TX PD firmware with approximately Siena-compatible behaviour
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2
 /* enum: Full featured TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_FULL_FEATURED 0x3
 /* enum: (deprecated original name for the FULL_FEATURED variant) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_VSWITCH 0x3
 /* enum: siena_compat variant TX PD firmware using PM rather than MAC
  * (Huntington development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */
 /* enum: TX PD firmware handling layer 2 only for high packet rate performance
  * tests (Medford development only)
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7
 /* enum: Rules engine TX PD production firmware */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
-/* enum: reserved value - do not use (bug69716) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RESERVED_9  0x9
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8
+/* enum: Custom firmware variant (see SF-119495-PD and bug69716) */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_L3XUDP 0x9
+/* enum: DPDK TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_DPDK 0xa
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe
 /* Hardware capabilities of NIC */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_OFST 12
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_HW_CAPABILITIES_LEN 4
@@ -10092,6 +10715,18 @@
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_TSA_BOUND_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_LBN 18
 #define        MC_CMD_GET_CAPABILITIES_V4_OUT_SF_ADAPTER_AUTHENTICATION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_FLAG_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FILTER_ACTION_MARK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_EQUAL_STRIDE_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_L3XUDP_SUPPORT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_FW_SUBVARIANT_NO_TX_CSUM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V4_OUT_VI_SPREADING_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -10105,18 +10740,18 @@
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED  0xff
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff
 /* enum: PF does not exist. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT  0xfe
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe
 /* enum: PF does exist but is not assigned to any external port. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED  0xfd
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_ASSIGNED 0xfd
 /* enum: This value indicates that PF is assigned, but it cannot be expressed
  * in this field. It is intended for a possible future situation where a more
  * complex scheme of PFs to ports mapping is being used. The future driver
  * should look for a new field supporting the new scheme. The current/old
  * driver should treat this value as PF_NOT_ASSIGNED.
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc
 /* One byte per PF containing the number of its VFs, indexed by PF number. A
  * special value indicates that a PF is not present.
  */
@@ -10124,9 +10759,9 @@
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_LEN 1
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VFS_PER_PF_NUM 16
 /* enum: The caller is not permitted to access information on this PF. */
-/*               MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED  0xff */
+/*               MC_CMD_GET_CAPABILITIES_V4_OUT_ACCESS_NOT_PERMITTED 0xff */
 /* enum: PF does not exist. */
-/*               MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT  0xfe */
+/*               MC_CMD_GET_CAPABILITIES_V4_OUT_PF_NOT_PRESENT 0xfe */
 /* Number of VIs available for each external port */
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_OFST 58
 #define       MC_CMD_GET_CAPABILITIES_V4_OUT_NUM_VIS_PER_PORT_LEN 2
@@ -10157,11 +10792,11 @@
 /* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
  * CTPIO is not mapped.
  */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K   0x0
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_8K 0x0
 /* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K  0x1
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_16K 0x1
 /* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
-#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K  0x2
+#define          MC_CMD_GET_CAPABILITIES_V4_OUT_VI_WINDOW_MODE_64K 0x2
 /* Number of vFIFOs per adapter that can be used for VFIFO Stuffing
  * (SF-115995-SW) in the present configuration of firmware and port mode.
  */
@@ -10201,7 +10836,16 @@
 #define       MC_CMD_V2_EXTN_IN_ACTUAL_LEN_LBN 16
 #define       MC_CMD_V2_EXTN_IN_ACTUAL_LEN_WIDTH 10
 #define       MC_CMD_V2_EXTN_IN_UNUSED2_LBN 26
-#define       MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 6
+#define       MC_CMD_V2_EXTN_IN_UNUSED2_WIDTH 2
+/* Type of command/response */
+#define       MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_LBN 28
+#define       MC_CMD_V2_EXTN_IN_MESSAGE_TYPE_WIDTH 4
+/* enum: MCDI command directed to or response originating from the MC. */
+#define          MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_MC 0x0
+/* enum: MCDI command directed to a TSA controller. MCDI responses of this type
+ * are not defined.
+ */
+#define          MC_CMD_V2_EXTN_IN_MCDI_MESSAGE_TYPE_TSA 0x1
 
 
 /***********************************/
@@ -10412,15 +11056,15 @@
 #define       MC_CMD_VSWITCH_ALLOC_IN_TYPE_OFST 4
 #define       MC_CMD_VSWITCH_ALLOC_IN_TYPE_LEN 4
 /* enum: VLAN */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN  0x1
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VLAN 0x1
 /* enum: VEB */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB  0x2
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEB 0x2
 /* enum: VEPA (obsolete) */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA  0x3
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_VEPA 0x3
 /* enum: MUX */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX  0x4
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_MUX 0x4
 /* enum: Snapper specific; semantics TBD */
-#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST  0x5
+#define          MC_CMD_VSWITCH_ALLOC_IN_VSWITCH_TYPE_TEST 0x5
 /* Flags controlling v-port creation */
 #define       MC_CMD_VSWITCH_ALLOC_IN_FLAGS_OFST 8
 #define       MC_CMD_VSWITCH_ALLOC_IN_FLAGS_LEN 4
@@ -10495,23 +11139,23 @@
 #define       MC_CMD_VPORT_ALLOC_IN_TYPE_OFST 4
 #define       MC_CMD_VPORT_ALLOC_IN_TYPE_LEN 4
 /* enum: VLAN (obsolete) */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN  0x1
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VLAN 0x1
 /* enum: VEB (obsolete) */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB  0x2
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEB 0x2
 /* enum: VEPA (obsolete) */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA  0x3
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_VEPA 0x3
 /* enum: A normal v-port receives packets which match a specified MAC and/or
  * VLAN.
  */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL  0x4
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL 0x4
 /* enum: An expansion v-port packets traffic which don't match any other
  * v-port.
  */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION  0x5
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_EXPANSION 0x5
 /* enum: An test v-port receives packets which match any filters installed by
  * its downstream components.
  */
-#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST  0x6
+#define          MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_TEST 0x6
 /* Flags controlling v-port creation */
 #define       MC_CMD_VPORT_ALLOC_IN_FLAGS_OFST 8
 #define       MC_CMD_VPORT_ALLOC_IN_FLAGS_LEN 4
@@ -10595,7 +11239,7 @@
 #define       MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_OFST 24
 #define       MC_CMD_VADAPTOR_ALLOC_IN_MACADDR_LEN 6
 /* enum: Derive the MAC address from the upstream port */
-#define          MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC  0x0
+#define          MC_CMD_VADAPTOR_ALLOC_IN_AUTO_MAC 0x0
 
 /* MC_CMD_VADAPTOR_ALLOC_OUT msgresponse */
 #define    MC_CMD_VADAPTOR_ALLOC_OUT_LEN 0
@@ -10809,12 +11453,12 @@
 /* enum: Allocate a context for exclusive use. The key and indirection table
  * must be explicitly configured.
  */
-#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE  0x0
+#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE 0x0
 /* enum: Allocate a context for shared use; this will spread across a range of
  * queues, but the key and indirection table are pre-configured and may not be
  * changed. For this mode, NUM_QUEUES must 2, 4, 8, 16, 32 or 64.
  */
-#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED  0x1
+#define          MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED 0x1
 /* Number of queues spanned by this context, in the range 1-64; valid offsets
  * in the indirection table will be in the range 0 to NUM_QUEUES-1.
  */
@@ -10830,7 +11474,7 @@
 #define       MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_OFST 0
 #define       MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_LEN 4
 /* enum: guaranteed invalid RSS context handle value */
-#define          MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID  0xffffffff
+#define          MC_CMD_RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID_INVALID 0xffffffff
 
 
 /***********************************/
@@ -11073,7 +11717,7 @@
 #define       MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_OFST 0
 #define       MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_LEN 4
 /* enum: guaranteed invalid .1p mapping handle value */
-#define          MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID  0xffffffff
+#define          MC_CMD_DOT1P_MAPPING_ALLOC_OUT_DOT1P_MAPPING_ID_INVALID 0xffffffff
 
 
 /***********************************/
@@ -11385,11 +12029,11 @@
 #define        MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_LBN 1
 #define        MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_LEN_WIDTH 2
 /* enum: pad to 64 bytes */
-#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64  0x0
+#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_64 0x0
 /* enum: pad to 128 bytes (Medford only) */
-#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128  0x1
+#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_128 0x1
 /* enum: pad to 256 bytes (Medford only) */
-#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256   0x2
+#define          MC_CMD_SET_RXDP_CONFIG_IN_PAD_HOST_256 0x2
 
 /* MC_CMD_SET_RXDP_CONFIG_OUT msgresponse */
 #define    MC_CMD_SET_RXDP_CONFIG_OUT_LEN 0
@@ -11453,37 +12097,37 @@
 #define       MC_CMD_SET_CLOCK_IN_SYS_FREQ_OFST 0
 #define       MC_CMD_SET_CLOCK_IN_SYS_FREQ_LEN 4
 /* enum: Leave the system clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_SYS_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for inter-core clock domain */
 #define       MC_CMD_SET_CLOCK_IN_ICORE_FREQ_OFST 4
 #define       MC_CMD_SET_CLOCK_IN_ICORE_FREQ_LEN 4
 /* enum: Leave the inter-core clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_ICORE_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for DPCPU clock domain */
 #define       MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_OFST 8
 #define       MC_CMD_SET_CLOCK_IN_DPCPU_FREQ_LEN 4
 /* enum: Leave the DPCPU clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_DPCPU_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for PCS clock domain */
 #define       MC_CMD_SET_CLOCK_IN_PCS_FREQ_OFST 12
 #define       MC_CMD_SET_CLOCK_IN_PCS_FREQ_LEN 4
 /* enum: Leave the PCS clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_PCS_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for MC clock domain */
 #define       MC_CMD_SET_CLOCK_IN_MC_FREQ_OFST 16
 #define       MC_CMD_SET_CLOCK_IN_MC_FREQ_LEN 4
 /* enum: Leave the MC clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_MC_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for rmon clock domain */
 #define       MC_CMD_SET_CLOCK_IN_RMON_FREQ_OFST 20
 #define       MC_CMD_SET_CLOCK_IN_RMON_FREQ_LEN 4
 /* enum: Leave the rmon clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_RMON_DOMAIN_DONT_CHANGE 0x0
 /* Requested frequency in MHz for vswitch clock domain */
 #define       MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_OFST 24
 #define       MC_CMD_SET_CLOCK_IN_VSWITCH_FREQ_LEN 4
 /* enum: Leave the vswitch clock domain frequency unchanged */
-#define          MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE  0x0
+#define          MC_CMD_SET_CLOCK_IN_VSWITCH_DOMAIN_DONT_CHANGE 0x0
 
 /* MC_CMD_SET_CLOCK_OUT msgresponse */
 #define    MC_CMD_SET_CLOCK_OUT_LEN 28
@@ -11491,37 +12135,37 @@
 #define       MC_CMD_SET_CLOCK_OUT_SYS_FREQ_OFST 0
 #define       MC_CMD_SET_CLOCK_OUT_SYS_FREQ_LEN 4
 /* enum: The system clock domain doesn't exist */
-#define          MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_SYS_DOMAIN_UNSUPPORTED 0x0
 /* Resulting inter-core frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_OFST 4
 #define       MC_CMD_SET_CLOCK_OUT_ICORE_FREQ_LEN 4
 /* enum: The inter-core clock domain doesn't exist / isn't used */
-#define          MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_ICORE_DOMAIN_UNSUPPORTED 0x0
 /* Resulting DPCPU frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_OFST 8
 #define       MC_CMD_SET_CLOCK_OUT_DPCPU_FREQ_LEN 4
 /* enum: The dpcpu clock domain doesn't exist */
-#define          MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_DPCPU_DOMAIN_UNSUPPORTED 0x0
 /* Resulting PCS frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_PCS_FREQ_OFST 12
 #define       MC_CMD_SET_CLOCK_OUT_PCS_FREQ_LEN 4
 /* enum: The PCS clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_PCS_DOMAIN_UNSUPPORTED 0x0
 /* Resulting MC frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_MC_FREQ_OFST 16
 #define       MC_CMD_SET_CLOCK_OUT_MC_FREQ_LEN 4
 /* enum: The MC clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_MC_DOMAIN_UNSUPPORTED 0x0
 /* Resulting rmon frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_RMON_FREQ_OFST 20
 #define       MC_CMD_SET_CLOCK_OUT_RMON_FREQ_LEN 4
 /* enum: The rmon clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_RMON_DOMAIN_UNSUPPORTED 0x0
 /* Resulting vswitch frequency in MHz */
 #define       MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_OFST 24
 #define       MC_CMD_SET_CLOCK_OUT_VSWITCH_FREQ_LEN 4
 /* enum: The vswitch clock domain doesn't exist / isn't controlled */
-#define          MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED  0x0
+#define          MC_CMD_SET_CLOCK_OUT_VSWITCH_DOMAIN_UNSUPPORTED 0x0
 
 
 /***********************************/
@@ -11537,21 +12181,21 @@
 #define       MC_CMD_DPCPU_RPC_IN_CPU_OFST 0
 #define       MC_CMD_DPCPU_RPC_IN_CPU_LEN 4
 /* enum: RxDPCPU0 */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX0  0x0
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX0 0x0
 /* enum: TxDPCPU0 */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX0  0x1
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX0 0x1
 /* enum: TxDPCPU1 */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX1  0x2
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX1 0x2
 /* enum: RxDPCPU1 (Medford only) */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX1   0x3
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX1 0x3
 /* enum: RxDPCPU (will be for the calling function; for now, just an alias of
  * DPCPU_RX0)
  */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX   0x80
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_RX 0x80
 /* enum: TxDPCPU (will be for the calling function; for now, just an alias of
  * DPCPU_TX0)
  */
-#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX   0x81
+#define          MC_CMD_DPCPU_RPC_IN_DPCPU_TX 0x81
 /* First 8 bits [39:32] of DATA are consumed by MC-DPCPU protocol and must be
  * initialised to zero
  */
@@ -11559,15 +12203,15 @@
 #define       MC_CMD_DPCPU_RPC_IN_DATA_LEN 32
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_LBN 8
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_CMDNUM_WIDTH 8
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ  0x6 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE  0x7 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST  0xc /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS  0xe /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ  0x46 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE  0x47 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST  0x4a /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS  0x4c /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT  0x4d /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_READ 0x6 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_WRITE 0x7 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_SELF_TEST 0xc /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_TXDPCPU_CSR_ACCESS 0xe /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_READ 0x46 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_WRITE 0x47 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SELF_TEST 0x4a /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_CSR_ACCESS 0x4c /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CMDNUM_RXDPCPU_SET_MC_REPLAY_CNTXT 0x4d /* enum */
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_OBJID_WIDTH 16
 #define        MC_CMD_DPCPU_RPC_IN_HDR_CMD_REQ_ADDR_LBN 16
@@ -11578,11 +12222,11 @@
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_INFO_WIDTH 240
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_WIDTH 16
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT  0x0 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ  0x1 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE  0x2 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ  0x3 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ  0x4 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_STOP_RETURN_RESULT 0x0 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_READ 0x1 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE 0x2 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_WRITE_READ 0x3 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_CMD_START_PIPELINED_READ 0x4 /* enum */
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_LBN 48
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_START_DELAY_WIDTH 16
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_RPT_COUNT_LBN 64
@@ -11591,9 +12235,9 @@
 #define        MC_CMD_DPCPU_RPC_IN_CSR_ACCESS_GAP_DELAY_WIDTH 16
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_LBN 16
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_WIDTH 16
-#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH  0x1 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD  0x2 /* enum */
-#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST  0x3 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_CUT_THROUGH 0x1 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD 0x2 /* enum */
+#define          MC_CMD_DPCPU_RPC_IN_MC_REPLAY_MODE_STORE_FORWARD_FIRST 0x3 /* enum */
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_LBN 64
 #define        MC_CMD_DPCPU_RPC_IN_MC_REPLAY_CNTXT_WIDTH 16
 #define       MC_CMD_DPCPU_RPC_IN_WDATA_OFST 12
@@ -11660,7 +12304,7 @@
 #define       MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_OFST 0
 #define       MC_CMD_SHMBOOT_OP_IN_SHMBOOT_OP_LEN 4
 /* enum: Copy slave_data section to the slave core. (Greenport only) */
-#define          MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA  0x0
+#define          MC_CMD_SHMBOOT_OP_IN_PUSH_SLAVE_DATA 0x0
 
 /* MC_CMD_SHMBOOT_OP_OUT msgresponse */
 #define    MC_CMD_SHMBOOT_OP_OUT_LEN 0
@@ -11709,14 +12353,14 @@
 #define       MC_CMD_DUMP_DO_IN_PADDING_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_OFST 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_LEN 4
-#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM  0x0 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT  0x1 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM 0x0 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_DEFAULT 0x1 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_OFST 8
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_TYPE_LEN 4
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM  0x1 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY  0x2 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI  0x3 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART  0x4 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_NVRAM 0x1 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY 0x2 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_HOST_MEMORY_MLI 0x3 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMP_LOCATION_UART 0x4 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_OFST 12
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_PARTITION_TYPE_ID_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_NVRAM_OFFSET_OFST 16
@@ -11727,24 +12371,24 @@
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_ADDR_HI_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_OFST 12
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_LO_LEN 4
-#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE  0x1000 /* enum */
+#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_PAGE_SIZE 0x1000 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_OFST 16
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_ROOT_ADDR_HI_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_OFST 20
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_HOST_MEMORY_MLI_DEPTH_LEN 4
-#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH  0x2 /* enum */
+#define          MC_CMD_DUMP_DO_IN_HOST_MEMORY_MLI_MAX_DEPTH 0x2 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_OFST 12
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_UART_PORT_LEN 4
 /* enum: The uart port this command was received over (if using a uart
  * transport)
  */
-#define          MC_CMD_DUMP_DO_IN_UART_PORT_SRC  0xff
+#define          MC_CMD_DUMP_DO_IN_UART_PORT_SRC 0xff
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_OFST 24
 #define       MC_CMD_DUMP_DO_IN_DUMPSPEC_SRC_CUSTOM_SIZE_LEN 4
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_OFST 28
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_LEN 4
-#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM  0x0 /* enum */
-#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION  0x1 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM 0x0 /* enum */
+#define          MC_CMD_DUMP_DO_IN_DUMPFILE_DST_NVRAM_DUMP_PARTITION 0x1 /* enum */
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_OFST 32
 #define       MC_CMD_DUMP_DO_IN_DUMPFILE_DST_CUSTOM_TYPE_LEN 4
 /*            Enum values, see field(s): */
@@ -11854,11 +12498,11 @@
 #define    MC_CMD_SET_PSU_IN_LEN 12
 #define       MC_CMD_SET_PSU_IN_PARAM_OFST 0
 #define       MC_CMD_SET_PSU_IN_PARAM_LEN 4
-#define          MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE  0x0 /* enum */
+#define          MC_CMD_SET_PSU_IN_PARAM_SUPPLY_VOLTAGE 0x0 /* enum */
 #define       MC_CMD_SET_PSU_IN_RAIL_OFST 4
 #define       MC_CMD_SET_PSU_IN_RAIL_LEN 4
-#define          MC_CMD_SET_PSU_IN_RAIL_0V9  0x0 /* enum */
-#define          MC_CMD_SET_PSU_IN_RAIL_1V2  0x1 /* enum */
+#define          MC_CMD_SET_PSU_IN_RAIL_0V9 0x0 /* enum */
+#define          MC_CMD_SET_PSU_IN_RAIL_1V2 0x1 /* enum */
 /* desired value, eg voltage in mV */
 #define       MC_CMD_SET_PSU_IN_VALUE_OFST 8
 #define       MC_CMD_SET_PSU_IN_VALUE_LEN 4
@@ -12031,26 +12675,30 @@
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_OP_OFST 0
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_OP_LEN 1
 /* enum: Get current RXEQ settings */
-#define          MC_CMD_KR_TUNE_IN_RXEQ_GET  0x0
+#define          MC_CMD_KR_TUNE_IN_RXEQ_GET 0x0
 /* enum: Override RXEQ settings */
-#define          MC_CMD_KR_TUNE_IN_RXEQ_SET  0x1
+#define          MC_CMD_KR_TUNE_IN_RXEQ_SET 0x1
 /* enum: Get current TX Driver settings */
-#define          MC_CMD_KR_TUNE_IN_TXEQ_GET  0x2
+#define          MC_CMD_KR_TUNE_IN_TXEQ_GET 0x2
 /* enum: Override TX Driver settings */
-#define          MC_CMD_KR_TUNE_IN_TXEQ_SET  0x3
+#define          MC_CMD_KR_TUNE_IN_TXEQ_SET 0x3
 /* enum: Force KR Serdes reset / recalibration */
-#define          MC_CMD_KR_TUNE_IN_RECAL  0x4
+#define          MC_CMD_KR_TUNE_IN_RECAL 0x4
 /* enum: Start KR Serdes Eye diagram plot on a given lane. Lane must have valid
  * signal.
  */
-#define          MC_CMD_KR_TUNE_IN_START_EYE_PLOT  0x5
+#define          MC_CMD_KR_TUNE_IN_START_EYE_PLOT 0x5
 /* enum: Poll KR Serdes Eye diagram plot. Returns one row of BER data. The
  * caller should call this command repeatedly after starting eye plot, until no
  * more data is returned.
  */
-#define          MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT  0x6
+#define          MC_CMD_KR_TUNE_IN_POLL_EYE_PLOT 0x6
 /* enum: Read Figure Of Merit (eye quality, higher is better). */
-#define          MC_CMD_KR_TUNE_IN_READ_FOM  0x7
+#define          MC_CMD_KR_TUNE_IN_READ_FOM 0x7
+/* enum: Start/stop link training frames */
+#define          MC_CMD_KR_TUNE_IN_LINK_TRAIN_RUN 0x8
+/* enum: Issue KR link training command (control training coefficients) */
+#define          MC_CMD_KR_TUNE_IN_LINK_TRAIN_CMD 0x9
 /* Align the arguments to 32 bits */
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_OFST 1
 #define       MC_CMD_KR_TUNE_IN_KR_TUNE_RSVD_LEN 3
@@ -12084,98 +12732,98 @@
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: Attenuation (0-15, Huntington) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT  0x0
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_ATT 0x0
 /* enum: CTLE Boost (0-15, Huntington) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST  0x1
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_BOOST 0x1
 /* enum: Edge DFE Tap1 (Huntington - 0 - max negative, 64 - zero, 127 - max
  * positive, Medford - 0-31)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1  0x2
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP1 0x2
 /* enum: Edge DFE Tap2 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-31)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2  0x3
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP2 0x3
 /* enum: Edge DFE Tap3 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-16)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3  0x4
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP3 0x4
 /* enum: Edge DFE Tap4 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-16)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4  0x5
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP4 0x5
 /* enum: Edge DFE Tap5 (Huntington - 0 - max negative, 32 - zero, 63 - max
  * positive, Medford - 0-16)
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5  0x6
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_TAP5 0x6
 /* enum: Edge DFE DLEV (0-128 for Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV  0x7
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_EDFE_DLEV 0x7
 /* enum: Variable Gain Amplifier (0-15, Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA  0x8
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_VGA 0x8
 /* enum: CTLE EQ Capacitor (0-15, Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC  0x9
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
 /* enum: CTLE EQ Resistor (0-7, Medford) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES  0xa
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
 /* enum: CTLE gain (0-31, Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN  0xb
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_GAIN 0xb
 /* enum: CTLE pole (0-31, Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE  0xc
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_POLE 0xc
 /* enum: CTLE peaking (0-31, Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK  0xd
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CTLE_PEAK 0xd
 /* enum: DFE Tap1 - even path (Medford2 - 6 bit signed (-29 - +29)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN  0xe
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_EVEN 0xe
 /* enum: DFE Tap1 - odd path (Medford2 - 6 bit signed (-29 - +29)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD  0xf
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP1_ODD 0xf
 /* enum: DFE Tap2 (Medford2 - 6 bit signed (-20 - +20)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2  0x10
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x10
 /* enum: DFE Tap3 (Medford2 - 6 bit signed (-20 - +20)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3  0x11
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x11
 /* enum: DFE Tap4 (Medford2 - 6 bit signed (-20 - +20)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4  0x12
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x12
 /* enum: DFE Tap5 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5  0x13
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x13
 /* enum: DFE Tap6 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6  0x14
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP6 0x14
 /* enum: DFE Tap7 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7  0x15
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP7 0x15
 /* enum: DFE Tap8 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8  0x16
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP8 0x16
 /* enum: DFE Tap9 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9  0x17
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP9 0x17
 /* enum: DFE Tap10 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10  0x18
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP10 0x18
 /* enum: DFE Tap11 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11  0x19
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP11 0x19
 /* enum: DFE Tap12 (Medford2 - 6 bit signed (-24 - +24)) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12  0x1a
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_DFE_TAP12 0x1a
 /* enum: I/Q clk offset (Medford2 - 4 bit signed (-5 - +5))) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF  0x1b
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_IQ_OFF 0x1b
 /* enum: Negative h1 polarity data sampler offset calibration code, even path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN  0x1c
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_EVEN 0x1c
 /* enum: Negative h1 polarity data sampler offset calibration code, odd path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD  0x1d
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1N_OFF_ODD 0x1d
 /* enum: Positive h1 polarity data sampler offset calibration code, even path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN  0x1e
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_EVEN 0x1e
 /* enum: Positive h1 polarity data sampler offset calibration code, odd path
  * (Medford2 - 6 bit signed (-29 - +29)))
  */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD  0x1f
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_H1P_OFF_ODD 0x1f
 /* enum: CDR calibration loop code (Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT  0x20
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_PVT 0x20
 /* enum: CDR integral loop code (Medford2) */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG  0x21
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_CDR_INTEG 0x21
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0  0x0 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1  0x1 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2  0x2 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3  0x3 /* enum */
-#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL  0x4 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define          MC_CMD_KR_TUNE_RXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 11
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
 #define        MC_CMD_KR_TUNE_RXEQ_GET_OUT_RESERVED_LBN 12
@@ -12241,38 +12889,38 @@
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: TX Amplitude (Huntington, Medford, Medford2) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV  0x0
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV 0x0
 /* enum: De-Emphasis Tap1 Magnitude (0-7) (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE  0x1
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_MODE 0x1
 /* enum: De-Emphasis Tap1 Fine */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV  0x2
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_DTLEV 0x2
 /* enum: De-Emphasis Tap2 Magnitude (0-6) (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2  0x3
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2 0x3
 /* enum: De-Emphasis Tap2 Fine (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV  0x4
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_D2TLEV 0x4
 /* enum: Pre-Emphasis Magnitude (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E  0x5
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_E 0x5
 /* enum: Pre-Emphasis Fine (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV  0x6
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_ETLEV 0x6
 /* enum: TX Slew Rate Coarse control (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY  0x7
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_PREDRV_DLY 0x7
 /* enum: TX Slew Rate Fine control (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET  0x8
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_SR_SET 0x8
 /* enum: TX Termination Impedance control (Huntington) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET  0x9
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_RT_SET 0x9
 /* enum: TX Amplitude Fine control (Medford) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE  0xa
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TX_LEV_FINE 0xa
 /* enum: Pre-shoot Tap (Medford, Medford2) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV  0xb
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_ADV 0xb
 /* enum: De-emphasis Tap (Medford, Medford2) */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY  0xc
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_TAP_DLY 0xc
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 3
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0  0x0 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1  0x1 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2  0x2 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3  0x3 /* enum */
-#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL  0x4 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define          MC_CMD_KR_TUNE_TXEQ_GET_OUT_LANE_ALL 0x4 /* enum */
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_LBN 11
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_RESERVED_WIDTH 5
 #define        MC_CMD_KR_TUNE_TXEQ_GET_OUT_PARAM_INITIAL_LBN 16
@@ -12345,9 +12993,12 @@
 /* Align the arguments to 32 bits */
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_OFST 1
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_KR_TUNE_RSVD_LEN 3
-/* Port-relative lane to scan eye on */
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_OFST 4
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_LEN 4
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_LBN 0
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_NUM_WIDTH 8
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_LBN 31
+#define        MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_LANE_ABS_REL_WIDTH 1
 /* Scan duration / cycle count */
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_OFST 8
 #define       MC_CMD_KR_TUNE_START_EYE_PLOT_V2_IN_BER_LEN 4
@@ -12383,12 +13034,91 @@
 #define       MC_CMD_KR_TUNE_READ_FOM_IN_KR_TUNE_RSVD_LEN 3
 #define       MC_CMD_KR_TUNE_READ_FOM_IN_LANE_OFST 4
 #define       MC_CMD_KR_TUNE_READ_FOM_IN_LANE_LEN 4
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_LBN 0
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_NUM_WIDTH 8
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_LBN 31
+#define        MC_CMD_KR_TUNE_READ_FOM_IN_LANE_ABS_REL_WIDTH 1
 
 /* MC_CMD_KR_TUNE_READ_FOM_OUT msgresponse */
 #define    MC_CMD_KR_TUNE_READ_FOM_OUT_LEN 4
 #define       MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_OFST 0
 #define       MC_CMD_KR_TUNE_READ_FOM_OUT_FOM_LEN 4
 
+/* MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN msgrequest */
+#define    MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_LEN 8
+/* Requested operation */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_OFST 0
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_OFST 1
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_KR_TUNE_RSVD_LEN 3
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_OFST 4
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_RUN_LEN 4
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_STOP 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_RUN_IN_START 0x1 /* enum */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN msgrequest */
+#define    MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LEN 28
+/* Requested operation */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_OFST 0
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_OP_LEN 1
+/* Align the arguments to 32 bits */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_OFST 1
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_KR_TUNE_RSVD_LEN 3
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_OFST 4
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_LANE_LEN 4
+/* Set INITIALIZE state */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_OFST 8
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_INITIALIZE_LEN 4
+/* Set PRESET state */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_OFST 12
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_PRESET_LEN 4
+/* C(-1) request */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_OFST 16
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CM1_LEN 4
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_HOLD 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_INCREMENT 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_REQ_DECREMENT 0x2 /* enum */
+/* C(0) request */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_OFST 20
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_C0_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) request */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_OFST 24
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN_CP1_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+
+/* MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT msgresponse */
+#define    MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_LEN 24
+/* C(-1) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_OFST 0
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_STATUS_LEN 4
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_NOT_UPDATED 0x0 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_UPDATED 0x1 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MINIMUM 0x2 /* enum */
+#define          MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_STATUS_MAXIMUM 0x3 /* enum */
+/* C(0) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_OFST 4
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_STATUS_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(+1) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_OFST 8
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_STATUS_LEN 4
+/*            Enum values, see field(s): */
+/*               MC_CMD_KR_TUNE_LINK_TRAIN_CMD_IN/CM1 */
+/* C(-1) value */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_OFST 12
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CM1_VALUE_LEN 4
+/* C(0) value */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_OFST 16
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_C0_VALUE_LEN 4
+/* C(+1) status */
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_OFST 20
+#define       MC_CMD_KR_TUNE_LINK_TRAIN_CMD_OUT_CP1_VALUE_LEN 4
+
 
 /***********************************/
 /* MC_CMD_PCIE_TUNE
@@ -12406,22 +13136,22 @@
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_OFST 0
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_OP_LEN 1
 /* enum: Get current RXEQ settings */
-#define          MC_CMD_PCIE_TUNE_IN_RXEQ_GET  0x0
+#define          MC_CMD_PCIE_TUNE_IN_RXEQ_GET 0x0
 /* enum: Override RXEQ settings */
-#define          MC_CMD_PCIE_TUNE_IN_RXEQ_SET  0x1
+#define          MC_CMD_PCIE_TUNE_IN_RXEQ_SET 0x1
 /* enum: Get current TX Driver settings */
-#define          MC_CMD_PCIE_TUNE_IN_TXEQ_GET  0x2
+#define          MC_CMD_PCIE_TUNE_IN_TXEQ_GET 0x2
 /* enum: Override TX Driver settings */
-#define          MC_CMD_PCIE_TUNE_IN_TXEQ_SET  0x3
+#define          MC_CMD_PCIE_TUNE_IN_TXEQ_SET 0x3
 /* enum: Start PCIe Serdes Eye diagram plot on a given lane. */
-#define          MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT  0x5
+#define          MC_CMD_PCIE_TUNE_IN_START_EYE_PLOT 0x5
 /* enum: Poll PCIe Serdes Eye diagram plot. Returns one row of BER data. The
  * caller should call this command repeatedly after starting eye plot, until no
  * more data is returned.
  */
-#define          MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT  0x6
+#define          MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6
 /* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */
-#define          MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE  0x7
+#define          MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7
 /* Align the arguments to 32 bits */
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3
@@ -12455,46 +13185,46 @@
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: Attenuation (0-15) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT  0x0
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_ATT 0x0
 /* enum: CTLE Boost (0-15) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST  0x1
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_BOOST 0x1
 /* enum: DFE Tap1 (0 - max negative, 64 - zero, 127 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1  0x2
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP1 0x2
 /* enum: DFE Tap2 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2  0x3
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP2 0x3
 /* enum: DFE Tap3 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3  0x4
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP3 0x4
 /* enum: DFE Tap4 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4  0x5
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP4 0x5
 /* enum: DFE Tap5 (0 - max negative, 32 - zero, 63 - max positive) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5  0x6
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_TAP5 0x6
 /* enum: DFE DLev */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV  0x7
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_DFE_DLEV 0x7
 /* enum: Figure of Merit */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM  0x8
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_FOM 0x8
 /* enum: CTLE EQ Capacitor (HF Gain) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC  0x9
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQC 0x9
 /* enum: CTLE EQ Resistor (DC Gain) */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES  0xa
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_CTLE_EQRES 0xa
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_LANE_WIDTH 5
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0  0x0 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1  0x1 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2  0x2 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3  0x3 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4  0x4 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5  0x5 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6  0x6 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7  0x7 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8  0x8 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9  0x9 /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10  0xa /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11  0xb /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12  0xc /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13  0xd /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14  0xe /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15  0xf /* enum */
-#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL  0x10 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_0 0x0 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_1 0x1 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_2 0x2 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_3 0x3 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_4 0x4 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_5 0x5 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_6 0x6 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_7 0x7 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_8 0x8 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_9 0x9 /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_10 0xa /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_11 0xb /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_12 0xc /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_13 0xd /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_14 0xe /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_15 0xf /* enum */
+#define          MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_LANE_ALL 0x10 /* enum */
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_LBN 13
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_PARAM_AUTOCAL_WIDTH 1
 #define        MC_CMD_PCIE_TUNE_RXEQ_GET_OUT_RESERVED_LBN 14
@@ -12558,15 +13288,15 @@
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_LBN 0
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_ID_WIDTH 8
 /* enum: TxMargin (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN  0x0
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXMARGIN 0x0
 /* enum: TxSwing (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING  0x1
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_TXSWING 0x1
 /* enum: De-emphasis coefficient C(-1) (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1  0x2
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CM1 0x2
 /* enum: De-emphasis coefficient C(0) (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0  0x3
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_C0 0x3
 /* enum: De-emphasis coefficient C(+1) (PIPE) */
-#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1  0x4
+#define          MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_CP1 0x4
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_LBN 8
 #define        MC_CMD_PCIE_TUNE_TXEQ_GET_OUT_PARAM_LANE_WIDTH 4
 /*             Enum values, see field(s): */
@@ -12632,9 +13362,9 @@
 /* enum: re-read and apply licenses after a license key partition update; note
  * that this operation returns a zero-length response
  */
-#define          MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE  0x0
+#define          MC_CMD_LICENSING_IN_OP_UPDATE_LICENSE 0x0
 /* enum: report counts of installed licenses */
-#define          MC_CMD_LICENSING_IN_OP_GET_KEY_STATS  0x1
+#define          MC_CMD_LICENSING_IN_OP_GET_KEY_STATS 0x1
 
 /* MC_CMD_LICENSING_OUT msgresponse */
 #define    MC_CMD_LICENSING_OUT_LEN 28
@@ -12665,9 +13395,9 @@
 #define       MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_OFST 24
 #define       MC_CMD_LICENSING_OUT_LICENSING_SELF_TEST_LEN 4
 /* enum: licensing subsystem self-test failed */
-#define          MC_CMD_LICENSING_OUT_SELF_TEST_FAIL  0x0
+#define          MC_CMD_LICENSING_OUT_SELF_TEST_FAIL 0x0
 /* enum: licensing subsystem self-test passed */
-#define          MC_CMD_LICENSING_OUT_SELF_TEST_PASS  0x1
+#define          MC_CMD_LICENSING_OUT_SELF_TEST_PASS 0x1
 
 
 /***********************************/
@@ -12687,11 +13417,11 @@
 /* enum: re-read and apply licenses after a license key partition update; note
  * that this operation returns a zero-length response
  */
-#define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE  0x0
+#define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0
 /* enum: report counts of installed licenses Returns EAGAIN if license
  * processing (updating) has been started but not yet completed.
  */
-#define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE  0x1
+#define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1
 
 /* MC_CMD_LICENSING_V3_OUT msgresponse */
 #define    MC_CMD_LICENSING_V3_OUT_LEN 88
@@ -12718,9 +13448,9 @@
 #define       MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_OFST 20
 #define       MC_CMD_LICENSING_V3_OUT_LICENSING_SELF_TEST_LEN 4
 /* enum: licensing subsystem self-test failed */
-#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL  0x0
+#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_FAIL 0x0
 /* enum: licensing subsystem self-test passed */
-#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS  0x1
+#define          MC_CMD_LICENSING_V3_OUT_SELF_TEST_PASS 0x1
 /* bitmask of licensed applications */
 #define       MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_OFST 24
 #define       MC_CMD_LICENSING_V3_OUT_LICENSED_APPS_LEN 8
@@ -12806,9 +13536,9 @@
 #define       MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_OFST 0
 #define       MC_CMD_GET_LICENSED_APP_STATE_OUT_STATE_LEN 4
 /* enum: no (or invalid) license is present for the application */
-#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED  0x0
+#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_NOT_LICENSED 0x0
 /* enum: a valid license is present for the application */
-#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED  0x1
+#define          MC_CMD_GET_LICENSED_APP_STATE_OUT_LICENSED 0x1
 
 
 /***********************************/
@@ -12837,9 +13567,9 @@
 #define       MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_OFST 0
 #define       MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_STATE_LEN 4
 /* enum: no (or invalid) license is present for the application */
-#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED  0x0
+#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_NOT_LICENSED 0x0
 /* enum: a valid license is present for the application */
-#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED  0x1
+#define          MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LICENSED 0x1
 
 
 /***********************************/
@@ -12891,9 +13621,9 @@
 #define       MC_CMD_LICENSED_APP_OP_IN_OP_OFST 4
 #define       MC_CMD_LICENSED_APP_OP_IN_OP_LEN 4
 /* enum: validate application */
-#define          MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE  0x0
+#define          MC_CMD_LICENSED_APP_OP_IN_OP_VALIDATE 0x0
 /* enum: mask application */
-#define          MC_CMD_LICENSED_APP_OP_IN_OP_MASK  0x1
+#define          MC_CMD_LICENSED_APP_OP_IN_OP_MASK 0x1
 /* arguments specific to this particular operation */
 #define       MC_CMD_LICENSED_APP_OP_IN_ARGS_OFST 8
 #define       MC_CMD_LICENSED_APP_OP_IN_ARGS_LEN 4
@@ -12984,9 +13714,9 @@
 #define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100
 #define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_LEN 4
 /* enum: expiry units are accounting units */
-#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC  0x0
+#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0
 /* enum: expiry units are calendar days */
-#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS  0x1
+#define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1
 /* base MAC address of the NIC stored in NVRAM (note that this is a constant
  * value for a given NIC regardless which function is calling, effectively this
  * is PF0 base MAC address)
@@ -13019,9 +13749,9 @@
 #define       MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_OFST 8
 #define       MC_CMD_LICENSED_V3_MASK_FEATURES_IN_FLAG_LEN 4
 /* enum: turn the features off */
-#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF  0x0
+#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_OFF 0x0
 /* enum: turn the features back on */
-#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON  0x1
+#define          MC_CMD_LICENSED_V3_MASK_FEATURES_IN_ON 0x1
 
 /* MC_CMD_LICENSED_V3_MASK_FEATURES_OUT msgresponse */
 #define    MC_CMD_LICENSED_V3_MASK_FEATURES_OUT_LEN 0
@@ -13048,15 +13778,15 @@
  * This is an asynchronous operation owing to the time taken to validate an
  * ECDSA license
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_SET  0x0
+#define          MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0
 /* enum: clear the license immediately rather than waiting for the next power
  * cycle
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_CLEAR  0x1
+#define          MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1
 /* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET
  * operation
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS  0x2
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2
 
 /* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */
 #define    MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164
@@ -13082,13 +13812,13 @@
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_LEN 4
 /* enum: finished validating and installing license */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK  0x0
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0
 /* enum: license validation and installation in progress */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS  0x1
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1
 /* enum: licensing error. More specific error messages are not provided to
  * avoid exposing details of the licensing system to the client
  */
-#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR  0x2
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2
 /* bitmask of licensed features */
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4
 #define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8
@@ -13124,9 +13854,9 @@
 #define       MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
 #define       MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_SET_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
  * that these handles should be considered opaque to the host, although a value
  * of 0xFFFFFFFF is guaranteed never to be a valid handle.
@@ -13146,7 +13876,7 @@
  */
 #define MC_CMD_GET_PORT_SNIFF_CONFIG 0xf8
 
-#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xf8_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_PORT_SNIFF_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_PORT_SNIFF_CONFIG_IN_LEN 0
@@ -13167,9 +13897,9 @@
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
 /* enum: receiving to just the specified queue */
-#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
 /* enum: receiving to multiple queues using RSS context */
-#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS  0x1
+#define          MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) */
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
 #define       MC_CMD_GET_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
@@ -13193,12 +13923,12 @@
 /* enum: Per-TXQ enable for multicast UDP destination lookup for possible
  * internal loopback. (ENTITY is a queue handle, VALUE is a single boolean.)
  */
-#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN  0x0
+#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_TXQ_MCAST_UDP_DST_LOOKUP_EN 0x0
 /* enum: Per-v-adaptor enable for suppression of self-transmissions on the
  * internal loopback path. (ENTITY is an EVB_PORT_ID, VALUE is a single
  * boolean.)
  */
-#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX  0x1
+#define          MC_CMD_SET_PARSER_DISP_CONFIG_IN_VADAPTOR_SUPPRESS_SELF_TX 0x1
 /* handle for the entity to update: queue handle, EVB port ID, etc. depending
  * on the type of configuration setting being changed
  */
@@ -13278,9 +14008,9 @@
 #define       MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_OFST 8
 #define       MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_LEN 4
 /* enum: receive to just the specified queue */
-#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_SIMPLE 0x0
 /* enum: receive to multiple queues using RSS context */
-#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS  0x1
+#define          MC_CMD_SET_TX_PORT_SNIFF_CONFIG_IN_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) as returned by MC_CMD_RSS_CONTEXT_ALLOC. Note
  * that these handles should be considered opaque to the host, although a value
  * of 0xFFFFFFFF is guaranteed never to be a valid handle.
@@ -13300,7 +14030,7 @@
  */
 #define MC_CMD_GET_TX_PORT_SNIFF_CONFIG 0xfc
 
-#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xfc_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_TX_PORT_SNIFF_CONFIG_IN_LEN 0
@@ -13319,9 +14049,9 @@
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_OFST 8
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_LEN 4
 /* enum: receiving to just the specified queue */
-#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE  0x0
+#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_SIMPLE 0x0
 /* enum: receiving to multiple queues using RSS context */
-#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS  0x1
+#define          MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_MODE_RSS 0x1
 /* RSS context (for RX_MODE_RSS) */
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_OFST 12
 #define       MC_CMD_GET_TX_PORT_SNIFF_CONFIG_OUT_RX_CONTEXT_LEN 4
@@ -13431,9 +14161,9 @@
 #define    MC_CMD_READ_ATB_IN_LEN 16
 #define       MC_CMD_READ_ATB_IN_SIGNAL_BUS_OFST 0
 #define       MC_CMD_READ_ATB_IN_SIGNAL_BUS_LEN 4
-#define          MC_CMD_READ_ATB_IN_BUS_CCOM  0x0 /* enum */
-#define          MC_CMD_READ_ATB_IN_BUS_CKR  0x1 /* enum */
-#define          MC_CMD_READ_ATB_IN_BUS_CPCIE  0x8 /* enum */
+#define          MC_CMD_READ_ATB_IN_BUS_CCOM 0x0 /* enum */
+#define          MC_CMD_READ_ATB_IN_BUS_CKR 0x1 /* enum */
+#define          MC_CMD_READ_ATB_IN_BUS_CPCIE 0x8 /* enum */
 #define       MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_OFST 4
 #define       MC_CMD_READ_ATB_IN_SIGNAL_EN_BITNO_LEN 4
 #define       MC_CMD_READ_ATB_IN_SIGNAL_SEL_OFST 8
@@ -13503,46 +14233,46 @@
 #define        MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_PF_WIDTH 16
 #define        MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_LBN 16
 #define        MC_CMD_PRIVILEGE_MASK_IN_FUNCTION_VF_WIDTH 16
-#define          MC_CMD_PRIVILEGE_MASK_IN_VF_NULL  0xffff /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_VF_NULL 0xffff /* enum */
 /* New privilege mask to be set. The mask will only be changed if the MSB is
  * set to 1.
  */
 #define       MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_OFST 4
 #define       MC_CMD_PRIVILEGE_MASK_IN_NEW_MASK_LEN 4
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN             0x1 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK              0x2 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD            0x4 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP               0x8 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS  0x10 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ADMIN 0x1 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_LINK 0x2 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ONLOAD 0x4 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PTP 0x8 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE_FILTERS 0x10 /* enum */
 /* enum: Deprecated. Equivalent to MAC_SPOOFING_TX combined with CHANGE_MAC. */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING      0x20
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST           0x40 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST         0x80 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST         0x100 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST     0x200 /* enum */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS       0x400 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING 0x20
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNICAST 0x40 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MULTICAST 0x80 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_BROADCAST 0x100 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_ALL_MULTICAST 0x200 /* enum */
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_PROMISCUOUS 0x400 /* enum */
 /* enum: Allows to set the TX packets' source MAC address to any arbitrary MAC
  * adress.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX   0x800
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_MAC_SPOOFING_TX 0x800
 /* enum: Privilege that allows a Function to change the MAC address configured
  * in its associated vAdapter/vPort.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC        0x1000
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_CHANGE_MAC 0x1000
 /* enum: Privilege that allows a Function to install filters that specify VLANs
  * that are not in the permit list for the associated vPort. This privilege is
  * primarily to support ESX where vPorts are created that restrict traffic to
  * only a set of permitted VLANs. See the vPort flag FLAG_VLAN_RESTRICT.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN  0x2000
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_UNRESTRICTED_VLAN 0x2000
 /* enum: Privilege for insecure commands. Commands that belong to this group
  * are not permitted on secure adapters regardless of the privilege mask.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE          0x4000
+#define          MC_CMD_PRIVILEGE_MASK_IN_GRP_INSECURE 0x4000
 /* enum: Set this bit to indicate that a new privilege mask is to be set,
  * otherwise the command will only read the existing mask.
  */
-#define          MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE             0x80000000
+#define          MC_CMD_PRIVILEGE_MASK_IN_DO_CHANGE 0x80000000
 
 /* MC_CMD_PRIVILEGE_MASK_OUT msgresponse */
 #define    MC_CMD_PRIVILEGE_MASK_OUT_LEN 4
@@ -13573,12 +14303,12 @@
 /* New link state mode to be set */
 #define       MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_OFST 4
 #define       MC_CMD_LINK_STATE_MODE_IN_NEW_MODE_LEN 4
-#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO       0x0 /* enum */
-#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP         0x1 /* enum */
-#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN       0x2 /* enum */
+#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_AUTO 0x0 /* enum */
+#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_UP 0x1 /* enum */
+#define          MC_CMD_LINK_STATE_MODE_IN_LINK_STATE_DOWN 0x2 /* enum */
 /* enum: Use this value to just read the existing setting without modifying it.
  */
-#define          MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE         0xffffffff
+#define          MC_CMD_LINK_STATE_MODE_IN_DO_NOT_CHANGE 0xffffffff
 
 /* MC_CMD_LINK_STATE_MODE_OUT msgresponse */
 #define    MC_CMD_LINK_STATE_MODE_OUT_LEN 4
@@ -13674,12 +14404,12 @@
 /* The groups of functions to have their privilege masks modified. */
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_OFST 0
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FN_GROUP_LEN 4
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_NONE       0x0 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_ALL        0x1 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY   0x2 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY   0x3 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF  0x4 /* enum */
-#define          MC_CMD_PRIVILEGE_MODIFY_IN_ONE        0x5 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_NONE 0x0 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_ALL 0x1 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_PFS_ONLY 0x2 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_ONLY 0x3 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_VFS_OF_PF 0x4 /* enum */
+#define          MC_CMD_PRIVILEGE_MODIFY_IN_ONE 0x5 /* enum */
 /* For VFS_OF_PF specify the PF, for ONE specify the target function */
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_OFST 4
 #define       MC_CMD_PRIVILEGE_MODIFY_IN_FUNCTION_LEN 4
@@ -13782,11 +14512,11 @@
 /* Sector type */
 #define       MC_CMD_XPM_READ_SECTOR_OUT_TYPE_OFST 0
 #define       MC_CMD_XPM_READ_SECTOR_OUT_TYPE_LEN 4
-#define          MC_CMD_XPM_READ_SECTOR_OUT_BLANK            0x0 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128   0x1 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256   0x2 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA      0x3 /* enum */
-#define          MC_CMD_XPM_READ_SECTOR_OUT_INVALID          0xff /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_BLANK 0x0 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_128 0x1 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_KEY_256 0x2 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_CRYPTO_DATA 0x3 /* enum */
+#define          MC_CMD_XPM_READ_SECTOR_OUT_INVALID 0xff /* enum */
 /* Sector data */
 #define       MC_CMD_XPM_READ_SECTOR_OUT_DATA_OFST 4
 #define       MC_CMD_XPM_READ_SECTOR_OUT_DATA_LEN 1
@@ -14001,18 +14731,18 @@
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_OFST 0
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LEN 2
 /* enum: the IANA allocated UDP port for VXLAN */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT  0x12b5
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
 /* enum: the IANA allocated UDP port for Geneve */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT  0x17c1
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN 0
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_WIDTH 16
 /* tunnel encapsulation protocol (only those named below are supported) */
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_OFST 2
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LEN 2
 /* enum: This port will be used for VXLAN on both IPv4 and IPv6 */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN  0x0
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
 /* enum: This port will be used for Geneve on both IPv4 and IPv6 */
-#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE  0x1
+#define          TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN 16
 #define       TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_WIDTH 16
 
@@ -14180,10 +14910,10 @@
 /* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
 #define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
 #define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_LEN 4
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS  0x0 /* enum */
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START  0x1 /* enum */
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START  0x2 /* enum */
-#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF  0x3 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */
 
 /* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
 #define    MC_CMD_SET_EVQ_TMR_OUT_LEN 8
@@ -14269,7 +14999,7 @@
  */
 #define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d
 
-#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20
@@ -14281,9 +15011,9 @@
 /* Will the common pool be used as TX_vFIFO_ULL (1) */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_LEN 4
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED       0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */
 /* enum: Using this interface without TX_vFIFO_ULL is not supported for now */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED      0x0
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0
 /* Number of buffers to reserve for the common pool */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_LEN 4
@@ -14291,20 +15021,20 @@
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_LEN 4
 /* enum: Extracts information from function */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1
 /* Network port or RX Engine to which the common pool connects. */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16
 #define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_LEN 4
 /* enum: Extracts information from function */
-/*               MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0          0x0 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1          0x1 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2          0x2 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3          0x3 /* enum */
+/*               MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */
 /* enum: To enable Switch loopback with Rx engine 0 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0     0x4
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4
 /* enum: To enable Switch loopback with Rx engine 1 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1     0x5
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4
@@ -14320,7 +15050,7 @@
  */
 #define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e
 
-#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20
@@ -14332,20 +15062,20 @@
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_LEN 4
 /* enum: Extracts information from common pool */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE   -0x1
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0          0x0 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1          0x1 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2          0x2 /* enum */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3          0x3 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */
 /* enum: To enable Switch loopback with Rx engine 0 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0     0x4
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4
 /* enum: To enable Switch loopback with Rx engine 1 */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1     0x5
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5
 /* Minimum number of buffers that the pool must have */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_LEN 4
 /* enum: Do not check the space available */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM     0x0
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0
 /* Will the vFIFO be used as TX_vFIFO_ULL */
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_LEN 4
@@ -14353,7 +15083,7 @@
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16
 #define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_LEN 4
 /* enum: Search for the lowest unused priority */
-#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE  -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1
 
 /* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */
 #define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8
@@ -14372,7 +15102,7 @@
  */
 #define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f
 
-#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */
 #define    MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4
@@ -14391,7 +15121,7 @@
  */
 #define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121
 
-#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */
 #define    MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4
@@ -14410,7 +15140,7 @@
  */
 #define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124
 
-#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */
 #define    MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0

From 7f61e6c6279bcb340489ab6b781835da700f1c4b Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 14 Mar 2018 14:21:26 +0000
Subject: [PATCH 0911/1678] sfc: support FEC configuration through ethtool

As well as 'auto' and the forced 'off', 'rs' and 'baser' states, we also
 handle combinations of settings (since the fecparam->fec field is a
 bitmask), where auto|rs and auto|baser specify a preferred FEC mode but
 will fall back to the other if the cable or link partner doesn't support
 it.  rs|baser (with or without auto bit) means prefer FEC even where
 auto wouldn't use it, but let FW choose which encoding to use.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ethtool.c    |  32 ++++++
 drivers/net/ethernet/sfc/mcdi_port.c  | 150 ++++++++++++++++++++++++++
 drivers/net/ethernet/sfc/net_driver.h |   8 ++
 3 files changed, 190 insertions(+)

diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 64049e71e6e7..bb1c80d48d12 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1488,6 +1488,36 @@ static int efx_ethtool_get_module_info(struct net_device *net_dev,
 	return ret;
 }
 
+static int efx_ethtool_get_fecparam(struct net_device *net_dev,
+				    struct ethtool_fecparam *fecparam)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	if (!efx->phy_op || !efx->phy_op->get_fecparam)
+		return -EOPNOTSUPP;
+	mutex_lock(&efx->mac_lock);
+	rc = efx->phy_op->get_fecparam(efx, fecparam);
+	mutex_unlock(&efx->mac_lock);
+
+	return rc;
+}
+
+static int efx_ethtool_set_fecparam(struct net_device *net_dev,
+				    struct ethtool_fecparam *fecparam)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	if (!efx->phy_op || !efx->phy_op->get_fecparam)
+		return -EOPNOTSUPP;
+	mutex_lock(&efx->mac_lock);
+	rc = efx->phy_op->set_fecparam(efx, fecparam);
+	mutex_unlock(&efx->mac_lock);
+
+	return rc;
+}
+
 const struct ethtool_ops efx_ethtool_ops = {
 	.get_drvinfo		= efx_ethtool_get_drvinfo,
 	.get_regs_len		= efx_ethtool_get_regs_len,
@@ -1523,4 +1553,6 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,
 	.get_link_ksettings	= efx_ethtool_get_link_ksettings,
 	.set_link_ksettings	= efx_ethtool_set_link_ksettings,
+	.get_fecparam		= efx_ethtool_get_fecparam,
+	.set_fecparam		= efx_ethtool_set_fecparam,
 };
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index ce8aabf9091e..9382bb0b4d5a 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -352,6 +352,64 @@ static void efx_mcdi_phy_decode_link(struct efx_nic *efx,
 	link_state->speed = speed;
 }
 
+/* The semantics of the ethtool FEC mode bitmask are not well defined,
+ * particularly the meaning of combinations of bits.  Which means we get to
+ * define our own semantics, as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if AN
+ * partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
+{
+	u32 ret = 0;
+
+	if (ethtool_cap & ETHTOOL_FEC_OFF)
+		return 0;
+
+	if (ethtool_cap & ETHTOOL_FEC_AUTO)
+		ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+		       (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+		       (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
+	if (ethtool_cap & ETHTOOL_FEC_RS)
+		ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+		       (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+	if (ethtool_cap & ETHTOOL_FEC_BASER)
+		ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+		       (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+		       (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
+		       (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+	return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi.  There are two combinations that function
+ * can never produce, (baser xor rs) and neither req; the implementation below
+ * maps both of those to AUTO.  This should never matter, and it's not clear
+ * what a better mapping would be anyway.
+ */
+static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+	bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+	     rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+	     baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+			    : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+	     baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+				: caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+	if (!baser && !rs)
+		return ETHTOOL_FEC_OFF;
+	return (rs_req ? ETHTOOL_FEC_RS : 0) |
+	       (baser_req ? ETHTOOL_FEC_BASER : 0) |
+	       (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
+
 static int efx_mcdi_phy_probe(struct efx_nic *efx)
 {
 	struct efx_mcdi_phy_data *phy_data;
@@ -438,6 +496,13 @@ static int efx_mcdi_phy_probe(struct efx_nic *efx)
 		MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
 		MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
 
+	/* Record the initial FEC configuration (or nearest approximation
+	 * representable in the ethtool configuration space)
+	 */
+	efx->fec_config = mcdi_fec_caps_to_ethtool(caps,
+						   efx->link_state.speed == 25000 ||
+						   efx->link_state.speed == 50000);
+
 	/* Default to Autonegotiated flow control if the PHY supports it */
 	efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
 	if (phy_data->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
@@ -458,6 +523,8 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx)
 		    ethtool_linkset_to_mcdi_cap(efx->link_advertising) :
 		    phy_cfg->forced_cap);
 
+	caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
 	return efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
 				 efx->loopback_mode, 0);
 }
@@ -584,6 +651,8 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
 		}
 	}
 
+	caps |= ethtool_fec_caps_to_mcdi(efx->fec_config);
+
 	rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
 			       efx->loopback_mode, 0);
 	if (rc)
@@ -599,6 +668,85 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
 	return 0;
 }
 
+static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
+				     struct ethtool_fecparam *fec)
+{
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+	u32 caps, active, speed; /* MCDI format */
+	bool is_25g = false;
+	size_t outlen;
+	int rc;
+
+	BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+	rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+			  outbuf, sizeof(outbuf), &outlen);
+	if (rc)
+		return rc;
+	if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+		return -EOPNOTSUPP;
+
+	/* behaviour for 25G/50G links depends on 25G BASER bit */
+	speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+	is_25g = speed == 25000 || speed == 50000;
+
+	caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+	fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+	/* BASER is never supported on 100G */
+	if (speed == 100000)
+		fec->fec &= ~ETHTOOL_FEC_BASER;
+
+	active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+	switch (active) {
+	case MC_CMD_FEC_NONE:
+		fec->active_fec = ETHTOOL_FEC_OFF;
+		break;
+	case MC_CMD_FEC_BASER:
+		fec->active_fec = ETHTOOL_FEC_BASER;
+		break;
+	case MC_CMD_FEC_RS:
+		fec->active_fec = ETHTOOL_FEC_RS;
+		break;
+	default:
+		netif_warn(efx, hw, efx->net_dev,
+			   "Firmware reports unrecognised FEC_TYPE %u\n",
+			   active);
+		/* We don't know what firmware has picked.  AUTO is as good a
+		 * "can't happen" value as any other.
+		 */
+		fec->active_fec = ETHTOOL_FEC_AUTO;
+		break;
+	}
+
+	return 0;
+}
+
+static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
+				     const struct ethtool_fecparam *fec)
+{
+	struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+	u32 caps;
+	int rc;
+
+	/* Work out what efx_mcdi_phy_set_link_ksettings() would produce from
+	 * saved advertising bits
+	 */
+	if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, efx->link_advertising))
+		caps = (ethtool_linkset_to_mcdi_cap(efx->link_advertising) |
+			1 << MC_CMD_PHY_CAP_AN_LBN);
+	else
+		caps = phy_cfg->forced_cap;
+
+	caps |= ethtool_fec_caps_to_mcdi(fec->fec);
+	rc = efx_mcdi_set_link(efx, caps, efx_get_mcdi_phy_flags(efx),
+			       efx->loopback_mode, 0);
+	if (rc)
+		return rc;
+
+	/* Record the new FEC setting for subsequent set_link calls */
+	efx->fec_config = fec->fec;
+	return 0;
+}
+
 static int efx_mcdi_phy_test_alive(struct efx_nic *efx)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
@@ -977,6 +1125,8 @@ static const struct efx_phy_operations efx_mcdi_phy_ops = {
 	.remove		= efx_mcdi_phy_remove,
 	.get_link_ksettings = efx_mcdi_phy_get_link_ksettings,
 	.set_link_ksettings = efx_mcdi_phy_set_link_ksettings,
+	.get_fecparam	= efx_mcdi_phy_get_fecparam,
+	.set_fecparam	= efx_mcdi_phy_set_fecparam,
 	.test_alive	= efx_mcdi_phy_test_alive,
 	.run_tests	= efx_mcdi_phy_run_tests,
 	.test_name	= efx_mcdi_phy_test_name,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 203d64c88de5..2453f3849e72 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -627,6 +627,8 @@ static inline bool efx_link_state_equal(const struct efx_link_state *left,
  *	Serialised by the mac_lock.
  * @get_link_ksettings: Get ethtool settings. Serialised by the mac_lock.
  * @set_link_ksettings: Set ethtool settings. Serialised by the mac_lock.
+ * @get_fecparam: Get Forward Error Correction settings. Serialised by mac_lock.
+ * @set_fecparam: Set Forward Error Correction settings. Serialised by mac_lock.
  * @set_npage_adv: Set abilities advertised in (Extended) Next Page
  *	(only needed where AN bit is set in mmds)
  * @test_alive: Test that PHY is 'alive' (online)
@@ -645,6 +647,9 @@ struct efx_phy_operations {
 				   struct ethtool_link_ksettings *cmd);
 	int (*set_link_ksettings)(struct efx_nic *efx,
 				  const struct ethtool_link_ksettings *cmd);
+	int (*get_fecparam)(struct efx_nic *efx, struct ethtool_fecparam *fec);
+	int (*set_fecparam)(struct efx_nic *efx,
+			    const struct ethtool_fecparam *fec);
 	void (*set_npage_adv) (struct efx_nic *efx, u32);
 	int (*test_alive) (struct efx_nic *efx);
 	const char *(*test_name) (struct efx_nic *efx, unsigned int index);
@@ -820,6 +825,8 @@ struct efx_rss_context {
  * @mdio_bus: PHY MDIO bus ID (only used by Siena)
  * @phy_mode: PHY operating mode. Serialised by @mac_lock.
  * @link_advertising: Autonegotiation advertising flags
+ * @fec_config: Forward Error Correction configuration flags.  For bit positions
+ *	see &enum ethtool_fec_config_bits.
  * @link_state: Current state of the link
  * @n_link_state_changes: Number of times the link has changed state
  * @unicast_filter: Flag for Falcon-arch simple unicast filter.
@@ -972,6 +979,7 @@ struct efx_nic {
 	enum efx_phy_mode phy_mode;
 
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(link_advertising);
+	u32 fec_config;
 	struct efx_link_state link_state;
 	unsigned int n_link_state_changes;
 

From 59655a5b6c837e392e873629591069c898585592 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 14 Mar 2018 11:23:40 +0800
Subject: [PATCH 0912/1678] tuntap: XDP_TX can use native XDP

Now we have ndo_xdp_xmit, switch to use it instead of the slow generic
XDP TX routine. XDP_TX on TAP gets ~20% improvements from ~1.5Mpps to
~1.8Mpps on 2.60GHz Core(TM) i7-5600U.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 475088f947bb..baeafa004463 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1613,7 +1613,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	unsigned int delta = 0;
 	char *buf;
 	size_t copied;
-	bool xdp_xmit = false;
 	int err, pad = TUN_RX_PAD;
 
 	rcu_read_lock();
@@ -1671,8 +1670,14 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 			preempt_enable();
 			return NULL;
 		case XDP_TX:
-			xdp_xmit = true;
-			/* fall through */
+			get_page(alloc_frag->page);
+			alloc_frag->offset += buflen;
+			if (tun_xdp_xmit(tun->dev, &xdp))
+				goto err_redirect;
+			tun_xdp_flush(tun->dev);
+			rcu_read_unlock();
+			preempt_enable();
+			return NULL;
 		case XDP_PASS:
 			delta = orig_data - xdp.data;
 			break;
@@ -1699,14 +1704,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 	get_page(alloc_frag->page);
 	alloc_frag->offset += buflen;
 
-	if (xdp_xmit) {
-		skb->dev = tun->dev;
-		generic_xdp_tx(skb, xdp_prog);
-		rcu_read_unlock();
-		preempt_enable();
-		return NULL;
-	}
-
 	rcu_read_unlock();
 	preempt_enable();
 

From 5677629a5a091a62512427534404b509fe6c615d Mon Sep 17 00:00:00 2001
From: Veerasenareddy Burru <veerasenareddy.burru@cavium.com>
Date: Tue, 13 Mar 2018 22:04:45 -0700
Subject: [PATCH 0913/1678] liquidio: Add support for liquidio 10GBase-T NIC

Added ethtool changes to show port type as TP (Twisted Pair) for
10GBASE-T ports. Same driver and firmware works for liquidio NIC with
SFP+ ports or TP ports.

Signed-off-by: Veerasenareddy Burru <veerasenareddy.burru@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/cavium/liquidio/lio_ethtool.c    | 24 ++++++++++++++-----
 .../cavium/liquidio/liquidio_common.h         | 12 ++++++++--
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index a63ddf07f168..550ac29682a5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -232,10 +232,16 @@ static int lio_get_link_ksettings(struct net_device *netdev,
 
 	linfo = &lio->linfo;
 
-	if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
-	    linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
-	    linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
-	    linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
+	switch (linfo->link.s.phy_type) {
+	case LIO_PHY_PORT_TP:
+		ecmd->base.port = PORT_TP;
+		supported = (SUPPORTED_10000baseT_Full |
+			     SUPPORTED_TP | SUPPORTED_Pause);
+		advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
+		ecmd->base.autoneg = AUTONEG_DISABLE;
+		break;
+
+	case LIO_PHY_PORT_FIBRE:
 		ecmd->base.port = PORT_FIBRE;
 
 		if (linfo->link.s.speed == SPEED_10000) {
@@ -245,12 +251,18 @@ static int lio_get_link_ksettings(struct net_device *netdev,
 
 		supported |= SUPPORTED_FIBRE | SUPPORTED_Pause;
 		advertising |= ADVERTISED_Pause;
+		ecmd->base.autoneg = AUTONEG_DISABLE;
+		break;
+	}
+
+	if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
+	    linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
+	    linfo->link.s.if_mode == INTERFACE_MODE_XLAUI ||
+	    linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
 		ethtool_convert_legacy_u32_to_link_mode(
 			ecmd->link_modes.supported, supported);
 		ethtool_convert_legacy_u32_to_link_mode(
 			ecmd->link_modes.advertising, advertising);
-		ecmd->base.autoneg = AUTONEG_DISABLE;
-
 	} else {
 		dev_err(&oct->pci_dev->dev, "Unknown link interface reported %d\n",
 			linfo->link.s.if_mode);
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 4f72ed1ad15a..ecc16824fc1b 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -676,9 +676,11 @@ union oct_link_status {
 		u64 if_mode:5;
 		u64 pause:1;
 		u64 flashing:1;
-		u64 reserved:15;
+		u64 phy_type:5;
+		u64 reserved:10;
 #else
-		u64 reserved:15;
+		u64 reserved:10;
+		u64 phy_type:5;
 		u64 flashing:1;
 		u64 pause:1;
 		u64 if_mode:5;
@@ -691,6 +693,12 @@ union oct_link_status {
 	} s;
 };
 
+enum lio_phy_type {
+	LIO_PHY_PORT_TP = 0x0,
+	LIO_PHY_PORT_FIBRE = 0x1,
+	LIO_PHY_PORT_UNKNOWN,
+};
+
 /** The txpciq info passed to host from the firmware */
 
 union oct_txpciq {

From c9f4c6cf53bfafb639386a4c094929f13f573e04 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Wed, 14 Mar 2018 11:01:00 +0100
Subject: [PATCH 0914/1678] net/smc: pay attention to MAX_ORDER for CQ entries

smc allocates a certain number of CQ entries for used RoCE devices. For
mlx5 devices the chosen constant number results in a large allocation
causing this warning:

[13355.124656] WARNING: CPU: 3 PID: 16535 at mm/page_alloc.c:3883 __alloc_pages_nodemask+0x2be/0x10c0
[13355.124657] Modules linked in: smc_diag(O) smc(O) xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc ip6table_filter ip6_tables iptable_filter mlx5_ib ib_core sunrpc mlx5_core s390_trng rng_core ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha256_s390 sha1_s390 sha_common ptp pps_core eadm_sch dm_multipath dm_mod vhost_net tun vhost tap sch_fq_codel kvm ip_tables x_tables autofs4 [last unloaded: smc]
[13355.124672] CPU: 3 PID: 16535 Comm: kworker/3:0 Tainted: G           O    4.14.0uschi #1
[13355.124673] Hardware name: IBM 3906 M04 704 (LPAR)
[13355.124675] Workqueue: events smc_listen_work [smc]
[13355.124677] task: 00000000e2f22100 task.stack: 0000000084720000
[13355.124678] Krnl PSW : 0704c00180000000 000000000029da76 (__alloc_pages_nodemask+0x2be/0x10c0)
[13355.124681]            R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
[13355.124682] Krnl GPRS: 0000000000000000 00550e00014080c0 0000000000000000 0000000000000001
[13355.124684]            000000000029d8b6 00000000f3bfd710 0000000000000000 00000000014080c0
[13355.124685]            0000000000000009 00000000ec277a00 0000000000200000 0000000000000000
[13355.124686]            0000000000000000 00000000000001ff 000000000029d8b6 0000000084723720
[13355.124708] Krnl Code: 000000000029da6a: a7110200		tmll	%r1,512
                          000000000029da6e: a774ff29		brc	7,29d8c0
                         #000000000029da72: a7f40001		brc	15,29da74
                         >000000000029da76: a7f4ff25		brc	15,29d8c0
                          000000000029da7a: a7380000		lhi	%r3,0
                          000000000029da7e: a7f4fef1		brc	15,29d860
                          000000000029da82: 5820f0c4		l	%r2,196(%r15)
                          000000000029da86: a53e0048		llilh	%r3,72
[13355.124720] Call Trace:
[13355.124722] ([<000000000029d8b6>] __alloc_pages_nodemask+0xfe/0x10c0)
[13355.124724]  [<000000000013bd1e>] s390_dma_alloc+0x6e/0x148
[13355.124733]  [<000003ff802eeba6>] mlx5_dma_zalloc_coherent_node+0x8e/0xe0 [mlx5_core]
[13355.124740]  [<000003ff802eee18>] mlx5_buf_alloc_node+0x70/0x108 [mlx5_core]
[13355.124744]  [<000003ff804eb410>] mlx5_ib_create_cq+0x558/0x898 [mlx5_ib]
[13355.124749]  [<000003ff80407d40>] ib_create_cq+0x48/0x88 [ib_core]
[13355.124751]  [<000003ff80109fba>] smc_ib_setup_per_ibdev+0x52/0x118 [smc]
[13355.124753]  [<000003ff8010bcb6>] smc_conn_create+0x65e/0x728 [smc]
[13355.124755]  [<000003ff801081a2>] smc_listen_work+0x2d2/0x540 [smc]
[13355.124756]  [<0000000000162c66>] process_one_work+0x1be/0x440
[13355.124758]  [<0000000000162f40>] worker_thread+0x58/0x458
[13355.124759]  [<0000000000169e7e>] kthread+0x14e/0x168
[13355.124760]  [<00000000009ce8be>] kernel_thread_starter+0x6/0xc
[13355.124762]  [<00000000009ce8b8>] kernel_thread_starter+0x0/0xc
[13355.124762] Last Breaking-Event-Address:
[13355.124764]  [<000000000029da72>] __alloc_pages_nodemask+0x2ba/0x10c0
[13355.124764] ---[ end trace 34be38b581c0b585 ]---

This patch reduces the smc constant for the maximum number of allocated
completion queue entries SMC_MAX_CQE by 2 to avoid high round up values
in the mlx5 code, and reduces the number of allocated completion queue
entries even more, if the final allocation for an mlx5 device hits the
MAX_ORDER limit.

Reported-by: Ihnken Menssen <menssen@de.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ib.c | 10 +++++++++-
 net/smc/smc_wr.h |  1 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2a8957bd6d38..26df554f7588 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -23,6 +23,8 @@
 #include "smc_wr.h"
 #include "smc.h"
 
+#define SMC_MAX_CQE 32766	/* max. # of completion queue elements */
+
 #define SMC_QP_MIN_RNR_TIMER		5
 #define SMC_QP_TIMEOUT			15 /* 4096 * 2 ** timeout usec */
 #define SMC_QP_RETRY_CNT			7 /* 7: infinite */
@@ -438,9 +440,15 @@ out:
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
 {
 	struct ib_cq_init_attr cqattr =	{
-		.cqe = SMC_WR_MAX_CQE, .comp_vector = 0 };
+		.cqe = SMC_MAX_CQE, .comp_vector = 0 };
+	int cqe_size_order, smc_order;
 	long rc;
 
+	/* the calculated number of cq entries fits to mlx5 cq allocation */
+	cqe_size_order = cache_line_size() == 128 ? 7 : 6;
+	smc_order = MAX_ORDER - cqe_size_order - 1;
+	if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
+		cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
 	smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
 					      smc_wr_tx_cq_handler, NULL,
 					      smcibdev, &cqattr);
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index ef0c3494c9cb..210bec3c3ebe 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -19,7 +19,6 @@
 #include "smc.h"
 #include "smc_core.h"
 
-#define SMC_WR_MAX_CQE 32768	/* max. # of completion queue elements */
 #define SMC_WR_BUF_CNT 16	/* # of ctrl buffers per link */
 
 #define SMC_WR_TX_WAIT_FREE_SLOT_TIME	(10 * HZ)

From 268ffcc4ebfc8b10c1502357dfb82ce6af0770ac Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Wed, 14 Mar 2018 11:01:01 +0100
Subject: [PATCH 0915/1678] net/smc: free link group without pending free_work
 only

Make sure there is no pending or running free_work worker for the link
group when freeing the link group.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 1 +
 net/smc/smc_core.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 2c6f4e0a9f3d..649489f825a5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1477,6 +1477,7 @@ static void __exit smc_exit(void)
 	spin_unlock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
 		list_del_init(&lgr->list);
+		cancel_delayed_work_sync(&lgr->free_work);
 		smc_lgr_free(lgr); /* free link group */
 	}
 	static_branch_disable(&tcp_have_smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f76f60e463cb..ec6189fe2a48 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -140,7 +140,8 @@ static void smc_lgr_free_work(struct work_struct *work)
 	list_del_init(&lgr->list); /* remove from smc_lgr_list */
 free:
 	spin_unlock_bh(&smc_lgr_list.lock);
-	smc_lgr_free(lgr);
+	if (!delayed_work_pending(&lgr->free_work))
+		smc_lgr_free(lgr);
 }
 
 /* create a new SMC link group */

From 97cdbc4213df101228564b8d27090cc1f9a26332 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Wed, 14 Mar 2018 11:01:02 +0100
Subject: [PATCH 0916/1678] net/smc: schedule free_work when link group is
 terminated

The free_work worker must be scheduled when the link group is
abnormally terminated.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index ec6189fe2a48..f44f6803f7ff 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -32,6 +32,17 @@
 
 static u32 smc_lgr_num;			/* unique link group number */
 
+static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
+{
+	/* client link group creation always follows the server link group
+	 * creation. For client use a somewhat higher removal delay time,
+	 * otherwise there is a risk of out-of-sync link groups.
+	 */
+	mod_delayed_work(system_wq, &lgr->free_work,
+			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+						 SMC_LGR_FREE_DELAY_SERV);
+}
+
 /* Register connection's alert token in our lookup structure.
  * To use rbtrees we have to implement our own insert core.
  * Requires @conns_lock
@@ -111,13 +122,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 	write_unlock_bh(&lgr->conns_lock);
 	if (!reduced || lgr->conns_num)
 		return;
-	/* client link group creation always follows the server link group
-	 * creation. For client use a somewhat higher removal delay time,
-	 * otherwise there is a risk of out-of-sync link groups.
-	 */
-	mod_delayed_work(system_wq, &lgr->free_work,
-			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
-						 SMC_LGR_FREE_DELAY_SERV);
+	smc_lgr_schedule_free_work(lgr);
 }
 
 static void smc_lgr_free_work(struct work_struct *work)
@@ -344,6 +349,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
 	}
 	write_unlock_bh(&lgr->conns_lock);
 	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+	smc_lgr_schedule_free_work(lgr);
 }
 
 /* Determine vlan of internal TCP socket.

From 1b1e0bc9947427ae58bbe7de0ce9cfd591b589b9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:30 +0800
Subject: [PATCH 0917/1678] sctp: add refcnt support for sh_key

With refcnt support for sh_key, chunks auth sh_keys can be decided
before enqueuing it. Changing the active key later will not affect
the chunks already enqueued.

Furthermore, this is necessary when adding the support for authinfo
for sendmsg in next patch.

Note that struct sctp_chunk can't be grown due to that performance
drop issue on slow cpu, so it just reuses head_skb memory for shkey
in sctp_chunk.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/auth.h    |  9 ++--
 include/net/sctp/sm.h      |  3 +-
 include/net/sctp/structs.h |  9 +++-
 net/sctp/auth.c            | 88 +++++++++++++++++++-------------------
 net/sctp/chunk.c           |  5 +++
 net/sctp/output.c          | 18 +++++++-
 net/sctp/sm_make_chunk.c   | 15 +++++--
 net/sctp/sm_statefuns.c    | 11 +++--
 net/sctp/socket.c          |  6 +++
 9 files changed, 105 insertions(+), 59 deletions(-)

diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index e5c57d0a082d..017c1aa3a2c9 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -62,8 +62,9 @@ struct sctp_auth_bytes {
 /* Definition for a shared key, weather endpoint or association */
 struct sctp_shared_key {
 	struct list_head key_list;
-	__u16 key_id;
 	struct sctp_auth_bytes *key;
+	refcount_t refcnt;
+	__u16 key_id;
 };
 
 #define key_for_each(__key, __list_head) \
@@ -103,8 +104,10 @@ int sctp_auth_send_cid(enum sctp_cid chunk,
 int sctp_auth_recv_cid(enum sctp_cid chunk,
 		       const struct sctp_association *asoc);
 void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
-			    struct sk_buff *skb,
-			    struct sctp_auth_chunk *auth, gfp_t gfp);
+			      struct sk_buff *skb, struct sctp_auth_chunk *auth,
+			      struct sctp_shared_key *ep_key, gfp_t gfp);
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key);
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key);
 
 /* API Helpers */
 int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id);
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 2883c43c5258..2d0e782c9055 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -263,7 +263,8 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
 struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
 				    __u32 new_cum_tsn, size_t nstreams,
 				    struct sctp_fwdtsn_skip *skiplist);
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc);
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+				  __u16 key_id);
 struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc,
 					  __u16 stream_num, __be16 *stream_list,
 					  bool out, bool in);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ec6e46b7e119..49ad67bbdbb5 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -577,8 +577,12 @@ struct sctp_chunk {
 	/* This points to the sk_buff containing the actual data.  */
 	struct sk_buff *skb;
 
-	/* In case of GSO packets, this will store the head one */
-	struct sk_buff *head_skb;
+	union {
+		/* In case of GSO packets, this will store the head one */
+		struct sk_buff *head_skb;
+		/* In case of auth enabled, this will point to the shkey */
+		struct sctp_shared_key *shkey;
+	};
 
 	/* These are the SCTP headers by reverse order in a packet.
 	 * Note that some of these may happen more than once.  In that
@@ -1995,6 +1999,7 @@ struct sctp_association {
 	 * The current generated assocaition shared key (secret)
 	 */
 	struct sctp_auth_bytes *asoc_shared_key;
+	struct sctp_shared_key *shkey;
 
 	/* SCTP AUTH: hmac id of the first peer requested algorithm
 	 * that we support.
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 00667c50efa7..e5214fd7f650 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -101,13 +101,14 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
 		return NULL;
 
 	INIT_LIST_HEAD(&new->key_list);
+	refcount_set(&new->refcnt, 1);
 	new->key_id = key_id;
 
 	return new;
 }
 
 /* Free the shared key structure */
-static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
+static void sctp_auth_shkey_destroy(struct sctp_shared_key *sh_key)
 {
 	BUG_ON(!list_empty(&sh_key->key_list));
 	sctp_auth_key_put(sh_key->key);
@@ -115,6 +116,17 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
 	kfree(sh_key);
 }
 
+void sctp_auth_shkey_release(struct sctp_shared_key *sh_key)
+{
+	if (refcount_dec_and_test(&sh_key->refcnt))
+		sctp_auth_shkey_destroy(sh_key);
+}
+
+void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key)
+{
+	refcount_inc(&sh_key->refcnt);
+}
+
 /* Destroy the entire key list.  This is done during the
  * associon and endpoint free process.
  */
@@ -128,7 +140,7 @@ void sctp_auth_destroy_keys(struct list_head *keys)
 
 	key_for_each_safe(ep_key, tmp, keys) {
 		list_del_init(&ep_key->key_list);
-		sctp_auth_shkey_free(ep_key);
+		sctp_auth_shkey_release(ep_key);
 	}
 }
 
@@ -409,13 +421,19 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
 
 	sctp_auth_key_put(asoc->asoc_shared_key);
 	asoc->asoc_shared_key = secret;
+	asoc->shkey = ep_key;
 
 	/* Update send queue in case any chunk already in there now
 	 * needs authenticating
 	 */
 	list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) {
-		if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc))
+		if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) {
 			chunk->auth = 1;
+			if (!chunk->shkey) {
+				chunk->shkey = asoc->shkey;
+				sctp_auth_shkey_hold(chunk->shkey);
+			}
+		}
 	}
 
 	return 0;
@@ -703,16 +721,15 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
  *    after the AUTH chunk in the SCTP packet.
  */
 void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
-			      struct sk_buff *skb,
-			      struct sctp_auth_chunk *auth,
-			      gfp_t gfp)
+			      struct sk_buff *skb, struct sctp_auth_chunk *auth,
+			      struct sctp_shared_key *ep_key, gfp_t gfp)
 {
-	struct crypto_shash *tfm;
 	struct sctp_auth_bytes *asoc_key;
+	struct crypto_shash *tfm;
 	__u16 key_id, hmac_id;
-	__u8 *digest;
 	unsigned char *end;
 	int free_key = 0;
+	__u8 *digest;
 
 	/* Extract the info we need:
 	 * - hmac id
@@ -724,12 +741,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
 	if (key_id == asoc->active_key_id)
 		asoc_key = asoc->asoc_shared_key;
 	else {
-		struct sctp_shared_key *ep_key;
-
-		ep_key = sctp_auth_get_shkey(asoc, key_id);
-		if (!ep_key)
-			return;
-
+		/* ep_key can't be NULL here */
 		asoc_key = sctp_auth_asoc_create_secret(asoc, ep_key, gfp);
 		if (!asoc_key)
 			return;
@@ -829,7 +841,7 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 		      struct sctp_association *asoc,
 		      struct sctp_authkey *auth_key)
 {
-	struct sctp_shared_key *cur_key = NULL;
+	struct sctp_shared_key *cur_key, *shkey;
 	struct sctp_auth_bytes *key;
 	struct list_head *sh_keys;
 	int replace = 0;
@@ -842,46 +854,34 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 	else
 		sh_keys = &ep->endpoint_shared_keys;
 
-	key_for_each(cur_key, sh_keys) {
-		if (cur_key->key_id == auth_key->sca_keynumber) {
+	key_for_each(shkey, sh_keys) {
+		if (shkey->key_id == auth_key->sca_keynumber) {
 			replace = 1;
 			break;
 		}
 	}
 
-	/* If we are not replacing a key id, we need to allocate
-	 * a shared key.
-	 */
-	if (!replace) {
-		cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber,
-						 GFP_KERNEL);
-		if (!cur_key)
-			return -ENOMEM;
-	}
+	cur_key = sctp_auth_shkey_create(auth_key->sca_keynumber, GFP_KERNEL);
+	if (!cur_key)
+		return -ENOMEM;
 
 	/* Create a new key data based on the info passed in */
 	key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
-	if (!key)
-		goto nomem;
+	if (!key) {
+		kfree(cur_key);
+		return -ENOMEM;
+	}
 
 	memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
-
-	/* If we are replacing, remove the old keys data from the
-	 * key id.  If we are adding new key id, add it to the
-	 * list.
-	 */
-	if (replace)
-		sctp_auth_key_put(cur_key->key);
-	else
-		list_add(&cur_key->key_list, sh_keys);
-
 	cur_key->key = key;
-	return 0;
-nomem:
-	if (!replace)
-		sctp_auth_shkey_free(cur_key);
 
-	return -ENOMEM;
+	if (replace) {
+		list_del_init(&shkey->key_list);
+		sctp_auth_shkey_release(shkey);
+	}
+	list_add(&cur_key->key_list, sh_keys);
+
+	return 0;
 }
 
 int sctp_auth_set_active_key(struct sctp_endpoint *ep,
@@ -952,7 +952,7 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
 
 	/* Delete the shared key */
 	list_del_init(&key->key_list);
-	sctp_auth_shkey_free(key);
+	sctp_auth_shkey_release(key);
 
 	return 0;
 }
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 991a530c6b31..9f28a9aae976 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -168,6 +168,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 {
 	size_t len, first_len, max_data, remaining;
 	size_t msg_len = iov_iter_count(from);
+	struct sctp_shared_key *shkey = NULL;
 	struct list_head *pos, *temp;
 	struct sctp_chunk *chunk;
 	struct sctp_datamsg *msg;
@@ -204,6 +205,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		if (hmac_desc)
 			max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
 					      hmac_desc->hmac_len);
+
+		shkey = asoc->shkey;
 	}
 
 	/* Check what's our max considering the above */
@@ -275,6 +278,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		if (err < 0)
 			goto errout_chunk_free;
 
+		chunk->shkey = shkey;
+
 		/* Put the chunk->skb back into the form expected by send.  */
 		__skb_pull(chunk->skb, (__u8 *)chunk->chunk_hdr -
 				       chunk->skb->data);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 01a26ee051e3..d6e1c90cc09a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -241,10 +241,13 @@ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
 	if (!chunk->auth)
 		return retval;
 
-	auth = sctp_make_auth(asoc);
+	auth = sctp_make_auth(asoc, chunk->shkey->key_id);
 	if (!auth)
 		return retval;
 
+	auth->shkey = chunk->shkey;
+	sctp_auth_shkey_hold(auth->shkey);
+
 	retval = __sctp_packet_append_chunk(pkt, auth);
 
 	if (retval != SCTP_XMIT_OK)
@@ -490,7 +493,8 @@ merge:
 		}
 
 		if (auth) {
-			sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+			sctp_auth_calculate_hmac(tp->asoc, nskb, auth,
+						 packet->auth->shkey, gfp);
 			/* free auth if no more chunks, or add it back */
 			if (list_empty(&packet->chunk_list))
 				sctp_chunk_free(packet->auth);
@@ -770,6 +774,16 @@ static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
 	enum sctp_xmit retval = SCTP_XMIT_OK;
 	size_t psize, pmtu, maxsize;
 
+	/* Don't bundle in this packet if this chunk's auth key doesn't
+	 * match other chunks already enqueued on this packet. Also,
+	 * don't bundle the chunk with auth key if other chunks in this
+	 * packet don't have auth key.
+	 */
+	if ((packet->auth && chunk->shkey != packet->auth->shkey) ||
+	    (!packet->auth && chunk->shkey &&
+	     chunk->chunk_hdr->type != SCTP_CID_AUTH))
+		return SCTP_XMIT_PMTU_FULL;
+
 	psize = packet->size;
 	if (packet->transport->asoc)
 		pmtu = packet->transport->asoc->pathmtu;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d01475f5f710..10f071cdf188 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -87,7 +87,10 @@ static void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
 /* Control chunk destructor */
 static void sctp_control_release_owner(struct sk_buff *skb)
 {
-	/*TODO: do memory release */
+	struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
+
+	if (chunk->shkey)
+		sctp_auth_shkey_release(chunk->shkey);
 }
 
 static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
@@ -102,7 +105,12 @@ static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
 	 *
 	 *  For now don't do anything for now.
 	 */
+	if (chunk->auth) {
+		chunk->shkey = asoc->shkey;
+		sctp_auth_shkey_hold(chunk->shkey);
+	}
 	skb->sk = asoc ? asoc->base.sk : NULL;
+	skb_shinfo(skb)->destructor_arg = chunk;
 	skb->destructor = sctp_control_release_owner;
 }
 
@@ -1271,7 +1279,8 @@ nodata:
 	return retval;
 }
 
-struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
+struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc,
+				  __u16 key_id)
 {
 	struct sctp_authhdr auth_hdr;
 	struct sctp_hmac *hmac_desc;
@@ -1289,7 +1298,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
 		return NULL;
 
 	auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
-	auth_hdr.shkey_id = htons(asoc->active_key_id);
+	auth_hdr.shkey_id = htons(key_id);
 
 	retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
 						 &auth_hdr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index eb7905ffe5f2..792e0e2be320 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4114,6 +4114,7 @@ static enum sctp_ierror sctp_sf_authenticate(
 					const union sctp_subtype type,
 					struct sctp_chunk *chunk)
 {
+	struct sctp_shared_key *sh_key = NULL;
 	struct sctp_authhdr *auth_hdr;
 	__u8 *save_digest, *digest;
 	struct sctp_hmac *hmac;
@@ -4135,9 +4136,11 @@ static enum sctp_ierror sctp_sf_authenticate(
 	 * configured
 	 */
 	key_id = ntohs(auth_hdr->shkey_id);
-	if (key_id != asoc->active_key_id && !sctp_auth_get_shkey(asoc, key_id))
-		return SCTP_IERROR_AUTH_BAD_KEYID;
-
+	if (key_id != asoc->active_key_id) {
+		sh_key = sctp_auth_get_shkey(asoc, key_id);
+		if (!sh_key)
+			return SCTP_IERROR_AUTH_BAD_KEYID;
+	}
 
 	/* Make sure that the length of the signature matches what
 	 * we expect.
@@ -4166,7 +4169,7 @@ static enum sctp_ierror sctp_sf_authenticate(
 
 	sctp_auth_calculate_hmac(asoc, chunk->skb,
 				 (struct sctp_auth_chunk *)chunk->chunk_hdr,
-				 GFP_ATOMIC);
+				 sh_key, GFP_ATOMIC);
 
 	/* Discard the packet if the digests do not match */
 	if (memcmp(save_digest, digest, sig_len)) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index af5cf29b0c65..003a4ad89c01 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -156,6 +156,9 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 	/* The sndbuf space is tracked per association.  */
 	sctp_association_hold(asoc);
 
+	if (chunk->shkey)
+		sctp_auth_shkey_hold(chunk->shkey);
+
 	skb_set_owner_w(chunk->skb, sk);
 
 	chunk->skb->destructor = sctp_wfree;
@@ -8109,6 +8112,9 @@ static void sctp_wfree(struct sk_buff *skb)
 	sk->sk_wmem_queued   -= skb->truesize;
 	sk_mem_uncharge(sk, skb->truesize);
 
+	if (chunk->shkey)
+		sctp_auth_shkey_release(chunk->shkey);
+
 	sock_wfree(skb);
 	sctp_wake_up_waiters(sk, asoc);
 

From 3ff547c06a7d75d72d37dae2c064fcf0672e56c0 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:31 +0800
Subject: [PATCH 0918/1678] sctp: add support for SCTP AUTH Information for
 sendmsg

This patch is to add support for SCTP AUTH Information for sendmsg,
as described in section 5.3.8 of RFC6458.

With this option, you can provide shared key identifier used for
sending the user message.

It's also a necessary send info for sctp_sendv.

Note that it reuses sinfo->sinfo_tsn to indicate if this option is
set and sinfo->sinfo_ssn to save the shkey ID which can be 0.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 include/uapi/linux/sctp.h  | 14 +++++++++++++-
 net/sctp/chunk.c           | 11 ++++++++++-
 net/sctp/socket.c          | 23 +++++++++++++++++++++++
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 49ad67bbdbb5..012fb3e2f4cf 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2118,6 +2118,7 @@ struct sctp_cmsgs {
 	struct sctp_sndrcvinfo *srinfo;
 	struct sctp_sndinfo *sinfo;
 	struct sctp_prinfo *prinfo;
+	struct sctp_authinfo *authinfo;
 	struct msghdr *addrs_msg;
 };
 
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index e94b6d297ad9..47e781ebde05 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -273,6 +273,18 @@ struct sctp_prinfo {
 	__u32 pr_value;
 };
 
+/* 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_AUTHINFO  struct sctp_authinfo
+ */
+struct sctp_authinfo {
+	__u16 auth_keynumber;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -310,7 +322,7 @@ typedef enum sctp_cmsg_type {
 #define SCTP_NXTINFO	SCTP_NXTINFO
 	SCTP_PRINFO,		/* 5.3.7 SCTP PR-SCTP Information Structure */
 #define SCTP_PRINFO	SCTP_PRINFO
-	SCTP_AUTHINFO,		/* 5.3.8 SCTP AUTH Information Structure (RESERVED) */
+	SCTP_AUTHINFO,		/* 5.3.8 SCTP AUTH Information Structure */
 #define SCTP_AUTHINFO	SCTP_AUTHINFO
 	SCTP_DSTADDRV4,		/* 5.3.9 SCTP Destination IPv4 Address Structure */
 #define SCTP_DSTADDRV4	SCTP_DSTADDRV4
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 9f28a9aae976..f889a84f264d 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -206,7 +206,16 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 			max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
 					      hmac_desc->hmac_len);
 
-		shkey = asoc->shkey;
+		if (sinfo->sinfo_tsn &&
+		    sinfo->sinfo_ssn != asoc->active_key_id) {
+			shkey = sctp_auth_get_shkey(asoc, sinfo->sinfo_ssn);
+			if (!shkey) {
+				err = -EINVAL;
+				goto errout;
+			}
+		} else {
+			shkey = asoc->shkey;
+		}
 	}
 
 	/* Check what's our max considering the above */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 003a4ad89c01..9ffdecbc3531 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1987,6 +1987,14 @@ static void sctp_sendmsg_update_sinfo(struct sctp_association *asoc,
 
 	if (!cmsgs->srinfo && !cmsgs->prinfo)
 		sinfo->sinfo_timetolive = asoc->default_timetolive;
+
+	if (cmsgs->authinfo) {
+		/* Reuse sinfo_tsn to indicate that authinfo was set and
+		 * sinfo_ssn to save the keyid on tx path.
+		 */
+		sinfo->sinfo_tsn = 1;
+		sinfo->sinfo_ssn = cmsgs->authinfo->auth_keynumber;
+	}
 }
 
 static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
@@ -7874,6 +7882,21 @@ static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
 			if (cmsgs->prinfo->pr_policy == SCTP_PR_SCTP_NONE)
 				cmsgs->prinfo->pr_value = 0;
 			break;
+		case SCTP_AUTHINFO:
+			/* SCTP Socket API Extension
+			 * 5.3.8 SCTP AUTH Information Structure (SCTP_AUTHINFO)
+			 *
+			 * This cmsghdr structure specifies SCTP options for sendmsg().
+			 *
+			 * cmsg_level    cmsg_type      cmsg_data[]
+			 * ------------  ------------   ---------------------
+			 * IPPROTO_SCTP  SCTP_AUTHINFO  struct sctp_authinfo
+			 */
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_authinfo)))
+				return -EINVAL;
+
+			cmsgs->authinfo = CMSG_DATA(cmsg);
+			break;
 		case SCTP_DSTADDRV4:
 		case SCTP_DSTADDRV6:
 			/* SCTP Socket API Extension

From 601590ec155aadf5daa17a6f63a06d1bba5b5ce9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:32 +0800
Subject: [PATCH 0919/1678] sctp: add sockopt SCTP_AUTH_DEACTIVATE_KEY

This patch is to add sockopt SCTP_AUTH_DEACTIVATE_KEY, as described in
section 8.3.4 of RFC6458.

This set option indicates that the application will no longer send user
messages using the indicated key identifier.

Note that RFC requires that only deactivated keys that are no longer used
by an association can be deleted, but for the backward compatibility, it
is not to check deactivated when deleting or replacing one sh_key.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/auth.h   | 12 +++++-----
 include/uapi/linux/sctp.h |  1 +
 net/sctp/auth.c           | 46 ++++++++++++++++++++++++++++++++++++---
 net/sctp/socket.c         | 31 ++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 9 deletions(-)

diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index 017c1aa3a2c9..687e7f80037d 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -65,6 +65,7 @@ struct sctp_shared_key {
 	struct sctp_auth_bytes *key;
 	refcount_t refcnt;
 	__u16 key_id;
+	__u8 deactivated;
 };
 
 #define key_for_each(__key, __list_head) \
@@ -113,14 +114,13 @@ void sctp_auth_shkey_hold(struct sctp_shared_key *sh_key);
 int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id);
 int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 			    struct sctp_hmacalgo *hmacs);
-int sctp_auth_set_key(struct sctp_endpoint *ep,
-		      struct sctp_association *asoc,
+int sctp_auth_set_key(struct sctp_endpoint *ep, struct sctp_association *asoc,
 		      struct sctp_authkey *auth_key);
 int sctp_auth_set_active_key(struct sctp_endpoint *ep,
-		      struct sctp_association *asoc,
-		      __u16 key_id);
+			     struct sctp_association *asoc, __u16 key_id);
 int sctp_auth_del_key_id(struct sctp_endpoint *ep,
-		      struct sctp_association *asoc,
-		      __u16 key_id);
+			 struct sctp_association *asoc, __u16 key_id);
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+			   struct sctp_association *asoc, __u16 key_id);
 
 #endif
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 47e781ebde05..08fc313829f4 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -99,6 +99,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_RECVRCVINFO	32
 #define SCTP_RECVNXTINFO	33
 #define SCTP_DEFAULT_SNDINFO	34
+#define SCTP_AUTH_DEACTIVATE_KEY	35
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e5214fd7f650..a073123fc485 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -449,8 +449,11 @@ struct sctp_shared_key *sctp_auth_get_shkey(
 
 	/* First search associations set of endpoint pair shared keys */
 	key_for_each(key, &asoc->endpoint_shared_keys) {
-		if (key->key_id == key_id)
-			return key;
+		if (key->key_id == key_id) {
+			if (!key->deactivated)
+				return key;
+			break;
+		}
 	}
 
 	return NULL;
@@ -905,7 +908,7 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
 		}
 	}
 
-	if (!found)
+	if (!found || key->deactivated)
 		return -EINVAL;
 
 	if (asoc) {
@@ -956,3 +959,40 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
 
 	return 0;
 }
+
+int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
+			   struct sctp_association *asoc, __u16  key_id)
+{
+	struct sctp_shared_key *key;
+	struct list_head *sh_keys;
+	int found = 0;
+
+	/* The key identifier MUST NOT be the current active key
+	 * The key identifier MUST correst to an existing key
+	 */
+	if (asoc) {
+		if (asoc->active_key_id == key_id)
+			return -EINVAL;
+
+		sh_keys = &asoc->endpoint_shared_keys;
+	} else {
+		if (ep->active_key_id == key_id)
+			return -EINVAL;
+
+		sh_keys = &ep->endpoint_shared_keys;
+	}
+
+	key_for_each(key, sh_keys) {
+		if (key->key_id == key_id) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		return -EINVAL;
+
+	key->deactivated = 1;
+
+	return 0;
+}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9ffdecbc3531..65cc354c520f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3646,6 +3646,33 @@ static int sctp_setsockopt_del_key(struct sock *sk,
 
 }
 
+/*
+ * 8.3.4  Deactivate a Shared Key (SCTP_AUTH_DEACTIVATE_KEY)
+ *
+ * This set option will deactivate a shared secret key.
+ */
+static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval,
+					  unsigned int optlen)
+{
+	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
+	struct sctp_authkeyid val;
+	struct sctp_association *asoc;
+
+	if (!ep->auth_enable)
+		return -EACCES;
+
+	if (optlen != sizeof(struct sctp_authkeyid))
+		return -EINVAL;
+	if (copy_from_user(&val, optval, optlen))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, val.scact_assoc_id);
+	if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+
+	return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber);
+}
+
 /*
  * 8.1.23 SCTP_AUTO_ASCONF
  *
@@ -4238,6 +4265,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_AUTH_DELETE_KEY:
 		retval = sctp_setsockopt_del_key(sk, optval, optlen);
 		break;
+	case SCTP_AUTH_DEACTIVATE_KEY:
+		retval = sctp_setsockopt_deactivate_key(sk, optval, optlen);
+		break;
 	case SCTP_AUTO_ASCONF:
 		retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
 		break;
@@ -7212,6 +7242,7 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_AUTH_KEY:
 	case SCTP_AUTH_CHUNK:
 	case SCTP_AUTH_DELETE_KEY:
+	case SCTP_AUTH_DEACTIVATE_KEY:
 		retval = -EOPNOTSUPP;
 		break;
 	case SCTP_HMAC_IDENT:

From ec2e506c680deaa8e1a087986db6d73ba63a04be Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:33 +0800
Subject: [PATCH 0920/1678] sctp: add SCTP_AUTH_FREE_KEY type for
 AUTHENTICATION_EVENT

This patch is to add SCTP_AUTH_FREE_KEY type for AUTHENTICATION_EVENT,
as described in section 6.1.8 of RFC6458.

      SCTP_AUTH_FREE_KEY:  This report indicates that the SCTP
         implementation will no longer use the key identifier specified
         in auth_keynumber.

After deactivating a key, it would never be used again, which means
it's refcnt can't be held/increased by new chunks. But there may be
some chunks in out queue still using it. So only when refcnt is 1,
which means no chunk in outqueue is using/holding this key either,
this EVENT would be sent.

When users receive this notification, they could do DEL_KEY sockopt to
remove this shkey, and also tell the peer that this key won't be used
in any chunk thoroughly from now on, then the peer can remove it as
well safely.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |  6 +++++-
 net/sctp/auth.c           | 14 ++++++++++++++
 net/sctp/sm_make_chunk.c  | 20 +++++++++++++++++++-
 net/sctp/sm_statefuns.c   |  2 +-
 net/sctp/socket.c         | 19 ++++++++++++++++++-
 5 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 08fc313829f4..18ebbfeee4af 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -518,7 +518,11 @@ struct sctp_authkey_event {
 	sctp_assoc_t auth_assoc_id;
 };
 
-enum { SCTP_AUTH_NEWKEY = 0, };
+enum {
+	SCTP_AUTH_NEW_KEY,
+#define	SCTP_AUTH_NEWKEY	SCTP_AUTH_NEW_KEY /* compatible with before */
+	SCTP_AUTH_FREE_KEY,
+};
 
 /*
  * 6.1.9. SCTP_SENDER_DRY_EVENT
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index a073123fc485..e64630cd3331 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -992,6 +992,20 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
 	if (!found)
 		return -EINVAL;
 
+	/* refcnt == 1 and !list_empty mean it's not being used anywhere
+	 * and deactivated will be set, so it's time to notify userland
+	 * that this shkey can be freed.
+	 */
+	if (asoc && !list_empty(&key->key_list) &&
+	    refcount_read(&key->refcnt) == 1) {
+		struct sctp_ulpevent *ev;
+
+		ev = sctp_ulpevent_make_authkey(asoc, key->key_id,
+						SCTP_AUTH_FREE_KEY, GFP_KERNEL);
+		if (ev)
+			asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+	}
+
 	key->deactivated = 1;
 
 	return 0;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 10f071cdf188..cc20bc39ee7c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -89,8 +89,26 @@ static void sctp_control_release_owner(struct sk_buff *skb)
 {
 	struct sctp_chunk *chunk = skb_shinfo(skb)->destructor_arg;
 
-	if (chunk->shkey)
+	if (chunk->shkey) {
+		struct sctp_shared_key *shkey = chunk->shkey;
+		struct sctp_association *asoc = chunk->asoc;
+
+		/* refcnt == 2 and !list_empty mean after this release, it's
+		 * not being used anywhere, and it's time to notify userland
+		 * that this shkey can be freed if it's been deactivated.
+		 */
+		if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+		    refcount_read(&shkey->refcnt) == 2) {
+			struct sctp_ulpevent *ev;
+
+			ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+							SCTP_AUTH_FREE_KEY,
+							GFP_KERNEL);
+			if (ev)
+				asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+		}
 		sctp_auth_shkey_release(chunk->shkey);
+	}
 }
 
 static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 792e0e2be320..1e41dee70b51 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4246,7 +4246,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
 		struct sctp_ulpevent *ev;
 
 		ev = sctp_ulpevent_make_authkey(asoc, ntohs(auth_hdr->shkey_id),
-				    SCTP_AUTH_NEWKEY, GFP_ATOMIC);
+				    SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
 
 		if (!ev)
 			return -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 65cc354c520f..aeecdd620c45 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8166,8 +8166,25 @@ static void sctp_wfree(struct sk_buff *skb)
 	sk->sk_wmem_queued   -= skb->truesize;
 	sk_mem_uncharge(sk, skb->truesize);
 
-	if (chunk->shkey)
+	if (chunk->shkey) {
+		struct sctp_shared_key *shkey = chunk->shkey;
+
+		/* refcnt == 2 and !list_empty mean after this release, it's
+		 * not being used anywhere, and it's time to notify userland
+		 * that this shkey can be freed if it's been deactivated.
+		 */
+		if (shkey->deactivated && !list_empty(&shkey->key_list) &&
+		    refcount_read(&shkey->refcnt) == 2) {
+			struct sctp_ulpevent *ev;
+
+			ev = sctp_ulpevent_make_authkey(asoc, shkey->key_id,
+							SCTP_AUTH_FREE_KEY,
+							GFP_KERNEL);
+			if (ev)
+				asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+		}
 		sctp_auth_shkey_release(chunk->shkey);
+	}
 
 	sock_wfree(skb);
 	sctp_wake_up_waiters(sk, asoc);

From 30f6ebf65bc46161c5aaff1db2e6e7c76aa4a06b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Mar 2018 19:05:34 +0800
Subject: [PATCH 0921/1678] sctp: add SCTP_AUTH_NO_AUTH type for
 AUTHENTICATION_EVENT

This patch is to add SCTP_AUTH_NO_AUTH type for AUTHENTICATION_EVENT,
as described in section 6.1.8 of RFC6458.

      SCTP_AUTH_NO_AUTH:  This report indicates that the peer does not
         support SCTP authentication as defined in [RFC4895].

Note that the implementation is quite similar as that of
SCTP_ADAPTATION_INDICATION.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/command.h |  1 +
 include/uapi/linux/sctp.h  |  1 +
 net/sctp/sm_sideeffect.c   | 13 ++++++++++++
 net/sctp/sm_statefuns.c    | 43 ++++++++++++++++++++++++++++++++++++--
 4 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index b55c6a48a206..6640f84fe536 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -100,6 +100,7 @@ enum sctp_verb {
 	SCTP_CMD_SET_SK_ERR,	 /* Set sk_err */
 	SCTP_CMD_ASSOC_CHANGE,	 /* generate and send assoc_change event */
 	SCTP_CMD_ADAPTATION_IND, /* generate and send adaptation event */
+	SCTP_CMD_PEER_NO_AUTH,   /* generate and send authentication event */
 	SCTP_CMD_ASSOC_SHKEY,    /* generate the association shared keys */
 	SCTP_CMD_T1_RETRAN,	 /* Mark for retransmission after T1 timeout  */
 	SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 18ebbfeee4af..afd4346386e0 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -522,6 +522,7 @@ enum {
 	SCTP_AUTH_NEW_KEY,
 #define	SCTP_AUTH_NEWKEY	SCTP_AUTH_NEW_KEY /* compatible with before */
 	SCTP_AUTH_FREE_KEY,
+	SCTP_AUTH_NO_AUTH,
 };
 
 /*
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b71e7fb0a20a..298112ca8c06 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1049,6 +1049,16 @@ static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
 		asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
 }
 
+static void sctp_cmd_peer_no_auth(struct sctp_cmd_seq *commands,
+				  struct sctp_association *asoc)
+{
+	struct sctp_ulpevent *ev;
+
+	ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH, GFP_ATOMIC);
+	if (ev)
+		asoc->stream.si->enqueue_event(&asoc->ulpq, ev);
+}
+
 /* Helper function to generate an adaptation indication event */
 static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
 				    struct sctp_association *asoc)
@@ -1755,6 +1765,9 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
 		case SCTP_CMD_ADAPTATION_IND:
 			sctp_cmd_adaptation_ind(commands, asoc);
 			break;
+		case SCTP_CMD_PEER_NO_AUTH:
+			sctp_cmd_peer_no_auth(commands, asoc);
+			break;
 
 		case SCTP_CMD_ASSOC_SHKEY:
 			error = sctp_auth_asoc_init_active_key(asoc,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 1e41dee70b51..cc56a67dbb4d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -659,7 +659,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 					 void *arg,
 					 struct sctp_cmd_seq *commands)
 {
-	struct sctp_ulpevent *ev, *ai_ev = NULL;
+	struct sctp_ulpevent *ev, *ai_ev = NULL, *auth_ev = NULL;
 	struct sctp_association *new_asoc;
 	struct sctp_init_chunk *peer_init;
 	struct sctp_chunk *chunk = arg;
@@ -820,6 +820,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 			goto nomem_aiev;
 	}
 
+	if (!new_asoc->peer.auth_capable) {
+		auth_ev = sctp_ulpevent_make_authkey(new_asoc, 0,
+						     SCTP_AUTH_NO_AUTH,
+						     GFP_ATOMIC);
+		if (!auth_ev)
+			goto nomem_authev;
+	}
+
 	/* Add all the state machine commands now since we've created
 	 * everything.  This way we don't introduce memory corruptions
 	 * during side-effect processing and correclty count established
@@ -847,8 +855,14 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
 				SCTP_ULPEVENT(ai_ev));
 
+	if (auth_ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(auth_ev));
+
 	return SCTP_DISPOSITION_CONSUME;
 
+nomem_authev:
+	sctp_ulpevent_free(ai_ev);
 nomem_aiev:
 	sctp_ulpevent_free(ev);
 nomem_ev:
@@ -953,6 +967,15 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 				SCTP_ULPEVENT(ev));
 	}
 
+	if (!asoc->peer.auth_capable) {
+		ev = sctp_ulpevent_make_authkey(asoc, 0, SCTP_AUTH_NO_AUTH,
+						GFP_ATOMIC);
+		if (!ev)
+			goto nomem;
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(ev));
+	}
+
 	return SCTP_DISPOSITION_CONSUME;
 nomem:
 	return SCTP_DISPOSITION_NOMEM;
@@ -1908,6 +1931,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
 	if (asoc->peer.adaptation_ind)
 		sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL());
 
+	if (!asoc->peer.auth_capable)
+		sctp_add_cmd_sf(commands, SCTP_CMD_PEER_NO_AUTH, SCTP_NULL());
+
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem:
@@ -1954,7 +1980,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
 					struct sctp_cmd_seq *commands,
 					struct sctp_association *new_asoc)
 {
-	struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
+	struct sctp_ulpevent *ev = NULL, *ai_ev = NULL, *auth_ev = NULL;
 	struct sctp_chunk *repl;
 
 	/* Clarification from Implementor's Guide:
@@ -2001,6 +2027,14 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
 				goto nomem;
 
 		}
+
+		if (!asoc->peer.auth_capable) {
+			auth_ev = sctp_ulpevent_make_authkey(asoc, 0,
+							     SCTP_AUTH_NO_AUTH,
+							     GFP_ATOMIC);
+			if (!auth_ev)
+				goto nomem;
+		}
 	}
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2015,10 +2049,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_d(
 	if (ai_ev)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
 					SCTP_ULPEVENT(ai_ev));
+	if (auth_ev)
+		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
+				SCTP_ULPEVENT(auth_ev));
 
 	return SCTP_DISPOSITION_CONSUME;
 
 nomem:
+	if (auth_ev)
+		sctp_ulpevent_free(auth_ev);
 	if (ai_ev)
 		sctp_ulpevent_free(ai_ev);
 	if (ev)

From 1f71e2b11ceed122343a0ec5c3c10fefb91c2838 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Sun, 28 Jan 2018 20:22:30 +0000
Subject: [PATCH 0922/1678] i40e: remove i40e_fcoe files

i40e_fcoe support was removed via commit 9eed69a9147c ("i40e: Drop FCoE code from core driver files")
But this left files in place but un-compilable.
Let's finish the cleaning.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_fcoe.c | 1571 -------------------
 drivers/net/ethernet/intel/i40e/i40e_fcoe.h |  127 --
 2 files changed, 1698 deletions(-)
 delete mode 100644 drivers/net/ethernet/intel/i40e/i40e_fcoe.c
 delete mode 100644 drivers/net/ethernet/intel/i40e/i40e_fcoe.h

diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
deleted file mode 100644
index 2d1253c5b7a1..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ /dev/null
@@ -1,1571 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#include <linux/if_ether.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/fc/fc_fs.h>
-#include <scsi/fc/fc_fip.h>
-#include <scsi/fc/fc_fcoe.h>
-#include <scsi/libfc.h>
-#include <scsi/libfcoe.h>
-#include <uapi/linux/dcbnl.h>
-
-#include "i40e.h"
-#include "i40e_fcoe.h"
-
-/**
- * i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class2(u8 sof)
-{
-	return (sof == FC_SOF_I2) || (sof == FC_SOF_N2);
-}
-
-/**
- * i40e_fcoe_sof_is_class3 - returns true if this is a FC Class 3 SOF
- * @sof: the FCoE start of frame delimiter
- **/
-static inline bool i40e_fcoe_sof_is_class3(u8 sof)
-{
-	return (sof == FC_SOF_I3) || (sof == FC_SOF_N3);
-}
-
-/**
- * i40e_fcoe_sof_is_supported - returns true if the FC SOF is supported by HW
- * @sof: the input SOF value from the frame
- **/
-static inline bool i40e_fcoe_sof_is_supported(u8 sof)
-{
-	return i40e_fcoe_sof_is_class2(sof) ||
-	       i40e_fcoe_sof_is_class3(sof);
-}
-
-/**
- * i40e_fcoe_fc_sof - pull the SOF from FCoE header in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_sof(struct sk_buff *skb, u8 *sof)
-{
-	*sof = ((struct fcoe_hdr *)skb_network_header(skb))->fcoe_sof;
-
-	if (!i40e_fcoe_sof_is_supported(*sof))
-		return -EINVAL;
-	return 0;
-}
-
-/**
- * i40e_fcoe_eof_is_supported - returns true if the EOF is supported by HW
- * @eof:     the input EOF value from the frame
- **/
-static inline bool i40e_fcoe_eof_is_supported(u8 eof)
-{
-	return (eof == FC_EOF_N) || (eof == FC_EOF_T) ||
-	       (eof == FC_EOF_NI) || (eof == FC_EOF_A);
-}
-
-/**
- * i40e_fcoe_fc_eof - pull EOF from FCoE trailer in the frame
- * @skb: the frame whose EOF is to be pulled from
- **/
-static inline int i40e_fcoe_fc_eof(struct sk_buff *skb, u8 *eof)
-{
-	/* the first byte of the last dword is EOF */
-	skb_copy_bits(skb, skb->len - 4, eof, 1);
-
-	if (!i40e_fcoe_eof_is_supported(*eof))
-		return -EINVAL;
-	return 0;
-}
-
-/**
- * i40e_fcoe_ctxt_eof - convert input FC EOF for descriptor programming
- * @eof: the input eof value from the frame
- *
- * The FC EOF is converted to the value understood by HW for descriptor
- * programming. Never call this w/o calling i40e_fcoe_eof_is_supported()
- * first and that already checks for all supported valid eof values.
- **/
-static inline u32 i40e_fcoe_ctxt_eof(u8 eof)
-{
-	switch (eof) {
-	case FC_EOF_N:
-		return I40E_TX_DESC_CMD_L4T_EOFT_EOF_N;
-	case FC_EOF_T:
-		return I40E_TX_DESC_CMD_L4T_EOFT_EOF_T;
-	case FC_EOF_NI:
-		return I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI;
-	case FC_EOF_A:
-		return I40E_TX_DESC_CMD_L4T_EOFT_EOF_A;
-	default:
-		/* Supported valid eof shall be already checked by
-		 * calling i40e_fcoe_eof_is_supported() first,
-		 * therefore this default case shall never hit.
-		 */
-		WARN_ON(1);
-		return -EINVAL;
-	}
-}
-
-/**
- * i40e_fcoe_xid_is_valid - returns true if the exchange id is valid
- * @xid: the exchange id
- **/
-static inline bool i40e_fcoe_xid_is_valid(u16 xid)
-{
-	return (xid != FC_XID_UNKNOWN) && (xid < I40E_FCOE_DDP_MAX);
-}
-
-/**
- * i40e_fcoe_ddp_unmap - unmap the mapped sglist associated
- * @pf: pointer to PF
- * @ddp: sw DDP context
- *
- * Unmap the scatter-gather list associated with the given SW DDP context
- *
- * Returns: data length already ddp-ed in bytes
- *
- **/
-static inline void i40e_fcoe_ddp_unmap(struct i40e_pf *pf,
-				       struct i40e_fcoe_ddp *ddp)
-{
-	if (test_and_set_bit(__I40E_FCOE_DDP_UNMAPPED, &ddp->flags))
-		return;
-
-	if (ddp->sgl) {
-		dma_unmap_sg(&pf->pdev->dev, ddp->sgl, ddp->sgc,
-			     DMA_FROM_DEVICE);
-		ddp->sgl = NULL;
-		ddp->sgc = 0;
-	}
-
-	if (ddp->pool) {
-		dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
-		ddp->pool = NULL;
-	}
-}
-
-/**
- * i40e_fcoe_ddp_clear - clear the given SW DDP context
- * @ddp - SW DDP context
- **/
-static inline void i40e_fcoe_ddp_clear(struct i40e_fcoe_ddp *ddp)
-{
-	memset(ddp, 0, sizeof(struct i40e_fcoe_ddp));
-	ddp->xid = FC_XID_UNKNOWN;
-	ddp->flags = __I40E_FCOE_DDP_NONE;
-}
-
-/**
- * i40e_fcoe_progid_is_fcoe - check if the prog_id is for FCoE
- * @id: the prog id for the programming status Rx descriptor write-back
- **/
-static inline bool i40e_fcoe_progid_is_fcoe(u8 id)
-{
-	return (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
-	       (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS);
-}
-
-/**
- * i40e_fcoe_fc_get_xid - get xid from the frame header
- * @fh: the fc frame header
- *
- * In case the incoming frame's exchange is originated from
- * the initiator, then received frame's exchange id is ANDed
- * with fc_cpu_mask bits to get the same cpu on which exchange
- * was originated, otherwise just use the current cpu.
- *
- * Returns ox_id if exchange originator, rx_id if responder
- **/
-static inline u16 i40e_fcoe_fc_get_xid(struct fc_frame_header *fh)
-{
-	u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
-	return (f_ctl & FC_FC_EX_CTX) ?
-		be16_to_cpu(fh->fh_ox_id) :
-		be16_to_cpu(fh->fh_rx_id);
-}
-
-/**
- * i40e_fcoe_fc_frame_header - get fc frame header from skb
- * @skb: packet
- *
- * This checks if there is a VLAN header and returns the data
- * pointer to the start of the fc_frame_header.
- *
- * Returns pointer to the fc_frame_header
- **/
-static inline struct fc_frame_header *i40e_fcoe_fc_frame_header(
-	struct sk_buff *skb)
-{
-	void *fh = skb->data + sizeof(struct fcoe_hdr);
-
-	if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
-		fh += sizeof(struct vlan_hdr);
-
-	return (struct fc_frame_header *)fh;
-}
-
-/**
- * i40e_fcoe_ddp_put - release the DDP context for a given exchange id
- * @netdev: the corresponding net_device
- * @xid: the exchange id that corresponding DDP context will be released
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_done
- * and it is expected to be called by ULD, i.e., FCP layer of libfc
- * to release the corresponding ddp context when the I/O is done.
- *
- * Returns : data length already ddp-ed in bytes
- **/
-static int i40e_fcoe_ddp_put(struct net_device *netdev, u16 xid)
-{
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_pf *pf = np->vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	int len = 0;
-	struct i40e_fcoe_ddp *ddp = &fcoe->ddp[xid];
-
-	if (!fcoe || !ddp)
-		goto out;
-
-	if (test_bit(__I40E_FCOE_DDP_DONE, &ddp->flags))
-		len = ddp->len;
-	i40e_fcoe_ddp_unmap(pf, ddp);
-out:
-	return len;
-}
-
-/**
- * i40e_fcoe_sw_init - sets up the HW for FCoE
- * @pf: pointer to PF
- **/
-void i40e_init_pf_fcoe(struct i40e_pf *pf)
-{
-	struct i40e_hw *hw = &pf->hw;
-	u32 val;
-
-	pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
-	pf->num_fcoe_qps = 0;
-	pf->fcoe_hmc_cntx_num = 0;
-	pf->fcoe_hmc_filt_num = 0;
-
-	if (!pf->hw.func_caps.fcoe) {
-		dev_dbg(&pf->pdev->dev, "FCoE capability is disabled\n");
-		return;
-	}
-
-	if (!pf->hw.func_caps.dcb) {
-		dev_warn(&pf->pdev->dev,
-			 "Hardware is not DCB capable not enabling FCoE.\n");
-		return;
-	}
-
-	/* enable FCoE hash filter */
-	val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1));
-	val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32);
-	val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32);
-	val &= I40E_PFQF_HENA_PTYPE_ENA_MASK;
-	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val);
-
-	/* enable flag */
-	pf->flags |= I40E_FLAG_FCOE_ENABLED;
-	pf->num_fcoe_qps = I40E_DEFAULT_FCOE;
-
-	/* Reserve 4K DDP contexts and 20K filter size for FCoE */
-	pf->fcoe_hmc_cntx_num = BIT(I40E_DMA_CNTX_SIZE_4K) *
-				I40E_DMA_CNTX_BASE_SIZE;
-	pf->fcoe_hmc_filt_num = pf->fcoe_hmc_cntx_num +
-				BIT(I40E_HASH_FILTER_SIZE_16K) *
-				I40E_HASH_FILTER_BASE_SIZE;
-
-	/* FCoE object: max 16K filter buckets and 4K DMA contexts */
-	pf->filter_settings.fcoe_filt_num = I40E_HASH_FILTER_SIZE_16K;
-	pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K;
-
-	/* Setup max frame with FCoE_MTU plus L2 overheads */
-	val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL);
-	val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK;
-	val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
-		 << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT);
-	i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val);
-
-	dev_info(&pf->pdev->dev, "FCoE is supported.\n");
-}
-
-/**
- * i40e_get_fcoe_tc_map - Return TC map for FCoE APP
- * @pf: pointer to PF
- *
- **/
-u8 i40e_get_fcoe_tc_map(struct i40e_pf *pf)
-{
-	struct i40e_dcb_app_priority_table app;
-	struct i40e_hw *hw = &pf->hw;
-	u8 enabled_tc = 0;
-	u8 tc, i;
-	/* Get the FCoE APP TLV */
-	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
-
-	for (i = 0; i < dcbcfg->numapps; i++) {
-		app = dcbcfg->app[i];
-		if (app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
-		    app.protocolid == ETH_P_FCOE) {
-			tc = dcbcfg->etscfg.prioritytable[app.priority];
-			enabled_tc |= BIT(tc);
-			break;
-		}
-	}
-
-	/* TC0 if there is no TC defined for FCoE APP TLV */
-	enabled_tc = enabled_tc ? enabled_tc : 0x1;
-
-	return enabled_tc;
-}
-
-/**
- * i40e_fcoe_vsi_init - prepares the VSI context for creating a FCoE VSI
- * @vsi: pointer to the associated VSI struct
- * @ctxt: pointer to the associated VSI context to be passed to HW
- *
- * Returns 0 on success or < 0 on error
- **/
-int i40e_fcoe_vsi_init(struct i40e_vsi *vsi, struct i40e_vsi_context *ctxt)
-{
-	struct i40e_aqc_vsi_properties_data *info = &ctxt->info;
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_hw *hw = &pf->hw;
-	u8 enabled_tc = 0;
-
-	if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
-		dev_err(&pf->pdev->dev,
-			"FCoE is not enabled for this device\n");
-		return -EPERM;
-	}
-
-	/* initialize the hardware for FCoE */
-	ctxt->pf_num = hw->pf_id;
-	ctxt->vf_num = 0;
-	ctxt->uplink_seid = vsi->uplink_seid;
-	ctxt->connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
-	ctxt->flags = I40E_AQ_VSI_TYPE_PF;
-
-	/* FCoE VSI would need the following sections */
-	info->valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
-
-	/* FCoE VSI does not need these sections */
-	info->valid_sections &= cpu_to_le16(~(I40E_AQ_VSI_PROP_SECURITY_VALID |
-					    I40E_AQ_VSI_PROP_VLAN_VALID |
-					    I40E_AQ_VSI_PROP_CAS_PV_VALID |
-					    I40E_AQ_VSI_PROP_INGRESS_UP_VALID |
-					    I40E_AQ_VSI_PROP_EGRESS_UP_VALID));
-
-	if (i40e_is_vsi_uplink_mode_veb(vsi)) {
-		info->valid_sections |=
-				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
-		info->switch_id =
-				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
-	}
-	enabled_tc = i40e_get_fcoe_tc_map(pf);
-	i40e_vsi_setup_queue_map(vsi, ctxt, enabled_tc, true);
-
-	/* set up queue option section: only enable FCoE */
-	info->queueing_opt_flags = I40E_AQ_VSI_QUE_OPT_FCOE_ENA;
-
-	return 0;
-}
-
-/**
- * i40e_fcoe_enable - this is the implementation of ndo_fcoe_enable,
- * indicating the upper FCoE protocol stack is ready to use FCoE
- * offload features.
- *
- * @netdev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- * in RTNL
- *
- **/
-int i40e_fcoe_enable(struct net_device *netdev)
-{
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_vsi *vsi = np->vsi;
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-
-	if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
-		netdev_err(netdev, "HW does not support FCoE.\n");
-		return -ENODEV;
-	}
-
-	if (vsi->type != I40E_VSI_FCOE) {
-		netdev_err(netdev, "interface does not support FCoE.\n");
-		return -EBUSY;
-	}
-
-	atomic_inc(&fcoe->refcnt);
-
-	return 0;
-}
-
-/**
- * i40e_fcoe_disable- disables FCoE for upper FCoE protocol stack.
- * @dev: pointer to the netdev that FCoE is created on
- *
- * Returns 0 on success
- *
- **/
-int i40e_fcoe_disable(struct net_device *netdev)
-{
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_vsi *vsi = np->vsi;
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-
-	if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) {
-		netdev_err(netdev, "device does not support FCoE\n");
-		return -ENODEV;
-	}
-	if (vsi->type != I40E_VSI_FCOE)
-		return -EBUSY;
-
-	if (!atomic_dec_and_test(&fcoe->refcnt))
-		return -EINVAL;
-
-	netdev_info(netdev, "FCoE disabled\n");
-
-	return 0;
-}
-
-/**
- * i40e_fcoe_dma_pool_free - free the per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- **/
-static void i40e_fcoe_dma_pool_free(struct i40e_fcoe *fcoe,
-				    struct device *dev,
-				    unsigned int cpu)
-{
-	struct i40e_fcoe_ddp_pool *ddp_pool;
-
-	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
-	if (!ddp_pool->pool) {
-		dev_warn(dev, "DDP pool already freed for cpu %d\n", cpu);
-		return;
-	}
-	dma_pool_destroy(ddp_pool->pool);
-	ddp_pool->pool = NULL;
-}
-
-/**
- * i40e_fcoe_dma_pool_create - per cpu pool for FCoE DDP
- * @fcoe: the FCoE sw object
- * @dev: the device that the pool is associated with
- * @cpu: the cpu for this pool
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-static int i40e_fcoe_dma_pool_create(struct i40e_fcoe *fcoe,
-				     struct device *dev,
-				     unsigned int cpu)
-{
-	struct i40e_fcoe_ddp_pool *ddp_pool;
-	struct dma_pool *pool;
-	char pool_name[32];
-
-	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
-	if (ddp_pool && ddp_pool->pool) {
-		dev_warn(dev, "DDP pool already allocated for cpu %d\n", cpu);
-		return 0;
-	}
-	snprintf(pool_name, sizeof(pool_name), "i40e_fcoe_ddp_%d", cpu);
-	pool = dma_pool_create(pool_name, dev, I40E_FCOE_DDP_PTR_MAX,
-			       I40E_FCOE_DDP_PTR_ALIGN, PAGE_SIZE);
-	if (!pool) {
-		dev_err(dev, "dma_pool_create %s failed\n", pool_name);
-		return -ENOMEM;
-	}
-	ddp_pool->pool = pool;
-	return 0;
-}
-
-/**
- * i40e_fcoe_free_ddp_resources - release FCoE DDP resources
- * @vsi: the vsi FCoE is associated with
- *
- **/
-void i40e_fcoe_free_ddp_resources(struct i40e_vsi *vsi)
-{
-	struct i40e_pf *pf = vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	int cpu, i;
-
-	/* do nothing if not FCoE VSI */
-	if (vsi->type != I40E_VSI_FCOE)
-		return;
-
-	/* do nothing if no DDP pools were allocated */
-	if (!fcoe->ddp_pool)
-		return;
-
-	for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
-		i40e_fcoe_ddp_put(vsi->netdev, i);
-
-	for_each_possible_cpu(cpu)
-		i40e_fcoe_dma_pool_free(fcoe, &pf->pdev->dev, cpu);
-
-	free_percpu(fcoe->ddp_pool);
-	fcoe->ddp_pool = NULL;
-
-	netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources released\n",
-		    vsi->id, vsi->seid);
-}
-
-/**
- * i40e_fcoe_setup_ddp_resources - allocate per cpu DDP resources
- * @vsi: the VSI FCoE is associated with
- *
- * Returns 0 on successful or non zero on failure
- *
- **/
-int i40e_fcoe_setup_ddp_resources(struct i40e_vsi *vsi)
-{
-	struct i40e_pf *pf = vsi->back;
-	struct device *dev = &pf->pdev->dev;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	unsigned int cpu;
-	int i;
-
-	if (vsi->type != I40E_VSI_FCOE)
-		return -ENODEV;
-
-	/* do nothing if no DDP pools were allocated */
-	if (fcoe->ddp_pool)
-		return -EEXIST;
-
-	/* allocate per CPU memory to track DDP pools */
-	fcoe->ddp_pool = alloc_percpu(struct i40e_fcoe_ddp_pool);
-	if (!fcoe->ddp_pool) {
-		dev_err(&pf->pdev->dev, "failed to allocate percpu DDP\n");
-		return -ENOMEM;
-	}
-
-	/* allocate pci pool for each cpu */
-	for_each_possible_cpu(cpu) {
-		if (!i40e_fcoe_dma_pool_create(fcoe, dev, cpu))
-			continue;
-
-		dev_err(dev, "failed to alloc DDP pool on cpu:%d\n", cpu);
-		i40e_fcoe_free_ddp_resources(vsi);
-		return -ENOMEM;
-	}
-
-	/* initialize the sw context */
-	for (i = 0; i < I40E_FCOE_DDP_MAX; i++)
-		i40e_fcoe_ddp_clear(&fcoe->ddp[i]);
-
-	netdev_info(vsi->netdev, "VSI %d,%d FCoE DDP resources allocated\n",
-		    vsi->id, vsi->seid);
-
-	return 0;
-}
-
-/**
- * i40e_fcoe_handle_status - check the Programming Status for FCoE
- * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for Programming Status, not a packet descriptor.
- *
- * Check if this is the Rx Programming Status descriptor write-back for FCoE.
- * This is used to verify if the context/filter programming or invalidation
- * requested by SW to the HW is successful or not and take actions accordingly.
- **/
-void i40e_fcoe_handle_status(struct i40e_ring *rx_ring,
-			     union i40e_rx_desc *rx_desc, u8 prog_id)
-{
-	struct i40e_pf *pf = rx_ring->vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	struct i40e_fcoe_ddp *ddp;
-	u32 error;
-	u16 xid;
-	u64 qw;
-
-	/* we only care for FCoE here */
-	if (!i40e_fcoe_progid_is_fcoe(prog_id))
-		return;
-
-	xid = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param) &
-	      (I40E_FCOE_DDP_MAX - 1);
-
-	if (!i40e_fcoe_xid_is_valid(xid))
-		return;
-
-	ddp = &fcoe->ddp[xid];
-	WARN_ON(xid != ddp->xid);
-
-	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
-		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
-
-	/* DDP context programming status: failure or success */
-	if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) {
-		if (I40E_RX_PROG_FCOE_ERROR_TBL_FULL(error)) {
-			dev_err(&pf->pdev->dev, "xid %x ddp->xid %x TABLE FULL\n",
-				xid, ddp->xid);
-			ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT;
-		}
-		if (I40E_RX_PROG_FCOE_ERROR_CONFLICT(error)) {
-			dev_err(&pf->pdev->dev, "xid %x ddp->xid %x CONFLICT\n",
-				xid, ddp->xid);
-			ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT;
-		}
-	}
-
-	/* DDP context invalidation status: failure or success */
-	if (prog_id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS) {
-		if (I40E_RX_PROG_FCOE_ERROR_INVLFAIL(error)) {
-			dev_err(&pf->pdev->dev, "xid %x ddp->xid %x INVALIDATION FAILURE\n",
-				xid, ddp->xid);
-			ddp->prerr |= I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT;
-		}
-		/* clear the flag so we can retry invalidation */
-		clear_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags);
-	}
-
-	/* unmap DMA */
-	i40e_fcoe_ddp_unmap(pf, ddp);
-	i40e_fcoe_ddp_clear(ddp);
-}
-
-/**
- * i40e_fcoe_handle_offload - check ddp status and mark it done
- * @adapter: i40e adapter
- * @rx_desc: advanced rx descriptor
- * @skb: the skb holding the received data
- *
- * This checks ddp status.
- *
- * Returns : < 0 indicates an error or not a FCOE ddp, 0 indicates
- * not passing the skb to ULD, > 0 indicates is the length of data
- * being ddped.
- *
- **/
-int i40e_fcoe_handle_offload(struct i40e_ring *rx_ring,
-			     union i40e_rx_desc *rx_desc,
-			     struct sk_buff *skb)
-{
-	struct i40e_pf *pf = rx_ring->vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	struct fc_frame_header *fh = NULL;
-	struct i40e_fcoe_ddp *ddp = NULL;
-	u32 status, fltstat;
-	u32 error, fcerr;
-	int rc = -EINVAL;
-	u16 ptype;
-	u16 xid;
-	u64 qw;
-
-	/* check this rxd is for programming status */
-	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-	/* packet descriptor, check packet type */
-	ptype = (qw & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
-	if (!i40e_rx_is_fcoe(ptype))
-		goto out_no_ddp;
-
-	error = (qw & I40E_RXD_QW1_ERROR_MASK) >> I40E_RXD_QW1_ERROR_SHIFT;
-	fcerr = (error >> I40E_RX_DESC_ERROR_L3L4E_SHIFT) &
-		 I40E_RX_DESC_FCOE_ERROR_MASK;
-
-	/* check stateless offload error */
-	if (unlikely(fcerr == I40E_RX_DESC_ERROR_L3L4E_PROT)) {
-		dev_err(&pf->pdev->dev, "Protocol Error\n");
-		skb->ip_summed = CHECKSUM_NONE;
-	} else {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-
-	/* check hw status on ddp */
-	status = (qw & I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT;
-	fltstat = (status >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
-		   I40E_RX_DESC_FLTSTAT_FCMASK;
-
-	/* now we are ready to check DDP */
-	fh = i40e_fcoe_fc_frame_header(skb);
-	xid = i40e_fcoe_fc_get_xid(fh);
-	if (!i40e_fcoe_xid_is_valid(xid))
-		goto out_no_ddp;
-
-	/* non DDP normal receive, return to the protocol stack */
-	if (fltstat == I40E_RX_DESC_FLTSTAT_NOMTCH)
-		goto out_no_ddp;
-
-	/* do we have a sw ddp context setup ? */
-	ddp = &fcoe->ddp[xid];
-	if (!ddp->sgl)
-		goto out_no_ddp;
-
-	/* fetch xid from hw rxd wb, which should match up the sw ctxt */
-	xid = le16_to_cpu(rx_desc->wb.qword0.lo_dword.mirr_fcoe.fcoe_ctx_id);
-	if (ddp->xid != xid) {
-		dev_err(&pf->pdev->dev, "xid 0x%x does not match ctx_xid 0x%x\n",
-			ddp->xid, xid);
-		goto out_put_ddp;
-	}
-
-	/* the same exchange has already errored out */
-	if (ddp->fcerr) {
-		dev_err(&pf->pdev->dev, "xid 0x%x fcerr 0x%x reported fcer 0x%x\n",
-			xid, ddp->fcerr, fcerr);
-		goto out_put_ddp;
-	}
-
-	/* fcoe param is valid by now with correct DDPed length */
-	ddp->len = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fcoe_param);
-	ddp->fcerr = fcerr;
-	/* header posting only, useful only for target mode and debugging */
-	if (fltstat == I40E_RX_DESC_FLTSTAT_DDP) {
-		/* For target mode, we get header of the last packet but it
-		 * does not have the FCoE trailer field, i.e., CRC and EOF
-		 * Ordered Set since they are offloaded by the HW, so fill
-		 * it up correspondingly to allow the packet to pass through
-		 * to the upper protocol stack.
-		 */
-		u32 f_ctl = ntoh24(fh->fh_f_ctl);
-
-		if ((f_ctl & FC_FC_END_SEQ) &&
-		    (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA)) {
-			struct fcoe_crc_eof *crc = NULL;
-
-			crc = skb_put(skb, sizeof(*crc));
-			crc->fcoe_eof = FC_EOF_T;
-		} else {
-			/* otherwise, drop the header only frame */
-			rc = 0;
-			goto out_no_ddp;
-		}
-	}
-
-out_put_ddp:
-	/* either we got RSP or we have an error, unmap DMA in both cases */
-	i40e_fcoe_ddp_unmap(pf, ddp);
-	if (ddp->len && !ddp->fcerr) {
-		int pkts;
-
-		rc = ddp->len;
-		i40e_fcoe_ddp_clear(ddp);
-		ddp->len = rc;
-		pkts = DIV_ROUND_UP(rc, 2048);
-		rx_ring->stats.bytes += rc;
-		rx_ring->stats.packets += pkts;
-		rx_ring->q_vector->rx.total_bytes += rc;
-		rx_ring->q_vector->rx.total_packets += pkts;
-		set_bit(__I40E_FCOE_DDP_DONE, &ddp->flags);
-	}
-
-out_no_ddp:
-	return rc;
-}
-
-/**
- * i40e_fcoe_ddp_setup - called to set up ddp context
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- * @target_mode: indicates this is a DDP request for target
- *
- * Returns : 1 for success and 0 for no DDP on this I/O
- **/
-static int i40e_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
-			       struct scatterlist *sgl, unsigned int sgc,
-			       int target_mode)
-{
-	static const unsigned int bufflen = I40E_FCOE_DDP_BUF_MIN;
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_fcoe_ddp_pool *ddp_pool;
-	struct i40e_pf *pf = np->vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	unsigned int i, j, dmacount;
-	struct i40e_fcoe_ddp *ddp;
-	unsigned int firstoff = 0;
-	unsigned int thisoff = 0;
-	unsigned int thislen = 0;
-	struct scatterlist *sg;
-	dma_addr_t addr = 0;
-	unsigned int len;
-
-	if (xid >= I40E_FCOE_DDP_MAX) {
-		dev_warn(&pf->pdev->dev, "xid=0x%x out-of-range\n", xid);
-		return 0;
-	}
-
-	/* no DDP if we are already down or resetting */
-	if (test_bit(__I40E_DOWN, &pf->state) ||
-	    test_bit(__I40E_NEEDS_RESTART, &pf->state)) {
-		dev_info(&pf->pdev->dev, "xid=0x%x device in reset/down\n",
-			 xid);
-		return 0;
-	}
-
-	ddp = &fcoe->ddp[xid];
-	if (ddp->sgl) {
-		dev_info(&pf->pdev->dev, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
-			 xid, ddp->sgl, ddp->sgc);
-		return 0;
-	}
-	i40e_fcoe_ddp_clear(ddp);
-
-	if (!fcoe->ddp_pool) {
-		dev_info(&pf->pdev->dev, "No DDP pool, xid 0x%x\n", xid);
-		return 0;
-	}
-
-	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu());
-	if (!ddp_pool->pool) {
-		dev_info(&pf->pdev->dev, "No percpu ddp pool, xid 0x%x\n", xid);
-		goto out_noddp;
-	}
-
-	/* setup dma from scsi command sgl */
-	dmacount = dma_map_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
-	if (dmacount == 0) {
-		dev_info(&pf->pdev->dev, "dma_map_sg for sgl %p, sgc %d failed\n",
-			 sgl, sgc);
-		goto out_noddp_unmap;
-	}
-
-	/* alloc the udl from our ddp pool */
-	ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
-	if (!ddp->udl) {
-		dev_info(&pf->pdev->dev,
-			 "Failed allocated ddp context, xid 0x%x\n", xid);
-		goto out_noddp_unmap;
-	}
-
-	j = 0;
-	ddp->len = 0;
-	for_each_sg(sgl, sg, dmacount, i) {
-		addr = sg_dma_address(sg);
-		len = sg_dma_len(sg);
-		ddp->len += len;
-		while (len) {
-			/* max number of buffers allowed in one DDP context */
-			if (j >= I40E_FCOE_DDP_BUFFCNT_MAX) {
-				dev_info(&pf->pdev->dev,
-					 "xid=%x:%d,%d,%d:addr=%llx not enough descriptors\n",
-					 xid, i, j, dmacount, (u64)addr);
-				goto out_noddp_free;
-			}
-
-			/* get the offset of length of current buffer */
-			thisoff = addr & ((dma_addr_t)bufflen - 1);
-			thislen = min_t(unsigned int, (bufflen - thisoff), len);
-			/* all but the 1st buffer (j == 0)
-			 * must be aligned on bufflen
-			 */
-			if ((j != 0) && (thisoff))
-				goto out_noddp_free;
-
-			/* all but the last buffer
-			 * ((i == (dmacount - 1)) && (thislen == len))
-			 * must end at bufflen
-			 */
-			if (((i != (dmacount - 1)) || (thislen != len)) &&
-			    ((thislen + thisoff) != bufflen))
-				goto out_noddp_free;
-
-			ddp->udl[j] = (u64)(addr - thisoff);
-			/* only the first buffer may have none-zero offset */
-			if (j == 0)
-				firstoff = thisoff;
-			len -= thislen;
-			addr += thislen;
-			j++;
-		}
-	}
-	/* only the last buffer may have non-full bufflen */
-	ddp->lastsize = thisoff + thislen;
-	ddp->firstoff = firstoff;
-	ddp->list_len = j;
-	ddp->pool = ddp_pool->pool;
-	ddp->sgl = sgl;
-	ddp->sgc = sgc;
-	ddp->xid = xid;
-	if (target_mode)
-		set_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
-	set_bit(__I40E_FCOE_DDP_INITALIZED, &ddp->flags);
-
-	put_cpu();
-	return 1; /* Success */
-
-out_noddp_free:
-	dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
-	i40e_fcoe_ddp_clear(ddp);
-
-out_noddp_unmap:
-	dma_unmap_sg(&pf->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
-out_noddp:
-	put_cpu();
-	return 0;
-}
-
-/**
- * i40e_fcoe_ddp_get - called to set up ddp context in initiator mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_setup
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_get(struct net_device *netdev, u16 xid,
-			     struct scatterlist *sgl, unsigned int sgc)
-{
-	return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 0);
-}
-
-/**
- * i40e_fcoe_ddp_target - called to set up ddp context in target mode
- * @netdev: the corresponding net_device
- * @xid: the exchange id requesting ddp
- * @sgl: the scatter-gather list for this request
- * @sgc: the number of scatter-gather items
- *
- * This is the implementation of net_device_ops.ndo_fcoe_ddp_target
- * and is expected to be called from ULD, e.g., FCP layer of libfc
- * to set up ddp for the corresponding xid of the given sglist for
- * the corresponding I/O. The DDP in target mode is a write I/O request
- * from the initiator.
- *
- * Returns : 1 for success and 0 for no ddp
- **/
-static int i40e_fcoe_ddp_target(struct net_device *netdev, u16 xid,
-				struct scatterlist *sgl, unsigned int sgc)
-{
-	return i40e_fcoe_ddp_setup(netdev, xid, sgl, sgc, 1);
-}
-
-/**
- * i40e_fcoe_program_ddp - programs the HW DDP related descriptors
- * @tx_ring: transmit ring for this packet
- * @skb:     the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determine if it is READ/WRITE command, and finds out if there is
- * a matching SW DDP context for this command. DDP is applicable
- * only in case of READ if initiator or WRITE in case of
- * responder (via checking XFER_RDY).
- *
- * Note: caller checks sof and ddp sw context
- *
- * Returns : none
- *
- **/
-static void i40e_fcoe_program_ddp(struct i40e_ring *tx_ring,
-				  struct sk_buff *skb,
-				  struct i40e_fcoe_ddp *ddp, u8 sof)
-{
-	struct i40e_fcoe_filter_context_desc *filter_desc = NULL;
-	struct i40e_fcoe_queue_context_desc *queue_desc = NULL;
-	struct i40e_fcoe_ddp_context_desc *ddp_desc = NULL;
-	struct i40e_pf *pf = tx_ring->vsi->back;
-	u16 i = tx_ring->next_to_use;
-	struct fc_frame_header *fh;
-	u64 flags_rsvd_lanq = 0;
-	bool target_mode;
-
-	/* check if abort is still pending */
-	if (test_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags)) {
-		dev_warn(&pf->pdev->dev,
-			 "DDP abort is still pending xid:%hx and ddp->flags:%lx:\n",
-			 ddp->xid, ddp->flags);
-		return;
-	}
-
-	/* set the flag to indicate this is programmed */
-	if (test_and_set_bit(__I40E_FCOE_DDP_PROGRAMMED, &ddp->flags)) {
-		dev_warn(&pf->pdev->dev,
-			 "DDP is already programmed for xid:%hx and ddp->flags:%lx:\n",
-			 ddp->xid, ddp->flags);
-		return;
-	}
-
-	/* Prepare the DDP context descriptor */
-	ddp_desc = I40E_DDP_CONTEXT_DESC(tx_ring, i);
-	i++;
-	if (i == tx_ring->count)
-		i = 0;
-
-	ddp_desc->type_cmd_foff_lsize =
-				cpu_to_le64(I40E_TX_DESC_DTYPE_DDP_CTX	|
-				((u64)I40E_FCOE_DDP_CTX_DESC_BSIZE_4K  <<
-				I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT)	|
-				((u64)ddp->firstoff		       <<
-				I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT)	|
-				((u64)ddp->lastsize		       <<
-				I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT));
-	ddp_desc->rsvd = cpu_to_le64(0);
-
-	/* target mode needs last packet in the sequence  */
-	target_mode = test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags);
-	if (target_mode)
-		ddp_desc->type_cmd_foff_lsize |=
-			cpu_to_le64(I40E_FCOE_DDP_CTX_DESC_LASTSEQH);
-
-	/* Prepare queue_context descriptor */
-	queue_desc = I40E_QUEUE_CONTEXT_DESC(tx_ring, i++);
-	if (i == tx_ring->count)
-		i = 0;
-	queue_desc->dmaindx_fbase = cpu_to_le64(ddp->xid | ((u64)ddp->udp));
-	queue_desc->flen_tph = cpu_to_le64(ddp->list_len |
-				((u64)(I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC |
-				I40E_FCOE_QUEUE_CTX_DESC_TPHDATA) <<
-				I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT));
-
-	/* Prepare filter_context_desc */
-	filter_desc = I40E_FILTER_CONTEXT_DESC(tx_ring, i);
-	i++;
-	if (i == tx_ring->count)
-		i = 0;
-
-	fh = (struct fc_frame_header *)skb_transport_header(skb);
-	filter_desc->param = cpu_to_le32(ntohl(fh->fh_parm_offset));
-	filter_desc->seqn = cpu_to_le16(ntohs(fh->fh_seq_cnt));
-	filter_desc->rsvd_dmaindx = cpu_to_le16(ddp->xid <<
-				I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT);
-
-	flags_rsvd_lanq = I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP;
-	flags_rsvd_lanq |= (u64)(target_mode ?
-			I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP :
-			I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT);
-
-	flags_rsvd_lanq |= (u64)((sof == FC_SOF_I2 || sof == FC_SOF_N2) ?
-			I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 :
-			I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3);
-
-	flags_rsvd_lanq |= ((u64)skb->queue_mapping <<
-				I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT);
-	filter_desc->flags_rsvd_lanq = cpu_to_le64(flags_rsvd_lanq);
-
-	/* By this time, all offload related descriptors has been programmed */
-	tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_invalidate_ddp - invalidates DDP in case of abort
- * @tx_ring: transmit ring for this packet
- * @skb: the packet associated w/ this DDP invalidation, i.e., ABTS
- * @ddp: the SW DDP context for this DDP
- *
- * Programs the Tx context descriptor to do DDP invalidation.
- **/
-static void i40e_fcoe_invalidate_ddp(struct i40e_ring *tx_ring,
-				     struct sk_buff *skb,
-				     struct i40e_fcoe_ddp *ddp)
-{
-	struct i40e_tx_context_desc *context_desc;
-	int i;
-
-	if (test_and_set_bit(__I40E_FCOE_DDP_ABORTED, &ddp->flags))
-		return;
-
-	i = tx_ring->next_to_use;
-	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
-	i++;
-	if (i == tx_ring->count)
-		i = 0;
-
-	context_desc->tunneling_params = cpu_to_le32(0);
-	context_desc->l2tag2 = cpu_to_le16(0);
-	context_desc->rsvd = cpu_to_le16(0);
-	context_desc->type_cmd_tso_mss = cpu_to_le64(
-		I40E_TX_DESC_DTYPE_FCOE_CTX |
-		(I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL <<
-		I40E_TXD_CTX_QW1_CMD_SHIFT) |
-		(I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND <<
-		I40E_TXD_CTX_QW1_CMD_SHIFT));
-	tx_ring->next_to_use = i;
-}
-
-/**
- * i40e_fcoe_handle_ddp - check we should setup or invalidate DDP
- * @tx_ring: transmit ring for this packet
- * @skb: the packet to be sent out
- * @sof: the SOF to indicate class of service
- *
- * Determine if it is ABTS/READ/XFER_RDY, and finds out if there is
- * a matching SW DDP context for this command. DDP is applicable
- * only in case of READ if initiator or WRITE in case of
- * responder (via checking XFER_RDY). In case this is an ABTS, send
- * just invalidate the context.
- **/
-static void i40e_fcoe_handle_ddp(struct i40e_ring *tx_ring,
-				 struct sk_buff *skb, u8 sof)
-{
-	struct i40e_pf *pf = tx_ring->vsi->back;
-	struct i40e_fcoe *fcoe = &pf->fcoe;
-	struct fc_frame_header *fh;
-	struct i40e_fcoe_ddp *ddp;
-	u32 f_ctl;
-	u8 r_ctl;
-	u16 xid;
-
-	fh = (struct fc_frame_header *)skb_transport_header(skb);
-	f_ctl = ntoh24(fh->fh_f_ctl);
-	r_ctl = fh->fh_r_ctl;
-	ddp = NULL;
-
-	if ((r_ctl == FC_RCTL_DD_DATA_DESC) && (f_ctl & FC_FC_EX_CTX)) {
-		/* exchange responder? if so, XFER_RDY for write */
-		xid = ntohs(fh->fh_rx_id);
-		if (i40e_fcoe_xid_is_valid(xid)) {
-			ddp = &fcoe->ddp[xid];
-			if ((ddp->xid == xid) &&
-			    (test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
-				i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
-		}
-	} else if (r_ctl == FC_RCTL_DD_UNSOL_CMD) {
-		/* exchange originator, check READ cmd */
-		xid = ntohs(fh->fh_ox_id);
-		if (i40e_fcoe_xid_is_valid(xid)) {
-			ddp = &fcoe->ddp[xid];
-			if ((ddp->xid == xid) &&
-			    (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
-				i40e_fcoe_program_ddp(tx_ring, skb, ddp, sof);
-		}
-	} else if (r_ctl == FC_RCTL_BA_ABTS) {
-		/* exchange originator, check ABTS */
-		xid = ntohs(fh->fh_ox_id);
-		if (i40e_fcoe_xid_is_valid(xid)) {
-			ddp = &fcoe->ddp[xid];
-			if ((ddp->xid == xid) &&
-			    (!test_bit(__I40E_FCOE_DDP_TARGET, &ddp->flags)))
-				i40e_fcoe_invalidate_ddp(tx_ring, skb, ddp);
-		}
-	}
-}
-
-/**
- * i40e_fcoe_tso - set up FCoE TSO
- * @tx_ring:  ring to send buffer on
- * @skb:      send buffer
- * @tx_flags: collected send information
- * @hdr_len:  the tso header length
- * @sof: the SOF to indicate class of service
- *
- * Note must already have sof checked to be either class 2 or class 3 before
- * calling this function.
- *
- * Returns 1 to indicate sequence segmentation offload is properly setup
- * or returns 0 to indicate no tso is needed, otherwise returns error
- * code to drop the frame.
- **/
-static int i40e_fcoe_tso(struct i40e_ring *tx_ring,
-			 struct sk_buff *skb,
-			 u32 tx_flags, u8 *hdr_len, u8 sof)
-{
-	struct i40e_tx_context_desc *context_desc;
-	u32 cd_type, cd_cmd, cd_tso_len, cd_mss;
-	struct fc_frame_header *fh;
-	u64 cd_type_cmd_tso_mss;
-
-	/* must match gso type as FCoE */
-	if (!skb_is_gso(skb))
-		return 0;
-
-	/* is it the expected gso type for FCoE ?*/
-	if (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE) {
-		netdev_err(skb->dev,
-			   "wrong gso type %d:expecting SKB_GSO_FCOE\n",
-			   skb_shinfo(skb)->gso_type);
-		return -EINVAL;
-	}
-
-	/* header and trailer are inserted by hw */
-	*hdr_len = skb_transport_offset(skb) + sizeof(struct fc_frame_header) +
-		   sizeof(struct fcoe_crc_eof);
-
-	/* check sof to decide a class 2 or 3 TSO */
-	if (likely(i40e_fcoe_sof_is_class3(sof)))
-		cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3;
-	else
-		cd_cmd = I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2;
-
-	/* param field valid? */
-	fh = (struct fc_frame_header *)skb_transport_header(skb);
-	if (fh->fh_f_ctl[2] & FC_FC_REL_OFF)
-		cd_cmd |= I40E_FCOE_TX_CTX_DESC_RELOFF;
-
-	/* fill the field values */
-	cd_type = I40E_TX_DESC_DTYPE_FCOE_CTX;
-	cd_tso_len = skb->len - *hdr_len;
-	cd_mss = skb_shinfo(skb)->gso_size;
-	cd_type_cmd_tso_mss =
-		((u64)cd_type  << I40E_TXD_CTX_QW1_DTYPE_SHIFT)     |
-		((u64)cd_cmd     << I40E_TXD_CTX_QW1_CMD_SHIFT)	    |
-		((u64)cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
-		((u64)cd_mss     << I40E_TXD_CTX_QW1_MSS_SHIFT);
-
-	/* grab the next descriptor */
-	context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use);
-	tx_ring->next_to_use++;
-	if (tx_ring->next_to_use == tx_ring->count)
-		tx_ring->next_to_use = 0;
-
-	context_desc->tunneling_params = 0;
-	context_desc->l2tag2 = cpu_to_le16((tx_flags & I40E_TX_FLAGS_VLAN_MASK)
-					    >> I40E_TX_FLAGS_VLAN_SHIFT);
-	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
-
-	return 1;
-}
-
-/**
- * i40e_fcoe_tx_map - build the tx descriptor
- * @tx_ring:  ring to send buffer on
- * @skb:      send buffer
- * @first:    first buffer info buffer to use
- * @tx_flags: collected send information
- * @hdr_len:  ptr to the size of the packet header
- * @eof:      the frame eof value
- *
- * Note, for FCoE, sof and eof are already checked
- **/
-static void i40e_fcoe_tx_map(struct i40e_ring *tx_ring,
-			     struct sk_buff *skb,
-			     struct i40e_tx_buffer *first,
-			     u32 tx_flags, u8 hdr_len, u8 eof)
-{
-	u32 td_offset = 0;
-	u32 td_cmd = 0;
-	u32 maclen;
-
-	/* insert CRC */
-	td_cmd = I40E_TX_DESC_CMD_ICRC;
-
-	/* setup MACLEN */
-	maclen = skb_network_offset(skb);
-	if (tx_flags & I40E_TX_FLAGS_SW_VLAN)
-		maclen += sizeof(struct vlan_hdr);
-
-	if (skb->protocol == htons(ETH_P_FCOE)) {
-		/* for FCoE, maclen should exclude ether type */
-		maclen -= 2;
-		/* setup type as FCoE and EOF insertion */
-		td_cmd |= (I40E_TX_DESC_CMD_FCOET | i40e_fcoe_ctxt_eof(eof));
-		/* setup FCoELEN and FCLEN */
-		td_offset |= ((((sizeof(struct fcoe_hdr) + 2) >> 2) <<
-				I40E_TX_DESC_LENGTH_IPLEN_SHIFT) |
-			      ((sizeof(struct fc_frame_header) >> 2) <<
-				I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT));
-		/* trim to exclude trailer */
-		pskb_trim(skb, skb->len - sizeof(struct fcoe_crc_eof));
-	}
-
-	/* MACLEN is ether header length in words not bytes */
-	td_offset |= (maclen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
-
-	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset);
-}
-
-/**
- * i40e_fcoe_set_skb_header - adjust skb header point for FIP/FCoE/FC
- * @skb: the skb to be adjusted
- *
- * Returns true if this skb is a FCoE/FIP or VLAN carried FCoE/FIP and then
- * adjusts the skb header pointers correspondingly. Otherwise, returns false.
- **/
-static inline int i40e_fcoe_set_skb_header(struct sk_buff *skb)
-{
-	__be16 protocol = skb->protocol;
-
-	skb_reset_mac_header(skb);
-	skb->mac_len = sizeof(struct ethhdr);
-	if (protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veth = (struct vlan_ethhdr *)eth_hdr(skb);
-
-		protocol = veth->h_vlan_encapsulated_proto;
-		skb->mac_len += sizeof(struct vlan_hdr);
-	}
-
-	/* FCoE or FIP only */
-	if ((protocol != htons(ETH_P_FIP)) &&
-	    (protocol != htons(ETH_P_FCOE)))
-		return -EINVAL;
-
-	/* set header to L2 of FCoE/FIP */
-	skb_set_network_header(skb, skb->mac_len);
-	if (protocol == htons(ETH_P_FIP))
-		return 0;
-
-	/* set header to L3 of FC */
-	skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));
-	return 0;
-}
-
-/**
- * i40e_fcoe_xmit_frame - transmit buffer
- * @skb:     send buffer
- * @netdev:  the fcoe netdev
- *
- * Returns 0 if sent, else an error code
- **/
-static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb,
-					struct net_device *netdev)
-{
-	struct i40e_netdev_priv *np = netdev_priv(skb->dev);
-	struct i40e_vsi *vsi = np->vsi;
-	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
-	struct i40e_tx_buffer *first;
-	u32 tx_flags = 0;
-	int fso, count;
-	u8 hdr_len = 0;
-	u8 sof = 0;
-	u8 eof = 0;
-
-	if (i40e_fcoe_set_skb_header(skb))
-		goto out_drop;
-
-	count = i40e_xmit_descriptor_count(skb);
-	if (i40e_chk_linearize(skb, count)) {
-		if (__skb_linearize(skb))
-			goto out_drop;
-		count = i40e_txd_use_count(skb->len);
-		tx_ring->tx_stats.tx_linearize++;
-	}
-
-	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
-	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
-	 *       + 4 desc gap to avoid the cache line where head is,
-	 *       + 1 desc for context descriptor,
-	 * otherwise try next time
-	 */
-	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
-		tx_ring->tx_stats.tx_busy++;
-		return NETDEV_TX_BUSY;
-	}
-
-	/* prepare the xmit flags */
-	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
-		goto out_drop;
-
-	/* record the location of the first descriptor for this packet */
-	first = &tx_ring->tx_bi[tx_ring->next_to_use];
-
-	/* FIP is a regular L2 traffic w/o offload */
-	if (skb->protocol == htons(ETH_P_FIP))
-		goto out_send;
-
-	/* check sof and eof, only supports FC Class 2 or 3 */
-	if (i40e_fcoe_fc_sof(skb, &sof) || i40e_fcoe_fc_eof(skb, &eof)) {
-		netdev_err(netdev, "SOF/EOF error:%02x - %02x\n", sof, eof);
-		goto out_drop;
-	}
-
-	/* always do FCCRC for FCoE */
-	tx_flags |= I40E_TX_FLAGS_FCCRC;
-
-	/* check we should do sequence offload */
-	fso = i40e_fcoe_tso(tx_ring, skb, tx_flags, &hdr_len, sof);
-	if (fso < 0)
-		goto out_drop;
-	else if (fso)
-		tx_flags |= I40E_TX_FLAGS_FSO;
-	else
-		i40e_fcoe_handle_ddp(tx_ring, skb, sof);
-
-out_send:
-	/* send out the packet */
-	i40e_fcoe_tx_map(tx_ring, skb, first, tx_flags, hdr_len, eof);
-
-	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
-	return NETDEV_TX_OK;
-
-out_drop:
-	dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
-}
-
-/**
- * i40e_fcoe_change_mtu - NDO callback to change the Maximum Transfer Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns error as operation not permitted
- *
- **/
-static int i40e_fcoe_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	netdev_warn(netdev, "MTU change is not supported on FCoE interfaces\n");
-	return -EPERM;
-}
-
-/**
- * i40e_fcoe_set_features - set the netdev feature flags
- * @netdev: ptr to the netdev being adjusted
- * @features: the feature set that the stack is suggesting
- *
- **/
-static int i40e_fcoe_set_features(struct net_device *netdev,
-				  netdev_features_t features)
-{
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_vsi *vsi = np->vsi;
-
-	if (features & NETIF_F_HW_VLAN_CTAG_RX)
-		i40e_vlan_stripping_enable(vsi);
-	else
-		i40e_vlan_stripping_disable(vsi);
-
-	return 0;
-}
-
-static const struct net_device_ops i40e_fcoe_netdev_ops = {
-	.ndo_open		= i40e_open,
-	.ndo_stop		= i40e_close,
-	.ndo_get_stats64	= i40e_get_netdev_stats_struct,
-	.ndo_set_rx_mode	= i40e_set_rx_mode,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= i40e_set_mac,
-	.ndo_change_mtu		= i40e_fcoe_change_mtu,
-	.ndo_do_ioctl		= i40e_ioctl,
-	.ndo_tx_timeout		= i40e_tx_timeout,
-	.ndo_vlan_rx_add_vid	= i40e_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	= i40e_vlan_rx_kill_vid,
-	.ndo_setup_tc		= __i40e_setup_tc,
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	.ndo_poll_controller	= i40e_netpoll,
-#endif
-	.ndo_start_xmit		= i40e_fcoe_xmit_frame,
-	.ndo_fcoe_enable	= i40e_fcoe_enable,
-	.ndo_fcoe_disable	= i40e_fcoe_disable,
-	.ndo_fcoe_ddp_setup	= i40e_fcoe_ddp_get,
-	.ndo_fcoe_ddp_done	= i40e_fcoe_ddp_put,
-	.ndo_fcoe_ddp_target	= i40e_fcoe_ddp_target,
-	.ndo_set_features	= i40e_fcoe_set_features,
-};
-
-/* fcoe network device type */
-static struct device_type fcoe_netdev_type = {
-	.name = "fcoe",
-};
-
-/**
- * i40e_fcoe_config_netdev - prepares the VSI context for creating a FCoE VSI
- * @vsi: pointer to the associated VSI struct
- * @ctxt: pointer to the associated VSI context to be passed to HW
- *
- * Returns 0 on success or < 0 on error
- **/
-void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi)
-{
-	struct i40e_hw *hw = &vsi->back->hw;
-	struct i40e_pf *pf = vsi->back;
-
-	if (vsi->type != I40E_VSI_FCOE)
-		return;
-
-	netdev->features = (NETIF_F_HW_VLAN_CTAG_TX |
-			    NETIF_F_HW_VLAN_CTAG_RX |
-			    NETIF_F_HW_VLAN_CTAG_FILTER);
-
-	netdev->vlan_features = netdev->features;
-	netdev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
-				   NETIF_F_HW_VLAN_CTAG_RX |
-				   NETIF_F_HW_VLAN_CTAG_FILTER);
-	netdev->fcoe_ddp_xid = I40E_FCOE_DDP_MAX - 1;
-	netdev->features |= NETIF_F_ALL_FCOE;
-	netdev->vlan_features |= NETIF_F_ALL_FCOE;
-	netdev->hw_features |= netdev->features;
-	netdev->priv_flags |= IFF_UNICAST_FLT;
-	netdev->priv_flags |= IFF_SUPP_NOFCS;
-
-	strlcpy(netdev->name, "fcoe%d", IFNAMSIZ-1);
-	netdev->mtu = FCOE_MTU;
-	SET_NETDEV_DEV(netdev, &pf->pdev->dev);
-	SET_NETDEV_DEVTYPE(netdev, &fcoe_netdev_type);
-	/* set different dev_port value 1 for FCoE netdev than the default
-	 * zero dev_port value for PF netdev, this helps biosdevname user
-	 * tool to differentiate them correctly while both attached to the
-	 * same PCI function.
-	 */
-	netdev->dev_port = 1;
-	spin_lock_bh(&vsi->mac_filter_hash_lock);
-	i40e_add_filter(vsi, hw->mac.san_addr, 0);
-	i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0);
-	i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0);
-	i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0);
-	spin_unlock_bh(&vsi->mac_filter_hash_lock);
-
-	/* use san mac */
-	ether_addr_copy(netdev->dev_addr, hw->mac.san_addr);
-	ether_addr_copy(netdev->perm_addr, hw->mac.san_addr);
-	/* fcoe netdev ops */
-	netdev->netdev_ops = &i40e_fcoe_netdev_ops;
-}
-
-/**
- * i40e_fcoe_vsi_setup - allocate and set up FCoE VSI
- * @pf: the PF that VSI is associated with
- *
- **/
-void i40e_fcoe_vsi_setup(struct i40e_pf *pf)
-{
-	struct i40e_vsi *vsi;
-	u16 seid;
-	int i;
-
-	if (!(pf->flags & I40E_FLAG_FCOE_ENABLED))
-		return;
-
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		vsi = pf->vsi[i];
-		if (vsi && vsi->type == I40E_VSI_FCOE) {
-			dev_warn(&pf->pdev->dev,
-				 "FCoE VSI already created\n");
-			return;
-		}
-	}
-
-	seid = pf->vsi[pf->lan_vsi]->seid;
-	vsi = i40e_vsi_setup(pf, I40E_VSI_FCOE, seid, 0);
-	if (vsi) {
-		dev_dbg(&pf->pdev->dev,
-			"Successfully created FCoE VSI seid %d id %d uplink_seid %d PF seid %d\n",
-			vsi->seid, vsi->id, vsi->uplink_seid, seid);
-	} else {
-		dev_info(&pf->pdev->dev, "Failed to create FCoE VSI\n");
-	}
-}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h b/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
deleted file mode 100644
index a93174ddeaba..000000000000
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*******************************************************************************
- *
- * Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Contact Information:
- * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
- * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
- *
- ******************************************************************************/
-
-#ifndef _I40E_FCOE_H_
-#define _I40E_FCOE_H_
-
-/* FCoE HW context helper macros */
-#define I40E_DDP_CONTEXT_DESC(R, i)     \
-	(&(((struct i40e_fcoe_ddp_context_desc *)((R)->desc))[i]))
-
-#define I40E_QUEUE_CONTEXT_DESC(R, i)   \
-	(&(((struct i40e_fcoe_queue_context_desc *)((R)->desc))[i]))
-
-#define I40E_FILTER_CONTEXT_DESC(R, i)  \
-	(&(((struct i40e_fcoe_filter_context_desc *)((R)->desc))[i]))
-
-/* receive queue descriptor filter status for FCoE */
-#define I40E_RX_DESC_FLTSTAT_FCMASK	0x3
-#define I40E_RX_DESC_FLTSTAT_NOMTCH	0x0	/* no ddp context match */
-#define I40E_RX_DESC_FLTSTAT_NODDP	0x1	/* no ddp due to error */
-#define I40E_RX_DESC_FLTSTAT_DDP	0x2	/* DDPed payload, post header */
-#define I40E_RX_DESC_FLTSTAT_FCPRSP	0x3	/* FCP_RSP */
-
-/* receive queue descriptor error codes for FCoE */
-#define I40E_RX_DESC_FCOE_ERROR_MASK		\
-	(I40E_RX_DESC_ERROR_L3L4E_PROT |	\
-	 I40E_RX_DESC_ERROR_L3L4E_FC |		\
-	 I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR |	\
-	 I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN)
-
-/* receive queue descriptor programming error */
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL(e)	\
-	(((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT(e)	\
-	(((e) >> I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT) & 0x1)
-
-#define I40E_RX_PROG_FCOE_ERROR_TBL_FULL_BIT	\
-	BIT(I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT)
-#define I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT	\
-	BIT(I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT)
-
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL(e)	\
-	I40E_RX_PROG_FCOE_ERROR_CONFLICT(e)
-#define I40E_RX_PROG_FCOE_ERROR_INVLFAIL_BIT	\
-	I40E_RX_PROG_FCOE_ERROR_CONFLICT_BIT
-
-/* FCoE DDP related definitions */
-#define I40E_FCOE_MIN_XID	0x0000  /* the min xid supported by fcoe_sw */
-#define I40E_FCOE_MAX_XID	0x0FFF  /* the max xid supported by fcoe_sw */
-#define I40E_FCOE_DDP_BUFFCNT_MAX	512	/* 9 bits bufcnt */
-#define I40E_FCOE_DDP_PTR_ALIGN		16
-#define I40E_FCOE_DDP_PTR_MAX	(I40E_FCOE_DDP_BUFFCNT_MAX * sizeof(dma_addr_t))
-#define I40E_FCOE_DDP_BUF_MIN	4096
-#define I40E_FCOE_DDP_MAX	2048
-#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT	8
-
-/* supported netdev features for FCoE */
-#define I40E_FCOE_NETIF_FEATURES (NETIF_F_ALL_FCOE | \
-	NETIF_F_HW_VLAN_CTAG_TX | \
-	NETIF_F_HW_VLAN_CTAG_RX | \
-	NETIF_F_HW_VLAN_CTAG_FILTER)
-
-/* DDP context flags */
-enum i40e_fcoe_ddp_flags {
-	__I40E_FCOE_DDP_NONE = 1,
-	__I40E_FCOE_DDP_TARGET,
-	__I40E_FCOE_DDP_INITALIZED,
-	__I40E_FCOE_DDP_PROGRAMMED,
-	__I40E_FCOE_DDP_DONE,
-	__I40E_FCOE_DDP_ABORTED,
-	__I40E_FCOE_DDP_UNMAPPED,
-};
-
-/* DDP SW context struct */
-struct i40e_fcoe_ddp {
-	int len;
-	u16 xid;
-	u16 firstoff;
-	u16 lastsize;
-	u16 list_len;
-	u8 fcerr;
-	u8 prerr;
-	unsigned long flags;
-	unsigned int sgc;
-	struct scatterlist *sgl;
-	dma_addr_t udp;
-	u64 *udl;
-	struct dma_pool *pool;
-
-};
-
-struct i40e_fcoe_ddp_pool {
-	struct dma_pool *pool;
-};
-
-struct i40e_fcoe {
-	unsigned long mode;
-	atomic_t refcnt;
-	struct i40e_fcoe_ddp_pool __percpu *ddp_pool;
-	struct i40e_fcoe_ddp ddp[I40E_FCOE_DDP_MAX];
-};
-
-#endif /* _I40E_FCOE_H_ */

From 20dd9147c96e0dfd628fe42840e8f8d5ccd077c5 Mon Sep 17 00:00:00 2001
From: Gustavo A R Silva <garsilva@embeddedor.com>
Date: Thu, 15 Feb 2018 11:44:35 -0600
Subject: [PATCH 0923/1678] i40evf/i40evf_main: Fix variable assignment in
 i40evf_parse_cls_flower

It seems this is a copy-paste error and that the proper variable to use
in this particular case is _src_ instead of _dst_.

Addresses-Coverity-ID: 1465282 ("Copy-paste error")
Fixes: 0075fa0fadd0 ("i40evf: Add support to apply cloud filters")
Signed-off-by: Gustavo A R Silva <garsilva@embeddedor.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index dae121877935..486cf491b000 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2749,7 +2749,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter,
 
 		if (key->src) {
 			vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
-			vf->data.tcp_spec.src_port = key->dst;
+			vf->data.tcp_spec.src_port = key->src;
 		}
 	}
 	vf->field_flags = field_flags;

From 028daf80117376b22909becd9720daaefdfceff4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Jab=C5=82o=C5=84ski?=
 <pawel.jablonski@intel.com>
Date: Thu, 8 Mar 2018 14:52:05 -0800
Subject: [PATCH 0924/1678] i40e: Fix attach VF to VM issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix for "Resource temporarily unavailable" problem when virsh is
trying to attach a device to VM. When the VF driver is loaded on
host and virsh is trying to attach it to the VM and set a MAC
address, it ends with a race condition between i40e_reset_vf and
i40e_ndo_set_vf_mac functions. The bug is fixed by adding polling
in i40e_ndo_set_vf_mac function For when the VF is in Reset mode.

Signed-off-by: Paweł Jabłoński <pawel.jablonski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index e23975c67417..520d86357c69 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3741,6 +3741,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	int ret = 0;
 	struct hlist_node *h;
 	int bkt;
+	u8 i;
 
 	/* validate the request */
 	if (vf_id >= pf->num_alloc_vfs) {
@@ -3752,6 +3753,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 
 	vf = &(pf->vf[vf_id]);
 	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	/* When the VF is resetting wait until it is done.
+	 * It can take up to 200 milliseconds,
+	 * but wait for up to 300 milliseconds to be safe.
+	 */
+	for (i = 0; i < 15; i++) {
+		if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+			break;
+		msleep(20);
+	}
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
 		dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n",
 			vf_id);

From 2972b00797588b5cfdd8369612cca8415a4e9e73 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 8 Mar 2018 14:52:06 -0800
Subject: [PATCH 0925/1678] i40e: Cleanup i40e_vlan_rx_register

We used to use the function i40e_vlan_rx_register as a way to hook
into the now defunct .ndo_vlan_rx_register netdev hook. This was
removed but we kept the function around because we still used it
internally to control enabling or disabling of VLAN stripping.

As pointed out in upstream review, VLAN stripping is only used in a
single location and the previous function is quite small, just inline
it into i40e_restore_vlan() rather than carrying the function
separately.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index be9a1467a1a1..146969891579 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2718,22 +2718,6 @@ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
 	}
 }
 
-/**
- * i40e_vlan_rx_register - Setup or shutdown vlan offload
- * @netdev: network interface to be adjusted
- * @features: netdev features to test if VLAN offload is enabled or not
- **/
-static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
-{
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_vsi *vsi = np->vsi;
-
-	if (features & NETIF_F_HW_VLAN_CTAG_RX)
-		i40e_vlan_stripping_enable(vsi);
-	else
-		i40e_vlan_stripping_disable(vsi);
-}
-
 /**
  * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
  * @vsi: the vsi being configured
@@ -2909,7 +2893,10 @@ static void i40e_restore_vlan(struct i40e_vsi *vsi)
 	if (!vsi->netdev)
 		return;
 
-	i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features);
+	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		i40e_vlan_stripping_enable(vsi);
+	else
+		i40e_vlan_stripping_disable(vsi);
 
 	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
 		i40e_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q),

From 03ce7b1d23498c97bc530a1296852af0cdd2fe77 Mon Sep 17 00:00:00 2001
From: Filip Sadowski <filip.sadowski@intel.com>
Date: Thu, 8 Mar 2018 14:52:07 -0800
Subject: [PATCH 0926/1678] i40e: Fix permission check for VF MAC filters

When VF requests adding of MAC filters the checking is done against number
of already present MAC filters not adding them at the same time. It makes
it possible to add a bunch of filters at once possibly exceeding
acceptable limit of I40E_VC_MAX_MAC_ADDR_PER_VF filters.

This happens because when checking vf->num_mac, we do not check how many
filters are being requested at once. Modify the check function to ensure
that it knows how many filters are being requested. This allows the
check to ensure that the total number of filters in a single request
does not cause us to go over the limit.

Additionally, move the check to within the lock to ensure that the
vf->num_mac is checked while holding the lock to maintain consistency.
We could have simply moved the call to i40e_vf_check_permission to
within the loop, but this could cause a request to be non-atomic, and
add some but not all the addresses, while reporting an error code. We
want to avoid this behavior so that users are not confused about which
filters have or have not been added.

Signed-off-by: Filip Sadowski <filip.sadowski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 79 ++++++++++++-------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 520d86357c69..321ab4badb68 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2368,25 +2368,47 @@ error_param:
 /**
  * i40e_check_vf_permission
  * @vf: pointer to the VF info
- * @macaddr: pointer to the MAC Address being checked
+ * @al: MAC address list from virtchnl
  *
- * Check if the VF has permission to add or delete unicast MAC address
- * filters and return error code -EPERM if not.  Then check if the
- * address filter requested is broadcast or zero and if so return
- * an invalid MAC address error code.
+ * Check that the given list of MAC addresses is allowed. Will return -EPERM
+ * if any address in the list is not valid. Checks the following conditions:
+ *
+ * 1) broadcast and zero addresses are never valid
+ * 2) unicast addresses are not allowed if the VMM has administratively set
+ *    the VF MAC address, unless the VF is marked as privileged.
+ * 3) There is enough space to add all the addresses.
+ *
+ * Note that to guarantee consistency, it is expected this function be called
+ * while holding the mac_filter_hash_lock, as otherwise the current number of
+ * addresses might not be accurate.
  **/
-static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
+static inline int i40e_check_vf_permission(struct i40e_vf *vf,
+					   struct virtchnl_ether_addr_list *al)
 {
 	struct i40e_pf *pf = vf->pf;
-	int ret = 0;
+	int i;
+
+	/* If this VF is not privileged, then we can't add more than a limited
+	 * number of addresses. Check to make sure that the additions do not
+	 * push us over the limit.
+	 */
+	if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+	    (vf->num_mac + al->num_elements) > I40E_VC_MAX_MAC_ADDR_PER_VF) {
+		dev_err(&pf->pdev->dev,
+			"Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+		return -EPERM;
+	}
+
+	for (i = 0; i < al->num_elements; i++) {
+		u8 *addr = al->list[i].addr;
+
+		if (is_broadcast_ether_addr(addr) ||
+		    is_zero_ether_addr(addr)) {
+			dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n",
+				addr);
+			return I40E_ERR_INVALID_MAC_ADDR;
+		}
 
-	if (is_broadcast_ether_addr(macaddr) ||
-		   is_zero_ether_addr(macaddr)) {
-		dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", macaddr);
-		ret = I40E_ERR_INVALID_MAC_ADDR;
-	} else if (vf->pf_set_mac && !is_multicast_ether_addr(macaddr) &&
-		   !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
-		   !ether_addr_equal(macaddr, vf->default_lan_addr.addr)) {
 		/* If the host VMM administrator has set the VF MAC address
 		 * administratively via the ndo_set_vf_mac command then deny
 		 * permission to the VF to add or delete unicast MAC addresses.
@@ -2394,16 +2416,16 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, u8 *macaddr)
 		 * The VF may request to set the MAC address filter already
 		 * assigned to it so do not return an error in that case.
 		 */
-		dev_err(&pf->pdev->dev,
-			"VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
-		ret = -EPERM;
-	} else if ((vf->num_mac >= I40E_VC_MAX_MAC_ADDR_PER_VF) &&
-		   !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
-		dev_err(&pf->pdev->dev,
-			"VF is not trusted, switch the VF to trusted to add more functionality\n");
-		ret = -EPERM;
+		if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+		    !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+		    !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+			dev_err(&pf->pdev->dev,
+				"VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n");
+			return -EPERM;
+		}
 	}
-	return ret;
+
+	return 0;
 }
 
 /**
@@ -2430,11 +2452,6 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 		goto error_param;
 	}
 
-	for (i = 0; i < al->num_elements; i++) {
-		ret = i40e_check_vf_permission(vf, al->list[i].addr);
-		if (ret)
-			goto error_param;
-	}
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
 	/* Lock once, because all function inside for loop accesses VSI's
@@ -2442,6 +2459,12 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	 */
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
+	ret = i40e_check_vf_permission(vf, al);
+	if (ret) {
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+		goto error_param;
+	}
+
 	/* add new addresses to the list */
 	for (i = 0; i < al->num_elements; i++) {
 		struct i40e_mac_filter *f;

From 6ac6d5a7ff424354074d21a68c2018a8b6fe4fc6 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 8 Mar 2018 14:52:08 -0800
Subject: [PATCH 0927/1678] i40e: track filter type statistics when deleting
 invalid filters

When hardware has trouble with a particular filter, we delete it from
the list. Unfortunately, we did not properly update the per-filter
statistic when doing so.

Create a helper function to handle this, and properly reduce the
necessary counter so that it tracks the number of active filters
properly.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 56 ++++++++++++++++++---
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 146969891579..848c8f1acbbf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8135,6 +8135,51 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
 	return fcnt_prog;
 }
 
+/**
+ * i40e_delete_invalid_filter - Delete an invalid FDIR filter
+ * @pf: board private structure
+ * @filter: FDir filter to remove
+ */
+static void i40e_delete_invalid_filter(struct i40e_pf *pf,
+				       struct i40e_fdir_filter *filter)
+{
+	/* Update counters */
+	pf->fdir_pf_active_filters--;
+	pf->fd_inv = 0;
+
+	switch (filter->flow_type) {
+	case TCP_V4_FLOW:
+		pf->fd_tcp4_filter_cnt--;
+		break;
+	case UDP_V4_FLOW:
+		pf->fd_udp4_filter_cnt--;
+		break;
+	case SCTP_V4_FLOW:
+		pf->fd_sctp4_filter_cnt--;
+		break;
+	case IP_USER_FLOW:
+		switch (filter->ip4_proto) {
+		case IPPROTO_TCP:
+			pf->fd_tcp4_filter_cnt--;
+			break;
+		case IPPROTO_UDP:
+			pf->fd_udp4_filter_cnt--;
+			break;
+		case IPPROTO_SCTP:
+			pf->fd_sctp4_filter_cnt--;
+			break;
+		case IPPROTO_IP:
+			pf->fd_ip4_filter_cnt--;
+			break;
+		}
+		break;
+	}
+
+	/* Remove the filter from the list and free memory */
+	hlist_del(&filter->fdir_node);
+	kfree(filter);
+}
+
 /**
  * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled
  * @pf: board private structure
@@ -8179,14 +8224,9 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
 	/* if hw had a problem adding a filter, delete it */
 	if (pf->fd_inv > 0) {
 		hlist_for_each_entry_safe(filter, node,
-					  &pf->fdir_filter_list, fdir_node) {
-			if (filter->fd_id == pf->fd_inv) {
-				hlist_del(&filter->fdir_node);
-				kfree(filter);
-				pf->fdir_pf_active_filters--;
-				pf->fd_inv = 0;
-			}
-		}
+					  &pf->fdir_filter_list, fdir_node)
+			if (filter->fd_id == pf->fd_inv)
+				i40e_delete_invalid_filter(pf, filter);
 	}
 }
 

From 01c96952847d6478e47ebe8194217b40b922d791 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 8 Mar 2018 14:52:09 -0800
Subject: [PATCH 0928/1678] i40e: factor out re-enable functions for ATR and SB

A future patch needs to expand on the logic for re-enabling ATR. Doing
so would cause some code to break the 80-character line limit.

To reduce the level of indentation, factor out helper functions for
re-enabling ATR and SB rules.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 48 ++++++++++++++-------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 848c8f1acbbf..beb854208047 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8135,6 +8135,34 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
 	return fcnt_prog;
 }
 
+/**
+ * i40e_reenable_fdir_sb - Restore FDir SB capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
+{
+	if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
+		pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
+		if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+		    (I40E_DEBUG_FD & pf->hw.debug_mask))
+			dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
+	}
+}
+
+/**
+ * i40e_reenable_fdir_atr - Restore FDir ATR capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
+{
+	if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
+		pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+		    (I40E_DEBUG_FD & pf->hw.debug_mask))
+			dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
+	}
+}
+
 /**
  * i40e_delete_invalid_filter - Delete an invalid FDIR filter
  * @pf: board private structure
@@ -8198,28 +8226,16 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
 	fcnt_avail = pf->fdir_pf_filter_count;
 	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
 	    (pf->fd_add_err == 0) ||
-	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
-		if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
-			pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
-			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-			    (I40E_DEBUG_FD & pf->hw.debug_mask))
-				dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
-		}
-	}
+	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
+		i40e_reenable_fdir_sb(pf);
 
 	/* We should wait for even more space before re-enabling ATR.
 	 * Additionally, we cannot enable ATR as long as we still have TCP SB
 	 * rules active.
 	 */
 	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
-	    (pf->fd_tcp4_filter_cnt == 0)) {
-		if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
-			pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
-			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-			    (I40E_DEBUG_FD & pf->hw.debug_mask))
-				dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
-		}
-	}
+	    (pf->fd_tcp4_filter_cnt == 0))
+		i40e_reenable_fdir_atr(pf);
 
 	/* if hw had a problem adding a filter, delete it */
 	if (pf->fd_inv > 0) {

From bc2a3a6cdb00c7e3c04735370fb5777d3cae22d2 Mon Sep 17 00:00:00 2001
From: Mariusz Stachura <mariusz.stachura@intel.com>
Date: Thu, 8 Mar 2018 14:52:10 -0800
Subject: [PATCH 0929/1678] i40e: fix for wrong partition id calculation on OCP
 mezz cards

This patch overwrites number of ports for X722 devices with support
for OCP PHY mezzanine.
The old method with checking if port is disabled in the PRTGEN_CNF
register cannot be used in this case. When the OCP is removed, ports
were seen as disabled, which resulted in wrong calculation of partition
id, that caused WoL to be disabled on certain ports.

Signed-off-by: Mariusz Stachura <mariusz.stachura@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c | 23 ++++++++++++++++++-
 drivers/net/ethernet/intel/i40e/i40e_type.h   |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index ef5a868aae46..242c4c789e8d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -3200,9 +3200,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
 	u32 valid_functions, num_functions;
 	u32 number, logical_id, phys_id;
 	struct i40e_hw_capabilities *p;
+	u16 id, ocp_cfg_word0;
+	i40e_status status;
 	u8 major_rev;
 	u32 i = 0;
-	u16 id;
 
 	cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
 
@@ -3389,6 +3390,26 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
 			hw->num_ports++;
 	}
 
+	/* OCP cards case: if a mezz is removed the Ethernet port is at
+	 * disabled state in PRTGEN_CNF register. Additional NVM read is
+	 * needed in order to check if we are dealing with OCP card.
+	 * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting
+	 * physical ports results in wrong partition id calculation and thus
+	 * not supporting WoL.
+	 */
+	if (hw->mac.type == I40E_MAC_X722) {
+		if (!i40e_acquire_nvm(hw, I40E_RESOURCE_READ)) {
+			status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR,
+						  2 * I40E_SR_OCP_CFG_WORD0,
+						  sizeof(ocp_cfg_word0),
+						  &ocp_cfg_word0, true, NULL);
+			if (!status &&
+			    (ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
+				hw->num_ports = 4;
+			i40e_release_nvm(hw);
+		}
+	}
+
 	valid_functions = p->valid_functions;
 	num_functions = 0;
 	while (valid_functions) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index b0eed8c0b2f2..69ea15892a5b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -1336,6 +1336,9 @@ struct i40e_hw_port_stats {
 #define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE	1024
 #define I40E_SR_CONTROL_WORD_1_SHIFT		0x06
 #define I40E_SR_CONTROL_WORD_1_MASK	(0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_PTR_TYPE				BIT(15)
+#define I40E_SR_OCP_CFG_WORD0			0x2B
+#define I40E_SR_OCP_ENABLED			BIT(15)
 
 /* Shadow RAM related */
 #define I40E_SR_SECTOR_SIZE_IN_WORDS	0x800

From 089915f0f23f1a09b4aecebe88212c5a4fef37fa Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 8 Mar 2018 14:52:11 -0800
Subject: [PATCH 0930/1678] i40e: restore TCPv4 input set when re-enabling ATR

When we re-enable ATR we need to restore the input set for TCPv4
filters, in order for ATR to function correctly. We already do this for
the normal case of re-enabling ATR when disabling ntuple support.
However, when re-enabling ATR after the last TCPv4 filter is removed (but
when ntuple support is still active), we did not restore the TCPv4
filter input set.

This can cause problems if the TCPv4 filters from FDir had changed the
input set, as ATR will no longer behave as expected.

When clearing the ATR auto-disable flag, make sure we restore the TCPv4
input set to avoid this.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index beb854208047..b78c06a1f82c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8156,6 +8156,15 @@ static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
 static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
 {
 	if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
+		/* ATR uses the same filtering logic as SB rules. It only
+		 * functions properly if the input set mask is at the default
+		 * settings. It is safe to restore the default input set
+		 * because there are no active TCPv4 filter rules.
+		 */
+		i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+					I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+					I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
 		pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
 		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))

From 615755a77b2461ed78dfafb8a6649456201949c7 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Mar 2018 10:23:21 -0700
Subject: [PATCH 0931/1678] bpf: extend stackmap to save binary_build_id+offset
 instead of address

Currently, bpf stackmap store address for each entry in the call trace.
To map these addresses to user space files, it is necessary to maintain
the mapping from these virtual address to symbols in the binary. Usually,
the user space profiler (such as perf) has to scan /proc/pid/maps at the
beginning of profiling, and monitor mmap2() calls afterwards. Given the
cost of maintaining the address map, this solution is not practical for
system wide profiling that is always on.

This patch tries to solve this problem with a variation of stackmap. This
variation is enabled by flag BPF_F_STACK_BUILD_ID. Instead of storing
addresses, the variation stores ELF file build_id + offset.

Build ID is a 20-byte unique identifier for ELF files. The following
command shows the Build ID of /bin/bash:

  [user@]$ readelf -n /bin/bash
  ...
    Build ID: XXXXXXXXXX
  ...

With BPF_F_STACK_BUILD_ID, bpf_get_stackid() tries to parse Build ID
for each entry in the call trace, and translate it into the following
struct:

  struct bpf_stack_build_id_offset {
          __s32           status;
          unsigned char   build_id[BPF_BUILD_ID_SIZE];
          union {
                  __u64   offset;
                  __u64   ip;
          };
  };

The search of build_id is limited to the first page of the file, and this
page should be in page cache. Otherwise, we fallback to store ip for this
entry (ip field in struct bpf_stack_build_id_offset). This requires the
build_id to be stored in the first page. A quick survey of binary and
dynamic library files in a few different systems shows that almost all
binary and dynamic library files have build_id in the first page.

Build_id is only meaningful for user stack. If a kernel stack is added to
a stackmap with BPF_F_STACK_BUILD_ID, it will automatically fallback to
only store ip (status == BPF_STACK_BUILD_ID_IP). Similarly, if build_id
lookup failed for some reason, it will also fallback to store ip.

User space can access struct bpf_stack_build_id_offset with bpf
syscall BPF_MAP_LOOKUP_ELEM. It is necessary for user space to
maintain mapping from build id to binary files. This mostly static
mapping is much easier to maintain than per process address maps.

Note: Stackmap with build_id only works in non-nmi context at this time.
This is because we need to take mm->mmap_sem for find_vma(). If this
changes, we would like to allow build_id lookup in nmi context.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h |  22 ++++
 kernel/bpf/stackmap.c    | 257 +++++++++++++++++++++++++++++++++++----
 2 files changed, 257 insertions(+), 22 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a66769e5875..1e15d1724d89 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -231,6 +231,28 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY		(1U << 3)
 #define BPF_F_WRONLY		(1U << 4)
 
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID	(1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space need an empty entry to identify end of a trace */
+	BPF_STACK_BUILD_ID_EMPTY = 0,
+	/* with valid build_id and offset */
+	BPF_STACK_BUILD_ID_VALID = 1,
+	/* couldn't get build_id, fallback to ip */
+	BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+	__s32		status;
+	unsigned char	build_id[BPF_BUILD_ID_SIZE];
+	union {
+		__u64	offset;
+		__u64	ip;
+	};
+};
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b0ecf43f5894..57eeb1234b67 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -9,16 +9,19 @@
 #include <linux/filter.h>
 #include <linux/stacktrace.h>
 #include <linux/perf_event.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
 #include "percpu_freelist.h"
 
-#define STACK_CREATE_FLAG_MASK \
-	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define STACK_CREATE_FLAG_MASK					\
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
+	 BPF_F_STACK_BUILD_ID)
 
 struct stack_map_bucket {
 	struct pcpu_freelist_node fnode;
 	u32 hash;
 	u32 nr;
-	u64 ip[];
+	u64 data[];
 };
 
 struct bpf_stack_map {
@@ -29,6 +32,17 @@ struct bpf_stack_map {
 	struct stack_map_bucket *buckets[];
 };
 
+static inline bool stack_map_use_build_id(struct bpf_map *map)
+{
+	return (map->map_flags & BPF_F_STACK_BUILD_ID);
+}
+
+static inline int stack_map_data_size(struct bpf_map *map)
+{
+	return stack_map_use_build_id(map) ?
+		sizeof(struct bpf_stack_build_id) : sizeof(u64);
+}
+
 static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
 {
 	u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
@@ -68,8 +82,16 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    value_size < 8 || value_size % 8 ||
-	    value_size / 8 > sysctl_perf_event_max_stack)
+	    value_size < 8 || value_size % 8)
+		return ERR_PTR(-EINVAL);
+
+	BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
+	if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
+		if (value_size % sizeof(struct bpf_stack_build_id) ||
+		    value_size / sizeof(struct bpf_stack_build_id)
+		    > sysctl_perf_event_max_stack)
+			return ERR_PTR(-EINVAL);
+	} else if (value_size / 8 > sysctl_perf_event_max_stack)
 		return ERR_PTR(-EINVAL);
 
 	/* hash table size must be power of 2 */
@@ -114,13 +136,184 @@ free_smap:
 	return ERR_PTR(err);
 }
 
+#define BPF_BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ */
+static inline int stack_map_parse_build_id(void *page_addr,
+					   unsigned char *build_id,
+					   void *note_start,
+					   Elf32_Word note_size)
+{
+	Elf32_Word note_offs = 0, new_offs;
+
+	/* check for overflow */
+	if (note_start < page_addr || note_start + note_size < note_start)
+		return -EINVAL;
+
+	/* only supports note that fits in the first page */
+	if (note_start + note_size > page_addr + PAGE_SIZE)
+		return -EINVAL;
+
+	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+		if (nhdr->n_type == BPF_BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU") &&
+		    nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+			memcpy(build_id,
+			       note_start + note_offs +
+			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
+			       BPF_BUILD_ID_SIZE);
+			return 0;
+		}
+		new_offs = note_offs + sizeof(Elf32_Nhdr) +
+			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+		if (new_offs <= note_offs)  /* overflow */
+			break;
+		note_offs = new_offs;
+	}
+	return -EINVAL;
+}
+
+/* Parse build ID from 32-bit ELF */
+static int stack_map_get_build_id_32(void *page_addr,
+				     unsigned char *build_id)
+{
+	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+	Elf32_Phdr *phdr;
+	int i;
+
+	/* only supports phdr that fits in one page */
+	if (ehdr->e_phnum >
+	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+		return -EINVAL;
+
+	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+	for (i = 0; i < ehdr->e_phnum; ++i)
+		if (phdr[i].p_type == PT_NOTE)
+			return stack_map_parse_build_id(page_addr, build_id,
+					page_addr + phdr[i].p_offset,
+					phdr[i].p_filesz);
+	return -EINVAL;
+}
+
+/* Parse build ID from 64-bit ELF */
+static int stack_map_get_build_id_64(void *page_addr,
+				     unsigned char *build_id)
+{
+	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+	Elf64_Phdr *phdr;
+	int i;
+
+	/* only supports phdr that fits in one page */
+	if (ehdr->e_phnum >
+	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+		return -EINVAL;
+
+	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+	for (i = 0; i < ehdr->e_phnum; ++i)
+		if (phdr[i].p_type == PT_NOTE)
+			return stack_map_parse_build_id(page_addr, build_id,
+					page_addr + phdr[i].p_offset,
+					phdr[i].p_filesz);
+	return -EINVAL;
+}
+
+/* Parse build ID of ELF file mapped to vma */
+static int stack_map_get_build_id(struct vm_area_struct *vma,
+				  unsigned char *build_id)
+{
+	Elf32_Ehdr *ehdr;
+	struct page *page;
+	void *page_addr;
+	int ret;
+
+	/* only works for page backed storage  */
+	if (!vma->vm_file)
+		return -EINVAL;
+
+	page = find_get_page(vma->vm_file->f_mapping, 0);
+	if (!page)
+		return -EFAULT;	/* page not mapped */
+
+	ret = -EINVAL;
+	page_addr = page_address(page);
+	ehdr = (Elf32_Ehdr *)page_addr;
+
+	/* compare magic x7f "ELF" */
+	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+		goto out;
+
+	/* only support executable file and shared object file */
+	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+		goto out;
+
+	if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+		ret = stack_map_get_build_id_32(page_addr, build_id);
+	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+		ret = stack_map_get_build_id_64(page_addr, build_id);
+out:
+	put_page(page);
+	return ret;
+}
+
+static void stack_map_get_build_id_offset(struct bpf_map *map,
+					  struct stack_map_bucket *bucket,
+					  u64 *ips, u32 trace_nr, bool user)
+{
+	int i;
+	struct vm_area_struct *vma;
+	struct bpf_stack_build_id *id_offs;
+
+	bucket->nr = trace_nr;
+	id_offs = (struct bpf_stack_build_id *)bucket->data;
+
+	/*
+	 * We cannot do up_read() in nmi context, so build_id lookup is
+	 * only supported for non-nmi events. If at some point, it is
+	 * possible to run find_vma() without taking the semaphore, we
+	 * would like to allow build_id lookup in nmi context.
+	 *
+	 * Same fallback is used for kernel stack (!user) on a stackmap
+	 * with build_id.
+	 */
+	if (!user || !current || !current->mm || in_nmi() ||
+	    down_read_trylock(&current->mm->mmap_sem) == 0) {
+		/* cannot access current->mm, fall back to ips */
+		for (i = 0; i < trace_nr; i++) {
+			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+			id_offs[i].ip = ips[i];
+		}
+		return;
+	}
+
+	for (i = 0; i < trace_nr; i++) {
+		vma = find_vma(current->mm, ips[i]);
+		if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+			/* per entry fall back to ips */
+			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
+			id_offs[i].ip = ips[i];
+			continue;
+		}
+		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
+			- vma->vm_start;
+		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+	}
+	up_read(&current->mm->mmap_sem);
+}
+
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct perf_callchain_entry *trace;
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
-	u32 max_depth = map->value_size / 8;
+	u32 max_depth = map->value_size / stack_map_data_size(map);
 	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
 	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
@@ -128,6 +321,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	bool user = flags & BPF_F_USER_STACK;
 	bool kernel = !user;
 	u64 *ips;
+	bool hash_matches;
 
 	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
 			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -156,24 +350,43 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	id = hash & (smap->n_buckets - 1);
 	bucket = READ_ONCE(smap->buckets[id]);
 
-	if (bucket && bucket->hash == hash) {
-		if (flags & BPF_F_FAST_STACK_CMP)
+	hash_matches = bucket && bucket->hash == hash;
+	/* fast cmp */
+	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
+		return id;
+
+	if (stack_map_use_build_id(map)) {
+		/* for build_id+offset, pop a bucket before slow cmp */
+		new_bucket = (struct stack_map_bucket *)
+			pcpu_freelist_pop(&smap->freelist);
+		if (unlikely(!new_bucket))
+			return -ENOMEM;
+		stack_map_get_build_id_offset(map, new_bucket, ips,
+					      trace_nr, user);
+		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
+		if (hash_matches && bucket->nr == trace_nr &&
+		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
+			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
 			return id;
-		if (bucket->nr == trace_nr &&
-		    memcmp(bucket->ip, ips, trace_len) == 0)
+		}
+		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
+			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
+			return -EEXIST;
+		}
+	} else {
+		if (hash_matches && bucket->nr == trace_nr &&
+		    memcmp(bucket->data, ips, trace_len) == 0)
 			return id;
+		if (bucket && !(flags & BPF_F_REUSE_STACKID))
+			return -EEXIST;
+
+		new_bucket = (struct stack_map_bucket *)
+			pcpu_freelist_pop(&smap->freelist);
+		if (unlikely(!new_bucket))
+			return -ENOMEM;
+		memcpy(new_bucket->data, ips, trace_len);
 	}
 
-	/* this call stack is not in the map, try to add it */
-	if (bucket && !(flags & BPF_F_REUSE_STACKID))
-		return -EEXIST;
-
-	new_bucket = (struct stack_map_bucket *)
-		pcpu_freelist_pop(&smap->freelist);
-	if (unlikely(!new_bucket))
-		return -ENOMEM;
-
-	memcpy(new_bucket->ip, ips, trace_len);
 	new_bucket->hash = hash;
 	new_bucket->nr = trace_nr;
 
@@ -212,8 +425,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 	if (!bucket)
 		return -ENOENT;
 
-	trace_len = bucket->nr * sizeof(u64);
-	memcpy(value, bucket->ip, trace_len);
+	trace_len = bucket->nr * stack_map_data_size(map);
+	memcpy(value, bucket->data, trace_len);
 	memset(value + trace_len, 0, map->value_size - trace_len);
 
 	old_bucket = xchg(&smap->buckets[id], bucket);

From 81f77fd0deeb866d65ccc79733df8d2af5217c82 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 14 Mar 2018 10:23:22 -0700
Subject: [PATCH 0932/1678] bpf: add selftest for stackmap with
 BPF_F_STACK_BUILD_ID

test_stacktrace_build_id() is added. It accesses tracepoint urandom_read
with "dd" and "urandom_read" and gathers stack traces. Then it reads the
stack traces from the stackmap.

urandom_read is a statically link binary that reads from /dev/urandom.
test_stacktrace_build_id() calls readelf to read build ID of urandom_read
and compares it with build ID from the stackmap.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h                |  22 +++
 tools/testing/selftests/bpf/Makefile          |  12 +-
 tools/testing/selftests/bpf/test_progs.c      | 164 +++++++++++++++++-
 .../selftests/bpf/test_stacktrace_build_id.c  |  60 +++++++
 tools/testing/selftests/bpf/urandom_read.c    |  22 +++
 5 files changed, 278 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_stacktrace_build_id.c
 create mode 100644 tools/testing/selftests/bpf/urandom_read.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db6bdc375126..1944d0aee7e8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -231,6 +231,28 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY		(1U << 3)
 #define BPF_F_WRONLY		(1U << 4)
 
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID	(1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space need an empty entry to identify end of a trace */
+	BPF_STACK_BUILD_ID_EMPTY = 0,
+	/* with valid build_id and offset */
+	BPF_STACK_BUILD_ID_VALID = 1,
+	/* couldn't get build_id, fallback to ip */
+	BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+	__s32		status;
+	unsigned char	build_id[BPF_BUILD_ID_SIZE];
+	union {
+		__u64	offset;
+		__u64	ip;
+	};
+};
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8567a858b789..b0d29fd5e51c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -13,6 +13,14 @@ endif
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 LDLIBS += -lcap -lelf -lrt -lpthread
 
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): urandom_read
+
+urandom_read: urandom_read.c
+	$(CC) -o $(TEST_CUSTOM_PROGS) -static $<
+
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
@@ -21,7 +29,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-	sample_map_ret0.o test_tcpbpf_kern.o
+	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -74,3 +82,5 @@ $(OUTPUT)/%.o: %.c
 	$(CLANG) $(CLANG_FLAGS) \
 		 -O2 -target bpf -emit-llvm -c $< -o - |      \
 	$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 27ad5404389e..e9df48b306df 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -841,7 +841,8 @@ static void test_tp_attach_query(void)
 static int compare_map_keys(int map1_fd, int map2_fd)
 {
 	__u32 key, next_key;
-	char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+	char val_buf[PERF_MAX_STACK_DEPTH *
+		     sizeof(struct bpf_stack_build_id)];
 	int err;
 
 	err = bpf_map_get_next_key(map1_fd, NULL, &key);
@@ -964,6 +965,166 @@ out:
 	return;
 }
 
+static int extract_build_id(char *build_id, size_t size)
+{
+	FILE *fp;
+	char *line = NULL;
+	size_t len = 0;
+
+	fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+	if (fp == NULL)
+		return -1;
+
+	if (getline(&line, &len, fp) == -1)
+		goto err;
+	fclose(fp);
+
+	if (len > size)
+		len = size;
+	memcpy(build_id, line, len);
+	build_id[len] = '\0';
+	return 0;
+err:
+	fclose(fp);
+	return -1;
+}
+
+static void test_stacktrace_build_id(void)
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	const char *file = "./test_stacktrace_build_id.o";
+	int bytes, efd, err, pmu_fd, prog_fd;
+	struct perf_event_attr attr = {};
+	__u32 key, previous_key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+	int i, j;
+	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+	int build_id_matches = 0;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		goto out;
+
+	/* Get the ID for the sched/sched_switch tracepoint */
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+		  "read", "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	/* Open the perf event and attach bpf progrram */
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+	       == 0);
+	assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = extract_build_id(buf, 256);
+
+	if (CHECK(err, "get build_id with readelf",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	if (CHECK(err, "get_next_key from stackmap",
+		  "err %d, errno %d\n", err, errno))
+		goto disable_pmu;
+
+	do {
+		char build_id[64];
+
+		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		if (CHECK(err, "lookup_elem from stackmap",
+			  "err %d, errno %d\n", err, errno))
+			goto disable_pmu;
+		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+			    id_offs[i].offset != 0) {
+				for (j = 0; j < 20; ++j)
+					sprintf(build_id + 2 * j, "%02x",
+						id_offs[i].build_id[j] & 0xff);
+				if (strstr(buf, build_id) != NULL)
+					build_id_matches = 1;
+			}
+		previous_key = key;
+	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+	CHECK(build_id_matches < 1, "build id match",
+	      "Didn't find expected build ID from the map");
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+
+out:
+	return;
+}
+
 int main(void)
 {
 	test_pkt_access();
@@ -976,6 +1137,7 @@ int main(void)
 	test_obj_name();
 	test_tp_attach_query();
 	test_stacktrace_map();
+	test_stacktrace_build_id();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000000..b755bd783ce5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct bpf_stack_build_id)
+		* PERF_MAX_STACK_DEPTH,
+	.max_entries = 128,
+	.map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+	unsigned long long pad;
+	int got_bits;
+	int pool_left;
+	int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+	__u32 key = 0, val = 0, *value_p;
+
+	value_p = bpf_map_lookup_elem(&control_map, &key);
+	if (value_p && *value_p)
+		return 0; /* skip if non-zero *value_p */
+
+	/* The size of stackmap and stackid_hmap should be the same */
+	key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+	if ((int)key >= 0)
+		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000000..4acfdebf36fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+int main(void)
+{
+	int fd = open("/dev/urandom", O_RDONLY);
+	int i;
+	char buf[BUF_SIZE];
+
+	if (fd < 0)
+		return 1;
+	for (i = 0; i < 4; ++i)
+		read(fd, buf, BUF_SIZE);
+
+	close(fd);
+	return 0;
+}

From 0c3d5a96d5e5e6d5662d335a3bdfb76f35433f18 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 12 Mar 2018 08:07:12 -0700
Subject: [PATCH 0933/1678] net: drivers/net: Remove unnecessary
 skb_copy_expand OOM messages

skb_copy_expand without __GFP_NOWARN already does a dump_stack
on OOM so these messages are redundant.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_spi.c | 1 -
 drivers/net/usb/lg-vl600.c              | 6 +-----
 drivers/net/wimax/i2400m/usb-rx.c       | 3 ---
 drivers/net/wireless/ti/wl1251/tx.c     | 4 +---
 drivers/usb/gadget/function/f_eem.c     | 1 -
 net/mac80211/rx.c                       | 5 +----
 net/netfilter/nfnetlink_queue.c         | 5 +----
 7 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 9c236298fe21..5803cd6db406 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -705,7 +705,6 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
 		tskb = skb_copy_expand(skb, QCAFRM_HEADER_LEN,
 				       QCAFRM_FOOTER_LEN + pad_len, GFP_ATOMIC);
 		if (!tskb) {
-			netdev_dbg(qca->net_dev, "could not allocate tx_buff\n");
 			qca->stats.out_of_mem++;
 			return NETDEV_TX_BUSY;
 		}
diff --git a/drivers/net/usb/lg-vl600.c b/drivers/net/usb/lg-vl600.c
index dbabd7ca5268..257916f172cd 100644
--- a/drivers/net/usb/lg-vl600.c
+++ b/drivers/net/usb/lg-vl600.c
@@ -157,12 +157,8 @@ static int vl600_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 		s->current_rx_buf = skb_copy_expand(skb, 0,
 				le32_to_cpup(&frame->len), GFP_ATOMIC);
-		if (!s->current_rx_buf) {
-			netif_err(dev, ifup, dev->net, "Reserving %i bytes "
-					"for packet assembly failed.\n",
-					le32_to_cpup(&frame->len));
+		if (!s->current_rx_buf)
 			dev->net->stats.rx_errors++;
-		}
 
 		return 0;
 	}
diff --git a/drivers/net/wimax/i2400m/usb-rx.c b/drivers/net/wimax/i2400m/usb-rx.c
index b78ee676e102..5b64bda7d9e7 100644
--- a/drivers/net/wimax/i2400m/usb-rx.c
+++ b/drivers/net/wimax/i2400m/usb-rx.c
@@ -263,9 +263,6 @@ retry:
 		new_skb = skb_copy_expand(rx_skb, 0, rx_size - rx_skb->len,
 					  GFP_KERNEL);
 		if (new_skb == NULL) {
-			if (printk_ratelimit())
-				dev_err(dev, "RX: Can't reallocate skb to %d; "
-					"RX dropped\n", rx_size);
 			kfree_skb(rx_skb);
 			rx_skb = NULL;
 			goto out;	/* drop it...*/
diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c
index de2fa6705574..12ed14ebc307 100644
--- a/drivers/net/wireless/ti/wl1251/tx.c
+++ b/drivers/net/wireless/ti/wl1251/tx.c
@@ -221,10 +221,8 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb,
 			struct sk_buff *newskb = skb_copy_expand(skb, 0, 3,
 								 GFP_KERNEL);
 
-			if (unlikely(newskb == NULL)) {
-				wl1251_error("Can't allocate skb!");
+			if (unlikely(newskb == NULL))
 				return -EINVAL;
-			}
 
 			tx_hdr = (struct tx_double_buffer_desc *) newskb->data;
 
diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c
index 37557651b600..c13befa31110 100644
--- a/drivers/usb/gadget/function/f_eem.c
+++ b/drivers/usb/gadget/function/f_eem.c
@@ -507,7 +507,6 @@ static int eem_unwrap(struct gether *port,
 						0,
 						GFP_ATOMIC);
 			if (unlikely(!skb3)) {
-				DBG(cdev, "unable to realign EEM packet\n");
 				dev_kfree_skb_any(skb2);
 				continue;
 			}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d01743234cf6..9c898a3688c6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2549,11 +2549,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
 	fwd_skb = skb_copy_expand(skb, local->tx_headroom +
 				       sdata->encrypt_headroom, 0, GFP_ATOMIC);
-	if (!fwd_skb) {
-		net_info_ratelimited("%s: failed to clone mesh frame\n",
-				    sdata->name);
+	if (!fwd_skb)
 		goto out;
-	}
 
 	fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
 	fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 59e2833c17f1..9f572ed56208 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -833,11 +833,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
 		if (diff > skb_tailroom(e->skb)) {
 			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
 					       diff, GFP_ATOMIC);
-			if (!nskb) {
-				printk(KERN_WARNING "nf_queue: OOM "
-				      "in mangle, dropping packet\n");
+			if (!nskb)
 				return -ENOMEM;
-			}
 			kfree_skb(e->skb);
 			e->skb = nskb;
 		}

From 0aee4c259849099cb07ead6cd7fff74e561d5225 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 12 Mar 2018 14:15:25 -0400
Subject: [PATCH 0934/1678] sctp: Fix double free in sctp_sendmsg_to_asoc

syzbot/kasan detected a double free in sctp_sendmsg_to_asoc:
BUG: KASAN: use-after-free in sctp_association_free+0x7b7/0x930
net/sctp/associola.c:332
Read of size 8 at addr ffff8801d8006ae0 by task syzkaller914861/4202

CPU: 1 PID: 4202 Comm: syzkaller914861 Not tainted 4.16.0-rc4+ #258
Hardware name: Google Google Compute Engine/Google Compute Engine
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 print_address_description+0x73/0x250 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x23c/0x360 mm/kasan/report.c:412
 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
 sctp_association_free+0x7b7/0x930 net/sctp/associola.c:332
 sctp_sendmsg+0xc67/0x1a80 net/sctp/socket.c:2075
 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:763
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:639
 SYSC_sendto+0x361/0x5c0 net/socket.c:1748
 SyS_sendto+0x40/0x50 net/socket.c:1716
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7

This was introduced by commit:
f84af33 sctp: factor out sctp_sendmsg_to_asoc from sctp_sendmsg

As the newly refactored function moved the wait_for_sndbuf call to a
point after the association was connected, allowing for peeloff events
to occur, which in turn caused wait_for_sndbuf to return -EPIPE which
was not caught by the logic that determines if an association should be
freed or not.

Fix it the easy way by returning the ordering of
sctp_primitive_ASSOCIATE and sctp_wait_for_sndbuf to the old order, to
ensure that EPIPE will not happen.

Tested by myself using the syzbot reproducers with positive results

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: davem@davemloft.net
CC: Xin Long <lucien.xin@gmail.com>
Reported-by: syzbot+a4e4112c3aff00c8cfd8@syzkaller.appspotmail.com
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aeecdd620c45..7a10ae3c3d82 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1883,6 +1883,19 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		goto err;
 	}
 
+	if (asoc->pmtu_pending)
+		sctp_assoc_pending_pmtu(asoc);
+
+	if (sctp_wspace(asoc) < msg_len)
+		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
+
+	if (!sctp_wspace(asoc)) {
+		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+		if (err)
+			goto err;
+	}
+
 	if (sctp_state(asoc, CLOSED)) {
 		err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
 		if (err)
@@ -1900,19 +1913,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		pr_debug("%s: we associated primitively\n", __func__);
 	}
 
-	if (asoc->pmtu_pending)
-		sctp_assoc_pending_pmtu(asoc);
-
-	if (sctp_wspace(asoc) < msg_len)
-		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
-
-	if (!sctp_wspace(asoc)) {
-		timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
-		if (err)
-			goto err;
-	}
-
 	datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
 	if (IS_ERR(datamsg)) {
 		err = PTR_ERR(datamsg);

From 90126e3a40843de369a496db160ab22bdd6f4c48 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 15 Mar 2018 23:26:14 -0700
Subject: [PATCH 0935/1678] tools: bpftool: fix dependency file path

Auto-generated dependency files are in the OUTPUT directory,
we need to include them from there.  This fixes object files
not being rebuilt after header changes.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4c2867481f5c..50a715ac9e8b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -70,7 +70,7 @@ ifeq ($(feature-disassembler-four-args), 1)
 CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
 endif
 
-include $(wildcard *.d)
+include $(wildcard $(OUTPUT)*.d)
 
 all: $(OUTPUT)bpftool
 

From d5fc73dceb060688f01c2ee225a2b42cf47352ba Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 15 Mar 2018 23:26:15 -0700
Subject: [PATCH 0936/1678] tools: bpftool: fix potential format truncation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC 7 complains:

xlated_dumper.c: In function â€˜print_callâ€™:
xlated_dumper.c:179:10: warning: â€˜%sâ€™ directive output may be truncated writing up to 255 bytes into a region of size between 249 and 253 [-Wformat-truncation=]
     "%+d#%s", insn->off, sym->name);

Add a bit more space to the buffer so it can handle the entire
string and integer without truncation.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/xlated_dumper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index 51c935d38ae2..b34affa7ef2d 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,7 +49,7 @@ struct dump_data {
 	unsigned long address_call_base;
 	struct kernel_sym *sym_mapping;
 	__u32 sym_count;
-	char scratch_buff[SYM_MAX_NAME];
+	char scratch_buff[SYM_MAX_NAME + 8];
 };
 
 void kernel_syms_load(struct dump_data *dd);

From 8050ea4653c21e14681d9e00390c6886556a2640 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 15 Mar 2018 23:26:16 -0700
Subject: [PATCH 0937/1678] tools: bpf: cleanup PHONY target

There is no FORCE target in the Makefile and some of the PHONY
targets are missing, update the list.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index c07b4e718eeb..acfaf4c8cc32 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -100,4 +100,4 @@ bpftool_install:
 bpftool_clean:
 	$(call descend,bpftool,clean)
 
-.PHONY: bpftool FORCE
+.PHONY: all install clean bpftool bpftool_install bpftool_clean

From cc5b3403f0248ca5f40539d33f0e127ccca25dd2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 15 Mar 2018 23:26:17 -0700
Subject: [PATCH 0938/1678] tools: bpf: remove feature detection output

bpf tools use feature detection for libbfd dependency, clean up
the output files on make clean.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/Makefile         | 2 ++
 tools/bpf/bpftool/Makefile | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index acfaf4c8cc32..1ea545965ee3 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -81,6 +81,8 @@ clean: bpftool_clean
 	$(call QUIET_CLEAN, bpf-progs)
 	$(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+	$(call QUIET_CLEAN, core-gen)
+	$(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
 
 install: $(PROGS) bpftool_install
 	$(call QUIET_INSTALL, bpf_jit_disasm)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 50a715ac9e8b..4e69782c4a79 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -89,6 +89,8 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+	$(call QUIET_CLEAN, core-gen)
+	$(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
 
 install: $(OUTPUT)bpftool
 	$(call QUIET_INSTALL, bpftool)

From 650b4eca47399420a1644056c41b66735e250d35 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 12 Mar 2018 17:25:38 +0000
Subject: [PATCH 0939/1678] rxrpc: remove redundant initialization of variable
 'len'

The variable 'len' is being initialized with a value that is never
read and it is re-assigned later, hence the initialization is redundant
and can be removed.

Cleans up clang warning:
net/rxrpc/recvmsg.c:275:15: warning: Value stored to 'len' during its
initialization is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/recvmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 9d45d8b56744..7bff716e911e 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -272,7 +272,7 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
 			     unsigned int *_offset, unsigned int *_len)
 {
 	unsigned int offset = sizeof(struct rxrpc_wire_header);
-	unsigned int len = *_len;
+	unsigned int len;
 	int ret;
 	u8 annotation = *_annotation;
 

From ce3db6aade20e0aef88878ef65f3e066175bf3bc Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <gvaradar@cisco.com>
Date: Tue, 13 Mar 2018 05:24:33 -0700
Subject: [PATCH 0940/1678] enic: drop IP proto check for vxlan tunnel delete

Commit d11790941dd3 ("enic: Add vxlan offload support for IPv6 pkts")
added vxlan offload support for IPv6 pkts. Required change in
enic_udp_tunnel_del was not made. This creates a bug where once user
adds IPv6 tunnel, hw offload for that cannot be deleted.

This patch removes check for IP proto in tunnel delete path. Driver need
not check for IP proto since same UDP port cannot be used to create two
tunnels.

Fixes: d11790941dd3 ("enic: Add vxlan offload support for IPv6 pkts")
Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index a25fb95492a0..81684acf52af 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -251,9 +251,8 @@ static void enic_udp_tunnel_del(struct net_device *netdev,
 
 	spin_lock_bh(&enic->devcmd_lock);
 
-	if ((ti->sa_family != AF_INET) ||
-	    ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number)) ||
-	    (ti->type != UDP_TUNNEL_TYPE_VXLAN)) {
+	if ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number) ||
+	    ti->type != UDP_TUNNEL_TYPE_VXLAN) {
 		netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded",
 			    ntohs(ti->port), ti->sa_family, ti->type);
 		goto unlock;

From 2b221d20db7583fa8f36d4545554786fcf93ca8e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 13 Mar 2018 12:24:19 -0700
Subject: [PATCH 0941/1678] doc: remove out of date links and info from packet
 mmap

The packet_mmap documentation had links to no longer existing web
sites; replace with other site which has similar example.

Support for packet mmap has been in mainline versions of libpcap
for several years.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index bf654845556e..999eb41da81d 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -7,15 +7,12 @@ socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
 i) capture network traffic with utilities like tcpdump, ii) transmit network
 traffic, or any other that needs raw access to network interface.
 
-You can find the latest version of this document at:
-    http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap
-
 Howto can be found at:
-    http://wiki.gnu-log.net (packet_mmap)
+    https://sites.google.com/site/packetmmap/
 
 Please send your comments to
     Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
-    Johann Baudy <johann.baudy@gnu-log.net>
+    Johann Baudy
 
 -------------------------------------------------------------------------------
 + Why use PACKET_MMAP
@@ -51,17 +48,8 @@ From the user standpoint, you should use the higher level libpcap library, which
 is a de facto standard, portable across nearly all operating systems
 including Win32. 
 
-Said that, at time of this writing, official libpcap 0.8.1 is out and doesn't include
-support for PACKET_MMAP, and also probably the libpcap included in your distribution. 
-
-I'm aware of two implementations of PACKET_MMAP in libpcap:
-
-    http://wiki.ipxwarzone.com/		     (by Simon Patarin, based on libpcap 0.6.2)
-    http://public.lanl.gov/cpw/              (by Phil Wood, based on lastest libpcap)
-
-The rest of this document is intended for people who want to understand
-the low level details or want to improve libpcap by including PACKET_MMAP
-support.
+Packet MMAP support was integrated into libpcap around the time of version 1.3.0;
+TPACKET_V3 support was added in version 1.5.0
 
 --------------------------------------------------------------------------------
 + How to use mmap() directly to improve capture process
@@ -174,7 +162,7 @@ As capture, each frame contains two parts:
  /* bind socket to eth0 */
  bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
 
- A complete tutorial is available at: http://wiki.gnu-log.net/
+ A complete tutorial is available at: https://sites.google.com/site/packetmmap/
 
 By default, the user should put data at :
  frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)

From 739de9a1563a19459c82d0d8c4d1cc8eb55ddf4e Mon Sep 17 00:00:00 2001
From: Brad Mouring <brad.mouring@ni.com>
Date: Tue, 13 Mar 2018 16:32:13 -0500
Subject: [PATCH 0942/1678] net: macb: Reorganize macb_mii bringup

The macb mii setup (mii_probe() and mii_init()) previously was
somewhat interspersed, likely a result of organic growth and hacking.

This change moves mii bus registration into mii_init and probing the
bus for devices into mii_probe.

Signed-off-by: Brad Mouring <brad.mouring@ni.com>
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 79 ++++++++++++------------
 1 file changed, 41 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index e84afcf1ecb5..4b514c705e57 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -472,8 +472,42 @@ static int macb_mii_probe(struct net_device *dev)
 	struct macb *bp = netdev_priv(dev);
 	struct macb_platform_data *pdata;
 	struct phy_device *phydev;
-	int phy_irq;
-	int ret;
+	struct device_node *np;
+	int phy_irq, ret, i;
+
+	pdata = dev_get_platdata(&bp->pdev->dev);
+	np = bp->pdev->dev.of_node;
+	ret = 0;
+
+	if (np) {
+		if (of_phy_is_fixed_link(np)) {
+			if (of_phy_register_fixed_link(np) < 0) {
+				dev_err(&bp->pdev->dev,
+					"broken fixed-link specification\n");
+				return -ENODEV;
+			}
+			bp->phy_node = of_node_get(np);
+		} else {
+			/* fallback to standard phy registration if no phy were
+			 * found during dt phy registration
+			 */
+			if (!phy_find_first(bp->mii_bus)) {
+				for (i = 0; i < PHY_MAX_ADDR; i++) {
+					struct phy_device *phydev;
+
+					phydev = mdiobus_scan(bp->mii_bus, i);
+					if (IS_ERR(phydev) &&
+					    PTR_ERR(phydev) != -ENODEV) {
+						ret = PTR_ERR(phydev);
+						break;
+					}
+				}
+
+				if (ret)
+					return -ENODEV;
+			}
+		}
+	}
 
 	if (bp->phy_node) {
 		phydev = of_phy_connect(dev, bp->phy_node,
@@ -488,7 +522,6 @@ static int macb_mii_probe(struct net_device *dev)
 			return -ENXIO;
 		}
 
-		pdata = dev_get_platdata(&bp->pdev->dev);
 		if (pdata) {
 			if (gpio_is_valid(pdata->phy_irq_pin)) {
 				ret = devm_gpio_request(&bp->pdev->dev,
@@ -533,7 +566,7 @@ static int macb_mii_init(struct macb *bp)
 {
 	struct macb_platform_data *pdata;
 	struct device_node *np;
-	int err = -ENXIO, i;
+	int err, i;
 
 	/* Enable management port */
 	macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -556,39 +589,9 @@ static int macb_mii_init(struct macb *bp)
 	dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
 
 	np = bp->pdev->dev.of_node;
+
 	if (np) {
-		if (of_phy_is_fixed_link(np)) {
-			if (of_phy_register_fixed_link(np) < 0) {
-				dev_err(&bp->pdev->dev,
-					"broken fixed-link specification\n");
-				goto err_out_unregister_bus;
-			}
-			bp->phy_node = of_node_get(np);
-
-			err = mdiobus_register(bp->mii_bus);
-		} else {
-			/* try dt phy registration */
-			err = of_mdiobus_register(bp->mii_bus, np);
-
-			/* fallback to standard phy registration if no phy were
-			 * found during dt phy registration
-			 */
-			if (!err && !phy_find_first(bp->mii_bus)) {
-				for (i = 0; i < PHY_MAX_ADDR; i++) {
-					struct phy_device *phydev;
-
-					phydev = mdiobus_scan(bp->mii_bus, i);
-					if (IS_ERR(phydev) &&
-					    PTR_ERR(phydev) != -ENODEV) {
-						err = PTR_ERR(phydev);
-						break;
-					}
-				}
-
-				if (err)
-					goto err_out_unregister_bus;
-			}
-		}
+		err = of_mdiobus_register(bp->mii_bus, np);
 	} else {
 		for (i = 0; i < PHY_MAX_ADDR; i++)
 			bp->mii_bus->irq[i] = PHY_POLL;
@@ -610,10 +613,10 @@ static int macb_mii_init(struct macb *bp)
 
 err_out_unregister_bus:
 	mdiobus_unregister(bp->mii_bus);
-err_out_free_mdiobus:
-	of_node_put(bp->phy_node);
 	if (np && of_phy_is_fixed_link(np))
 		of_phy_deregister_fixed_link(np);
+err_out_free_mdiobus:
+	of_node_put(bp->phy_node);
 	mdiobus_free(bp->mii_bus);
 err_out:
 	return err;

From cb732e9a26f336ddd3e7ef4dba84ce190d1e175e Mon Sep 17 00:00:00 2001
From: Brad Mouring <brad.mouring@ni.com>
Date: Tue, 13 Mar 2018 16:32:14 -0500
Subject: [PATCH 0943/1678] net: macb: Remove redundant poll irq assignment

In phy_device's general probe, this device will already be set for
phy register polling, rendering this code redundant.

Signed-off-by: Brad Mouring <brad.mouring@ni.com>
Suggested-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 4b514c705e57..db1dc301bed3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -566,7 +566,7 @@ static int macb_mii_init(struct macb *bp)
 {
 	struct macb_platform_data *pdata;
 	struct device_node *np;
-	int err, i;
+	int err;
 
 	/* Enable management port */
 	macb_writel(bp, NCR, MACB_BIT(MPE));
@@ -593,9 +593,6 @@ static int macb_mii_init(struct macb *bp)
 	if (np) {
 		err = of_mdiobus_register(bp->mii_bus, np);
 	} else {
-		for (i = 0; i < PHY_MAX_ADDR; i++)
-			bp->mii_bus->irq[i] = PHY_POLL;
-
 		if (pdata)
 			bp->mii_bus->phy_mask = pdata->phy_mask;
 

From 2105a5d3acd778e20634f25950cc9254ea408efe Mon Sep 17 00:00:00 2001
From: Brad Mouring <brad.mouring@ni.com>
Date: Tue, 13 Mar 2018 16:32:15 -0500
Subject: [PATCH 0944/1678] net: macb: Add phy-handle DT support

This optional binding (as described in the ethernet DT bindings doc)
directs the netdev to the phydev to use. This is useful for a phy
chip that has >1 phy in it, and two netdevs are using the same phy
chip (i.e. the second mac's phy lives on the first mac's MDIO bus)

The devicetree snippet would look something like this:

ethernet@feedf00d {
	...
	phy-handle = <&phy0> // the first netdev is physically wired to phy0
	...
	phy0: phy@0 {
		...
		reg = <0x0> // MDIO address 0
		...
	}
	phy1: phy@1 {
		...
		reg = <0x1> // MDIO address 1
		...
	}
...
}

ethernet@deadbeef {
	...
	phy-handle = <&phy1> // tells the driver to use phy1 on the
						 // first mac's mdio bus (it's wired thusly)
	...
}

The work done to add the phy_node in the first place (dacdbb4dfc1a1:
"net: macb: add fixed-link node support") will consume the
device_node (if found).

Signed-off-by: Brad Mouring <brad.mouring@ni.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index db1dc301bed3..d09bd43680b3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -488,10 +488,12 @@ static int macb_mii_probe(struct net_device *dev)
 			}
 			bp->phy_node = of_node_get(np);
 		} else {
-			/* fallback to standard phy registration if no phy were
-			 * found during dt phy registration
+			bp->phy_node = of_parse_phandle(np, "phy-handle", 0);
+			/* fallback to standard phy registration if no
+			 * phy-handle was found nor any phy found during
+			 * dt phy registration
 			 */
-			if (!phy_find_first(bp->mii_bus)) {
+			if (!bp->phy_node && !phy_find_first(bp->mii_bus)) {
 				for (i = 0; i < PHY_MAX_ADDR; i++) {
 					struct phy_device *phydev;
 

From f3b249e652792290cea1db45fa58e51c69cdd68e Mon Sep 17 00:00:00 2001
From: Brad Mouring <brad.mouring@ni.com>
Date: Tue, 13 Mar 2018 16:32:16 -0500
Subject: [PATCH 0945/1678] Documentation: macb: Document phy-handle binding

Document the existence of the optional binding, directing to the
general ethernet document that describes this binding.

Signed-off-by: Brad Mouring <brad.mouring@ni.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/macb.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 27966ae741e0..457d5ae16f23 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -29,6 +29,7 @@ Optional properties for PHY child node:
 - reset-gpios : Should specify the gpio for phy reset
 - magic-packet : If present, indicates that the hardware supports waking
   up via magic packet.
+- phy-handle : see ethernet.txt file in the same directory
 
 Examples:
 

From 9fbb704c3385adf5f9adfce7fd3c0bf31aa4da6d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Mar 2018 08:29:36 -0700
Subject: [PATCH 0946/1678] net/ipv6: Refactor gateway validation on route add

Move gateway validation code from ip6_route_info_create into
ip6_validate_gw. Code move plus adjustments to handle the potential
reset of dev and idev and to make checkpatch happy.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 120 ++++++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 54 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 81711e3e2604..23ced851fdb1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2550,7 +2550,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 
 static int ip6_route_check_nh_onlink(struct net *net,
 				     struct fib6_config *cfg,
-				     struct net_device *dev,
+				     const struct net_device *dev,
 				     struct netlink_ext_ack *extack)
 {
 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2626,6 +2626,68 @@ out:
 	return err;
 }
 
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+			   struct net_device **_dev, struct inet6_dev **idev,
+			   struct netlink_ext_ack *extack)
+{
+	const struct in6_addr *gw_addr = &cfg->fc_gateway;
+	int gwa_type = ipv6_addr_type(gw_addr);
+	const struct net_device *dev = *_dev;
+	int err = -EINVAL;
+
+	/* if gw_addr is local we will fail to detect this in case
+	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
+	 * will return already-added prefix route via interface that
+	 * prefix route was assigned to, which might be non-loopback.
+	 */
+	if (ipv6_chk_addr_and_flags(net, gw_addr,
+				    gwa_type & IPV6_ADDR_LINKLOCAL ?
+				    dev : NULL, 0, 0)) {
+		NL_SET_ERR_MSG(extack, "Invalid gateway address");
+		goto out;
+	}
+
+	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+		/* IPv6 strictly inhibits using not link-local
+		 * addresses as nexthop address.
+		 * Otherwise, router will not able to send redirects.
+		 * It is very good, but in some (rare!) circumstances
+		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
+		 * some exceptions. --ANK
+		 * We allow IPv4-mapped nexthops to support RFC4798-type
+		 * addressing
+		 */
+		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+			NL_SET_ERR_MSG(extack, "Invalid gateway address");
+			goto out;
+		}
+
+		if (cfg->fc_flags & RTNH_F_ONLINK)
+			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+		else
+			err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+		if (err)
+			goto out;
+	}
+
+	/* reload in case device was changed */
+	dev = *_dev;
+
+	err = -EINVAL;
+	if (!dev) {
+		NL_SET_ERR_MSG(extack, "Egress device not specified");
+		goto out;
+	} else if (dev->flags & IFF_LOOPBACK) {
+		NL_SET_ERR_MSG(extack,
+			       "Egress device can not be loopback device for this route");
+		goto out;
+	}
+	err = 0;
+out:
+	return err;
+}
+
 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 					      struct netlink_ext_ack *extack)
 {
@@ -2808,61 +2870,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	}
 
 	if (cfg->fc_flags & RTF_GATEWAY) {
-		const struct in6_addr *gw_addr;
-		int gwa_type;
-
-		gw_addr = &cfg->fc_gateway;
-		gwa_type = ipv6_addr_type(gw_addr);
-
-		/* if gw_addr is local we will fail to detect this in case
-		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
-		 * will return already-added prefix route via interface that
-		 * prefix route was assigned to, which might be non-loopback.
-		 */
-		err = -EINVAL;
-		if (ipv6_chk_addr_and_flags(net, gw_addr,
-					    gwa_type & IPV6_ADDR_LINKLOCAL ?
-					    dev : NULL, 0, 0)) {
-			NL_SET_ERR_MSG(extack, "Invalid gateway address");
+		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+		if (err)
 			goto out;
-		}
-		rt->rt6i_gateway = *gw_addr;
 
-		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
-			/* IPv6 strictly inhibits using not link-local
-			   addresses as nexthop address.
-			   Otherwise, router will not able to send redirects.
-			   It is very good, but in some (rare!) circumstances
-			   (SIT, PtP, NBMA NOARP links) it is handy to allow
-			   some exceptions. --ANK
-			   We allow IPv4-mapped nexthops to support RFC4798-type
-			   addressing
-			 */
-			if (!(gwa_type & (IPV6_ADDR_UNICAST |
-					  IPV6_ADDR_MAPPED))) {
-				NL_SET_ERR_MSG(extack,
-					       "Invalid gateway address");
-				goto out;
-			}
-
-			if (cfg->fc_flags & RTNH_F_ONLINK) {
-				err = ip6_route_check_nh_onlink(net, cfg, dev,
-								extack);
-			} else {
-				err = ip6_route_check_nh(net, cfg, &dev, &idev);
-			}
-			if (err)
-				goto out;
-		}
-		err = -EINVAL;
-		if (!dev) {
-			NL_SET_ERR_MSG(extack, "Egress device not specified");
-			goto out;
-		} else if (dev->flags & IFF_LOOPBACK) {
-			NL_SET_ERR_MSG(extack,
-				       "Egress device can not be loopback device for this route");
-			goto out;
-		}
+		rt->rt6i_gateway = cfg->fc_gateway;
 	}
 
 	err = -ENODEV;

From 232378e8db4780bc7145d7a0ee47f5f80a41ad6b Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Mar 2018 08:29:37 -0700
Subject: [PATCH 0947/1678] net/ipv6: Change address check to always take a
 device argument

ipv6_chk_addr_and_flags determines if an address is a local address and
optionally if it is an address on a specific device. For example, it is
called by ip6_route_info_create to determine if a given gateway address
is a local address. The address check currently does not consider L3
domains and as a result does not allow a route to be added in one VRF
if the nexthop points to an address in a second VRF. e.g.,

    $ ip route add 2001:db8:1::/64 vrf r2 via 2001:db8:102::23
    Error: Invalid gateway address.

where 2001:db8:102::23 is an address on an interface in vrf r1.

ipv6_chk_addr_and_flags needs to allow callers to always pass in a device
with a separate argument to not limit the address to the specific device.
The device is used used to determine the L3 domain of interest.

To that end add an argument to skip the device check and update callers
to always pass a device where possible and use the new argument to mean
any address in the domain.

Update a handful of users of ipv6_chk_addr with a NULL dev argument. This
patch handles the change to these callers without adding the domain check.

ip6_validate_gw needs to handle 2 cases - one where the device is given
as part of the nexthop spec and the other where the device is resolved.
There is at least 1 VRF case where deferring the check to only after
the route lookup has resolved the device fails with an unintuitive error
"RTNETLINK answers: No route to host" as opposed to the preferred
"Error: Gateway can not be a local address." The 'no route to host'
error is because of the fallback to a full lookup. The check is done
twice to avoid this error.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h |  4 ++--
 net/ipv6/addrconf.c    | 11 ++++++++---
 net/ipv6/anycast.c     |  9 ++++++---
 net/ipv6/datagram.c    |  5 +++--
 net/ipv6/ip6_tunnel.c  | 12 ++++++++----
 net/ipv6/ndisc.c       |  2 +-
 net/ipv6/route.c       | 19 +++++++++++++++----
 7 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c4185a7b0e90..132e5b95167a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 		  const struct net_device *dev, int strict);
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-			    const struct net_device *dev, int strict,
-			    u32 banned_flags);
+			    const struct net_device *dev, bool skip_dev_check,
+			    int strict, u32 banned_flags);
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b5fd116c046a..0677b9732d56 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1851,19 +1851,24 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
 int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 		  const struct net_device *dev, int strict)
 {
-	return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+	return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+				       strict, IFA_F_TENTATIVE);
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
-			    const struct net_device *dev, int strict,
-			    u32 banned_flags)
+			    const struct net_device *dev, bool skip_dev_check,
+			    int strict, u32 banned_flags)
 {
 	unsigned int hash = inet6_addr_hash(net, addr);
 	struct inet6_ifaddr *ifp;
 	u32 ifp_flags;
 
 	rcu_read_lock();
+
+	if (skip_dev_check)
+		dev = NULL;
+
 	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index c61718dba2e6..d580d4d456a5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -EPERM;
 	if (ipv6_addr_is_multicast(addr))
 		return -EINVAL;
-	if (ipv6_chk_addr(net, addr, NULL, 0))
+
+	if (ifindex)
+		dev = __dev_get_by_index(net, ifindex);
+
+	if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
 		return -EINVAL;
 
 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			dev = __dev_get_by_flags(net, IFF_UP,
 						 IFF_UP | IFF_LOOPBACK);
 		}
-	} else
-		dev = __dev_get_by_index(net, ifindex);
+	}
 
 	if (!dev) {
 		err = -ENODEV;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..b27333d7b099 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
 				if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
-				    !ipv6_chk_addr(net, &src_info->ipi6_addr,
-						   strict ? dev : NULL, 0) &&
+				    !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+							     dev, !strict, 0,
+							     IFA_F_TENTATIVE) &&
 				    !ipv6_chk_acast_addr_src(net, dev,
 							     &src_info->ipi6_addr))
 					err = -EINVAL;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5c045fa407da..456fcf942f95 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 			ldev = dev_get_by_index_rcu(net, p->link);
 
 		if ((ipv6_addr_is_multicast(laddr) ||
-		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+		     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+						    0, IFA_F_TENTATIVE))) &&
 		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
-		     likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+		     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+						     0, IFA_F_TENTATIVE))))
 			ret = 1;
 	}
 	return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 		if (p->link)
 			ldev = dev_get_by_index_rcu(net, p->link);
 
-		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+		if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+						      0, IFA_F_TENTATIVE)))
 			pr_warn("%s xmit: Local address not yet configured!\n",
 				p->name);
 		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
 			 !ipv6_addr_is_multicast(raddr) &&
-			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+			 unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+							  true, 0, IFA_F_TENTATIVE)))
 			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
 				p->name);
 		else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8af5eef464c1..10024eb0c521 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	int probes = atomic_read(&neigh->probes);
 
 	if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
-					   dev, 1,
+					   dev, false, 1,
 					   IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
 		saddr = &ipv6_hdr(skb)->saddr;
 	probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 23ced851fdb1..939d122e71b4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2632,7 +2632,9 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 {
 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
 	int gwa_type = ipv6_addr_type(gw_addr);
+	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
 	const struct net_device *dev = *_dev;
+	bool need_addr_check = !dev;
 	int err = -EINVAL;
 
 	/* if gw_addr is local we will fail to detect this in case
@@ -2640,10 +2642,9 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 	 * will return already-added prefix route via interface that
 	 * prefix route was assigned to, which might be non-loopback.
 	 */
-	if (ipv6_chk_addr_and_flags(net, gw_addr,
-				    gwa_type & IPV6_ADDR_LINKLOCAL ?
-				    dev : NULL, 0, 0)) {
-		NL_SET_ERR_MSG(extack, "Invalid gateway address");
+	if (dev &&
+	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
 		goto out;
 	}
 
@@ -2683,6 +2684,16 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
 			       "Egress device can not be loopback device for this route");
 		goto out;
 	}
+
+	/* if we did not check gw_addr above, do so now that the
+	 * egress device has been resolved.
+	 */
+	if (need_addr_check &&
+	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+		goto out;
+	}
+
 	err = 0;
 out:
 	return err;

From 1893ff20275b9b9be4892b5b5785788ce45abdea Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Mar 2018 08:29:38 -0700
Subject: [PATCH 0948/1678] net/ipv6: Add l3mdev check to
 ipv6_chk_addr_and_flags

Lookup the L3 master device for the passed in device. Only consider
addresses on netdev's with the same master device. If the device is
not enslaved or is NULL, then the l3mdev is NULL which means only
devices not enslaved (ie, in the default domain) are considered.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0677b9732d56..6fd4bbdc444f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1856,22 +1856,37 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ *   1. does the address exist in the L3 domain that dev is part of
+ *      (skip_dev_check = true), or
+ *
+ *   2. does the address exist on the specific device
+ *      (skip_dev_check = false)
+ */
 int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
 			    const struct net_device *dev, bool skip_dev_check,
 			    int strict, u32 banned_flags)
 {
 	unsigned int hash = inet6_addr_hash(net, addr);
+	const struct net_device *l3mdev;
 	struct inet6_ifaddr *ifp;
 	u32 ifp_flags;
 
 	rcu_read_lock();
 
+	l3mdev = l3mdev_master_dev_rcu(dev);
 	if (skip_dev_check)
 		dev = NULL;
 
 	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
+
+		if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+			continue;
+
 		/* Decouple optimistic from tentative for evaluation here.
 		 * Ban optimistic addresses explicitly, when required.
 		 */

From 171a48717beeacb66be85c1e8e401111d3a0262c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Mar 2018 08:29:39 -0700
Subject: [PATCH 0949/1678] selftests: fib_tests: Use an alias for ip command

Replace 'ip -netns testns' with the alias IP. Shortens the line lengths
and makes running the commands manually a bit easier.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 169 ++++++++++++-----------
 1 file changed, 85 insertions(+), 84 deletions(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index b617985ecdc1..953254439e39 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -7,6 +7,7 @@
 ret=0
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
 
 log_test()
 {
@@ -32,19 +33,19 @@ setup()
 {
 	set -e
 	ip netns add testns
-	ip -netns testns link set dev lo up
+	$IP link set dev lo up
 
-	ip -netns testns link add dummy0 type dummy
-	ip -netns testns link set dev dummy0 up
-	ip -netns testns address add 198.51.100.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+	$IP link add dummy0 type dummy
+	$IP link set dev dummy0 up
+	$IP address add 198.51.100.1/24 dev dummy0
+	$IP -6 address add 2001:db8:1::1/64 dev dummy0
 	set +e
 
 }
 
 cleanup()
 {
-	ip -netns testns link del dev dummy0 &> /dev/null
+	$IP link del dev dummy0 &> /dev/null
 	ip netns del testns
 }
 
@@ -56,19 +57,19 @@ fib_unreg_unicast_test()
 	setup
 
 	echo "    Start point"
-	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
 	set -e
-	ip -netns testns link del dev dummy0
+	$IP link del dev dummy0
 	set +e
 
 	echo "    Nexthop device deleted"
-	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
 	log_test $? 2 "IPv4 fibmatch - no route"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 2 "IPv6 fibmatch - no route"
 
 	cleanup
@@ -83,43 +84,43 @@ fib_unreg_multipath_test()
 	setup
 
 	set -e
-	ip -netns testns link add dummy1 type dummy
-	ip -netns testns link set dev dummy1 up
-	ip -netns testns address add 192.0.2.1/24 dev dummy1
-	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
+	$IP link add dummy1 type dummy
+	$IP link set dev dummy1 up
+	$IP address add 192.0.2.1/24 dev dummy1
+	$IP -6 address add 2001:db8:2::1/64 dev dummy1
 
-	ip -netns testns route add 203.0.113.0/24 \
+	$IP route add 203.0.113.0/24 \
 		nexthop via 198.51.100.2 dev dummy0 \
 		nexthop via 192.0.2.2 dev dummy1
-	ip -netns testns -6 route add 2001:db8:3::/64 \
+	$IP -6 route add 2001:db8:3::/64 \
 		nexthop via 2001:db8:1::2 dev dummy0 \
 		nexthop via 2001:db8:2::2 dev dummy1
 	set +e
 
 	echo "    Start point"
-	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
 	set -e
-	ip -netns testns link del dev dummy0
+	$IP link del dev dummy0
 	set +e
 
 	echo "    One nexthop device deleted"
-	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
 	log_test $? 2 "IPv4 - multipath route removed on delete"
 
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	# In IPv6 we do not flush the entire multipath route.
 	log_test $? 0 "IPv6 - multipath down to single path"
 
 	set -e
-	ip -netns testns link del dev dummy1
+	$IP link del dev dummy1
 	set +e
 
 	echo "    Second nexthop device deleted"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	log_test $? 2 "IPv6 - no route"
 
 	cleanup
@@ -139,19 +140,19 @@ fib_down_unicast_test()
 	setup
 
 	echo "    Start point"
-	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
 	set -e
-	ip -netns testns link set dev dummy0 down
+	$IP link set dev dummy0 down
 	set +e
 
 	echo "    Route deleted on down"
-	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
 	log_test $? 2 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 2 "IPv6 fibmatch"
 
 	cleanup
@@ -162,31 +163,31 @@ fib_down_multipath_test_do()
 	local down_dev=$1
 	local up_dev=$2
 
-	ip -netns testns route get fibmatch 203.0.113.1 \
+	$IP route get fibmatch 203.0.113.1 \
 		oif $down_dev &> /dev/null
 	log_test $? 2 "IPv4 fibmatch on down device"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
+	$IP -6 route get fibmatch 2001:db8:3::1 \
 		oif $down_dev &> /dev/null
 	log_test $? 2 "IPv6 fibmatch on down device"
 
-	ip -netns testns route get fibmatch 203.0.113.1 \
+	$IP route get fibmatch 203.0.113.1 \
 		oif $up_dev &> /dev/null
 	log_test $? 0 "IPv4 fibmatch on up device"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 \
+	$IP -6 route get fibmatch 2001:db8:3::1 \
 		oif $up_dev &> /dev/null
 	log_test $? 0 "IPv6 fibmatch on up device"
 
-	ip -netns testns route get fibmatch 203.0.113.1 | \
+	$IP route get fibmatch 203.0.113.1 | \
 		grep $down_dev | grep -q "dead linkdown"
 	log_test $? 0 "IPv4 flags on down device"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
+	$IP -6 route get fibmatch 2001:db8:3::1 | \
 		grep $down_dev | grep -q "dead linkdown"
 	log_test $? 0 "IPv6 flags on down device"
 
-	ip -netns testns route get fibmatch 203.0.113.1 | \
+	$IP route get fibmatch 203.0.113.1 | \
 		grep $up_dev | grep -q "dead linkdown"
 	log_test $? 1 "IPv4 flags on up device"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 | \
+	$IP -6 route get fibmatch 2001:db8:3::1 | \
 		grep $up_dev | grep -q "dead linkdown"
 	log_test $? 1 "IPv6 flags on up device"
 }
@@ -199,53 +200,53 @@ fib_down_multipath_test()
 	setup
 
 	set -e
-	ip -netns testns link add dummy1 type dummy
-	ip -netns testns link set dev dummy1 up
+	$IP link add dummy1 type dummy
+	$IP link set dev dummy1 up
 
-	ip -netns testns address add 192.0.2.1/24 dev dummy1
-	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy1
+	$IP address add 192.0.2.1/24 dev dummy1
+	$IP -6 address add 2001:db8:2::1/64 dev dummy1
 
-	ip -netns testns route add 203.0.113.0/24 \
+	$IP route add 203.0.113.0/24 \
 		nexthop via 198.51.100.2 dev dummy0 \
 		nexthop via 192.0.2.2 dev dummy1
-	ip -netns testns -6 route add 2001:db8:3::/64 \
+	$IP -6 route add 2001:db8:3::/64 \
 		nexthop via 2001:db8:1::2 dev dummy0 \
 		nexthop via 2001:db8:2::2 dev dummy1
 	set +e
 
 	echo "    Verify start point"
-	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
 
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
 	set -e
-	ip -netns testns link set dev dummy0 down
+	$IP link set dev dummy0 down
 	set +e
 
 	echo "    One device down, one up"
 	fib_down_multipath_test_do "dummy0" "dummy1"
 
 	set -e
-	ip -netns testns link set dev dummy0 up
-	ip -netns testns link set dev dummy1 down
+	$IP link set dev dummy0 up
+	$IP link set dev dummy1 down
 	set +e
 
 	echo "    Other device down and up"
 	fib_down_multipath_test_do "dummy1" "dummy0"
 
 	set -e
-	ip -netns testns link set dev dummy0 down
+	$IP link set dev dummy0 down
 	set +e
 
 	echo "    Both devices down"
-	ip -netns testns route get fibmatch 203.0.113.1 &> /dev/null
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
 	log_test $? 2 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
 	log_test $? 2 "IPv6 fibmatch"
 
-	ip -netns testns link del dev dummy1
+	$IP link del dev dummy1
 	cleanup
 }
 
@@ -264,55 +265,55 @@ fib_carrier_local_test()
 	setup
 
 	set -e
-	ip -netns testns link set dev dummy0 carrier on
+	$IP link set dev dummy0 carrier on
 	set +e
 
 	echo "    Start point"
-	ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+	$IP route get fibmatch 198.51.100.1 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
-	ip -netns testns route get fibmatch 198.51.100.1 | \
+	$IP route get fibmatch 198.51.100.1 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv4 - no linkdown flag"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
+	$IP -6 route get fibmatch 2001:db8:1::1 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv6 - no linkdown flag"
 
 	set -e
-	ip -netns testns link set dev dummy0 carrier off
+	$IP link set dev dummy0 carrier off
 	sleep 1
 	set +e
 
 	echo "    Carrier off on nexthop"
-	ip -netns testns route get fibmatch 198.51.100.1 &> /dev/null
+	$IP route get fibmatch 198.51.100.1 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
-	ip -netns testns route get fibmatch 198.51.100.1 | \
+	$IP route get fibmatch 198.51.100.1 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv4 - linkdown flag set"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::1 | \
+	$IP -6 route get fibmatch 2001:db8:1::1 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv6 - linkdown flag set"
 
 	set -e
-	ip -netns testns address add 192.0.2.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+	$IP address add 192.0.2.1/24 dev dummy0
+	$IP -6 address add 2001:db8:2::1/64 dev dummy0
 	set +e
 
 	echo "    Route to local address with carrier down"
-	ip -netns testns route get fibmatch 192.0.2.1 &> /dev/null
+	$IP route get fibmatch 192.0.2.1 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
-	ip -netns testns route get fibmatch 192.0.2.1 | \
+	$IP route get fibmatch 192.0.2.1 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv4 linkdown flag set"
-	ip -netns testns -6 route get fibmatch 2001:db8:2::1 | \
+	$IP -6 route get fibmatch 2001:db8:2::1 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv6 linkdown flag set"
 
@@ -329,54 +330,54 @@ fib_carrier_unicast_test()
 	setup
 
 	set -e
-	ip -netns testns link set dev dummy0 carrier on
+	$IP link set dev dummy0 carrier on
 	set +e
 
 	echo "    Start point"
-	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
-	ip -netns testns route get fibmatch 198.51.100.2 | \
+	$IP route get fibmatch 198.51.100.2 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv4 no linkdown flag"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
+	$IP -6 route get fibmatch 2001:db8:1::2 | \
 		grep -q "linkdown"
 	log_test $? 1 "IPv6 no linkdown flag"
 
 	set -e
-	ip -netns testns link set dev dummy0 carrier off
+	$IP link set dev dummy0 carrier off
 	set +e
 
 	echo "    Carrier down"
-	ip -netns testns route get fibmatch 198.51.100.2 &> /dev/null
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
-	ip -netns testns route get fibmatch 198.51.100.2 | \
+	$IP route get fibmatch 198.51.100.2 | \
 		grep -q "linkdown"
 	log_test $? 0 "IPv4 linkdown flag set"
-	ip -netns testns -6 route get fibmatch 2001:db8:1::2 | \
+	$IP -6 route get fibmatch 2001:db8:1::2 | \
 		grep -q "linkdown"
 	log_test $? 0 "IPv6 linkdown flag set"
 
 	set -e
-	ip -netns testns address add 192.0.2.1/24 dev dummy0
-	ip -netns testns -6 address add 2001:db8:2::1/64 dev dummy0
+	$IP address add 192.0.2.1/24 dev dummy0
+	$IP -6 address add 2001:db8:2::1/64 dev dummy0
 	set +e
 
 	echo "    Second address added with carrier down"
-	ip -netns testns route get fibmatch 192.0.2.2 &> /dev/null
+	$IP route get fibmatch 192.0.2.2 &> /dev/null
 	log_test $? 0 "IPv4 fibmatch"
-	ip -netns testns -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+	$IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
 	log_test $? 0 "IPv6 fibmatch"
 
-	ip -netns testns route get fibmatch 192.0.2.2 | \
+	$IP route get fibmatch 192.0.2.2 | \
 		grep -q "linkdown"
 	log_test $? 0 "IPv4 linkdown flag set"
-	ip -netns testns -6 route get fibmatch 2001:db8:2::2 | \
+	$IP -6 route get fibmatch 2001:db8:2::2 | \
 		grep -q "linkdown"
 	log_test $? 0 "IPv6 linkdown flag set"
 

From a511858c7536fab15a59a0a2aee14c5a909da23a Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Mar 2018 08:29:40 -0700
Subject: [PATCH 0950/1678] selftests: fib_tests: Allow user to run a specific
 test

Allow a user to run just a specific fib test by setting the TEST
environment variable.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 953254439e39..cfdeb35bfed5 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -392,9 +392,13 @@ fib_carrier_test()
 
 fib_test()
 {
-	fib_unreg_test
-	fib_down_test
-	fib_carrier_test
+	if [ -n "$TEST" ]; then
+		eval $TEST
+	else
+		fib_unreg_test
+		fib_down_test
+		fib_carrier_test
+	fi
 }
 
 if [ "$(id -u)" -ne 0 ];then

From 654d3a7821e838dd7608d88922d8d3db9bdb95b4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Mar 2018 08:29:41 -0700
Subject: [PATCH 0951/1678] selftests: fib_tests: Add IPv6 nexthop spec tests

Add series of tests for valid and invalid nexthop specs for IPv6.

$ TEST=fib_nexthop_test ./fib_tests.sh
...
IPv6 nexthop tests
    TEST: Directly connected nexthop, unicast address              [ OK ]
    TEST: Directly connected nexthop, unicast address with device  [ OK ]
    TEST: Gateway is linklocal address                             [ OK ]
    TEST: Gateway is linklocal address, no device                  [ OK ]
    TEST: Gateway can not be local unicast address                 [ OK ]
    TEST: Gateway can not be local unicast address, with device    [ OK ]
    TEST: Gateway can not be a local linklocal address             [ OK ]
    TEST: Gateway can be local address in a VRF                    [ OK ]
    TEST: Gateway can be local address in a VRF, with device       [ OK ]
    TEST: Gateway can be local linklocal address in a VRF          [ OK ]
    TEST: Redirect to VRF lookup                                   [ OK ]
    TEST: VRF route, gateway can be local address in default VRF   [ OK ]
    TEST: VRF route, gateway can not be a local address            [ OK ]
    TEST: VRF route, gateway can not be a local addr with device   [ OK ]

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 180 ++++++++++++++++++++++-
 1 file changed, 178 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index cfdeb35bfed5..9164e60d4b66 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,6 +6,7 @@
 
 ret=0
 
+VERBOSE=${VERBOSE:=0}
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 IP="ip -netns testns"
 
@@ -16,10 +17,10 @@ log_test()
 	local msg="$3"
 
 	if [ ${rc} -eq ${expected} ]; then
-		printf "        %-60s  [ OK ]\n" "${msg}"
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
 	else
 		ret=1
-		printf "        %-60s  [FAIL]\n" "${msg}"
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 		echo
 			echo "hit enter to continue, 'q' to quit"
@@ -49,6 +50,28 @@ cleanup()
 	ip netns del testns
 }
 
+get_linklocal()
+{
+	local dev=$1
+	local addr
+
+	addr=$($IP -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
 fib_unreg_unicast_test()
 {
 	echo
@@ -390,6 +413,158 @@ fib_carrier_test()
 	fib_carrier_unicast_test
 }
 
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+	local desc="$1"
+	local erc=$2
+	local vrf=$3
+	local pfx=$4
+	local gw=$5
+	local dev=$6
+	local cmd out rc
+
+	[ "$vrf" = "-" ] && vrf="default"
+	[ -n "$gw" ] && gw="via $gw"
+	[ -n "$dev" ] && dev="dev $dev"
+
+	cmd="$IP route add vrf $vrf $pfx $gw $dev"
+	if [ "$VERBOSE" = "1" ]; then
+		printf "\n    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+	log_test $rc $erc "$desc"
+}
+
+fib4_nexthop()
+{
+	echo
+	echo "IPv4 nexthop tests"
+
+	echo "<<< write me >>>"
+}
+
+fib6_nexthop()
+{
+	local lldummy=$(get_linklocal dummy0)
+	local llv1=$(get_linklocal dummy0)
+
+	if [ -z "$lldummy" ]; then
+		echo "Failed to get linklocal address for dummy0"
+		return 1
+	fi
+	if [ -z "$llv1" ]; then
+		echo "Failed to get linklocal address for veth1"
+		return 1
+	fi
+
+	echo
+	echo "IPv6 nexthop tests"
+
+	add_rt "Directly connected nexthop, unicast address" 0 \
+		- 2001:db8:101::/64 2001:db8:1::2
+	add_rt "Directly connected nexthop, unicast address with device" 0 \
+		- 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+	add_rt "Gateway is linklocal address" 0 \
+		- 2001:db8:103::1/64 $llv1 "veth0"
+
+	# fails because LL address requires a device
+	add_rt "Gateway is linklocal address, no device" 2 \
+		- 2001:db8:104::1/64 $llv1
+
+	# local address can not be a gateway
+	add_rt "Gateway can not be local unicast address" 2 \
+		- 2001:db8:105::/64 2001:db8:1::1
+	add_rt "Gateway can not be local unicast address, with device" 2 \
+		- 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+	add_rt "Gateway can not be a local linklocal address" 2 \
+		- 2001:db8:107::1/64 $lldummy "dummy0"
+
+	# VRF tests
+	add_rt "Gateway can be local address in a VRF" 0 \
+		- 2001:db8:108::/64 2001:db8:51::2
+	add_rt "Gateway can be local address in a VRF, with device" 0 \
+		- 2001:db8:109::/64 2001:db8:51::2 "veth0"
+	add_rt "Gateway can be local linklocal address in a VRF" 0 \
+		- 2001:db8:110::1/64 $llv1 "veth0"
+
+	add_rt "Redirect to VRF lookup" 0 \
+		- 2001:db8:111::/64 "" "red"
+
+	add_rt "VRF route, gateway can be local address in default VRF" 0 \
+		red 2001:db8:112::/64 2001:db8:51::1
+
+	# local address in same VRF fails
+	add_rt "VRF route, gateway can not be a local address" 2 \
+		red 2001:db8:113::1/64 2001:db8:2::1
+	add_rt "VRF route, gateway can not be a local addr with device" 2 \
+		red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+#   dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+#   veth0  - 192.0.2.1/24    2001:db8:51::1/64
+#
+# VRF red:
+#   dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+#   veth1  - 192.0.2.2/24   2001:db8:51::2/64
+#
+#  [ dummy0   veth0 ]--[ veth1   dummy1 ]
+
+fib_nexthop_test()
+{
+	setup
+
+	set -e
+
+	$IP -4 rule add pref 32765 table local
+	$IP -4 rule del pref 0
+	$IP -6 rule add pref 32765 table local
+	$IP -6 rule del pref 0
+
+	$IP link add red type vrf table 1
+	$IP link set red up
+	$IP -4 route add vrf red unreachable default metric 4278198272
+	$IP -6 route add vrf red unreachable default metric 4278198272
+
+	$IP link add veth0 type veth peer name veth1
+	$IP link set dev veth0 up
+	$IP address add 192.0.2.1/24 dev veth0
+	$IP -6 address add 2001:db8:51::1/64 dev veth0
+
+	$IP link set dev veth1 vrf red up
+	$IP address add 192.0.2.2/24 dev veth1
+	$IP -6 address add 2001:db8:51::2/64 dev veth1
+
+	$IP link add dummy1 type dummy
+	$IP link set dev dummy1 vrf red up
+	$IP address add 192.168.2.1/24 dev dummy1
+	$IP -6 address add 2001:db8:2::1/64 dev dummy1
+	set +e
+
+	sleep 1
+	fib4_nexthop
+	fib6_nexthop
+
+	(
+	$IP link del dev dummy1
+	$IP link del veth0
+	$IP link del red
+	) 2>/dev/null
+	cleanup
+}
+
+################################################################################
+#
+
 fib_test()
 {
 	if [ -n "$TEST" ]; then
@@ -398,6 +573,7 @@ fib_test()
 		fib_unreg_test
 		fib_down_test
 		fib_carrier_test
+		fib_nexthop_test
 	fi
 }
 

From 1e8029515816f771b9b3751f24f19fe6df4c72ae Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Tue, 13 Mar 2018 21:57:16 -0700
Subject: [PATCH 0952/1678] udp: Move the udp sysctl to namespace.

This patch moves the udp_rmem_min, udp_wmem_min
to namespace and init the udp_l3mdev_accept explicitly.

The udp_rmem_min/udp_wmem_min affect udp rx/tx queue,
with this patch namespaces can set them differently.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   |  3 ++
 net/ipv4/sysctl_net_ipv4.c | 32 +++++++-------
 net/ipv4/udp.c             | 86 ++++++++++++++++++++++----------------
 net/ipv6/udp.c             | 52 +++++++++++------------
 4 files changed, 96 insertions(+), 77 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3a970e429ab6..382bfd7583cf 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -168,6 +168,9 @@ struct netns_ipv4 {
 	atomic_t tfo_active_disable_times;
 	unsigned long tfo_active_disable_stamp;
 
+	int sysctl_udp_wmem_min;
+	int sysctl_udp_rmem_min;
+
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	int sysctl_udp_l3mdev_accept;
 #endif
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 011de9a20ec6..5b72d97693f8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -520,22 +520,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
-	{
-		.procname	= "udp_rmem_min",
-		.data		= &sysctl_udp_rmem_min,
-		.maxlen		= sizeof(sysctl_udp_rmem_min),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
-	},
-	{
-		.procname	= "udp_wmem_min",
-		.data		= &sysctl_udp_wmem_min,
-		.maxlen		= sizeof(sysctl_udp_wmem_min),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one
-	},
 	{ }
 };
 
@@ -1167,6 +1151,22 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &one,
 	},
+	{
+		.procname	= "udp_rmem_min",
+		.data		= &init_net.ipv4.sysctl_udp_rmem_min,
+		.maxlen		= sizeof(init_net.ipv4.sysctl_udp_rmem_min),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one
+	},
+	{
+		.procname	= "udp_wmem_min",
+		.data		= &init_net.ipv4.sysctl_udp_wmem_min,
+		.maxlen		= sizeof(init_net.ipv4.sysctl_udp_wmem_min),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one
+	},
 	{ }
 };
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3013404d0935..908fc02fb4f8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -122,12 +122,6 @@ EXPORT_SYMBOL(udp_table);
 long sysctl_udp_mem[3] __read_mostly;
 EXPORT_SYMBOL(sysctl_udp_mem);
 
-int sysctl_udp_rmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_rmem_min);
-
-int sysctl_udp_wmem_min __read_mostly;
-EXPORT_SYMBOL(sysctl_udp_wmem_min);
-
 atomic_long_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
@@ -2533,35 +2527,35 @@ int udp_abort(struct sock *sk, int err)
 EXPORT_SYMBOL_GPL(udp_abort);
 
 struct proto udp_prot = {
-	.name		   = "UDP",
-	.owner		   = THIS_MODULE,
-	.close		   = udp_lib_close,
-	.connect	   = ip4_datagram_connect,
-	.disconnect	   = udp_disconnect,
-	.ioctl		   = udp_ioctl,
-	.init		   = udp_init_sock,
-	.destroy	   = udp_destroy_sock,
-	.setsockopt	   = udp_setsockopt,
-	.getsockopt	   = udp_getsockopt,
-	.sendmsg	   = udp_sendmsg,
-	.recvmsg	   = udp_recvmsg,
-	.sendpage	   = udp_sendpage,
-	.release_cb	   = ip4_datagram_release_cb,
-	.hash		   = udp_lib_hash,
-	.unhash		   = udp_lib_unhash,
-	.rehash		   = udp_v4_rehash,
-	.get_port	   = udp_v4_get_port,
-	.memory_allocated  = &udp_memory_allocated,
-	.sysctl_mem	   = sysctl_udp_mem,
-	.sysctl_wmem	   = &sysctl_udp_wmem_min,
-	.sysctl_rmem	   = &sysctl_udp_rmem_min,
-	.obj_size	   = sizeof(struct udp_sock),
-	.h.udp_table	   = &udp_table,
+	.name			= "UDP",
+	.owner			= THIS_MODULE,
+	.close			= udp_lib_close,
+	.connect		= ip4_datagram_connect,
+	.disconnect		= udp_disconnect,
+	.ioctl			= udp_ioctl,
+	.init			= udp_init_sock,
+	.destroy		= udp_destroy_sock,
+	.setsockopt		= udp_setsockopt,
+	.getsockopt		= udp_getsockopt,
+	.sendmsg		= udp_sendmsg,
+	.recvmsg		= udp_recvmsg,
+	.sendpage		= udp_sendpage,
+	.release_cb		= ip4_datagram_release_cb,
+	.hash			= udp_lib_hash,
+	.unhash			= udp_lib_unhash,
+	.rehash			= udp_v4_rehash,
+	.get_port		= udp_v4_get_port,
+	.memory_allocated	= &udp_memory_allocated,
+	.sysctl_mem		= sysctl_udp_mem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+	.obj_size		= sizeof(struct udp_sock),
+	.h.udp_table		= &udp_table,
 #ifdef CONFIG_COMPAT
-	.compat_setsockopt = compat_udp_setsockopt,
-	.compat_getsockopt = compat_udp_getsockopt,
+	.compat_setsockopt	= compat_udp_setsockopt,
+	.compat_getsockopt	= compat_udp_getsockopt,
 #endif
-	.diag_destroy	   = udp_abort,
+	.diag_destroy		= udp_abort,
 };
 EXPORT_SYMBOL(udp_prot);
 
@@ -2831,6 +2825,26 @@ u32 udp_flow_hashrnd(void)
 }
 EXPORT_SYMBOL(udp_flow_hashrnd);
 
+static void __udp_sysctl_init(struct net *net)
+{
+	net->ipv4.sysctl_udp_rmem_min = SK_MEM_QUANTUM;
+	net->ipv4.sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	net->ipv4.sysctl_udp_l3mdev_accept = 0;
+#endif
+}
+
+static int __net_init udp_sysctl_init(struct net *net)
+{
+	__udp_sysctl_init(net);
+	return 0;
+}
+
+static struct pernet_operations __net_initdata udp_sysctl_ops = {
+	.init       = udp_sysctl_init,
+};
+
 void __init udp_init(void)
 {
 	unsigned long limit;
@@ -2843,8 +2857,7 @@ void __init udp_init(void)
 	sysctl_udp_mem[1] = limit;
 	sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
 
-	sysctl_udp_rmem_min = SK_MEM_QUANTUM;
-	sysctl_udp_wmem_min = SK_MEM_QUANTUM;
+	__udp_sysctl_init(&init_net);
 
 	/* 16 spinlocks per cpu */
 	udp_busylocks_log = ilog2(nr_cpu_ids) + 4;
@@ -2854,4 +2867,7 @@ void __init udp_init(void)
 		panic("UDP: failed to alloc udp_busylocks\n");
 	for (i = 0; i < (1U << udp_busylocks_log); i++)
 		spin_lock_init(udp_busylocks + i);
+
+	if (register_pernet_subsys(&udp_sysctl_ops))
+		panic("UDP: failed to init sysctl parameters.\n");
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52e3ea0e6f50..ad30f5e31969 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1509,34 +1509,34 @@ void udp6_proc_exit(struct net *net)
 /* ------------------------------------------------------------------------ */
 
 struct proto udpv6_prot = {
-	.name		   = "UDPv6",
-	.owner		   = THIS_MODULE,
-	.close		   = udp_lib_close,
-	.connect	   = ip6_datagram_connect,
-	.disconnect	   = udp_disconnect,
-	.ioctl		   = udp_ioctl,
-	.init		   = udp_init_sock,
-	.destroy	   = udpv6_destroy_sock,
-	.setsockopt	   = udpv6_setsockopt,
-	.getsockopt	   = udpv6_getsockopt,
-	.sendmsg	   = udpv6_sendmsg,
-	.recvmsg	   = udpv6_recvmsg,
-	.release_cb	   = ip6_datagram_release_cb,
-	.hash		   = udp_lib_hash,
-	.unhash		   = udp_lib_unhash,
-	.rehash		   = udp_v6_rehash,
-	.get_port	   = udp_v6_get_port,
-	.memory_allocated  = &udp_memory_allocated,
-	.sysctl_mem	   = sysctl_udp_mem,
-	.sysctl_wmem	   = &sysctl_udp_wmem_min,
-	.sysctl_rmem	   = &sysctl_udp_rmem_min,
-	.obj_size	   = sizeof(struct udp6_sock),
-	.h.udp_table	   = &udp_table,
+	.name			= "UDPv6",
+	.owner			= THIS_MODULE,
+	.close			= udp_lib_close,
+	.connect		= ip6_datagram_connect,
+	.disconnect		= udp_disconnect,
+	.ioctl			= udp_ioctl,
+	.init			= udp_init_sock,
+	.destroy		= udpv6_destroy_sock,
+	.setsockopt		= udpv6_setsockopt,
+	.getsockopt		= udpv6_getsockopt,
+	.sendmsg		= udpv6_sendmsg,
+	.recvmsg		= udpv6_recvmsg,
+	.release_cb		= ip6_datagram_release_cb,
+	.hash			= udp_lib_hash,
+	.unhash			= udp_lib_unhash,
+	.rehash			= udp_v6_rehash,
+	.get_port		= udp_v6_get_port,
+	.memory_allocated	= &udp_memory_allocated,
+	.sysctl_mem		= sysctl_udp_mem,
+	.sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+	.sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+	.obj_size		= sizeof(struct udp6_sock),
+	.h.udp_table		= &udp_table,
 #ifdef CONFIG_COMPAT
-	.compat_setsockopt = compat_udpv6_setsockopt,
-	.compat_getsockopt = compat_udpv6_getsockopt,
+	.compat_setsockopt	= compat_udpv6_setsockopt,
+	.compat_getsockopt	= compat_udpv6_getsockopt,
 #endif
-	.diag_destroy      = udp_abort,
+	.diag_destroy		= udp_abort,
 };
 
 static struct inet_protosw udpv6_protosw = {

From 320bd6de79ef0de1ece7c184469a722de690ccb0 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Tue, 13 Mar 2018 21:57:17 -0700
Subject: [PATCH 0953/1678] doc: Change the udp/sctp rmem/wmem default value.

The SK_MEM_QUANTUM was changed from PAGE_SIZE to 4096.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 783675a730e5..1d1120753ae8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -755,13 +755,13 @@ udp_rmem_min - INTEGER
 	Minimal size of receive buffer used by UDP sockets in moderation.
 	Each UDP socket is able to use the size for receiving data, even if
 	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-	Default: 1 page
+	Default: 4K
 
 udp_wmem_min - INTEGER
 	Minimal size of send buffer used by UDP sockets in moderation.
 	Each UDP socket is able to use the size for sending data, even if
 	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-	Default: 1 page
+	Default: 4K
 
 CIPSOv4 Variables:
 
@@ -2101,7 +2101,7 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
 	It is guaranteed to each SCTP socket (but not association) even
 	under moderate memory pressure.
 
-	Default: 1 page
+	Default: 4K
 
 sctp_wmem  - vector of 3 INTEGERs: min, default, max
 	Currently this tunable has no effect.

From 79ffdfc6522ae33d8a33e971070c08ee5f27439b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 14 Mar 2018 22:17:20 +0300
Subject: [PATCH 0954/1678] net: Add rtnl_lock_killable()

rtnl_lock() is widely used mutex in kernel. Some of kernel code
does memory allocations under it. In case of memory deficit this
may invoke OOM killer, but the problem is a killed task can't
exit if it's waiting for the mutex. This may be a reason of deadlock
and panic.

This patch adds a new primitive, which responds on SIGKILL, and
it allows to use it in the places, where we don't want to sleep
forever.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 1 +
 net/core/rtnetlink.c      | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 3573b4bf2fdf..562a175c35a9 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -33,6 +33,7 @@ extern void rtnl_lock(void);
 extern void rtnl_unlock(void);
 extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
+extern int rtnl_lock_killable(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct rw_semaphore net_sem;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 67f375cfb982..87079eaa871b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -75,6 +75,12 @@ void rtnl_lock(void)
 }
 EXPORT_SYMBOL(rtnl_lock);
 
+int rtnl_lock_killable(void)
+{
+	return mutex_lock_killable(&rtnl_mutex);
+}
+EXPORT_SYMBOL(rtnl_lock_killable);
+
 static struct sk_buff *defer_kfree_skb_list;
 void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
 {

From b0f3debc9a1284d6b861e3f7cce0d119e6cd601d Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Wed, 14 Mar 2018 22:17:28 +0300
Subject: [PATCH 0955/1678] net: Use rtnl_lock_killable() in register_netdev()

This patch adds rtnl_lock_killable() to one of hot path
using rtnl_lock().

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 12a9aad0b057..d8887cc38e7b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8018,7 +8018,8 @@ int register_netdev(struct net_device *dev)
 {
 	int err;
 
-	rtnl_lock();
+	if (rtnl_lock_killable())
+		return -EINTR;
 	err = register_netdevice(dev);
 	rtnl_unlock();
 	return err;

From 8b7372c101a5f9f38a092ab2533a75fc85c6e52a Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 16 Mar 2018 14:22:57 +0530
Subject: [PATCH 0956/1678] cxgb4: notify fatal error to uld drivers

notify uld drivers if the adapter encounters fatal
error.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/cxgb4/device.c            |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  |  3 ++-
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 7a9d0de89d6a..e96771ddc9a7 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -1217,6 +1217,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
 		if (ctx->dev)
 			c4iw_remove(ctx);
 		break;
+	case CXGB4_STATE_FATAL_ERROR:
 	case CXGB4_STATE_START_RECOVERY:
 		pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
 		if (ctx->dev) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index b2df0ffb7c94..a5c0a649f3c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -917,6 +917,7 @@ struct adapter {
 	struct work_struct tid_release_task;
 	struct work_struct db_full_task;
 	struct work_struct db_drop_task;
+	struct work_struct fatal_err_notify_task;
 	bool tid_release_task_busy;
 
 	/* lock for mailbox cmd list */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5a349e1576cb..72ec3f7dccbb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3255,6 +3255,14 @@ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
 	.get_drvinfo       = cxgb4_mgmt_get_drvinfo,
 };
 
+static void notify_fatal_err(struct work_struct *work)
+{
+	struct adapter *adap;
+
+	adap = container_of(work, struct adapter, fatal_err_notify_task);
+	notify_ulds(adap, CXGB4_STATE_FATAL_ERROR);
+}
+
 void t4_fatal_err(struct adapter *adap)
 {
 	int port;
@@ -3279,6 +3287,7 @@ void t4_fatal_err(struct adapter *adap)
 		netif_carrier_off(dev);
 	}
 	dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
+	queue_work(adap->workq, &adap->fatal_err_notify_task);
 }
 
 static void setup_memwin(struct adapter *adap)
@@ -5479,6 +5488,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
 	INIT_WORK(&adapter->db_full_task, process_db_full);
 	INIT_WORK(&adapter->db_drop_task, process_db_drop);
+	INIT_WORK(&adapter->fatal_err_notify_task, notify_fatal_err);
 
 	err = t4_prep_adapter(adapter);
 	if (err)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index a14e8db51cdc..788146c08151 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -257,7 +257,8 @@ enum cxgb4_state {
 	CXGB4_STATE_UP,
 	CXGB4_STATE_START_RECOVERY,
 	CXGB4_STATE_DOWN,
-	CXGB4_STATE_DETACH
+	CXGB4_STATE_DETACH,
+	CXGB4_STATE_FATAL_ERROR
 };
 
 enum cxgb4_control {

From c246d942eabc3288f5d93930663411070093ac52 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 15:06:39 +0100
Subject: [PATCH 0957/1678] net/smc: restructure netinfo for CLC proposal msgs

Introduce functions smc_clc_prfx_set to retrieve IP information for the
CLC proposal msg and smc_clc_prfx_match to match the contents of a
proposal message against the IP addresses of the net device. The new
functions replace the functionality provided by smc_clc_netinfo_by_tcpsk,
which is removed by this patch. The match functionality is extended to
scan all ipv4 addresses of the net device for a match against the
ipv4 subnet from the proposal msg.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  |  14 ++----
 net/smc/smc_clc.c | 106 ++++++++++++++++++++++++++++++++++------------
 net/smc/smc_clc.h |   4 +-
 3 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 649489f825a5..949a2714a453 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -767,8 +767,6 @@ static void smc_listen_work(struct work_struct *work)
 	struct smc_link *link;
 	int reason_code = 0;
 	int rc = 0;
-	__be32 subnet;
-	u8 prefix_len;
 	u8 ibport;
 
 	/* check if peer is smc capable */
@@ -803,17 +801,11 @@ static void smc_listen_work(struct work_struct *work)
 		goto decline_rdma;
 	}
 
-	/* determine subnet and mask from internal TCP socket */
-	rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
-	if (rc) {
-		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
-		goto decline_rdma;
-	}
-
 	pclc = (struct smc_clc_msg_proposal *)&buf;
 	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
-	if (pclc_prfx->outgoing_subnet != subnet ||
-	    pclc_prfx->prefix_len != prefix_len) {
+
+	rc = smc_clc_prfx_match(newclcsock, pclc_prfx);
+	if (rc) {
 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
 		goto decline_rdma;
 	}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 874c5a75d6dd..dc3a2235978d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -74,13 +74,81 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
 	return true;
 }
 
-/* determine subnet and mask of internal TCP socket */
-int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
-			     __be32 *subnet, u8 *prefix_len)
+/* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */
+static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
+				 struct smc_clc_msg_proposal_prefix *prop)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dst->dev);
+
+	if (!in_dev)
+		return -ENODEV;
+	for_ifa(in_dev) {
+		if (!inet_ifa_match(ipv4, ifa))
+			continue;
+		prop->prefix_len = inet_mask_len(ifa->ifa_mask);
+		prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask;
+		/* prop->ipv6_prefixes_cnt = 0; already done by memset before */
+		return 0;
+	} endfor_ifa(in_dev);
+	return -ENOENT;
+}
+
+/* retrieve and set prefixes in CLC proposal msg */
+static int smc_clc_prfx_set(struct socket *clcsock,
+			    struct smc_clc_msg_proposal_prefix *prop)
+{
+	struct dst_entry *dst = sk_dst_get(clcsock->sk);
+	struct sockaddr_storage addrs;
+	struct sockaddr_in *addr;
+	int rc = -ENOENT;
+
+	memset(prop, 0, sizeof(*prop));
+	if (!dst) {
+		rc = -ENOTCONN;
+		goto out;
+	}
+	if (!dst->dev) {
+		rc = -ENODEV;
+		goto out_rel;
+	}
+	/* get address to which the internal TCP socket is bound */
+	kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+	/* analyze IP specific data of net_device belonging to TCP socket */
+	rcu_read_lock();
+	if (addrs.ss_family == PF_INET) {
+		/* IPv4 */
+		addr = (struct sockaddr_in *)&addrs;
+		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+	}
+	rcu_read_unlock();
+out_rel:
+	dst_release(dst);
+out:
+	return rc;
+}
+
+/* match ipv4 addrs of dev against addr in CLC proposal */
+static int smc_clc_prfx_match4_rcu(struct net_device *dev,
+				   struct smc_clc_msg_proposal_prefix *prop)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+	if (!in_dev)
+		return -ENODEV;
+	for_ifa(in_dev) {
+		if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) &&
+		    inet_ifa_match(prop->outgoing_subnet, ifa))
+			return 0;
+	} endfor_ifa(in_dev);
+
+	return -ENOENT;
+}
+
+/* check if proposed prefixes match one of our device prefixes */
+int smc_clc_prfx_match(struct socket *clcsock,
+		       struct smc_clc_msg_proposal_prefix *prop)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
-	struct in_device *in_dev;
-	struct sockaddr_in addr;
 	int rc = -ENOENT;
 
 	if (!dst) {
@@ -91,22 +159,10 @@ int smc_clc_netinfo_by_tcpsk(struct socket *clcsock,
 		rc = -ENODEV;
 		goto out_rel;
 	}
-
-	/* get address to which the internal TCP socket is bound */
-	kernel_getsockname(clcsock, (struct sockaddr *)&addr);
-	/* analyze IPv4 specific data of net_device belonging to TCP socket */
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(dst->dev);
-	for_ifa(in_dev) {
-		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
-			continue;
-		*prefix_len = inet_mask_len(ifa->ifa_mask);
-		*subnet = ifa->ifa_address & ifa->ifa_mask;
-		rc = 0;
-		break;
-	} endfor_ifa(in_dev);
+	if (!prop->ipv6_prefixes_cnt)
+		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
 	rcu_read_unlock();
-
 out_rel:
 	dst_release(dst);
 out:
@@ -240,6 +296,11 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 	struct msghdr msg;
 	int len, plen, rc;
 
+	/* retrieve ip prefixes for CLC proposal msg */
+	rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx);
+	if (rc)
+		return SMC_CLC_DECL_CNFERR; /* configuration error */
+
 	/* send SMC Proposal CLC message */
 	plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
 	memset(&pclc, 0, sizeof(pclc));
@@ -252,13 +313,6 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
 	pclc.iparea_offset = htons(0);
 
-	memset(&pclc_prfx, 0, sizeof(pclc_prfx));
-	/* determine subnet and mask from internal TCP socket */
-	rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
-				      &pclc_prfx.prefix_len);
-	if (rc)
-		return SMC_CLC_DECL_CNFERR; /* configuration error */
-	pclc_prfx.ipv6_prefixes_cnt = 0;
 	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	memset(&msg, 0, sizeof(msg));
 	vec[0].iov_base = &pclc;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 20e048beac30..5ddb0d22eb8a 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -122,8 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
 	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
-int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
-			     u8 *prefix_len);
+int smc_clc_prfx_match(struct socket *clcsock,
+		       struct smc_clc_msg_proposal_prefix *prop);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);

From 1a26d0201d7670fd6c9086e15504911ce240e6ff Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 15:06:40 +0100
Subject: [PATCH 0958/1678] net/smc: add ipv6 support to CLC layer

The CLC layer is updated to support ipv6 proposal messages from peers and
to match incoming proposal messages against the ipv6 addresses of the net
device. struct smc_clc_ipv6_prefix is updated to provide the space for an
ipv6 address (struct was not used before). SMC_CLC_MAX_LEN is updated to
include the size of the proposal prefix. Existing code in net is not
affected, the previous SMC_CLC_MAX_LEN value is large enough to hold ipv4
proposal messages.

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_clc.c | 109 ++++++++++++++++++++++++++++++++++++++++------
 net/smc/smc_clc.h |  13 ++++--
 2 files changed, 105 insertions(+), 17 deletions(-)

diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index dc3a2235978d..64fbc3230e6c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -5,7 +5,7 @@
  *  CLC (connection layer control) handshake over initial TCP socket to
  *  prepare for RDMA traffic
  *
- *  Copyright IBM Corp. 2016
+ *  Copyright IBM Corp. 2016, 2018
  *
  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  */
@@ -15,6 +15,7 @@
 #include <linux/if_ether.h>
 #include <linux/sched/signal.h>
 
+#include <net/addrconf.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 
@@ -93,12 +94,44 @@ static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4,
 	return -ENOENT;
 }
 
+/* fill CLC proposal msg with ipv6 prefixes from device */
+static int smc_clc_prfx_set6_rcu(struct dst_entry *dst,
+				 struct smc_clc_msg_proposal_prefix *prop,
+				 struct smc_clc_ipv6_prefix *ipv6_prfx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct inet6_dev *in6_dev = __in6_dev_get(dst->dev);
+	struct inet6_ifaddr *ifa;
+	int cnt = 0;
+
+	if (!in6_dev)
+		return -ENODEV;
+	/* use a maximum of 8 IPv6 prefixes from device */
+	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+			continue;
+		ipv6_addr_prefix(&ipv6_prfx[cnt].prefix,
+				 &ifa->addr, ifa->prefix_len);
+		ipv6_prfx[cnt].prefix_len = ifa->prefix_len;
+		cnt++;
+		if (cnt == SMC_CLC_MAX_V6_PREFIX)
+			break;
+	}
+	prop->ipv6_prefixes_cnt = cnt;
+	if (cnt)
+		return 0;
+#endif
+	return -ENOENT;
+}
+
 /* retrieve and set prefixes in CLC proposal msg */
 static int smc_clc_prfx_set(struct socket *clcsock,
-			    struct smc_clc_msg_proposal_prefix *prop)
+			    struct smc_clc_msg_proposal_prefix *prop,
+			    struct smc_clc_ipv6_prefix *ipv6_prfx)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 	struct sockaddr_storage addrs;
+	struct sockaddr_in6 *addr6;
 	struct sockaddr_in *addr;
 	int rc = -ENOENT;
 
@@ -114,11 +147,19 @@ static int smc_clc_prfx_set(struct socket *clcsock,
 	/* get address to which the internal TCP socket is bound */
 	kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
 	/* analyze IP specific data of net_device belonging to TCP socket */
+	addr6 = (struct sockaddr_in6 *)&addrs;
 	rcu_read_lock();
 	if (addrs.ss_family == PF_INET) {
 		/* IPv4 */
 		addr = (struct sockaddr_in *)&addrs;
 		rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop);
+	} else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) {
+		/* mapped IPv4 address - peer is IPv4 only */
+		rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3],
+					   prop);
+	} else {
+		/* IPv6 */
+		rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx);
 	}
 	rcu_read_unlock();
 out_rel:
@@ -144,12 +185,41 @@ static int smc_clc_prfx_match4_rcu(struct net_device *dev,
 	return -ENOENT;
 }
 
+/* match ipv6 addrs of dev against addrs in CLC proposal */
+static int smc_clc_prfx_match6_rcu(struct net_device *dev,
+				   struct smc_clc_msg_proposal_prefix *prop)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct inet6_dev *in6_dev = __in6_dev_get(dev);
+	struct smc_clc_ipv6_prefix *ipv6_prfx;
+	struct inet6_ifaddr *ifa;
+	int i, max;
+
+	if (!in6_dev)
+		return -ENODEV;
+	/* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */
+	ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop));
+	max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX);
+	list_for_each_entry(ifa, &in6_dev->addr_list, if_list) {
+		if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)
+			continue;
+		for (i = 0; i < max; i++) {
+			if (ifa->prefix_len == ipv6_prfx[i].prefix_len &&
+			    ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix,
+					      ifa->prefix_len))
+				return 0;
+		}
+	}
+#endif
+	return -ENOENT;
+}
+
 /* check if proposed prefixes match one of our device prefixes */
 int smc_clc_prfx_match(struct socket *clcsock,
 		       struct smc_clc_msg_proposal_prefix *prop)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
-	int rc = -ENOENT;
+	int rc;
 
 	if (!dst) {
 		rc = -ENOTCONN;
@@ -162,6 +232,8 @@ int smc_clc_prfx_match(struct socket *clcsock,
 	rcu_read_lock();
 	if (!prop->ipv6_prefixes_cnt)
 		rc = smc_clc_prfx_match4_rcu(dst->dev, prop);
+	else
+		rc = smc_clc_prfx_match6_rcu(dst->dev, prop);
 	rcu_read_unlock();
 out_rel:
 	dst_release(dst);
@@ -288,21 +360,24 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 			  struct smc_ib_device *smcibdev,
 			  u8 ibport)
 {
+	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
 	struct smc_clc_msg_proposal_prefix pclc_prfx;
 	struct smc_clc_msg_proposal pclc;
 	struct smc_clc_msg_trail trl;
+	int len, i, plen, rc;
 	int reason_code = 0;
-	struct kvec vec[3];
+	struct kvec vec[4];
 	struct msghdr msg;
-	int len, plen, rc;
 
 	/* retrieve ip prefixes for CLC proposal msg */
-	rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx);
+	rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
 	if (rc)
 		return SMC_CLC_DECL_CNFERR; /* configuration error */
 
 	/* send SMC Proposal CLC message */
-	plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
+	plen = sizeof(pclc) + sizeof(pclc_prfx) +
+	       (pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
+	       sizeof(trl);
 	memset(&pclc, 0, sizeof(pclc));
 	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	pclc.hdr.type = SMC_CLC_PROPOSAL;
@@ -315,14 +390,20 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 
 	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	memset(&msg, 0, sizeof(msg));
-	vec[0].iov_base = &pclc;
-	vec[0].iov_len = sizeof(pclc);
-	vec[1].iov_base = &pclc_prfx;
-	vec[1].iov_len = sizeof(pclc_prfx);
-	vec[2].iov_base = &trl;
-	vec[2].iov_len = sizeof(trl);
+	i = 0;
+	vec[i].iov_base = &pclc;
+	vec[i++].iov_len = sizeof(pclc);
+	vec[i].iov_base = &pclc_prfx;
+	vec[i++].iov_len = sizeof(pclc_prfx);
+	if (pclc_prfx.ipv6_prefixes_cnt > 0) {
+		vec[i].iov_base = &ipv6_prfx[0];
+		vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
+				   sizeof(ipv6_prfx[0]);
+	}
+	vec[i].iov_base = &trl;
+	vec[i++].iov_len = sizeof(trl);
 	/* due to the few bytes needed for clc-handshake this cannot block */
-	len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
+	len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
 	if (len < sizeof(pclc)) {
 		if (len >= 0) {
 			reason_code = -ENETUNREACH;
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5ddb0d22eb8a..63bf1dc2c1f9 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -60,10 +60,15 @@ struct smc_clc_msg_local {	/* header2 of clc messages */
 	u8 mac[6];		/* mac of ib_device port */
 };
 
+#define SMC_CLC_MAX_V6_PREFIX	8
+
+/* Struct would be 4 byte aligned, but it is used in an array that is sent
+ * to peers and must conform to RFC7609, hence we need to use packed here.
+ */
 struct smc_clc_ipv6_prefix {
-	u8 prefix[4];
+	struct in6_addr prefix;
 	u8 prefix_len;
-} __packed;
+} __packed;			/* format defined in RFC7609 */
 
 struct smc_clc_msg_proposal_prefix {	/* prefix part of clc proposal message*/
 	__be32 outgoing_subnet;	/* subnet mask */
@@ -79,9 +84,11 @@ struct smc_clc_msg_proposal {	/* clc proposal message sent by Linux */
 } __aligned(4);
 
 #define SMC_CLC_PROPOSAL_MAX_OFFSET	0x28
-#define SMC_CLC_PROPOSAL_MAX_PREFIX	(8 * sizeof(struct smc_clc_ipv6_prefix))
+#define SMC_CLC_PROPOSAL_MAX_PREFIX	(SMC_CLC_MAX_V6_PREFIX * \
+					 sizeof(struct smc_clc_ipv6_prefix))
 #define SMC_CLC_MAX_LEN		(sizeof(struct smc_clc_msg_proposal) + \
 				 SMC_CLC_PROPOSAL_MAX_OFFSET + \
+				 sizeof(struct smc_clc_msg_proposal_prefix) + \
 				 SMC_CLC_PROPOSAL_MAX_PREFIX + \
 				 sizeof(struct smc_clc_msg_trail))
 

From aaa4d33f6da1e1a415f4f7c299a97defd845ce7d Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 15:06:41 +0100
Subject: [PATCH 0959/1678] net/smc: enable ipv6 support for smc

Add ipv6 support to the smc socket layer functions. Make use of the
updated clc layer functions to retrieve and match ipv6 information.
The indicator for ipv4 or ipv6 is the protocol constant that is provided
in the socket() call with address family AF_SMC.

Based-on-patch-by: Takanori Ueda <tkueda@jp.ibm.com>

Signed-off-by: Karsten Graul <kgraul@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 64 ++++++++++++++++++++++++++++++++++++------------
 net/smc/smc.h    |  4 ++-
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 949a2714a453..86913eb5cfa0 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -7,12 +7,11 @@
  *  applicable with RoCE-cards only
  *
  *  Initial restrictions:
- *    - IPv6 support postponed
  *    - support for alternate links postponed
  *    - partial support for non-blocking sockets only
  *    - support for urgent data postponed
  *
- *  Copyright IBM Corp. 2016
+ *  Copyright IBM Corp. 2016, 2018
  *
  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  *              based on prototype from Frank Blaschka
@@ -64,6 +63,10 @@ static struct smc_hashinfo smc_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
 };
 
+static struct smc_hashinfo smc_v6_hashinfo = {
+	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
+};
+
 int smc_hash_sk(struct sock *sk)
 {
 	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
@@ -103,6 +106,18 @@ struct proto smc_proto = {
 };
 EXPORT_SYMBOL_GPL(smc_proto);
 
+struct proto smc_proto6 = {
+	.name		= "SMC6",
+	.owner		= THIS_MODULE,
+	.keepalive	= smc_set_keepalive,
+	.hash		= smc_hash_sk,
+	.unhash		= smc_unhash_sk,
+	.obj_size	= sizeof(struct smc_sock),
+	.h.smc_hash	= &smc_v6_hashinfo,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
+};
+EXPORT_SYMBOL_GPL(smc_proto6);
+
 static int smc_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -159,19 +174,22 @@ static void smc_destruct(struct sock *sk)
 	sk_refcnt_debug_dec(sk);
 }
 
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+				   int protocol)
 {
 	struct smc_sock *smc;
+	struct proto *prot;
 	struct sock *sk;
 
-	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
+	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
 	if (!sk)
 		return NULL;
 
 	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
 	sk->sk_state = SMC_INIT;
 	sk->sk_destruct = smc_destruct;
-	sk->sk_protocol = SMCPROTO_SMC;
+	sk->sk_protocol = protocol;
 	smc = smc_sk(sk);
 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 	INIT_LIST_HEAD(&smc->accept_q);
@@ -198,10 +216,13 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
 		goto out;
 
 	rc = -EAFNOSUPPORT;
+	if (addr->sin_family != AF_INET &&
+	    addr->sin_family != AF_INET6 &&
+	    addr->sin_family != AF_UNSPEC)
+		goto out;
 	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
-	if ((addr->sin_family != AF_INET) &&
-	    ((addr->sin_family != AF_UNSPEC) ||
-	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
+	if (addr->sin_family == AF_UNSPEC &&
+	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
 		goto out;
 
 	lock_sock(sk);
@@ -529,7 +550,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 	/* separate smc parameter checking to be safe */
 	if (alen < sizeof(addr->sa_family))
 		goto out_err;
-	if (addr->sa_family != AF_INET)
+	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 		goto out_err;
 
 	lock_sock(sk);
@@ -571,7 +592,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
 	int rc;
 
 	release_sock(lsk);
-	new_sk = smc_sock_alloc(sock_net(lsk), NULL);
+	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
 	if (!new_sk) {
 		rc = -ENOMEM;
 		lsk->sk_err = ENOMEM;
@@ -1367,6 +1388,7 @@ static const struct proto_ops smc_sock_ops = {
 static int smc_create(struct net *net, struct socket *sock, int protocol,
 		      int kern)
 {
+	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
 	struct smc_sock *smc;
 	struct sock *sk;
 	int rc;
@@ -1376,20 +1398,20 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 		goto out;
 
 	rc = -EPROTONOSUPPORT;
-	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
+	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
 		goto out;
 
 	rc = -ENOBUFS;
 	sock->ops = &smc_sock_ops;
-	sk = smc_sock_alloc(net, sock);
+	sk = smc_sock_alloc(net, sock, protocol);
 	if (!sk)
 		goto out;
 
 	/* create internal TCP socket for CLC handshake and fallback */
 	smc = smc_sk(sk);
 	smc->use_fallback = false; /* assume rdma capability first */
-	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
-			      IPPROTO_TCP, &smc->clcsock);
+	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+			      &smc->clcsock);
 	if (rc) {
 		sk_common_release(sk);
 		goto out;
@@ -1429,16 +1451,23 @@ static int __init smc_init(void)
 
 	rc = proto_register(&smc_proto, 1);
 	if (rc) {
-		pr_err("%s: proto_register fails with %d\n", __func__, rc);
+		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
 		goto out_pnet;
 	}
 
+	rc = proto_register(&smc_proto6, 1);
+	if (rc) {
+		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
+		goto out_proto;
+	}
+
 	rc = sock_register(&smc_sock_family_ops);
 	if (rc) {
 		pr_err("%s: sock_register fails with %d\n", __func__, rc);
-		goto out_proto;
+		goto out_proto6;
 	}
 	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
+	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
 
 	rc = smc_ib_register_client();
 	if (rc) {
@@ -1451,6 +1480,8 @@ static int __init smc_init(void)
 
 out_sock:
 	sock_unregister(PF_SMC);
+out_proto6:
+	proto_unregister(&smc_proto6);
 out_proto:
 	proto_unregister(&smc_proto);
 out_pnet:
@@ -1475,6 +1506,7 @@ static void __exit smc_exit(void)
 	static_branch_disable(&tcp_have_smc);
 	smc_ib_unregister_client();
 	sock_unregister(PF_SMC);
+	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
 	smc_pnet_exit();
 }
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 268cdf11533c..e4829a2f46ba 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,11 +18,13 @@
 
 #include "smc_ib.h"
 
-#define SMCPROTO_SMC		0	/* SMC protocol */
+#define SMCPROTO_SMC		0	/* SMC protocol, IPv4 */
+#define SMCPROTO_SMC6		1	/* SMC protocol, IPv6 */
 
 #define SMC_MAX_PORTS		2	/* Max # of ports */
 
 extern struct proto smc_proto;
+extern struct proto smc_proto6;
 
 #ifdef ATOMIC64_INIT
 #define KERNEL_HAS_ATOMIC64

From 5eb297a9a5aa5745b67ef1e7a399c0a7678d1d3f Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 16 Mar 2018 10:21:31 -0700
Subject: [PATCH 0960/1678] liquidio: Simplified napi poll

1) Moved interrupt enable related code from octeon_process_droq_poll_cmd()
   to separate function octeon_enable_irq().
2) Removed wrapper function octeon_process_droq_poll_cmd(), and directlyi
   using octeon_droq_process_poll_pkts().
3) Removed unused macros POLL_EVENT_XXX.

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_core.c   |  7 +-
 .../ethernet/cavium/liquidio/octeon_droq.c    | 83 +++++++------------
 .../ethernet/cavium/liquidio/octeon_droq.h    | 11 +--
 3 files changed, 35 insertions(+), 66 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index e7b6eb87ab14..666cf7e9cd09 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -627,9 +627,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
 	iq_no = droq->q_no;
 
 	/* Handle Droq descriptors */
-	work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
-						 POLL_EVENT_PROCESS_PKTS,
-						 budget);
+	work_done = octeon_droq_process_poll_pkts(oct, droq, budget);
 
 	/* Flush the instruction queue */
 	iq = oct->instr_queue[iq_no];
@@ -660,8 +658,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
 		tx_done = 1;
 		napi_complete_done(napi, work_done);
 
-		octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
-					     POLL_EVENT_ENABLE_INTR, 0);
+		octeon_enable_irq(droq->oct_dev, droq->q_no);
 		return 0;
 	}
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 3461d65ff4eb..f044718cea52 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -788,7 +788,7 @@ octeon_droq_process_packets(struct octeon_device *oct,
  * called before calling this routine.
  */
 
-static int
+int
 octeon_droq_process_poll_pkts(struct octeon_device *oct,
 			      struct octeon_droq *droq, u32 budget)
 {
@@ -835,71 +835,46 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
 	return total_pkts_processed;
 }
 
+/* Enable Pkt Interrupt */
 int
-octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
-			     u32 arg)
+octeon_enable_irq(struct octeon_device *oct, u32 q_no)
 {
-	struct octeon_droq *droq;
-
-	droq = oct->droq[q_no];
-
-	if (cmd == POLL_EVENT_PROCESS_PKTS)
-		return octeon_droq_process_poll_pkts(oct, droq, arg);
-
-	if (cmd == POLL_EVENT_PENDING_PKTS) {
-		u32 pkt_cnt = atomic_read(&droq->pkts_pending);
-
-		return  octeon_droq_process_packets(oct, droq, pkt_cnt);
-	}
-
-	if (cmd == POLL_EVENT_ENABLE_INTR) {
-		u32 value;
+	switch (oct->chip_id) {
+	case OCTEON_CN66XX:
+	case OCTEON_CN68XX: {
+		struct octeon_cn6xxx *cn6xxx =
+			(struct octeon_cn6xxx *)oct->chip;
 		unsigned long flags;
+		u32 value;
 
-		/* Enable Pkt Interrupt */
-		switch (oct->chip_id) {
-		case OCTEON_CN66XX:
-		case OCTEON_CN68XX: {
-			struct octeon_cn6xxx *cn6xxx =
-				(struct octeon_cn6xxx *)oct->chip;
-			spin_lock_irqsave
-				(&cn6xxx->lock_for_droq_int_enb_reg, flags);
-			value =
-				octeon_read_csr(oct,
-						CN6XXX_SLI_PKT_TIME_INT_ENB);
-			value |= (1 << q_no);
-			octeon_write_csr(oct,
-					 CN6XXX_SLI_PKT_TIME_INT_ENB,
-					 value);
-			value =
-				octeon_read_csr(oct,
-						CN6XXX_SLI_PKT_CNT_INT_ENB);
-			value |= (1 << q_no);
-			octeon_write_csr(oct,
-					 CN6XXX_SLI_PKT_CNT_INT_ENB,
-					 value);
+		spin_lock_irqsave
+			(&cn6xxx->lock_for_droq_int_enb_reg, flags);
+		value = octeon_read_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB);
+		value |= (1 << q_no);
+		octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT_ENB, value);
+		value = octeon_read_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB);
+		value |= (1 << q_no);
+		octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, value);
 
-			/* don't bother flushing the enables */
+		/* don't bother flushing the enables */
 
-			spin_unlock_irqrestore
-				(&cn6xxx->lock_for_droq_int_enb_reg, flags);
-			return 0;
-		}
+		spin_unlock_irqrestore
+			(&cn6xxx->lock_for_droq_int_enb_reg, flags);
+	}
 		break;
-		case OCTEON_CN23XX_PF_VID: {
-			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
-		}
+	case OCTEON_CN23XX_PF_VID:
+		lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
 		break;
 
-		case OCTEON_CN23XX_VF_VID:
-			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+	case OCTEON_CN23XX_VF_VID:
+		lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
 		break;
-		}
-		return 0;
+	default:
+		dev_err(&oct->pci_dev->dev, "%s Unknown Chip\n", __func__);
+		return 1;
 	}
 
-	dev_err(&oct->pci_dev->dev, "%s Unknown command: %d\n", __func__, cmd);
-	return -EINVAL;
+	return 0;
 }
 
 int octeon_register_droq_ops(struct octeon_device *oct, u32 q_no,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 815a9f56fd59..f28f262d4ab6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -123,11 +123,6 @@ struct oct_droq_stats {
 
 };
 
-#define POLL_EVENT_INTR_ARRIVED  1
-#define POLL_EVENT_PROCESS_PKTS  2
-#define POLL_EVENT_PENDING_PKTS  3
-#define POLL_EVENT_ENABLE_INTR   4
-
 /* The maximum number of buffers that can be dispatched from the
  * output/dma queue. Set to 64 assuming 1K buffers in DROQ and the fact that
  * max packet size from DROQ is 64K.
@@ -414,8 +409,10 @@ int octeon_droq_process_packets(struct octeon_device *oct,
 				struct octeon_droq *droq,
 				u32 budget);
 
-int octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no,
-				 int cmd, u32 arg);
+int octeon_droq_process_poll_pkts(struct octeon_device *oct,
+				  struct octeon_droq *droq, u32 budget);
+
+int octeon_enable_irq(struct octeon_device *oct, u32 q_no);
 
 void octeon_droq_check_oom(struct octeon_droq *droq);
 

From 5dce837944de76b5ba9cff6d2a3ecde6f4838665 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 16 Mar 2018 10:41:14 -0700
Subject: [PATCH 0961/1678] selftests/txtimestamp: Add more configurable
 parameters

Add a way to configure if poll() should wait forever for an event, the
number of packets that should be sent for each and if there should be
any delay between packets.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/timestamping/txtimestamp.c     | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
index 5df07047ca86..81a98a240456 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -68,9 +68,11 @@ static int cfg_num_pkts = 4;
 static int do_ipv4 = 1;
 static int do_ipv6 = 1;
 static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
 static bool cfg_show_payload;
 static bool cfg_do_pktinfo;
 static bool cfg_loop_nodata;
+static bool cfg_no_delay;
 static uint16_t dest_port = 9000;
 
 static struct sockaddr_in daddr;
@@ -171,7 +173,7 @@ static void __poll(int fd)
 
 	memset(&pollfd, 0, sizeof(pollfd));
 	pollfd.fd = fd;
-	ret = poll(&pollfd, 1, 100);
+	ret = poll(&pollfd, 1, cfg_poll_timeout);
 	if (ret != 1)
 		error(1, errno, "poll");
 }
@@ -371,7 +373,8 @@ static void do_test(int family, unsigned int opt)
 			error(1, errno, "send");
 
 		/* wait for all errors to be queued, else ACKs arrive OOO */
-		usleep(50 * 1000);
+		if (!cfg_no_delay)
+			usleep(50 * 1000);
 
 		__poll(fd);
 
@@ -392,6 +395,9 @@ static void __attribute__((noreturn)) usage(const char *filepath)
 			"  -4:   only IPv4\n"
 			"  -6:   only IPv6\n"
 			"  -h:   show this message\n"
+			"  -c N: number of packets for each test\n"
+			"  -D:   no delay between packets\n"
+			"  -F:   poll() waits forever for an event\n"
 			"  -I:   request PKTINFO\n"
 			"  -l N: send N bytes at a time\n"
 			"  -n:   set no-payload option\n"
@@ -409,7 +415,7 @@ static void parse_opt(int argc, char **argv)
 	int proto_count = 0;
 	char c;
 
-	while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+	while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
 		switch (c) {
 		case '4':
 			do_ipv6 = 0;
@@ -417,6 +423,15 @@ static void parse_opt(int argc, char **argv)
 		case '6':
 			do_ipv4 = 0;
 			break;
+		case 'c':
+			cfg_num_pkts = strtoul(optarg, NULL, 10);
+			break;
+		case 'D':
+			cfg_no_delay = true;
+			break;
+		case 'F':
+			cfg_poll_timeout = -1;
+			break;
 		case 'I':
 			cfg_do_pktinfo = true;
 			break;

From 7156d194a0772f733865267e7207e0b08f81b02b Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Fri, 16 Mar 2018 10:51:07 -0700
Subject: [PATCH 0962/1678] tcp: add snd_ssthresh stat in
 SCM_TIMESTAMPING_OPT_STATS

This patch adds TCP_NLA_SND_SSTHRESH stat into SCM_TIMESTAMPING_OPT_STATS
that reports tcp_sock.snd_ssthresh.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 1 +
 net/ipv4/tcp.c           | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 4c0ae0faf7ca..560374c978f9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -243,6 +243,7 @@ enum {
 	TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
 	TCP_NLA_SNDQ_SIZE,	/* Data (bytes) pending in send queue */
 	TCP_NLA_CA_STATE,	/* ca_state of socket */
+	TCP_NLA_SND_SSTHRESH,	/* Slow start size threshold */
 
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fb350f740f69..e553f84bde83 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3031,7 +3031,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	u32 rate;
 
 	stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
-			  4 * nla_total_size(sizeof(u32)) +
+			  5 * nla_total_size(sizeof(u32)) +
 			  3 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
 	if (!stats)
 		return NULL;
@@ -3061,6 +3061,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 
 	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
 	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
+	nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
 
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);

From 53794570049d314742f156c99022914840a3d5ab Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Fri, 16 Mar 2018 10:51:49 -0700
Subject: [PATCH 0963/1678] net-tcp_bbr: set tp->snd_ssthresh to BDP upon
 STARTUP exit

Set tp->snd_ssthresh to BDP upon STARTUP exit. This allows us
to check if a BBR flow exited STARTUP and the BDP at the
time of STARTUP exit with SCM_TIMESTAMPING_OPT_STATS. Since BBR does not
use snd_ssthresh this fix has no impact on BBR's behavior.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index c92014cb1e16..158d105e76da 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -731,6 +731,8 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
 		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
 		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
+		tcp_sk(sk)->snd_ssthresh =
+				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
 	    tcp_packets_in_flight(tcp_sk(sk)) <=
@@ -834,6 +836,7 @@ static void bbr_init(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->prior_cwnd = 0;
+	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	bbr->rtt_cnt = 0;
 	bbr->next_rtt_delivered = 0;
 	bbr->prev_ca_state = TCP_CA_Open;
@@ -886,7 +889,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
 static u32 bbr_ssthresh(struct sock *sk)
 {
 	bbr_save_cwnd(sk);
-	return TCP_INFINITE_SSTHRESH;	 /* BBR does not use ssthresh */
+	return tcp_sk(sk)->snd_ssthresh;
 }
 
 static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,

From 489b30b53f0540b9f8e391cbb2839cea48b5d1c1 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 15 Mar 2018 12:10:57 +0300
Subject: [PATCH 0964/1678] net: Convert l2tp_net_ops

Init method is rather simple. Exit method queues del_work
for every tunnel from per-net list. This seems to be safe
to be marked async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 83421c6f0bef..189a12a5e4ac 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1787,6 +1787,7 @@ static struct pernet_operations l2tp_net_ops = {
 	.exit = l2tp_exit_net,
 	.id   = &l2tp_net_id,
 	.size = sizeof(struct l2tp_net),
+	.async = true,
 };
 
 static int __init l2tp_init(void)

From 8cec2f49dc413d6328067d22862b0bdd0f4305ec Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 15 Mar 2018 12:11:06 +0300
Subject: [PATCH 0965/1678] net: Convert mpls_net_ops

These pernet_operations register and unregister sysctl table.
Exit methods frees platform_labels from net::mpls::platform_label.
Everything is per-net, and they looks safe to be marked async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7a4de6d618b1..d4a89a8be013 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2488,6 +2488,7 @@ static void mpls_net_exit(struct net *net)
 static struct pernet_operations mpls_net_ops = {
 	.init = mpls_net_init,
 	.exit = mpls_net_exit,
+	.async = true,
 };
 
 static struct rtnl_af_ops mpls_af_ops __read_mostly = {

From ec716650a750334cd763024597159eea3569e207 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 15 Mar 2018 12:11:16 +0300
Subject: [PATCH 0966/1678] net: Convert ovs_net_ops

These pernet_operations initialize and destroy net_generic()
data pointed by ovs_net_id. Exit method destroys vports from
alive net to exiting net. Since they are only pernet_operations
interested in this data, and exit method is executed under
exclusive global lock (ovs_mutex), they are safe to be executed
in parallel.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ef38e5aecd28..100191df0371 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2384,6 +2384,7 @@ static struct pernet_operations ovs_net_ops = {
 	.exit = ovs_exit_net,
 	.id   = &ovs_net_id,
 	.size = sizeof(struct ovs_net),
+	.async = true,
 };
 
 static int __init dp_init(void)

From 554855ccdf37262def7fed557a5efec39f2c0f47 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 15 Mar 2018 12:11:25 +0300
Subject: [PATCH 0967/1678] net: Convert ipvs_core_ops

These pernet_operations register and unregister nf hooks,
/proc entries, sysctl, percpu statistics. There are several
global lists, and the only list modified without exclusive
locks is ip_vs_conn_tab in ip_vs_conn_flush(). We iterate
the list and force the timers expire at the moment. Since
there were possible several timer expirations before this
patch, and since they are safe, the patch does not invent
new parallelism of their destruction. These pernet_operations
look safe to be converted.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5f6f73cf2174..c5d16e2bc8e2 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2289,6 +2289,7 @@ static struct pernet_operations ipvs_core_ops = {
 	.exit = __ip_vs_cleanup,
 	.id   = &ip_vs_net_id,
 	.size = sizeof(struct netns_ipvs),
+	.async = true,
 };
 
 static struct pernet_operations ipvs_core_dev_ops = {

From d0edfbb4ba4a88399c169cf2c26b252d39f503bc Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 15 Mar 2018 12:11:35 +0300
Subject: [PATCH 0968/1678] net: Convert ipvs_core_dev_ops

Exit method stops two per-net threads and cancels
delayed work. Everything looks nicely per-net divided.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index c5d16e2bc8e2..6a6cb9db030b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2294,6 +2294,7 @@ static struct pernet_operations ipvs_core_ops = {
 
 static struct pernet_operations ipvs_core_dev_ops = {
 	.exit = __ip_vs_dev_cleanup,
+	.async = true,
 };
 
 /*

From 6c77e79557acd9b3b896a8075a19ef11ed887a99 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 15 Mar 2018 12:11:44 +0300
Subject: [PATCH 0969/1678] net: Convert ip_vs_ftp_ops

These pernet_operations register and unregister ipvs app.
register_ip_vs_app(), unregister_ip_vs_app() and
register_ip_vs_app_inc() modify per-net structures,
and there are no global structures touched. So,
this looks safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 58d5d05aec24..8b25aab41928 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -479,6 +479,7 @@ static void __ip_vs_ftp_exit(struct net *net)
 static struct pernet_operations ip_vs_ftp_ops = {
 	.init = __ip_vs_ftp_init,
 	.exit = __ip_vs_ftp_exit,
+	.async = true,
 };
 
 static int __init ip_vs_ftp_init(void)

From 928df1880e24bcd47d6359ff86df24db3dfba3c3 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Mar 2018 16:48:51 +0100
Subject: [PATCH 0970/1678] tipc: obsolete TIPC_ZONE_SCOPE

Publications for TIPC_CLUSTER_SCOPE and TIPC_ZONE_SCOPE are in all
aspects handled the same way, both on the publishing node and on the
receiving nodes.

Despite previous ambitions to the contrary, this is never going to change,
so we take the conseqeunce of this and obsolete TIPC_ZONE_SCOPE and related
macros/functions. Whenever a user is doing a bind() or a sendmsg() attempt
using ZONE_SCOPE we translate this internally to CLUSTER_SCOPE, while we
remain compatible with users and remote nodes still using ZONE_SCOPE.

Furthermore, the non-formalized scope value 0 has always been permitted
for use during lookup, with the same meaning as ZONE_SCOPE/CLUSTER_SCOPE.
We now permit it even as binding scope, but for compatibility reasons we
choose to not change the value of TIPC_CLUSTER_SCOPE.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 102 ++++++++++++++++++++------------------
 net/tipc/addr.c           |  31 ------------
 net/tipc/addr.h           |  10 ++++
 net/tipc/msg.c            |   2 +-
 net/tipc/name_table.c     |   3 +-
 net/tipc/net.c            |   2 +-
 net/tipc/socket.c         |  15 ++++--
 7 files changed, 77 insertions(+), 88 deletions(-)

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 14bacc7e6cef..4ac9f1f02b06 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -61,50 +61,6 @@ struct tipc_name_seq {
 	__u32 upper;
 };
 
-/* TIPC Address Size, Offset, Mask specification for Z.C.N
- */
-#define TIPC_NODE_BITS          12
-#define TIPC_CLUSTER_BITS       12
-#define TIPC_ZONE_BITS          8
-
-#define TIPC_NODE_OFFSET        0
-#define TIPC_CLUSTER_OFFSET     TIPC_NODE_BITS
-#define TIPC_ZONE_OFFSET        (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
-
-#define TIPC_NODE_SIZE          ((1UL << TIPC_NODE_BITS) - 1)
-#define TIPC_CLUSTER_SIZE       ((1UL << TIPC_CLUSTER_BITS) - 1)
-#define TIPC_ZONE_SIZE          ((1UL << TIPC_ZONE_BITS) - 1)
-
-#define TIPC_NODE_MASK		(TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
-#define TIPC_CLUSTER_MASK	(TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
-#define TIPC_ZONE_MASK		(TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
-
-#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
-
-static inline __u32 tipc_addr(unsigned int zone,
-			      unsigned int cluster,
-			      unsigned int node)
-{
-	return (zone << TIPC_ZONE_OFFSET) |
-		(cluster << TIPC_CLUSTER_OFFSET) |
-		node;
-}
-
-static inline unsigned int tipc_zone(__u32 addr)
-{
-	return addr >> TIPC_ZONE_OFFSET;
-}
-
-static inline unsigned int tipc_cluster(__u32 addr)
-{
-	return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
-}
-
-static inline unsigned int tipc_node(__u32 addr)
-{
-	return addr & TIPC_NODE_MASK;
-}
-
 /*
  * Application-accessible port name types
  */
@@ -117,9 +73,10 @@ static inline unsigned int tipc_node(__u32 addr)
 /*
  * Publication scopes when binding port names and port name sequences
  */
-#define TIPC_ZONE_SCOPE         1
-#define TIPC_CLUSTER_SCOPE      2
-#define TIPC_NODE_SCOPE         3
+enum tipc_scope {
+	TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
+	TIPC_NODE_SCOPE    = 3
+};
 
 /*
  * Limiting values for messages
@@ -243,7 +200,7 @@ struct sockaddr_tipc {
 struct tipc_group_req {
 	__u32 type;      /* group id */
 	__u32 instance;  /* member id */
-	__u32 scope;     /* zone/cluster/node */
+	__u32 scope;     /* cluster/node */
 	__u32 flags;
 };
 
@@ -268,4 +225,53 @@ struct tipc_sioc_ln_req {
 	__u32 bearer_id;
 	char linkname[TIPC_MAX_LINK_NAME];
 };
+
+
+/* The macros and functions below are deprecated:
+ */
+
+#define TIPC_ZONE_SCOPE         1
+
+#define TIPC_NODE_BITS          12
+#define TIPC_CLUSTER_BITS       12
+#define TIPC_ZONE_BITS          8
+
+#define TIPC_NODE_OFFSET        0
+#define TIPC_CLUSTER_OFFSET     TIPC_NODE_BITS
+#define TIPC_ZONE_OFFSET        (TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS)
+
+#define TIPC_NODE_SIZE          ((1UL << TIPC_NODE_BITS) - 1)
+#define TIPC_CLUSTER_SIZE       ((1UL << TIPC_CLUSTER_BITS) - 1)
+#define TIPC_ZONE_SIZE          ((1UL << TIPC_ZONE_BITS) - 1)
+
+#define TIPC_NODE_MASK		(TIPC_NODE_SIZE << TIPC_NODE_OFFSET)
+#define TIPC_CLUSTER_MASK	(TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET)
+#define TIPC_ZONE_MASK		(TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET)
+
+#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
+
+static inline __u32 tipc_addr(unsigned int zone,
+			      unsigned int cluster,
+			      unsigned int node)
+{
+	return (zone << TIPC_ZONE_OFFSET) |
+		(cluster << TIPC_CLUSTER_OFFSET) |
+		node;
+}
+
+static inline unsigned int tipc_zone(__u32 addr)
+{
+	return addr >> TIPC_ZONE_OFFSET;
+}
+
+static inline unsigned int tipc_cluster(__u32 addr)
+{
+	return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET;
+}
+
+static inline unsigned int tipc_node(__u32 addr)
+{
+	return addr & TIPC_NODE_MASK;
+}
+
 #endif
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 48fd3b5a73fb..97cd857d7f43 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -63,23 +63,6 @@ int in_own_node(struct net *net, u32 addr)
 	return (addr == tn->own_addr) || !addr;
 }
 
-/**
- * addr_domain - convert 2-bit scope value to equivalent message lookup domain
- *
- * Needed when address of a named message must be looked up a second time
- * after a network hop.
- */
-u32 addr_domain(struct net *net, u32 sc)
-{
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-
-	if (likely(sc == TIPC_NODE_SCOPE))
-		return tn->own_addr;
-	if (sc == TIPC_CLUSTER_SCOPE)
-		return tipc_cluster_mask(tn->own_addr);
-	return tipc_zone_mask(tn->own_addr);
-}
-
 /**
  * tipc_addr_domain_valid - validates a network domain address
  *
@@ -124,20 +107,6 @@ int tipc_in_scope(u32 domain, u32 addr)
 	return 0;
 }
 
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-int tipc_addr_scope(u32 domain)
-{
-	if (likely(!domain))
-		return TIPC_ZONE_SCOPE;
-	if (tipc_node(domain))
-		return TIPC_NODE_SCOPE;
-	if (tipc_cluster(domain))
-		return TIPC_CLUSTER_SCOPE;
-	return TIPC_ZONE_SCOPE;
-}
-
 char *tipc_addr_string_fill(char *string, u32 addr)
 {
 	snprintf(string, 16, "<%u.%u.%u>",
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index bebb347803ce..2ecf5a5d40dd 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -60,6 +60,16 @@ static inline u32 tipc_cluster_mask(u32 addr)
 	return addr & TIPC_ZONE_CLUSTER_MASK;
 }
 
+static inline int tipc_node2scope(u32 node)
+{
+	return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE;
+}
+
+static inline int tipc_scope2node(struct net *net, int sc)
+{
+	return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
+}
+
 u32 tipc_own_addr(struct net *net);
 int in_own_cluster(struct net *net, u32 addr);
 int in_own_cluster_exact(struct net *net, u32 addr);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4e1c6f6450bb..b6c45dccba3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -580,7 +580,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	msg = buf_msg(skb);
 	if (msg_reroute_cnt(msg))
 		return false;
-	dnode = addr_domain(net, msg_lookup_scope(msg));
+	dnode = tipc_scope2node(net, msg_lookup_scope(msg));
 	dport = tipc_nametbl_translate(net, msg_nametype(msg),
 				       msg_nameinst(msg), &dnode);
 	if (!dport)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index e01c9c691ba2..6772390fcb00 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -473,8 +473,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 	struct name_seq *seq = nametbl_find_seq(net, type);
 	int index = hash(type);
 
-	if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
-	    (lower > upper)) {
+	if (scope > TIPC_NODE_SCOPE || lower > upper) {
 		pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
 			 type, lower, upper, scope);
 		return NULL;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 1a2fde0d6f61..5c4c4405b78e 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -118,7 +118,7 @@ int tipc_net_start(struct net *net, u32 addr)
 	tipc_sk_reinit(net);
 
 	tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
-			     TIPC_ZONE_SCOPE, 0, tn->own_addr);
+			     TIPC_CLUSTER_SCOPE, 0, tn->own_addr);
 
 	pr_info("Started in network mode\n");
 	pr_info("Own node address %s, network identity %u\n",
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 8b04e601311c..910d3827f499 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -644,7 +644,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
 		goto exit;
 	}
 
-	res = (addr->scope > 0) ?
+	res = (addr->scope >= 0) ?
 		tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
 		tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
 exit:
@@ -1280,8 +1280,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct tipc_name_seq *seq;
 	struct sk_buff_head pkts;
-	u32 type, inst, domain;
 	u32 dnode, dport;
+	u32 type, inst;
 	int mtu, rc;
 
 	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1332,13 +1332,12 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	if (dest->addrtype == TIPC_ADDR_NAME) {
 		type = dest->addr.name.name.type;
 		inst = dest->addr.name.name.instance;
-		domain = dest->addr.name.domain;
-		dnode = domain;
+		dnode = dest->addr.name.domain;
 		msg_set_type(hdr, TIPC_NAMED_MSG);
 		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
 		msg_set_nametype(hdr, type);
 		msg_set_nameinst(hdr, inst);
-		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
+		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
 		dport = tipc_nametbl_translate(net, type, inst, &dnode);
 		msg_set_destnode(hdr, dnode);
 		msg_set_destport(hdr, dport);
@@ -2592,6 +2591,9 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 	struct publication *publ;
 	u32 key;
 
+	if (scope != TIPC_NODE_SCOPE)
+		scope = TIPC_CLUSTER_SCOPE;
+
 	if (tipc_sk_connected(sk))
 		return -EINVAL;
 	key = tsk->portid + tsk->pub_count + 1;
@@ -2617,6 +2619,9 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 	struct publication *safe;
 	int rc = -EINVAL;
 
+	if (scope != TIPC_NODE_SCOPE)
+		scope = TIPC_CLUSTER_SCOPE;
+
 	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
 		if (seq) {
 			if (publ->scope != scope)

From 64a52b26d5633d6efc35cdf1e0c627cc4189e55a Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Mar 2018 16:48:52 +0100
Subject: [PATCH 0971/1678] tipc: remove zone publication list in name table

As a consequence of the previous commit we nan now eliminate zone scope
related lists in the name table. We start with name_table::publ_list[3],
which can now be replaced with two lists, one for node scope publications
and one for cluster scope publications.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h       |  5 +++++
 net/tipc/name_distr.c | 39 ++++++++++++++++++---------------------
 net/tipc/name_table.c |  5 ++---
 net/tipc/name_table.h |  6 ++++--
 4 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/net/tipc/core.h b/net/tipc/core.h
index ff8b071654f5..347f850dc872 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -131,6 +131,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
 	return &tipc_net(net)->node_list;
 }
 
+static inline struct name_table *tipc_name_table(struct net *net)
+{
+	return tipc_net(net)->nametbl;
+}
+
 static inline struct tipc_topsrv *tipc_topsrv(struct net *net)
 {
 	return tipc_net(net)->topsrv;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 23f8899e0f8c..11ce20505550 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -86,25 +86,25 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
  */
 struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct sk_buff *buf;
+	struct name_table *nt = tipc_name_table(net);
 	struct distr_item *item;
+	struct sk_buff *skb;
 
-	list_add_tail_rcu(&publ->local_list,
-			  &tn->nametbl->publ_list[publ->scope]);
-
-	if (publ->scope == TIPC_NODE_SCOPE)
+	if (publ->scope == TIPC_NODE_SCOPE) {
+		list_add_tail_rcu(&publ->local_list, &nt->node_scope);
 		return NULL;
+	}
+	list_add_tail_rcu(&publ->local_list, &nt->cluster_scope);
 
-	buf = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
-	if (!buf) {
+	skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
+	if (!skb) {
 		pr_warn("Publication distribution failure\n");
 		return NULL;
 	}
 
-	item = (struct distr_item *)msg_data(buf_msg(buf));
+	item = (struct distr_item *)msg_data(buf_msg(skb));
 	publ_to_item(item, publ);
-	return buf;
+	return skb;
 }
 
 /**
@@ -184,16 +184,13 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
  */
 void tipc_named_node_up(struct net *net, u32 dnode)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct name_table *nt = tipc_name_table(net);
 	struct sk_buff_head head;
 
 	__skb_queue_head_init(&head);
 
 	rcu_read_lock();
-	named_distribute(net, &head, dnode,
-			 &tn->nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
-	named_distribute(net, &head, dnode,
-			 &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
+	named_distribute(net, &head, dnode, &nt->cluster_scope);
 	rcu_read_unlock();
 
 	tipc_node_xmit(net, &head, dnode, 0);
@@ -382,16 +379,16 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
  */
 void tipc_named_reinit(struct net *net)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_net *tn = tipc_net(net);
 	struct publication *publ;
-	int scope;
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++)
-		list_for_each_entry_rcu(publ, &tn->nametbl->publ_list[scope],
-					local_list)
-			publ->node = tn->own_addr;
+	list_for_each_entry_rcu(publ, &nt->node_scope, local_list)
+		publ->node = tn->own_addr;
+	list_for_each_entry_rcu(publ, &nt->cluster_scope, local_list)
+		publ->node = tn->own_addr;
 
 	spin_unlock_bh(&tn->nametbl_lock);
 }
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 6772390fcb00..1a3a327018c5 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -878,9 +878,8 @@ int tipc_nametbl_init(struct net *net)
 	for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
 		INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
 
-	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]);
-	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]);
-	INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]);
+	INIT_LIST_HEAD(&tipc_nametbl->node_scope);
+	INIT_LIST_HEAD(&tipc_nametbl->cluster_scope);
 	tn->nametbl = tipc_nametbl;
 	spin_lock_init(&tn->nametbl_lock);
 	return 0;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 17652602d5e2..47f72cddfb39 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -88,12 +88,14 @@ struct publication {
 /**
  * struct name_table - table containing all existing port name publications
  * @seq_hlist: name sequence hash lists
- * @publ_list: pulication lists
+ * @node_scope: all local publications with node scope
+ * @cluster_scope: all local publications with cluster scope
  * @local_publ_count: number of publications issued by this node
  */
 struct name_table {
 	struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
-	struct list_head publ_list[TIPC_PUBL_SCOPE_NUM];
+	struct list_head node_scope;
+	struct list_head cluster_scope;
 	u32 local_publ_count;
 };
 

From ba765ec63786583e210b55073a908a9d7ea284fa Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Mar 2018 16:48:53 +0100
Subject: [PATCH 0972/1678] tipc: remove zone_list member in struct publication

As a further consequence of the previous commits, we can also remove
the member 'zone_list 'in struct name_info and struct publication.
Instead, we now let the member cluster_list take over the role a
container of all publications of a given <type,lower, upper>.
We also remove the counters for the size of those lists, since
they don't serve any purpose.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 101 ++++++++++++------------------------------
 net/tipc/name_table.h |   5 +--
 2 files changed, 30 insertions(+), 76 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1a3a327018c5..6d7b4c7ff4b7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -50,24 +50,12 @@
 
 /**
  * struct name_info - name sequence publication info
- * @node_list: circular list of publications made by own node
- * @cluster_list: circular list of publications made by own cluster
- * @zone_list: circular list of publications made by own zone
- * @node_list_size: number of entries in "node_list"
- * @cluster_list_size: number of entries in "cluster_list"
- * @zone_list_size: number of entries in "zone_list"
- *
- * Note: The zone list always contains at least one entry, since all
- *       publications of the associated name sequence belong to it.
- *       (The cluster and node lists may be empty.)
+ * @node_list: list of publications on own node of this <type,lower,upper>
+ * @cluster_list: list of all publications of this <type,lower,upper>
  */
 struct name_info {
 	struct list_head node_list;
 	struct list_head cluster_list;
-	struct list_head zone_list;
-	u32 node_list_size;
-	u32 cluster_list_size;
-	u32 zone_list_size;
 };
 
 /**
@@ -249,7 +237,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 		info = sseq->info;
 
 		/* Check if an identical publication already exists */
-		list_for_each_entry(publ, &info->zone_list, zone_list) {
+		list_for_each_entry(publ, &info->cluster_list, cluster_list) {
 			if ((publ->ref == port) && (publ->key == key) &&
 			    (!publ->node || (publ->node == node)))
 				return NULL;
@@ -292,7 +280,6 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 
 		INIT_LIST_HEAD(&info->node_list);
 		INIT_LIST_HEAD(&info->cluster_list);
-		INIT_LIST_HEAD(&info->zone_list);
 
 		/* Insert new sub-sequence */
 		sseq = &nseq->sseqs[inspos];
@@ -311,18 +298,10 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 	if (!publ)
 		return NULL;
 
-	list_add(&publ->zone_list, &info->zone_list);
-	info->zone_list_size++;
+	list_add(&publ->cluster_list, &info->cluster_list);
 
-	if (in_own_cluster(net, node)) {
-		list_add(&publ->cluster_list, &info->cluster_list);
-		info->cluster_list_size++;
-	}
-
-	if (in_own_node(net, node)) {
+	if (in_own_node(net, node))
 		list_add(&publ->node_list, &info->node_list);
-		info->node_list_size++;
-	}
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
@@ -363,7 +342,7 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
 	info = sseq->info;
 
 	/* Locate publication, if it exists */
-	list_for_each_entry(publ, &info->zone_list, zone_list) {
+	list_for_each_entry(publ, &info->cluster_list, cluster_list) {
 		if ((publ->key == key) && (publ->ref == ref) &&
 		    (!publ->node || (publ->node == node)))
 			goto found;
@@ -371,24 +350,12 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
 	return NULL;
 
 found:
-	/* Remove publication from zone scope list */
-	list_del(&publ->zone_list);
-	info->zone_list_size--;
-
-	/* Remove publication from cluster scope list, if present */
-	if (in_own_cluster(net, node)) {
-		list_del(&publ->cluster_list);
-		info->cluster_list_size--;
-	}
-
-	/* Remove publication from node scope list, if present */
-	if (in_own_node(net, node)) {
+	list_del(&publ->cluster_list);
+	if (in_own_node(net, node))
 		list_del(&publ->node_list);
-		info->node_list_size--;
-	}
 
 	/* Contract subseq list if no more publications for that subseq */
-	if (list_empty(&info->zone_list)) {
+	if (list_empty(&info->cluster_list)) {
 		kfree(info);
 		free = &nseq->sseqs[nseq->first_free--];
 		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
@@ -435,7 +402,8 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 			struct name_info *info = sseq->info;
 			int must_report = 1;
 
-			list_for_each_entry(crs, &info->zone_list, zone_list) {
+			list_for_each_entry(crs, &info->cluster_list,
+					    cluster_list) {
 				tipc_sub_report_overlap(sub, sseq->lower,
 							sseq->upper,
 							TIPC_PUBLISHED,
@@ -559,18 +527,12 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 						node_list);
 			list_move_tail(&publ->node_list,
 				       &info->node_list);
-		} else if (!list_empty(&info->cluster_list)) {
+		} else {
 			publ = list_first_entry(&info->cluster_list,
 						struct publication,
 						cluster_list);
 			list_move_tail(&publ->cluster_list,
 				       &info->cluster_list);
-		} else {
-			publ = list_first_entry(&info->zone_list,
-						struct publication,
-						zone_list);
-			list_move_tail(&publ->zone_list,
-				       &info->zone_list);
 		}
 	}
 
@@ -581,16 +543,10 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 		publ = list_first_entry(&info->node_list, struct publication,
 					node_list);
 		list_move_tail(&publ->node_list, &info->node_list);
-	} else if (in_own_cluster_exact(net, *destnode)) {
-		if (list_empty(&info->cluster_list))
-			goto no_match;
+	} else {
 		publ = list_first_entry(&info->cluster_list, struct publication,
 					cluster_list);
 		list_move_tail(&publ->cluster_list, &info->cluster_list);
-	} else {
-		publ = list_first_entry(&info->zone_list, struct publication,
-					zone_list);
-		list_move_tail(&publ->zone_list, &info->zone_list);
 	}
 
 	ref = publ->ref;
@@ -622,7 +578,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 	sseq = nameseq_find_subseq(seq, instance);
 	if (likely(sseq)) {
 		info = sseq->info;
-		list_for_each_entry(publ, &info->zone_list, zone_list) {
+		list_for_each_entry(publ, &info->cluster_list, cluster_list) {
 			if (publ->scope != scope)
 				continue;
 			if (publ->ref == exclude && publ->node == self)
@@ -631,7 +587,8 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 			(*dstcnt)++;
 			if (all)
 				continue;
-			list_move_tail(&publ->zone_list, &info->zone_list);
+			list_move_tail(&publ->cluster_list,
+				       &info->cluster_list);
 			break;
 		}
 	}
@@ -641,15 +598,14 @@ exit:
 	return !list_empty(dsts);
 }
 
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-			   u32 scope, bool exact, struct list_head *dports)
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+			    u32 scope, bool exact, struct list_head *dports)
 {
 	struct sub_seq *sseq_stop;
 	struct name_info *info;
 	struct publication *p;
 	struct name_seq *seq;
 	struct sub_seq *sseq;
-	int res = 0;
 
 	rcu_read_lock();
 	seq = nametbl_find_seq(net, type);
@@ -667,14 +623,10 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
 			if (p->scope == scope || (!exact && p->scope < scope))
 				tipc_dest_push(dports, 0, p->ref);
 		}
-
-		if (info->cluster_list_size != info->node_list_size)
-			res = 1;
 	}
 	spin_unlock_bh(&seq->lock);
 exit:
 	rcu_read_unlock();
-	return res;
 }
 
 /* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
@@ -699,7 +651,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 	stop = seq->sseqs + seq->first_free;
 	for (; sseq != stop && sseq->lower <= upper; sseq++) {
 		info = sseq->info;
-		list_for_each_entry(publ, &info->zone_list, zone_list) {
+		list_for_each_entry(publ, &info->cluster_list, cluster_list) {
 			tipc_nlist_add(nodes, publ->node);
 		}
 	}
@@ -728,7 +680,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	stop = seq->sseqs + seq->first_free;
 	for (; sseq != stop; sseq++) {
 		info = sseq->info;
-		list_for_each_entry(p, &info->zone_list, zone_list) {
+		list_for_each_entry(p, &info->cluster_list, cluster_list) {
 			if (p->scope != scope)
 				continue;
 			tipc_group_add_member(grp, p->node, p->ref, p->lower);
@@ -899,7 +851,8 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
 	spin_lock_bh(&seq->lock);
 	sseq = seq->sseqs;
 	info = sseq->info;
-	list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+	list_for_each_entry_safe(publ, safe, &info->cluster_list,
+				 cluster_list) {
 		tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
 					 publ->ref, publ->key);
 		kfree_rcu(publ, rcu);
@@ -948,17 +901,19 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 	struct publication *p;
 
 	if (*last_publ) {
-		list_for_each_entry(p, &sseq->info->zone_list, zone_list)
+		list_for_each_entry(p, &sseq->info->cluster_list,
+				    cluster_list)
 			if (p->key == *last_publ)
 				break;
 		if (p->key != *last_publ)
 			return -EPIPE;
 	} else {
-		p = list_first_entry(&sseq->info->zone_list, struct publication,
-				     zone_list);
+		p = list_first_entry(&sseq->info->cluster_list,
+				     struct publication,
+				     cluster_list);
 	}
 
-	list_for_each_entry_from(p, &sseq->info->zone_list, zone_list) {
+	list_for_each_entry_from(p, &sseq->info->cluster_list, cluster_list) {
 		*last_publ = p->key;
 
 		hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 47f72cddfb39..a9063e25ee74 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -81,7 +81,6 @@ struct publication {
 	struct list_head pport_list;
 	struct list_head node_list;
 	struct list_head cluster_list;
-	struct list_head zone_list;
 	struct rcu_head rcu;
 };
 
@@ -102,8 +101,8 @@ struct name_table {
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-			   u32 scope, bool exact, struct list_head *dports);
+void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
+			    u32 scope, bool exact, struct list_head *dports);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 			      u32 type, u32 domain);
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,

From 935439cc48ef24f0e50396be3684a0f27e609363 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Mar 2018 16:48:54 +0100
Subject: [PATCH 0973/1678] tipc: merge two lists in struct publication

The size of struct publication can be reduced further. Membership in
lists 'nodesub_list' and 'local_list' is mutually exlusive, in that
remote publications use the former and local publications the latter.
We replace the two lists with one single, named 'binding_node' which
reflects what it really is.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 20 ++++++++++----------
 net/tipc/name_table.h |  5 ++---
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 11ce20505550..4c54fb37875a 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -91,10 +91,10 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
 	struct sk_buff *skb;
 
 	if (publ->scope == TIPC_NODE_SCOPE) {
-		list_add_tail_rcu(&publ->local_list, &nt->node_scope);
+		list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
 		return NULL;
 	}
-	list_add_tail_rcu(&publ->local_list, &nt->cluster_scope);
+	list_add_tail_rcu(&publ->binding_node, &nt->cluster_scope);
 
 	skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
 	if (!skb) {
@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
 	struct sk_buff *buf;
 	struct distr_item *item;
 
-	list_del(&publ->local_list);
+	list_del(&publ->binding_node);
 
 	if (publ->scope == TIPC_NODE_SCOPE)
 		return NULL;
@@ -147,7 +147,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
 			ITEM_SIZE) * ITEM_SIZE;
 	u32 msg_rem = msg_dsz;
 
-	list_for_each_entry(publ, pls, local_list) {
+	list_for_each_entry(publ, pls, binding_node) {
 		/* Prepare next buffer: */
 		if (!skb) {
 			skb = named_prepare_buf(net, PUBLICATION, msg_rem,
@@ -211,7 +211,7 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
 	p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
 				     publ->node, publ->ref, publ->key);
 	if (p)
-		tipc_node_unsubscribe(net, &p->nodesub_list, addr);
+		tipc_node_unsubscribe(net, &p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
 
 	if (p != publ) {
@@ -246,7 +246,7 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
 {
 	struct publication *publ, *tmp;
 
-	list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list)
+	list_for_each_entry_safe(publ, tmp, nsub_list, binding_node)
 		tipc_publ_purge(net, publ, addr);
 	tipc_dist_queue_purge(net, addr);
 }
@@ -270,7 +270,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 						TIPC_CLUSTER_SCOPE, node,
 						ntohl(i->ref), ntohl(i->key));
 		if (publ) {
-			tipc_node_subscribe(net, &publ->nodesub_list, node);
+			tipc_node_subscribe(net, &publ->binding_node, node);
 			return true;
 		}
 	} else if (dtype == WITHDRAWAL) {
@@ -279,7 +279,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 						node, ntohl(i->ref),
 						ntohl(i->key));
 		if (publ) {
-			tipc_node_unsubscribe(net, &publ->nodesub_list, node);
+			tipc_node_unsubscribe(net, &publ->binding_node, node);
 			kfree_rcu(publ, rcu);
 			return true;
 		}
@@ -385,9 +385,9 @@ void tipc_named_reinit(struct net *net)
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	list_for_each_entry_rcu(publ, &nt->node_scope, local_list)
+	list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
 		publ->node = tn->own_addr;
-	list_for_each_entry_rcu(publ, &nt->cluster_scope, local_list)
+	list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
 		publ->node = tn->own_addr;
 
 	spin_unlock_bh(&tn->nametbl_lock);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index a9063e25ee74..cb16bd883565 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/name_table.h: Include file for TIPC name table code
  *
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -76,8 +76,7 @@ struct publication {
 	u32 node;
 	u32 ref;
 	u32 key;
-	struct list_head nodesub_list;
-	struct list_head local_list;
+	struct list_head binding_node;
 	struct list_head pport_list;
 	struct list_head node_list;
 	struct list_head cluster_list;

From e50e73e10757ac86fcb1aaa986055049e060727a Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 15 Mar 2018 16:48:55 +0100
Subject: [PATCH 0974/1678] tipc: some name changes

We rename some lists and fields in struct publication both to make
the naming more consistent and to better reflect their roles. We
also update the descriptions of those lists.

node_list -> local_publ
cluster_list -> all_publ
pport_list -> binding_sock
ref -> port

There are no functional changes in this commit.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c |  12 ++--
 net/tipc/name_distr.h |   2 +-
 net/tipc/name_table.c | 143 +++++++++++++++++++++---------------------
 net/tipc/name_table.h |  38 ++++++-----
 net/tipc/socket.c     |  14 ++---
 5 files changed, 106 insertions(+), 103 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 4c54fb37875a..28d095a7d8bb 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -56,7 +56,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
 	i->type = htonl(p->type);
 	i->lower = htonl(p->lower);
 	i->upper = htonl(p->upper);
-	i->ref = htonl(p->ref);
+	i->port = htonl(p->port);
 	i->key = htonl(p->key);
 }
 
@@ -209,15 +209,15 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
 
 	spin_lock_bh(&tn->nametbl_lock);
 	p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
-				     publ->node, publ->ref, publ->key);
+				     publ->node, publ->port, publ->key);
 	if (p)
 		tipc_node_unsubscribe(net, &p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
 
 	if (p != publ) {
 		pr_err("Unable to remove publication from failed node\n"
-		       " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
-		       publ->type, publ->lower, publ->node, publ->ref,
+		       " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+		       publ->type, publ->lower, publ->node, publ->port,
 		       publ->key);
 	}
 
@@ -268,7 +268,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 						ntohl(i->lower),
 						ntohl(i->upper),
 						TIPC_CLUSTER_SCOPE, node,
-						ntohl(i->ref), ntohl(i->key));
+						ntohl(i->port), ntohl(i->key));
 		if (publ) {
 			tipc_node_subscribe(net, &publ->binding_node, node);
 			return true;
@@ -276,7 +276,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 	} else if (dtype == WITHDRAWAL) {
 		publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
 						ntohl(i->lower),
-						node, ntohl(i->ref),
+						node, ntohl(i->port),
 						ntohl(i->key));
 		if (publ) {
 			tipc_node_unsubscribe(net, &publ->binding_node, node);
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 1264ba0af937..4753e628d7c4 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -63,7 +63,7 @@ struct distr_item {
 	__be32 type;
 	__be32 lower;
 	__be32 upper;
-	__be32 ref;
+	__be32 port;
 	__be32 key;
 };
 
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 6d7b4c7ff4b7..bbbfc0702634 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/name_table.c: TIPC name table code
  *
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
  * All rights reserved.
  *
@@ -51,11 +51,11 @@
 /**
  * struct name_info - name sequence publication info
  * @node_list: list of publications on own node of this <type,lower,upper>
- * @cluster_list: list of all publications of this <type,lower,upper>
+ * @all_publ: list of all publications of this <type,lower,upper>
  */
 struct name_info {
-	struct list_head node_list;
-	struct list_head cluster_list;
+	struct list_head local_publ;
+	struct list_head all_publ;
 };
 
 /**
@@ -102,7 +102,7 @@ static int hash(int x)
  * publ_create - create a publication structure
  */
 static struct publication *publ_create(u32 type, u32 lower, u32 upper,
-				       u32 scope, u32 node, u32 port_ref,
+				       u32 scope, u32 node, u32 port,
 				       u32 key)
 {
 	struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
@@ -116,9 +116,9 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
 	publ->upper = upper;
 	publ->scope = scope;
 	publ->node = node;
-	publ->ref = port_ref;
+	publ->port = port;
 	publ->key = key;
-	INIT_LIST_HEAD(&publ->pport_list);
+	INIT_LIST_HEAD(&publ->binding_sock);
 	return publ;
 }
 
@@ -237,9 +237,9 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 		info = sseq->info;
 
 		/* Check if an identical publication already exists */
-		list_for_each_entry(publ, &info->cluster_list, cluster_list) {
-			if ((publ->ref == port) && (publ->key == key) &&
-			    (!publ->node || (publ->node == node)))
+		list_for_each_entry(publ, &info->all_publ, all_publ) {
+			if (publ->port == port && publ->key == key &&
+			    (!publ->node || publ->node == node))
 				return NULL;
 		}
 	} else {
@@ -278,8 +278,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 			return NULL;
 		}
 
-		INIT_LIST_HEAD(&info->node_list);
-		INIT_LIST_HEAD(&info->cluster_list);
+		INIT_LIST_HEAD(&info->local_publ);
+		INIT_LIST_HEAD(&info->all_publ);
 
 		/* Insert new sub-sequence */
 		sseq = &nseq->sseqs[inspos];
@@ -298,15 +298,15 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 	if (!publ)
 		return NULL;
 
-	list_add(&publ->cluster_list, &info->cluster_list);
+	list_add(&publ->all_publ, &info->all_publ);
 
 	if (in_own_node(net, node))
-		list_add(&publ->node_list, &info->node_list);
+		list_add(&publ->local_publ, &info->local_publ);
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
 		tipc_sub_report_overlap(s, publ->lower, publ->upper,
-					TIPC_PUBLISHED, publ->ref,
+					TIPC_PUBLISHED, publ->port,
 					publ->node, publ->scope,
 					created_subseq);
 	}
@@ -327,7 +327,7 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net,
 static struct publication *tipc_nameseq_remove_publ(struct net *net,
 						    struct name_seq *nseq,
 						    u32 inst, u32 node,
-						    u32 ref, u32 key)
+						    u32 port, u32 key)
 {
 	struct publication *publ;
 	struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
@@ -342,20 +342,20 @@ static struct publication *tipc_nameseq_remove_publ(struct net *net,
 	info = sseq->info;
 
 	/* Locate publication, if it exists */
-	list_for_each_entry(publ, &info->cluster_list, cluster_list) {
-		if ((publ->key == key) && (publ->ref == ref) &&
-		    (!publ->node || (publ->node == node)))
+	list_for_each_entry(publ, &info->all_publ, all_publ) {
+		if (publ->key == key && publ->port == port &&
+		    (!publ->node || publ->node == node))
 			goto found;
 	}
 	return NULL;
 
 found:
-	list_del(&publ->cluster_list);
+	list_del(&publ->all_publ);
 	if (in_own_node(net, node))
-		list_del(&publ->node_list);
+		list_del(&publ->local_publ);
 
 	/* Contract subseq list if no more publications for that subseq */
-	if (list_empty(&info->cluster_list)) {
+	if (list_empty(&info->all_publ)) {
 		kfree(info);
 		free = &nseq->sseqs[nseq->first_free--];
 		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
@@ -365,8 +365,9 @@ found:
 	/* Notify any waiting subscriptions */
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
 		tipc_sub_report_overlap(s, publ->lower, publ->upper,
-					TIPC_WITHDRAWN, publ->ref, publ->node,
-					publ->scope, removed_subseq);
+					TIPC_WITHDRAWN, publ->port,
+					publ->node, publ->scope,
+					removed_subseq);
 	}
 
 	return publ;
@@ -402,12 +403,12 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 			struct name_info *info = sseq->info;
 			int must_report = 1;
 
-			list_for_each_entry(crs, &info->cluster_list,
-					    cluster_list) {
+			list_for_each_entry(crs, &info->all_publ, all_publ) {
 				tipc_sub_report_overlap(sub, sseq->lower,
 							sseq->upper,
 							TIPC_PUBLISHED,
-							crs->ref, crs->node,
+							crs->port,
+							crs->node,
 							crs->scope,
 							must_report);
 				must_report = 0;
@@ -460,7 +461,7 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 }
 
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 node, u32 ref,
+					     u32 lower, u32 node, u32 port,
 					     u32 key)
 {
 	struct publication *publ;
@@ -470,7 +471,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 		return NULL;
 
 	spin_lock_bh(&seq->lock);
-	publ = tipc_nameseq_remove_publ(net, seq, lower, node, ref, key);
+	publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key);
 	if (!seq->first_free && list_empty(&seq->subscriptions)) {
 		hlist_del_init_rcu(&seq->ns_list);
 		kfree(seq->sseqs);
@@ -503,7 +504,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	struct name_info *info;
 	struct publication *publ;
 	struct name_seq *seq;
-	u32 ref = 0;
+	u32 port = 0;
 	u32 node = 0;
 
 	if (!tipc_in_scope(*destnode, tn->own_addr))
@@ -521,42 +522,42 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 
 	/* Closest-First Algorithm */
 	if (likely(!*destnode)) {
-		if (!list_empty(&info->node_list)) {
-			publ = list_first_entry(&info->node_list,
+		if (!list_empty(&info->local_publ)) {
+			publ = list_first_entry(&info->local_publ,
 						struct publication,
-						node_list);
-			list_move_tail(&publ->node_list,
-				       &info->node_list);
+						local_publ);
+			list_move_tail(&publ->local_publ,
+				       &info->local_publ);
 		} else {
-			publ = list_first_entry(&info->cluster_list,
+			publ = list_first_entry(&info->all_publ,
 						struct publication,
-						cluster_list);
-			list_move_tail(&publ->cluster_list,
-				       &info->cluster_list);
+						all_publ);
+			list_move_tail(&publ->all_publ,
+				       &info->all_publ);
 		}
 	}
 
 	/* Round-Robin Algorithm */
 	else if (*destnode == tn->own_addr) {
-		if (list_empty(&info->node_list))
+		if (list_empty(&info->local_publ))
 			goto no_match;
-		publ = list_first_entry(&info->node_list, struct publication,
-					node_list);
-		list_move_tail(&publ->node_list, &info->node_list);
+		publ = list_first_entry(&info->local_publ, struct publication,
+					local_publ);
+		list_move_tail(&publ->local_publ, &info->local_publ);
 	} else {
-		publ = list_first_entry(&info->cluster_list, struct publication,
-					cluster_list);
-		list_move_tail(&publ->cluster_list, &info->cluster_list);
+		publ = list_first_entry(&info->all_publ, struct publication,
+					all_publ);
+		list_move_tail(&publ->all_publ, &info->all_publ);
 	}
 
-	ref = publ->ref;
+	port = publ->port;
 	node = publ->node;
 no_match:
 	spin_unlock_bh(&seq->lock);
 not_found:
 	rcu_read_unlock();
 	*destnode = node;
-	return ref;
+	return port;
 }
 
 bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
@@ -578,17 +579,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 	sseq = nameseq_find_subseq(seq, instance);
 	if (likely(sseq)) {
 		info = sseq->info;
-		list_for_each_entry(publ, &info->cluster_list, cluster_list) {
+		list_for_each_entry(publ, &info->all_publ, all_publ) {
 			if (publ->scope != scope)
 				continue;
-			if (publ->ref == exclude && publ->node == self)
+			if (publ->port == exclude && publ->node == self)
 				continue;
-			tipc_dest_push(dsts, publ->node, publ->ref);
+			tipc_dest_push(dsts, publ->node, publ->port);
 			(*dstcnt)++;
 			if (all)
 				continue;
-			list_move_tail(&publ->cluster_list,
-				       &info->cluster_list);
+			list_move_tail(&publ->all_publ, &info->all_publ);
 			break;
 		}
 	}
@@ -619,9 +619,9 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
 		if (sseq->lower > upper)
 			break;
 		info = sseq->info;
-		list_for_each_entry(p, &info->node_list, node_list) {
+		list_for_each_entry(p, &info->local_publ, local_publ) {
 			if (p->scope == scope || (!exact && p->scope < scope))
-				tipc_dest_push(dports, 0, p->ref);
+				tipc_dest_push(dports, 0, p->port);
 		}
 	}
 	spin_unlock_bh(&seq->lock);
@@ -651,7 +651,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 	stop = seq->sseqs + seq->first_free;
 	for (; sseq != stop && sseq->lower <= upper; sseq++) {
 		info = sseq->info;
-		list_for_each_entry(publ, &info->cluster_list, cluster_list) {
+		list_for_each_entry(publ, &info->all_publ, all_publ) {
 			tipc_nlist_add(nodes, publ->node);
 		}
 	}
@@ -680,10 +680,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	stop = seq->sseqs + seq->first_free;
 	for (; sseq != stop; sseq++) {
 		info = sseq->info;
-		list_for_each_entry(p, &info->cluster_list, cluster_list) {
+		list_for_each_entry(p, &info->all_publ, all_publ) {
 			if (p->scope != scope)
 				continue;
-			tipc_group_add_member(grp, p->node, p->ref, p->lower);
+			tipc_group_add_member(grp, p->node, p->port, p->lower);
 		}
 	}
 	spin_unlock_bh(&seq->lock);
@@ -728,7 +728,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 /**
  * tipc_nametbl_withdraw - withdraw name publication from network name tables
  */
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
 			  u32 key)
 {
 	struct publication *publ;
@@ -737,18 +737,18 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
 
 	spin_lock_bh(&tn->nametbl_lock);
 	publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
-					ref, key);
+					port, key);
 	if (likely(publ)) {
 		tn->nametbl->local_publ_count--;
 		skb = tipc_named_withdraw(net, publ);
 		/* Any pending external events? */
 		tipc_named_process_backlog(net);
-		list_del_init(&publ->pport_list);
+		list_del_init(&publ->binding_sock);
 		kfree_rcu(publ, rcu);
 	} else {
 		pr_err("Unable to remove local publication\n"
-		       "(type=%u, lower=%u, ref=%u, key=%u)\n",
-		       type, lower, ref, key);
+		       "(type=%u, lower=%u, port=%u, key=%u)\n",
+		       type, lower, port, key);
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
 
@@ -851,10 +851,9 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq)
 	spin_lock_bh(&seq->lock);
 	sseq = seq->sseqs;
 	info = sseq->info;
-	list_for_each_entry_safe(publ, safe, &info->cluster_list,
-				 cluster_list) {
+	list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) {
 		tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
-					 publ->ref, publ->key);
+					 publ->port, publ->key);
 		kfree_rcu(publ, rcu);
 	}
 	hlist_del_init_rcu(&seq->ns_list);
@@ -901,19 +900,17 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 	struct publication *p;
 
 	if (*last_publ) {
-		list_for_each_entry(p, &sseq->info->cluster_list,
-				    cluster_list)
+		list_for_each_entry(p, &sseq->info->all_publ, all_publ)
 			if (p->key == *last_publ)
 				break;
 		if (p->key != *last_publ)
 			return -EPIPE;
 	} else {
-		p = list_first_entry(&sseq->info->cluster_list,
-				     struct publication,
-				     cluster_list);
+		p = list_first_entry(&sseq->info->all_publ, struct publication,
+				     all_publ);
 	}
 
-	list_for_each_entry_from(p, &sseq->info->cluster_list, cluster_list) {
+	list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) {
 		*last_publ = p->key;
 
 		hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
@@ -940,7 +937,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->ref))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
 			goto publ_msg_full;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index cb16bd883565..34a4ccb907aa 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -54,19 +54,22 @@ struct tipc_group;
  * @type: name sequence type
  * @lower: name sequence lower bound
  * @upper: name sequence upper bound
- * @scope: scope of publication
- * @node: network address of publishing port's node
- * @ref: publishing port
- * @key: publication key
- * @nodesub_list: subscription to "node down" event (off-node publication only)
- * @local_list: adjacent entries in list of publications made by this node
- * @pport_list: adjacent entries in list of publications made by this port
- * @node_list: adjacent matching name seq publications with >= node scope
- * @cluster_list: adjacent matching name seq publications with >= cluster scope
- * @zone_list: adjacent matching name seq publications with >= zone scope
+ * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
+ * @node: network address of publishing socket's node
+ * @port: publishing port
+ * @key: publication key, unique across the cluster
+ * @binding_node: all publications from the same node which bound this one
+ * - Remote publications: in node->publ_list
+ *   Used by node/name distr to withdraw publications when node is lost
+ * - Local/node scope publications: in name_table->node_scope list
+ * - Local/cluster scope publications: in name_table->cluster_scope list
+ * @binding_sock: all publications from the same socket which bound this one
+ *   Used by socket to withdraw publications when socket is unbound/released
+ * @local_publ: list of identical publications made from this node
+ *   Used by closest_first and multicast receive lookup algorithms
+ * @all_publ: all publications identical to this one, whatever node and scope
+ *   Used by round-robin lookup algorithm
  * @rcu: RCU callback head used for deferred freeing
- *
- * Note that the node list, cluster list, and zone list are circular lists.
  */
 struct publication {
 	u32 type;
@@ -74,12 +77,12 @@ struct publication {
 	u32 upper;
 	u32 scope;
 	u32 node;
-	u32 ref;
+	u32 port;
 	u32 key;
 	struct list_head binding_node;
-	struct list_head pport_list;
-	struct list_head node_list;
-	struct list_head cluster_list;
+	struct list_head binding_sock;
+	struct list_head local_publ;
+	struct list_head all_publ;
 	struct rcu_head rcu;
 };
 
@@ -87,7 +90,10 @@ struct publication {
  * struct name_table - table containing all existing port name publications
  * @seq_hlist: name sequence hash lists
  * @node_scope: all local publications with node scope
+ *               - used by name_distr during re-init of name table
  * @cluster_scope: all local publications with cluster scope
+ *               - used by name_distr to send bulk updates to new nodes
+ *               - used by name_distr during re-init of name table
  * @local_publ_count: number of publications issued by this node
  */
 struct name_table {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 910d3827f499..a4a9148d4629 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2605,7 +2605,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 	if (unlikely(!publ))
 		return -EINVAL;
 
-	list_add(&publ->pport_list, &tsk->publications);
+	list_add(&publ->binding_sock, &tsk->publications);
 	tsk->pub_count++;
 	tsk->published = 1;
 	return 0;
@@ -2622,7 +2622,7 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 	if (scope != TIPC_NODE_SCOPE)
 		scope = TIPC_CLUSTER_SCOPE;
 
-	list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
 		if (seq) {
 			if (publ->scope != scope)
 				continue;
@@ -2633,12 +2633,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 			if (publ->upper != seq->upper)
 				break;
 			tipc_nametbl_withdraw(net, publ->type, publ->lower,
-					      publ->ref, publ->key);
+					      publ->port, publ->key);
 			rc = 0;
 			break;
 		}
 		tipc_nametbl_withdraw(net, publ->type, publ->lower,
-				      publ->ref, publ->key);
+				      publ->port, publ->key);
 		rc = 0;
 	}
 	if (list_empty(&tsk->publications))
@@ -3292,7 +3292,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
 	struct publication *p;
 
 	if (*last_publ) {
-		list_for_each_entry(p, &tsk->publications, pport_list) {
+		list_for_each_entry(p, &tsk->publications, binding_sock) {
 			if (p->key == *last_publ)
 				break;
 		}
@@ -3309,10 +3309,10 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
 		}
 	} else {
 		p = list_first_entry(&tsk->publications, struct publication,
-				     pport_list);
+				     binding_sock);
 	}
 
-	list_for_each_entry_from(p, &tsk->publications, pport_list) {
+	list_for_each_entry_from(p, &tsk->publications, binding_sock) {
 		err = __tipc_nl_add_sk_publ(skb, cb, p);
 		if (err) {
 			*last_publ = p->key;

From 53d0e83f9329aa51dcc205b514dbee05cb4df309 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 15 Mar 2018 03:54:26 -0700
Subject: [PATCH 0975/1678] rds: tcp: must use spin_lock_irq* and not
 spin_lock_bh with rds_tcp_conn_lock

rds_tcp_connection allocation/free management has the potential to be
called from __rds_conn_create after IRQs have been disabled, so
spin_[un]lock_bh cannot be used with rds_tcp_conn_lock.

Bottom-halves that need to synchronize for critical sections protected
by rds_tcp_conn_lock should instead use rds_destroy_pending() correctly.

Reported-by: syzbot+c68e51bb5e699d3f8d91@syzkaller.appspotmail.com
Fixes: ebeeb1ad9b8a ("rds: tcp: use rds_destroy_pending() to synchronize
       netns/module teardown and rds connection/workq management")
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index eb04e7fa2467..08ea9cd5c2f6 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -272,13 +272,14 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 static void rds_tcp_conn_free(void *arg)
 {
 	struct rds_tcp_connection *tc = arg;
+	unsigned long flags;
 
 	rdsdebug("freeing tc %p\n", tc);
 
-	spin_lock_bh(&rds_tcp_conn_lock);
+	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
 	if (!tc->t_tcp_node_detached)
 		list_del(&tc->t_tcp_node);
-	spin_unlock_bh(&rds_tcp_conn_lock);
+	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
 
 	kmem_cache_free(rds_tcp_conn_slab, tc);
 }
@@ -308,13 +309,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 		rdsdebug("rds_conn_path [%d] tc %p\n", i,
 			 conn->c_path[i].cp_transport_data);
 	}
-	spin_lock_bh(&rds_tcp_conn_lock);
+	spin_lock_irq(&rds_tcp_conn_lock);
 	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
 		tc = conn->c_path[i].cp_transport_data;
 		tc->t_tcp_node_detached = false;
 		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
 	}
-	spin_unlock_bh(&rds_tcp_conn_lock);
+	spin_unlock_irq(&rds_tcp_conn_lock);
 fail:
 	if (ret) {
 		for (j = 0; j < i; j++)
@@ -527,7 +528,7 @@ static void rds_tcp_kill_sock(struct net *net)
 
 	rtn->rds_tcp_listen_sock = NULL;
 	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
-	spin_lock_bh(&rds_tcp_conn_lock);
+	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
@@ -540,7 +541,7 @@ static void rds_tcp_kill_sock(struct net *net)
 			tc->t_tcp_node_detached = true;
 		}
 	}
-	spin_unlock_bh(&rds_tcp_conn_lock);
+	spin_unlock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
 		rds_conn_destroy(tc->t_cpath->cp_conn);
 }
@@ -588,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
 
-	spin_lock_bh(&rds_tcp_conn_lock);
+	spin_lock_irq(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
 		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
@@ -598,7 +599,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 		/* reconnect with new parameters */
 		rds_conn_path_drop(tc->t_cpath, false);
 	}
-	spin_unlock_bh(&rds_tcp_conn_lock);
+	spin_unlock_irq(&rds_tcp_conn_lock);
 }
 
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,

From d7cb44496a9bb458632cb3c18acb08949c210448 Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Thu, 15 Mar 2018 17:34:14 +0530
Subject: [PATCH 0976/1678] cxgb4: Fix queue free path of ULD drivers

Setting sge_uld_rxq_info to NULL in free_queues_uld().
We are referencing sge_uld_rxq_info in cxgb_up(). This
will fix a panic when interface is brought up after a
ULDq creation failure.

Fixes: 94cdb8bb993a (cxgb4: Add support for dynamic allocation
       of resources for ULD)
Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudhar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 6b5fea4532f3..2d827140a475 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -342,6 +342,7 @@ static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 
+	adap->sge.uld_rxq_info[uld_type] = NULL;
 	kfree(rxq_info->rspq_id);
 	kfree(rxq_info->uldrxq);
 	kfree(rxq_info);

From a3a41d2f9487469364aeafae0f63d39c2e7179fd Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 15 Mar 2018 15:15:50 -0500
Subject: [PATCH 0977/1678] net: ethernet: ti: cpsw: enable vlan rx vlan
 offload

In VLAN_AWARE mode CPSW can insert VLAN header encapsulation word on Host
port 0 egress (RX) before the packet data if RX_VLAN_ENCAP bit is set in
CPSW_CONTROL register. VLAN header encapsulation word has following format:

 HDR_PKT_Priority bits 29-31 - Header Packet VLAN prio (Highest prio: 7)
 HDR_PKT_CFI 	  bits 28 - Header Packet VLAN CFI bit.
 HDR_PKT_Vid 	  bits 27-16 - Header Packet VLAN ID
 PKT_Type         bits 8-9 - Packet Type. Indicates whether the packet is
                 	VLAN-tagged, priority-tagged, or non-tagged.
	00: VLAN-tagged packet
	01: Reserved
	10: Priority-tagged packet
	11: Non-tagged packet

This feature can be used to implement TX VLAN offload in case of
VLAN-tagged packets and to insert VLAN tag in case Non-tagged packet was
received on port with PVID set. As per documentation, CPSW never modifies
packet data on Host egress (RX) and as result, without this feature
enabled, Host port will not be able to receive properly packets which
entered switch non-tagged through external Port with PVID set (when
non-tagged packet forwarded from external Port with PVID set to another
external Port - packet will be VLAN tagged properly).

Implementation details:
- on RX driver will check CPDMA status bit RX_VLAN_ENCAP BIT(19) in CPPI
descriptor to identify when VLAN header encapsulation word is present.
- PKT_Type = 0x01 or 0x02 then ignore VLAN header encapsulation word and
pass packet as is;
- if HDR_PKT_Vid = 0 then ignore VLAN header encapsulation word and pass
packet as is;
- In dual mac mode traffic is separated between ports using default port
vlans, which are not be visible to Host and so should not be reported.
Hence, check for default port vlans in dual mac mode and ignore VLAN header
encapsulation word;
- otherwise fill SKB with VLAN info using __vlan_hwaccel_put_tag();
- PKT_Type = 0x00 (VLAN-tagged) then strip out VLAN header from SKB.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c          | 67 +++++++++++++++++++++++--
 drivers/net/ethernet/ti/davinci_cpdma.c |  2 +-
 drivers/net/ethernet/ti/davinci_cpdma.h |  2 +
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1b1b78fdc138..8af8891078e2 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -120,14 +120,18 @@ do {								\
 #define CPDMA_RXCP		0x60
 
 #define CPSW_POLL_WEIGHT	64
+#define CPSW_RX_VLAN_ENCAP_HDR_SIZE		4
 #define CPSW_MIN_PACKET_SIZE	(VLAN_ETH_ZLEN)
-#define CPSW_MAX_PACKET_SIZE	(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+#define CPSW_MAX_PACKET_SIZE	(VLAN_ETH_FRAME_LEN +\
+				 ETH_FCS_LEN +\
+				 CPSW_RX_VLAN_ENCAP_HDR_SIZE)
 
 #define RX_PRIORITY_MAPPING	0x76543210
 #define TX_PRIORITY_MAPPING	0x33221100
 #define CPDMA_TX_PRIORITY_MAP	0x01234567
 
 #define CPSW_VLAN_AWARE		BIT(1)
+#define CPSW_RX_VLAN_ENCAP	BIT(2)
 #define CPSW_ALE_VLAN_AWARE	1
 
 #define CPSW_FIFO_NORMAL_MODE		(0 << 16)
@@ -148,6 +152,18 @@ do {								\
 #define CPSW_MAX_QUEUES		8
 #define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
 
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT	29
+#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK		GENMASK(2, 0)
+#define CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT	16
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT	8
+#define CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK	GENMASK(1, 0)
+enum {
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG = 0,
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV,
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG,
+	CPSW_RX_VLAN_ENCAP_HDR_PKT_UNTAG,
+};
+
 static int debug_level;
 module_param(debug_level, int, 0);
 MODULE_PARM_DESC(debug_level, "cpsw debug level (NETIF_MSG bits)");
@@ -718,6 +734,49 @@ static void cpsw_tx_handler(void *token, int len, int status)
 	dev_kfree_skb_any(skb);
 }
 
+static void cpsw_rx_vlan_encap(struct sk_buff *skb)
+{
+	struct cpsw_priv *priv = netdev_priv(skb->dev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	u32 rx_vlan_encap_hdr = *((u32 *)skb->data);
+	u16 vtag, vid, prio, pkt_type;
+
+	/* Remove VLAN header encapsulation word */
+	skb_pull(skb, CPSW_RX_VLAN_ENCAP_HDR_SIZE);
+
+	pkt_type = (rx_vlan_encap_hdr >>
+		    CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_SHIFT) &
+		    CPSW_RX_VLAN_ENCAP_HDR_PKT_TYPE_MSK;
+	/* Ignore unknown & Priority-tagged packets*/
+	if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_RESERV ||
+	    pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_PRIO_TAG)
+		return;
+
+	vid = (rx_vlan_encap_hdr >>
+	       CPSW_RX_VLAN_ENCAP_HDR_VID_SHIFT) &
+	       VLAN_VID_MASK;
+	/* Ignore vid 0 and pass packet as is */
+	if (!vid)
+		return;
+	/* Ignore default vlans in dual mac mode */
+	if (cpsw->data.dual_emac &&
+	    vid == cpsw->slaves[priv->emac_port].port_vlan)
+		return;
+
+	prio = (rx_vlan_encap_hdr >>
+		CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT) &
+		CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK;
+
+	vtag = (prio << VLAN_PRIO_SHIFT) | vid;
+	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+
+	/* strip vlan tag for VLAN-tagged packet */
+	if (pkt_type == CPSW_RX_VLAN_ENCAP_HDR_PKT_VLAN_TAG) {
+		memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+		skb_pull(skb, VLAN_HLEN);
+	}
+}
+
 static void cpsw_rx_handler(void *token, int len, int status)
 {
 	struct cpdma_chan	*ch;
@@ -752,6 +811,8 @@ static void cpsw_rx_handler(void *token, int len, int status)
 	if (new_skb) {
 		skb_copy_queue_mapping(new_skb, skb);
 		skb_put(skb, len);
+		if (status & CPDMA_RX_VLAN_ENCAP)
+			cpsw_rx_vlan_encap(skb);
 		cpts_rx_timestamp(cpsw->cpts, skb);
 		skb->protocol = eth_type_trans(skb, ndev);
 		netif_receive_skb(skb);
@@ -1406,7 +1467,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
 	cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
 			     CPSW_ALE_VLAN_AWARE);
 	control_reg = readl(&cpsw->regs->control);
-	control_reg |= CPSW_VLAN_AWARE;
+	control_reg |= CPSW_VLAN_AWARE | CPSW_RX_VLAN_ENCAP;
 	writel(control_reg, &cpsw->regs->control);
 	fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
 		     CPSW_FIFO_NORMAL_MODE;
@@ -3122,7 +3183,7 @@ static int cpsw_probe(struct platform_device *pdev)
 			cpsw->quirk_irq = true;
 	}
 
-	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 6f9173ff9414..31ae04117f0a 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1164,7 +1164,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
 		outlen -= CPDMA_DESC_CRC_LEN;
 
 	status	= status & (CPDMA_DESC_EOQ | CPDMA_DESC_TD_COMPLETE |
-			    CPDMA_DESC_PORT_MASK);
+			    CPDMA_DESC_PORT_MASK | CPDMA_RX_VLAN_ENCAP);
 
 	chan->head = desc_from_phys(pool, desc_read(desc, hw_next));
 	chan_write(chan, cp, desc_dma);
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index fd65ce2b83de..d399af5389b8 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -19,6 +19,8 @@
 
 #define CPDMA_RX_SOURCE_PORT(__status__)	((__status__ >> 16) & 0x7)
 
+#define CPDMA_RX_VLAN_ENCAP BIT(19)
+
 #define CPDMA_EOI_RX_THRESH	0x0
 #define CPDMA_EOI_RX		0x1
 #define CPDMA_EOI_TX		0x2

From 0e96460e620df7f9a13c352b68b06f02eec3de94 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 16 Mar 2018 15:44:27 -0700
Subject: [PATCH 0978/1678] hv_netvsc: pass netvsc_device to rndis halt

The caller has a valid pointer, pass it to rndis_filter_halt_device
and avoid any possible RCU races here.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/rndis_filter.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 8927c483c217..668680abaffb 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -944,12 +944,11 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
 	return true;
 }
 
-static void rndis_filter_halt_device(struct rndis_device *dev)
+static void rndis_filter_halt_device(struct netvsc_device *nvdev,
+				     struct rndis_device *dev)
 {
 	struct rndis_request *request;
 	struct rndis_halt_request *halt;
-	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
-	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
 	/* Attempt to do a rndis device halt */
 	request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1348,7 +1347,7 @@ void rndis_filter_device_remove(struct hv_device *dev,
 	cancel_work_sync(&net_dev->subchan_work);
 
 	/* Halt and release the rndis device */
-	rndis_filter_halt_device(rndis_dev);
+	rndis_filter_halt_device(net_dev, rndis_dev);
 
 	net_dev->extension = NULL;
 

From ec9663812f32c03e36c8c2ccc83e52dd5a7486d9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 16 Mar 2018 15:44:28 -0700
Subject: [PATCH 0979/1678] hv_netvsc: add trace points

This adds tracepoints to the driver which has proved useful in
debugging startup and shutdown race conditions.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/Makefile       |   2 +-
 drivers/net/hyperv/netvsc.c       |  26 ++++-
 drivers/net/hyperv/netvsc_trace.c |   7 ++
 drivers/net/hyperv/netvsc_trace.h | 182 ++++++++++++++++++++++++++++++
 drivers/net/hyperv/rndis_filter.c |   5 +
 5 files changed, 220 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/hyperv/netvsc_trace.c
 create mode 100644 drivers/net/hyperv/netvsc_trace.h

diff --git a/drivers/net/hyperv/Makefile b/drivers/net/hyperv/Makefile
index c8a66827100c..3f25b9c8ea59 100644
--- a/drivers/net/hyperv/Makefile
+++ b/drivers/net/hyperv/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
 
-hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o
+hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0265d703eb03..aa95e81af6e5 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -36,6 +36,7 @@
 #include <asm/sync_bitops.h>
 
 #include "hyperv_net.h"
+#include "netvsc_trace.h"
 
 /*
  * Switch the data path from the synthetic interface to the VF
@@ -57,6 +58,8 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 		init_pkt->msg.v4_msg.active_dp.active_datapath =
 			NVSP_DATAPATH_SYNTHETIC;
 
+	trace_nvsp_send(ndev, init_pkt);
+
 	vmbus_sendpacket(dev->channel, init_pkt,
 			       sizeof(struct nvsp_message),
 			       (unsigned long)init_pkt,
@@ -124,6 +127,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
 		revoke_packet->msg.v1_msg.
 		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 
+		trace_nvsp_send(ndev, revoke_packet);
+
 		ret = vmbus_sendpacket(device->channel,
 				       revoke_packet,
 				       sizeof(struct nvsp_message),
@@ -164,6 +169,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
 		revoke_packet->msg.v1_msg.revoke_send_buf.id =
 			NETVSC_SEND_BUFFER_ID;
 
+		trace_nvsp_send(ndev, revoke_packet);
+
 		ret = vmbus_sendpacket(device->channel,
 				       revoke_packet,
 				       sizeof(struct nvsp_message),
@@ -305,6 +312,8 @@ static int netvsc_init_buf(struct hv_device *device,
 	init_packet->msg.v1_msg.
 		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 
+	trace_nvsp_send(ndev, init_packet);
+
 	/* Send the gpadl notification request */
 	ret = vmbus_sendpacket(device->channel, init_packet,
 			       sizeof(struct nvsp_message),
@@ -384,6 +393,8 @@ static int netvsc_init_buf(struct hv_device *device,
 		net_device->send_buf_gpadl_handle;
 	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
 
+	trace_nvsp_send(ndev, init_packet);
+
 	/* Send the gpadl notification request */
 	ret = vmbus_sendpacket(device->channel, init_packet,
 			       sizeof(struct nvsp_message),
@@ -452,6 +463,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
 	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
 	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
 
+	trace_nvsp_send(ndev, init_packet);
+
 	/* Send the init request */
 	ret = vmbus_sendpacket(device->channel, init_packet,
 			       sizeof(struct nvsp_message),
@@ -484,6 +497,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
 		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
 	}
 
+	trace_nvsp_send(ndev, init_packet);
+
 	ret = vmbus_sendpacket(device->channel, init_packet,
 				sizeof(struct nvsp_message),
 				(unsigned long)init_packet,
@@ -496,6 +511,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
 			      struct netvsc_device *net_device,
 			      const struct netvsc_device_info *device_info)
 {
+	struct net_device *ndev = hv_get_drvdata(device);
 	static const u32 ver_list[] = {
 		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
 		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
@@ -536,6 +552,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
 		send_ndis_ver.ndis_minor_ver =
 				ndis_version & 0xFFFF;
 
+	trace_nvsp_send(ndev, init_packet);
+
 	/* Send the init request */
 	ret = vmbus_sendpacket(device->channel, init_packet,
 				sizeof(struct nvsp_message),
@@ -747,7 +765,7 @@ static inline int netvsc_send_pkt(
 	struct sk_buff *skb)
 {
 	struct nvsp_message nvmsg;
-	struct nvsp_1_message_send_rndis_packet * const rpkt =
+	struct nvsp_1_message_send_rndis_packet *rpkt =
 		&nvmsg.msg.v1_msg.send_rndis_pkt;
 	struct netvsc_channel * const nvchan =
 		&net_device->chan_table[packet->q_idx];
@@ -776,6 +794,8 @@ static inline int netvsc_send_pkt(
 	if (out_channel->rescind)
 		return -ENODEV;
 
+	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
+
 	if (packet->page_buf_cnt) {
 		if (packet->cp_partial)
 			pb += packet->rmsg_pgcnt;
@@ -1079,6 +1099,8 @@ static int netvsc_receive(struct net_device *ndev,
 			+ vmxferpage_packet->ranges[i].byte_offset;
 		u32 buflen = vmxferpage_packet->ranges[i].byte_count;
 
+		trace_rndis_recv(ndev, q_idx, data);
+
 		/* Pass it to the upper layer */
 		status = rndis_filter_receive(ndev, net_device,
 					      channel, data, buflen);
@@ -1143,6 +1165,8 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 	struct nvsp_message *nvmsg = hv_pkt_data(desc);
 
+	trace_nvsp_recv(ndev, channel, nvmsg);
+
 	switch (desc->type) {
 	case VM_PKT_COMP:
 		netvsc_send_completion(net_device, channel, device,
diff --git a/drivers/net/hyperv/netvsc_trace.c b/drivers/net/hyperv/netvsc_trace.c
new file mode 100644
index 000000000000..bb0ce5a2bcd5
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_trace.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/netdevice.h>
+
+#include "hyperv_net.h"
+
+#define CREATE_TRACE_POINTS
+#include "netvsc_trace.h"
diff --git a/drivers/net/hyperv/netvsc_trace.h b/drivers/net/hyperv/netvsc_trace.h
new file mode 100644
index 000000000000..f7585563dea5
--- /dev/null
+++ b/drivers/net/hyperv/netvsc_trace.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_NETVSC_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _NETVSC_TRACE_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM netvsc
+#define TRACE_INCLUDE_FILE netvsc_trace
+
+TRACE_DEFINE_ENUM(RNDIS_MSG_PACKET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INDICATE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_INIT_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_HALT);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY);
+TRACE_DEFINE_ENUM(RNDIS_MSG_QUERY_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_SET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET);
+TRACE_DEFINE_ENUM(RNDIS_MSG_RESET_C);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE);
+TRACE_DEFINE_ENUM(RNDIS_MSG_KEEPALIVE_C);
+
+#define show_rndis_type(type)					\
+	__print_symbolic(type,					\
+		 { RNDIS_MSG_PACKET,	  "PACKET" },		\
+		 { RNDIS_MSG_INDICATE,	  "INDICATE", },	\
+		 { RNDIS_MSG_INIT,	  "INIT", },		\
+		 { RNDIS_MSG_INIT_C,	  "INIT_C", },		\
+		 { RNDIS_MSG_HALT,	  "HALT", },		\
+		 { RNDIS_MSG_QUERY,	  "QUERY", },		\
+		 { RNDIS_MSG_QUERY_C,	  "QUERY_C", },		\
+		 { RNDIS_MSG_SET,	  "SET", },		\
+		 { RNDIS_MSG_SET_C,	  "SET_C", },		\
+		 { RNDIS_MSG_RESET,	  "RESET", },		\
+		 { RNDIS_MSG_RESET_C,	  "RESET_C", },		\
+		 { RNDIS_MSG_KEEPALIVE,	  "KEEPALIVE", },	\
+		 { RNDIS_MSG_KEEPALIVE_C, "KEEPALIVE_C", })
+
+DECLARE_EVENT_CLASS(rndis_msg_class,
+       TP_PROTO(const struct net_device *ndev, u16 q,
+		const struct rndis_message *msg),
+       TP_ARGS(ndev, q, msg),
+       TP_STRUCT__entry(
+	       __string( name, ndev->name  )
+	       __field(	 u16,  queue	   )
+	       __field(	 u32,  req_id	   )
+	       __field(	 u32,  msg_type	   )
+	       __field(	 u32,  msg_len	   )
+       ),
+       TP_fast_assign(
+	       __assign_str(name, ndev->name);
+	       __entry->queue	 = q;
+	       __entry->req_id	 = msg->msg.init_req.req_id;
+	       __entry->msg_type = msg->ndis_msg_type;
+	       __entry->msg_len	 = msg->msg_len;
+       ),
+       TP_printk("dev=%s q=%u req=%#x type=%s msg_len=%u",
+		 __get_str(name), __entry->queue, __entry->req_id,
+		 show_rndis_type(__entry->msg_type), __entry->msg_len)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_send,
+       TP_PROTO(const struct net_device *ndev, u16 q,
+		const struct rndis_message *msg),
+       TP_ARGS(ndev, q, msg)
+);
+
+DEFINE_EVENT(rndis_msg_class, rndis_recv,
+       TP_PROTO(const struct net_device *ndev, u16 q,
+		const struct rndis_message *msg),
+       TP_ARGS(ndev, q, msg)
+);
+
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT);
+TRACE_DEFINE_ENUM(NVSP_MSG_TYPE_INIT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_NDIS_VER);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_RECV_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_REVOKE_SEND_BUF);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT);
+TRACE_DEFINE_ENUM(NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE);
+TRACE_DEFINE_ENUM(NVSP_MSG2_TYPE_SEND_NDIS_CONFIG);
+
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION);
+TRACE_DEFINE_ENUM(NVSP_MSG4_TYPE_SWITCH_DATA_PATH);
+
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SUBCHANNEL);
+TRACE_DEFINE_ENUM(NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE);
+
+#define show_nvsp_type(type)								\
+	__print_symbolic(type,								\
+		  { NVSP_MSG_TYPE_INIT,			   "INIT" },			\
+		  { NVSP_MSG_TYPE_INIT_COMPLETE,	   "INIT_COMPLETE" },		\
+		  { NVSP_MSG1_TYPE_SEND_NDIS_VER,	   "SEND_NDIS_VER" },		\
+		  { NVSP_MSG1_TYPE_SEND_RECV_BUF,	   "SEND_RECV_BUF" },		\
+		  { NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, "SEND_RECV_BUF_COMPLETE" },	\
+		  { NVSP_MSG1_TYPE_REVOKE_RECV_BUF,	   "REVOKE_RECV_BUF" },		\
+		  { NVSP_MSG1_TYPE_SEND_SEND_BUF,	   "SEND_SEND_BUF" },		\
+		  { NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, "SEND_SEND_BUF_COMPLETE" },	\
+		  { NVSP_MSG1_TYPE_REVOKE_SEND_BUF,	   "REVOKE_SEND_BUF" },		\
+		  { NVSP_MSG1_TYPE_SEND_RNDIS_PKT,	   "SEND_RNDIS_PKT" },		\
+		  { NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, "SEND_RNDIS_PKT_COMPLETE" },\
+		  { NVSP_MSG2_TYPE_SEND_NDIS_CONFIG,	   "SEND_NDIS_CONFIG" },	\
+		  { NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION,	   "SEND_VF_ASSOCIATION" },	\
+		  { NVSP_MSG4_TYPE_SWITCH_DATA_PATH,	   "SWITCH_DATA_PATH" },	\
+		  { NVSP_MSG5_TYPE_SUBCHANNEL,		    "SUBCHANNEL" },		\
+		  { NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,  "SEND_INDIRECTION_TABLE" })
+
+TRACE_EVENT(nvsp_send,
+	TP_PROTO(const struct net_device *ndev,
+		 const struct nvsp_message *msg),
+	TP_ARGS(ndev, msg),
+	TP_STRUCT__entry(
+		__string( name,	ndev->name  )
+		__field(  u32,	msg_type    )
+	),
+	TP_fast_assign(
+		__assign_str(name, ndev->name);
+		__entry->msg_type = msg->hdr.msg_type;
+	),
+	TP_printk("dev=%s type=%s",
+		  __get_str(name),
+		  show_nvsp_type(__entry->msg_type))
+);
+
+TRACE_EVENT(nvsp_send_pkt,
+	TP_PROTO(const struct net_device *ndev,
+		 const struct vmbus_channel *chan,
+		 const struct nvsp_1_message_send_rndis_packet *rpkt),
+	TP_ARGS(ndev, chan, rpkt),
+	TP_STRUCT__entry(
+		__string( name,	ndev->name    )
+		__field(  u16,	qid	      )
+		__field(  u32,	channel_type  )
+		__field(  u32,	section_index )
+		__field(  u32,	section_size  )
+	),
+	TP_fast_assign(
+		__assign_str(name, ndev->name);
+		__entry->qid = chan->offermsg.offer.sub_channel_index;
+		__entry->channel_type = rpkt->channel_type;
+		__entry->section_index = rpkt->send_buf_section_index;
+		__entry->section_size = rpkt->send_buf_section_size;
+	),
+	TP_printk("dev=%s qid=%u type=%s section=%u size=%d",
+		  __get_str(name), __entry->qid,
+		  __entry->channel_type ? "CONTROL" : "DATA",
+		  __entry->section_index, __entry->section_size)
+);
+
+TRACE_EVENT(nvsp_recv,
+	TP_PROTO(const struct net_device *ndev,
+		 const struct vmbus_channel *chan,
+		 const struct nvsp_message *msg),
+	TP_ARGS(ndev, chan, msg),
+	TP_STRUCT__entry(
+		__string( name,	ndev->name  )
+		__field(  u16,	qid	    )
+		__field(  u32,	msg_type    )
+	),
+	TP_fast_assign(
+		__assign_str(name, ndev->name);
+		__entry->qid = chan->offermsg.offer.sub_channel_index;
+		__entry->msg_type = msg->hdr.msg_type;
+	),
+	TP_printk("dev=%s qid=%u type=%s",
+		  __get_str(name), __entry->qid,
+		  show_nvsp_type(__entry->msg_type))
+);
+
+#endif /* _NETVSC_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/net/hyperv
+#include <trace/define_trace.h>
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 668680abaffb..2dc00f714482 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,6 +31,7 @@
 #include <linux/rtnetlink.h>
 
 #include "hyperv_net.h"
+#include "netvsc_trace.h"
 
 static void rndis_set_multicast(struct work_struct *w);
 
@@ -241,6 +242,8 @@ static int rndis_filter_send_request(struct rndis_device *dev,
 			pb[0].len;
 	}
 
+	trace_rndis_send(dev->ndev, 0, &req->request_msg);
+
 	rcu_read_lock_bh();
 	ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL);
 	rcu_read_unlock_bh();
@@ -1087,6 +1090,8 @@ void rndis_set_subchannel(struct work_struct *w)
 	init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
 	init_packet->msg.v5_msg.subchn_req.num_subchannels =
 						nvdev->num_chn - 1;
+	trace_nvsp_send(ndev, init_packet);
+
 	ret = vmbus_sendpacket(hv_dev->channel, init_packet,
 			       sizeof(struct nvsp_message),
 			       (unsigned long)init_packet,

From d47d08c8ca052df3d9fde7cfff518660335b16e7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Fri, 16 Mar 2018 23:32:51 +0000
Subject: [PATCH 0980/1678] sctp: use proc_remove_subtree()

use proc_remove_subtree() for subtree removal, both on setup failure
halfway through and on teardown.  No need to make simple things
complex...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 11 +----
 net/sctp/objcnt.c       |  8 ----
 net/sctp/proc.c         | 90 ++++++++++-------------------------------
 net/sctp/protocol.c     | 59 +++------------------------
 4 files changed, 28 insertions(+), 140 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index f7ae6b0a21d0..72c5b8fc3232 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -180,14 +180,7 @@ struct sctp_transport *sctp_epaddr_lookup_transport(
 /*
  * sctp/proc.c
  */
-int sctp_snmp_proc_init(struct net *net);
-void sctp_snmp_proc_exit(struct net *net);
-int sctp_eps_proc_init(struct net *net);
-void sctp_eps_proc_exit(struct net *net);
-int sctp_assocs_proc_init(struct net *net);
-void sctp_assocs_proc_exit(struct net *net);
-int sctp_remaddr_proc_init(struct net *net);
-void sctp_remaddr_proc_exit(struct net *net);
+int __net_init sctp_proc_init(struct net *net);
 
 /*
  * sctp/offload.c
@@ -318,7 +311,6 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
 {.label= #name, .counter= &sctp_dbg_objcnt_## name}
 
 void sctp_dbg_objcnt_init(struct net *);
-void sctp_dbg_objcnt_exit(struct net *);
 
 #else
 
@@ -326,7 +318,6 @@ void sctp_dbg_objcnt_exit(struct net *);
 #define SCTP_DBG_OBJCNT_DEC(name)
 
 static inline void sctp_dbg_objcnt_init(struct net *net) { return; }
-static inline void sctp_dbg_objcnt_exit(struct net *net) { return; }
 
 #endif /* CONFIG_SCTP_DBG_OBJCOUNT */
 
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index aeea6da81441..fd2684ad94c8 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -130,11 +130,3 @@ void sctp_dbg_objcnt_init(struct net *net)
 	if (!ent)
 		pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
 }
-
-/* Cleanup the objcount entry in the proc filesystem.  */
-void sctp_dbg_objcnt_exit(struct net *net)
-{
-	remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
-}
-
-
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 537545ebcb0e..17d0155d9de3 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -101,25 +101,6 @@ static const struct file_operations sctp_snmp_seq_fops = {
 	.release = single_release_net,
 };
 
-/* Set up the proc fs entry for 'snmp' object. */
-int __net_init sctp_snmp_proc_init(struct net *net)
-{
-	struct proc_dir_entry *p;
-
-	p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_snmp_seq_fops);
-	if (!p)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(struct net *net)
-{
-	remove_proc_entry("snmp", net->sctp.proc_net_sctp);
-}
-
 /* Dump local addresses of an association/endpoint. */
 static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
 {
@@ -259,25 +240,6 @@ static const struct file_operations sctp_eps_seq_fops = {
 	.release = seq_release_net,
 };
 
-/* Set up the proc fs entry for 'eps' object. */
-int __net_init sctp_eps_proc_init(struct net *net)
-{
-	struct proc_dir_entry *p;
-
-	p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_eps_seq_fops);
-	if (!p)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(struct net *net)
-{
-	remove_proc_entry("eps", net->sctp.proc_net_sctp);
-}
-
 struct sctp_ht_iter {
 	struct seq_net_private p;
 	struct rhashtable_iter hti;
@@ -390,25 +352,6 @@ static const struct file_operations sctp_assocs_seq_fops = {
 	.release = seq_release_net,
 };
 
-/* Set up the proc fs entry for 'assocs' object. */
-int __net_init sctp_assocs_proc_init(struct net *net)
-{
-	struct proc_dir_entry *p;
-
-	p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_assocs_seq_fops);
-	if (!p)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(struct net *net)
-{
-	remove_proc_entry("assocs", net->sctp.proc_net_sctp);
-}
-
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 {
 	struct sctp_association *assoc;
@@ -488,12 +431,6 @@ static const struct seq_operations sctp_remaddr_ops = {
 	.show  = sctp_remaddr_seq_show,
 };
 
-/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(struct net *net)
-{
-	remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
-}
-
 static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &sctp_remaddr_ops,
@@ -507,13 +444,28 @@ static const struct file_operations sctp_remaddr_seq_fops = {
 	.release = seq_release_net,
 };
 
-int __net_init sctp_remaddr_proc_init(struct net *net)
+/* Set up the proc fs entry for the SCTP protocol. */
+int __net_init sctp_proc_init(struct net *net)
 {
-	struct proc_dir_entry *p;
-
-	p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_remaddr_seq_fops);
-	if (!p)
+	net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+	if (!net->sctp.proc_net_sctp)
 		return -ENOMEM;
+	if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_snmp_seq_fops))
+		goto cleanup;
+	if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_eps_seq_fops))
+		goto cleanup;
+	if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_assocs_seq_fops))
+		goto cleanup;
+	if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_remaddr_seq_fops))
+		goto cleanup;
 	return 0;
+
+cleanup:
+	remove_proc_subtree("sctp", net->proc_net);
+	net->sctp.proc_net_sctp = NULL;
+	return -ENOMEM;
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 606361ee9e4a..493b817f6a2a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -80,56 +80,6 @@ long sysctl_sctp_mem[3];
 int sysctl_sctp_rmem[3];
 int sysctl_sctp_wmem[3];
 
-/* Set up the proc fs entry for the SCTP protocol. */
-static int __net_init sctp_proc_init(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
-	net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
-	if (!net->sctp.proc_net_sctp)
-		goto out_proc_net_sctp;
-	if (sctp_snmp_proc_init(net))
-		goto out_snmp_proc_init;
-	if (sctp_eps_proc_init(net))
-		goto out_eps_proc_init;
-	if (sctp_assocs_proc_init(net))
-		goto out_assocs_proc_init;
-	if (sctp_remaddr_proc_init(net))
-		goto out_remaddr_proc_init;
-
-	return 0;
-
-out_remaddr_proc_init:
-	sctp_assocs_proc_exit(net);
-out_assocs_proc_init:
-	sctp_eps_proc_exit(net);
-out_eps_proc_init:
-	sctp_snmp_proc_exit(net);
-out_snmp_proc_init:
-	remove_proc_entry("sctp", net->proc_net);
-	net->sctp.proc_net_sctp = NULL;
-out_proc_net_sctp:
-	return -ENOMEM;
-#endif /* CONFIG_PROC_FS */
-	return 0;
-}
-
-/* Clean up the proc fs entry for the SCTP protocol.
- * Note: Do not make this __exit as it is used in the init error
- * path.
- */
-static void sctp_proc_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
-	sctp_snmp_proc_exit(net);
-	sctp_eps_proc_exit(net);
-	sctp_assocs_proc_exit(net);
-	sctp_remaddr_proc_exit(net);
-
-	remove_proc_entry("sctp", net->proc_net);
-	net->sctp.proc_net_sctp = NULL;
-#endif
-}
-
 /* Private helper to extract ipv4 address and stash them in
  * the protocol structure.
  */
@@ -1285,10 +1235,12 @@ static int __net_init sctp_defaults_init(struct net *net)
 	if (status)
 		goto err_init_mibs;
 
+#ifdef CONFIG_PROC_FS
 	/* Initialize proc fs directory.  */
 	status = sctp_proc_init(net);
 	if (status)
 		goto err_init_proc;
+#endif
 
 	sctp_dbg_objcnt_init(net);
 
@@ -1320,9 +1272,10 @@ static void __net_exit sctp_defaults_exit(struct net *net)
 	sctp_free_addr_wq(net);
 	sctp_free_local_addr_list(net);
 
-	sctp_dbg_objcnt_exit(net);
-
-	sctp_proc_exit(net);
+#ifdef CONFIG_PROC_FS
+	remove_proc_subtree("sctp", net->proc_net);
+	net->sctp.proc_net_sctp = NULL;
+#endif
 	cleanup_sctp_mibs(net);
 	sctp_sysctl_net_unregister(net);
 }

From 4bd95a51b6e32c3267b8beb097bd719380147d36 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:24 -0500
Subject: [PATCH 0981/1678] ibmvnic: Generalize TX pool structure

Remove some unused fields in the structure and include values
describing the individual buffer size and number of buffers in
a TX pool. This allows us to use these fields for TX pool buffer
accounting as opposed to using hard coded values. Include a new
pool array for TSO transmissions.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 099c89d49945..d287dc78db45 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -917,11 +917,11 @@ struct ibmvnic_tx_pool {
 	int *free_map;
 	int consumer_index;
 	int producer_index;
-	wait_queue_head_t ibmvnic_tx_comp_q;
-	struct task_struct *work_thread;
 	struct ibmvnic_long_term_buff long_term_buff;
 	struct ibmvnic_long_term_buff tso_ltb;
 	int tso_index;
+	int num_buffers;
+	int buf_size;
 };
 
 struct ibmvnic_rx_buff {
@@ -1044,6 +1044,7 @@ struct ibmvnic_adapter {
 	u64 promisc;
 
 	struct ibmvnic_tx_pool *tx_pool;
+	struct ibmvnic_tx_pool *tso_pool;
 	struct completion init_done;
 	int init_done_rc;
 

From e26dc25bc0b6f66b51d740af03962a30ee33ca7e Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:25 -0500
Subject: [PATCH 0982/1678] ibmvnic: Update and clean up reset TX pool routine

Update TX pool reset routine to accommodate new TSO pool array. Introduce
a function that resets one TX pool, and use that function to initialize
each pool in both pool arrays.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 45 +++++++++++++++++-------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9c7d19c926f9..4dc304422ece 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -557,36 +557,41 @@ static int init_rx_pools(struct net_device *netdev)
 	return 0;
 }
 
+static int reset_one_tx_pool(struct ibmvnic_adapter *adapter,
+			     struct ibmvnic_tx_pool *tx_pool)
+{
+	int rc, i;
+
+	rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+	if (rc)
+		return rc;
+
+	memset(tx_pool->tx_buff, 0,
+	       tx_pool->num_buffers *
+	       sizeof(struct ibmvnic_tx_buff));
+
+	for (i = 0; i < tx_pool->num_buffers; i++)
+		tx_pool->free_map[i] = i;
+
+	tx_pool->consumer_index = 0;
+	tx_pool->producer_index = 0;
+
+	return 0;
+}
+
 static int reset_tx_pools(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_tx_pool *tx_pool;
 	int tx_scrqs;
-	int i, j, rc;
+	int i, rc;
 
 	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
 	for (i = 0; i < tx_scrqs; i++) {
-		netdev_dbg(adapter->netdev, "Re-setting tx_pool[%d]\n", i);
-
-		tx_pool = &adapter->tx_pool[i];
-
-		rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
+		rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]);
 		if (rc)
 			return rc;
-
-		rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+		rc = reset_one_tx_pool(adapter, &adapter->tx_pool[i]);
 		if (rc)
 			return rc;
-
-		memset(tx_pool->tx_buff, 0,
-		       adapter->req_tx_entries_per_subcrq *
-		       sizeof(struct ibmvnic_tx_buff));
-
-		for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
-			tx_pool->free_map[j] = j;
-
-		tx_pool->consumer_index = 0;
-		tx_pool->producer_index = 0;
-		tx_pool->tso_index = 0;
 	}
 
 	return 0;

From fb79421c3c37f391efbe98258c930111e53590a1 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:26 -0500
Subject: [PATCH 0983/1678] ibmvnic: Update release TX pool routine

Introduce function that frees one TX pool.  Use that to release
each pool in both the standard TX pool and TSO pool arrays.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 4dc304422ece..258d54e3a616 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -608,25 +608,30 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
 	adapter->vpd = NULL;
 }
 
+static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
+				struct ibmvnic_tx_pool *tx_pool)
+{
+	kfree(tx_pool->tx_buff);
+	kfree(tx_pool->free_map);
+	free_long_term_buff(adapter, &tx_pool->long_term_buff);
+}
+
 static void release_tx_pools(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_tx_pool *tx_pool;
 	int i;
 
 	if (!adapter->tx_pool)
 		return;
 
 	for (i = 0; i < adapter->num_active_tx_pools; i++) {
-		netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
-		tx_pool = &adapter->tx_pool[i];
-		kfree(tx_pool->tx_buff);
-		free_long_term_buff(adapter, &tx_pool->long_term_buff);
-		free_long_term_buff(adapter, &tx_pool->tso_ltb);
-		kfree(tx_pool->free_map);
+		release_one_tx_pool(adapter, &adapter->tx_pool[i]);
+		release_one_tx_pool(adapter, &adapter->tso_pool[i]);
 	}
 
 	kfree(adapter->tx_pool);
 	adapter->tx_pool = NULL;
+	kfree(adapter->tso_pool);
+	adapter->tso_pool = NULL;
 	adapter->num_active_tx_pools = 0;
 }
 

From 3205306c6b8d5a8ede1d8222c9db6009b399299a Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:27 -0500
Subject: [PATCH 0984/1678] ibmvnic: Update TX pool initialization routine

Introduce function that initializes one TX pool. Use that to
create each pool entry in both the standard TX pool and TSO
pool arrays.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 90 ++++++++++++++++--------------
 1 file changed, 48 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 258d54e3a616..2bb5d562dde1 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -635,13 +635,43 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
 	adapter->num_active_tx_pools = 0;
 }
 
+static int init_one_tx_pool(struct net_device *netdev,
+			    struct ibmvnic_tx_pool *tx_pool,
+			    int num_entries, int buf_size)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	tx_pool->tx_buff = kcalloc(num_entries,
+				   sizeof(struct ibmvnic_tx_buff),
+				   GFP_KERNEL);
+	if (!tx_pool->tx_buff)
+		return -1;
+
+	if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
+				 num_entries * buf_size))
+		return -1;
+
+	tx_pool->free_map = kcalloc(num_entries, sizeof(int), GFP_KERNEL);
+	if (!tx_pool->free_map)
+		return -1;
+
+	for (i = 0; i < num_entries; i++)
+		tx_pool->free_map[i] = i;
+
+	tx_pool->consumer_index = 0;
+	tx_pool->producer_index = 0;
+	tx_pool->num_buffers = num_entries;
+	tx_pool->buf_size = buf_size;
+
+	return 0;
+}
+
 static int init_tx_pools(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_tx_pool *tx_pool;
 	int tx_subcrqs;
-	int i, j;
+	int i, rc;
 
 	tx_subcrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
 	adapter->tx_pool = kcalloc(tx_subcrqs,
@@ -649,53 +679,29 @@ static int init_tx_pools(struct net_device *netdev)
 	if (!adapter->tx_pool)
 		return -1;
 
+	adapter->tso_pool = kcalloc(tx_subcrqs,
+				    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
+	if (!adapter->tso_pool)
+		return -1;
+
 	adapter->num_active_tx_pools = tx_subcrqs;
 
 	for (i = 0; i < tx_subcrqs; i++) {
-		tx_pool = &adapter->tx_pool[i];
-
-		netdev_dbg(adapter->netdev,
-			   "Initializing tx_pool[%d], %lld buffs\n",
-			   i, adapter->req_tx_entries_per_subcrq);
-
-		tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
-					   sizeof(struct ibmvnic_tx_buff),
-					   GFP_KERNEL);
-		if (!tx_pool->tx_buff) {
-			dev_err(dev, "tx pool buffer allocation failed\n");
+		rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
+				      adapter->req_tx_entries_per_subcrq,
+				      adapter->req_mtu + VLAN_HLEN);
+		if (rc) {
 			release_tx_pools(adapter);
-			return -1;
+			return rc;
 		}
 
-		if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-					 adapter->req_tx_entries_per_subcrq *
-					 (adapter->req_mtu + VLAN_HLEN))) {
+		init_one_tx_pool(netdev, &adapter->tso_pool[i],
+				 IBMVNIC_TSO_BUFS,
+				 IBMVNIC_TSO_BUF_SZ);
+		if (rc) {
 			release_tx_pools(adapter);
-			return -1;
+			return rc;
 		}
-
-		/* alloc TSO ltb */
-		if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
-					 IBMVNIC_TSO_BUFS *
-					 IBMVNIC_TSO_BUF_SZ)) {
-			release_tx_pools(adapter);
-			return -1;
-		}
-
-		tx_pool->tso_index = 0;
-
-		tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
-					    sizeof(int), GFP_KERNEL);
-		if (!tx_pool->free_map) {
-			release_tx_pools(adapter);
-			return -1;
-		}
-
-		for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
-			tx_pool->free_map[j] = j;
-
-		tx_pool->consumer_index = 0;
-		tx_pool->producer_index = 0;
 	}
 
 	return 0;

From 06b3e35788a4c6f0afa931b6251ecfe20ce4787b Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:28 -0500
Subject: [PATCH 0985/1678] ibmvnic: Update TX and TX completion routines

Update TX and TX completion routines to account for TX pool
restructuring. TX routine first chooses the pool depending
on whether a packet is GSO or not, then uses it accordingly.

For the completion routine to know which pool it needs to use,
set the most significant bit of the correlator index to one
if the packet uses the TSO pool. On completion, unset the bit
and use the correlator index to release the buffer pool entry.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 55 +++++++++++++++---------------
 drivers/net/ethernet/ibm/ibmvnic.h |  1 +
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2bb5d562dde1..672e9221d4a5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1414,8 +1414,11 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		ret = NETDEV_TX_OK;
 		goto out;
 	}
+	if (skb_is_gso(skb))
+		tx_pool = &adapter->tso_pool[queue_num];
+	else
+		tx_pool = &adapter->tx_pool[queue_num];
 
-	tx_pool = &adapter->tx_pool[queue_num];
 	tx_scrq = adapter->tx_scrq[queue_num];
 	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
 	handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
@@ -1423,20 +1426,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	index = tx_pool->free_map[tx_pool->consumer_index];
 
-	if (skb_is_gso(skb)) {
-		offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
-		dst = tx_pool->tso_ltb.buff + offset;
-		memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
-		data_dma_addr = tx_pool->tso_ltb.addr + offset;
-		tx_pool->tso_index++;
-		if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
-			tx_pool->tso_index = 0;
-	} else {
-		offset = index * (adapter->req_mtu + VLAN_HLEN);
-		dst = tx_pool->long_term_buff.buff + offset;
-		memset(dst, 0, adapter->req_mtu + VLAN_HLEN);
-		data_dma_addr = tx_pool->long_term_buff.addr + offset;
-	}
+	offset = index * tx_pool->buf_size;
+	dst = tx_pool->long_term_buff.buff + offset;
+	memset(dst, 0, tx_pool->buf_size);
+	data_dma_addr = tx_pool->long_term_buff.addr + offset;
 
 	if (skb_shinfo(skb)->nr_frags) {
 		int cur, i;
@@ -1459,8 +1452,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	tx_pool->consumer_index =
-	    (tx_pool->consumer_index + 1) %
-		adapter->req_tx_entries_per_subcrq;
+	    (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
 
 	tx_buff = &tx_pool->tx_buff[index];
 	tx_buff->skb = skb;
@@ -1476,11 +1468,13 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_crq.v1.n_crq_elem = 1;
 	tx_crq.v1.n_sge = 1;
 	tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
-	tx_crq.v1.correlator = cpu_to_be32(index);
+
 	if (skb_is_gso(skb))
-		tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+		tx_crq.v1.correlator =
+			cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK);
 	else
-		tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+		tx_crq.v1.correlator = cpu_to_be32(index);
+	tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
 	tx_crq.v1.sge_len = cpu_to_be32(skb->len);
 	tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
 
@@ -1543,7 +1537,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 		if (tx_pool->consumer_index == 0)
 			tx_pool->consumer_index =
-				adapter->req_tx_entries_per_subcrq - 1;
+				tx_pool->num_buffers - 1;
 		else
 			tx_pool->consumer_index--;
 
@@ -2547,6 +2541,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 			       struct ibmvnic_sub_crq_queue *scrq)
 {
 	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_tx_pool *tx_pool;
 	struct ibmvnic_tx_buff *txbuff;
 	union sub_crq *next;
 	int index;
@@ -2566,7 +2561,14 @@ restart_loop:
 				continue;
 			}
 			index = be32_to_cpu(next->tx_comp.correlators[i]);
-			txbuff = &adapter->tx_pool[pool].tx_buff[index];
+			if (index & IBMVNIC_TSO_POOL_MASK) {
+				tx_pool = &adapter->tso_pool[pool];
+				index &= ~IBMVNIC_TSO_POOL_MASK;
+			} else {
+				tx_pool = &adapter->tx_pool[pool];
+			}
+
+			txbuff = &tx_pool->tx_buff[index];
 
 			for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
 				if (!txbuff->data_dma[j])
@@ -2589,11 +2591,10 @@ restart_loop:
 
 			num_entries += txbuff->num_entries;
 
-			adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
-						     producer_index] = index;
-			adapter->tx_pool[pool].producer_index =
-			    (adapter->tx_pool[pool].producer_index + 1) %
-			    adapter->req_tx_entries_per_subcrq;
+			tx_pool->free_map[tx_pool->producer_index] = index;
+			tx_pool->producer_index =
+				(tx_pool->producer_index + 1) %
+					tx_pool->num_buffers;
 		}
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index d287dc78db45..566927e2974a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -43,6 +43,7 @@
 
 #define IBMVNIC_TSO_BUF_SZ	65536
 #define IBMVNIC_TSO_BUFS	64
+#define IBMVNIC_TSO_POOL_MASK	0x80000000
 
 #define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
 #define IBMVNIC_BUFFER_HLEN 500

From 86b61a5f2e39bbee254aa4d2c5445059c7656f98 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:29 -0500
Subject: [PATCH 0986/1678] ibmvnic: Improve TX buffer accounting

Improve TX pool buffer accounting to prevent the producer
index from overruning the consumer. First, set the next free
index to an invalid value if it is in use. If next buffer
to be consumed is in use, drop the packet.

Finally, if the transmit fails for some other reason, roll
back the consumer index and set the free map entry to its original
value. This should also be done if the DMA map fails.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 672e9221d4a5..af6f8193cb67 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1426,6 +1426,16 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	index = tx_pool->free_map[tx_pool->consumer_index];
 
+	if (index == IBMVNIC_INVALID_MAP) {
+		dev_kfree_skb_any(skb);
+		tx_send_failed++;
+		tx_dropped++;
+		ret = NETDEV_TX_OK;
+		goto out;
+	}
+
+	tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
+
 	offset = index * tx_pool->buf_size;
 	dst = tx_pool->long_term_buff.buff + offset;
 	memset(dst, 0, tx_pool->buf_size);
@@ -1522,7 +1532,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 			tx_map_failed++;
 			tx_dropped++;
 			ret = NETDEV_TX_OK;
-			goto out;
+			goto tx_err_out;
 		}
 		lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
 					       (u64)tx_buff->indir_dma,
@@ -1534,13 +1544,6 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 	if (lpar_rc != H_SUCCESS) {
 		dev_err(dev, "tx failed with code %ld\n", lpar_rc);
-
-		if (tx_pool->consumer_index == 0)
-			tx_pool->consumer_index =
-				tx_pool->num_buffers - 1;
-		else
-			tx_pool->consumer_index--;
-
 		dev_kfree_skb_any(skb);
 		tx_buff->skb = NULL;
 
@@ -1556,7 +1559,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_send_failed++;
 		tx_dropped++;
 		ret = NETDEV_TX_OK;
-		goto out;
+		goto tx_err_out;
 	}
 
 	if (atomic_add_return(num_entries, &tx_scrq->used)
@@ -1569,7 +1572,16 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_bytes += skb->len;
 	txq->trans_start = jiffies;
 	ret = NETDEV_TX_OK;
+	goto out;
 
+tx_err_out:
+	/* roll back consumer index and map array*/
+	if (tx_pool->consumer_index == 0)
+		tx_pool->consumer_index =
+			tx_pool->num_buffers - 1;
+	else
+		tx_pool->consumer_index--;
+	tx_pool->free_map[tx_pool->consumer_index] = index;
 out:
 	netdev->stats.tx_dropped += tx_dropped;
 	netdev->stats.tx_bytes += tx_bytes;

From e9e1e97884b7b65e5ab0666e549e54035238fd4e Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:30 -0500
Subject: [PATCH 0987/1678] ibmvnic: Update TX pool cleaning routine

Update routine that cleans up any outstanding transmits that
have not received completions when the device needs to close.
Introduces a helper function that cleans one TX pool to make
code more readable.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 42 ++++++++++++++++++------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index af6f8193cb67..5632c030811b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1128,34 +1128,42 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter)
 	}
 }
 
-static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
+			      struct ibmvnic_tx_pool *tx_pool)
 {
-	struct ibmvnic_tx_pool *tx_pool;
 	struct ibmvnic_tx_buff *tx_buff;
 	u64 tx_entries;
-	int tx_scrqs;
-	int i, j;
+	int i;
 
-	if (!adapter->tx_pool)
+	if (!tx_pool && !tx_pool->tx_buff)
+		return;
+
+	tx_entries = tx_pool->num_buffers;
+
+	for (i = 0; i < tx_entries; i++) {
+		tx_buff = &tx_pool->tx_buff[i];
+		if (tx_buff && tx_buff->skb) {
+			dev_kfree_skb_any(tx_buff->skb);
+			tx_buff->skb = NULL;
+		}
+	}
+}
+
+static void clean_tx_pools(struct ibmvnic_adapter *adapter)
+{
+	int tx_scrqs;
+	int i;
+
+	if (!adapter->tx_pool || !adapter->tso_pool)
 		return;
 
 	tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
-	tx_entries = adapter->req_tx_entries_per_subcrq;
 
 	/* Free any remaining skbs in the tx buffer pools */
 	for (i = 0; i < tx_scrqs; i++) {
-		tx_pool = &adapter->tx_pool[i];
-		if (!tx_pool && !tx_pool->tx_buff)
-			continue;
-
 		netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
-		for (j = 0; j < tx_entries; j++) {
-			tx_buff = &tx_pool->tx_buff[j];
-			if (tx_buff && tx_buff->skb) {
-				dev_kfree_skb_any(tx_buff->skb);
-				tx_buff->skb = NULL;
-			}
-		}
+		clean_one_tx_pool(adapter, &adapter->tx_pool[i]);
+		clean_one_tx_pool(adapter, &adapter->tso_pool[i]);
 	}
 }
 

From 76c15c911b8115e565bfe3a5161a8d58b0be2d28 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Fri, 16 Mar 2018 20:00:31 -0500
Subject: [PATCH 0988/1678] ibmvnic: Remove unused TSO resources in TX pool
 structure

Finally, remove the TSO-specific fields in the TX pool
strcutures. These are no longer needed with the introduction
of separate buffer pools for TSO transmissions.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 566927e2974a..89efe700eafe 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -919,8 +919,6 @@ struct ibmvnic_tx_pool {
 	int consumer_index;
 	int producer_index;
 	struct ibmvnic_long_term_buff long_term_buff;
-	struct ibmvnic_long_term_buff tso_ltb;
-	int tso_index;
 	int num_buffers;
 	int buf_size;
 };

From 380e29a6b71fac7ce7a437ab2f7e305c3de8076f Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:38 +0100
Subject: [PATCH 0989/1678] selftests: pmtu: Reverse return codes of functions

David suggests it's more intuitive to return non-zero on
failures, and zero on success.

No need to introduce tail 'return 0' in functions, they will
return the exit code of the last command anyway.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 6c19c148cef8..65842a2afa55 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -26,14 +26,12 @@ vti6_b_addr="fd00:2::b"
 vti6_mask="64"
 
 setup_namespaces() {
-	ip netns add ${NS_A} || return 0
+	ip netns add ${NS_A} || return 1
 	ip netns add ${NS_B}
-
-	return 1
 }
 
 setup_veth() {
-	${ns_a} ip link add veth_a type veth peer name veth_b || return 0
+	${ns_a} ip link add veth_a type veth peer name veth_b || return 1
 	${ns_a} ip link set veth_b netns ${NS_B}
 
 	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
@@ -41,12 +39,10 @@ setup_veth() {
 
 	${ns_a} ip link set veth_a up
 	${ns_b} ip link set veth_b up
-
-	return 1
 }
 
 setup_vti6() {
-	${ns_a} ip link add vti_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || return 0
+	${ns_a} ip link add vti_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || return 1
 	${ns_b} ip link add vti_b type vti6 local ${veth6_b_addr} remote ${veth6_a_addr} key 10
 
 	${ns_a} ip addr add ${vti6_a_addr}/${vti6_mask} dev vti_a
@@ -56,12 +52,10 @@ setup_vti6() {
 	${ns_b} ip link set vti_b up
 
 	sleep 1
-
-	return 1
 }
 
 setup_xfrm() {
-	${ns_a} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 0
+	${ns_a} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
 	${ns_a} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
 	${ns_a} ip -6 xfrm policy add dir out mark 10 tmpl src ${veth6_a_addr} dst ${veth6_b_addr} proto esp mode tunnel
 	${ns_a} ip -6 xfrm policy add dir in mark 10 tmpl src ${veth6_b_addr} dst ${veth6_a_addr} proto esp mode tunnel
@@ -70,8 +64,6 @@ setup_xfrm() {
 	${ns_b} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
 	${ns_b} ip -6 xfrm policy add dir out mark 10 tmpl src ${veth6_b_addr} dst ${veth6_a_addr} proto esp mode tunnel
 	${ns_b} ip -6 xfrm policy add dir in mark 10 tmpl src ${veth6_a_addr} dst ${veth6_b_addr} proto esp mode tunnel
-
-	return 1
 }
 
 setup() {
@@ -79,13 +71,13 @@ setup() {
 
 	[ "$(id -u)" -ne 0 ] && echo "SKIP: need to run as root" && exit 0
 
-	setup_namespaces && echo "SKIP: namespaces not supported" && exit 0
-	setup_veth && echo "SKIP: veth not supported" && exit 0
+	setup_namespaces || { echo "SKIP: namespaces not supported"; exit 0; }
+	setup_veth || { echo "SKIP: veth not supported"; exit 0; }
 
 	case ${tunnel_type} in
 	"vti6")
-		setup_vti6 && echo "SKIP: vti6 not supported" && exit 0
-		setup_xfrm && echo "SKIP: xfrm not supported" && exit 0
+		setup_vti6 || { echo "SKIP: vti6 not supported"; exit 0; }
+		setup_xfrm || { echo "SKIP: xfrm not supported"; exit 0; }
 		;;
 	*)
 		;;

From 822d2f86c485113dc5df7648e97aaef3607c4479 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:39 +0100
Subject: [PATCH 0990/1678] selftests: pmtu: Use namespace command prefix to
 fetch route mtu

In 7af137b72131 ("selftests: net: Introduce first PMTU test") I
accidentally assumed route_get_* helpers would run from a single
namespace. Make them a bit more generic, by passing the
namespace command prefix as a parameter instead.

Fixes: 7af137b72131 ("selftests: net: Introduce first PMTU test")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 65842a2afa55..0d010f982272 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -98,15 +98,17 @@ mtu() {
 }
 
 route_get_dst_exception() {
-	dst="${1}"
+	ns_cmd="${1}"
+	dst="${2}"
 
-	${ns_a} ip route get "${dst}"
+	${ns_cmd} ip route get "${dst}"
 }
 
 route_get_dst_pmtu_from_exception() {
-	dst="${1}"
+	ns_cmd="${1}"
+	dst="${2}"
 
-	exception="$(route_get_dst_exception ${dst})"
+	exception="$(route_get_dst_exception "${ns_cmd}" ${dst})"
 	next=0
 	for i in ${exception}; do
 		[ ${next} -eq 1 ] && echo "${i}" && return
@@ -125,7 +127,7 @@ test_pmtu_vti6_exception() {
 	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
 
 	# Check that exception was created
-	if [ "$(route_get_dst_pmtu_from_exception ${vti6_b_addr})" = "" ]; then
+	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
 		echo "FAIL: Tunnel exceeding link layer MTU didn't create route exception"
 		exit 1
 	fi
@@ -133,14 +135,14 @@ test_pmtu_vti6_exception() {
 	# Decrease tunnel MTU, check for PMTU decrease in route exception
 	mtu "${ns_a}" vti_a 3000
 
-	if [ "$(route_get_dst_pmtu_from_exception ${vti6_b_addr})" -ne 3000 ]; then
+	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
 		echo "FAIL: Decreasing tunnel MTU didn't decrease route exception PMTU"
 		exit 1
 	fi
 
 	# Increase tunnel MTU, check for PMTU increase in route exception
 	mtu "${ns_a}" vti_a 9000
-	if [ "$(route_get_dst_pmtu_from_exception ${vti6_b_addr})" -ne 9000 ]; then
+	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
 		echo "FAIL: Increasing tunnel MTU didn't increase route exception PMTU"
 		exit 1
 	fi

From f2c929feeccd3d56be284b9e24e04cd3c4779a4f Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:40 +0100
Subject: [PATCH 0991/1678] selftests: pmtu: Factor out MTU parsing helper

...so that it can be used for any iproute command output.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 0d010f982272..2d33e533ad36 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -97,6 +97,16 @@ mtu() {
 	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
 }
 
+mtu_parse() {
+	input="${1}"
+
+	next=0
+	for i in ${input}; do
+		[ ${next} -eq 1 ] && echo "${i}" && return
+		[ "${i}" = "mtu" ] && next=1
+	done
+}
+
 route_get_dst_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
@@ -108,12 +118,7 @@ route_get_dst_pmtu_from_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
 
-	exception="$(route_get_dst_exception "${ns_cmd}" ${dst})"
-	next=0
-	for i in ${exception}; do
-		[ ${next} -eq 1 ] && echo "${i}" && return
-		[ "${i}" = "mtu" ] && next=1
-	done
+	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
 }
 
 test_pmtu_vti6_exception() {

From 36455bd1e977ef1ccd8e89018bd14d0a5b96b4e5 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:41 +0100
Subject: [PATCH 0992/1678] selftests: pmtu: Introduce support for multiple
 tests

Introduce list of tests and their descriptions, and loop on it
in main body.

Tests will now just take care of calling setup with a list of
"units" they need, and return 0 on success, 1 on failure, 2 if
the test had to be skipped.

Main script body will take care of displaying results and
cleaning up after every test. Introduce guard variable so that
we don't clean up twice in case of interrupts or unexpected
failures.

The pmtu_vti6_exception test can now run its third step even if
the previous one failed, as we can return values from it.

Also introduce support to display test descriptions, and display
aligned OK/FAIL/SKIP test outcomes. Buffer error strings so that
in case of failure we can display them right under the outcome
for each test.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 85 ++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 2d33e533ad36..733de6053b5c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -5,13 +5,16 @@
 #
 # Tests currently implemented:
 #
-# - test_pmtu_vti6_exception
+# - pmtu_vti6_exception
 #	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is
 #	created by exceeding link layer MTU with ping to other endpoint. Then
 #	decrease and increase MTU of tunnel, checking that route exception PMTU
 #	changes accordingly
 
+tests="
+	pmtu_vti6_exception	vti6: PMTU exceptions"
+
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
 ns_a="ip netns exec ${NS_A}"
@@ -25,6 +28,19 @@ vti6_a_addr="fd00:2::a"
 vti6_b_addr="fd00:2::b"
 vti6_mask="64"
 
+cleanup_done=1
+err_buf=
+
+err() {
+	err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+	echo -n "${err_buf}"
+	err_buf=
+}
+
 setup_namespaces() {
 	ip netns add ${NS_A} || return 1
 	ip netns add ${NS_B}
@@ -67,26 +83,19 @@ setup_xfrm() {
 }
 
 setup() {
-	tunnel_type="$1"
+	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return 1
 
-	[ "$(id -u)" -ne 0 ] && echo "SKIP: need to run as root" && exit 0
-
-	setup_namespaces || { echo "SKIP: namespaces not supported"; exit 0; }
-	setup_veth || { echo "SKIP: veth not supported"; exit 0; }
-
-	case ${tunnel_type} in
-	"vti6")
-		setup_vti6 || { echo "SKIP: vti6 not supported"; exit 0; }
-		setup_xfrm || { echo "SKIP: xfrm not supported"; exit 0; }
-		;;
-	*)
-		;;
-	esac
+	cleanup_done=0
+	for arg do
+		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
+	done
 }
 
 cleanup() {
+	[ ${cleanup_done} -eq 1 ] && return
 	ip netns del ${NS_A} 2 > /dev/null
 	ip netns del ${NS_B} 2 > /dev/null
+	cleanup_done=1
 }
 
 mtu() {
@@ -122,7 +131,8 @@ route_get_dst_pmtu_from_exception() {
 }
 
 test_pmtu_vti6_exception() {
-	setup vti6
+	setup namespaces veth vti6 xfrm || return 2
+	fail=0
 
 	# Create route exception by exceeding link layer MTU
 	mtu "${ns_a}" veth_a 4000
@@ -133,30 +143,55 @@ test_pmtu_vti6_exception() {
 
 	# Check that exception was created
 	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
-		echo "FAIL: Tunnel exceeding link layer MTU didn't create route exception"
-		exit 1
+		err "  tunnel exceeding link layer MTU didn't create route exception"
+		return 1
 	fi
 
 	# Decrease tunnel MTU, check for PMTU decrease in route exception
 	mtu "${ns_a}" vti_a 3000
 
 	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
-		echo "FAIL: Decreasing tunnel MTU didn't decrease route exception PMTU"
-		exit 1
+		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
+		fail=1
 	fi
 
 	# Increase tunnel MTU, check for PMTU increase in route exception
 	mtu "${ns_a}" vti_a 9000
 	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
-		echo "FAIL: Increasing tunnel MTU didn't increase route exception PMTU"
-		exit 1
+		err "  increasing tunnel MTU didn't increase route exception PMTU"
+		fail=1
 	fi
 
-	echo "PASS"
+	return ${fail}
 }
 
 trap cleanup EXIT
 
-test_pmtu_vti6_exception
+exitcode=0
+desc=0
+IFS="	
+"
+for t in ${tests}; do
+	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
 
-exit 0
+	(
+		unset IFS
+		eval test_${name}
+		ret=$?
+		cleanup
+
+		if [ $ret -eq 0 ]; then
+			printf "TEST: %-60s  [ OK ]\n" "${t}"
+		elif [ $ret -eq 1 ]; then
+			printf "TEST: %-60s  [FAIL]\n" "${t}"
+			err_flush
+			exit 1
+		elif [ $ret -eq 2 ]; then
+			printf "TEST: %-60s  [SKIP]\n" "${t}"
+			err_flush
+		fi
+	)
+	[ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}

From a41c789bdcc1e3d8fe0d82ee80c22aa4f4508004 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:42 +0100
Subject: [PATCH 0993/1678] selftests: pmtu: Add pmtu_vti4_default_mtu test

This test checks that the MTU assigned by default to a vti (IPv4)
interface created on top of veth is simply veth's MTU minus the
length of the encapsulated IPv4 header.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 86 ++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 733de6053b5c..be13b3232c12 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1,7 +1,8 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 #
-# Check that route PMTU values match expectations
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
 #
 # Tests currently implemented:
 #
@@ -11,19 +12,32 @@
 #	created by exceeding link layer MTU with ping to other endpoint. Then
 #	decrease and increase MTU of tunnel, checking that route exception PMTU
 #	changes accordingly
+#
+# - pmtu_vti4_default_mtu
+#	Set up vti4 tunnel on top of veth, in two namespaces with matching
+#	endpoints. Check that MTU assigned to vti interface is the MTU of the
+#	lower layer (veth) minus additional lower layer headers (zero, for veth)
+#	minus IPv4 header length
 
 tests="
-	pmtu_vti6_exception	vti6: PMTU exceptions"
+	pmtu_vti6_exception	vti6: PMTU exceptions
+	pmtu_vti4_default_mtu	vti4: default MTU assignment"
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
 
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
 veth6_a_addr="fd00:1::a"
 veth6_b_addr="fd00:1::b"
 veth6_mask="64"
 
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
 vti6_a_addr="fd00:2::a"
 vti6_b_addr="fd00:2::b"
 vti6_mask="64"
@@ -50,6 +64,9 @@ setup_veth() {
 	${ns_a} ip link add veth_a type veth peer name veth_b || return 1
 	${ns_a} ip link set veth_b netns ${NS_B}
 
+	${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+	${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
 	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
 	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
 
@@ -57,19 +74,36 @@ setup_veth() {
 	${ns_b} ip link set veth_b up
 }
 
-setup_vti6() {
-	${ns_a} ip link add vti_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 || return 1
-	${ns_b} ip link add vti_b type vti6 local ${veth6_b_addr} remote ${veth6_a_addr} key 10
+setup_vti() {
+	proto=${1}
+	veth_a_addr="${2}"
+	veth_b_addr="${3}"
+	vti_a_addr="${4}"
+	vti_b_addr="${5}"
+	vti_mask=${6}
 
-	${ns_a} ip addr add ${vti6_a_addr}/${vti6_mask} dev vti_a
-	${ns_b} ip addr add ${vti6_b_addr}/${vti6_mask} dev vti_b
+	[ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
 
-	${ns_a} ip link set vti_a up
-	${ns_b} ip link set vti_b up
+	${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+	${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+	${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+	${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+	${ns_a} ip link set vti${proto}_a up
+	${ns_b} ip link set vti${proto}_b up
 
 	sleep 1
 }
 
+setup_vti4() {
+	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
 setup_xfrm() {
 	${ns_a} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
 	${ns_a} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
@@ -116,6 +150,20 @@ mtu_parse() {
 	done
 }
 
+link_get() {
+	ns_cmd="${1}"
+	name="${2}"
+
+	${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+	ns_cmd="${1}"
+	name="${2}"
+
+	mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
 route_get_dst_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
@@ -137,8 +185,8 @@ test_pmtu_vti6_exception() {
 	# Create route exception by exceeding link layer MTU
 	mtu "${ns_a}" veth_a 4000
 	mtu "${ns_b}" veth_b 4000
-	mtu "${ns_a}" vti_a 5000
-	mtu "${ns_b}" vti_b 5000
+	mtu "${ns_a}" vti6_a 5000
+	mtu "${ns_b}" vti6_b 5000
 	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
 
 	# Check that exception was created
@@ -148,7 +196,7 @@ test_pmtu_vti6_exception() {
 	fi
 
 	# Decrease tunnel MTU, check for PMTU decrease in route exception
-	mtu "${ns_a}" vti_a 3000
+	mtu "${ns_a}" vti6_a 3000
 
 	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
 		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
@@ -156,7 +204,7 @@ test_pmtu_vti6_exception() {
 	fi
 
 	# Increase tunnel MTU, check for PMTU increase in route exception
-	mtu "${ns_a}" vti_a 9000
+	mtu "${ns_a}" vti6_a 9000
 	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
 		err "  increasing tunnel MTU didn't increase route exception PMTU"
 		fail=1
@@ -165,6 +213,18 @@ test_pmtu_vti6_exception() {
 	return ${fail}
 }
 
+test_pmtu_vti4_default_mtu() {
+	setup namespaces veth vti4 || return 2
+
+	# Check that MTU of vti device is MTU of veth minus IPv4 header length
+	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+	if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+		err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+		return 1
+	fi
+}
+
 trap cleanup EXIT
 
 exitcode=0

From 35b49424b8a49edb6de6e7ec54bd8edb568031c2 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:43 +0100
Subject: [PATCH 0994/1678] selftests: pmtu: Add pmtu_vti6_default_mtu test

Same as pmtu_vti4_default_mtu, but on IPv6 with vti6.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index be13b3232c12..5d9af22b360a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -18,10 +18,14 @@
 #	endpoints. Check that MTU assigned to vti interface is the MTU of the
 #	lower layer (veth) minus additional lower layer headers (zero, for veth)
 #	minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+#	Same as above, for IPv6
 
 tests="
 	pmtu_vti6_exception	vti6: PMTU exceptions
-	pmtu_vti4_default_mtu	vti4: default MTU assignment"
+	pmtu_vti4_default_mtu	vti4: default MTU assignment
+	pmtu_vti6_default_mtu	vti6: default MTU assignment"
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
@@ -225,6 +229,18 @@ test_pmtu_vti4_default_mtu() {
 	fi
 }
 
+test_pmtu_vti6_default_mtu() {
+	setup namespaces veth vti6 || return 2
+
+	# Check that MTU of vti device is MTU of veth minus IPv6 header length
+	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+		err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+		return 1
+	fi
+}
+
 trap cleanup EXIT
 
 exitcode=0

From 5e84430bb83e8241c485ccce728ff5e5e80789eb Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:44 +0100
Subject: [PATCH 0995/1678] selftests: pmtu: Add test_pmtu_vti4_exception test

This test checks that PMTU exceptions are created only when
needed on IPv4 routes with vti and xfrm, and their PMTU value is
checked as well.

We can't adopt the same approach as test_pmtu_vti6_exception()
here, because on IPv4 administrative MTU changes won't be
reflected directly on PMTU.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 78 +++++++++++++++++++++++++----
 1 file changed, 69 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 5d9af22b360a..ba11433d17d8 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -6,6 +6,14 @@
 #
 # Tests currently implemented:
 #
+# - pmtu_vti4_exception
+#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
+#	namespaces with matching endpoints. Check that route exception is not
+#	created if link layer MTU is not exceeded, then exceed it and check that
+#	exception is created with the expected PMTU. The approach described
+#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+#	changes alone won't affect PMTU
+#
 # - pmtu_vti6_exception
 #	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is
@@ -24,6 +32,7 @@
 
 tests="
 	pmtu_vti6_exception	vti6: PMTU exceptions
+	pmtu_vti4_exception	vti4: PMTU exceptions
 	pmtu_vti4_default_mtu	vti4: default MTU assignment
 	pmtu_vti6_default_mtu	vti6: default MTU assignment"
 
@@ -109,15 +118,27 @@ setup_vti6() {
 }
 
 setup_xfrm() {
-	${ns_a} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
-	${ns_a} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	${ns_a} ip -6 xfrm policy add dir out mark 10 tmpl src ${veth6_a_addr} dst ${veth6_b_addr} proto esp mode tunnel
-	${ns_a} ip -6 xfrm policy add dir in mark 10 tmpl src ${veth6_b_addr} dst ${veth6_a_addr} proto esp mode tunnel
+	proto=${1}
+	veth_a_addr="${2}"
+	veth_b_addr="${3}"
 
-	${ns_b} ip -6 xfrm state add src ${veth6_a_addr} dst ${veth6_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	${ns_b} ip -6 xfrm state add src ${veth6_b_addr} dst ${veth6_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	${ns_b} ip -6 xfrm policy add dir out mark 10 tmpl src ${veth6_b_addr} dst ${veth6_a_addr} proto esp mode tunnel
-	${ns_b} ip -6 xfrm policy add dir in mark 10 tmpl src ${veth6_a_addr} dst ${veth6_b_addr} proto esp mode tunnel
+	${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+	${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+	${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+	${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+	${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
 setup() {
@@ -182,8 +203,47 @@ route_get_dst_pmtu_from_exception() {
 	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
 }
 
+test_pmtu_vti4_exception() {
+	setup namespaces veth vti4 xfrm4 || return 2
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 20))
+
+	#                                SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 28))
+
+	mtu "${ns_a}" veth_a ${veth_mtu}
+	mtu "${ns_b}" veth_b ${veth_mtu}
+	mtu "${ns_a}" vti4_a ${vti_mtu}
+	mtu "${ns_b}" vti4_b ${vti_mtu}
+
+	# Send DF packet without exceeding link layer MTU, check that no
+	# exception is created
+	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+	if [ "${pmtu}" != "" ]; then
+		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+		return 1
+	fi
+
+	# Now exceed link layer MTU by one byte, check that exception is created
+	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+	if [ "${pmtu}" = "" ]; then
+		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+		return 1
+	fi
+
+	# ...with the right PMTU value
+	if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+		return 1
+	fi
+}
+
 test_pmtu_vti6_exception() {
-	setup namespaces veth vti6 xfrm || return 2
+	setup namespaces veth vti6 xfrm6 || return 2
 	fail=0
 
 	# Create route exception by exceeding link layer MTU

From 719e121574497436e39536c2a888b901fbe30cfe Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:45 +0100
Subject: [PATCH 0996/1678] selftests: pmtu: Add pmtu_vti4_link_add_mtu test

This test checks that MTU given on vti link creation is actually
configured, and that tunnel is not created with an invalid MTU
value.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 45 ++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index ba11433d17d8..d9f7ef2c213d 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -29,12 +29,17 @@
 #
 # - pmtu_vti6_default_mtu
 #	Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+#	Set up vti4 interface passing MTU value at link creation, check MTU is
+#	configured, and that link is not created with invalid MTU values
 
 tests="
 	pmtu_vti6_exception	vti6: PMTU exceptions
 	pmtu_vti4_exception	vti4: PMTU exceptions
 	pmtu_vti4_default_mtu	vti4: default MTU assignment
-	pmtu_vti6_default_mtu	vti6: default MTU assignment"
+	pmtu_vti6_default_mtu	vti6: default MTU assignment
+	pmtu_vti4_link_add_mtu	vti4: MTU setting on link creation"
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
@@ -301,6 +306,44 @@ test_pmtu_vti6_default_mtu() {
 	fi
 }
 
+test_pmtu_vti4_link_add_mtu() {
+	setup namespaces || return 2
+
+	${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+	[ $? -ne 0 ] && err "  vti not supported" && return 2
+	${ns_a} ip link del vti4_a
+
+	fail=0
+
+	min=68
+	max=$((65528 - 20))
+	# Check invalid values first
+	for v in $((min - 1)) $((max + 1)); do
+		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+		# This can fail, or MTU can be adjusted to a proper value
+		[ $? -ne 0 ] && continue
+		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+			err "  vti tunnel created with invalid MTU ${mtu}"
+			fail=1
+		fi
+		${ns_a} ip link del vti4_a
+	done
+
+	# Now check valid values
+	for v in ${min} 1300 ${max}; do
+		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+		${ns_a} ip link del vti4_a
+		if [ "${mtu}" != "${v}" ]; then
+			err "  vti MTU ${mtu} doesn't match configured value ${v}"
+			fail=1
+		fi
+	done
+
+	return ${fail}
+}
+
 trap cleanup EXIT
 
 exitcode=0

From 8b6022fc7839e3ba2255bb264b16b0fffe961212 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:46 +0100
Subject: [PATCH 0997/1678] selftests: pmtu: Add pmtu_vti6_link_add_mtu test

Same as pmtu_vti4_link_add_mtu test, but for IPv6.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 44 ++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index d9f7ef2c213d..cd12a1f5c003 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -33,13 +33,17 @@
 # - pmtu_vti4_link_add_mtu
 #	Set up vti4 interface passing MTU value at link creation, check MTU is
 #	configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+#	Same as above, for IPv6
 
 tests="
 	pmtu_vti6_exception	vti6: PMTU exceptions
 	pmtu_vti4_exception	vti4: PMTU exceptions
 	pmtu_vti4_default_mtu	vti4: default MTU assignment
 	pmtu_vti6_default_mtu	vti6: default MTU assignment
-	pmtu_vti4_link_add_mtu	vti4: MTU setting on link creation"
+	pmtu_vti4_link_add_mtu	vti4: MTU setting on link creation
+	pmtu_vti6_link_add_mtu	vti6: MTU setting on link creation"
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
@@ -344,6 +348,44 @@ test_pmtu_vti4_link_add_mtu() {
 	return ${fail}
 }
 
+test_pmtu_vti6_link_add_mtu() {
+	setup namespaces || return 2
+
+	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
+	${ns_a} ip link del vti6_a
+
+	fail=0
+
+	min=1280
+	max=$((65535 - 40))
+	# Check invalid values first
+	for v in $((min - 1)) $((max + 1)); do
+		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+		# This can fail, or MTU can be adjusted to a proper value
+		[ $? -ne 0 ] && continue
+		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+			err "  vti6 tunnel created with invalid MTU ${v}"
+			fail=1
+		fi
+		${ns_a} ip link del vti6_a
+	done
+
+	# Now check valid values
+	for v in 1280 1300 $((65535 - 40)); do
+		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+		${ns_a} ip link del vti6_a
+		if [ "${mtu}" != "${v}" ]; then
+			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
+			fail=1
+		fi
+	done
+
+	return ${fail}
+}
+
 trap cleanup EXIT
 
 exitcode=0

From 1fad59ea1c34b14f080725fc4cdfc6160651e371 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 17 Mar 2018 02:31:47 +0100
Subject: [PATCH 0998/1678] selftests: pmtu: Add pmtu_vti6_link_change_mtu test

This test checks that MTU configured from userspace is used on
link creation and changes, and that when it's not passed from
userspace, it's calculated properly from the MTU of the lower
layer.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 67 ++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index cd12a1f5c003..92197c05bac4 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -36,14 +36,21 @@
 #
 # - pmtu_vti6_link_add_mtu
 #	Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+#	and check that configured MTU is used on link creation and changes, and
+#	that MTU is properly calculated instead when MTU is not configured from
+#	userspace
 
 tests="
-	pmtu_vti6_exception	vti6: PMTU exceptions
-	pmtu_vti4_exception	vti4: PMTU exceptions
-	pmtu_vti4_default_mtu	vti4: default MTU assignment
-	pmtu_vti6_default_mtu	vti6: default MTU assignment
-	pmtu_vti4_link_add_mtu	vti4: MTU setting on link creation
-	pmtu_vti6_link_add_mtu	vti6: MTU setting on link creation"
+	pmtu_vti6_exception		vti6: PMTU exceptions
+	pmtu_vti4_exception		vti4: PMTU exceptions
+	pmtu_vti4_default_mtu		vti4: default MTU assignment
+	pmtu_vti6_default_mtu		vti6: default MTU assignment
+	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
+	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
+	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"
 
 NS_A="ns-$(mktemp -u XXXXXX)"
 NS_B="ns-$(mktemp -u XXXXXX)"
@@ -64,6 +71,10 @@ vti6_a_addr="fd00:2::a"
 vti6_b_addr="fd00:2::b"
 vti6_mask="64"
 
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
 cleanup_done=1
 err_buf=
 
@@ -386,6 +397,50 @@ test_pmtu_vti6_link_add_mtu() {
 	return ${fail}
 }
 
+test_pmtu_vti6_link_change_mtu() {
+	setup namespaces || return 2
+
+	${ns_a} ip link add dummy0 mtu 1500 type dummy
+	[ $? -ne 0 ] && err "  dummy not supported" && return 2
+	${ns_a} ip link add dummy1 mtu 3000 type dummy
+	${ns_a} ip link set dummy0 up
+	${ns_a} ip link set dummy1 up
+
+	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+	fail=0
+
+	# Create vti6 interface bound to device, passing MTU, check it
+	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ ${mtu} -ne 1300 ]; then
+		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
+		fail=1
+	fi
+
+	# Move to another device with different MTU, without passing MTU, check
+	# MTU is adjusted
+	echo "${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}" > /dev/kmsg
+	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ ${mtu} -ne $((3000 - 40)) ]; then
+		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+		fail=1
+	fi
+
+	# Move it back, passing MTU, check MTU is not overridden
+	echo "${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}" > /dev/kmsg
+	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ ${mtu} -ne 1280 ]; then
+		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
 trap cleanup EXIT
 
 exitcode=0

From 76f38f1f3cf8f81be3c18ecb7a65dd74afb05851 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 17 Mar 2018 20:21:09 +0100
Subject: [PATCH 0999/1678] net: dsa: mv88e6xxx: Fix IRQ when loading module

Handle polled interrupts correctly when loading the module.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Fixes: 294d711ee8c0 ("net: dsa: mv88e6xxx: Poll when no interrupt defined")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index bd3ee84770c7..41f872d4ba3c 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -4204,15 +4204,18 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 	mv88e6xxx_unregister_switch(chip);
 	mv88e6xxx_mdios_unregister(chip);
 
-	if (chip->irq > 0) {
-		mv88e6xxx_g1_vtu_prob_irq_free(chip);
-		mv88e6xxx_g1_atu_prob_irq_free(chip);
-		if (chip->info->g2_irqs > 0)
-			mv88e6xxx_g2_irq_free(chip);
-		mutex_lock(&chip->reg_lock);
+	mv88e6xxx_g1_vtu_prob_irq_free(chip);
+	mv88e6xxx_g1_atu_prob_irq_free(chip);
+
+	if (chip->info->g2_irqs > 0)
+		mv88e6xxx_g2_irq_free(chip);
+
+	mutex_lock(&chip->reg_lock);
+	if (chip->irq > 0)
 		mv88e6xxx_g1_irq_free(chip);
-		mutex_unlock(&chip->reg_lock);
-	}
+	else
+		mv88e6xxx_irq_poll_free(chip);
+	mutex_unlock(&chip->reg_lock);
 }
 
 static const struct of_device_id mv88e6xxx_of_match[] = {

From ef44d78d8937146269ec3f0de966817a6d630d5e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 18 Mar 2018 11:23:05 -0700
Subject: [PATCH 1000/1678] net: dsa: mv88e6xxx: Fix missing register lock in
 serdes_get_stats

We can hit the register lock not held assertion with the following path:

[   34.170631] mv88e6085 0.1:00: Switch registers lock not held!
[   34.176510] CPU: 0 PID: 950 Comm: ethtool Not tainted 4.16.0-rc4 #143
[   34.182985] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree)
[   34.189519] Backtrace:
[   34.192033] [<8010c4b4>] (dump_backtrace) from [<8010c788>] (show_stack+0x20/0x24)
[   34.199680]  r6:9f5dc010 r5:00000011 r4:9f5dc010 r3:00000000
[   34.205434] [<8010c768>] (show_stack) from [<80679d38>] (dump_stack+0x24/0x28)
[   34.212719] [<80679d14>] (dump_stack) from [<804844a8>] (mv88e6xxx_read+0x70/0x7c)
[   34.220376] [<80484438>] (mv88e6xxx_read) from [<804870dc>] (mv88e6xxx_port_get_cmode+0x34/0x4c)
[   34.229257]  r5:a09cd128 r4:9ee31d07
[   34.232880] [<804870a8>] (mv88e6xxx_port_get_cmode) from [<80487e6c>] (mv88e6352_port_has_serdes+0x24/0x64)
[   34.242690]  r4:9f5dc010
[   34.245309] [<80487e48>] (mv88e6352_port_has_serdes) from [<804880b8>] (mv88e6352_serdes_get_stats+0x28/0x12c)
[   34.255389]  r4:00000001
[   34.257973] [<80488090>] (mv88e6352_serdes_get_stats) from [<804811e8>] (mv88e6xxx_get_ethtool_stats+0xb0/0xc0)
[   34.268156]  r10:00000000 r9:00000000 r8:00000000 r7:a09cd020 r6:00000001 r5:9f5dc01c
[   34.276052]  r4:9f5dc010
[   34.278631] [<80481138>] (mv88e6xxx_get_ethtool_stats) from [<8064f740>] (dsa_slave_get_ethtool_stats+0xbc/0xc4)

mv88e6xxx_get_ethtool_stats() calls mv88e6xxx_get_stats() which calls both
chip->info->ops->stats_get_stats(), which holds the register lock, and
chip->info->ops->serdes_get_stats() which does not. Have
chip->info->ops->serdes_get_stats() be running with the register lock held to
avoid such assertions.

Fixes: 436fe17d273b ("net: dsa: mv88e6xxx: Allow the SERDES interfaces to have statistics")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 41f872d4ba3c..d21f6c93c0c2 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -849,7 +849,9 @@ static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
 
 	if (chip->info->ops->serdes_get_stats) {
 		data += count;
+		mutex_lock(&chip->reg_lock);
 		chip->info->ops->serdes_get_stats(chip, port, data);
+		mutex_unlock(&chip->reg_lock);
 	}
 }
 

From adfccf118211520ebe22f9f46e73834211ea492d Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 17 Mar 2018 20:32:03 +0100
Subject: [PATCH 1001/1678] net: dsa: mv88e6xxx: Add missing g1 IRQ numbers

With the recent change to polling for interrupts, it is important that
the number of global 1 interrupts is listed. Without it, the driver
requests an interrupt domain for zero interrupts, which returns
EINVAL, and the probe fails.

Add two missing entries.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d21f6c93c0c2..077e24fcf849 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3422,6 +3422,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.atu_move_port_mask = 0x1f,
+		.g1_irqs = 9,
 		.g2_irqs = 10,
 		.pvt = true,
 		.multi_chip = true,
@@ -3730,6 +3731,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.global2_addr = 0x1c,
 		.age_time_coeff = 3750,
 		.atu_move_port_mask = 0x1f,
+		.g1_irqs = 9,
 		.g2_irqs = 10,
 		.pvt = true,
 		.multi_chip = true,

From bc3931557d1d55584100b1ffefc248137b5cb7e1 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 17 Mar 2018 20:32:04 +0100
Subject: [PATCH 1002/1678] net: dsa: mv88e6xxx: Add number of internal PHYs

Add to the info structure the number of internal PHYs, if they generate
interrupts. Some of the older generations of switches have internal
PHYs, but no interrupt registers. In this case, set the count to zero.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 28 ++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/chip.h |  1 +
 2 files changed, 29 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 077e24fcf849..a460673cf27e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3319,6 +3319,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6085",
 		.num_databases = 4096,
 		.num_ports = 10,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3339,6 +3340,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6095/88E6095F",
 		.num_databases = 256,
 		.num_ports = 11,
+		.num_internal_phys = 0,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3357,6 +3359,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6097/88E6097F",
 		.num_databases = 4096,
 		.num_ports = 11,
+		.num_internal_phys = 8,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3377,6 +3380,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6123",
 		.num_databases = 4096,
 		.num_ports = 3,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3397,6 +3401,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6131",
 		.num_databases = 256,
 		.num_ports = 8,
+		.num_internal_phys = 0,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3415,6 +3420,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6341",
 		.num_databases = 4096,
 		.num_ports = 6,
+		.num_internal_phys = 5,
 		.num_gpio = 11,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3436,6 +3442,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6161",
 		.num_databases = 4096,
 		.num_ports = 6,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3456,6 +3463,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6165",
 		.num_databases = 4096,
 		.num_ports = 6,
+		.num_internal_phys = 0,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3476,6 +3484,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6171",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3496,6 +3505,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6172",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3517,6 +3527,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6175",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3537,6 +3548,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6176",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3558,6 +3570,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6185",
 		.num_databases = 256,
 		.num_ports = 10,
+		.num_internal_phys = 0,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3576,6 +3589,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6190",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 11,
 		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
@@ -3597,6 +3611,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6190X",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 11,
 		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
@@ -3618,6 +3633,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6191",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 11,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
 		.global1_addr = 0x1b,
@@ -3639,6 +3655,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6240",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3661,6 +3678,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6290",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 11,
 		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
@@ -3683,6 +3701,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6320",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3690,6 +3709,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
@@ -3704,6 +3724,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6321",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3711,6 +3732,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.global2_addr = 0x1c,
 		.age_time_coeff = 15000,
 		.g1_irqs = 8,
+		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.multi_chip = true,
 		.tag_protocol = DSA_TAG_PROTO_EDSA,
@@ -3723,6 +3745,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.family = MV88E6XXX_FAMILY_6341,
 		.name = "Marvell 88E6341",
 		.num_databases = 4096,
+		.num_internal_phys = 5,
 		.num_ports = 6,
 		.num_gpio = 11,
 		.max_vid = 4095,
@@ -3746,6 +3769,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6350",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3766,6 +3790,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6351",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
 		.global1_addr = 0x1b,
@@ -3786,6 +3811,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6352",
 		.num_databases = 4096,
 		.num_ports = 7,
+		.num_internal_phys = 5,
 		.num_gpio = 15,
 		.max_vid = 4095,
 		.port_base_addr = 0x10,
@@ -3807,6 +3833,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6390",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 11,
 		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
@@ -3828,6 +3855,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.name = "Marvell 88E6390X",
 		.num_databases = 4096,
 		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 11,
 		.num_gpio = 16,
 		.max_vid = 8191,
 		.port_base_addr = 0x0,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 26b9a618cdee..bad211014e91 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -110,6 +110,7 @@ struct mv88e6xxx_info {
 	const char *name;
 	unsigned int num_databases;
 	unsigned int num_ports;
+	unsigned int num_internal_phys;
 	unsigned int num_gpio;
 	unsigned int max_vid;
 	unsigned int port_base_addr;

From 6f88284f3bd77a0e51de22d4956f07557bcc0dbf Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 17 Mar 2018 20:32:05 +0100
Subject: [PATCH 1003/1678] net: dsa: mv88e6xxx: Add MDIO interrupts for
 internal PHYs

When registering an MDIO bus, it is possible to pass an array of
interrupts, one per address on the bus. phylib will then associate the
interrupt to the PHY device, if no other interrupt is provided.

Some of the global2 interrupts are PHY interrupts. Place them into the
MDIO bus structure.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c    | 10 +++++++++
 drivers/net/dsa/mv88e6xxx/global2.c | 32 +++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/global2.h | 16 +++++++++++++++
 3 files changed, 58 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index a460673cf27e..fe46b40195fa 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2293,12 +2293,19 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
 	bus->write = mv88e6xxx_mdio_write;
 	bus->parent = chip->dev;
 
+	if (!external) {
+		err = mv88e6xxx_g2_irq_mdio_setup(chip, bus);
+		if (err)
+			return err;
+	}
+
 	if (np)
 		err = of_mdiobus_register(bus, np);
 	else
 		err = mdiobus_register(bus);
 	if (err) {
 		dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err);
+		mv88e6xxx_g2_irq_mdio_free(chip, bus);
 		return err;
 	}
 
@@ -2325,6 +2332,9 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip)
 	list_for_each_entry(mdio_bus, &chip->mdios, list) {
 		bus = mdio_bus->bus;
 
+		if (!mdio_bus->external)
+			mv88e6xxx_g2_irq_mdio_free(chip, bus);
+
 		mdiobus_unregister(bus);
 	}
 }
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 5f370f1fc7c4..6c620974fef3 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -1107,6 +1107,38 @@ out:
 	return err;
 }
 
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+				struct mii_bus *bus)
+{
+	int phy, irq, err, err_phy;
+
+	for (phy = 0; phy < chip->info->num_internal_phys; phy++) {
+		irq = irq_find_mapping(chip->g2_irq.domain, phy);
+		if (irq < 0) {
+			err = irq;
+			goto out;
+		}
+		bus->irq[chip->info->port_base_addr + phy] = irq;
+	}
+	return 0;
+out:
+	err_phy = phy;
+
+	for (phy = 0; phy < err_phy; phy++)
+		irq_dispose_mapping(bus->irq[phy]);
+
+	return err;
+}
+
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+				struct mii_bus *bus)
+{
+	int phy;
+
+	for (phy = 0; phy < chip->info->num_internal_phys; phy++)
+		irq_dispose_mapping(bus->irq[phy]);
+}
+
 int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
 {
 	u16 reg;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index aa3f0a736966..520ec70d32e8 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -317,6 +317,11 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
 void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
 
+int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+				struct mii_bus *bus);
+void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+				struct mii_bus *bus);
+
 int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
 
@@ -450,6 +455,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
 {
 }
 
+static inline int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip,
+					      struct mii_bus *bus)
+{
+	return 0;
+}
+
+static inline void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip,
+					      struct mii_bus *bus)
+{
+}
+
 static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
 {
 	return -EOPNOTSUPP;

From e3c72f3d37e4745dc3a6ae69f5fc2bd4c31ca4eb Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sun, 18 Mar 2018 21:58:12 +0100
Subject: [PATCH 1004/1678] selftests: pmtu: Drop prints to kernel log from
 pmtu_vti6_link_change_mtu

Reported-by: David Ahern <dsahern@gmail.com>
Fixes: 1fad59ea1c34 ("selftests: pmtu: Add pmtu_vti6_link_change_mtu test")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 92197c05bac4..1e428781a625 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -421,7 +421,6 @@ test_pmtu_vti6_link_change_mtu() {
 
 	# Move to another device with different MTU, without passing MTU, check
 	# MTU is adjusted
-	echo "${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}" > /dev/kmsg
 	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne $((3000 - 40)) ]; then
@@ -430,7 +429,6 @@ test_pmtu_vti6_link_change_mtu() {
 	fi
 
 	# Move it back, passing MTU, check MTU is not overridden
-	echo "${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}" > /dev/kmsg
 	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
 	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 	if [ ${mtu} -ne 1280 ]; then

From 640a8af5841fd66a26e26f5b9fb5476a3755852a Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 19 Mar 2018 09:28:03 -0700
Subject: [PATCH 1005/1678] i40evf: Reorder configure_clsflower to avoid
 deadlock on error

While doing some code review I noticed that we can get into a state where
we exit with the "IN_CRITICAL_TASK" bit set while notifying the PF of
flower filters. This patch is meant to address that plus tweak the ordering
of the while loop waiting on it slightly so that we don't wait an extra
period after we have failed for the last time.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40evf/i40evf_main.c   | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 486cf491b000..7e7cd80abaf4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2791,14 +2791,7 @@ static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
 {
 	int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
 	struct i40evf_cloud_filter *filter = NULL;
-	int err = 0, count = 50;
-
-	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
-				&adapter->crit_section)) {
-		udelay(1);
-		if (--count == 0)
-			return -EINVAL;
-	}
+	int err = -EINVAL, count = 50;
 
 	if (tc < 0) {
 		dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
@@ -2806,10 +2799,16 @@ static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
 	}
 
 	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	if (!filter) {
-		err = -ENOMEM;
-		goto clearout;
+	if (!filter)
+		return -ENOMEM;
+
+	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
+				&adapter->crit_section)) {
+		if (--count == 0)
+			goto err;
+		udelay(1);
 	}
+
 	filter->cookie = cls_flower->cookie;
 
 	/* set the mask to all zeroes to begin with */
@@ -2834,7 +2833,7 @@ static int i40evf_configure_clsflower(struct i40evf_adapter *adapter,
 err:
 	if (err)
 		kfree(filter);
-clearout:
+
 	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
 	return err;
 }

From 32c23b47dbd9765c6ec2542400f41f0d47a7d2c1 Mon Sep 17 00:00:00 2001
From: Jan Sokolowski <jan.sokolowski@intel.com>
Date: Mon, 19 Mar 2018 09:28:03 -0700
Subject: [PATCH 1006/1678] i40e: Properly check allowed advertisement
 capabilities

The i40e_set_link_ksettings and i40e_get_link_ksettings use different
codepaths to check available and supported advertisement modes. This
creates scenarios where it's possible to set a mode that's not allowed,
resulting in a link down.

Fix setting advertisement in i40e_set_link_ksettings by calling
i40e_get_link_ksettings to check what modes are allowed.

Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 89807e32a898..0ce68e8a45cc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -870,16 +870,6 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 	/* save autoneg out of ksettings */
 	autoneg = copy_ks.base.autoneg;
 
-	memset(&safe_ks, 0, sizeof(safe_ks));
-	/* Get link modes supported by hardware and check against modes
-	 * requested by the user.  Return an error if unsupported mode was set.
-	 */
-	i40e_phy_type_to_ethtool(pf, &safe_ks);
-	if (!bitmap_subset(copy_ks.link_modes.advertising,
-			   safe_ks.link_modes.supported,
-			   __ETHTOOL_LINK_MODE_MASK_NBITS))
-		return -EINVAL;
-
 	/* get our own copy of the bits to check against */
 	memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
 	safe_ks.base.cmd = copy_ks.base.cmd;
@@ -887,6 +877,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 		copy_ks.base.link_mode_masks_nwords;
 	i40e_get_link_ksettings(netdev, &safe_ks);
 
+	/* Get link modes supported by hardware and check against modes
+	 * requested by the user.  Return an error if unsupported mode was set.
+	 */
+	if (!bitmap_subset(copy_ks.link_modes.advertising,
+			   safe_ks.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS))
+		return -EINVAL;
+
 	/* set autoneg back to what it currently is */
 	copy_ks.base.autoneg = safe_ks.base.autoneg;
 

From 88244a48d2ddebbe041ec4f07e40598cfd8bf06f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Ma=C5=82ek?= <patryk.malek@intel.com>
Date: Mon, 19 Mar 2018 09:28:03 -0700
Subject: [PATCH 1007/1678] i40e: Prevent setting link speed on KX_X722
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setting link settings on backplane devices shouldn't be allowed.
This patch adds one more device id to the list which we check
that against.

Signed-off-by: Patryk Małek <patryk.malek@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0ce68e8a45cc..d3d299ab9de3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -859,7 +859,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 	if (hw->device_id == I40E_DEV_ID_KX_B ||
 	    hw->device_id == I40E_DEV_ID_KX_C ||
 	    hw->device_id == I40E_DEV_ID_20G_KR2 ||
-	    hw->device_id == I40E_DEV_ID_20G_KR2_A) {
+	    hw->device_id == I40E_DEV_ID_20G_KR2_A ||
+	    hw->device_id == I40E_DEV_ID_KX_X722) {
 		netdev_info(netdev, "Changing settings is not supported on backplane.\n");
 		return -EOPNOTSUPP;
 	}

From ddbb8d5dd9b7f58293f196eab71449d0242c028d Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Mon, 19 Mar 2018 09:28:03 -0700
Subject: [PATCH 1008/1678] i40e: Close client on suspend and restore client
 MSIx on resume

During suspend client MSIx vectors are freed while they are still
in use causing a crash on entering S3.

Fix this calling client close before freeing up its MSIx vectors.
Also update the client MSIx vectors on resume before client
open is called.

Fixes commit b980c0634fe5 ("i40e: shutdown all IRQs and disable MSI-X
when suspended")

Reported-by: Stefan Assmann <sassmann@redhat.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
 drivers/net/ethernet/intel/i40e/i40e_client.c | 16 +++++++++++++---
 drivers/net/ethernet/intel/i40e/i40e_main.c   |  8 ++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 36d9401a6258..271ab1a861b7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1041,6 +1041,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
 void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
 void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
 void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+void i40e_client_update_msix_info(struct i40e_pf *pf);
 int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id);
 /**
  * i40e_irq_dynamic_enable - Enable default interrupt generation settings
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 0de9610c1d8d..704695a61645 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -287,6 +287,17 @@ out:
 	return capable;
 }
 
+void i40e_client_update_msix_info(struct i40e_pf *pf)
+{
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev || !cdev->client)
+		return;
+
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+}
+
 /**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
@@ -328,9 +339,6 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 		return;
 	}
 
-	cdev->lan_info.msix_count = pf->num_iwarp_msix;
-	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
-
 	mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
 			       struct netdev_hw_addr, list);
 	if (mac)
@@ -340,6 +348,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 
 	cdev->client = registered_client;
 	pf->cinst = cdev;
+
+	i40e_client_update_msix_info(pf);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b78c06a1f82c..4a4401c61089 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10594,6 +10594,9 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
 	if (err)
 		goto err_unwind;
 
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+		i40e_client_update_msix_info(pf);
+
 	return 0;
 
 err_unwind:
@@ -14344,6 +14347,11 @@ static int __maybe_unused i40e_suspend(struct device *dev)
 	del_timer_sync(&pf->service_timer);
 	cancel_work_sync(&pf->service_task);
 
+	/* Client close must be called explicitly here because the timer
+	 * has been stopped.
+	 */
+	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
 	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
 		i40e_enable_mc_magic_wake(pf);
 

From 35bea90436246507ec9c5bfc0c34d61696a572c8 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 19 Mar 2018 09:28:04 -0700
Subject: [PATCH 1009/1678] i40e: add doxygen comment for new mode parameter

A recent patch updated the signature for i40e_aq_set_switch_config() to
add a new parameter 'mode'. It forgot to document the parameter in the
doxygen function header comment. Add the parameter to the function
description now.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 242c4c789e8d..df852c23f45c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -2415,6 +2415,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
  * i40e_aq_set_switch_config
  * @hw: pointer to the hardware structure
  * @flags: bit flag values to set
+ * @mode: cloud filter mode
  * @valid_flags: which bit flags to set
  * @mode: cloud filter mode
  * @cmd_details: pointer to command details structure or NULL

From 85925cd0b84eb16cf2dfd730758c3266a243569c Mon Sep 17 00:00:00 2001
From: Doug Dziggel <douglas.a.dziggel@intel.com>
Date: Mon, 19 Mar 2018 09:28:04 -0700
Subject: [PATCH 1010/1678] i40e: Fix incorrect return types

Fix return types from i40e_status to enum i40e_status_code.

Signed-off-by: Doug Dziggel <douglas.a.dziggel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c    | 4 ++--
 drivers/net/ethernet/intel/i40e/i40e_prototype.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index df852c23f45c..a40b8f37d48f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -5553,7 +5553,7 @@ i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
  * function.
  *
  **/
-i40e_status
+enum i40e_status_code
 i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
 			     struct i40e_aqc_cloud_filters_element_bb *filters,
 			     u8 filter_count)
@@ -5647,7 +5647,7 @@ i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
  * function.
  *
  **/
-i40e_status
+enum i40e_status_code
 i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
 			     struct i40e_aqc_cloud_filters_element_bb *filters,
 			     u8 filter_count)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 83798b7841b9..eabb636f6a19 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -287,7 +287,7 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
 		struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw,
 				   struct i40e_asq_cmd_details *cmd_details);
-i40e_status
+enum i40e_status_code
 i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
 			     struct i40e_aqc_cloud_filters_element_bb *filters,
 			     u8 filter_count);
@@ -299,7 +299,7 @@ enum i40e_status_code
 i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
 			  struct i40e_aqc_cloud_filters_element_data *filters,
 			  u8 filter_count);
-i40e_status
+enum i40e_status_code
 i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
 			     struct i40e_aqc_cloud_filters_element_bb *filters,
 			     u8 filter_count);

From 3f8c8437277c51fbd7af175240112f831c590845 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Ma=C5=82ek?= <patryk.malek@intel.com>
Date: Mon, 19 Mar 2018 09:28:04 -0700
Subject: [PATCH 1011/1678] i40e: Prevent setting link speed on
 I40E_DEV_ID_25G_B
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Setting link settings on backplane devices shouldn't be allowed.
This patch adds one more device id to the list which we check
that against.

Signed-off-by: Patryk Małek <patryk.malek@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index d3d299ab9de3..0c7e7de595d3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -860,6 +860,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
 	    hw->device_id == I40E_DEV_ID_KX_C ||
 	    hw->device_id == I40E_DEV_ID_20G_KR2 ||
 	    hw->device_id == I40E_DEV_ID_20G_KR2_A ||
+	    hw->device_id == I40E_DEV_ID_25G_B ||
 	    hw->device_id == I40E_DEV_ID_KX_X722) {
 		netdev_info(netdev, "Changing settings is not supported on backplane.\n");
 		return -EOPNOTSUPP;

From 6b9a9c26ef2f78911ee848446f845b77c45032f4 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 19 Mar 2018 09:28:04 -0700
Subject: [PATCH 1012/1678] i40evf: remove flags that are never used

These flags were defined, but there is no use within the driver code, so
we don't need to keep them.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40evf/i40evf.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index e46555ad7122..279dced87e47 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -280,13 +280,10 @@ struct i40evf_adapter {
 
 	u32 flags;
 #define I40EVF_FLAG_RX_CSUM_ENABLED		BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED		BIT(1)
-#define I40EVF_FLAG_MQ_CAPABLE			BIT(2)
 #define I40EVF_FLAG_PF_COMMS_FAILED		BIT(3)
 #define I40EVF_FLAG_RESET_PENDING		BIT(4)
 #define I40EVF_FLAG_RESET_NEEDED		BIT(5)
 #define I40EVF_FLAG_WB_ON_ITR_CAPABLE		BIT(6)
-#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE	BIT(7)
 #define I40EVF_FLAG_ADDR_SET_BY_PF		BIT(8)
 #define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(9)
 #define I40EVF_FLAG_CLIENT_NEEDS_OPEN		BIT(10)

From 12d80bca0bb7178bc85a32e19795ff249a5903ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Jab=C5=82o=C5=84ski?=
 <pawel.jablonski@intel.com>
Date: Mon, 19 Mar 2018 09:28:04 -0700
Subject: [PATCH 1013/1678] i40e: Fix the polling mechanism of
 GLGEN_RSTAT.DEVSTATE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the polling mechanism of GLGEN_RSTAT.DEVSTATE in the
PF Reset path when Global Reset is in progress. While the driver
is polling for the end of the PF Reset and the Global Reset is
triggered, abandon the PF Reset path and prepare for the
upcoming Global Reset.

Signed-off-by: Paweł Jabłoński <pawel.jablonski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_common.c | 35 +++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index a40b8f37d48f..4fa31d87d9d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1208,6 +1208,29 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
 	return media;
 }
 
+/**
+ * i40e_poll_globr - Poll for Global Reset completion
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+static i40e_status i40e_poll_globr(struct i40e_hw *hw,
+				   u32 retry_limit)
+{
+	u32 cnt, reg = 0;
+
+	for (cnt = 0; cnt < retry_limit; cnt++) {
+		reg = rd32(hw, I40E_GLGEN_RSTAT);
+		if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+			return 0;
+		msleep(100);
+	}
+
+	hw_dbg(hw, "Global reset failed.\n");
+	hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg);
+
+	return I40E_ERR_RESET_FAILED;
+}
+
 #define I40E_PF_RESET_WAIT_COUNT_A0	200
 #define I40E_PF_RESET_WAIT_COUNT	200
 /**
@@ -1284,14 +1307,14 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw)
 			if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
 				break;
 			reg2 = rd32(hw, I40E_GLGEN_RSTAT);
-			if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
-				hw_dbg(hw, "Core reset upcoming. Skipping PF reset request.\n");
-				hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg2);
-				return I40E_ERR_NOT_READY;
-			}
+			if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK)
+				break;
 			usleep_range(1000, 2000);
 		}
-		if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+		if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+			if (i40e_poll_globr(hw, grst_del))
+				return I40E_ERR_RESET_FAILED;
+		} else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
 			hw_dbg(hw, "PF reset polling failed to complete.\n");
 			return I40E_ERR_RESET_FAILED;
 		}

From 2c3682f0be97a5f57c6c8b40fa154dfc77efb461 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:56:49 -0700
Subject: [PATCH 1014/1678] sock: make static tls function alloc_sg generic
 sock helper

The TLS ULP module builds scatterlists from a sock using
page_frag_refill(). This is going to be useful for other ULPs
so move it into sock file for more general use.

In the process remove useless goto at end of while loop.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock.h |  4 +++
 net/core/sock.c    | 56 +++++++++++++++++++++++++++++++++++++
 net/tls/tls_sw.c   | 69 +++++-----------------------------------------
 3 files changed, 67 insertions(+), 62 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index b9624581d639..447150c51feb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2141,6 +2141,10 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+		int *sg_num_elem, unsigned int *sg_size,
+		int first_coalesce);
+
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/net/core/sock.c b/net/core/sock.c
index 27f218bba43f..f68dff0d7bc4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2239,6 +2239,62 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
+int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
+		int *sg_num_elem, unsigned int *sg_size,
+		int first_coalesce)
+{
+	struct page_frag *pfrag;
+	unsigned int size = *sg_size;
+	int num_elem = *sg_num_elem, use = 0, rc = 0;
+	struct scatterlist *sge;
+	unsigned int orig_offset;
+
+	len -= size;
+	pfrag = sk_page_frag(sk);
+
+	while (len > 0) {
+		if (!sk_page_frag_refill(sk, pfrag)) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		use = min_t(int, len, pfrag->size - pfrag->offset);
+
+		if (!sk_wmem_schedule(sk, use)) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		sk_mem_charge(sk, use);
+		size += use;
+		orig_offset = pfrag->offset;
+		pfrag->offset += use;
+
+		sge = sg + num_elem - 1;
+		if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
+		    sg->offset + sg->length == orig_offset) {
+			sg->length += use;
+		} else {
+			sge++;
+			sg_unmark_end(sge);
+			sg_set_page(sge, pfrag->page, use, orig_offset);
+			get_page(pfrag->page);
+			++num_elem;
+			if (num_elem == MAX_SKB_FRAGS) {
+				rc = -ENOSPC;
+				break;
+			}
+		}
+
+		len -= use;
+	}
+out:
+	*sg_size = size;
+	*sg_num_elem = num_elem;
+	return rc;
+}
+EXPORT_SYMBOL(sk_alloc_sg);
+
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f26376e954ae..0fc8a24c6473 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -87,71 +87,16 @@ static void trim_both_sgl(struct sock *sk, int target_size)
 		target_size);
 }
 
-static int alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size,
-		    int first_coalesce)
-{
-	struct page_frag *pfrag;
-	unsigned int size = *sg_size;
-	int num_elem = *sg_num_elem, use = 0, rc = 0;
-	struct scatterlist *sge;
-	unsigned int orig_offset;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + num_elem - 1;
-		if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
-		    sg->offset + sg->length == orig_offset) {
-			sg->length += use;
-		} else {
-			sge++;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			++num_elem;
-			if (num_elem == MAX_SKB_FRAGS) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-	goto out;
-
-out:
-	*sg_size = size;
-	*sg_num_elem = num_elem;
-	return rc;
-}
-
 static int alloc_encrypted_sg(struct sock *sk, int len)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
 	int rc = 0;
 
-	rc = alloc_sg(sk, len, ctx->sg_encrypted_data,
-		      &ctx->sg_encrypted_num_elem, &ctx->sg_encrypted_size, 0);
+	rc = sk_alloc_sg(sk, len,
+			 ctx->sg_encrypted_data,
+			 &ctx->sg_encrypted_num_elem,
+			 &ctx->sg_encrypted_size, 0);
 
 	return rc;
 }
@@ -162,9 +107,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
 	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
 	int rc = 0;
 
-	rc = alloc_sg(sk, len, ctx->sg_plaintext_data,
-		      &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
-		      tls_ctx->pending_open_record_frags);
+	rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data,
+			 &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
+			 tls_ctx->pending_open_record_frags);
 
 	return rc;
 }

From ffa35660017161524b7db79b0ce293fa1af16c4d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:56:54 -0700
Subject: [PATCH 1015/1678] sockmap: convert refcnt to an atomic refcnt

The sockmap refcnt up until now has been wrapped in the
sk_callback_lock(). So its not actually needed any locking of its
own. The counter itself tracks the lifetime of the psock object.
Sockets in a sockmap have a lifetime that is independent of the
map they are part of. This is possible because a single socket may
be in multiple maps. When this happens we can only release the
psock data associated with the socket when the refcnt reaches
zero. There are three possible delete sock reference decrement
paths first through the normal sockmap process, the user deletes
the socket from the map. Second the map is removed and all sockets
in the map are removed, delete path is similar to case 1. The third
case is an asyncronous socket event such as a closing the socket. The
last case handles removing sockets that are no longer available.
For completeness, although inc does not pose any problems in this
patch series, the inc case only happens when a psock is added to a
map.

Next we plan to add another socket prog type to handle policy and
monitoring on the TX path. When we do this however we will need to
keep a reference count open across the sendmsg/sendpage call and
holding the sk_callback_lock() here (on every send) seems less than
ideal, also it may sleep in cases where we hit memory pressure.
Instead of dealing with these issues in some clever way simply make
the reference counting a refcnt_t type and do proper atomic ops.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a927e89dad6e..051b224270e3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -62,8 +62,7 @@ struct smap_psock_map_entry {
 
 struct smap_psock {
 	struct rcu_head	rcu;
-	/* refcnt is used inside sk_callback_lock */
-	u32 refcnt;
+	refcount_t refcnt;
 
 	/* datapath variables */
 	struct sk_buff_head rxqueue;
@@ -373,15 +372,13 @@ static void smap_destroy_psock(struct rcu_head *rcu)
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
 {
-	psock->refcnt--;
-	if (psock->refcnt)
-		return;
-
-	tcp_cleanup_ulp(sock);
-	smap_stop_sock(psock, sock);
-	clear_bit(SMAP_TX_RUNNING, &psock->state);
-	rcu_assign_sk_user_data(sock, NULL);
-	call_rcu_sched(&psock->rcu, smap_destroy_psock);
+	if (refcount_dec_and_test(&psock->refcnt)) {
+		tcp_cleanup_ulp(sock);
+		smap_stop_sock(psock, sock);
+		clear_bit(SMAP_TX_RUNNING, &psock->state);
+		rcu_assign_sk_user_data(sock, NULL);
+		call_rcu_sched(&psock->rcu, smap_destroy_psock);
+	}
 }
 
 static int smap_parse_func_strparser(struct strparser *strp,
@@ -511,7 +508,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 	INIT_WORK(&psock->tx_work, smap_tx_work);
 	INIT_WORK(&psock->gc_work, smap_gc_work);
 	INIT_LIST_HEAD(&psock->maps);
-	psock->refcnt = 1;
+	refcount_set(&psock->refcnt, 1);
 
 	rcu_assign_sk_user_data(sock, psock);
 	sock_hold(sock);
@@ -772,7 +769,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 			err = -EBUSY;
 			goto out_progs;
 		}
-		psock->refcnt++;
+		refcount_inc(&psock->refcnt);
 	} else {
 		psock = smap_init_psock(sock, stab);
 		if (IS_ERR(psock)) {

From 312fc2b4c82e96a48cb2d0da2bd4816eb253c499 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:00 -0700
Subject: [PATCH 1016/1678] net: do_tcp_sendpages flag to avoid
 SKBTX_SHARED_FRAG

When calling do_tcp_sendpages() from in kernel and we know the data
has no references from user side we can omit SKBTX_SHARED_FRAG flag.
This patch adds an internal flag, NO_SKBTX_SHARED_FRAG that can be used
to omit setting SKBTX_SHARED_FRAG.

The flag is not exposed to userspace because the sendpage call from
the splice logic masks out all bits except MSG_MORE.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/socket.h | 1 +
 net/ipv4/tcp.c         | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 1ce1f768a58c..60e01482a9c4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
 #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
 #define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
 
 #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fb350f740f69..f90ec24c2cc8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -994,7 +994,9 @@ new_segment:
 			get_page(page);
 			skb_fill_page_desc(skb, i, page, offset, copy);
 		}
-		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+		if (!(flags & MSG_NO_SHARED_FRAGS))
+			skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 
 		skb->len += copy;
 		skb->data_len += copy;

From 8c05dbf04b2882c3c0bc43fe7668c720210877f3 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:05 -0700
Subject: [PATCH 1017/1678] net: generalize sk_alloc_sg to work with
 scatterlist rings

The current implementation of sk_alloc_sg expects scatterlist to always
start at entry 0 and complete at entry MAX_SKB_FRAGS.

Future patches will want to support starting at arbitrary offset into
scatterlist so add an additional sg_start parameters and then default
to the current values in TLS code paths.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock.h |  2 +-
 net/core/sock.c    | 27 ++++++++++++++++-----------
 net/tls/tls_sw.c   |  4 ++--
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 447150c51feb..b7c75e024e37 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2142,7 +2142,7 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
 int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int *sg_num_elem, unsigned int *sg_size,
+		int sg_start, int *sg_curr, unsigned int *sg_size,
 		int first_coalesce);
 
 /*
diff --git a/net/core/sock.c b/net/core/sock.c
index f68dff0d7bc4..4f92c2910200 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2240,19 +2240,20 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 EXPORT_SYMBOL(sk_page_frag_refill);
 
 int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int *sg_num_elem, unsigned int *sg_size,
+		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
 		int first_coalesce)
 {
+	int sg_curr = *sg_curr_index, use = 0, rc = 0;
+	unsigned int size = *sg_curr_size;
 	struct page_frag *pfrag;
-	unsigned int size = *sg_size;
-	int num_elem = *sg_num_elem, use = 0, rc = 0;
 	struct scatterlist *sge;
-	unsigned int orig_offset;
 
 	len -= size;
 	pfrag = sk_page_frag(sk);
 
 	while (len > 0) {
+		unsigned int orig_offset;
+
 		if (!sk_page_frag_refill(sk, pfrag)) {
 			rc = -ENOMEM;
 			goto out;
@@ -2270,17 +2271,21 @@ int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
 		orig_offset = pfrag->offset;
 		pfrag->offset += use;
 
-		sge = sg + num_elem - 1;
-		if (num_elem > first_coalesce && sg_page(sg) == pfrag->page &&
+		sge = sg + sg_curr - 1;
+		if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
 		    sg->offset + sg->length == orig_offset) {
 			sg->length += use;
 		} else {
-			sge++;
+			sge = sg + sg_curr;
 			sg_unmark_end(sge);
 			sg_set_page(sge, pfrag->page, use, orig_offset);
 			get_page(pfrag->page);
-			++num_elem;
-			if (num_elem == MAX_SKB_FRAGS) {
+			sg_curr++;
+
+			if (sg_curr == MAX_SKB_FRAGS)
+				sg_curr = 0;
+
+			if (sg_curr == sg_start) {
 				rc = -ENOSPC;
 				break;
 			}
@@ -2289,8 +2294,8 @@ int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
 		len -= use;
 	}
 out:
-	*sg_size = size;
-	*sg_num_elem = num_elem;
+	*sg_curr_size = size;
+	*sg_curr_index = sg_curr;
 	return rc;
 }
 EXPORT_SYMBOL(sk_alloc_sg);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 0fc8a24c6473..057a558ed6d7 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -94,7 +94,7 @@ static int alloc_encrypted_sg(struct sock *sk, int len)
 	int rc = 0;
 
 	rc = sk_alloc_sg(sk, len,
-			 ctx->sg_encrypted_data,
+			 ctx->sg_encrypted_data, 0,
 			 &ctx->sg_encrypted_num_elem,
 			 &ctx->sg_encrypted_size, 0);
 
@@ -107,7 +107,7 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
 	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
 	int rc = 0;
 
-	rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data,
+	rc = sk_alloc_sg(sk, len, ctx->sg_plaintext_data, 0,
 			 &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
 			 tls_ctx->pending_open_record_frags);
 

From 4f738adba30a7cfc006f605707e7aee847ffefa0 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:10 -0700
Subject: [PATCH 1018/1678] bpf: create tcp_bpf_ulp allowing BPF to monitor
 socket TX/RX data

This implements a BPF ULP layer to allow policy enforcement and
monitoring at the socket layer. In order to support this a new
program type BPF_PROG_TYPE_SK_MSG is used to run the policy at
the sendmsg/sendpage hook. To attach the policy to sockets a
sockmap is used with a new program attach type BPF_SK_MSG_VERDICT.

Similar to previous sockmap usages when a sock is added to a
sockmap, via a map update, if the map contains a BPF_SK_MSG_VERDICT
program type attached then the BPF ULP layer is created on the
socket and the attached BPF_PROG_TYPE_SK_MSG program is run for
every msg in sendmsg case and page/offset in sendpage case.

BPF_PROG_TYPE_SK_MSG Semantics/API:

BPF_PROG_TYPE_SK_MSG supports only two return codes SK_PASS and
SK_DROP. Returning SK_DROP free's the copied data in the sendmsg
case and in the sendpage case leaves the data untouched. Both cases
return -EACESS to the user. Returning SK_PASS will allow the msg to
be sent.

In the sendmsg case data is copied into kernel space buffers before
running the BPF program. The kernel space buffers are stored in a
scatterlist object where each element is a kernel memory buffer.
Some effort is made to coalesce data from the sendmsg call here.
For example a sendmsg call with many one byte iov entries will
likely be pushed into a single entry. The BPF program is run with
data pointers (start/end) pointing to the first sg element.

In the sendpage case data is not copied. We opt not to copy the
data by default here, because the BPF infrastructure does not
know what bytes will be needed nor when they will be needed. So
copying all bytes may be wasteful. Because of this the initial
start/end data pointers are (0,0). Meaning no data can be read or
written. This avoids reading data that may be modified by the
user. A new helper is added later in this series if reading and
writing the data is needed. The helper call will do a copy by
default so that the page is exclusively owned by the BPF call.

The verdict from the BPF_PROG_TYPE_SK_MSG applies to the entire msg
in the sendmsg() case and the entire page/offset in the sendpage case.
This avoids ambiguity on how to handle mixed return codes in the
sendmsg case. Again a helper is added later in the series if
a verdict needs to apply to multiple system calls and/or only
a subpart of the currently being processed message.

The helper msg_redirect_map() can be used to select the socket to
send the data on. This is used similar to existing redirect use
cases. This allows policy to redirect msgs.

Pseudo code simple example:

The basic logic to attach a program to a socket is as follows,

  // load the programs
  bpf_prog_load(SOCKMAP_TCP_MSG_PROG, BPF_PROG_TYPE_SK_MSG,
		&obj, &msg_prog);

  // lookup the sockmap
  bpf_map_msg = bpf_object__find_map_by_name(obj, "my_sock_map");

  // get fd for sockmap
  map_fd_msg = bpf_map__fd(bpf_map_msg);

  // attach program to sockmap
  bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);

Adding sockets to the map is done in the normal way,

  // Add a socket 'fd' to sockmap at location 'i'
  bpf_map_update_elem(map_fd_msg, &i, fd, BPF_ANY);

After the above any socket attached to "my_sock_map", in this case
'fd', will run the BPF msg verdict program (msg_prog) on every
sendmsg and sendpage system call.

For a complete example see BPF selftests or sockmap samples.

Implementation notes:

It seemed the simplest, to me at least, to use a refcnt to ensure
psock is not lost across the sendmsg copy into the sg, the bpf program
running on the data in sg_data, and the final pass to the TCP stack.
Some performance testing may show a better method to do this and avoid
the refcnt cost, but for now use the simpler method.

Another item that will come after basic support is in place is
supporting MSG_MORE flag. At the moment we call sendpages even if
the MSG_MORE flag is set. An enhancement would be to collect the
pages into a larger scatterlist and pass down the stack. Notice that
bpf_tcp_sendmsg() could support this with some additional state saved
across sendmsg calls. I built the code to support this without having
to do refactoring work. Other features TBD include ZEROCOPY and the
TCP_RECV_QUEUE/TCP_NO_QUEUE support. This will follow initial series
shortly.

Future work could improve size limits on the scatterlist rings used
here. Currently, we use MAX_SKB_FRAGS simply because this was being
used already in the TLS case. Future work could extend the kernel sk
APIs to tune this depending on workload. This is a trade-off
between memory usage and throughput performance.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h       |   1 +
 include/linux/bpf_types.h |   1 +
 include/linux/filter.h    |  17 +
 include/uapi/linux/bpf.h  |  22 +-
 kernel/bpf/sockmap.c      | 712 +++++++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c      |  14 +-
 kernel/bpf/verifier.c     |   5 +-
 net/core/filter.c         | 106 ++++++
 8 files changed, 857 insertions(+), 21 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 66df387106de..819229c80eca 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -21,6 +21,7 @@ struct bpf_verifier_env;
 struct perf_event;
 struct bpf_prog;
 struct bpf_map;
+struct sock;
 
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 19b8349a3809..5e2e8a49fb21 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -13,6 +13,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
 #endif
 #ifdef CONFIG_BPF_EVENTS
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fdb691b520c0..109d05ccea9a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -507,6 +507,22 @@ struct xdp_buff {
 	struct xdp_rxq_info *rxq;
 };
 
+struct sk_msg_buff {
+	void *data;
+	void *data_end;
+	__u32 apply_bytes;
+	__u32 cork_bytes;
+	int sg_copybreak;
+	int sg_start;
+	int sg_curr;
+	int sg_end;
+	struct scatterlist sg_data[MAX_SKB_FRAGS];
+	bool sg_copy[MAX_SKB_FRAGS];
+	__u32 key;
+	__u32 flags;
+	struct bpf_map *map;
+};
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
@@ -771,6 +787,7 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
 void bpf_warn_invalid_xdp_action(u32 act);
 
 struct sock *do_sk_redirect_map(struct sk_buff *skb);
+struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1e15d1724d89..ef529539abde 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
+	BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
+	BPF_SK_MSG_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -718,6 +720,15 @@ union bpf_attr {
  * int bpf_override_return(pt_regs, rc)
  *	@pt_regs: pointer to struct pt_regs
  *	@rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -779,7 +790,8 @@ union bpf_attr {
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
 	FN(override_return),		\
-	FN(sock_ops_cb_flags_set),
+	FN(sock_ops_cb_flags_set),	\
+	FN(msg_redirect_map),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -942,6 +954,14 @@ enum sk_action {
 	SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+	void *data;
+	void *data_end;
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 051b224270e3..69c5bccabd22 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -38,6 +38,7 @@
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <linux/list.h>
+#include <linux/mm.h>
 #include <net/strparser.h>
 #include <net/tcp.h>
 
@@ -47,6 +48,7 @@
 struct bpf_stab {
 	struct bpf_map map;
 	struct sock **sock_map;
+	struct bpf_prog *bpf_tx_msg;
 	struct bpf_prog *bpf_parse;
 	struct bpf_prog *bpf_verdict;
 };
@@ -73,7 +75,16 @@ struct smap_psock {
 	int save_off;
 	struct sk_buff *save_skb;
 
+	/* datapath variables for tx_msg ULP */
+	struct sock *sk_redir;
+	int apply_bytes;
+	int cork_bytes;
+	int sg_size;
+	int eval;
+	struct sk_msg_buff *cork;
+
 	struct strparser strp;
+	struct bpf_prog *bpf_tx_msg;
 	struct bpf_prog *bpf_parse;
 	struct bpf_prog *bpf_verdict;
 	struct list_head maps;
@@ -91,6 +102,11 @@ struct smap_psock {
 	void (*save_write_space)(struct sock *sk);
 };
 
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+			    int offset, size_t size, int flags);
+
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
 	return rcu_dereference_sk_user_data(sk);
@@ -115,27 +131,41 @@ static int bpf_tcp_init(struct sock *sk)
 
 	psock->save_close = sk->sk_prot->close;
 	psock->sk_proto = sk->sk_prot;
+
+	if (psock->bpf_tx_msg) {
+		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+	}
+
 	sk->sk_prot = &tcp_bpf_proto;
 	rcu_read_unlock();
 	return 0;
 }
 
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+
 static void bpf_tcp_release(struct sock *sk)
 {
 	struct smap_psock *psock;
 
 	rcu_read_lock();
 	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto out;
 
-	if (likely(psock)) {
-		sk->sk_prot = psock->sk_proto;
-		psock->sk_proto = NULL;
+	if (psock->cork) {
+		free_start_sg(psock->sock, psock->cork);
+		kfree(psock->cork);
+		psock->cork = NULL;
 	}
+
+	sk->sk_prot = psock->sk_proto;
+	psock->sk_proto = NULL;
+out:
 	rcu_read_unlock();
 }
 
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
 static void bpf_tcp_close(struct sock *sk, long timeout)
 {
 	void (*close_fun)(struct sock *sk, long timeout);
@@ -174,6 +204,7 @@ enum __sk_action {
 	__SK_DROP = 0,
 	__SK_PASS,
 	__SK_REDIRECT,
+	__SK_NONE,
 };
 
 static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
@@ -185,10 +216,621 @@ static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
 	.release	= bpf_tcp_release,
 };
 
+static int memcopy_from_iter(struct sock *sk,
+			     struct sk_msg_buff *md,
+			     struct iov_iter *from, int bytes)
+{
+	struct scatterlist *sg = md->sg_data;
+	int i = md->sg_curr, rc = -ENOSPC;
+
+	do {
+		int copy;
+		char *to;
+
+		if (md->sg_copybreak >= sg[i].length) {
+			md->sg_copybreak = 0;
+
+			if (++i == MAX_SKB_FRAGS)
+				i = 0;
+
+			if (i == md->sg_end)
+				break;
+		}
+
+		copy = sg[i].length - md->sg_copybreak;
+		to = sg_virt(&sg[i]) + md->sg_copybreak;
+		md->sg_copybreak += copy;
+
+		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+			rc = copy_from_iter_nocache(to, copy, from);
+		else
+			rc = copy_from_iter(to, copy, from);
+
+		if (rc != copy) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+		bytes -= copy;
+		if (!bytes)
+			break;
+
+		md->sg_copybreak = 0;
+		if (++i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (i != md->sg_end);
+out:
+	md->sg_curr = i;
+	return rc;
+}
+
+static int bpf_tcp_push(struct sock *sk, int apply_bytes,
+			struct sk_msg_buff *md,
+			int flags, bool uncharge)
+{
+	bool apply = apply_bytes;
+	struct scatterlist *sg;
+	int offset, ret = 0;
+	struct page *p;
+	size_t size;
+
+	while (1) {
+		sg = md->sg_data + md->sg_start;
+		size = (apply && apply_bytes < sg->length) ?
+			apply_bytes : sg->length;
+		offset = sg->offset;
+
+		tcp_rate_check_app_limited(sk);
+		p = sg_page(sg);
+retry:
+		ret = do_tcp_sendpages(sk, p, offset, size, flags);
+		if (ret != size) {
+			if (ret > 0) {
+				if (apply)
+					apply_bytes -= ret;
+				size -= ret;
+				offset += ret;
+				if (uncharge)
+					sk_mem_uncharge(sk, ret);
+				goto retry;
+			}
+
+			sg->length = size;
+			sg->offset = offset;
+			return ret;
+		}
+
+		if (apply)
+			apply_bytes -= ret;
+		sg->offset += ret;
+		sg->length -= ret;
+		if (uncharge)
+			sk_mem_uncharge(sk, ret);
+
+		if (!sg->length) {
+			put_page(p);
+			md->sg_start++;
+			if (md->sg_start == MAX_SKB_FRAGS)
+				md->sg_start = 0;
+			memset(sg, 0, sizeof(*sg));
+
+			if (md->sg_start == md->sg_end)
+				break;
+		}
+
+		if (apply && !apply_bytes)
+			break;
+	}
+	return 0;
+}
+
+static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
+{
+	struct scatterlist *sg = md->sg_data + md->sg_start;
+
+	if (md->sg_copy[md->sg_start]) {
+		md->data = md->data_end = 0;
+	} else {
+		md->data = sg_virt(sg);
+		md->data_end = md->data + sg->length;
+	}
+}
+
+static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+	struct scatterlist *sg = md->sg_data;
+	int i = md->sg_start;
+
+	do {
+		int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
+
+		sk_mem_uncharge(sk, uncharge);
+		bytes -= uncharge;
+		if (!bytes)
+			break;
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (i != md->sg_end);
+}
+
+static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+	struct scatterlist *sg = md->sg_data;
+	int i = md->sg_start, free;
+
+	while (bytes && sg[i].length) {
+		free = sg[i].length;
+		if (bytes < free) {
+			sg[i].length -= bytes;
+			sg[i].offset += bytes;
+			sk_mem_uncharge(sk, bytes);
+			break;
+		}
+
+		sk_mem_uncharge(sk, sg[i].length);
+		put_page(sg_page(&sg[i]));
+		bytes -= sg[i].length;
+		sg[i].length = 0;
+		sg[i].page_link = 0;
+		sg[i].offset = 0;
+		i++;
+
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	}
+}
+
+static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+{
+	struct scatterlist *sg = md->sg_data;
+	int i = start, free = 0;
+
+	while (sg[i].length) {
+		free += sg[i].length;
+		sk_mem_uncharge(sk, sg[i].length);
+		put_page(sg_page(&sg[i]));
+		sg[i].length = 0;
+		sg[i].page_link = 0;
+		sg[i].offset = 0;
+		i++;
+
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	}
+
+	return free;
+}
+
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+	int free = free_sg(sk, md->sg_start, md);
+
+	md->sg_start = md->sg_end;
+	return free;
+}
+
+static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+	return free_sg(sk, md->sg_curr, md);
+}
+
+static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
+{
+	return ((_rc == SK_PASS) ?
+	       (md->map ? __SK_REDIRECT : __SK_PASS) :
+	       __SK_DROP);
+}
+
+static unsigned int smap_do_tx_msg(struct sock *sk,
+				   struct smap_psock *psock,
+				   struct sk_msg_buff *md)
+{
+	struct bpf_prog *prog;
+	unsigned int rc, _rc;
+
+	preempt_disable();
+	rcu_read_lock();
+
+	/* If the policy was removed mid-send then default to 'accept' */
+	prog = READ_ONCE(psock->bpf_tx_msg);
+	if (unlikely(!prog)) {
+		_rc = SK_PASS;
+		goto verdict;
+	}
+
+	bpf_compute_data_pointers_sg(md);
+	rc = (*prog->bpf_func)(md, prog->insnsi);
+	psock->apply_bytes = md->apply_bytes;
+
+	/* Moving return codes from UAPI namespace into internal namespace */
+	_rc = bpf_map_msg_verdict(rc, md);
+
+	/* The psock has a refcount on the sock but not on the map and because
+	 * we need to drop rcu read lock here its possible the map could be
+	 * removed between here and when we need it to execute the sock
+	 * redirect. So do the map lookup now for future use.
+	 */
+	if (_rc == __SK_REDIRECT) {
+		if (psock->sk_redir)
+			sock_put(psock->sk_redir);
+		psock->sk_redir = do_msg_redirect_map(md);
+		if (!psock->sk_redir) {
+			_rc = __SK_DROP;
+			goto verdict;
+		}
+		sock_hold(psock->sk_redir);
+	}
+verdict:
+	rcu_read_unlock();
+	preempt_enable();
+
+	return _rc;
+}
+
+static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
+				       struct sk_msg_buff *md,
+				       int flags)
+{
+	struct smap_psock *psock;
+	struct scatterlist *sg;
+	int i, err, free = 0;
+
+	sg = md->sg_data;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto out_rcu;
+
+	if (!refcount_inc_not_zero(&psock->refcnt))
+		goto out_rcu;
+
+	rcu_read_unlock();
+	lock_sock(sk);
+	err = bpf_tcp_push(sk, send, md, flags, false);
+	release_sock(sk);
+	smap_release_sock(psock, sk);
+	if (unlikely(err))
+		goto out;
+	return 0;
+out_rcu:
+	rcu_read_unlock();
+out:
+	i = md->sg_start;
+	while (sg[i].length) {
+		free += sg[i].length;
+		put_page(sg_page(&sg[i]));
+		sg[i].length = 0;
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	}
+	return free;
+}
+
+static inline void bpf_md_init(struct smap_psock *psock)
+{
+	if (!psock->apply_bytes) {
+		psock->eval =  __SK_NONE;
+		if (psock->sk_redir) {
+			sock_put(psock->sk_redir);
+			psock->sk_redir = NULL;
+		}
+	}
+}
+
+static void apply_bytes_dec(struct smap_psock *psock, int i)
+{
+	if (psock->apply_bytes) {
+		if (psock->apply_bytes < i)
+			psock->apply_bytes = 0;
+		else
+			psock->apply_bytes -= i;
+	}
+}
+
+static int bpf_exec_tx_verdict(struct smap_psock *psock,
+			       struct sk_msg_buff *m,
+			       struct sock *sk,
+			       int *copied, int flags)
+{
+	bool cork = false, enospc = (m->sg_start == m->sg_end);
+	struct sock *redir;
+	int err = 0;
+	int send;
+
+more_data:
+	if (psock->eval == __SK_NONE)
+		psock->eval = smap_do_tx_msg(sk, psock, m);
+
+	if (m->cork_bytes &&
+	    m->cork_bytes > psock->sg_size && !enospc) {
+		psock->cork_bytes = m->cork_bytes - psock->sg_size;
+		if (!psock->cork) {
+			psock->cork = kcalloc(1,
+					sizeof(struct sk_msg_buff),
+					GFP_ATOMIC | __GFP_NOWARN);
+
+			if (!psock->cork) {
+				err = -ENOMEM;
+				goto out_err;
+			}
+		}
+		memcpy(psock->cork, m, sizeof(*m));
+		goto out_err;
+	}
+
+	send = psock->sg_size;
+	if (psock->apply_bytes && psock->apply_bytes < send)
+		send = psock->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:
+		err = bpf_tcp_push(sk, send, m, flags, true);
+		if (unlikely(err)) {
+			*copied -= free_start_sg(sk, m);
+			break;
+		}
+
+		apply_bytes_dec(psock, send);
+		psock->sg_size -= send;
+		break;
+	case __SK_REDIRECT:
+		redir = psock->sk_redir;
+		apply_bytes_dec(psock, send);
+
+		if (psock->cork) {
+			cork = true;
+			psock->cork = NULL;
+		}
+
+		return_mem_sg(sk, send, m);
+		release_sock(sk);
+
+		err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
+		lock_sock(sk);
+
+		if (cork) {
+			free_start_sg(sk, m);
+			kfree(m);
+			m = NULL;
+		}
+		if (unlikely(err))
+			*copied -= err;
+		else
+			psock->sg_size -= send;
+		break;
+	case __SK_DROP:
+	default:
+		free_bytes_sg(sk, send, m);
+		apply_bytes_dec(psock, send);
+		*copied -= send;
+		psock->sg_size -= send;
+		err = -EACCES;
+		break;
+	}
+
+	if (likely(!err)) {
+		bpf_md_init(psock);
+		if (m &&
+		    m->sg_data[m->sg_start].page_link &&
+		    m->sg_data[m->sg_start].length)
+			goto more_data;
+	}
+
+out_err:
+	return err;
+}
+
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
+	struct sk_msg_buff md = {0};
+	unsigned int sg_copy = 0;
+	struct smap_psock *psock;
+	int copied = 0, err = 0;
+	struct scatterlist *sg;
+	long timeo;
+
+	/* Its possible a sock event or user removed the psock _but_ the ops
+	 * have not been reprogrammed yet so we get here. In this case fallback
+	 * to tcp_sendmsg. Note this only works because we _only_ ever allow
+	 * a single ULP there is no hierarchy here.
+	 */
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return tcp_sendmsg(sk, msg, size);
+	}
+
+	/* Increment the psock refcnt to ensure its not released while sending a
+	 * message. Required because sk lookup and bpf programs are used in
+	 * separate rcu critical sections. Its OK if we lose the map entry
+	 * but we can't lose the sock reference.
+	 */
+	if (!refcount_inc_not_zero(&psock->refcnt)) {
+		rcu_read_unlock();
+		return tcp_sendmsg(sk, msg, size);
+	}
+
+	sg = md.sg_data;
+	sg_init_table(sg, MAX_SKB_FRAGS);
+	rcu_read_unlock();
+
+	lock_sock(sk);
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	while (msg_data_left(msg)) {
+		struct sk_msg_buff *m;
+		bool enospc = false;
+		int copy;
+
+		if (sk->sk_err) {
+			err = sk->sk_err;
+			goto out_err;
+		}
+
+		copy = msg_data_left(msg);
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+
+		m = psock->cork_bytes ? psock->cork : &md;
+		m->sg_curr = m->sg_copybreak ? m->sg_curr : m->sg_end;
+		err = sk_alloc_sg(sk, copy, m->sg_data,
+				  m->sg_start, &m->sg_end, &sg_copy,
+				  m->sg_end - 1);
+		if (err) {
+			if (err != -ENOSPC)
+				goto wait_for_memory;
+			enospc = true;
+			copy = sg_copy;
+		}
+
+		err = memcopy_from_iter(sk, m, &msg->msg_iter, copy);
+		if (err < 0) {
+			free_curr_sg(sk, m);
+			goto out_err;
+		}
+
+		psock->sg_size += copy;
+		copied += copy;
+		sg_copy = 0;
+
+		/* When bytes are being corked skip running BPF program and
+		 * applying verdict unless there is no more buffer space. In
+		 * the ENOSPC case simply run BPF prorgram with currently
+		 * accumulated data. We don't have much choice at this point
+		 * we could try extending the page frags or chaining complex
+		 * frags but even in these cases _eventually_ we will hit an
+		 * OOM scenario. More complex recovery schemes may be
+		 * implemented in the future, but BPF programs must handle
+		 * the case where apply_cork requests are not honored. The
+		 * canonical method to verify this is to check data length.
+		 */
+		if (psock->cork_bytes) {
+			if (copy > psock->cork_bytes)
+				psock->cork_bytes = 0;
+			else
+				psock->cork_bytes -= copy;
+
+			if (psock->cork_bytes && !enospc)
+				goto out_cork;
+
+			/* All cork bytes accounted for re-run filter */
+			psock->eval = __SK_NONE;
+			psock->cork_bytes = 0;
+		}
+
+		err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+		if (unlikely(err < 0))
+			goto out_err;
+		continue;
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err)
+			goto out_err;
+	}
+out_err:
+	if (err < 0)
+		err = sk_stream_error(sk, msg->msg_flags, err);
+out_cork:
+	release_sock(sk);
+	smap_release_sock(psock, sk);
+	return copied ? copied : err;
+}
+
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+			    int offset, size_t size, int flags)
+{
+	struct sk_msg_buff md = {0}, *m = NULL;
+	int err = 0, copied = 0;
+	struct smap_psock *psock;
+	struct scatterlist *sg;
+	bool enospc = false;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto accept;
+
+	if (!refcount_inc_not_zero(&psock->refcnt))
+		goto accept;
+	rcu_read_unlock();
+
+	lock_sock(sk);
+
+	if (psock->cork_bytes)
+		m = psock->cork;
+	else
+		m = &md;
+
+	/* Catch case where ring is full and sendpage is stalled. */
+	if (unlikely(m->sg_end == m->sg_start &&
+	    m->sg_data[m->sg_end].length))
+		goto out_err;
+
+	psock->sg_size += size;
+	sg = &m->sg_data[m->sg_end];
+	sg_set_page(sg, page, size, offset);
+	get_page(page);
+	m->sg_copy[m->sg_end] = true;
+	sk_mem_charge(sk, size);
+	m->sg_end++;
+	copied = size;
+
+	if (m->sg_end == MAX_SKB_FRAGS)
+		m->sg_end = 0;
+
+	if (m->sg_end == m->sg_start)
+		enospc = true;
+
+	if (psock->cork_bytes) {
+		if (size > psock->cork_bytes)
+			psock->cork_bytes = 0;
+		else
+			psock->cork_bytes -= size;
+
+		if (psock->cork_bytes && !enospc)
+			goto out_err;
+
+		/* All cork bytes accounted for re-run filter */
+		psock->eval = __SK_NONE;
+		psock->cork_bytes = 0;
+	}
+
+	err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
+out_err:
+	release_sock(sk);
+	smap_release_sock(psock, sk);
+	return copied ? copied : err;
+accept:
+	rcu_read_unlock();
+	return tcp_sendpage(sk, page, offset, size, flags);
+}
+
+static void bpf_tcp_msg_add(struct smap_psock *psock,
+			    struct sock *sk,
+			    struct bpf_prog *tx_msg)
+{
+	struct bpf_prog *orig_tx_msg;
+
+	orig_tx_msg = xchg(&psock->bpf_tx_msg, tx_msg);
+	if (orig_tx_msg)
+		bpf_prog_put(orig_tx_msg);
+}
+
 static int bpf_tcp_ulp_register(void)
 {
 	tcp_bpf_proto = tcp_prot;
 	tcp_bpf_proto.close = bpf_tcp_close;
+	/* Once BPF TX ULP is registered it is never unregistered. It
+	 * will be in the ULP list for the lifetime of the system. Doing
+	 * duplicate registers is not a problem.
+	 */
 	return tcp_register_ulp(&bpf_tcp_ulp_ops);
 }
 
@@ -412,7 +1054,6 @@ static int smap_parse_func_strparser(struct strparser *strp,
 	return rc;
 }
 
-
 static int smap_read_sock_done(struct strparser *strp, int err)
 {
 	return err;
@@ -482,12 +1123,22 @@ static void smap_gc_work(struct work_struct *w)
 		bpf_prog_put(psock->bpf_parse);
 	if (psock->bpf_verdict)
 		bpf_prog_put(psock->bpf_verdict);
+	if (psock->bpf_tx_msg)
+		bpf_prog_put(psock->bpf_tx_msg);
+
+	if (psock->cork) {
+		free_start_sg(psock->sock, psock->cork);
+		kfree(psock->cork);
+	}
 
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
 		list_del(&e->list);
 		kfree(e);
 	}
 
+	if (psock->sk_redir)
+		sock_put(psock->sk_redir);
+
 	sock_put(psock->sock);
 	kfree(psock);
 }
@@ -503,6 +1154,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 	if (!psock)
 		return ERR_PTR(-ENOMEM);
 
+	psock->eval =  __SK_NONE;
 	psock->sock = sock;
 	skb_queue_head_init(&psock->rxqueue);
 	INIT_WORK(&psock->tx_work, smap_tx_work);
@@ -711,10 +1363,11 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 {
 	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
 	struct smap_psock_map_entry *e = NULL;
-	struct bpf_prog *verdict, *parse;
+	struct bpf_prog *verdict, *parse, *tx_msg;
 	struct sock *osock, *sock;
 	struct smap_psock *psock;
 	u32 i = *(u32 *)key;
+	bool new = false;
 	int err;
 
 	if (unlikely(flags > BPF_EXIST))
@@ -737,6 +1390,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 	 */
 	verdict = READ_ONCE(stab->bpf_verdict);
 	parse = READ_ONCE(stab->bpf_parse);
+	tx_msg = READ_ONCE(stab->bpf_tx_msg);
 
 	if (parse && verdict) {
 		/* bpf prog refcnt may be zero if a concurrent attach operation
@@ -755,6 +1409,17 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		}
 	}
 
+	if (tx_msg) {
+		tx_msg = bpf_prog_inc_not_zero(stab->bpf_tx_msg);
+		if (IS_ERR(tx_msg)) {
+			if (verdict)
+				bpf_prog_put(verdict);
+			if (parse)
+				bpf_prog_put(parse);
+			return PTR_ERR(tx_msg);
+		}
+	}
+
 	write_lock_bh(&sock->sk_callback_lock);
 	psock = smap_psock_sk(sock);
 
@@ -769,7 +1434,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 			err = -EBUSY;
 			goto out_progs;
 		}
-		refcount_inc(&psock->refcnt);
+		if (READ_ONCE(psock->bpf_tx_msg) && tx_msg) {
+			err = -EBUSY;
+			goto out_progs;
+		}
+		if (!refcount_inc_not_zero(&psock->refcnt)) {
+			err = -EAGAIN;
+			goto out_progs;
+		}
 	} else {
 		psock = smap_init_psock(sock, stab);
 		if (IS_ERR(psock)) {
@@ -777,11 +1449,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 			goto out_progs;
 		}
 
-		err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
-		if (err)
-			goto out_progs;
-
 		set_bit(SMAP_TX_RUNNING, &psock->state);
+		new = true;
 	}
 
 	e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
@@ -794,6 +1463,14 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 	/* 3. At this point we have a reference to a valid psock that is
 	 * running. Attach any BPF programs needed.
 	 */
+	if (tx_msg)
+		bpf_tcp_msg_add(psock, sock, tx_msg);
+	if (new) {
+		err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
+		if (err)
+			goto out_free;
+	}
+
 	if (parse && verdict && !psock->strp_enabled) {
 		err = smap_init_sock(psock, sock);
 		if (err)
@@ -815,8 +1492,6 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		struct smap_psock *opsock = smap_psock_sk(osock);
 
 		write_lock_bh(&osock->sk_callback_lock);
-		if (osock != sock && parse)
-			smap_stop_sock(opsock, osock);
 		smap_list_remove(opsock, &stab->sock_map[i]);
 		smap_release_sock(opsock, osock);
 		write_unlock_bh(&osock->sk_callback_lock);
@@ -829,6 +1504,8 @@ out_progs:
 		bpf_prog_put(verdict);
 	if (parse)
 		bpf_prog_put(parse);
+	if (tx_msg)
+		bpf_prog_put(tx_msg);
 	write_unlock_bh(&sock->sk_callback_lock);
 	kfree(e);
 	return err;
@@ -843,6 +1520,9 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
 		return -EINVAL;
 
 	switch (type) {
+	case BPF_SK_MSG_VERDICT:
+		orig = xchg(&stab->bpf_tx_msg, prog);
+		break;
 	case BPF_SK_SKB_STREAM_PARSER:
 		orig = xchg(&stab->bpf_parse, prog);
 		break;
@@ -904,6 +1584,10 @@ static void sock_map_release(struct bpf_map *map, struct file *map_file)
 	orig = xchg(&stab->bpf_verdict, NULL);
 	if (orig)
 		bpf_prog_put(orig);
+
+	orig = xchg(&stab->bpf_tx_msg, NULL);
+	if (orig)
+		bpf_prog_put(orig);
 }
 
 const struct bpf_map_ops sock_map_ops = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e24aa3241387..3aeb4ea2a93a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1315,7 +1315,8 @@ static int bpf_obj_get(const union bpf_attr *attr)
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
+static int sockmap_get_from_fd(const union bpf_attr *attr,
+			       int type, bool attach)
 {
 	struct bpf_prog *prog = NULL;
 	int ufd = attr->target_fd;
@@ -1329,8 +1330,7 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
 		return PTR_ERR(map);
 
 	if (attach) {
-		prog = bpf_prog_get_type(attr->attach_bpf_fd,
-					 BPF_PROG_TYPE_SK_SKB);
+		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
 		if (IS_ERR(prog)) {
 			fdput(f);
 			return PTR_ERR(prog);
@@ -1382,9 +1382,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_DEVICE:
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
+	case BPF_SK_MSG_VERDICT:
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, true);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
 	default:
 		return -EINVAL;
 	}
@@ -1437,9 +1439,11 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_DEVICE:
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
+	case BPF_SK_MSG_VERDICT:
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
 	default:
 		return -EINVAL;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index eb79a34359c0..e9f7c20691c1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1248,6 +1248,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_XDP:
 	case BPF_PROG_TYPE_LWT_XMIT:
 	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_SK_MSG:
 		if (meta)
 			return meta->pkt_access;
 
@@ -2071,7 +2072,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_MAP_TYPE_SOCKMAP:
 		if (func_id != BPF_FUNC_sk_redirect_map &&
 		    func_id != BPF_FUNC_sock_map_update &&
-		    func_id != BPF_FUNC_map_delete_elem)
+		    func_id != BPF_FUNC_map_delete_elem &&
+		    func_id != BPF_FUNC_msg_redirect_map)
 			goto error;
 		break;
 	default:
@@ -2109,6 +2111,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_FUNC_sk_redirect_map:
+	case BPF_FUNC_msg_redirect_map:
 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
 			goto error;
 		break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 33edfa8372fd..2b6c47597eab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1890,6 +1890,44 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
 	.arg4_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	/* If user passes invalid input drop the packet. */
+	if (unlikely(flags))
+		return SK_DROP;
+
+	msg->key = key;
+	msg->flags = flags;
+	msg->map = map;
+
+	return SK_PASS;
+}
+
+struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
+{
+	struct sock *sk = NULL;
+
+	if (msg->map) {
+		sk = __sock_map_lookup_elem(msg->map, msg->key);
+
+		msg->key = 0;
+		msg->map = NULL;
+	}
+
+	return sk;
+}
+
+static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+	.func           = bpf_msg_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -3591,6 +3629,16 @@ static const struct bpf_func_proto *
 	}
 }
 
+static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_msg_redirect_map:
+		return &bpf_msg_redirect_map_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
 static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -3980,6 +4028,32 @@ static bool sk_skb_is_valid_access(int off, int size,
 	return bpf_skb_is_valid_access(off, size, type, info);
 }
 
+static bool sk_msg_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE)
+		return false;
+
+	switch (off) {
+	case offsetof(struct sk_msg_md, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case offsetof(struct sk_msg_md, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	if (off < 0 || off >= sizeof(struct sk_msg_md))
+		return false;
+	if (off % size != 0)
+		return false;
+	if (size != sizeof(__u64))
+		return false;
+
+	return true;
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
@@ -4778,6 +4852,29 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
+				     const struct bpf_insn *si,
+				     struct bpf_insn *insn_buf,
+				     struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct sk_msg_md, data):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_msg_buff, data));
+		break;
+	case offsetof(struct sk_msg_md, data_end):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_msg_buff, data_end));
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 const struct bpf_verifier_ops sk_filter_verifier_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
@@ -4868,6 +4965,15 @@ const struct bpf_verifier_ops sk_skb_verifier_ops = {
 const struct bpf_prog_ops sk_skb_prog_ops = {
 };
 
+const struct bpf_verifier_ops sk_msg_verifier_ops = {
+	.get_func_proto		= sk_msg_func_proto,
+	.is_valid_access	= sk_msg_is_valid_access,
+	.convert_ctx_access	= sk_msg_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_msg_prog_ops = {
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;

From 2a100317c9ebc204a166f16294884fbf9da074ce Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:15 -0700
Subject: [PATCH 1019/1678] bpf: sockmap, add bpf_msg_apply_bytes() helper

A single sendmsg or sendfile system call can contain multiple logical
messages that a BPF program may want to read and apply a verdict. But,
without an apply_bytes helper any verdict on the data applies to all
bytes in the sendmsg/sendfile. Alternatively, a BPF program may only
care to read the first N bytes of a msg. If the payload is large say
MB or even GB setting up and calling the BPF program repeatedly for
all bytes, even though the verdict is already known, creates
unnecessary overhead.

To allow BPF programs to control how many bytes a given verdict
applies to we implement a bpf_msg_apply_bytes() helper. When called
from within a BPF program this sets a counter, internal to the
BPF infrastructure, that applies the last verdict to the next N
bytes. If the N is smaller than the current data being processed
from a sendmsg/sendfile call, the first N bytes will be sent and
the BPF program will be re-run with start_data pointing to the N+1
byte. If N is larger than the current data being processed the
BPF verdict will be applied to multiple sendmsg/sendfile calls
until N bytes are consumed.

Note1 if a socket closes with apply_bytes counter non-zero this
is not a problem because data is not being buffered for N bytes
and is sent as its received.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h |  3 ++-
 net/core/filter.c        | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ef529539abde..a557a2a5d72d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -791,7 +791,8 @@ union bpf_attr {
 	FN(getsockopt),			\
 	FN(override_return),		\
 	FN(sock_ops_cb_flags_set),	\
-	FN(msg_redirect_map),
+	FN(msg_redirect_map),		\
+	FN(msg_apply_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 2b6c47597eab..17d6775f5431 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1928,6 +1928,20 @@ static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
 	.arg4_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+	msg->apply_bytes = bytes;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
+	.func           = bpf_msg_apply_bytes,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -3634,6 +3648,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
 	switch (func_id) {
 	case BPF_FUNC_msg_redirect_map:
 		return &bpf_msg_redirect_map_proto;
+	case BPF_FUNC_msg_apply_bytes:
+		return &bpf_msg_apply_bytes_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}

From 91843d540a139eb8070bcff8aa10089164436deb Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:20 -0700
Subject: [PATCH 1020/1678] bpf: sockmap, add msg_cork_bytes() helper

In the case where we need a specific number of bytes before a
verdict can be assigned, even if the data spans multiple sendmsg
or sendfile calls. The BPF program may use msg_cork_bytes().

The extreme case is a user can call sendmsg repeatedly with
1-byte msg segments. Obviously, this is bad for performance but
is still valid. If the BPF program needs N bytes to validate
a header it can use msg_cork_bytes to specify N bytes and the
BPF program will not be called again until N bytes have been
accumulated. The infrastructure will attempt to coalesce data
if possible so in many cases (most my use cases at least) the
data will be in a single scatterlist element with data pointers
pointing to start/end of the element. However, this is dependent
on available memory so is not guaranteed. So BPF programs must
validate data pointer ranges, but this is the case anyways to
convince the verifier the accesses are valid.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h |  3 ++-
 net/core/filter.c        | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a557a2a5d72d..1765cfb16c99 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -792,7 +792,8 @@ union bpf_attr {
 	FN(override_return),		\
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
-	FN(msg_apply_bytes),
+	FN(msg_apply_bytes),		\
+	FN(msg_cork_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 17d6775f5431..0c9daf6ee555 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1942,6 +1942,20 @@ static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+{
+	msg->cork_bytes = bytes;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
+	.func           = bpf_msg_cork_bytes,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -3650,6 +3664,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
 		return &bpf_msg_redirect_map_proto;
 	case BPF_FUNC_msg_apply_bytes:
 		return &bpf_msg_apply_bytes_proto;
+	case BPF_FUNC_msg_cork_bytes:
+		return &bpf_msg_cork_bytes_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}

From 015632bb30daaaee64e1bcac07570860e0bf3092 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:25 -0700
Subject: [PATCH 1021/1678] bpf: sk_msg program helper bpf_sk_msg_pull_data

Currently, if a bpf sk msg program is run the program
can only parse data that the (start,end) pointers already
consumed. For sendmsg hooks this is likely the first
scatterlist element. For sendpage this will be the range
(0,0) because the data is shared with userspace and by
default we want to avoid allowing userspace to modify
data while (or after) BPF verdict is being decided.

To support pulling in additional bytes for parsing use
a new helper bpf_sk_msg_pull(start, end, flags) which
works similar to cls tc logic. This helper will attempt
to point the data start pointer at 'start' bytes offest
into msg and data end pointer at 'end' bytes offset into
message.

After basic sanity checks to ensure 'start' <= 'end' and
'end' <= msg_length there are a few cases we need to
handle.

First the sendmsg hook has already copied the data from
userspace and has exclusive access to it. Therefor, it
is not necessesary to copy the data. However, it may
be required. After finding the scatterlist element with
'start' offset byte in it there are two cases. One the
range (start,end) is entirely contained in the sg element
and is already linear. All that is needed is to update the
data pointers, no allocate/copy is needed. The other case
is (start, end) crosses sg element boundaries. In this
case we allocate a block of size 'end - start' and copy
the data to linearize it.

Next sendpage hook has not copied any data in initial
state so that data pointers are (0,0). In this case we
handle it similar to the above sendmsg case except the
allocation/copy must always happen. Then when sending
the data we have possibly three memory regions that
need to be sent, (0, start - 1), (start, end), and
(end + 1, msg_length). This is required to ensure any
writes by the BPF program are correctly transmitted.

Lastly this operation will invalidate any previous
data checks so BPF programs will have to revalidate
pointers after making this BPF call.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h |   3 +-
 net/core/filter.c        | 135 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1765cfb16c99..18b7c510c511 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -793,7 +793,8 @@ union bpf_attr {
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
-	FN(msg_cork_bytes),
+	FN(msg_cork_bytes),		\
+	FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c9daf6ee555..c86f03fd9ea5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1956,6 +1956,136 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_msg_pull_data,
+	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+{
+	unsigned int len = 0, offset = 0, copy = 0;
+	struct scatterlist *sg = msg->sg_data;
+	int first_sg, last_sg, i, shift;
+	unsigned char *p, *to, *from;
+	int bytes = end - start;
+	struct page *page;
+
+	if (unlikely(flags || end <= start))
+		return -EINVAL;
+
+	/* First find the starting scatterlist element */
+	i = msg->sg_start;
+	do {
+		len = sg[i].length;
+		offset += len;
+		if (start < offset + len)
+			break;
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (i != msg->sg_end);
+
+	if (unlikely(start >= offset + len))
+		return -EINVAL;
+
+	if (!msg->sg_copy[i] && bytes <= len)
+		goto out;
+
+	first_sg = i;
+
+	/* At this point we need to linearize multiple scatterlist
+	 * elements or a single shared page. Either way we need to
+	 * copy into a linear buffer exclusively owned by BPF. Then
+	 * place the buffer in the scatterlist and fixup the original
+	 * entries by removing the entries now in the linear buffer
+	 * and shifting the remaining entries. For now we do not try
+	 * to copy partial entries to avoid complexity of running out
+	 * of sg_entry slots. The downside is reading a single byte
+	 * will copy the entire sg entry.
+	 */
+	do {
+		copy += sg[i].length;
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+		if (bytes < copy)
+			break;
+	} while (i != msg->sg_end);
+	last_sg = i;
+
+	if (unlikely(copy < end - start))
+		return -EINVAL;
+
+	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
+	if (unlikely(!page))
+		return -ENOMEM;
+	p = page_address(page);
+	offset = 0;
+
+	i = first_sg;
+	do {
+		from = sg_virt(&sg[i]);
+		len = sg[i].length;
+		to = p + offset;
+
+		memcpy(to, from, len);
+		offset += len;
+		sg[i].length = 0;
+		put_page(sg_page(&sg[i]));
+
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (i != last_sg);
+
+	sg[first_sg].length = copy;
+	sg_set_page(&sg[first_sg], page, copy, 0);
+
+	/* To repair sg ring we need to shift entries. If we only
+	 * had a single entry though we can just replace it and
+	 * be done. Otherwise walk the ring and shift the entries.
+	 */
+	shift = last_sg - first_sg - 1;
+	if (!shift)
+		goto out;
+
+	i = first_sg + 1;
+	do {
+		int move_from;
+
+		if (i + shift >= MAX_SKB_FRAGS)
+			move_from = i + shift - MAX_SKB_FRAGS;
+		else
+			move_from = i + shift;
+
+		if (move_from == msg->sg_end)
+			break;
+
+		sg[i] = sg[move_from];
+		sg[move_from].length = 0;
+		sg[move_from].page_link = 0;
+		sg[move_from].offset = 0;
+
+		i++;
+		if (i == MAX_SKB_FRAGS)
+			i = 0;
+	} while (1);
+	msg->sg_end -= shift;
+	if (msg->sg_end < 0)
+		msg->sg_end += MAX_SKB_FRAGS;
+out:
+	msg->data = sg_virt(&sg[i]) + start - offset;
+	msg->data_end = msg->data + bytes;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pull_data_proto = {
+	.func		= bpf_msg_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -2897,7 +3027,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
 	    func == bpf_xdp_adjust_head ||
-	    func == bpf_xdp_adjust_meta)
+	    func == bpf_xdp_adjust_meta ||
+	    func == bpf_msg_pull_data)
 		return true;
 
 	return false;
@@ -3666,6 +3797,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
 		return &bpf_msg_apply_bytes_proto;
 	case BPF_FUNC_msg_cork_bytes:
 		return &bpf_msg_cork_bytes_proto;
+	case BPF_FUNC_msg_pull_data:
+		return &bpf_msg_pull_data_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}

From 82a8616889d506cb690cfc0afb2ccadda120461d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:31 -0700
Subject: [PATCH 1022/1678] bpf: add map tests for BPF_PROG_TYPE_SK_MSG

Add map tests to attach BPF_PROG_TYPE_SK_MSG types to a sockmap.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h                | 10 ++++
 tools/testing/selftests/bpf/Makefile          |  3 +-
 tools/testing/selftests/bpf/bpf_helpers.h     |  2 +
 .../selftests/bpf/sockmap_parse_prog.c        | 15 ++++-
 .../selftests/bpf/sockmap_tcp_msg_prog.c      | 33 +++++++++++
 .../selftests/bpf/sockmap_verdict_prog.c      |  7 +++
 tools/testing/selftests/bpf/test_maps.c       | 55 +++++++++++++++++--
 7 files changed, 118 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1944d0aee7e8..13d9c59d79ce 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -133,6 +133,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
+	BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -143,6 +144,7 @@ enum bpf_attach_type {
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
+	BPF_SK_MSG_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -941,6 +943,14 @@ enum sk_action {
 	SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+	void *data;
+	void *data_end;
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b0d29fd5e51c..f35fb02bdf56 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -29,7 +29,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
-	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o
+	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+	sockmap_tcp_msg_prog.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index dde2c11d7771..1558fe8bcf3e 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -123,6 +123,8 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_skb_under_cgroup;
 static int (*bpf_skb_change_head)(void *, int len, int flags) =
 	(void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+	(void *) BPF_FUNC_skb_pull_data;
 
 /* Scan the ARCH passed in from ARCH env variable (see Makefile) */
 #if defined(__TARGET_ARCH_x86)
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
index a1dec2b6d9c5..0f92858f6226 100644
--- a/tools/testing/selftests/bpf/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -20,14 +20,25 @@ int bpf_prog1(struct __sk_buff *skb)
 	__u32 lport = skb->local_port;
 	__u32 rport = skb->remote_port;
 	__u8 *d = data;
+	__u32 len = (__u32) data_end - (__u32) data;
+	int err;
 
-	if (data + 10 > data_end)
-		return skb->len;
+	if (data + 10 > data_end) {
+		err = bpf_skb_pull_data(skb, 10);
+		if (err)
+			return SK_DROP;
+
+		data_end = (void *)(long)skb->data_end;
+		data = (void *)(long)skb->data;
+		if (data + 10 > data_end)
+			return SK_DROP;
+	}
 
 	/* This write/read is a bit pointless but tests the verifier and
 	 * strparser handler for read/write pkt data and access into sk
 	 * fields.
 	 */
+	d = data;
 	d[7] = 1;
 	return skb->len;
 }
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644
index 000000000000..12a7b5c82ed6
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+
+	char *d;
+
+	if (data + 8 > data_end)
+		return SK_DROP;
+
+	bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+	d = (char *)data;
+	bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
index d7bea972cb21..2ce7634a4012 100644
--- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -26,6 +26,13 @@ struct bpf_map_def SEC("maps") sock_map_tx = {
 	.max_entries = 20,
 };
 
+struct bpf_map_def SEC("maps") sock_map_msg = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
 struct bpf_map_def SEC("maps") sock_map_break = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(int),
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 1238733c5b33..6c253343a6f9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -464,15 +464,17 @@ static void test_devmap(int task, void *data)
 #include <linux/err.h>
 #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
 #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
 static void test_sockmap(int tasks, void *data)
 {
-	int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
-	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
+	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+	int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
 	int ports[] = {50200, 50201, 50202, 50204};
 	int err, i, fd, udp, sfd[6] = {0xdeadbeef};
 	u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
-	int parse_prog, verdict_prog;
+	int parse_prog, verdict_prog, msg_prog;
 	struct sockaddr_in addr;
+	int one = 1, s, sc, rc;
 	struct bpf_object *obj;
 	struct timeval to;
 	__u32 key, value;
@@ -584,6 +586,12 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+	if (!err) {
+		printf("Failed invalid msg verdict prog attach\n");
+		goto out_sockmap;
+	}
+
 	err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
 	if (!err) {
 		printf("Failed unknown prog attach\n");
@@ -602,6 +610,12 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+	if (err) {
+		printf("Failed empty msg verdict prog detach\n");
+		goto out_sockmap;
+	}
+
 	err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
 	if (!err) {
 		printf("Detach invalid prog successful\n");
@@ -616,6 +630,13 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+			    BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+	if (err) {
+		printf("Failed to load SK_SKB msg prog\n");
+		goto out_sockmap;
+	}
+
 	err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
 			    BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
 	if (err) {
@@ -631,7 +652,7 @@ static void test_sockmap(int tasks, void *data)
 
 	map_fd_rx = bpf_map__fd(bpf_map_rx);
 	if (map_fd_rx < 0) {
-		printf("Failed to get map fd\n");
+		printf("Failed to get map rx fd\n");
 		goto out_sockmap;
 	}
 
@@ -647,6 +668,18 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+	if (IS_ERR(bpf_map_msg)) {
+		printf("Failed to load map msg from msg_verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_msg = bpf_map__fd(bpf_map_msg);
+	if (map_fd_msg < 0) {
+		printf("Failed to get map msg fd\n");
+		goto out_sockmap;
+	}
+
 	bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
 	if (IS_ERR(bpf_map_break)) {
 		printf("Failed to load map tx from verdict prog\n");
@@ -680,6 +713,12 @@ static void test_sockmap(int tasks, void *data)
 		goto out_sockmap;
 	}
 
+	err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+	if (err) {
+		printf("Failed msg verdict bpf prog attach\n");
+		goto out_sockmap;
+	}
+
 	err = bpf_prog_attach(verdict_prog, map_fd_rx,
 			      __MAX_BPF_ATTACH_TYPE, 0);
 	if (!err) {
@@ -719,6 +758,14 @@ static void test_sockmap(int tasks, void *data)
 		}
 	}
 
+	/* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+	i = 0;
+	err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+	if (err) {
+		printf("Failed map_fd_msg update sockmap %i\n", err);
+		goto out_sockmap;
+	}
+
 	/* Test map send/recv */
 	for (i = 0; i < 2; i++) {
 		buf[0] = i;

From 1acc60b6a41bd4e43c3cb10e5395a5c812e158f5 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:36 -0700
Subject: [PATCH 1023/1678] bpf: add verifier tests for BPF_PROG_TYPE_SK_MSG

Test read and writes for BPF_PROG_TYPE_SK_MSG.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 54 +++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 86d7ff491b6f..3e7718b1a9ae 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1596,6 +1596,60 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SK_SKB,
 	},
+	{
+		"direct packet read for SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data)),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"direct packet write for SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data)),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"overlapping checks for direct packet access SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data)),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
 	{
 		"check skb->mark is not writeable by sockets",
 		.insns = {

From 4c4c3c276c099f265c8b11e0132ce826ee718e2c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:41 -0700
Subject: [PATCH 1024/1678] bpf: sockmap sample, add option to attach SK_MSG
 program

Add sockmap option to use SK_MSG program types.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_load.c                    |  8 ++-
 samples/sockmap/sockmap_kern.c            | 52 ++++++++++++++++++
 samples/sockmap/sockmap_user.c            | 67 +++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h            | 13 ++++-
 tools/lib/bpf/libbpf.c                    |  1 +
 tools/testing/selftests/bpf/bpf_helpers.h |  3 +
 6 files changed, 135 insertions(+), 9 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 69806d74fa53..b1a310c3ae89 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -67,6 +67,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
 	bool is_sockops = strncmp(event, "sockops", 7) == 0;
 	bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
+	bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
 	size_t insns_cnt = size / sizeof(struct bpf_insn);
 	enum bpf_prog_type prog_type;
 	char buf[256];
@@ -96,6 +97,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_SOCK_OPS;
 	} else if (is_sk_skb) {
 		prog_type = BPF_PROG_TYPE_SK_SKB;
+	} else if (is_sk_msg) {
+		prog_type = BPF_PROG_TYPE_SK_MSG;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -113,7 +116,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
 		return 0;
 
-	if (is_socket || is_sockops || is_sk_skb) {
+	if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
 		if (is_socket)
 			event += 6;
 		else
@@ -589,7 +592,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		    memcmp(shname, "socket", 6) == 0 ||
 		    memcmp(shname, "cgroup/", 7) == 0 ||
 		    memcmp(shname, "sockops", 7) == 0 ||
-		    memcmp(shname, "sk_skb", 6) == 0) {
+		    memcmp(shname, "sk_skb", 6) == 0 ||
+		    memcmp(shname, "sk_msg", 6) == 0) {
 			ret = load_and_attach(shname, data->d_buf,
 					      data->d_size);
 			if (ret != 0)
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 52b0053274f4..75edb2f151d4 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -43,6 +43,20 @@ struct bpf_map_def SEC("maps") sock_map = {
 	.max_entries = 20,
 };
 
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -105,4 +119,42 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 
 	return 0;
 }
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+	return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+
+	bpf_printk("sk_msg2: data length %i\n", (__u32)data_end - (__u32)data);
+	return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0;
+
+	return bpf_msg_redirect_map(msg, &sock_map_redir, ret, 0);
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0;
+
+	bpf_printk("sk_msg3: redirect(%iB)\n", (__u32)data_end - (__u32)data);
+	return bpf_msg_redirect_map(msg, &sock_map_redir, ret, 0);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 95a54a89a532..bbfe3a2b9885 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -54,6 +54,11 @@ void running_handler(int a);
 /* global sockets */
 int s1, s2, c1, c2, p1, p2;
 
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
 	{"cgroup",	required_argument,	NULL, 'c' },
@@ -62,6 +67,10 @@ static const struct option long_options[] = {
 	{"iov_count",	required_argument,	NULL, 'i' },
 	{"length",	required_argument,	NULL, 'l' },
 	{"test",	required_argument,	NULL, 't' },
+	{"txmsg",		no_argument,		&txmsg_pass,  1  },
+	{"txmsg_noisy",		no_argument,		&txmsg_noisy, 1  },
+	{"txmsg_redir",		no_argument,		&txmsg_redir, 1  },
+	{"txmsg_redir_noisy",	no_argument,		&txmsg_redir_noisy, 1},
 	{0, 0, NULL, 0 }
 };
 
@@ -447,13 +456,13 @@ enum {
 
 int main(int argc, char **argv)
 {
-	int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+	int iov_count = 1, length = 1024, rate = 1, verbose = 0, tx_prog_fd;
 	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
 	int opt, longindex, err, cg_fd = 0;
 	int test = PING_PONG;
 	char filename[256];
 
-	while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+	while ((opt = getopt_long(argc, argv, ":hvc:r:i:l:t:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		/* Cgroup configuration */
@@ -490,6 +499,8 @@ int main(int argc, char **argv)
 				return -1;
 			}
 			break;
+		case 0:
+			break;
 		case 'h':
 		default:
 			usage(argv);
@@ -515,16 +526,16 @@ int main(int argc, char **argv)
 	/* catch SIGINT */
 	signal(SIGINT, running_handler);
 
-	/* If base test skip BPF setup */
-	if (test == BASE)
-		goto run;
-
 	if (load_bpf_file(filename)) {
 		fprintf(stderr, "load_bpf_file: (%s) %s\n",
 			filename, strerror(errno));
 		return 1;
 	}
 
+	/* If base test skip BPF setup */
+	if (test == BASE)
+		goto run;
+
 	/* Attach programs to sockmap */
 	err = bpf_prog_attach(prog_fd[0], map_fd[0],
 				BPF_SK_SKB_STREAM_PARSER, 0);
@@ -557,6 +568,50 @@ run:
 		goto out;
 	}
 
+	/* Attach txmsg program to sockmap */
+	if (txmsg_pass)
+		tx_prog_fd = prog_fd[3];
+	else if (txmsg_noisy)
+		tx_prog_fd = prog_fd[4];
+	else if (txmsg_redir)
+		tx_prog_fd = prog_fd[5];
+	else if (txmsg_redir_noisy)
+		tx_prog_fd = prog_fd[6];
+	else
+		tx_prog_fd = 0;
+
+	if (tx_prog_fd) {
+		int redir_fd, i = 0;
+
+		err = bpf_prog_attach(tx_prog_fd,
+				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+				err, strerror(errno));
+			return err;
+		}
+
+		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			return err;
+		}
+		if (test == SENDMSG)
+			redir_fd = c2;
+		else
+			redir_fd = c1;
+
+		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			return err;
+		}
+	}
 	if (test == PING_PONG)
 		err = forever_ping_pong(rate, verbose);
 	else if (test == SENDMSG)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 13d9c59d79ce..01b9c97b172e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -720,6 +720,15 @@ union bpf_attr {
  * int bpf_override_return(pt_regs, rc)
  *	@pt_regs: pointer to struct pt_regs
  *	@rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -781,7 +790,9 @@ union bpf_attr {
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
 	FN(override_return),		\
-	FN(sock_ops_cb_flags_set),
+	FN(sock_ops_cb_flags_set),	\
+	FN(msg_redirect_map),		\
+	FN(msg_apply_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5bbbf285af74..64a8fc384186 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1857,6 +1857,7 @@ static const struct {
 	BPF_PROG_SEC("lwt_xmit",	BPF_PROG_TYPE_LWT_XMIT),
 	BPF_PROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS),
 	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
+	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
 };
 #undef BPF_PROG_SEC
 
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 1558fe8bcf3e..bba7ee6d93ab 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -86,6 +86,9 @@ static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
 	(void *) BPF_FUNC_perf_prog_read_value;
 static int (*bpf_override_return)(void *ctx, unsigned long rc) =
 	(void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+	(void *) BPF_FUNC_msg_redirect_map;
+
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions

From e67463cb5d9345d762aa7325e58aaeff5bc68ee1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:46 -0700
Subject: [PATCH 1025/1678] bpf: sockmap sample, add sendfile test

To exercise TX ULP sendpage implementation we need a test that does
a sendfile. Add sendfile test option here.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_user.c | 70 +++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 10 deletions(-)

diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index bbfe3a2b9885..ec624a801306 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -29,6 +29,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
+#include <sys/sendfile.h>
 
 #include <linux/netlink.h>
 #include <linux/socket.h>
@@ -67,10 +68,10 @@ static const struct option long_options[] = {
 	{"iov_count",	required_argument,	NULL, 'i' },
 	{"length",	required_argument,	NULL, 'l' },
 	{"test",	required_argument,	NULL, 't' },
-	{"txmsg",		no_argument,		&txmsg_pass,  1  },
-	{"txmsg_noisy",		no_argument,		&txmsg_noisy, 1  },
-	{"txmsg_redir",		no_argument,		&txmsg_redir, 1  },
-	{"txmsg_redir_noisy",	no_argument,		&txmsg_redir_noisy, 1},
+	{"txmsg",		no_argument,	&txmsg_pass,  1  },
+	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
+	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
+	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
 	{0, 0, NULL, 0 }
 };
 
@@ -204,6 +205,35 @@ struct msg_stats {
 	struct timespec end;
 };
 
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+			     struct msg_stats *s)
+{
+	off_t offset = 0;
+	FILE *file;
+	int i, fp;
+
+	file = fopen(".sendpage_tst.tmp", "w+");
+	fseek(file, iov_length * cnt, SEEK_CUR);
+	fprintf(file, "A");
+	fseek(file, 0, SEEK_SET);
+
+	fp = fileno(file);
+	clock_gettime(CLOCK_MONOTONIC, &s->start);
+	for (i = 0; i < cnt; i++) {
+		int sent = sendfile(fd, fp, &offset, iov_length);
+
+		if (sent < 0) {
+			perror("send loop error:");
+			fclose(file);
+			return sent;
+		}
+		s->bytes_sent += sent;
+	}
+	clock_gettime(CLOCK_MONOTONIC, &s->end);
+	fclose(file);
+	return 0;
+}
+
 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		    struct msg_stats *s, bool tx)
 {
@@ -309,7 +339,7 @@ static inline float recvdBps(struct msg_stats s)
 }
 
 static int sendmsg_test(int iov_count, int iov_buf, int cnt,
-			int verbose, bool base)
+			int verbose, bool base, bool sendpage)
 {
 	float sent_Bps = 0, recvd_Bps = 0;
 	int rx_fd, txpid, rxpid, err = 0;
@@ -325,6 +355,8 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
 	rxpid = fork();
 	if (rxpid == 0) {
+		if (sendpage)
+			iov_count = 1;
 		err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
 		if (err)
 			fprintf(stderr,
@@ -348,7 +380,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
 	txpid = fork();
 	if (txpid == 0) {
-		err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+		if (sendpage)
+			err = msg_loop_sendpage(c1, iov_buf, cnt, &s);
+		else
+			err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+
 		if (err)
 			fprintf(stderr,
 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -452,6 +488,8 @@ enum {
 	PING_PONG,
 	SENDMSG,
 	BASE,
+	BASE_SENDPAGE,
+	SENDPAGE,
 };
 
 int main(int argc, char **argv)
@@ -494,6 +532,10 @@ int main(int argc, char **argv)
 				test = SENDMSG;
 			} else if (strcmp(optarg, "base") == 0) {
 				test = BASE;
+			} else if (strcmp(optarg, "base_sendpage") == 0) {
+				test = BASE_SENDPAGE;
+			} else if (strcmp(optarg, "sendpage") == 0) {
+				test = SENDPAGE;
 			} else {
 				usage(argv);
 				return -1;
@@ -533,7 +575,7 @@ int main(int argc, char **argv)
 	}
 
 	/* If base test skip BPF setup */
-	if (test == BASE)
+	if (test == BASE || test == BASE_SENDPAGE)
 		goto run;
 
 	/* Attach programs to sockmap */
@@ -599,7 +641,7 @@ run:
 				err, strerror(errno));
 			return err;
 		}
-		if (test == SENDMSG)
+		if (txmsg_redir || txmsg_redir_noisy)
 			redir_fd = c2;
 		else
 			redir_fd = c1;
@@ -615,9 +657,17 @@ run:
 	if (test == PING_PONG)
 		err = forever_ping_pong(rate, verbose);
 	else if (test == SENDMSG)
-		err = sendmsg_test(iov_count, length, rate, verbose, false);
+		err = sendmsg_test(iov_count, length, rate,
+				   verbose, false, false);
+	else if (test == SENDPAGE)
+		err = sendmsg_test(iov_count, length, rate,
+				   verbose, false, true);
 	else if (test == BASE)
-		err = sendmsg_test(iov_count, length, rate, verbose, true);
+		err = sendmsg_test(iov_count, length, rate,
+				   verbose, true, false);
+	else if (test == BASE_SENDPAGE)
+		err = sendmsg_test(iov_count, length, rate,
+				   verbose, true, true);
 	else
 		fprintf(stderr, "unknown test\n");
 out:

From 6bce9d2ca65238d37890186bf41acd7f7d9a9820 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:51 -0700
Subject: [PATCH 1026/1678] bpf: sockmap sample, add data verification option

To verify data is not being dropped or corrupted this adds an option
to verify test-patterns on recv.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_user.c | 118 +++++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 34 deletions(-)

diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index ec624a801306..8017ad7afea9 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -68,6 +68,7 @@ static const struct option long_options[] = {
 	{"iov_count",	required_argument,	NULL, 'i' },
 	{"length",	required_argument,	NULL, 'l' },
 	{"test",	required_argument,	NULL, 't' },
+	{"data_test",   no_argument,		NULL, 'd' },
 	{"txmsg",		no_argument,	&txmsg_pass,  1  },
 	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
@@ -208,45 +209,49 @@ struct msg_stats {
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 			     struct msg_stats *s)
 {
-	off_t offset = 0;
+	unsigned char k = 0;
 	FILE *file;
 	int i, fp;
 
 	file = fopen(".sendpage_tst.tmp", "w+");
-	fseek(file, iov_length * cnt, SEEK_CUR);
-	fprintf(file, "A");
+	for (i = 0; i < iov_length * cnt; i++, k++)
+		fwrite(&k, sizeof(char), 1, file);
+	fflush(file);
 	fseek(file, 0, SEEK_SET);
+	fclose(file);
 
-	fp = fileno(file);
+	fp = open(".sendpage_tst.tmp", O_RDONLY);
 	clock_gettime(CLOCK_MONOTONIC, &s->start);
 	for (i = 0; i < cnt; i++) {
-		int sent = sendfile(fd, fp, &offset, iov_length);
+		int sent = sendfile(fd, fp, NULL, iov_length);
 
 		if (sent < 0) {
 			perror("send loop error:");
-			fclose(file);
+			close(fp);
 			return sent;
 		}
 		s->bytes_sent += sent;
 	}
 	clock_gettime(CLOCK_MONOTONIC, &s->end);
-	fclose(file);
+	close(fp);
 	return 0;
 }
 
 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
-		    struct msg_stats *s, bool tx)
+		    struct msg_stats *s, bool tx, bool data_test)
 {
 	struct msghdr msg = {0};
 	int err, i, flags = MSG_NOSIGNAL;
 	struct iovec *iov;
+	unsigned char k;
 
 	iov = calloc(iov_count, sizeof(struct iovec));
 	if (!iov)
 		return errno;
 
+	k = 0;
 	for (i = 0; i < iov_count; i++) {
-		char *d = calloc(iov_length, sizeof(char));
+		unsigned char *d = calloc(iov_length, sizeof(char));
 
 		if (!d) {
 			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
@@ -254,10 +259,18 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		}
 		iov[i].iov_base = d;
 		iov[i].iov_len = iov_length;
+
+		if (data_test && tx) {
+			int j;
+
+			for (j = 0; j < iov_length; j++)
+				d[j] = k++;
+		}
 	}
 
 	msg.msg_iov = iov;
 	msg.msg_iovlen = iov_count;
+	k = 0;
 
 	if (tx) {
 		clock_gettime(CLOCK_MONOTONIC, &s->start);
@@ -311,6 +324,26 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 			}
 
 			s->bytes_recvd += recv;
+
+			if (data_test) {
+				int j;
+
+				for (i = 0; i < msg.msg_iovlen; i++) {
+					unsigned char *d = iov[i].iov_base;
+
+					for (j = 0;
+					     j < iov[i].iov_len && recv; j++) {
+						if (d[j] != k++) {
+							errno = -EIO;
+							fprintf(stderr,
+								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+								i, j, d[j], k - 1, d[j+1], k + 1);
+							goto out_errno;
+						}
+						recv--;
+					}
+				}
+			}
 		}
 		clock_gettime(CLOCK_MONOTONIC, &s->end);
 	}
@@ -338,8 +371,15 @@ static inline float recvdBps(struct msg_stats s)
 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
 }
 
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+};
+
 static int sendmsg_test(int iov_count, int iov_buf, int cnt,
-			int verbose, bool base, bool sendpage)
+			struct sockmap_options *opt)
 {
 	float sent_Bps = 0, recvd_Bps = 0;
 	int rx_fd, txpid, rxpid, err = 0;
@@ -348,16 +388,17 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
 	errno = 0;
 
-	if (base)
+	if (opt->base)
 		rx_fd = p1;
 	else
 		rx_fd = p2;
 
 	rxpid = fork();
 	if (rxpid == 0) {
-		if (sendpage)
+		if (opt->sendpage)
 			iov_count = 1;
-		err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+		err = msg_loop(rx_fd, iov_count, iov_buf,
+			       cnt, &s, false, opt->data_test);
 		if (err)
 			fprintf(stderr,
 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -380,10 +421,11 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
 	txpid = fork();
 	if (txpid == 0) {
-		if (sendpage)
+		if (opt->sendpage)
 			err = msg_loop_sendpage(c1, iov_buf, cnt, &s);
 		else
-			err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+			err = msg_loop(c1, iov_count, iov_buf,
+				       cnt, &s, true, opt->data_test);
 
 		if (err)
 			fprintf(stderr,
@@ -409,7 +451,7 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 	return err;
 }
 
-static int forever_ping_pong(int rate, int verbose)
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
 {
 	struct timeval timeout;
 	char buf[1024] = {0};
@@ -474,7 +516,7 @@ static int forever_ping_pong(int rate, int verbose)
 		if (rate)
 			sleep(rate);
 
-		if (verbose) {
+		if (opt->verbose) {
 			printf(".");
 			fflush(stdout);
 
@@ -494,13 +536,14 @@ enum {
 
 int main(int argc, char **argv)
 {
-	int iov_count = 1, length = 1024, rate = 1, verbose = 0, tx_prog_fd;
+	int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
 	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
 	int opt, longindex, err, cg_fd = 0;
+	struct sockmap_options options = {0};
 	int test = PING_PONG;
 	char filename[256];
 
-	while ((opt = getopt_long(argc, argv, ":hvc:r:i:l:t:",
+	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		/* Cgroup configuration */
@@ -517,7 +560,7 @@ int main(int argc, char **argv)
 			rate = atoi(optarg);
 			break;
 		case 'v':
-			verbose = 1;
+			options.verbose = 1;
 			break;
 		case 'i':
 			iov_count = atoi(optarg);
@@ -525,6 +568,9 @@ int main(int argc, char **argv)
 		case 'l':
 			length = atoi(optarg);
 			break;
+		case 'd':
+			options.data_test = true;
+			break;
 		case 't':
 			if (strcmp(optarg, "ping") == 0) {
 				test = PING_PONG;
@@ -655,20 +701,24 @@ run:
 		}
 	}
 	if (test == PING_PONG)
-		err = forever_ping_pong(rate, verbose);
-	else if (test == SENDMSG)
-		err = sendmsg_test(iov_count, length, rate,
-				   verbose, false, false);
-	else if (test == SENDPAGE)
-		err = sendmsg_test(iov_count, length, rate,
-				   verbose, false, true);
-	else if (test == BASE)
-		err = sendmsg_test(iov_count, length, rate,
-				   verbose, true, false);
-	else if (test == BASE_SENDPAGE)
-		err = sendmsg_test(iov_count, length, rate,
-				   verbose, true, true);
-	else
+		err = forever_ping_pong(rate, &options);
+	else if (test == SENDMSG) {
+		options.base = false;
+		options.sendpage = false;
+		err = sendmsg_test(iov_count, length, rate, &options);
+	} else if (test == SENDPAGE) {
+		options.base = false;
+		options.sendpage = true;
+		err = sendmsg_test(iov_count, length, rate, &options);
+	} else if (test == BASE) {
+		options.base = true;
+		options.sendpage = false;
+		err = sendmsg_test(iov_count, length, rate, &options);
+	} else if (test == BASE_SENDPAGE) {
+		options.base = true;
+		options.sendpage = true;
+		err = sendmsg_test(iov_count, length, rate, &options);
+	} else
 		fprintf(stderr, "unknown test\n");
 out:
 	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);

From 1c16c3126ac938562a68ab7041fe74ce0de53166 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:57:56 -0700
Subject: [PATCH 1027/1678] bpf: sockmap, add sample option to test apply_bytes
 helper

This adds an option to test the apply_bytes helper. This option lets
the user specify an int on the command line specifying how much data
each verdict should apply to.

When this is set a map entry is set with the bytes input by the user
and then the specified program --txmsg or --txmsg_redir will use the
value and set the applied data. If no other option is set then a
default --txmsg_apply program is run. This program will drop pkts
if an error is detected on the bytes map lookup. Useful to verify
the map lookup and apply helper are working and causing a hard
error if it is not.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_kern.c            | 54 ++++++++++++++++++++---
 samples/sockmap/sockmap_user.c            | 19 +++++++-
 tools/testing/selftests/bpf/bpf_helpers.h |  3 +-
 3 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 75edb2f151d4..205ec36449d1 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -57,6 +57,13 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
 	.max_entries = 1,
 };
 
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -123,6 +130,11 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 SEC("sk_msg1")
 int bpf_prog4(struct sk_msg_md *msg)
 {
+	int *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
 	return SK_PASS;
 }
 
@@ -131,8 +143,13 @@ int bpf_prog5(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
 	void *data = (void *)(long) msg->data;
+	int *bytes, err = 0, zero = 0;
 
-	bpf_printk("sk_msg2: data length %i\n", (__u32)data_end - (__u32)data);
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err = bpf_msg_apply_bytes(msg, *bytes);
+	bpf_printk("sk_msg2: data length %i err %i\n",
+		   (__u64)data_end - (__u64)data, err);
 	return SK_PASS;
 }
 
@@ -141,9 +158,12 @@ int bpf_prog6(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
 	void *data = (void *)(long) msg->data;
-	int ret = 0;
+	int *bytes, zero = 0;
 
-	return bpf_msg_redirect_map(msg, &sock_map_redir, ret, 0);
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
 }
 
 SEC("sk_msg4")
@@ -151,10 +171,32 @@ int bpf_prog7(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
 	void *data = (void *)(long) msg->data;
-	int ret = 0;
+	int *bytes, err = 0, zero = 0;
 
-	bpf_printk("sk_msg3: redirect(%iB)\n", (__u32)data_end - (__u32)data);
-	return bpf_msg_redirect_map(msg, &sock_map_redir, ret, 0);
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err = bpf_msg_apply_bytes(msg, *bytes);
+	bpf_printk("sk_msg3: redirect(%iB) err=%i\n",
+		   (__u64)data_end - (__u64)data, err);
+	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes) {
+		ret = bpf_msg_apply_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	} else {
+		return SK_DROP;
+	}
+	return SK_PASS;
 }
 
 char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 8017ad7afea9..41774ec0f0b3 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -59,6 +59,7 @@ int txmsg_pass;
 int txmsg_noisy;
 int txmsg_redir;
 int txmsg_redir_noisy;
+int txmsg_apply;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -73,6 +74,7 @@ static const struct option long_options[] = {
 	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
 	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
+	{"txmsg_apply",	required_argument,	NULL, 'a'},
 	{0, 0, NULL, 0 }
 };
 
@@ -546,7 +548,9 @@ int main(int argc, char **argv)
 	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
-		/* Cgroup configuration */
+		case 'a':
+			txmsg_apply = atoi(optarg);
+			break;
 		case 'c':
 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
 			if (cg_fd < 0) {
@@ -665,6 +669,8 @@ run:
 		tx_prog_fd = prog_fd[5];
 	else if (txmsg_redir_noisy)
 		tx_prog_fd = prog_fd[6];
+	else if (txmsg_apply)
+		tx_prog_fd = prog_fd[7];
 	else
 		tx_prog_fd = 0;
 
@@ -699,6 +705,17 @@ run:
 				err, strerror(errno));
 			return err;
 		}
+
+		if (txmsg_apply) {
+			err = bpf_map_update_elem(map_fd[3],
+						  &i, &txmsg_apply, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
+					err, strerror(errno));
+				return err;
+			}
+		}
 	}
 	if (test == PING_PONG)
 		err = forever_ping_pong(rate, &options);
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index bba7ee6d93ab..4713de48cf88 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -88,7 +88,8 @@ static int (*bpf_override_return)(void *ctx, unsigned long rc) =
 	(void *) BPF_FUNC_override_return;
 static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
 	(void *) BPF_FUNC_msg_redirect_map;
-
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+	(void *) BPF_FUNC_msg_apply_bytes;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions

From 468b3fdea8826b232a570d95ba45272eb38919cb Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:58:02 -0700
Subject: [PATCH 1028/1678] bpf: sockmap sample support for
 bpf_msg_cork_bytes()

Add sample application support for the bpf_msg_cork_bytes helper. This
lets the user specify how many bytes each verdict should apply to.

Similar to apply_bytes() tests these can be run as a stand-alone test
when used without other options or inline with other tests by using
the txmsg_cork option along with any of the basic tests txmsg,
txmsg_redir, txmsg_drop.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_kern.c            | 53 +++++++++++++++++++----
 samples/sockmap/sockmap_user.c            | 19 ++++++++
 tools/include/uapi/linux/bpf.h            |  3 +-
 tools/testing/selftests/bpf/bpf_helpers.h |  2 +
 4 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 205ec36449d1..735226794ce1 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -64,6 +64,13 @@ struct bpf_map_def SEC("maps") sock_apply_bytes = {
 	.max_entries = 1
 };
 
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -135,6 +142,9 @@ int bpf_prog4(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
 		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
 	return SK_PASS;
 }
 
@@ -143,13 +153,16 @@ int bpf_prog5(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
 	void *data = (void *)(long) msg->data;
-	int *bytes, err = 0, zero = 0;
+	int *bytes, err1 = -1, err2 = -1, zero = 0;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
-		err = bpf_msg_apply_bytes(msg, *bytes);
-	bpf_printk("sk_msg2: data length %i err %i\n",
-		   (__u64)data_end - (__u64)data, err);
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+		   (__u64)data_end - (__u64)data, err1, err2);
 	return SK_PASS;
 }
 
@@ -163,6 +176,9 @@ int bpf_prog6(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
 		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
 	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
 }
 
@@ -171,13 +187,17 @@ int bpf_prog7(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
 	void *data = (void *)(long) msg->data;
-	int *bytes, err = 0, zero = 0;
+	int *bytes, err1 = 0, err2 = 0, zero = 0;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
-		err = bpf_msg_apply_bytes(msg, *bytes);
-	bpf_printk("sk_msg3: redirect(%iB) err=%i\n",
-		   (__u64)data_end - (__u64)data, err);
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+
+	bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
+		   (__u64)data_end - (__u64)data, err1, err2);
 	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
 }
 
@@ -198,5 +218,22 @@ int bpf_prog8(struct sk_msg_md *msg)
 	}
 	return SK_PASS;
 }
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes) {
+		if (((__u64)data_end - (__u64)data) >= *bytes)
+			return SK_PASS;
+		ret = bpf_msg_cork_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	}
+	return SK_PASS;
+}
 
 char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 41774ec0f0b3..4e0a3d87881f 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -60,6 +60,7 @@ int txmsg_noisy;
 int txmsg_redir;
 int txmsg_redir_noisy;
 int txmsg_apply;
+int txmsg_cork;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -75,6 +76,7 @@ static const struct option long_options[] = {
 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
 	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
 	{"txmsg_apply",	required_argument,	NULL, 'a'},
+	{"txmsg_cork",	required_argument,	NULL, 'k'},
 	{0, 0, NULL, 0 }
 };
 
@@ -551,6 +553,9 @@ int main(int argc, char **argv)
 		case 'a':
 			txmsg_apply = atoi(optarg);
 			break;
+		case 'k':
+			txmsg_cork = atoi(optarg);
+			break;
 		case 'c':
 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
 			if (cg_fd < 0) {
@@ -671,6 +676,8 @@ run:
 		tx_prog_fd = prog_fd[6];
 	else if (txmsg_apply)
 		tx_prog_fd = prog_fd[7];
+	else if (txmsg_cork)
+		tx_prog_fd = prog_fd[8];
 	else
 		tx_prog_fd = 0;
 
@@ -716,6 +723,18 @@ run:
 				return err;
 			}
 		}
+
+		if (txmsg_cork) {
+			err = bpf_map_update_elem(map_fd[4],
+						  &i, &txmsg_cork, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
+					err, strerror(errno));
+				return err;
+			}
+		}
+
 	}
 	if (test == PING_PONG)
 		err = forever_ping_pong(rate, &options);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 01b9c97b172e..e6924ab20fc3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -792,7 +792,8 @@ union bpf_attr {
 	FN(override_return),		\
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
-	FN(msg_apply_bytes),
+	FN(msg_apply_bytes),		\
+	FN(msg_cork_bytes),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 4713de48cf88..b5b45ff705ff 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -90,6 +90,8 @@ static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
 	(void *) BPF_FUNC_msg_redirect_map;
 static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+	(void *) BPF_FUNC_msg_cork_bytes;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions

From e6373ce70a01292424a118d9457d927349dad51d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:58:07 -0700
Subject: [PATCH 1029/1678] bpf: sockmap add SK_DROP tests

Add tests for SK_DROP.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_kern.c | 15 ++++++++
 samples/sockmap/sockmap_user.c | 62 +++++++++++++++++++++++++---------
 2 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 735226794ce1..8b6c34cd5f46 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -236,4 +236,19 @@ int bpf_prog9(struct sk_msg_md *msg)
 	return SK_PASS;
 }
 
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	return SK_DROP;
+}
+
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 4e0a3d87881f..52c4ed7774d4 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -59,6 +59,7 @@ int txmsg_pass;
 int txmsg_noisy;
 int txmsg_redir;
 int txmsg_redir_noisy;
+int txmsg_drop;
 int txmsg_apply;
 int txmsg_cork;
 
@@ -75,6 +76,7 @@ static const struct option long_options[] = {
 	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
 	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
+	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
 	{"txmsg_apply",	required_argument,	NULL, 'a'},
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
 	{0, 0, NULL, 0 }
@@ -210,9 +212,19 @@ struct msg_stats {
 	struct timespec end;
 };
 
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+	bool drop_expected;
+};
+
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
-			     struct msg_stats *s)
+			     struct msg_stats *s,
+			     struct sockmap_options *opt)
 {
+	bool drop = opt->drop_expected;
 	unsigned char k = 0;
 	FILE *file;
 	int i, fp;
@@ -229,12 +241,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 	for (i = 0; i < cnt; i++) {
 		int sent = sendfile(fd, fp, NULL, iov_length);
 
-		if (sent < 0) {
+		if (!drop && sent < 0) {
 			perror("send loop error:");
 			close(fp);
 			return sent;
+		} else if (drop && sent >= 0) {
+			printf("sendpage loop error expected: %i\n", sent);
+			close(fp);
+			return -EIO;
 		}
-		s->bytes_sent += sent;
+
+		if (sent > 0)
+			s->bytes_sent += sent;
 	}
 	clock_gettime(CLOCK_MONOTONIC, &s->end);
 	close(fp);
@@ -242,12 +260,15 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 }
 
 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
-		    struct msg_stats *s, bool tx, bool data_test)
+		    struct msg_stats *s, bool tx,
+		    struct sockmap_options *opt)
 {
 	struct msghdr msg = {0};
 	int err, i, flags = MSG_NOSIGNAL;
 	struct iovec *iov;
 	unsigned char k;
+	bool data_test = opt->data_test;
+	bool drop = opt->drop_expected;
 
 	iov = calloc(iov_count, sizeof(struct iovec));
 	if (!iov)
@@ -281,11 +302,16 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		for (i = 0; i < cnt; i++) {
 			int sent = sendmsg(fd, &msg, flags);
 
-			if (sent < 0) {
+			if (!drop && sent < 0) {
 				perror("send loop error:");
 				goto out_errno;
+			} else if (drop && sent >= 0) {
+				printf("send loop error expected: %i\n", sent);
+				errno = -EIO;
+				goto out_errno;
 			}
-			s->bytes_sent += sent;
+			if (sent > 0)
+				s->bytes_sent += sent;
 		}
 		clock_gettime(CLOCK_MONOTONIC, &s->end);
 	} else {
@@ -375,13 +401,6 @@ static inline float recvdBps(struct msg_stats s)
 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
 }
 
-struct sockmap_options {
-	int verbose;
-	bool base;
-	bool sendpage;
-	bool data_test;
-};
-
 static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 			struct sockmap_options *opt)
 {
@@ -399,10 +418,13 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 
 	rxpid = fork();
 	if (rxpid == 0) {
+		if (opt->drop_expected)
+			exit(1);
+
 		if (opt->sendpage)
 			iov_count = 1;
 		err = msg_loop(rx_fd, iov_count, iov_buf,
-			       cnt, &s, false, opt->data_test);
+			       cnt, &s, false, opt);
 		if (err)
 			fprintf(stderr,
 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
@@ -426,10 +448,10 @@ static int sendmsg_test(int iov_count, int iov_buf, int cnt,
 	txpid = fork();
 	if (txpid == 0) {
 		if (opt->sendpage)
-			err = msg_loop_sendpage(c1, iov_buf, cnt, &s);
+			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
 		else
 			err = msg_loop(c1, iov_count, iov_buf,
-				       cnt, &s, true, opt->data_test);
+				       cnt, &s, true, opt);
 
 		if (err)
 			fprintf(stderr,
@@ -674,6 +696,9 @@ run:
 		tx_prog_fd = prog_fd[5];
 	else if (txmsg_redir_noisy)
 		tx_prog_fd = prog_fd[6];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[9];
+	/* apply and cork must be last */
 	else if (txmsg_apply)
 		tx_prog_fd = prog_fd[7];
 	else if (txmsg_cork)
@@ -700,6 +725,7 @@ run:
 				err, strerror(errno));
 			return err;
 		}
+
 		if (txmsg_redir || txmsg_redir_noisy)
 			redir_fd = c2;
 		else
@@ -736,6 +762,10 @@ run:
 		}
 
 	}
+
+	if (txmsg_drop)
+		options.drop_expected = true;
+
 	if (test == PING_PONG)
 		err = forever_ping_pong(rate, &options);
 	else if (test == SENDMSG) {

From 0dcbbf6785799ba05b44df2ba6bcc1195f4f945c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:58:12 -0700
Subject: [PATCH 1030/1678] bpf: sockmap sample test for bpf_msg_pull_data

This adds an option to test the msg_pull_data helper. This
uses two options txmsg_start and txmsg_end to let the user
specify start and end bytes to pull.

The options can be used with txmsg_apply, txmsg_cork options
as well as with any of the basic tests, txmsg, txmsg_redir and
txmsg_drop (plus noisy variants) to run pull_data inline with
those tests. By giving user direct control over the variables
we can easily do negative testing as well as positive tests.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_kern.c            | 77 +++++++++++++++++++----
 samples/sockmap/sockmap_user.c            | 32 ++++++++++
 tools/include/uapi/linux/bpf.h            |  3 +-
 tools/testing/selftests/bpf/bpf_helpers.h |  2 +
 4 files changed, 100 insertions(+), 14 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 8b6c34cd5f46..9ad5ba79c85a 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -71,6 +71,14 @@ struct bpf_map_def SEC("maps") sock_cork_bytes = {
 	.max_entries = 1
 };
 
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 2
+};
+
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -137,7 +145,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 SEC("sk_msg1")
 int bpf_prog4(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0;
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -145,15 +154,18 @@ int bpf_prog4(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
 	return SK_PASS;
 }
 
 SEC("sk_msg2")
 int bpf_prog5(struct sk_msg_md *msg)
 {
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-	int *bytes, err1 = -1, err2 = -1, zero = 0;
+	int err1 = -1, err2 = -1, zero = 0, one = 1;
+	int *bytes, *start, *end, len1, len2;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -161,17 +173,32 @@ int bpf_prog5(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+		int err;
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
 	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
-		   (__u64)data_end - (__u64)data, err1, err2);
+		   len1, err1, err2);
 	return SK_PASS;
 }
 
 SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-	int *bytes, zero = 0;
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -179,15 +206,18 @@ int bpf_prog6(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
 	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
 }
 
 SEC("sk_msg4")
 int bpf_prog7(struct sk_msg_md *msg)
 {
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-	int *bytes, err1 = 0, err2 = 0, zero = 0;
+	int err1 = 0, err2 = 0, zero = 0, one = 1;
+	int *bytes, *start, *end, len1, len2;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -195,9 +225,24 @@ int bpf_prog7(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+		int err;
 
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
 	bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
-		   (__u64)data_end - (__u64)data, err1, err2);
+		   len1, err1, err2);
 	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
 }
 
@@ -239,7 +284,8 @@ int bpf_prog9(struct sk_msg_md *msg)
 SEC("sk_msg7")
 int bpf_prog10(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0;
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -247,6 +293,11 @@ int bpf_prog10(struct sk_msg_md *msg)
 	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
 	if (bytes)
 		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+
 	return SK_DROP;
 }
 
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 52c4ed7774d4..07aa237221d1 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -62,6 +62,8 @@ int txmsg_redir_noisy;
 int txmsg_drop;
 int txmsg_apply;
 int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -79,6 +81,8 @@ static const struct option long_options[] = {
 	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
 	{"txmsg_apply",	required_argument,	NULL, 'a'},
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
+	{"txmsg_start", required_argument,	NULL, 's'},
+	{"txmsg_end",	required_argument,	NULL, 'e'},
 	{0, 0, NULL, 0 }
 };
 
@@ -572,6 +576,12 @@ int main(int argc, char **argv)
 	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
+		case 's':
+			txmsg_start = atoi(optarg);
+			break;
+		case 'e':
+			txmsg_end = atoi(optarg);
+			break;
 		case 'a':
 			txmsg_apply = atoi(optarg);
 			break;
@@ -761,6 +771,28 @@ run:
 			}
 		}
 
+		if (txmsg_start) {
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
+					err, strerror(errno));
+				return err;
+			}
+		}
+
+		if (txmsg_end) {
+			i = 1;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
+					err, strerror(errno));
+				return err;
+			}
+		}
 	}
 
 	if (txmsg_drop)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6924ab20fc3..d245c41213ac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -793,7 +793,8 @@ union bpf_attr {
 	FN(sock_ops_cb_flags_set),	\
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
-	FN(msg_cork_bytes),
+	FN(msg_cork_bytes),		\
+	FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index b5b45ff705ff..7cae376d8d0c 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -92,6 +92,8 @@ static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_apply_bytes;
 static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+	(void *) BPF_FUNC_msg_pull_data;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions

From ae30727fa4beabd8403b016896eb1f714e6c0fd4 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 18 Mar 2018 12:58:17 -0700
Subject: [PATCH 1031/1678] bpf: sockmap test script

This adds the test script I am currently using to validate
the latest sockmap changes. Shortly sockmap will be ported
to selftests and these will be run from the infrastructure
there. Until then add the script here so we have a coverage
checklist when porting into selftests.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_test.sh | 450 ++++++++++++++++++++++++++++++++
 1 file changed, 450 insertions(+)
 create mode 100755 samples/sockmap/sockmap_test.sh

diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
new file mode 100755
index 000000000000..6d8cc40cca22
--- /dev/null
+++ b/samples/sockmap/sockmap_test.sh
@@ -0,0 +1,450 @@
+#Test a bunch of positive cases to verify basic functionality
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+	for i in 1 10 100; do
+		for l in 1 10 100; do
+			TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+			echo $TEST
+			$TEST
+			sleep 2
+		done
+	done
+done
+done
+done
+
+#Test max iov
+t="sendmsg"
+r=1
+i=1024
+l=1
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+
+# Test max iov with 1k send
+
+t="sendmsg"
+r=1
+i=1024
+l=1024
+prog="--txmsg"
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+prog="--txmsg_redir"
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+echo $TEST
+$TEST
+sleep 2
+
+# Test apply with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply and redirect with 1B
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_apply 1"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply and redirect with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply and redirect with apply that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_apply 2048"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with 1B not really useful but test it anyways
+r=1
+i=1024
+l=1024
+prog="--txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+r=1
+i=1024
+l=1024
+prog="--txmsg_redir --txmsg_cork 1"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with a more reasonable 100B
+r=1
+i=1000
+l=1000
+prog="--txmsg_redir --txmsg_cork 100"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with larger value than send
+r=1
+i=8
+l=1024
+prog="--txmsg_redir --txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test cork with cork that never reaches limit
+r=1024
+i=1
+l=1
+prog="--txmsg_cork 2048"
+
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
+# mix and match cork and apply not really useful but valid programs
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply < cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Try again with larger sizes so we hit overflow case
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Test apply > cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Again with larger sizes so we hit overflow cases
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
+# Test apply = cork
+r=100
+i=1
+l=5
+prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+r=100
+i=1000
+l=2048
+prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
+for t in "sendpage" "sendmsg"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+# Tests for bpf_msg_pull_data()
+for i in `seq 99 100 1600`; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+		--txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+for i in `seq 199 100 1600`; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+		--txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+	--txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+	--txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+	--txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+	--txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
+echo $TEST
+$TEST
+sleep 2
+
+TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
+	--txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
+echo $TEST
+$TEST
+sleep 2
+
+# Run through gamut again with start and end
+for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for t in "sendmsg" "sendpage"; do
+for r in 1 10 100; do
+	for i in 1 10 100; do
+		for l in 1 10 100; do
+			TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
+			echo $TEST
+			$TEST
+			sleep 2
+		done
+	done
+done
+done
+done
+
+# Some specific tests to cover specific code paths
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+	-r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+	-r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
+./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
+	-r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
+	-r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5

From 6aec208786c2a54cbf6135a0242b224e845bef98 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Sun, 4 Mar 2018 15:29:51 -0800
Subject: [PATCH 1032/1678] netfilter: Refactor nf_conncount

Remove parameter 'family' in nf_conncount_count() and count_tree().
It is because the parameter is not useful after commit 625c556118f3
("netfilter: connlimit: split xt_connlimit into front and backend").

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 1 -
 net/netfilter/nf_conncount.c               | 4 +---
 net/netfilter/xt_connlimit.c               | 4 ++--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index adf8db44cf86..e61184fbfb71 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -11,7 +11,6 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
 unsigned int nf_conncount_count(struct net *net,
 				struct nf_conncount_data *data,
 				const u32 *key,
-				unsigned int family,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 #endif
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 6d65389e308f..9305a08b4422 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -158,7 +158,6 @@ static void tree_nodes_free(struct rb_root *root,
 static unsigned int
 count_tree(struct net *net, struct rb_root *root,
 	   const u32 *key, u8 keylen,
-	   u8 family,
 	   const struct nf_conntrack_tuple *tuple,
 	   const struct nf_conntrack_zone *zone)
 {
@@ -246,7 +245,6 @@ count_tree(struct net *net, struct rb_root *root,
 unsigned int nf_conncount_count(struct net *net,
 				struct nf_conncount_data *data,
 				const u32 *key,
-				unsigned int family,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone)
 {
@@ -259,7 +257,7 @@ unsigned int nf_conncount_count(struct net *net,
 
 	spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
 
-	count = count_tree(net, root, key, data->keylen, family, tuple, zone);
+	count = count_tree(net, root, key, data->keylen, tuple, zone);
 
 	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b1b17b9353e1..6275106ccf50 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -67,8 +67,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		key[1] = zone->id;
 	}
 
-	connections = nf_conncount_count(net, info->data, key,
-					 xt_family(par), tuple_ptr, zone);
+	connections = nf_conncount_count(net, info->data, key, tuple_ptr,
+					 zone);
 	if (connections == 0)
 		/* kmalloc failed, drop it entirely */
 		goto hotdrop;

From 35d8deb80c30fdb2dee3e2dac71eab00d8a6fed5 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Sun, 4 Mar 2018 15:29:52 -0800
Subject: [PATCH 1033/1678] netfilter: conncount: Support count only use case

Currently, nf_conncount_count() counts the number of connections that
matches key and inserts a conntrack 'tuple' with the same key into the
accounting data structure.  This patch supports another use case that only
counts the number of connections where 'tuple' is not provided.  Therefore,
proper changes are made on nf_conncount_count() to support the case where
'tuple' is NULL.  This could be useful for querying statistics or
debugging purpose.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 9305a08b4422..153e690e2893 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -104,7 +104,7 @@ static unsigned int check_hlist(struct net *net,
 	struct nf_conn *found_ct;
 	unsigned int length = 0;
 
-	*addit = true;
+	*addit = tuple ? true : false;
 
 	/* check the saved connections */
 	hlist_for_each_entry_safe(conn, n, head, node) {
@@ -117,7 +117,7 @@ static unsigned int check_hlist(struct net *net,
 
 		found_ct = nf_ct_tuplehash_to_ctrack(found);
 
-		if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
+		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) {
 			/*
 			 * Just to be sure we have it only once in the list.
 			 * We should not see tuples twice unless someone hooks
@@ -220,6 +220,9 @@ count_tree(struct net *net, struct rb_root *root,
 		goto restart;
 	}
 
+	if (!tuple)
+		return 0;
+
 	/* no match, need to insert new node */
 	rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
 	if (rbconn == NULL)
@@ -242,6 +245,9 @@ count_tree(struct net *net, struct rb_root *root,
 	return 1;
 }
 
+/* Count and return number of conntrack entries in 'net' with particular 'key'.
+ * If 'tuple' is not null, insert it into the accounting data structure.
+ */
 unsigned int nf_conncount_count(struct net *net,
 				struct nf_conncount_data *data,
 				const u32 *key,

From d719e3f21cf91d3f82bd827d46199ba41af2f73a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 9 Mar 2018 11:57:20 +0100
Subject: [PATCH 1034/1678] netfilter: nft_ct: add NFT_CT_{SRC,DST}_{IP,IP6}

All existing keys, except the NFT_CT_SRC and NFT_CT_DST are assumed to
have strict datatypes. This is causing problems with sets and
concatenations given the specific length of these keys is not known.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Florian Westphal <fw@strlen.de>
---
 include/uapi/linux/netfilter/nf_tables.h | 12 ++++++--
 net/netfilter/nft_ct.c                   | 38 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 66dceee0ae30..6a3d653d5b27 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -909,8 +909,8 @@ enum nft_rt_attributes {
  * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
  * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
  * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
- * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
- * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
+ * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address, deprecated)
+ * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address, deprecated)
  * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
  * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
  * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
@@ -920,6 +920,10 @@ enum nft_rt_attributes {
  * @NFT_CT_AVGPKT: conntrack average bytes per packet
  * @NFT_CT_ZONE: conntrack zone
  * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack
+ * @NFT_CT_SRC_IP: conntrack layer 3 protocol source (IPv4 address)
+ * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address)
+ * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address)
+ * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address)
  */
 enum nft_ct_keys {
 	NFT_CT_STATE,
@@ -941,6 +945,10 @@ enum nft_ct_keys {
 	NFT_CT_AVGPKT,
 	NFT_CT_ZONE,
 	NFT_CT_EVENTMASK,
+	NFT_CT_SRC_IP,
+	NFT_CT_DST_IP,
+	NFT_CT_SRC_IP6,
+	NFT_CT_DST_IP6,
 };
 
 /**
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 6ab274b14484..ea737fd789e8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -196,6 +196,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 	case NFT_CT_PROTO_DST:
 		nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
 		return;
+	case NFT_CT_SRC_IP:
+		if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+			goto err;
+		*dest = tuple->src.u3.ip;
+		return;
+	case NFT_CT_DST_IP:
+		if (nf_ct_l3num(ct) != NFPROTO_IPV4)
+			goto err;
+		*dest = tuple->dst.u3.ip;
+		return;
+	case NFT_CT_SRC_IP6:
+		if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+			goto err;
+		memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr));
+		return;
+	case NFT_CT_DST_IP6:
+		if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+			goto err;
+		memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr));
+		return;
 	default:
 		break;
 	}
@@ -419,6 +439,20 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
 			return -EAFNOSUPPORT;
 		}
 		break;
+	case NFT_CT_SRC_IP:
+	case NFT_CT_DST_IP:
+		if (tb[NFTA_CT_DIRECTION] == NULL)
+			return -EINVAL;
+
+		len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
+		break;
+	case NFT_CT_SRC_IP6:
+	case NFT_CT_DST_IP6:
+		if (tb[NFTA_CT_DIRECTION] == NULL)
+			return -EINVAL;
+
+		len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
+		break;
 	case NFT_CT_PROTO_SRC:
 	case NFT_CT_PROTO_DST:
 		if (tb[NFTA_CT_DIRECTION] == NULL)
@@ -588,6 +622,10 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	switch (priv->key) {
 	case NFT_CT_SRC:
 	case NFT_CT_DST:
+	case NFT_CT_SRC_IP:
+	case NFT_CT_DST_IP:
+	case NFT_CT_SRC_IP6:
+	case NFT_CT_DST_IP6:
 	case NFT_CT_PROTO_SRC:
 	case NFT_CT_PROTO_DST:
 		if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))

From 8039ab43eeac029a9c47c0411918ea82c9ce87cd Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 12 Mar 2018 18:14:42 -0500
Subject: [PATCH 1035/1678] netfilter: cttimeout: remove VLA usage

In preparation to enabling -Wvla, remove VLA and replace it
with dynamic memory allocation.

>From a security viewpoint, the use of Variable Length Arrays can be
a vector for stack overflow attacks. Also, in general, as the code
evolves it is easy to lose track of how big a VLA can get. Thus, we
can end up having segfaults that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

While at it, remove likely() notation which is not necessary from the
control plane code.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cttimeout.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 95b04702a655..9ee5fa551fa6 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -51,19 +51,27 @@ ctnl_timeout_parse_policy(void *timeouts,
 			  const struct nf_conntrack_l4proto *l4proto,
 			  struct net *net, const struct nlattr *attr)
 {
+	struct nlattr **tb;
 	int ret = 0;
 
-	if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) {
-		struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1];
+	if (!l4proto->ctnl_timeout.nlattr_to_obj)
+		return 0;
 
-		ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
-				       attr, l4proto->ctnl_timeout.nla_policy,
-				       NULL);
-		if (ret < 0)
-			return ret;
+	tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
+		     GFP_KERNEL);
 
-		ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
-	}
+	if (!tb)
+		return -ENOMEM;
+
+	ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr,
+			       l4proto->ctnl_timeout.nla_policy, NULL);
+	if (ret < 0)
+		goto err;
+
+	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+
+err:
+	kfree(tb);
 	return ret;
 }
 

From 1446385904add0e89f990ee0518434365e50ce86 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 12 Mar 2018 19:21:38 -0500
Subject: [PATCH 1036/1678] netfilter: nfnetlink_cthelper: Remove VLA usage

In preparation to enabling -Wvla, remove VLA and replace it
with dynamic memory allocation.

>From a security viewpoint, the use of Variable Length Arrays can be
a vector for stack overflow attacks. Also, in general, as the code
evolves it is easy to lose track of how big a VLA can get. Thus, we
can end up having segfaults that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d33ce6d5ebce..4a4b293fb2e5 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -314,23 +314,30 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
 static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
 					   struct nf_conntrack_helper *helper)
 {
-	struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+	struct nf_conntrack_expect_policy *new_policy;
 	struct nf_conntrack_expect_policy *policy;
-	int i, err;
+	int i, ret = 0;
+
+	new_policy = kmalloc_array(helper->expect_class_max + 1,
+				   sizeof(*new_policy), GFP_KERNEL);
+	if (!new_policy)
+		return -ENOMEM;
 
 	/* Check first that all policy attributes are well-formed, so we don't
 	 * leave things in inconsistent state on errors.
 	 */
 	for (i = 0; i < helper->expect_class_max + 1; i++) {
 
-		if (!tb[NFCTH_POLICY_SET + i])
-			return -EINVAL;
+		if (!tb[NFCTH_POLICY_SET + i]) {
+			ret = -EINVAL;
+			goto err;
+		}
 
-		err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+		ret = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
 						      &new_policy[i],
 						      tb[NFCTH_POLICY_SET + i]);
-		if (err < 0)
-			return err;
+		if (ret < 0)
+			goto err;
 	}
 	/* Now we can safely update them. */
 	for (i = 0; i < helper->expect_class_max + 1; i++) {
@@ -340,7 +347,9 @@ static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
 		policy->timeout	= new_policy->timeout;
 	}
 
-	return 0;
+err:
+	kfree(new_policy);
+	return ret;
 }
 
 static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,

From 5b4c6e3860daaf089c28e0161dffef7b5ad8000f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 12 Mar 2018 22:16:17 -0500
Subject: [PATCH 1037/1678] netfilter: nf_tables: remove VLA usage

In preparation to enabling -Wvla, remove VLA and replace it
with dynamic memory allocation.

>From a security viewpoint, the use of Variable Length Arrays can be
a vector for stack overflow attacks. Also, in general, as the code
evolves it is easy to lose track of how big a VLA can get. Thus, we
can end up having segfaults that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8cc7fc970f0c..92f5606b0dea 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4357,16 +4357,20 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 				       const struct nft_object_type *type,
 				       const struct nlattr *attr)
 {
-	struct nlattr *tb[type->maxattr + 1];
+	struct nlattr **tb;
 	const struct nft_object_ops *ops;
 	struct nft_object *obj;
-	int err;
+	int err = -ENOMEM;
+
+	tb = kmalloc_array(type->maxattr + 1, sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		goto err1;
 
 	if (attr) {
 		err = nla_parse_nested(tb, type->maxattr, attr, type->policy,
 				       NULL);
 		if (err < 0)
-			goto err1;
+			goto err2;
 	} else {
 		memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
 	}
@@ -4375,7 +4379,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 		ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
 		if (IS_ERR(ops)) {
 			err = PTR_ERR(ops);
-			goto err1;
+			goto err2;
 		}
 	} else {
 		ops = type->ops;
@@ -4383,18 +4387,21 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
 
 	err = -ENOMEM;
 	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
-	if (obj == NULL)
-		goto err1;
+	if (!obj)
+		goto err2;
 
 	err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
 	if (err < 0)
-		goto err2;
+		goto err3;
 
 	obj->ops = ops;
 
+	kfree(tb);
 	return obj;
-err2:
+err3:
 	kfree(obj);
+err2:
+	kfree(tb);
 err1:
 	return ERR_PTR(err);
 }

From d72133e6288030121e425b89584ab3dfb68871cc Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 14 Mar 2018 23:36:53 +0900
Subject: [PATCH 1038/1678] netfilter: ebtables: use ADD_COUNTER macro

xtables uses ADD_COUNTER macro to increase
packet and byte count. ebtables also can use this.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 217aa79f7b2a..9a26d2b7420f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -223,9 +223,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
 			return NF_DROP;
 		}
 
-		/* increase counter */
-		(*(counter_base + i)).pcnt++;
-		(*(counter_base + i)).bcnt += skb->len;
+		ADD_COUNTER(*(counter_base + i), 1, skb->len);
 
 		/* these should only watch: not modify, nor tell us
 		 * what to do with the packet
@@ -968,10 +966,9 @@ static void get_counters(const struct ebt_counter *oldcounters,
 		if (cpu == 0)
 			continue;
 		counter_base = COUNTER_BASE(oldcounters, nentries, cpu);
-		for (i = 0; i < nentries; i++) {
-			counters[i].pcnt += counter_base[i].pcnt;
-			counters[i].bcnt += counter_base[i].bcnt;
-		}
+		for (i = 0; i < nentries; i++)
+			ADD_COUNTER(counters[i], counter_base[i].pcnt,
+				    counter_base[i].bcnt);
 	}
 }
 
@@ -1324,10 +1321,8 @@ static int do_update_counters(struct net *net, const char *name,
 	write_lock_bh(&t->lock);
 
 	/* we add to the counters of the first cpu */
-	for (i = 0; i < num_counters; i++) {
-		t->private->counters[i].pcnt += tmp[i].pcnt;
-		t->private->counters[i].bcnt += tmp[i].bcnt;
-	}
+	for (i = 0; i < num_counters; i++)
+		ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt);
 
 	write_unlock_bh(&t->lock);
 	ret = 0;

From 472a73e00757b971d613d796374d2727b2e4954d Mon Sep 17 00:00:00 2001
From: Jack Ma <jack.ma@alliedtelesis.co.nz>
Date: Mon, 19 Mar 2018 09:41:59 +1300
Subject: [PATCH 1039/1678] netfilter: xt_conntrack: Support bit-shifting for
 CONNMARK & MARK targets.

This patch introduces a new feature that allows bitshifting (left
and right) operations to co-operate with existing iptables options.

Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jack Ma <jack.ma@alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_connmark.h | 10 +++
 net/netfilter/xt_connmark.c                | 77 +++++++++++++++++-----
 2 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h
index 408a9654f05c..1aa5c955ee1e 100644
--- a/include/uapi/linux/netfilter/xt_connmark.h
+++ b/include/uapi/linux/netfilter/xt_connmark.h
@@ -19,11 +19,21 @@ enum {
 	XT_CONNMARK_RESTORE
 };
 
+enum {
+	D_SHIFT_LEFT = 0,
+	D_SHIFT_RIGHT,
+};
+
 struct xt_connmark_tginfo1 {
 	__u32 ctmark, ctmask, nfmask;
 	__u8 mode;
 };
 
+struct xt_connmark_tginfo2 {
+	__u32 ctmark, ctmask, nfmask;
+	__u8 shift_dir, shift_bits, mode;
+};
+
 struct xt_connmark_mtinfo1 {
 	__u32 mark, mask;
 	__u8 invert;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 809639ce6f5a..773da82190dc 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -36,9 +36,10 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static unsigned int
-connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+connmark_tg_shift(struct sk_buff *skb,
+		const struct xt_connmark_tginfo1 *info,
+		u8 shift_bits, u8 shift_dir)
 {
-	const struct xt_connmark_tginfo1 *info = par->targinfo;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	u_int32_t newmark;
@@ -50,6 +51,10 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	switch (info->mode) {
 	case XT_CONNMARK_SET:
 		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+		if (shift_dir == D_SHIFT_RIGHT)
+			newmark >>= shift_bits;
+		else
+			newmark <<= shift_bits;
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
 			nf_conntrack_event_cache(IPCT_MARK, ct);
@@ -57,7 +62,11 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		break;
 	case XT_CONNMARK_SAVE:
 		newmark = (ct->mark & ~info->ctmask) ^
-		          (skb->mark & info->nfmask);
+			  (skb->mark & info->nfmask);
+		if (shift_dir == D_SHIFT_RIGHT)
+			newmark >>= shift_bits;
+		else
+			newmark <<= shift_bits;
 		if (ct->mark != newmark) {
 			ct->mark = newmark;
 			nf_conntrack_event_cache(IPCT_MARK, ct);
@@ -65,14 +74,34 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		break;
 	case XT_CONNMARK_RESTORE:
 		newmark = (skb->mark & ~info->nfmask) ^
-		          (ct->mark & info->ctmask);
+			  (ct->mark & info->ctmask);
+		if (shift_dir == D_SHIFT_RIGHT)
+			newmark >>= shift_bits;
+		else
+			newmark <<= shift_bits;
 		skb->mark = newmark;
 		break;
 	}
-
 	return XT_CONTINUE;
 }
 
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_connmark_tginfo1 *info = par->targinfo;
+
+	return connmark_tg_shift(skb, info, 0, 0);
+}
+
+static unsigned int
+connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_connmark_tginfo2 *info = par->targinfo;
+
+	return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info,
+				 info->shift_bits, info->shift_dir);
+}
+
 static int connmark_tg_check(const struct xt_tgchk_param *par)
 {
 	int ret;
@@ -119,15 +148,27 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
 	nf_ct_netns_put(par->net, par->family);
 }
 
-static struct xt_target connmark_tg_reg __read_mostly = {
-	.name           = "CONNMARK",
-	.revision       = 1,
-	.family         = NFPROTO_UNSPEC,
-	.checkentry     = connmark_tg_check,
-	.target         = connmark_tg,
-	.targetsize     = sizeof(struct xt_connmark_tginfo1),
-	.destroy        = connmark_tg_destroy,
-	.me             = THIS_MODULE,
+static struct xt_target connmark_tg_reg[] __read_mostly = {
+	{
+		.name           = "CONNMARK",
+		.revision       = 1,
+		.family         = NFPROTO_UNSPEC,
+		.checkentry     = connmark_tg_check,
+		.target         = connmark_tg,
+		.targetsize     = sizeof(struct xt_connmark_tginfo1),
+		.destroy        = connmark_tg_destroy,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "CONNMARK",
+		.revision       = 2,
+		.family         = NFPROTO_UNSPEC,
+		.checkentry     = connmark_tg_check,
+		.target         = connmark_tg_v2,
+		.targetsize     = sizeof(struct xt_connmark_tginfo2),
+		.destroy        = connmark_tg_destroy,
+		.me             = THIS_MODULE,
+	}
 };
 
 static struct xt_match connmark_mt_reg __read_mostly = {
@@ -145,12 +186,14 @@ static int __init connmark_mt_init(void)
 {
 	int ret;
 
-	ret = xt_register_target(&connmark_tg_reg);
+	ret = xt_register_targets(connmark_tg_reg,
+				  ARRAY_SIZE(connmark_tg_reg));
 	if (ret < 0)
 		return ret;
 	ret = xt_register_match(&connmark_mt_reg);
 	if (ret < 0) {
-		xt_unregister_target(&connmark_tg_reg);
+		xt_unregister_targets(connmark_tg_reg,
+				      ARRAY_SIZE(connmark_tg_reg));
 		return ret;
 	}
 	return 0;
@@ -159,7 +202,7 @@ static int __init connmark_mt_init(void)
 static void __exit connmark_mt_exit(void)
 {
 	xt_unregister_match(&connmark_mt_reg);
-	xt_unregister_target(&connmark_tg_reg);
+	xt_unregister_target(connmark_tg_reg);
 }
 
 module_init(connmark_mt_init);

From 5191d70f83fd1878c40029cffe69f6a2bf65fa0e Mon Sep 17 00:00:00 2001
From: Arushi Singhal <arushisinghal19971997@gmail.com>
Date: Mon, 12 Mar 2018 18:36:29 +0530
Subject: [PATCH 1040/1678] netfilter: Replace printk() with pr_*() and define
 pr_fmt()

Using pr_<loglevel>() is more concise than printk(KERN_<LOGLEVEL>).
This patch:
* Replace printks having a log level with the appropriate
pr_*() macros.
* Define pr_fmt() to include relevant name.
* Remove redundant prefixes from pr_*() calls.
* Indent the code where possible.
* Remove the useless output messages.
* Remove periods from messages.

Signed-off-by: Arushi Singhal <arushisinghal19971997@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_acct.c      |  6 ++++--
 net/netfilter/nf_conntrack_ecache.c    |  6 ++++--
 net/netfilter/nf_conntrack_timestamp.c |  6 ++++--
 net/netfilter/nf_nat_core.c            |  4 +++-
 net/netfilter/nf_nat_ftp.c             |  7 ++++---
 net/netfilter/nf_nat_irc.c             |  7 ++++---
 net/netfilter/nfnetlink_queue.c        | 14 +++++++-------
 net/netfilter/xt_time.c                | 13 +++++++------
 8 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 866916712905..1d66de5151b2 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -8,6 +8,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/netfilter.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
@@ -80,7 +82,7 @@ static int nf_conntrack_acct_init_sysctl(struct net *net)
 	net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
 							 table);
 	if (!net->ct.acct_sysctl_header) {
-		printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n");
+		pr_err("can't register to sysctl\n");
 		goto out_register;
 	}
 	return 0;
@@ -125,7 +127,7 @@ int nf_conntrack_acct_init(void)
 {
 	int ret = nf_ct_extend_register(&acct_extend);
 	if (ret < 0)
-		pr_err("nf_conntrack_acct: Unable to register extension\n");
+		pr_err("Unable to register extension\n");
 	return ret;
 }
 
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index caac41ad9483..c11822a7d2bf 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -11,6 +11,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/skbuff.h>
@@ -372,7 +374,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
 	net->ct.event_sysctl_header =
 		register_net_sysctl(net, "net/netfilter", table);
 	if (!net->ct.event_sysctl_header) {
-		printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+		pr_err("can't register to sysctl\n");
 		goto out_register;
 	}
 	return 0;
@@ -419,7 +421,7 @@ int nf_conntrack_ecache_init(void)
 {
 	int ret = nf_ct_extend_register(&event_extend);
 	if (ret < 0)
-		pr_err("nf_ct_event: Unable to register event extension.\n");
+		pr_err("Unable to register event extension\n");
 
 	BUILD_BUG_ON(__IPCT_MAX >= 16);	/* ctmask, missed use u16 */
 
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index 4c4734b78318..56766cb26e40 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -6,6 +6,8 @@
  * published by the Free Software Foundation (or any later at your option).
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/netfilter.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
@@ -58,7 +60,7 @@ static int nf_conntrack_tstamp_init_sysctl(struct net *net)
 	net->ct.tstamp_sysctl_header = register_net_sysctl(net,	"net/netfilter",
 							   table);
 	if (!net->ct.tstamp_sysctl_header) {
-		printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+		pr_err("can't register to sysctl\n");
 		goto out_register;
 	}
 	return 0;
@@ -104,7 +106,7 @@ int nf_conntrack_tstamp_init(void)
 	int ret;
 	ret = nf_ct_extend_register(&tstamp_extend);
 	if (ret < 0)
-		pr_err("nf_ct_tstamp: Unable to register extension\n");
+		pr_err("Unable to register extension\n");
 	return ret;
 }
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6c38421e31f9..617693ff9f4c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -8,6 +8,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/timer.h>
@@ -814,7 +816,7 @@ static int __init nf_nat_init(void)
 	ret = nf_ct_extend_register(&nat_extend);
 	if (ret < 0) {
 		nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
-		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
+		pr_err("Unable to register extension\n");
 		return ret;
 	}
 
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index d76afafdc699..5063cbf1689c 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -8,6 +8,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/inet.h>
@@ -71,7 +73,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 	char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
 	unsigned int buflen;
 
-	pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
+	pr_debug("type %i, off %u len %u\n", type, matchoff, matchlen);
 
 	/* Connection will come from wherever this packet goes, hence !dir */
 	newaddr = ct->tuplehash[!dir].tuple.dst.u3;
@@ -136,8 +138,7 @@ static int __init nf_nat_ftp_init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, const struct kernel_param *kp)
 {
-	printk(KERN_INFO KBUILD_MODNAME
-	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+	pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
 module_param_call(ports, warn_set, NULL, NULL, 0);
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index dcb5f6375d9d..3aa35a43100d 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -10,6 +10,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/tcp.h>
@@ -79,7 +81,7 @@ static unsigned int help(struct sk_buff *skb,
 	 */
 	/* AAA = "us", ie. where server normally talks to. */
 	snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port);
-	pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
+	pr_debug("inserting '%s' == %pI4, port %u\n",
 		 buffer, &newaddr.ip, port);
 
 	if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
@@ -108,8 +110,7 @@ static int __init nf_nat_irc_init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, const struct kernel_param *kp)
 {
-	printk(KERN_INFO KBUILD_MODNAME
-	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
+	pr_info("kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
 module_param_call(ports, warn_set, NULL, NULL, 0);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8bba23160a68..74a04638ef03 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -14,6 +14,9 @@
  * published by the Free Software Foundation.
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
@@ -833,11 +836,8 @@ nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
 		if (diff > skb_tailroom(e->skb)) {
 			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
 					       diff, GFP_ATOMIC);
-			if (!nskb) {
-				printk(KERN_WARNING "nf_queue: OOM "
-				      "in mangle, dropping packet\n");
+			if (!nskb)
 				return -ENOMEM;
-			}
 			kfree_skb(e->skb);
 			e->skb = nskb;
 		}
@@ -1536,20 +1536,20 @@ static int __init nfnetlink_queue_init(void)
 
 	status = register_pernet_subsys(&nfnl_queue_net_ops);
 	if (status < 0) {
-		pr_err("nf_queue: failed to register pernet ops\n");
+		pr_err("failed to register pernet ops\n");
 		goto out;
 	}
 
 	netlink_register_notifier(&nfqnl_rtnl_notifier);
 	status = nfnetlink_subsys_register(&nfqnl_subsys);
 	if (status < 0) {
-		pr_err("nf_queue: failed to create netlink socket\n");
+		pr_err("failed to create netlink socket\n");
 		goto cleanup_netlink_notifier;
 	}
 
 	status = register_netdevice_notifier(&nfqnl_dev_notifier);
 	if (status < 0) {
-		pr_err("nf_queue: failed to register netdevice notifier\n");
+		pr_err("failed to register netdevice notifier\n");
 		goto cleanup_netlink_subsys;
 	}
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 0160f505e337..c13bcd0ab491 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -9,6 +9,9 @@
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from gnu.org/gpl.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -266,13 +269,11 @@ static int __init time_mt_init(void)
 	int minutes = sys_tz.tz_minuteswest;
 
 	if (minutes < 0) /* east of Greenwich */
-		printk(KERN_INFO KBUILD_MODNAME
-		       ": kernel timezone is +%02d%02d\n",
-		       -minutes / 60, -minutes % 60);
+		pr_info("kernel timezone is +%02d%02d\n",
+			-minutes / 60, -minutes % 60);
 	else /* west of Greenwich */
-		printk(KERN_INFO KBUILD_MODNAME
-		       ": kernel timezone is -%02d%02d\n",
-		       minutes / 60, minutes % 60);
+		pr_info("kernel timezone is -%02d%02d\n",
+			minutes / 60, minutes % 60);
 
 	return xt_register_match(&xt_time_mt_reg);
 }

From 20710b3b81895c89e92bcc32ce85c0bede1171f8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 20 Mar 2018 12:33:51 +0100
Subject: [PATCH 1041/1678] netfilter: ctnetlink: synproxy support

This patch exposes synproxy information per-conntrack. Moreover, send
sequence adjustment events once server sends us the SYN,ACK packet, so
we can synchronize the sequence adjustment too for packets going as
reply from the server, as part of the synproxy logic.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 .../linux/netfilter/nf_conntrack_common.h     |  1 +
 .../linux/netfilter/nfnetlink_conntrack.h     | 10 +++
 net/ipv4/netfilter/ipt_SYNPROXY.c             |  8 +-
 net/ipv6/netfilter/ip6t_SYNPROXY.c            |  8 +-
 net/netfilter/nf_conntrack_netlink.c          | 87 ++++++++++++++++++-
 5 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 9574bd40870b..c712eb6879f1 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -129,6 +129,7 @@ enum ip_conntrack_events {
 	IPCT_NATSEQADJ = IPCT_SEQADJ,
 	IPCT_SECMARK,		/* new security mark has been set */
 	IPCT_LABEL,		/* new connlabel has been set */
+	IPCT_SYNPROXY,		/* synproxy has been set */
 #ifdef __KERNEL__
 	__IPCT_MAX
 #endif
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 7397e022ce6e..77987111cab0 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -54,6 +54,7 @@ enum ctattr_type {
 	CTA_MARK_MASK,
 	CTA_LABELS,
 	CTA_LABELS_MASK,
+	CTA_SYNPROXY,
 	__CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -190,6 +191,15 @@ enum ctattr_natseq {
 };
 #define CTA_NAT_SEQ_MAX (__CTA_NAT_SEQ_MAX - 1)
 
+enum ctattr_synproxy {
+	CTA_SYNPROXY_UNSPEC,
+	CTA_SYNPROXY_ISN,
+	CTA_SYNPROXY_ITS,
+	CTA_SYNPROXY_TSOFF,
+	__CTA_SYNPROXY_MAX,
+};
+#define CTA_SYNPROXY_MAX (__CTA_SYNPROXY_MAX - 1)
+
 enum ctattr_expect {
 	CTA_EXPECT_UNSPEC,
 	CTA_EXPECT_MASTER,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f75fc6b53115..690b17ef6a44 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -16,6 +16,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 
 static struct iphdr *
 synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
@@ -384,6 +385,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 		synproxy->isn = ntohl(th->ack_seq);
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy->its = opts.tsecr;
+
+		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
 		break;
 	case TCP_CONNTRACK_SYN_RECV:
 		if (!th->syn || !th->ack)
@@ -392,8 +395,10 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 		if (!synproxy_parse_options(skb, thoff, th, &opts))
 			return NF_DROP;
 
-		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
 			synproxy->tsoff = opts.tsval - synproxy->its;
+			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+		}
 
 		opts.options &= ~(XT_SYNPROXY_OPT_MSS |
 				  XT_SYNPROXY_OPT_WSCALE |
@@ -403,6 +408,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
 		synproxy_send_server_ack(net, state, skb, th, &opts);
 
 		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+		nf_conntrack_event_cache(IPCT_SEQADJ, ct);
 
 		swap(opts.tsval, opts.tsecr);
 		synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 437af8c95277..cb6d42b03cb5 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -18,6 +18,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 
 static struct ipv6hdr *
 synproxy_build_ip(struct net *net, struct sk_buff *skb,
@@ -405,6 +406,8 @@ static unsigned int ipv6_synproxy_hook(void *priv,
 		synproxy->isn = ntohl(th->ack_seq);
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy->its = opts.tsecr;
+
+		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
 		break;
 	case TCP_CONNTRACK_SYN_RECV:
 		if (!th->syn || !th->ack)
@@ -413,8 +416,10 @@ static unsigned int ipv6_synproxy_hook(void *priv,
 		if (!synproxy_parse_options(skb, thoff, th, &opts))
 			return NF_DROP;
 
-		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
 			synproxy->tsoff = opts.tsval - synproxy->its;
+			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+		}
 
 		opts.options &= ~(XT_SYNPROXY_OPT_MSS |
 				  XT_SYNPROXY_OPT_WSCALE |
@@ -424,6 +429,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
 		synproxy_send_server_ack(net, state, skb, th, &opts);
 
 		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+		nf_conntrack_event_cache(IPCT_SEQADJ, ct);
 
 		swap(opts.tsval, opts.tsecr);
 		synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8884d302d33a..11ef85a57244 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -440,6 +440,31 @@ err:
 	return -1;
 }
 
+static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct)
+{
+	struct nf_conn_synproxy *synproxy = nfct_synproxy(ct);
+	struct nlattr *nest_parms;
+
+	if (!synproxy)
+		return 0;
+
+	nest_parms = nla_nest_start(skb, CTA_SYNPROXY | NLA_F_NESTED);
+	if (!nest_parms)
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, CTA_SYNPROXY_ISN, htonl(synproxy->isn)) ||
+	    nla_put_be32(skb, CTA_SYNPROXY_ITS, htonl(synproxy->its)) ||
+	    nla_put_be32(skb, CTA_SYNPROXY_TSOFF, htonl(synproxy->tsoff)))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest_parms);
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
 {
 	if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
@@ -518,7 +543,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	    ctnetlink_dump_id(skb, ct) < 0 ||
 	    ctnetlink_dump_use(skb, ct) < 0 ||
 	    ctnetlink_dump_master(skb, ct) < 0 ||
-	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
+	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
+	    ctnetlink_dump_ct_synproxy(skb, ct) < 0)
 		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
@@ -730,6 +756,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		if (events & (1 << IPCT_SEQADJ) &&
 		    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 			goto nla_put_failure;
+
+		if (events & (1 << IPCT_SYNPROXY) &&
+		    ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+			goto nla_put_failure;
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
@@ -1689,6 +1719,39 @@ err:
 	return ret;
 }
 
+static const struct nla_policy synproxy_policy[CTA_SYNPROXY_MAX + 1] = {
+	[CTA_SYNPROXY_ISN]	= { .type = NLA_U32 },
+	[CTA_SYNPROXY_ITS]	= { .type = NLA_U32 },
+	[CTA_SYNPROXY_TSOFF]	= { .type = NLA_U32 },
+};
+
+static int ctnetlink_change_synproxy(struct nf_conn *ct,
+				     const struct nlattr * const cda[])
+{
+	struct nf_conn_synproxy *synproxy = nfct_synproxy(ct);
+	struct nlattr *tb[CTA_SYNPROXY_MAX + 1];
+	int err;
+
+	if (!synproxy)
+		return 0;
+
+	err = nla_parse_nested(tb, CTA_SYNPROXY_MAX, cda[CTA_SYNPROXY],
+			       synproxy_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[CTA_SYNPROXY_ISN] ||
+	    !tb[CTA_SYNPROXY_ITS] ||
+	    !tb[CTA_SYNPROXY_TSOFF])
+		return -EINVAL;
+
+	synproxy->isn = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ISN]));
+	synproxy->its = ntohl(nla_get_be32(tb[CTA_SYNPROXY_ITS]));
+	synproxy->tsoff = ntohl(nla_get_be32(tb[CTA_SYNPROXY_TSOFF]));
+
+	return 0;
+}
+
 static int
 ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
 {
@@ -1759,6 +1822,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 			return err;
 	}
 
+	if (cda[CTA_SYNPROXY]) {
+		err = ctnetlink_change_synproxy(ct, cda);
+		if (err < 0)
+			return err;
+	}
+
 	if (cda[CTA_LABELS]) {
 		err = ctnetlink_attach_labels(ct, cda);
 		if (err < 0)
@@ -1880,6 +1949,12 @@ ctnetlink_create_conntrack(struct net *net,
 			goto err2;
 	}
 
+	if (cda[CTA_SYNPROXY]) {
+		err = ctnetlink_change_synproxy(ct, cda);
+		if (err < 0)
+			goto err2;
+	}
+
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
@@ -1991,7 +2066,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 						      (1 << IPCT_HELPER) |
 						      (1 << IPCT_PROTOINFO) |
 						      (1 << IPCT_SEQADJ) |
-						      (1 << IPCT_MARK) | events,
+						      (1 << IPCT_MARK) |
+						      (1 << IPCT_SYNPROXY) |
+						      events,
 						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
 			nf_ct_put(ct);
@@ -2012,7 +2089,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 						      (1 << IPCT_LABEL) |
 						      (1 << IPCT_PROTOINFO) |
 						      (1 << IPCT_SEQADJ) |
-						      (1 << IPCT_MARK),
+						      (1 << IPCT_MARK) |
+						      (1 << IPCT_SYNPROXY),
 						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
 		}
@@ -2282,6 +2360,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
 	    ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
 		goto nla_put_failure;
 
+	if (ctnetlink_dump_ct_synproxy(skb, ct) < 0)
+		goto nla_put_failure;
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
 	if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0)
 		goto nla_put_failure;

From c846d8da5640a6c61d57e2c14a1a6d74b9c643b0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 14 Mar 2018 19:57:24 -0700
Subject: [PATCH 1042/1678] mlx5: Remove call to ida_pre_get

The mlx5 driver calls ida_pre_get() in a loop for no readily apparent
reason.  The driver uses ida_simple_get() which will call ida_pre_get()
by itself and there's no need to use ida_pre_get() unless using
ida_get_new().

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 10e16381f20a..3ba07c7096ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1647,7 +1647,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 
 	list_for_each_entry(iter, match_head, list) {
 		nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
-		ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL);
 	}
 
 search_again_locked:

From 830a8b1b001d3c2b4dfaa97d0eea5b9d6b03ae62 Mon Sep 17 00:00:00 2001
From: Shalom Toledo <shalomt@mellanox.com>
Date: Mon, 19 Mar 2018 09:51:00 +0200
Subject: [PATCH 1043/1678] mlxsw: pci: Set mbox dma addresses to zero when not
 used

Some of the opcodes don't use in, out or both mboxes. In such cases, the
mbox address is a reserved field and FW expects it to be zero.

Signed-off-by: Shalom Toledo <shalomt@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/pci.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 85faa87bf42d..e30c6ce3dcb4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1519,8 +1519,7 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
 			      u8 *p_status)
 {
 	struct mlxsw_pci *mlxsw_pci = bus_priv;
-	dma_addr_t in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
-	dma_addr_t out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
+	dma_addr_t in_mapaddr = 0, out_mapaddr = 0;
 	bool evreq = mlxsw_pci->cmd.nopoll;
 	unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
 	bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
@@ -1532,11 +1531,15 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
 	if (err)
 		return err;
 
-	if (in_mbox)
+	if (in_mbox) {
 		memcpy(mlxsw_pci->cmd.in_mbox.buf, in_mbox, in_mbox_size);
+		in_mapaddr = mlxsw_pci->cmd.in_mbox.mapaddr;
+	}
 	mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI, upper_32_bits(in_mapaddr));
 	mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, lower_32_bits(in_mapaddr));
 
+	if (out_mbox)
+		out_mapaddr = mlxsw_pci->cmd.out_mbox.mapaddr;
 	mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI, upper_32_bits(out_mapaddr));
 	mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, lower_32_bits(out_mapaddr));
 

From 7e8c711661ce21ceba5be861b1b3d30f325a4db1 Mon Sep 17 00:00:00 2001
From: Tal Bar <talb@mellanox.com>
Date: Mon, 19 Mar 2018 09:51:01 +0200
Subject: [PATCH 1044/1678] mlxsw: spectrum: Reserved field in mbox profile
 shouldn't be set

There is no need to set some of the fields within 'mbox_config_profile',
since they are reserved and capability mask should be set to zero.

Signed-off-by: Tal Bar <talb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 7884e8a2de35..a120602bca26 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3779,12 +3779,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 }
 
 static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
-	.used_max_vepa_channels		= 1,
-	.max_vepa_channels		= 0,
 	.used_max_mid			= 1,
 	.max_mid			= MLXSW_SP_MID_MAX,
-	.used_max_pgt			= 1,
-	.max_pgt			= 0,
 	.used_flood_tables		= 1,
 	.used_flood_mode		= 1,
 	.flood_mode			= 3,

From 808be37ae323ed5585a3e6257ccb5b435bd1a4b9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 19 Mar 2018 09:51:02 +0200
Subject: [PATCH 1045/1678] mlxsw: spectrum_acl: Adapt ACL configuration to new
 firmware versions

The driver currently creates empty ACL groups, binds them to the
requested port and then fills them with actual ACLs that point to TCAM
regions.

However, empty ACL groups are considered invalid and upcoming firmware
versions are going to forbid their binding.

Work around this limitation by only performing the binding after the
first ACL was added to the group.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_acl.c    | 43 +++++++++++--------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 21ed27ae51e3..1c1601a43978 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -160,6 +160,13 @@ bool mlxsw_sp_acl_block_disabled(struct mlxsw_sp_acl_block *block)
 	return block->disable_count;
 }
 
+static bool
+mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
+{
+	/* We hold a reference on ruleset ourselves */
+	return ruleset->ref_count == 2;
+}
+
 static int
 mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
 			  struct mlxsw_sp_acl_block *block,
@@ -341,21 +348,8 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_ht_insert;
 
-	if (!chain_index) {
-		/* We only need ruleset with chain index 0, the implicit one,
-		 * to be directly bound to device. The rest of the rulesets
-		 * are bound by "Goto action set".
-		 */
-		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block);
-		if (err)
-			goto err_ruleset_bind;
-	}
-
 	return ruleset;
 
-err_ruleset_bind:
-	rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
-			       mlxsw_sp_acl_ruleset_ht_params);
 err_ht_insert:
 	ops->ruleset_del(mlxsw_sp, ruleset->priv);
 err_ops_ruleset_add:
@@ -369,12 +363,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_acl_ruleset *ruleset)
 {
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
-	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
-	u32 chain_index = ruleset->ht_key.chain_index;
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
 
-	if (!chain_index)
-		mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block);
 	rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
 			       mlxsw_sp_acl_ruleset_ht_params);
 	ops->ruleset_del(mlxsw_sp, ruleset->priv);
@@ -688,10 +678,25 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_rhashtable_insert;
 
+	if (!ruleset->ht_key.chain_index &&
+	    mlxsw_sp_acl_ruleset_is_singular(ruleset)) {
+		/* We only need ruleset with chain index 0, the implicit
+		 * one, to be directly bound to device. The rest of the
+		 * rulesets are bound by "Goto action set".
+		 */
+		err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset,
+						      ruleset->ht_key.block);
+		if (err)
+			goto err_ruleset_block_bind;
+	}
+
 	list_add_tail(&rule->list, &mlxsw_sp->acl->rules);
 	ruleset->ht_key.block->rule_count++;
 	return 0;
 
+err_ruleset_block_bind:
+	rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
+			       mlxsw_sp_acl_rule_ht_params);
 err_rhashtable_insert:
 	ops->rule_del(mlxsw_sp, rule->priv);
 	return err;
@@ -705,6 +710,10 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
 
 	ruleset->ht_key.block->rule_count--;
 	list_del(&rule->list);
+	if (!ruleset->ht_key.chain_index &&
+	    mlxsw_sp_acl_ruleset_is_singular(ruleset))
+		mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
+						  ruleset->ht_key.block);
 	rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
 			       mlxsw_sp_acl_rule_ht_params);
 	ops->rule_del(mlxsw_sp, rule->priv);

From 04719507b756e5eb38282c33162a9c89beb6f440 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 19 Mar 2018 09:51:03 +0200
Subject: [PATCH 1046/1678] mlxsw: spectrum_acl: Do not invalidate already
 invalid ACL groups

When a new ACL group is created its region (ACL) list is initially
empty. Thus, the call to mlxsw_sp_acl_tcam_group_update() would
basically invalidate an already invalid (non-existent) group.

Remove the unnecessary call and make the function symmetric to its del()
counterpart.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index c6e180c2be1e..ad1b548e3cac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -228,10 +228,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		return err;
 
-	err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group);
-	if (err)
-		goto err_group_update;
-
 	err = rhashtable_init(&group->chunk_ht,
 			      &mlxsw_sp_acl_tcam_chunk_ht_params);
 	if (err)
@@ -240,7 +236,6 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 
 err_rhashtable_init:
-err_group_update:
 	mlxsw_sp_acl_tcam_group_id_put(tcam, group->id);
 	return err;
 }

From 5adc1668ddc42bb44fd6d006cacad74ed0cbf49d Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 4 Mar 2018 09:28:53 +0100
Subject: [PATCH 1047/1678] netfilter: ebtables: add support for matching ICMP
 type and code

We already have ICMPv6 type/code matches. This adds support for IPv4 ICMP
matches in the same way.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebt_ip.h | 13 ++++--
 net/bridge/netfilter/ebt_ip.c                | 43 +++++++++++++++-----
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h
index 8e462fb1983f..4ed7fbb0a482 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_ip.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h
@@ -24,8 +24,9 @@
 #define EBT_IP_PROTO 0x08
 #define EBT_IP_SPORT 0x10
 #define EBT_IP_DPORT 0x20
+#define EBT_IP_ICMP 0x40
 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\
- EBT_IP_SPORT | EBT_IP_DPORT )
+		     EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP)
 #define EBT_IP_MATCH "ip"
 
 /* the same values are used for the invflags */
@@ -38,8 +39,14 @@ struct ebt_ip_info {
 	__u8  protocol;
 	__u8  bitmask;
 	__u8  invflags;
-	__u16 sport[2];
-	__u16 dport[2];
+	union {
+		__u16 sport[2];
+		__u8 icmp_type[2];
+	};
+	union {
+		__u16 dport[2];
+		__u8 icmp_code[2];
+	};
 };
 
 #endif
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 2b46c50abce0..8cb8f8395768 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -19,9 +19,15 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip.h>
 
-struct tcpudphdr {
-	__be16 src;
-	__be16 dst;
+union pkthdr {
+	struct {
+		__be16 src;
+		__be16 dst;
+	} tcpudphdr;
+	struct {
+		u8 type;
+		u8 code;
+	} icmphdr;
 };
 
 static bool
@@ -30,8 +36,8 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
 	struct iphdr _iph;
-	const struct tcpudphdr *pptr;
-	struct tcpudphdr _ports;
+	const union pkthdr *pptr;
+	union pkthdr _pkthdr;
 
 	ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
 	if (ih == NULL)
@@ -50,29 +56,38 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (info->bitmask & EBT_IP_PROTO) {
 		if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
 			return false;
-		if (!(info->bitmask & EBT_IP_DPORT) &&
-		    !(info->bitmask & EBT_IP_SPORT))
+		if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT |
+				       EBT_IP_ICMP)))
 			return true;
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			return false;
+
+		/* min icmp headersize is 4, so sizeof(_pkthdr) is ok. */
 		pptr = skb_header_pointer(skb, ih->ihl*4,
-					  sizeof(_ports), &_ports);
+					  sizeof(_pkthdr), &_pkthdr);
 		if (pptr == NULL)
 			return false;
 		if (info->bitmask & EBT_IP_DPORT) {
-			u32 dst = ntohs(pptr->dst);
+			u32 dst = ntohs(pptr->tcpudphdr.dst);
 			if (NF_INVF(info, EBT_IP_DPORT,
 				    dst < info->dport[0] ||
 				    dst > info->dport[1]))
 				return false;
 		}
 		if (info->bitmask & EBT_IP_SPORT) {
-			u32 src = ntohs(pptr->src);
+			u32 src = ntohs(pptr->tcpudphdr.src);
 			if (NF_INVF(info, EBT_IP_SPORT,
 				    src < info->sport[0] ||
 				    src > info->sport[1]))
 				return false;
 		}
+		if ((info->bitmask & EBT_IP_ICMP) &&
+		    NF_INVF(info, EBT_IP_ICMP,
+			    pptr->icmphdr.type < info->icmp_type[0] ||
+			    pptr->icmphdr.type > info->icmp_type[1] ||
+			    pptr->icmphdr.code < info->icmp_code[0] ||
+			    pptr->icmphdr.code > info->icmp_code[1]))
+			return false;
 	}
 	return true;
 }
@@ -101,6 +116,14 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
 		return -EINVAL;
+	if (info->bitmask & EBT_IP_ICMP) {
+		if ((info->invflags & EBT_IP_PROTO) ||
+		    info->protocol != IPPROTO_ICMP)
+			return -EINVAL;
+		if (info->icmp_type[0] > info->icmp_type[1] ||
+		    info->icmp_code[0] > info->icmp_code[1])
+			return -EINVAL;
+	}
 	return 0;
 }
 

From 78d9f4d49bbecd101b4e5faf19f8f70719fee2ca Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sun, 4 Mar 2018 09:28:54 +0100
Subject: [PATCH 1048/1678] netfilter: ebtables: add support for matching IGMP
 type

We already have ICMPv6 type/code matches (which can be used to distinguish
different types of MLD packets). Add support for IPv4 IGMP matches in the
same way.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebt_ip.h |  4 +++-
 net/bridge/netfilter/ebt_ip.c                | 19 +++++++++++++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h
index 4ed7fbb0a482..46d6261370b0 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_ip.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_ip.h
@@ -25,8 +25,9 @@
 #define EBT_IP_SPORT 0x10
 #define EBT_IP_DPORT 0x20
 #define EBT_IP_ICMP 0x40
+#define EBT_IP_IGMP 0x80
 #define EBT_IP_MASK (EBT_IP_SOURCE | EBT_IP_DEST | EBT_IP_TOS | EBT_IP_PROTO |\
-		     EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP)
+		     EBT_IP_SPORT | EBT_IP_DPORT | EBT_IP_ICMP | EBT_IP_IGMP)
 #define EBT_IP_MATCH "ip"
 
 /* the same values are used for the invflags */
@@ -42,6 +43,7 @@ struct ebt_ip_info {
 	union {
 		__u16 sport[2];
 		__u8 icmp_type[2];
+		__u8 igmp_type[2];
 	};
 	union {
 		__u16 dport[2];
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 8cb8f8395768..ffaa8ce2e724 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -28,6 +28,9 @@ union pkthdr {
 		u8 type;
 		u8 code;
 	} icmphdr;
+	struct {
+		u8 type;
+	} igmphdr;
 };
 
 static bool
@@ -57,12 +60,12 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
 			return false;
 		if (!(info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT |
-				       EBT_IP_ICMP)))
+				       EBT_IP_ICMP | EBT_IP_IGMP)))
 			return true;
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			return false;
 
-		/* min icmp headersize is 4, so sizeof(_pkthdr) is ok. */
+		/* min icmp/igmp headersize is 4, so sizeof(_pkthdr) is ok. */
 		pptr = skb_header_pointer(skb, ih->ihl*4,
 					  sizeof(_pkthdr), &_pkthdr);
 		if (pptr == NULL)
@@ -88,6 +91,11 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			    pptr->icmphdr.code < info->icmp_code[0] ||
 			    pptr->icmphdr.code > info->icmp_code[1]))
 			return false;
+		if ((info->bitmask & EBT_IP_IGMP) &&
+		    NF_INVF(info, EBT_IP_IGMP,
+			    pptr->igmphdr.type < info->igmp_type[0] ||
+			    pptr->igmphdr.type > info->igmp_type[1]))
+			return false;
 	}
 	return true;
 }
@@ -124,6 +132,13 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
 		    info->icmp_code[0] > info->icmp_code[1])
 			return -EINVAL;
 	}
+	if (info->bitmask & EBT_IP_IGMP) {
+		if ((info->invflags & EBT_IP_PROTO) ||
+		    info->protocol != IPPROTO_IGMP)
+			return -EINVAL;
+		if (info->igmp_type[0] > info->igmp_type[1])
+			return -EINVAL;
+	}
 	return 0;
 }
 

From 79a68b2631d8ec3e149081b1ecfb23509c040b4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 20 Mar 2018 10:44:40 +0100
Subject: [PATCH 1049/1678] net: dsa: mv88e6xxx: Fix name of switch 88E6141
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The switch name is emitted in the kernel log, so having the right name
there is nice.

Fixes: 1558727a1c1b ("net: dsa: mv88e6xxx: Add support for ethernet switch 88E6141")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index fe46b40195fa..6439f7d6c4d6 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3427,7 +3427,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 	[MV88E6141] = {
 		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141,
 		.family = MV88E6XXX_FAMILY_6341,
-		.name = "Marvell 88E6341",
+		.name = "Marvell 88E6141",
 		.num_databases = 4096,
 		.num_ports = 6,
 		.num_internal_phys = 5,

From a708767e40ec82079f6d2b1f01acedc352fb5d57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 20 Mar 2018 10:44:41 +0100
Subject: [PATCH 1050/1678] net: dsa: mv88e6xxx: Fix typo in a comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 6439f7d6c4d6..fd78378ad6b1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -4175,7 +4175,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 	}
 
 	/* Has to be performed before the MDIO bus is created, because
-	 * the PHYs will link there interrupts to these interrupt
+	 * the PHYs will link their interrupts to these interrupt
 	 * controllers
 	 */
 	mutex_lock(&chip->reg_lock);

From 36d6ea94b07120bfa3c7db7772175037d9799d5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 20 Mar 2018 10:44:42 +0100
Subject: [PATCH 1051/1678] net: dsa: mv88e6xxx: Fix interrupt name for g2 irq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This changes the respective line in /proc/interrupts from

 49:          x          x  mv88e6xxx-g1   7 Edge      mv88e6xxx-g1

to

 49:          x          x  mv88e6xxx-g1   7 Edge      mv88e6xxx-g2

which makes more sense.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 6c620974fef3..0ce627fded48 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -1090,7 +1090,7 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
 
 	err = request_threaded_irq(chip->device_irq, NULL,
 				   mv88e6xxx_g2_irq_thread_fn,
-				   IRQF_ONESHOT, "mv88e6xxx-g1", chip);
+				   IRQF_ONESHOT, "mv88e6xxx-g2", chip);
 	if (err)
 		goto out;
 

From 78262f4575c29f185947fe58952cd1beabc74f82 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 20 Mar 2018 00:21:15 +0100
Subject: [PATCH 1052/1678] bpf, doc: add description wrt native/bpf clang
 target and pointer size

As this recently came up on netdev [0], lets add it to the BPF devel doc.

  [0] https://www.spinics.net/lists/netdev/msg489612.html

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/bpf_devel_QA.txt | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index 84cbb302f2b5..1a0b704e1a38 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -539,6 +539,18 @@ A: Although LLVM IR generation and optimization try to stay architecture
        The clang option "-fno-jump-tables" can be used to disable
        switch table generation.
 
+     - For clang -target bpf, it is guaranteed that pointer or long /
+       unsigned long types will always have a width of 64 bit, no matter
+       whether underlying clang binary or default target (or kernel) is
+       32 bit. However, when native clang target is used, then it will
+       compile these types based on the underlying architecture's conventions,
+       meaning in case of 32 bit architecture, pointer or long / unsigned
+       long types e.g. in BPF context structure will have width of 32 bit
+       while the BPF LLVM back end still operates in 64 bit. The native
+       target is mostly needed in tracing for the case of walking pt_regs
+       or other kernel structures where CPU's register width matters.
+       Otherwise, clang -target bpf is generally recommended.
+
    You should use default target when:
 
      - Your program includes a header file, e.g., ptrace.h, which eventually

From 796e1112176ada7ebc084491458dfbfbe3a193b6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 21 Mar 2018 09:07:02 +0100
Subject: [PATCH 1053/1678] mac80211_hwsim: fix secondary MAC address
 assignment

OR'ing in 0x40 before a memcpy() to overwrite the value doesn't
do much good - flip the order of operations are reported and
tested by Jouni.

Fixes: cb1a5bae5684 ("mac80211_hwsim: add permanent mac address option for new radios")
Reported-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 7b6c3640a94f..930ddef91093 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2584,8 +2584,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 		addr[4] = idx;
 		memcpy(data->addresses[0].addr, addr, ETH_ALEN);
 		/* Why need here second address ? */
-		data->addresses[1].addr[0] |= 0x40;
 		memcpy(data->addresses[1].addr, addr, ETH_ALEN);
+		data->addresses[1].addr[0] |= 0x40;
 		hw->wiphy->n_addresses = 2;
 		hw->wiphy->addresses = data->addresses;
 		/* possible address clash is checked at hash table insertion */

From 8cfd36a0b53aeb4ec21d81eb79706697b84dfc3d Mon Sep 17 00:00:00 2001
From: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Date: Wed, 7 Mar 2018 18:11:07 +0100
Subject: [PATCH 1054/1678] mac80211_hwsim: fix use-after-free bug in
 hwsim_exit_net

When destroying a net namespace, all hwsim interfaces, which are not
created in default namespace are deleted. But the async deletion of the
interfaces could last longer than the actual destruction of the
namespace, which results to an use after free bug. Therefore use
synchronous deletion in this case.

Fixes: 100cb9ff40e0 ("mac80211_hwsim: Allow managing radios from non-initial namespaces")
Reported-by: syzbot+70ce058e01259de7bb1d@syzkaller.appspotmail.com
Signed-off-by: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 930ddef91093..d9527c7b50d4 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3528,8 +3528,12 @@ static void __net_exit hwsim_exit_net(struct net *net)
 		list_del(&data->list);
 		rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
 				       hwsim_rht_params);
-		INIT_WORK(&data->destroy_work, destroy_radio);
-		queue_work(hwsim_wq, &data->destroy_work);
+		hwsim_radios_generation++;
+		spin_unlock_bh(&hwsim_radio_lock);
+		mac80211_hwsim_del_radio(data,
+					 wiphy_name(data->hw->wiphy),
+					 NULL);
+		spin_lock_bh(&hwsim_radio_lock);
 	}
 	spin_unlock_bh(&hwsim_radio_lock);
 

From 1ad22fb5bb53ce6bf5377f25529c0a007c61c6f5 Mon Sep 17 00:00:00 2001
From: Tosoni <jp.tosoni@acksys.fr>
Date: Wed, 14 Mar 2018 16:58:34 +0000
Subject: [PATCH 1055/1678] mac80211: inform wireless layer when frame RSSI is
 invalid

When the low-level driver returns an invalid RSSI indication,
set the signal value to 0 as an indication to the upper layer.

Also, skip average level computation if signal is invalid.

Signed-off-by: Jean Pierre TOSONI <jp.tosoni@acksys.fr>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 160 ++++++++++++++++++++++++--------------------
 net/mac80211/rx.c   |   6 +-
 net/mac80211/scan.c |   4 +-
 3 files changed, 93 insertions(+), 77 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0024eff9bb84..ea3ba03fb952 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3306,82 +3306,14 @@ static const u64 care_about_ies =
 	(1ULL << WLAN_EID_HT_OPERATION) |
 	(1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
 
-static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
-				     struct ieee80211_mgmt *mgmt, size_t len,
-				     struct ieee80211_rx_status *rx_status)
+static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata,
+					struct ieee80211_if_managed *ifmgd,
+					struct ieee80211_bss_conf *bss_conf,
+					struct ieee80211_local *local,
+					struct ieee80211_rx_status *rx_status)
 {
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
-	size_t baselen;
-	struct ieee802_11_elems elems;
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_chanctx_conf *chanctx_conf;
-	struct ieee80211_channel *chan;
-	struct sta_info *sta;
-	u32 changed = 0;
-	bool erp_valid;
-	u8 erp_value = 0;
-	u32 ncrc;
-	u8 *bssid;
-	u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
-
-	sdata_assert_lock(sdata);
-
-	/* Process beacon from the current BSS */
-	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
-	if (baselen > len)
-		return;
-
-	rcu_read_lock();
-	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-	if (!chanctx_conf) {
-		rcu_read_unlock();
-		return;
-	}
-
-	if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
-		rcu_read_unlock();
-		return;
-	}
-	chan = chanctx_conf->def.chan;
-	rcu_read_unlock();
-
-	if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
-	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
-		ieee802_11_parse_elems(mgmt->u.beacon.variable,
-				       len - baselen, false, &elems);
-
-		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
-		if (elems.tim && !elems.parse_error) {
-			const struct ieee80211_tim_ie *tim_ie = elems.tim;
-			ifmgd->dtim_period = tim_ie->dtim_period;
-		}
-		ifmgd->have_beacon = true;
-		ifmgd->assoc_data->need_beacon = false;
-		if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
-			sdata->vif.bss_conf.sync_tsf =
-				le64_to_cpu(mgmt->u.beacon.timestamp);
-			sdata->vif.bss_conf.sync_device_ts =
-				rx_status->device_timestamp;
-			if (elems.tim)
-				sdata->vif.bss_conf.sync_dtim_count =
-					elems.tim->dtim_count;
-			else
-				sdata->vif.bss_conf.sync_dtim_count = 0;
-		}
-		/* continue assoc process */
-		ifmgd->assoc_data->timeout = jiffies;
-		ifmgd->assoc_data->timeout_started = true;
-		run_again(sdata, ifmgd->assoc_data->timeout);
-		return;
-	}
-
-	if (!ifmgd->associated ||
-	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
-		return;
-	bssid = ifmgd->associated->bssid;
-
 	/* Track average RSSI from the Beacon frames of the current AP */
+
 	if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
 		ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
 		ewma_beacon_signal_init(&ifmgd->ave_beacon_signal);
@@ -3468,6 +3400,86 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				sig, GFP_KERNEL);
 		}
 	}
+}
+
+static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
+				     struct ieee80211_mgmt *mgmt, size_t len,
+				     struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
+	size_t baselen;
+	struct ieee802_11_elems elems;
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *chanctx_conf;
+	struct ieee80211_channel *chan;
+	struct sta_info *sta;
+	u32 changed = 0;
+	bool erp_valid;
+	u8 erp_value = 0;
+	u32 ncrc;
+	u8 *bssid;
+	u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
+	sdata_assert_lock(sdata);
+
+	/* Process beacon from the current BSS */
+	baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
+	if (baselen > len)
+		return;
+
+	rcu_read_lock();
+	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+	if (!chanctx_conf) {
+		rcu_read_unlock();
+		return;
+	}
+
+	if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
+		rcu_read_unlock();
+		return;
+	}
+	chan = chanctx_conf->def.chan;
+	rcu_read_unlock();
+
+	if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
+	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
+		ieee802_11_parse_elems(mgmt->u.beacon.variable,
+				       len - baselen, false, &elems);
+
+		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+		if (elems.tim && !elems.parse_error) {
+			const struct ieee80211_tim_ie *tim_ie = elems.tim;
+			ifmgd->dtim_period = tim_ie->dtim_period;
+		}
+		ifmgd->have_beacon = true;
+		ifmgd->assoc_data->need_beacon = false;
+		if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+			sdata->vif.bss_conf.sync_tsf =
+				le64_to_cpu(mgmt->u.beacon.timestamp);
+			sdata->vif.bss_conf.sync_device_ts =
+				rx_status->device_timestamp;
+			if (elems.tim)
+				sdata->vif.bss_conf.sync_dtim_count =
+					elems.tim->dtim_count;
+			else
+				sdata->vif.bss_conf.sync_dtim_count = 0;
+		}
+		/* continue assoc process */
+		ifmgd->assoc_data->timeout = jiffies;
+		ifmgd->assoc_data->timeout_started = true;
+		run_again(sdata, ifmgd->assoc_data->timeout);
+		return;
+	}
+
+	if (!ifmgd->associated ||
+	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+		return;
+	bssid = ifmgd->associated->bssid;
+
+	if (!(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL))
+		ieee80211_handle_beacon_sig(sdata, ifmgd, bss_conf,
+					    local, rx_status);
 
 	if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) {
 		mlme_dbg_ratelimited(sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9c898a3688c6..27bb1f0b5e52 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2804,7 +2804,8 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
 	    !(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
 		int sig = 0;
 
-		if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
+		if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
+		    !(status->flag & RX_FLAG_NO_SIGNAL_VAL))
 			sig = status->signal;
 
 		cfg80211_report_obss_beacon(rx->local->hw.wiphy,
@@ -3145,7 +3146,8 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
 	 * it transmitted were processed or returned.
 	 */
 
-	if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM))
+	if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
+	    !(status->flag & RX_FLAG_NO_SIGNAL_VAL))
 		sig = status->signal;
 
 	if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index ef2becaade50..a3b1bcc2b461 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -73,7 +73,9 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	bool signal_valid;
 	struct ieee80211_sub_if_data *scan_sdata;
 
-	if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
+	if (rx_status->flag & RX_FLAG_NO_SIGNAL_VAL)
+		bss_meta.signal = 0; /* invalid signal indication */
+	else if (ieee80211_hw_check(&local->hw, SIGNAL_DBM))
 		bss_meta.signal = rx_status->signal * 100;
 	else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC))
 		bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal;

From 2cb021f5de55b1d158fa18b0215a4613c3289a82 Mon Sep 17 00:00:00 2001
From: Dmitry Lebed <dlebed@quantenna.com>
Date: Thu, 1 Mar 2018 12:39:16 +0300
Subject: [PATCH 1056/1678] cfg80211/nl80211: add CAC_STARTED event

CAC_STARTED event is needed for DFS offload feature and
should be generated by driver/HW if DFS_OFFLOAD is enabled.

Signed-off-by: Dmitry Lebed <dlebed@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 net/wireless/mlme.c          | 7 +++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c13c84304be3..b8e989073cbb 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5204,6 +5204,8 @@ enum nl80211_smps_mode {
  *	non-operating channel is expired and no longer valid. New CAC must
  *	be done on this channel before starting the operation. This is not
  *	applicable for ETSI dfs domain where pre-CAC is valid for ever.
+ * @NL80211_RADAR_CAC_STARTED: Channel Availability Check has been started,
+ *	should be generated by HW if NL80211_EXT_FEATURE_DFS_OFFLOAD is enabled.
  */
 enum nl80211_radar_event {
 	NL80211_RADAR_DETECTED,
@@ -5211,6 +5213,7 @@ enum nl80211_radar_event {
 	NL80211_RADAR_CAC_ABORTED,
 	NL80211_RADAR_NOP_FINISHED,
 	NL80211_RADAR_PRE_CAC_EXPIRED,
+	NL80211_RADAR_CAC_STARTED,
 };
 
 /**
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index bbb9907bfa86..6b6818dd76bd 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -888,14 +888,17 @@ void cfg80211_cac_event(struct net_device *netdev,
 		       sizeof(struct cfg80211_chan_def));
 		queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk);
 		cfg80211_sched_dfs_chan_update(rdev);
-		break;
+		/* fall through */
 	case NL80211_RADAR_CAC_ABORTED:
+		wdev->cac_started = false;
+		break;
+	case NL80211_RADAR_CAC_STARTED:
+		wdev->cac_started = true;
 		break;
 	default:
 		WARN_ON(1);
 		return;
 	}
-	wdev->cac_started = false;
 
 	nl80211_radar_notify(rdev, chandef, event, netdev, gfp);
 }

From 13cf6dec93e021ebd297619ea1926aea31b6430b Mon Sep 17 00:00:00 2001
From: Dmitry Lebed <dlebed@quantenna.com>
Date: Thu, 1 Mar 2018 12:39:15 +0300
Subject: [PATCH 1057/1678] cfg80211/nl80211: add DFS offload flag

Add wiphy EXT_FEATURE flag to indicate that HW or driver does
all DFS actions by itself.
User-space functionality already implemented in hostapd using
vendor-specific (QCA) OUI to advertise DFS offload support.
Need to introduce generic flag to inform about DFS offload support.
For devices with DFS_OFFLOAD flag set user-space will no longer
need to issue CAC or do any actions in response to
"radar detected" events. HW will do everything by itself and send
events to user-space to indicate that CAC was started/finished, etc.

Signed-off-by: Dmitrii Lebed <dlebed@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  7 +++++++
 net/wireless/nl80211.c       | 12 ++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b8e989073cbb..60fefc5a2ea4 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4999,6 +4999,12 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_LOW_SPAN_SCAN: Driver supports low span scan.
  * @NL80211_EXT_FEATURE_LOW_POWER_SCAN: Driver supports low power scan.
  * @NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN: Driver supports high accuracy scan.
+ * @NL80211_EXT_FEATURE_DFS_OFFLOAD: HW/driver will offload DFS actions.
+ *	Device or driver will do all DFS-related actions by itself,
+ *	informing user-space about CAC progress, radar detection event,
+ *	channel change triggered by radar detection event.
+ *	No need to start CAC from user-space, no need to react to
+ *	"radar detected" event.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5029,6 +5035,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_LOW_SPAN_SCAN,
 	NL80211_EXT_FEATURE_LOW_POWER_SCAN,
 	NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
+	NL80211_EXT_FEATURE_DFS_OFFLOAD,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a910150f8169..fe27ab443d01 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7551,12 +7551,13 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
 	struct cfg80211_chan_def chandef;
 	enum nl80211_dfs_regions dfs_region;
 	unsigned int cac_time_ms;
 	int err;
 
-	dfs_region = reg_get_dfs_region(wdev->wiphy);
+	dfs_region = reg_get_dfs_region(wiphy);
 	if (dfs_region == NL80211_DFS_UNSET)
 		return -EINVAL;
 
@@ -7570,17 +7571,20 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
 	if (wdev->cac_started)
 		return -EBUSY;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef,
-					    wdev->iftype);
+	err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype);
 	if (err < 0)
 		return err;
 
 	if (err == 0)
 		return -EINVAL;
 
-	if (!cfg80211_chandef_dfs_usable(wdev->wiphy, &chandef))
+	if (!cfg80211_chandef_dfs_usable(wiphy, &chandef))
 		return -EINVAL;
 
+	/* CAC start is offloaded to HW and can't be started manually */
+	if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD))
+		return -EOPNOTSUPP;
+
 	if (!rdev->ops->start_radar_detection)
 		return -EOPNOTSUPP;
 

From 1ae7762760736d4f7e4ea43e9ed03a608685c3d9 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Mar 2018 14:39:05 +0300
Subject: [PATCH 1058/1678] net: Convert can_pernet_ops

These pernet_operations create and destroy /proc entries
and cancel per-net timer.

Also, there are unneed iterations over empty list of net
devices, since all net devices must be already moved
to init_net or unregistered by default_device_ops. This
already was mentioned here:

https://marc.info/?l=linux-can&m=150169589119335&w=2

So, it looks safe to make them async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/af_can.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6da324550eec..e899970398a1 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -954,6 +954,7 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
 static struct pernet_operations can_pernet_ops __read_mostly = {
 	.init = can_pernet_init,
 	.exit = can_pernet_exit,
+	.async = true,
 };
 
 static __init int can_init(void)

From 08012631d6277cebe388971cd84fe14d1cc49a00 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Mar 2018 14:45:37 +0300
Subject: [PATCH 1059/1678] net: Convert lowpan_frags_ops

These pernet_operations register and unregister sysctl.
Also, there is inet_frags_exit_net() called in exit method,
which has to be safe after a560002437d3 "net: Fix hlist
corruptions in inet_evict_bucket()".

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan/reassembly.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 85bf86ad6b18..a9ccb1322f69 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -603,6 +603,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
 static struct pernet_operations lowpan_frags_ops = {
 	.init = lowpan_frags_init_net,
 	.exit = lowpan_frags_exit_net,
+	.async = true,
 };
 
 int __init lowpan_net_frag_init(void)

From aa65f636540539e2e1fd77bdcd8fc7060d19d47b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 19 Mar 2018 14:45:46 +0300
Subject: [PATCH 1060/1678] net: Convert nf_ct_net_ops

These pernet_operations register and unregister sysctl.
Also, there is inet_frags_exit_net() called in exit method,
which has to be safe after a560002437d3 "net: Fix hlist
corruptions in inet_evict_bucket()".

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..34136fe80ed5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -646,6 +646,7 @@ static void nf_ct_net_exit(struct net *net)
 static struct pernet_operations nf_ct_net_ops = {
 	.init = nf_ct_net_init,
 	.exit = nf_ct_net_exit,
+	.async = true,
 };
 
 int nf_ct_frag6_init(void)

From 94e5e3087a67c765be98592b36d8d187566478d5 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Mon, 19 Mar 2018 13:17:30 +0100
Subject: [PATCH 1061/1678] net: add uevent socket member

This commit adds struct uevent_sock to struct net. Since struct uevent_sock
records the position of the uevent socket in the uevent socket list we can
trivially remove it from the uevent socket list during cleanup. This speeds
up the old removal codepath.
Note, list_del() will hit __list_del_entry_valid() in its call chain which
will validate that the element is a member of the list. If it isn't it will
take care that the list is not modified.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  4 +++-
 lib/kobject_uevent.c        | 17 +++++++----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 71abc8d79178..09e30bdc7876 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -40,7 +40,7 @@ struct net_device;
 struct sock;
 struct ctl_table_header;
 struct net_generic;
-struct sock;
+struct uevent_sock;
 struct netns_ipvs;
 
 
@@ -83,6 +83,8 @@ struct net {
 	struct sock 		*rtnl;			/* rtnetlink socket */
 	struct sock		*genl_sock;
 
+	struct uevent_sock	*uevent_sock;		/* uevent socket */
+
 	struct list_head 	dev_base_head;
 	struct hlist_head 	*dev_name_head;
 	struct hlist_head	*dev_index_head;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9539d7ab3ea8..54cfbaeb3a4e 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -32,11 +32,13 @@ u64 uevent_seqnum;
 #ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 #endif
-#ifdef CONFIG_NET
+
 struct uevent_sock {
 	struct list_head list;
 	struct sock *sk;
 };
+
+#ifdef CONFIG_NET
 static LIST_HEAD(uevent_sock_list);
 #endif
 
@@ -621,6 +623,9 @@ static int uevent_net_init(struct net *net)
 		kfree(ue_sk);
 		return -ENODEV;
 	}
+
+	net->uevent_sock = ue_sk;
+
 	mutex_lock(&uevent_sock_mutex);
 	list_add_tail(&ue_sk->list, &uevent_sock_list);
 	mutex_unlock(&uevent_sock_mutex);
@@ -629,17 +634,9 @@ static int uevent_net_init(struct net *net)
 
 static void uevent_net_exit(struct net *net)
 {
-	struct uevent_sock *ue_sk;
+	struct uevent_sock *ue_sk = net->uevent_sock;
 
 	mutex_lock(&uevent_sock_mutex);
-	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
-		if (sock_net(ue_sk->sk) == net)
-			goto found;
-	}
-	mutex_unlock(&uevent_sock_mutex);
-	return;
-
-found:
 	list_del(&ue_sk->list);
 	mutex_unlock(&uevent_sock_mutex);
 

From 692ec06d7c92af8ca841a6367648b9b3045344fd Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Mon, 19 Mar 2018 13:17:31 +0100
Subject: [PATCH 1062/1678] netns: send uevent messages

This patch adds a receive method to NETLINK_KOBJECT_UEVENT netlink sockets
to allow sending uevent messages into the network namespace the socket
belongs to.

Currently non-initial network namespaces are already isolated and don't
receive uevents. There are a number of cases where it is beneficial for a
sufficiently privileged userspace process to send a uevent into a network
namespace.

One such use case would be debugging and fuzzing of a piece of software
which listens and reacts to uevents. By running a copy of that software
inside a network namespace, specific uevents could then be presented to it.
More concretely, this would allow for easy testing of udevd/ueventd.

This will also allow some piece of software to run components inside a
separate network namespace and then effectively filter what that software
can receive. Some examples of software that do directly listen to uevents
and that we have in the past attempted to run inside a network namespace
are rbd (CEPH client) or the X server.

Implementation:
The implementation has been kept as simple as possible from the kernel's
perspective. Specifically, a simple input method uevent_net_rcv() is added
to NETLINK_KOBJECT_UEVENT sockets which completely reuses existing
af_netlink infrastructure and does neither add an additional netlink family
nor requires any user-visible changes.

For example, by using netlink_rcv_skb() we can make use of existing netlink
infrastructure to report back informative error messages to userspace.

Furthermore, this implementation does not introduce any overhead for
existing uevent generating codepaths. The struct netns got a new uevent
socket member that records the uevent socket associated with that network
namespace including its position in the uevent socket list. Since we record
the uevent socket for each network namespace in struct net we don't have to
walk the whole uevent socket list. Instead we can directly retrieve the
relevant uevent socket and send the message. At exit time we can now also
trivially remove the uevent socket from the uevent socket list. This keeps
the codepath very performant without introducing needless overhead and even
makes older codepaths faster.

Uevent sequence numbers are kept global. When a uevent message is sent to
another network namespace the implementation will simply increment the
global uevent sequence number and append it to the received uevent. This
has the advantage that the kernel will never need to parse the received
uevent message to replace any existing uevent sequence numbers. Instead it
is up to the userspace process to remove any existing uevent sequence
numbers in case the uevent message to be sent contains any.

Security:
In order for a caller to send uevent messages to a target network namespace
the caller must have CAP_SYS_ADMIN in the owning user namespace of the
target network namespace. Additionally, any received uevent message is
verified to not exceed size UEVENT_BUFFER_SIZE. This includes the space
needed to append the uevent sequence number.

Testing:
This patch has been tested and verified to work with the following udev
implementations:
1. CentOS 6 with udevd version 147
2. Debian Sid with systemd-udevd version 237
3. Android 7.1.1 with ueventd

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/kobject_uevent.c | 79 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 54cfbaeb3a4e..fa10ad8e9b17 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
 
 
@@ -604,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+				struct netlink_ext_ack *extack)
+{
+	/* u64 to chars: 2^64 - 1 = 21 chars */
+	char buf[sizeof("SEQNUM=") + 21];
+	struct sk_buff *skbc;
+	int ret;
+
+	/* bump and prepare sequence number */
+	ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return -ENOMEM;
+	ret++;
+
+	/* verify message does not overflow */
+	if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+		NL_SET_ERR_MSG(extack, "uevent message too big");
+		return -EINVAL;
+	}
+
+	/* copy skb and extend to accommodate sequence number */
+	skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+	if (!skbc)
+		return -ENOMEM;
+
+	/* append sequence number */
+	skb_put_data(skbc, buf, ret);
+
+	/* remove msg header */
+	skb_pull(skbc, NLMSG_HDRLEN);
+
+	/* set portid 0 to inform userspace message comes from kernel */
+	NETLINK_CB(skbc).portid = 0;
+	NETLINK_CB(skbc).dst_group = 1;
+
+	ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+	/* ENOBUFS should be handled in userspace */
+	if (ret == -ENOBUFS || ret == -ESRCH)
+		ret = 0;
+
+	return ret;
+}
+
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct netlink_ext_ack *extack)
+{
+	struct net *net;
+	int ret;
+
+	if (!nlmsg_data(nlh))
+		return -EINVAL;
+
+	/*
+	 * Verify that we are allowed to send messages to the target
+	 * network namespace. The caller must have CAP_SYS_ADMIN in the
+	 * owning user namespace of the target network namespace.
+	 */
+	net = sock_net(NETLINK_CB(skb).sk);
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+		NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+		return -EPERM;
+	}
+
+	mutex_lock(&uevent_sock_mutex);
+	ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+	mutex_unlock(&uevent_sock_mutex);
+
+	return ret;
+}
+
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+	netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
 static int uevent_net_init(struct net *net)
 {
 	struct uevent_sock *ue_sk;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= 1,
-		.flags	= NL_CFG_F_NONROOT_RECV,
+		.input = uevent_net_rcv,
+		.flags	= NL_CFG_F_NONROOT_RECV
 	};
 
 	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);

From bdf5bd7f21323493dbe5f2c723dc33f2fbb0241a Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Mon, 19 Mar 2018 06:52:48 -0700
Subject: [PATCH 1063/1678] rds: tcp: remove register_netdevice_notifier
 infrastructure.

The netns deletion path does not need to wait for all net_devices
to be unregistered before dismantling rds_tcp state for the netns
(we are able to dismantle this state on module unload even when
all net_devices are active so there is no dependency here).

This patch removes code related to netdevice notifiers and
refactors all the code needed to dismantle rds_tcp state
into a ->exit callback for the pernet_operations used with
register_pernet_device().

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c | 93 +++++++++++++--------------------------------------
 1 file changed, 23 insertions(+), 70 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08ea9cd5c2f6..4f3a32c38bf5 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -485,40 +485,6 @@ fail:
 	return err;
 }
 
-static void __net_exit rds_tcp_exit_net(struct net *net)
-{
-	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
-
-	if (rtn->rds_tcp_sysctl)
-		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-
-	if (net != &init_net && rtn->ctl_table)
-		kfree(rtn->ctl_table);
-
-	/* If rds_tcp_exit_net() is called as a result of netns deletion,
-	 * the rds_tcp_kill_sock() device notifier would already have cleaned
-	 * up the listen socket, thus there is no work to do in this function.
-	 *
-	 * If rds_tcp_exit_net() is called as a result of module unload,
-	 * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
-	 * we do need to clean up the listen socket here.
-	 */
-	if (rtn->rds_tcp_listen_sock) {
-		struct socket *lsock = rtn->rds_tcp_listen_sock;
-
-		rtn->rds_tcp_listen_sock = NULL;
-		rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
-	}
-}
-
-static struct pernet_operations rds_tcp_net_ops = {
-	.init = rds_tcp_init_net,
-	.exit = rds_tcp_exit_net,
-	.id = &rds_tcp_netid,
-	.size = sizeof(struct rds_tcp_net),
-	.async = true,
-};
-
 static void rds_tcp_kill_sock(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
@@ -546,6 +512,27 @@ static void rds_tcp_kill_sock(struct net *net)
 		rds_conn_destroy(tc->t_cpath->cp_conn);
 }
 
+static void __net_exit rds_tcp_exit_net(struct net *net)
+{
+	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+	rds_tcp_kill_sock(net);
+
+	if (rtn->rds_tcp_sysctl)
+		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+
+	if (net != &init_net && rtn->ctl_table)
+		kfree(rtn->ctl_table);
+}
+
+static struct pernet_operations rds_tcp_net_ops = {
+	.init = rds_tcp_init_net,
+	.exit = rds_tcp_exit_net,
+	.id = &rds_tcp_netid,
+	.size = sizeof(struct rds_tcp_net),
+	.async = true,
+};
+
 void *rds_tcp_listen_sock_def_readable(struct net *net)
 {
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
@@ -557,29 +544,6 @@ void *rds_tcp_listen_sock_def_readable(struct net *net)
 	return lsock->sk->sk_user_data;
 }
 
-static int rds_tcp_dev_event(struct notifier_block *this,
-			     unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-	/* rds-tcp registers as a pernet subys, so the ->exit will only
-	 * get invoked after network acitivity has quiesced. We need to
-	 * clean up all sockets  to quiesce network activity, and use
-	 * the unregistration of the per-net loopback device as a trigger
-	 * to start that cleanup.
-	 */
-	if (event == NETDEV_UNREGISTER_FINAL &&
-	    dev->ifindex == LOOPBACK_IFINDEX)
-		rds_tcp_kill_sock(dev_net(dev));
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block rds_tcp_dev_notifier = {
-	.notifier_call        = rds_tcp_dev_event,
-	.priority = -10, /* must be called after other network notifiers */
-};
-
 /* when sysctl is used to modify some kernel socket parameters,this
  * function  resets the RDS connections in that netns  so that we can
  * restart with new parameters.  The assumption is that such reset
@@ -625,9 +589,7 @@ static void rds_tcp_exit(void)
 	rds_tcp_set_unloading();
 	synchronize_rcu();
 	rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
-	unregister_pernet_subsys(&rds_tcp_net_ops);
-	if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
-		pr_warn("could not unregister rds_tcp_dev_notifier\n");
+	unregister_pernet_device(&rds_tcp_net_ops);
 	rds_tcp_destroy_conns();
 	rds_trans_unregister(&rds_tcp_transport);
 	rds_tcp_recv_exit();
@@ -651,24 +613,15 @@ static int rds_tcp_init(void)
 	if (ret)
 		goto out_slab;
 
-	ret = register_pernet_subsys(&rds_tcp_net_ops);
+	ret = register_pernet_device(&rds_tcp_net_ops);
 	if (ret)
 		goto out_recv;
 
-	ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
-	if (ret) {
-		pr_warn("could not register rds_tcp_dev_notifier\n");
-		goto out_pernet;
-	}
-
 	rds_trans_register(&rds_tcp_transport);
 
 	rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 
 	goto out;
-
-out_pernet:
-	unregister_pernet_subsys(&rds_tcp_net_ops);
 out_recv:
 	rds_tcp_recv_exit();
 out_slab:

From 1f7aa2bc268efe8c462f1d3f8778f9040047bc75 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Mar 2018 07:45:33 +0100
Subject: [PATCH 1064/1678] r8169: simplify rtl_set_mac_address

Replace open-coded functionality with eth_mac_addr().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 7055db161b1b..d4d2bb0c17ff 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4616,12 +4616,11 @@ static int rtl_set_mac_address(struct net_device *dev, void *p)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	struct device *d = &tp->pci_dev->dev;
-	struct sockaddr *addr = p;
+	int ret;
 
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-
-	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	ret = eth_mac_addr(dev, p);
+	if (ret)
+		return ret;
 
 	pm_runtime_get_noresume(d);
 

From cb73200c59e2f0493436a9f05835be8e37056b69 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Mar 2018 07:45:35 +0100
Subject: [PATCH 1065/1678] r8169: change type of first argument in
 rtl_tx_performance_tweak

Changing the type of the first argument to struct rtl8169_private * is more
in line with the other functions in the driver and it allows to reduce the
code size.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 62 +++++++++++-----------------
 1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index d4d2bb0c17ff..c67760b70c70 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -897,9 +897,9 @@ static void rtl_unlock_work(struct rtl8169_private *tp)
 	mutex_unlock(&tp->wk.mutex);
 }
 
-static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force)
+static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force)
 {
-	pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
+	pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL,
 					   PCI_EXP_DEVCTL_READRQ, force);
 }
 
@@ -5111,14 +5111,14 @@ static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | Jumbo_En1);
-	rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168c_hw_jumbo_disable(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~Jumbo_En1);
-	rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
 static void r8168dp_hw_jumbo_enable(struct rtl8169_private *tp)
@@ -5136,7 +5136,7 @@ static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, 0x3f);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
-	rtl_tx_performance_tweak(tp->pci_dev, PCI_EXP_DEVCTL_READRQ_512B);
+	rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_512B);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
@@ -5144,18 +5144,18 @@ static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, 0x0c);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
 	RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
-	rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 }
 
 static void r8168b_0_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-	rtl_tx_performance_tweak(tp->pci_dev,
+	rtl_tx_performance_tweak(tp,
 		PCI_EXP_DEVCTL_READRQ_512B | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
 static void r8168b_0_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-	rtl_tx_performance_tweak(tp->pci_dev,
+	rtl_tx_performance_tweak(tp,
 		(0x5 << MAX_READ_REQUEST_SHIFT) | PCI_EXP_DEVCTL_NOSNOOP_EN);
 }
 
@@ -5724,14 +5724,12 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
 
 static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN) {
-		rtl_tx_performance_tweak(pdev, (0x5 << MAX_READ_REQUEST_SHIFT) |
+		rtl_tx_performance_tweak(tp, (0x5 << MAX_READ_REQUEST_SHIFT) |
 					 PCI_EXP_DEVCTL_NOSNOOP_EN);
 	}
 }
@@ -5754,7 +5752,7 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_disable_clock_request(pdev);
 
@@ -5780,22 +5778,18 @@ static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl_csi_access_enable_2(tp);
 
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
 
 static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl_csi_access_enable_2(tp);
 
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -5806,7 +5800,7 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
@@ -5865,7 +5859,7 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
@@ -5877,7 +5871,7 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 	rtl_csi_access_enable_1(tp);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5895,7 +5889,7 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5928,7 +5922,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 	rtl_ephy_init(tp, e_info_8168e_1, ARRAY_SIZE(e_info_8168e_1));
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5954,7 +5948,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 	rtl_ephy_init(tp, e_info_8168e_2, ARRAY_SIZE(e_info_8168e_2));
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
-		rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -5986,7 +5980,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_2(tp);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
@@ -6048,8 +6042,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 
 	rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC);
@@ -6059,7 +6051,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6134,7 +6126,6 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
 	int rg_saw_cnt;
 	u32 data;
 	static const struct ephy_info e_info_8168h_1[] = {
@@ -6160,7 +6151,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6231,8 +6222,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl8168ep_stop_cmac(tp);
 
 	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
@@ -6244,7 +6233,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
 	rtl_csi_access_enable_1(tp);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x00, 0x01, ERIAR_EXGMAC);
 	rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC);
@@ -6494,7 +6483,6 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8102e_1[] = {
 		{ 0x01,	0, 0x6e65 },
 		{ 0x02,	0, 0x091f },
@@ -6511,7 +6499,7 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
 	RTL_W8(tp, DBG_REG, FIX_NAK_1);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W8(tp, Config1,
 	       LEDS1 | LEDS0 | Speed_down | MEMMAP | IOMAP | VPD | PMEnable);
@@ -6526,11 +6514,9 @@ static void rtl_hw_start_8102e_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8102e_2(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl_csi_access_enable_2(tp);
 
-	rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	RTL_W8(tp, Config1, MEMMAP | IOMAP | VPD | PMEnable);
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -6593,7 +6579,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402));
 
-	rtl_tx_performance_tweak(tp->pci_dev, 0x5 << MAX_READ_REQUEST_SHIFT);
+	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
 	rtl_eri_write(tp, 0xc8, ERIAR_MASK_1111, 0x00000002, ERIAR_EXGMAC);
 	rtl_eri_write(tp, 0xe8, ERIAR_MASK_1111, 0x00000006, ERIAR_EXGMAC);

From 73c86ee38b98203fc7ebc1006e4ffe22342107c3 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Mar 2018 07:45:40 +0100
Subject: [PATCH 1066/1678] r8169: change type of argument in
 rtl_disable/enable_clock_request

Changing the argument type to struct rtl8169_private * is more in line
with the other functions in the driver and it allows to reduce the code size.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 33 ++++++++++------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index c67760b70c70..47a30eab7346 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -5685,15 +5685,15 @@ static void rtl_ephy_init(struct rtl8169_private *tp, const struct ephy_info *e,
 	}
 }
 
-static void rtl_disable_clock_request(struct pci_dev *pdev)
+static void rtl_disable_clock_request(struct rtl8169_private *tp)
 {
-	pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL,
+	pcie_capability_clear_word(tp->pci_dev, PCI_EXP_LNKCTL,
 				   PCI_EXP_LNKCTL_CLKREQ_EN);
 }
 
-static void rtl_enable_clock_request(struct pci_dev *pdev)
+static void rtl_enable_clock_request(struct rtl8169_private *tp)
 {
-	pcie_capability_set_word(pdev, PCI_EXP_LNKCTL,
+	pcie_capability_set_word(tp->pci_dev, PCI_EXP_LNKCTL,
 				 PCI_EXP_LNKCTL_CLKREQ_EN);
 }
 
@@ -5745,8 +5745,6 @@ static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
 
 static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	RTL_W8(tp, Config1, RTL_R8(tp, Config1) | Speed_down);
 
 	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
@@ -5754,7 +5752,7 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
 	if (tp->dev->mtu <= ETH_DATA_LEN)
 		rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
 
-	rtl_disable_clock_request(pdev);
+	rtl_disable_clock_request(tp);
 
 	RTL_W16(tp, CPlusCmd, RTL_R16(tp, CPlusCmd) & ~R8168_CPCMD_QUIRK_MASK);
 }
@@ -5850,11 +5848,9 @@ static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl_csi_access_enable_2(tp);
 
-	rtl_disable_clock_request(pdev);
+	rtl_disable_clock_request(tp);
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
@@ -5866,8 +5862,6 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl_csi_access_enable_1(tp);
 
 	if (tp->dev->mtu <= ETH_DATA_LEN)
@@ -5875,12 +5869,11 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp)
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	rtl_disable_clock_request(pdev);
+	rtl_disable_clock_request(tp);
 }
 
 static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8168d_4[] = {
 		{ 0x0b, 0x0000,	0x0048 },
 		{ 0x19, 0x0020,	0x0050 },
@@ -5895,12 +5888,11 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8168d_4, ARRAY_SIZE(e_info_8168d_4));
 
-	rtl_enable_clock_request(pdev);
+	rtl_enable_clock_request(tp);
 }
 
 static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8168e_1[] = {
 		{ 0x00, 0x0200,	0x0100 },
 		{ 0x00, 0x0000,	0x0004 },
@@ -5926,7 +5918,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 
 	RTL_W8(tp, MaxTxPacketSize, TxPacketMax);
 
-	rtl_disable_clock_request(pdev);
+	rtl_disable_clock_request(tp);
 
 	/* Reset tx FIFO pointer */
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
@@ -5937,7 +5929,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
 	static const struct ephy_info e_info_8168e_2[] = {
 		{ 0x09, 0x0000,	0x0080 },
 		{ 0x19, 0x0000,	0x0224 }
@@ -5961,7 +5952,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 
 	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-	rtl_disable_clock_request(pdev);
+	rtl_disable_clock_request(tp);
 
 	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
@@ -5976,8 +5967,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 
 static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 {
-	struct pci_dev *pdev = tp->pci_dev;
-
 	rtl_csi_access_enable_2(tp);
 
 	rtl_tx_performance_tweak(tp, 0x5 << MAX_READ_REQUEST_SHIFT);
@@ -5995,7 +5984,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 
 	RTL_W8(tp, MaxTxPacketSize, EarlySize);
 
-	rtl_disable_clock_request(pdev);
+	rtl_disable_clock_request(tp);
 
 	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);

From 1e1205b7d3e992439b812f59fe0e79bd99b8c79a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Mar 2018 07:45:42 +0100
Subject: [PATCH 1067/1678] r8169: add helper tp_to_dev

In several places struct device is referenced by using &tp->pci_dev->dev.
Add helper tp_to_dev() to improve code readability.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 37 +++++++++++++++++-----------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 47a30eab7346..630409e0337f 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -887,6 +887,11 @@ MODULE_FIRMWARE(FIRMWARE_8168H_2);
 MODULE_FIRMWARE(FIRMWARE_8107E_1);
 MODULE_FIRMWARE(FIRMWARE_8107E_2);
 
+static inline struct device *tp_to_dev(struct rtl8169_private *tp)
+{
+	return &tp->pci_dev->dev;
+}
+
 static void rtl_lock_work(struct rtl8169_private *tp)
 {
 	mutex_lock(&tp->wk.mutex);
@@ -1609,17 +1614,19 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
 static void rtl8169_check_link_status(struct net_device *dev,
 				      struct rtl8169_private *tp)
 {
+	struct device *d = tp_to_dev(tp);
+
 	if (tp->link_ok(tp)) {
 		rtl_link_chg_patch(tp);
 		/* This is to cancel a scheduled suspend if there's one. */
-		pm_request_resume(&tp->pci_dev->dev);
+		pm_request_resume(d);
 		netif_carrier_on(dev);
 		if (net_ratelimit())
 			netif_info(tp, ifup, dev, "link up\n");
 	} else {
 		netif_carrier_off(dev);
 		netif_info(tp, ifdown, dev, "link down\n");
-		pm_runtime_idle(&tp->pci_dev->dev);
+		pm_runtime_idle(d);
 	}
 }
 
@@ -1678,7 +1685,7 @@ static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
 static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 
 	pm_runtime_get_noresume(d);
 
@@ -1781,7 +1788,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 
 	pm_runtime_get_noresume(d);
 
@@ -1794,7 +1801,7 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 
 	rtl_unlock_work(tp);
 
-	device_set_wakeup_enable(&tp->pci_dev->dev, wol->wolopts);
+	device_set_wakeup_enable(d, wol->wolopts);
 
 	pm_runtime_put_noidle(d);
 
@@ -2234,7 +2241,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev,
 				      struct ethtool_stats *stats, u64 *data)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 	struct rtl8169_counters *counters = tp->counters;
 
 	ASSERT_RTNL();
@@ -4615,7 +4622,7 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
 static int rtl_set_mac_address(struct net_device *dev, void *p)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 	int ret;
 
 	ret = eth_mac_addr(dev, p);
@@ -5261,7 +5268,7 @@ static void rtl_request_uncached_firmware(struct rtl8169_private *tp)
 	if (!rtl_fw)
 		goto err_warn;
 
-	rc = request_firmware(&rtl_fw->fw, name, &tp->pci_dev->dev);
+	rc = request_firmware(&rtl_fw->fw, name, tp_to_dev(tp));
 	if (rc < 0)
 		goto err_free;
 
@@ -6692,7 +6699,7 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
 static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
 				     void **data_buff, struct RxDesc *desc)
 {
-	dma_unmap_single(&tp->pci_dev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
+	dma_unmap_single(tp_to_dev(tp), le64_to_cpu(desc->addr), rx_buf_sz,
 			 DMA_FROM_DEVICE);
 
 	kfree(*data_buff);
@@ -6727,7 +6734,7 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 {
 	void *data;
 	dma_addr_t mapping;
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 	struct net_device *dev = tp->dev;
 	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 
@@ -6839,7 +6846,7 @@ static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start,
 		if (len) {
 			struct sk_buff *skb = tx_skb->skb;
 
-			rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+			rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
 					     tp->TxDescArray + entry);
 			if (skb) {
 				dev_consume_skb_any(skb);
@@ -6891,7 +6898,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
 	struct skb_shared_info *info = skb_shinfo(skb);
 	unsigned int cur_frag, entry;
 	struct TxDesc *uninitialized_var(txd);
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 
 	entry = tp->cur_tx;
 	for (cur_frag = 0; cur_frag < info->nr_frags; cur_frag++) {
@@ -7123,7 +7130,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 	struct rtl8169_private *tp = netdev_priv(dev);
 	unsigned int entry = tp->cur_tx % NUM_TX_DESC;
 	struct TxDesc *txd = tp->TxDescArray + entry;
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 	dma_addr_t mapping;
 	u32 status, len;
 	u32 opts[2];
@@ -7287,7 +7294,7 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp)
 		 */
 		dma_rmb();
 
-		rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
+		rtl8169_unmap_tx_skb(tp_to_dev(tp), tx_skb,
 				     tp->TxDescArray + entry);
 		if (status & LastFrag) {
 			u64_stats_update_begin(&tp->tx_stats.syncp);
@@ -7348,7 +7355,7 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
 					   dma_addr_t addr)
 {
 	struct sk_buff *skb;
-	struct device *d = &tp->pci_dev->dev;
+	struct device *d = tp_to_dev(tp);
 
 	data = rtl8169_align(data);
 	dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);

From a3cdaa69e4aefd5858af1c4e763b57e0d88ed31e Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Tue, 20 Mar 2018 15:41:38 +0530
Subject: [PATCH 1068/1678] cxgb4: Adds CPL support for Shared Receive Queues

- Add srq table query cpl support for srq
- Add cpl_abort_req_rss6 and cpl_abort_rpl_rss6 structs.
- Add accessors, macros to get the SRQ IDX value.

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h   | 71 +++++++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h |  2 +
 2 files changed, 73 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index d0db4427b77e..5e8f5ca8e3ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -52,6 +52,7 @@ enum {
 	CPL_L2T_WRITE_REQ     = 0x12,
 	CPL_SMT_WRITE_REQ     = 0x14,
 	CPL_TID_RELEASE       = 0x1A,
+	CPL_SRQ_TABLE_REQ     = 0x1C,
 	CPL_TX_DATA_ISO	      = 0x1F,
 
 	CPL_CLOSE_LISTSRV_RPL = 0x20,
@@ -102,6 +103,7 @@ enum {
 	CPL_FW4_MSG           = 0xC0,
 	CPL_FW4_PLD           = 0xC1,
 	CPL_FW4_ACK           = 0xC3,
+	CPL_SRQ_TABLE_RPL     = 0xCC,
 
 	CPL_RX_PHYS_DSGL      = 0xD0,
 
@@ -136,6 +138,8 @@ enum CPL_error {
 	CPL_ERR_KEEPALV_NEG_ADVICE = 37,
 	CPL_ERR_ABORT_FAILED       = 42,
 	CPL_ERR_IWARP_FLM          = 50,
+	CPL_CONTAINS_READ_RPL      = 60,
+	CPL_CONTAINS_WRITE_RPL     = 61,
 };
 
 enum {
@@ -198,6 +202,7 @@ union opcode_tid {
 /* partitioning of TID fields that also carry a queue id */
 #define TID_TID_S    0
 #define TID_TID_M    0x3fff
+#define TID_TID_V(x) ((x) << TID_TID_S)
 #define TID_TID_G(x) (((x) >> TID_TID_S) & TID_TID_M)
 
 #define TID_QID_S    14
@@ -743,6 +748,22 @@ struct cpl_abort_req_rss {
 	u8 status;
 };
 
+struct cpl_abort_req_rss6 {
+	WR_HDR;
+	union opcode_tid ot;
+	__u32 srqidx_status;
+};
+
+#define ABORT_RSS_STATUS_S    0
+#define ABORT_RSS_STATUS_M    0xff
+#define ABORT_RSS_STATUS_V(x) ((x) << ABORT_RSS_STATUS_S)
+#define ABORT_RSS_STATUS_G(x) (((x) >> ABORT_RSS_STATUS_S) & ABORT_RSS_STATUS_M)
+
+#define ABORT_RSS_SRQIDX_S    8
+#define ABORT_RSS_SRQIDX_M    0xffffff
+#define ABORT_RSS_SRQIDX_V(x) ((x) << ABORT_RSS_SRQIDX_S)
+#define ABORT_RSS_SRQIDX_G(x) (((x) >> ABORT_RSS_SRQIDX_S) & ABORT_RSS_SRQIDX_M)
+
 struct cpl_abort_req {
 	WR_HDR;
 	union opcode_tid ot;
@@ -758,6 +779,11 @@ struct cpl_abort_rpl_rss {
 	u8 status;
 };
 
+struct cpl_abort_rpl_rss6 {
+	union opcode_tid ot;
+	__u32 srqidx_status;
+};
+
 struct cpl_abort_rpl {
 	WR_HDR;
 	union opcode_tid ot;
@@ -2112,4 +2138,49 @@ enum {
 	X_CPL_RX_MPS_PKT_TYPE_QFC   = 1 << 2,
 	X_CPL_RX_MPS_PKT_TYPE_PTP   = 1 << 3
 };
+
+struct cpl_srq_table_req {
+	WR_HDR;
+	union opcode_tid ot;
+	__u8 status;
+	__u8 rsvd[2];
+	__u8 idx;
+	__be64 rsvd_pdid;
+	__be32 qlen_qbase;
+	__be16 cur_msn;
+	__be16 max_msn;
+};
+
+struct cpl_srq_table_rpl {
+	union opcode_tid ot;
+	__u8 status;
+	__u8 rsvd[2];
+	__u8 idx;
+	__be64 rsvd_pdid;
+	__be32 qlen_qbase;
+	__be16 cur_msn;
+	__be16 max_msn;
+};
+
+/* cpl_srq_table_{req,rpl}.params fields */
+#define SRQT_QLEN_S   28
+#define SRQT_QLEN_M   0xF
+#define SRQT_QLEN_V(x) ((x) << SRQT_QLEN_S)
+#define SRQT_QLEN_G(x) (((x) >> SRQT_QLEN_S) & SRQT_QLEN_M)
+
+#define SRQT_QBASE_S    0
+#define SRQT_QBASE_M   0x3FFFFFF
+#define SRQT_QBASE_V(x) ((x) << SRQT_QBASE_S)
+#define SRQT_QBASE_G(x) (((x) >> SRQT_QBASE_S) & SRQT_QBASE_M)
+
+#define SRQT_PDID_S    0
+#define SRQT_PDID_M   0xFF
+#define SRQT_PDID_V(x) ((x) << SRQT_PDID_S)
+#define SRQT_PDID_G(x) (((x) >> SRQT_PDID_S) & SRQT_PDID_M)
+
+#define SRQT_IDX_S    0
+#define SRQT_IDX_M    0xF
+#define SRQT_IDX_V(x) ((x) << SRQT_IDX_S)
+#define SRQT_IDX_G(x) (((x) >> SRQT_IDX_S) & SRQT_IDX_M)
+
 #endif  /* __T4_MSG_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index e40217a1c9e6..3b0074c0200f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1244,6 +1244,8 @@ enum fw_params_param_pfvf {
 	FW_PARAMS_PARAM_PFVF_SQRQ_END	= 0x16,
 	FW_PARAMS_PARAM_PFVF_CQ_START	= 0x17,
 	FW_PARAMS_PARAM_PFVF_CQ_END	= 0x18,
+	FW_PARAMS_PARAM_PFVF_SRQ_START  = 0x19,
+	FW_PARAMS_PARAM_PFVF_SRQ_END    = 0x1A,
 	FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH = 0x20,
 	FW_PARAMS_PARAM_PFVF_VIID       = 0x24,
 	FW_PARAMS_PARAM_PFVF_CPMASK     = 0x25,

From e47094751ddc117c686c399af9810d57367922c9 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Tue, 20 Mar 2018 15:41:39 +0530
Subject: [PATCH 1069/1678] cxgb4: Add support to initialise/read SRQ entries

- This patch adds support to initialise srq table and read srq entries

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/Makefile |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |   2 +
 drivers/net/ethernet/chelsio/cxgb4/srq.c    | 138 ++++++++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4/srq.h    |  65 +++++++++
 4 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/srq.c
 create mode 100644 drivers/net/ethernet/chelsio/cxgb4/srq.h

diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 53b6a02c778e..bea6a059a8f1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
 cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
-	      cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
+	      cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
 	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
 	      cudbg_common.o cudbg_lib.o cudbg_zlib.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a5c0a649f3c7..6ce5c0d39d59 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -960,6 +960,8 @@ struct adapter {
 
 	/* HMA */
 	struct hma_data hma;
+
+	struct srq_data *srq;
 };
 
 /* Support for "sched-class" command to allow a TX Scheduling Class to be
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.c b/drivers/net/ethernet/chelsio/cxgb4/srq.c
new file mode 100644
index 000000000000..6228a5708307
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.c
@@ -0,0 +1,138 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_msg.h"
+#include "srq.h"
+
+struct srq_data *t4_init_srq(int srq_size)
+{
+	struct srq_data *s;
+
+	s = kvzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+
+	s->srq_size = srq_size;
+	init_completion(&s->comp);
+	mutex_init(&s->lock);
+
+	return s;
+}
+
+/* cxgb4_get_srq_entry: read the SRQ table entry
+ * @dev: Pointer to the net_device
+ * @idx: Index to the srq
+ * @entryp: pointer to the srq entry
+ *
+ * Sends CPL_SRQ_TABLE_REQ message for the given index.
+ * Contents will be returned in CPL_SRQ_TABLE_RPL message.
+ *
+ * Returns zero if the read is successful, else a error
+ * number will be returned. Caller should not use the srq
+ * entry if the return value is non-zero.
+ *
+ *
+ */
+int cxgb4_get_srq_entry(struct net_device *dev,
+			int srq_idx, struct srq_entry *entryp)
+{
+	struct cpl_srq_table_req *req;
+	struct adapter *adap;
+	struct sk_buff *skb;
+	struct srq_data *s;
+	int rc = -ENODEV;
+
+	adap = netdev2adap(dev);
+	s = adap->srq;
+
+	if (!(adap->flags & FULL_INIT_DONE) || !s)
+		goto out;
+
+	skb = alloc_skb(sizeof(*req), GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	req = (struct cpl_srq_table_req *)
+		__skb_put(skb, sizeof(*req));
+	memset(req, 0, sizeof(*req));
+	INIT_TP_WR(req, 0);
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SRQ_TABLE_REQ,
+					      TID_TID_V(srq_idx) |
+				TID_QID_V(adap->sge.fw_evtq.abs_id)));
+	req->idx = srq_idx;
+
+	mutex_lock(&s->lock);
+
+	s->entryp = entryp;
+	t4_mgmt_tx(adap, skb);
+
+	rc = wait_for_completion_timeout(&s->comp, SRQ_WAIT_TO);
+	if (rc)
+		rc = 0;
+	else /* !rc means we timed out */
+		rc = -ETIMEDOUT;
+
+	WARN_ON_ONCE(entryp->idx != srq_idx);
+	mutex_unlock(&s->lock);
+out:
+	return rc;
+}
+EXPORT_SYMBOL(cxgb4_get_srq_entry);
+
+void do_srq_table_rpl(struct adapter *adap,
+		      const struct cpl_srq_table_rpl *rpl)
+{
+	unsigned int idx = TID_TID_G(GET_TID(rpl));
+	struct srq_data *s = adap->srq;
+	struct srq_entry *e;
+
+	if (unlikely(rpl->status != CPL_CONTAINS_READ_RPL)) {
+		dev_err(adap->pdev_dev,
+			"Unexpected SRQ_TABLE_RPL status %u for entry %u\n",
+				rpl->status, idx);
+		goto out;
+	}
+
+	/* Store the read entry */
+	e = s->entryp;
+	e->valid = 1;
+	e->idx = idx;
+	e->pdid = SRQT_PDID_G(be64_to_cpu(rpl->rsvd_pdid));
+	e->qlen = SRQT_QLEN_G(be32_to_cpu(rpl->qlen_qbase));
+	e->qbase = SRQT_QBASE_G(be32_to_cpu(rpl->qlen_qbase));
+	e->cur_msn = be16_to_cpu(rpl->cur_msn);
+	e->max_msn = be16_to_cpu(rpl->max_msn);
+out:
+	complete(&s->comp);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/srq.h b/drivers/net/ethernet/chelsio/cxgb4/srq.h
new file mode 100644
index 000000000000..ec85cf93865a
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/srq.h
@@ -0,0 +1,65 @@
+/*
+ * This file is part of the Chelsio T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017-2018 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SRQ_H
+#define __CXGB4_SRQ_H
+
+struct adapter;
+struct cpl_srq_table_rpl;
+
+#define SRQ_WAIT_TO	(HZ * 5)
+
+struct srq_entry {
+	u8 valid;
+	u8 idx;
+	u8 qlen;
+	u16 pdid;
+	u16 cur_msn;
+	u16 max_msn;
+	u32 qbase;
+};
+
+struct srq_data {
+	unsigned int srq_size;
+	struct srq_entry *entryp;
+	struct completion comp;
+	struct mutex lock; /* generic mutex for srq data */
+};
+
+struct srq_data *t4_init_srq(int srq_size);
+int cxgb4_get_srq_entry(struct net_device *dev,
+			int srq_idx, struct srq_entry *entryp);
+void do_srq_table_rpl(struct adapter *adap,
+		      const struct cpl_srq_table_rpl *rpl);
+#endif  /* __CXGB4_SRQ_H */

From c68644ef16103a2462c34e0691343d8985466eab Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Tue, 20 Mar 2018 15:41:40 +0530
Subject: [PATCH 1070/1678] cxgb4: Add support to query HW SRQ parameters

This patch adds support to query FW for the HW SRQ table start/end, and
advertise that for ULDs.

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   | 20 +++++++++++++++++++
 .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h    |  1 +
 2 files changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 72ec3f7dccbb..3ce496494f3c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -75,6 +75,7 @@
 #include "t4fw_api.h"
 #include "t4fw_version.h"
 #include "cxgb4_dcb.h"
+#include "srq.h"
 #include "cxgb4_debugfs.h"
 #include "clip_tbl.h"
 #include "l2t.h"
@@ -586,6 +587,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 		const struct cpl_abort_rpl_rss *p = (void *)rsp;
 
 		hash_del_filter_rpl(q->adap, p);
+	} else if (opcode == CPL_SRQ_TABLE_RPL) {
+		const struct cpl_srq_table_rpl *p = (void *)rsp;
+
+		do_srq_table_rpl(q->adap, p);
 	} else
 		dev_err(q->adap->pdev_dev,
 			"unexpected CPL %#x on FW event queue\n", opcode);
@@ -4467,6 +4472,20 @@ static int adap_init0(struct adapter *adap)
 		adap->vres.pbl.start = val[4];
 		adap->vres.pbl.size = val[5] - val[4] + 1;
 
+		params[0] = FW_PARAM_PFVF(SRQ_START);
+		params[1] = FW_PARAM_PFVF(SRQ_END);
+		ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+				      params, val);
+		if (!ret) {
+			adap->vres.srq.start = val[0];
+			adap->vres.srq.size = val[1] - val[0] + 1;
+		}
+		if (adap->vres.srq.size) {
+			adap->srq = t4_init_srq(adap->vres.srq.size);
+			if (!adap->srq)
+				dev_warn(&adap->pdev->dev, "could not allocate SRQ, continuing\n");
+		}
+
 		params[0] = FW_PARAM_PFVF(SQRQ_START);
 		params[1] = FW_PARAM_PFVF(SQRQ_END);
 		params[2] = FW_PARAM_PFVF(CQ_START);
@@ -5135,6 +5154,7 @@ static void free_some_resources(struct adapter *adapter)
 
 	kvfree(adapter->smt);
 	kvfree(adapter->l2t);
+	kvfree(adapter->srq);
 	t4_cleanup_sched(adapter);
 	kvfree(adapter->tids.tid_tab);
 	cxgb4_cleanup_tc_flower(adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 788146c08151..96a69bdacb4c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -284,6 +284,7 @@ struct cxgb4_virt_res {                      /* virtualized HW resources */
 	struct cxgb4_range iscsi;
 	struct cxgb4_range stag;
 	struct cxgb4_range rq;
+	struct cxgb4_range srq;
 	struct cxgb4_range pbl;
 	struct cxgb4_range qp;
 	struct cxgb4_range cq;

From 43db92964039d409c8e76613f607b79f6e93ee38 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Tue, 20 Mar 2018 15:41:41 +0530
Subject: [PATCH 1071/1678] cxgb4: Support firmware rdma write with immediate
 work request.

If FW supports RDMA WRITE_WITH_IMMEDATE functionality, then advertise
that
to the ULDs. This will be used by iw_cxgb4 to allow WRITE_WITH_IMMEDIATE
work requests.

Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      | 1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 6 ++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c  | 1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  | 1 +
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   | 1 +
 5 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 6ce5c0d39d59..36110cf68595 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -390,6 +390,7 @@ struct adapter_params {
 	 * used by the Port
 	 */
 	u8 mps_bg_map[MAX_NPORTS];	/* MPS Buffer Group Map */
+	bool write_w_imm_support;       /* FW supports WRITE_WITH_IMMEDIATE */
 };
 
 /* State needed to monitor the forward progress of SGE Ingress DMA activities
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 3ce496494f3c..b31661ce2c75 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4519,6 +4519,12 @@ static int adap_init0(struct adapter *adap)
 			 "max_ordird_qp %d max_ird_adapter %d\n",
 			 adap->params.max_ordird_qp,
 			 adap->params.max_ird_adapter);
+
+		/* Enable write_with_immediate if FW supports it */
+		params[0] = FW_PARAM_DEV(RDMA_WRITE_WITH_IMM);
+		ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+				      val);
+		adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
 		adap->num_ofld_uld += 2;
 	}
 	if (caps_cmd.iscsicaps) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 2d827140a475..d8748e1752be 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -666,6 +666,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
 	lld->nodeid = dev_to_node(adap->pdev_dev);
 	lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
+	lld->write_w_imm_support = adap->params.write_w_imm_support;
 }
 
 static void uld_attach(struct adapter *adap, unsigned int uld)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 96a69bdacb4c..fa01a5ce21fa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -354,6 +354,7 @@ struct cxgb4_lld_info {
 	void **iscsi_ppm;		     /* iscsi page pod manager */
 	int nodeid;			     /* device numa node id */
 	bool fr_nsmr_tpte_wr_support;	     /* FW supports FR_NSMR_TPTE_WR */
+	bool write_w_imm_support;         /* FW supports WRITE_WITH_IMMEDIATE */
 };
 
 struct cxgb4_uld_info {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 3b0074c0200f..ef7cb5ceefc4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1213,6 +1213,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
 	FW_PARAMS_PARAM_DEV_HMA_SIZE	= 0x20,
+	FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
 };
 
 /*

From f3910c6278f380d87f8bf8e46a8db46b2d0cff27 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Tue, 20 Mar 2018 15:41:42 +0530
Subject: [PATCH 1072/1678] cxgb4: Support firmware rdma write completion work
 request.

If FW supports RDMA WRITE_COMPLETION functionality, then advertise that
to the ULDs. This will be used by iw_cxgb4 to allow WRITE_COMPLETION
work requests.

Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      | 1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 6 ++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c  | 1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  | 1 +
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   | 2 ++
 5 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 36110cf68595..688f95440af2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -391,6 +391,7 @@ struct adapter_params {
 	 */
 	u8 mps_bg_map[MAX_NPORTS];	/* MPS Buffer Group Map */
 	bool write_w_imm_support;       /* FW supports WRITE_WITH_IMMEDIATE */
+	bool write_cmpl_support;        /* FW supports WRITE_CMPL */
 };
 
 /* State needed to monitor the forward progress of SGE Ingress DMA activities
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index b31661ce2c75..99c9b88d6d34 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4525,6 +4525,12 @@ static int adap_init0(struct adapter *adap)
 		ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
 				      val);
 		adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
+
+		/* Enable write_cmpl if FW supports it */
+		params[0] = FW_PARAM_DEV(RI_WRITE_CMPL_WR);
+		ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
+				      val);
+		adap->params.write_cmpl_support = (ret == 0 && val[0] != 0);
 		adap->num_ofld_uld += 2;
 	}
 	if (caps_cmd.iscsicaps) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index d8748e1752be..a95cde0fadf7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -667,6 +667,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->nodeid = dev_to_node(adap->pdev_dev);
 	lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support;
 	lld->write_w_imm_support = adap->params.write_w_imm_support;
+	lld->write_cmpl_support = adap->params.write_cmpl_support;
 }
 
 static void uld_attach(struct adapter *adap, unsigned int uld)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index fa01a5ce21fa..b0ca06edaa7c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -355,6 +355,7 @@ struct cxgb4_lld_info {
 	int nodeid;			     /* device numa node id */
 	bool fr_nsmr_tpte_wr_support;	     /* FW supports FR_NSMR_TPTE_WR */
 	bool write_w_imm_support;         /* FW supports WRITE_WITH_IMMEDIATE */
+	bool write_cmpl_support;             /* FW supports WRITE_CMPL WR */
 };
 
 struct cxgb4_uld_info {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ef7cb5ceefc4..544757f6ab3a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -101,6 +101,7 @@ enum fw_wr_opcodes {
 	FW_RI_BIND_MW_WR               = 0x18,
 	FW_RI_FR_NSMR_WR               = 0x19,
 	FW_RI_FR_NSMR_TPTE_WR	       = 0x20,
+	FW_RI_RDMA_WRITE_CMPL_WR       = 0x21,
 	FW_RI_INV_LSTAG_WR             = 0x1a,
 	FW_ISCSI_TX_DATA_WR	       = 0x45,
 	FW_PTP_TX_PKT_WR               = 0x46,
@@ -1214,6 +1215,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_MPSBGMAP	= 0x1E,
 	FW_PARAMS_PARAM_DEV_HMA_SIZE	= 0x20,
 	FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
+	FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR    = 0x24,
 };
 
 /*

From 8ae797aaa845317bddfbf14f4b37514b017045a6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 20 Mar 2018 10:04:30 -0700
Subject: [PATCH 1073/1678] selftests: Add multipath tests for onlink flag

Add multipath tests for onlink flag: one test with onlink added to
both nexthops, then tests with onlink added to only 1 nexthop.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/fib-onlink-tests.sh | 98 ++++++++++++++++++-
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 06b1d7cc12cc..3991ad1a368d 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -56,7 +56,8 @@ TEST_NET6[2]=2001:db8:102
 
 # connected gateway
 CONGW[1]=169.254.1.254
-CONGW[2]=169.254.5.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
 
 # recursive gateway
 RECGW4[1]=169.254.11.254
@@ -232,6 +233,23 @@ run_ip()
 	log_test $? ${exp_rc} "${desc}"
 }
 
+run_ip_mpath()
+{
+	local table="$1"
+	local prefix="$2"
+	local nh1="$3"
+	local nh2="$4"
+	local exp_rc="$5"
+	local desc="$6"
+
+	# dev arg may be empty
+	[ -n "${dev}" ] && dev="dev ${dev}"
+
+	run_cmd ip ro add table "${table}" "${prefix}"/32 \
+		nexthop via ${nh1} nexthop via ${nh2}
+	log_test $? ${exp_rc} "${desc}"
+}
+
 valid_onlink_ipv4()
 {
 	# - unicast connected, unicast recursive
@@ -243,13 +261,37 @@ valid_onlink_ipv4()
 
 	log_subsection "VRF ${VRF}"
 
-	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
 	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
 
 	log_subsection "VRF device, PBR table"
 
-	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[2]} ${NETIFS[p5]} 0 "unicast connected"
+	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
 	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+	# multipath version
+	#
+	log_subsection "default VRF - main table - multipath"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+		"${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+		"${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+		"${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+		"${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+		0 "unicast recursive - multipath"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+		"${CONGW[1]} dev ${NETIFS[p1]}"        \
+		"${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath onlink first only"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+		"${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+		"${CONGW[2]} dev ${NETIFS[p3]}"        \
+		0 "unicast connected - multipath onlink second only"
 }
 
 invalid_onlink_ipv4()
@@ -289,6 +331,21 @@ run_ip6()
 	log_test $? ${exp_rc} "${desc}"
 }
 
+run_ip6_mpath()
+{
+	local table="$1"
+	local prefix="$2"
+	local opts="$3"
+	local nh1="$4"
+	local nh2="$5"
+	local exp_rc="$6"
+	local desc="$7"
+
+	run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+		nexthop via ${nh1} nexthop via ${nh2}
+	log_test $? ${exp_rc} "${desc}"
+}
+
 valid_onlink_ipv6()
 {
 	# - unicast connected, unicast recursive, v4-mapped
@@ -310,6 +367,40 @@ valid_onlink_ipv6()
 	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
 	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
 	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+	# multipath version
+	#
+	log_subsection "default VRF - main table - multipath"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+		0 "unicast connected - multipath onlink"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+		"${RECGW6[1]} dev ${NETIFS[p1]}" \
+		"${RECGW6[2]} dev ${NETIFS[p3]}" \
+		0 "unicast recursive - multipath onlink"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+		"::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+		"::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+		0 "v4-mapped - multipath onlink"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath onlink both nexthops"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+		0 "unicast connected - multipath onlink first only"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}"        \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath onlink second only"
 }
 
 invalid_onlink_ipv6()
@@ -355,6 +446,7 @@ run_onlink_tests()
 	log_section "IPv6 onlink"
 	log_subsection "Valid onlink commands"
 	valid_onlink_ipv6
+	log_subsection "Invalid onlink commands"
 	invalid_onlink_ipv6
 }
 

From 145307460ba9c11489807de7acd3f4c7395f60b7 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 20 Mar 2018 19:31:14 -0700
Subject: [PATCH 1074/1678] devlink: Remove top_hierarchy arg to
 devlink_resource_register

top_hierarchy arg can be determined by comparing parent_resource_id to
DEVLINK_RESOURCE_ID_PARENT_TOP so it does not need to be a separate
argument.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      | 9 ++++-----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 6 +++---
 include/net/devlink.h                               | 1 -
 net/core/devlink.c                                  | 4 +++-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index a120602bca26..83886a9df206 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3876,8 +3876,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 
 	kvd_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
-					true, kvd_size,
-					MLXSW_SP_RESOURCE_KVD,
+					kvd_size, MLXSW_SP_RESOURCE_KVD,
 					DEVLINK_RESOURCE_ID_PARENT_TOP,
 					&kvd_size_params,
 					NULL);
@@ -3886,7 +3885,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 
 	linear_size = profile->kvd_linear_size;
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR,
-					false, linear_size,
+					linear_size,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					MLXSW_SP_RESOURCE_KVD,
 					&linear_size_params,
@@ -3904,7 +3903,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 		       profile->kvd_hash_single_parts;
 	double_size = rounddown(double_size, profile->kvd_hash_granularity);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE,
-					false, double_size,
+					double_size,
 					MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
 					MLXSW_SP_RESOURCE_KVD,
 					&hash_double_size_params,
@@ -3914,7 +3913,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 
 	single_size = kvd_size - double_size - linear_size;
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_SINGLE,
-					false, single_size,
+					single_size,
 					MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
 					MLXSW_SP_RESOURCE_KVD,
 					&hash_single_size_params,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 4c9bff2fa055..85503e93b93f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -459,7 +459,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 
 	mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
-					false, MLXSW_SP_KVDL_SINGLE_SIZE,
+					MLXSW_SP_KVDL_SINGLE_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					&mlxsw_sp_kvdl_single_size_params,
@@ -468,7 +468,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 		return err;
 
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
-					false, MLXSW_SP_KVDL_CHUNKS_SIZE,
+					MLXSW_SP_KVDL_CHUNKS_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					&mlxsw_sp_kvdl_chunks_size_params,
@@ -477,7 +477,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 		return err;
 
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
-					false, MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
+					MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					&mlxsw_sp_kvdl_large_chunks_size_params,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index c83125ad20ff..d5b707375e48 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -406,7 +406,6 @@ extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
 
 int devlink_resource_register(struct devlink *devlink,
 			      const char *resource_name,
-			      bool top_hierarchy,
 			      u64 resource_size,
 			      u64 resource_id,
 			      u64 parent_resource_id,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f23e5ed7c90f..d03b96f87c25 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3174,7 +3174,6 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
  */
 int devlink_resource_register(struct devlink *devlink,
 			      const char *resource_name,
-			      bool top_hierarchy,
 			      u64 resource_size,
 			      u64 resource_id,
 			      u64 parent_resource_id,
@@ -3183,8 +3182,11 @@ int devlink_resource_register(struct devlink *devlink,
 {
 	struct devlink_resource *resource;
 	struct list_head *resource_list;
+	bool top_hierarchy;
 	int err = 0;
 
+	top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP;
+
 	mutex_lock(&devlink->lock);
 	resource = devlink_resource_find(devlink, NULL, resource_id);
 	if (resource) {

From dd72140ca9fd44c5e3ea3cb6b485f40e6751f5e9 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:20 +0800
Subject: [PATCH 1075/1678] net: hns3: reallocate tx/rx buffer after changing
 mtu

When changing the mtu, the max frame size also will be changed. The tx
buffer size and the rx buffer size to be allocated are determined by max
frame size. So when max frame size is changed, the tx buffer and rx buffer
need to be reallocated.

When the tc_num is changed, the tx buffer and rx buffer need to be
reallocated too. So calling set_mtu and buffer_alloc separately is better.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 36 +++++++++++--------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d70619b5ff15..e110c65cd91a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4772,11 +4772,9 @@ static int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 	return hclge_set_vlan_rx_offload_cfg(vport);
 }
 
-static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mtu)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_config_max_frm_size_cmd *req;
-	struct hclge_dev *hdev = vport->back;
 	struct hclge_desc desc;
 	int max_frm_size;
 	int ret;
@@ -4805,6 +4803,27 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
 	return 0;
 }
 
+static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	ret = hclge_set_mac_mtu(hdev, new_mtu);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"Change mtu fail, ret =%d\n", ret);
+		return ret;
+	}
+
+	ret = hclge_buffer_alloc(hdev);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Allocate buffer fail, ret =%d\n", ret);
+
+	return ret;
+}
+
 static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
 				    bool enable)
 {
@@ -5392,11 +5411,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
 		return ret;
 	}
-	ret = hclge_buffer_alloc(hdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
-		return  ret;
-	}
 
 	ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
 	if (ret) {
@@ -5503,12 +5517,6 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	ret = hclge_buffer_alloc(hdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
-		return ret;
-	}
-
 	ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
 	if (ret) {
 		dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);

From 1a426f8b40fca920f15558c9d2fa6efab6921002 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 21 Mar 2018 15:49:21 +0800
Subject: [PATCH 1076/1678] net: hns3: fix the VF queue reset flow error

VF queue reset flow is different from PF queue reset flow.
VF driver should stop VF queue first, then send message to PF
and PF do the reset. PF should send a response to VF after
PF complete the queue reset, VF can initialize the queue hw
after get the response.
This patch fixes the VF queue reset flow as the correct step.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 37 +++++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  1 +
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 11 ++++--
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 10 ++++-
 4 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e110c65cd91a..588f2312a034 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4926,6 +4926,43 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 	}
 }
 
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
+{
+	struct hclge_dev *hdev = vport->back;
+	int reset_try_times = 0;
+	int reset_status;
+	u16 queue_gid;
+	int ret;
+
+	queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+
+	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
+	if (ret) {
+		dev_warn(&hdev->pdev->dev,
+			 "Send reset tqp cmd fail, ret = %d\n", ret);
+		return;
+	}
+
+	reset_try_times = 0;
+	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+		/* Wait for tqp hw reset */
+		msleep(20);
+		reset_status = hclge_get_reset_status(hdev, queue_gid);
+		if (reset_status)
+			break;
+	}
+
+	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
+		dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
+		return;
+	}
+
+	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
+	if (ret)
+		dev_warn(&hdev->pdev->dev,
+			 "Deassert the soft reset fail, ret = %d\n", ret);
+}
+
 static u32 hclge_get_fw_version(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 7bff6efe3e6d..edbcb739841a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -646,5 +646,6 @@ void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 4a49a6b2f4c3..cef14e73edb8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -322,14 +322,17 @@ static int hclge_get_link_info(struct hclge_vport *vport,
 				  HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
 }
 
-static void hclge_reset_vf_queue(struct hclge_vport *vport,
-				 struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
+				     struct hclge_mbx_vf_to_pf_cmd *mbx_req)
 {
 	u16 queue_id;
 
 	memcpy(&queue_id, &mbx_req->msg[2], sizeof(queue_id));
 
-	hclge_reset_tqp(&vport->nic, queue_id);
+	hclge_reset_vf_queue(vport, queue_id);
+
+	/* send response msg to VF after queue reset complete*/
+	hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
 }
 
 void hclge_mbx_handler(struct hclge_dev *hdev)
@@ -407,7 +410,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 					ret);
 			break;
 		case HCLGE_MBX_QUEUE_RESET:
-			hclge_reset_vf_queue(vport, req);
+			hclge_mbx_reset_vf_queue(vport, req);
 			break;
 		default:
 			dev_err(&hdev->pdev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 23370258aaeb..c96cf031a0d9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -817,11 +817,17 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	u8 msg_data[2];
+	int ret;
 
 	memcpy(&msg_data[0], &queue_id, sizeof(queue_id));
 
-	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, 2, false,
-			     NULL, 0);
+	/* disable vf queue before send queue reset msg to PF */
+	ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
+	if (ret)
+		return;
+
+	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+			     2, true, NULL, 0);
 }
 
 static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)

From 681ec3999b3d1548b97c077df77433b718b3bfb3 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 21 Mar 2018 15:49:22 +0800
Subject: [PATCH 1077/1678] net: hns3: fix for vlan table lost problem when
 resetting

The vlan table in hardware is clear after PF/Core/IMP/Global
reset, which will cause vlan tagged packets not being received
problem.

This patch fixes it by restoring the vlan table after reset.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 26 +++++++++++++++++++
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  3 +++
 2 files changed, 29 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 94f0b92ead38..f700ec18a91f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1404,11 +1404,15 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev,
 				__be16 proto, u16 vid)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vlan_filter)
 		ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
 
+	if (!ret)
+		set_bit(vid, priv->active_vlans);
+
 	return ret;
 }
 
@@ -1416,14 +1420,32 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
 				 __be16 proto, u16 vid)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret = -EIO;
 
 	if (h->ae_algo->ops->set_vlan_filter)
 		ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
 
+	if (!ret)
+		clear_bit(vid, priv->active_vlans);
+
 	return ret;
 }
 
+static void hns3_restore_vlan(struct net_device *netdev)
+{
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	u16 vid;
+	int ret;
+
+	for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
+		ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+		if (ret)
+			netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
+				    vid, ret);
+	}
+}
+
 static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 				u8 qos, __be16 vlan_proto)
 {
@@ -3341,6 +3363,10 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 	hns3_nic_set_rx_mode(netdev);
 	hns3_recover_hw_addr(netdev);
 
+	/* Hardware table is only clear when pf resets */
+	if (!(handle->flags & HNAE3_SUPPORT_VF))
+		hns3_restore_vlan(netdev);
+
 	/* Carrier off reporting is important to ethtool even BEFORE open */
 	netif_carrier_off(netdev);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index a5f4550483d9..c313780ef2a7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -10,6 +10,8 @@
 #ifndef __HNS3_ENET_H
 #define __HNS3_ENET_H
 
+#include <linux/if_vlan.h>
+
 #include "hnae3.h"
 
 extern const char hns3_driver_version[];
@@ -539,6 +541,7 @@ struct hns3_nic_priv {
 	struct notifier_block notifier_block;
 	/* Vxlan/Geneve information */
 	struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 };
 
 union l3_hdr_info {

From 2f550a467895b8715e17ae9bd6da048e8fce8c92 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 21 Mar 2018 15:49:23 +0800
Subject: [PATCH 1078/1678] net: hns3: export pci table of hclge and hclgevf to
 userspace

There is no module that is dependent on hclge or hclgevf's symbol,
but hns_enet need them to provide ops for it to run. When there is
a need to auto load the hns3 driver, the auto load will fail because
hclge or hclgevf is not loaded.

Hns_enet has already exported the pci table, so this patch exports
the pci table for hclge and hclgevf module too.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c   | 2 ++
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 588f2312a034..869e98a5d829 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -55,6 +55,8 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
 	{0, }
 };
 
+MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
+
 static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
 	"Mac    Loopback test",
 	"Serdes Loopback test",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index c96cf031a0d9..14b0e2685651 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -18,6 +18,8 @@ static const struct pci_device_id ae_algovf_pci_tbl[] = {
 	{0, }
 };
 
+MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
+
 static inline struct hclgevf_dev *hclgevf_ae_get_hdev(
 	struct hnae3_handle *handle)
 {

From d458c815e7901758651f2357153518d1eb0a95d0 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Wed, 21 Mar 2018 15:49:24 +0800
Subject: [PATCH 1079/1678] net: hns3: increase the max time for IMP handle
 command

It may need more time for IMP handle some command, such as reset.
This patch enlarges the max time for cmd timeout.

Driver will check the IMP result every us, it may break through the
loop when get the right result. So not all command need the max time.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h   | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 3fd10a6bec53..aae4abe37d5e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -12,7 +12,7 @@
 #include <linux/types.h>
 #include <linux/io.h>
 
-#define HCLGE_CMDQ_TX_TIMEOUT		1000
+#define HCLGE_CMDQ_TX_TIMEOUT		30000
 
 struct hclge_dev;
 struct hclge_desc {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 2caca9317f8c..621c6cbacf76 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -7,7 +7,7 @@
 #include <linux/types.h>
 #include "hnae3.h"
 
-#define HCLGEVF_CMDQ_TX_TIMEOUT		200
+#define HCLGEVF_CMDQ_TX_TIMEOUT		30000
 #define HCLGEVF_CMDQ_RX_INVLD_B		0
 #define HCLGEVF_CMDQ_RX_OUTVLD_B	1
 

From cd9d187b07c23df4925c3a47e06367315ba794ca Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:25 +0800
Subject: [PATCH 1080/1678] net: hns3: change GL update rate

The interrupt coalescing self-adaptive function updates the int_gl every
interrupt. The GL update rate is too faster to get a better new GL value.
This patch changes the GL update rate to every one hundred interrupts.
The GL update rate is defined by HNS3_INT_ADAPT_DOWN_START.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 ++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f700ec18a91f..e7cf7b4b40d3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -214,6 +214,7 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector,
 	/* Default: disable RL */
 	h->kinfo.int_rl_setting = 0;
 
+	tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
 	tqp_vector->rx_group.coal.flow_level = HNS3_FLOW_LOW;
 	tqp_vector->tx_group.coal.flow_level = HNS3_FLOW_LOW;
 }
@@ -2492,6 +2493,11 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
 	struct hns3_enet_ring_group *tx_group = &tqp_vector->tx_group;
 	bool rx_update, tx_update;
 
+	if (tqp_vector->int_adapt_down > 0) {
+		tqp_vector->int_adapt_down--;
+		return;
+	}
+
 	if (rx_group->coal.gl_adapt_enable) {
 		rx_update = hns3_get_new_int_gl(rx_group);
 		if (rx_update)
@@ -2505,6 +2511,8 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
 			hns3_set_vector_coalesce_tx_gl(tqp_vector,
 						       tx_group->coal.int_gl);
 	}
+
+	tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
 }
 
 static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index c313780ef2a7..2fe870b580b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -462,6 +462,8 @@ enum hns3_link_mode_bits {
 #define HNS3_INT_RL_MAX			0x00EC
 #define HNS3_INT_RL_ENABLE_MASK		0x40
 
+#define HNS3_INT_ADAPT_DOWN_START	100
+
 struct hns3_enet_coalesce {
 	u16 int_gl;
 	u8 gl_adapt_enable;

From a95e1f8666e910f43f47b5b1bec2cdf25f513f74 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:26 +0800
Subject: [PATCH 1081/1678] net: hns3: change the time interval of int_gl
 calculating

Since we change the update rate of int_gl from every interrupt to every
one hundred interrupts, the old way to get time interval by int_gl value
is not accurate. This patch calculates the time interval using the jiffies
value.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 46 ++++++++++++-------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  1 +
 2 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index e7cf7b4b40d3..0b4a676999ca 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2406,15 +2406,15 @@ out:
 
 static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 {
-#define HNS3_RX_ULTRA_PACKET_RATE 40000
+	struct hns3_enet_tqp_vector *tqp_vector =
+					ring_group->ring->tqp_vector;
 	enum hns3_flow_level_range new_flow_level;
-	struct hns3_enet_tqp_vector *tqp_vector;
-	int packets_per_secs;
-	int bytes_per_usecs;
+	int packets_per_msecs;
+	int bytes_per_msecs;
+	u32 time_passed_ms;
 	u16 new_int_gl;
-	int usecs;
 
-	if (!ring_group->coal.int_gl)
+	if (!ring_group->coal.int_gl || !tqp_vector->last_jiffies)
 		return false;
 
 	if (ring_group->total_packets == 0) {
@@ -2431,33 +2431,44 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
 	 */
 	new_flow_level = ring_group->coal.flow_level;
 	new_int_gl = ring_group->coal.int_gl;
-	tqp_vector = ring_group->ring->tqp_vector;
-	usecs = (ring_group->coal.int_gl << 1);
-	bytes_per_usecs = ring_group->total_bytes / usecs;
-	/* 1000000 microseconds */
-	packets_per_secs = ring_group->total_packets * 1000000 / usecs;
+	time_passed_ms =
+		jiffies_to_msecs(jiffies - tqp_vector->last_jiffies);
+
+	if (!time_passed_ms)
+		return false;
+
+	do_div(ring_group->total_packets, time_passed_ms);
+	packets_per_msecs = ring_group->total_packets;
+
+	do_div(ring_group->total_bytes, time_passed_ms);
+	bytes_per_msecs = ring_group->total_bytes;
+
+#define HNS3_RX_LOW_BYTE_RATE 10000
+#define HNS3_RX_MID_BYTE_RATE 20000
 
 	switch (new_flow_level) {
 	case HNS3_FLOW_LOW:
-		if (bytes_per_usecs > 10)
+		if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE)
 			new_flow_level = HNS3_FLOW_MID;
 		break;
 	case HNS3_FLOW_MID:
-		if (bytes_per_usecs > 20)
+		if (bytes_per_msecs > HNS3_RX_MID_BYTE_RATE)
 			new_flow_level = HNS3_FLOW_HIGH;
-		else if (bytes_per_usecs <= 10)
+		else if (bytes_per_msecs <= HNS3_RX_LOW_BYTE_RATE)
 			new_flow_level = HNS3_FLOW_LOW;
 		break;
 	case HNS3_FLOW_HIGH:
 	case HNS3_FLOW_ULTRA:
 	default:
-		if (bytes_per_usecs <= 20)
+		if (bytes_per_msecs <= HNS3_RX_MID_BYTE_RATE)
 			new_flow_level = HNS3_FLOW_MID;
 		break;
 	}
 
-	if ((packets_per_secs > HNS3_RX_ULTRA_PACKET_RATE) &&
-	    (&tqp_vector->rx_group == ring_group))
+#define HNS3_RX_ULTRA_PACKET_RATE 40
+
+	if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE &&
+	    &tqp_vector->rx_group == ring_group)
 		new_flow_level = HNS3_FLOW_ULTRA;
 
 	switch (new_flow_level) {
@@ -2512,6 +2523,7 @@ static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
 						       tx_group->coal.int_gl);
 	}
 
+	tqp_vector->last_jiffies = jiffies;
 	tqp_vector->int_adapt_down = HNS3_INT_ADAPT_DOWN_START;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 2fe870b580b1..39daa01f08d5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -499,6 +499,7 @@ struct hns3_enet_tqp_vector {
 
 	/* when 0 should adjust interrupt coalesce parameter */
 	u8 int_adapt_down;
+	unsigned long last_jiffies;
 } ____cacheline_internodealigned_in_smp;
 
 enum hns3_udp_tnl_type {

From 0979aa0bfdc6d873f495310a8680436a66ff9c81 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:27 +0800
Subject: [PATCH 1082/1678] net: hns3: fix for getting wrong link mode problem

Fixed link mode is returned by hns3_get_link_ksettings. It is
unreasonable.

This patch fixes it by adding some related functions to get link
mode from hardware.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  4 +
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    | 98 ++-----------------
 .../hisilicon/hns3/hns3pf/hclge_cmd.h         |  2 +
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 83 ++++++++++++++++
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  9 ++
 5 files changed, 107 insertions(+), 89 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 70441d281c27..9daa88d9041e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -411,6 +411,10 @@ struct hnae3_ae_ops {
 				 u32 *flowctrl_adv);
 	int (*set_led_id)(struct hnae3_handle *handle,
 			  enum ethtool_phys_id_state status);
+	void (*get_link_mode)(struct hnae3_handle *handle,
+			      unsigned long *supported,
+			      unsigned long *advertising);
+	void (*get_port_type)(struct hnae3_handle *handle, u8 *port_type);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 2db127c7e463..502f34761546 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -74,19 +74,6 @@ struct hns3_link_mode_mapping {
 	u32 ethtool_link_mode;
 };
 
-static const struct hns3_link_mode_mapping hns3_lm_map[] = {
-	{HNS3_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
-	{HNS3_LM_AUTONEG_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
-	{HNS3_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
-	{HNS3_LM_PAUSE_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
-	{HNS3_LM_BACKPLANE_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
-	{HNS3_LM_10BASET_HALF_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT},
-	{HNS3_LM_10BASET_FULL_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT},
-	{HNS3_LM_100BASET_HALF_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT},
-	{HNS3_LM_100BASET_FULL_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT},
-	{HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
-};
-
 static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
 {
 	struct hnae3_handle *h = hns3_get_handle(ndev);
@@ -365,24 +352,6 @@ static void hns3_self_test(struct net_device *ndev,
 		dev_open(ndev);
 }
 
-static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
-				  bool is_advertised)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(hns3_lm_map); i++) {
-		if (!(caps & hns3_lm_map[i].hns3_link_mode))
-			continue;
-
-		if (is_advertised)
-			__set_bit(hns3_lm_map[i].ethtool_link_mode,
-				  cmd->link_modes.advertising);
-		else
-			__set_bit(hns3_lm_map[i].ethtool_link_mode,
-				  cmd->link_modes.supported);
-	}
-}
-
 static int hns3_get_sset_count(struct net_device *netdev, int stringset)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -594,9 +563,6 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	u32 flowctrl_adv = 0;
-	u32 supported_caps;
-	u32 advertised_caps;
-	u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
 	u8 link_stat;
 
 	if (!h->ae_algo || !h->ae_algo->ops)
@@ -619,62 +585,16 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	/* 2.media_type get from bios parameter block */
-	if (h->ae_algo->ops->get_media_type) {
-		h->ae_algo->ops->get_media_type(h, &media_type);
+	/* 2.get link mode and port type*/
+	if (h->ae_algo->ops->get_link_mode)
+		h->ae_algo->ops->get_link_mode(h,
+					       cmd->link_modes.supported,
+					       cmd->link_modes.advertising);
 
-		switch (media_type) {
-		case HNAE3_MEDIA_TYPE_FIBER:
-			cmd->base.port = PORT_FIBRE;
-			supported_caps = HNS3_LM_FIBRE_BIT |
-					 HNS3_LM_AUTONEG_BIT |
-					 HNS3_LM_PAUSE_BIT |
-					 HNS3_LM_1000BASET_FULL_BIT;
-
-			advertised_caps = supported_caps;
-			break;
-		case HNAE3_MEDIA_TYPE_COPPER:
-			cmd->base.port = PORT_TP;
-			supported_caps = HNS3_LM_TP_BIT |
-					 HNS3_LM_AUTONEG_BIT |
-					 HNS3_LM_PAUSE_BIT |
-					 HNS3_LM_1000BASET_FULL_BIT |
-					 HNS3_LM_100BASET_FULL_BIT |
-					 HNS3_LM_100BASET_HALF_BIT |
-					 HNS3_LM_10BASET_FULL_BIT |
-					 HNS3_LM_10BASET_HALF_BIT;
-			advertised_caps = supported_caps;
-			break;
-		case HNAE3_MEDIA_TYPE_BACKPLANE:
-			cmd->base.port = PORT_NONE;
-			supported_caps = HNS3_LM_BACKPLANE_BIT |
-					 HNS3_LM_PAUSE_BIT |
-					 HNS3_LM_AUTONEG_BIT |
-					 HNS3_LM_1000BASET_FULL_BIT |
-					 HNS3_LM_100BASET_FULL_BIT |
-					 HNS3_LM_100BASET_HALF_BIT |
-					 HNS3_LM_10BASET_FULL_BIT |
-					 HNS3_LM_10BASET_HALF_BIT;
-
-			advertised_caps = supported_caps;
-			break;
-		case HNAE3_MEDIA_TYPE_UNKNOWN:
-		default:
-			cmd->base.port = PORT_OTHER;
-			supported_caps = 0;
-			advertised_caps = 0;
-			break;
-		}
-
-		if (!cmd->base.autoneg)
-			advertised_caps &= ~HNS3_LM_AUTONEG_BIT;
-
-		advertised_caps &= ~HNS3_LM_PAUSE_BIT;
-
-		/* now, map driver link modes to ethtool link modes */
-		hns3_driv_to_eth_caps(supported_caps, cmd, false);
-		hns3_driv_to_eth_caps(advertised_caps, cmd, true);
-	}
+	cmd->base.port = PORT_NONE;
+	if (h->ae_algo->ops->get_port_type)
+		h->ae_algo->ops->get_port_type(h,
+					       &cmd->base.port);
 
 	/* 3.mdix_ctrl&mdix get from phy reg */
 	if (h->ae_algo->ops->get_mdix_mode)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index aae4abe37d5e..ee3cbac6dfaa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -414,6 +414,8 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_DEFAULT_SPEED_M	GENMASK(23, 16)
 #define HCLGE_CFG_RSS_SIZE_S	24
 #define HCLGE_CFG_RSS_SIZE_M	GENMASK(31, 24)
+#define HCLGE_CFG_SPEED_ABILITY_S	0
+#define HCLGE_CFG_SPEED_ABILITY_M	GENMASK(7, 0)
 
 struct hclge_cfg_param_cmd {
 	__le32 offset;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 869e98a5d829..31e90b588e92 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1026,6 +1026,45 @@ static int hclge_parse_speed(int speed_cmd, int *speed)
 	return 0;
 }
 
+static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
+					u8 speed_ability)
+{
+	unsigned long *supported = hdev->hw.mac.supported;
+
+	if (speed_ability & HCLGE_SUPPORT_1G_BIT)
+		set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+			supported);
+
+	if (speed_ability & HCLGE_SUPPORT_10G_BIT)
+		set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+			supported);
+
+	if (speed_ability & HCLGE_SUPPORT_25G_BIT)
+		set_bit(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+			supported);
+
+	if (speed_ability & HCLGE_SUPPORT_50G_BIT)
+		set_bit(ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+			supported);
+
+	if (speed_ability & HCLGE_SUPPORT_100G_BIT)
+		set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+			supported);
+
+	set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
+	set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+}
+
+static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability)
+{
+	u8 media_type = hdev->hw.mac.media_type;
+
+	if (media_type != HNAE3_MEDIA_TYPE_FIBER)
+		return;
+
+	hclge_parse_fiber_link_mode(hdev, speed_ability);
+}
+
 static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 {
 	struct hclge_cfg_param_cmd *req;
@@ -1074,6 +1113,10 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 
 	req = (struct hclge_cfg_param_cmd *)desc[1].data;
 	cfg->numa_node_map = __le32_to_cpu(req->param[0]);
+
+	cfg->speed_ability = hnae_get_field(__le32_to_cpu(req->param[1]),
+					    HCLGE_CFG_SPEED_ABILITY_M,
+					    HCLGE_CFG_SPEED_ABILITY_S);
 }
 
 /* hclge_get_cfg: query the static parameter from flash
@@ -1162,6 +1205,8 @@ static int hclge_configure(struct hclge_dev *hdev)
 		return ret;
 	}
 
+	hclge_parse_link_mode(hdev, cfg.speed_ability);
+
 	if ((hdev->tc_max > HNAE3_MAX_TC) ||
 	    (hdev->tc_max < 1)) {
 		dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
@@ -6061,6 +6106,42 @@ static int hclge_update_led_status(struct hclge_dev *hdev)
 					HCLGE_LED_NO_CHANGE);
 }
 
+static void hclge_get_link_mode(struct hnae3_handle *handle,
+				unsigned long *supported,
+				unsigned long *advertising)
+{
+	unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	unsigned int idx = 0;
+
+	for (; idx < size; idx++) {
+		supported[idx] = hdev->hw.mac.supported[idx];
+		advertising[idx] = hdev->hw.mac.advertising[idx];
+	}
+}
+
+static void hclge_get_port_type(struct hnae3_handle *handle,
+				u8 *port_type)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u8 media_type = hdev->hw.mac.media_type;
+
+	switch (media_type) {
+	case HNAE3_MEDIA_TYPE_FIBER:
+		*port_type = PORT_FIBRE;
+		break;
+	case HNAE3_MEDIA_TYPE_COPPER:
+		*port_type = PORT_TP;
+		break;
+	case HNAE3_MEDIA_TYPE_UNKNOWN:
+	default:
+		*port_type = PORT_OTHER;
+		break;
+	}
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -6116,6 +6197,8 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_regs_len = hclge_get_regs_len,
 	.get_regs = hclge_get_regs,
 	.set_led_id = hclge_set_led_id,
+	.get_link_mode = hclge_get_link_mode,
+	.get_port_type = hclge_get_port_type,
 };
 
 static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index edbcb739841a..8c14d10c88cb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -106,6 +106,12 @@
 #define HCLGE_MAC_MIN_FRAME		64
 #define HCLGE_MAC_MAX_FRAME		9728
 
+#define HCLGE_SUPPORT_1G_BIT		BIT(0)
+#define HCLGE_SUPPORT_10G_BIT		BIT(1)
+#define HCLGE_SUPPORT_25G_BIT		BIT(2)
+#define HCLGE_SUPPORT_50G_BIT		BIT(3)
+#define HCLGE_SUPPORT_100G_BIT		BIT(4)
+
 enum HCLGE_DEV_STATE {
 	HCLGE_STATE_REINITING,
 	HCLGE_STATE_DOWN,
@@ -170,6 +176,8 @@ struct hclge_mac {
 	struct phy_device *phydev;
 	struct mii_bus *mdio_bus;
 	phy_interface_t phy_if;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
 };
 
 struct hclge_hw {
@@ -236,6 +244,7 @@ struct hclge_cfg {
 	u8 mac_addr[ETH_ALEN];
 	u8 default_speed;
 	u32 numa_node_map;
+	u8 speed_ability;
 };
 
 struct hclge_tm_info {

From 175ec96b46ef710de200c49deeb44b41390ec644 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:28 +0800
Subject: [PATCH 1083/1678] net: hns3: add get_link support to VF

This patch adds ethtool_ops.get_link support to VF.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c        | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 502f34761546..513d8d6dd1ad 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1053,6 +1053,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
 	.get_channels = hns3_get_channels,
 	.get_coalesce = hns3_get_coalesce,
 	.set_coalesce = hns3_set_coalesce,
+	.get_link = hns3_get_link,
 };
 
 static const struct ethtool_ops hns3_ethtool_ops = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 14b0e2685651..f917a1e9783d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1468,6 +1468,13 @@ static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle,
 	*max_rss_size = hdev->rss_size_max;
 }
 
+static int hclgevf_get_status(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return hdev->hw.mac.link;
+}
+
 static const struct hnae3_ae_ops hclgevf_ops = {
 	.init_ae_dev = hclgevf_init_ae_dev,
 	.uninit_ae_dev = hclgevf_uninit_ae_dev,
@@ -1500,6 +1507,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.set_vlan_filter = hclgevf_set_vlan_filter,
 	.get_channels = hclgevf_get_channels,
 	.get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
+	.get_status = hclgevf_get_status,
 };
 
 static struct hnae3_ae_algo ae_algovf = {

From 4a152de95d71652b86d391298c56b4503e0b9932 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:29 +0800
Subject: [PATCH 1084/1678] net: hns3: add querying speed and duplex support to
 VF

This patch adds support for querying speed and duplex by ethtool ethX
to VF.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         |  8 +++++--
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 22 +++++++++++++++++++
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |  4 ++++
 .../hisilicon/hns3/hns3vf/hclgevf_mbx.c       |  5 +++++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index cef14e73edb8..949da0c993cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -309,16 +309,20 @@ static int hclge_get_link_info(struct hclge_vport *vport,
 {
 	struct hclge_dev *hdev = vport->back;
 	u16 link_status;
-	u8 msg_data[2];
+	u8 msg_data[8];
 	u8 dest_vfid;
+	u16 duplex;
 
 	/* mac.link can only be 0 or 1 */
 	link_status = (u16)hdev->hw.mac.link;
+	duplex = hdev->hw.mac.duplex;
 	memcpy(&msg_data[0], &link_status, sizeof(u16));
+	memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
+	memcpy(&msg_data[6], &duplex, sizeof(u16));
 	dest_vfid = mbx_req->mbx_src_vfid;
 
 	/* send this requested info to VF */
-	return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
 				  HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index f917a1e9783d..906dfa3d0fc6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1475,6 +1475,27 @@ static int hclgevf_get_status(struct hnae3_handle *handle)
 	return hdev->hw.mac.link;
 }
 
+static void hclgevf_get_ksettings_an_result(struct hnae3_handle *handle,
+					    u8 *auto_neg, u32 *speed,
+					    u8 *duplex)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	if (speed)
+		*speed = hdev->hw.mac.speed;
+	if (duplex)
+		*duplex = hdev->hw.mac.duplex;
+	if (auto_neg)
+		*auto_neg = AUTONEG_DISABLE;
+}
+
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+				 u8 duplex)
+{
+	hdev->hw.mac.speed = speed;
+	hdev->hw.mac.duplex = duplex;
+}
+
 static const struct hnae3_ae_ops hclgevf_ops = {
 	.init_ae_dev = hclgevf_init_ae_dev,
 	.uninit_ae_dev = hclgevf_uninit_ae_dev,
@@ -1508,6 +1529,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.get_channels = hclgevf_get_channels,
 	.get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
 	.get_status = hclgevf_get_status,
+	.get_ksettings_an_result = hclgevf_get_ksettings_an_result,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index a63bee4a3674..0eaea063e7c5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -61,6 +61,8 @@ enum hclgevf_states {
 struct hclgevf_mac {
 	u8 mac_addr[ETH_ALEN];
 	int link;
+	u8 duplex;
+	u32 speed;
 };
 
 struct hclgevf_hw {
@@ -161,4 +163,6 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
 			 u8 *resp_data, u16 resp_len);
 void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
 void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
+void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
+				 u8 duplex);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 9768f71f5b18..a63ed3aa2c00 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -133,6 +133,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 	struct hclgevf_cmq_ring *crq;
 	struct hclgevf_desc *desc;
 	u16 link_status, flag;
+	u32 speed;
+	u8 duplex;
 	u8 *temp;
 	int i;
 
@@ -164,9 +166,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			break;
 		case HCLGE_MBX_LINK_STAT_CHANGE:
 			link_status = le16_to_cpu(req->msg[1]);
+			memcpy(&speed, &req->msg[2], sizeof(speed));
+			duplex = (u8)le16_to_cpu(req->msg[4]);
 
 			/* update upper layer with new link link status */
 			hclgevf_update_link_status(hdev, link_status);
+			hclgevf_update_speed_duplex(hdev, speed, duplex);
 
 			break;
 		default:

From 1931dc20839e31af758a5f110f31200f2ea15539 Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Wed, 21 Mar 2018 15:49:30 +0800
Subject: [PATCH 1085/1678] net: hns3: fix for not returning problem in
 get_link_ksettings when phy exists

When phy exists, phy_ethtool_ksettings_get function is enough to get the
link ksettings. If the phy exists, get_link_ksettings function can return
directly after phy_ethtool_ksettings_get is called.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 513d8d6dd1ad..9d07116a4426 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -569,9 +569,13 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 		return -EOPNOTSUPP;
 
 	/* 1.auto_neg & speed & duplex from cmd */
-	if (netdev->phydev)
+	if (netdev->phydev) {
 		phy_ethtool_ksettings_get(netdev->phydev, cmd);
-	else if (h->ae_algo->ops->get_ksettings_an_result)
+
+		return 0;
+	}
+
+	if (h->ae_algo->ops->get_ksettings_an_result)
 		h->ae_algo->ops->get_ksettings_an_result(h,
 							 &cmd->base.autoneg,
 							 &cmd->base.speed,

From 1c6e1037800ed70448befd0f9143e0dd57db9105 Mon Sep 17 00:00:00 2001
From: Tal Bar <talb@mellanox.com>
Date: Wed, 21 Mar 2018 09:34:05 +0200
Subject: [PATCH 1086/1678] mlxsw: spectrum: Update the supported firmware to
 version 13.1620.192

This new firmware contains:
	- Support for auto-neg disable mode

Signed-off-by: Tal Bar <talb@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 83886a9df206..92f0210dec8e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -75,8 +75,8 @@
 #include "../mlxfw/mlxfw.h"
 
 #define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1530
-#define MLXSW_FWREV_SUBMINOR 152
+#define MLXSW_FWREV_MINOR 1620
+#define MLXSW_FWREV_SUBMINOR 192
 #define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
 
 #define MLXSW_SP_FW_FILENAME \

From 8e1ed7392c1b19db43c2b47998371cbea9babb3c Mon Sep 17 00:00:00 2001
From: Tal Bar <talb@mellanox.com>
Date: Wed, 21 Mar 2018 09:34:06 +0200
Subject: [PATCH 1087/1678] mlxsw: spectrum: Add support for auto-negotiation
 disable mode

In 'auto-neg off' the device have sent AN (auto-negotiation) frames
with the forced speed. Thus, fix it using an_disable_admin field in
Port type and speed (PTYS) register. This field indicates if speed
negotiation frames would be send by the port or not.

Add the field and enable/disable it for 'auto-neg on/off', make the
port to start/stop sending AN (auto-negotiation) frames. Note that for
SwitchX2 the behavior doesn't change (i.e support only AN enabled with
forced speed).

Signed-off-by: Tal Bar <talb@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 11 ++++++++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  8 ++++----
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c |  8 ++++----
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index cb5f77f09f8e..e002398364c8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2872,6 +2872,14 @@ static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port,
 
 MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
 
+/* an_disable_admin
+ * Auto negotiation disable administrative configuration
+ * 0 - Device doesn't support AN disable.
+ * 1 - Device supports AN disable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, an_disable_admin, 0x00, 30, 1);
+
 /* reg_ptys_local_port
  * Local port number.
  * Access: Index
@@ -3000,12 +3008,13 @@ MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
 MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
 
 static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
-					   u32 proto_admin)
+					   u32 proto_admin, bool autoneg)
 {
 	MLXSW_REG_ZERO(ptys, payload);
 	mlxsw_reg_ptys_local_port_set(payload, local_port);
 	mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH);
 	mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
+	mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg);
 }
 
 static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 92f0210dec8e..7885fc475f7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2390,7 +2390,7 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
 	int err;
 
 	autoneg = mlxsw_sp_port->link.autoneg;
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
@@ -2424,7 +2424,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
 	bool autoneg;
 	int err;
 
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
@@ -2442,7 +2442,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
 	}
 
 	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
-				eth_proto_new);
+				eth_proto_new, autoneg);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
@@ -2653,7 +2653,7 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
 
 	eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
 	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
-				eth_proto_admin);
+				eth_proto_admin, mlxsw_sp_port->link.autoneg);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index f3c29bbf07e2..c87b0934a405 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -789,7 +789,7 @@ mlxsw_sx_port_get_link_ksettings(struct net_device *dev,
 	u32 supported, advertising, lp_advertising;
 	int err;
 
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 	if (err) {
 		netdev_err(dev, "Failed to get proto");
@@ -879,7 +879,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
 		mlxsw_sx_to_ptys_advert_link(advertising) :
 		mlxsw_sx_to_ptys_speed(speed);
 
-	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0, false);
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 	if (err) {
 		netdev_err(dev, "Failed to get proto");
@@ -897,7 +897,7 @@ mlxsw_sx_port_set_link_ksettings(struct net_device *dev,
 		return 0;
 
 	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
-				eth_proto_new);
+				eth_proto_new, true);
 	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 	if (err) {
 		netdev_err(dev, "Failed to set proto admin");
@@ -1029,7 +1029,7 @@ mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
 
 	eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
 	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
-				eth_proto_admin);
+				eth_proto_admin, true);
 	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 }
 

From dfde331e757fd792e1c9579b72a8370ca665e5ed Mon Sep 17 00:00:00 2001
From: GhantaKrishnamurthy MohanKrishna
 <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Date: Wed, 21 Mar 2018 14:37:43 +0100
Subject: [PATCH 1088/1678] tipc: modify socket iterator for sock_diag

The current socket iterator function tipc_nl_sk_dump, handles socket
locks and calls __tipc_nl_add_sk for each socket.
To reuse this logic in sock_diag implementation, we do minor
modifications to make these functions generic as described below.

In this commit, we add a two new functions __tipc_nl_sk_walk,
__tipc_nl_add_sk_info and modify tipc_nl_sk_dump, __tipc_nl_add_sk
accordingly.

In __tipc_nl_sk_walk we:
1. acquire and release socket locks
2. for each socket, execute the specified callback function

In __tipc_nl_add_sk we:
- Move the netlink attribute insertion to __tipc_nl_add_sk_info.

tipc_nl_sk_dump calls tipc_nl_sk_walk with __tipc_nl_add_sk as argument.

sock_diag will use these generic functions in a later commit.

There is no functional change in this commit.
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: GhantaKrishnamurthy MohanKrishna <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 65 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a4a9148d4629..35ac0f5b9529 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3160,16 +3160,33 @@ msg_full:
 	return -EMSGSIZE;
 }
 
+static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
+			  *tsk)
+{
+	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = tipc_net(net);
+	struct sock *sk = &tsk->sk;
+
+	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+		return -EMSGSIZE;
+
+	if (tipc_sk_connected(sk)) {
+		if (__tipc_nl_add_sk_con(skb, tsk))
+			return -EMSGSIZE;
+	} else if (!list_empty(&tsk->publications)) {
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
+			return -EMSGSIZE;
+	}
+	return 0;
+}
+
 /* Caller should hold socket lock for the passed tipc socket. */
 static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct tipc_sock *tsk)
 {
-	int err;
-	void *hdr;
 	struct nlattr *attrs;
-	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct sock *sk = &tsk->sk;
+	void *hdr;
 
 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -3179,19 +3196,10 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
 	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
 	if (!attrs)
 		goto genlmsg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid))
-		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+
+	if (__tipc_nl_add_sk_info(skb, tsk))
 		goto attr_msg_cancel;
 
-	if (tipc_sk_connected(sk)) {
-		err = __tipc_nl_add_sk_con(skb, tsk);
-		if (err)
-			goto attr_msg_cancel;
-	} else if (!list_empty(&tsk->publications)) {
-		if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL))
-			goto attr_msg_cancel;
-	}
 	nla_nest_end(skb, attrs);
 	genlmsg_end(skb, hdr);
 
@@ -3205,16 +3213,19 @@ msg_cancel:
 	return -EMSGSIZE;
 }
 
-int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int __tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+			     int (*skb_handler)(struct sk_buff *skb,
+						struct netlink_callback *cb,
+						struct tipc_sock *tsk))
 {
-	int err;
-	struct tipc_sock *tsk;
-	const struct bucket_table *tbl;
-	struct rhash_head *pos;
 	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	u32 tbl_id = cb->args[0];
+	struct tipc_net *tn = tipc_net(net);
+	const struct bucket_table *tbl;
 	u32 prev_portid = cb->args[1];
+	u32 tbl_id = cb->args[0];
+	struct rhash_head *pos;
+	struct tipc_sock *tsk;
+	int err;
 
 	rcu_read_lock();
 	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
@@ -3226,12 +3237,13 @@ int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			}
 
-			err = __tipc_nl_add_sk(skb, cb, tsk);
+			err = skb_handler(skb, cb, tsk);
 			if (err) {
 				prev_portid = tsk->portid;
 				spin_unlock_bh(&tsk->sk.sk_lock.slock);
 				goto out;
 			}
+
 			prev_portid = 0;
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
@@ -3244,6 +3256,11 @@ out:
 	return skb->len;
 }
 
+int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return __tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+}
+
 /* Caller should hold socket lock for the passed tipc socket. */
 static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
 				 struct netlink_callback *cb,

From c30b70deb5f4861f590031c33fd3ec6cc63f1df1 Mon Sep 17 00:00:00 2001
From: GhantaKrishnamurthy MohanKrishna
 <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Date: Wed, 21 Mar 2018 14:37:44 +0100
Subject: [PATCH 1089/1678] tipc: implement socket diagnostics for AF_TIPC

This commit adds socket diagnostics capability for AF_TIPC in netlink
family NETLINK_SOCK_DIAG in a new kernel module (diag.ko).

The following are key design considerations:
- config TIPC_DIAG has default y, like INET_DIAG.
- only requests with flag NLM_F_DUMP is supported (dump all).
- tipc_sock_diag_req message is introduced to send filter parameters.
- the response attributes are of TLV, some nested.

To avoid exposing data structures between diag and tipc modules and
avoid code duplication, the following additions are required:
- export tipc_nl_sk_walk function to reuse socket iterator.
- export tipc_sk_fill_sock_diag to fill the tipc diag attributes.
- create a sock_diag response message in __tipc_add_sock_diag defined
  in diag.c and use the above exported tipc_sk_fill_sock_diag
  to fill response.

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: GhantaKrishnamurthy MohanKrishna <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h      |  18 ++++
 include/uapi/linux/tipc_sockets_diag.h |  17 ++++
 net/tipc/Kconfig                       |   8 ++
 net/tipc/Makefile                      |   5 ++
 net/tipc/diag.c                        | 114 +++++++++++++++++++++++++
 net/tipc/socket.c                      |  72 ++++++++++++++--
 net/tipc/socket.h                      |  10 ++-
 7 files changed, 238 insertions(+), 6 deletions(-)
 create mode 100644 include/uapi/linux/tipc_sockets_diag.h
 create mode 100644 net/tipc/diag.c

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 469aa67a5ecb..d7cec0480d70 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -114,6 +114,13 @@ enum {
 	TIPC_NLA_SOCK_REF,		/* u32 */
 	TIPC_NLA_SOCK_CON,		/* nest */
 	TIPC_NLA_SOCK_HAS_PUBL,		/* flag */
+	TIPC_NLA_SOCK_STAT,		/* nest */
+	TIPC_NLA_SOCK_TYPE,		/* u32 */
+	TIPC_NLA_SOCK_INO,		/* u32 */
+	TIPC_NLA_SOCK_UID,		/* u32 */
+	TIPC_NLA_SOCK_TIPC_STATE,	/* u32 */
+	TIPC_NLA_SOCK_COOKIE,		/* u64 */
+	TIPC_NLA_SOCK_PAD,		/* flag */
 
 	__TIPC_NLA_SOCK_MAX,
 	TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -238,6 +245,17 @@ enum {
 	TIPC_NLA_CON_MAX = __TIPC_NLA_CON_MAX - 1
 };
 
+/* Nest, socket statistics info */
+enum {
+	TIPC_NLA_SOCK_STAT_RCVQ,	/* u32 */
+	TIPC_NLA_SOCK_STAT_SENDQ,	/* u32 */
+	TIPC_NLA_SOCK_STAT_LINK_CONG,	/* flag */
+	TIPC_NLA_SOCK_STAT_CONN_CONG,	/* flag */
+
+	__TIPC_NLA_SOCK_STAT_MAX,
+	TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
+};
+
 /* Nest, link propreties. Valid for link, media and bearer */
 enum {
 	TIPC_NLA_PROP_UNSPEC,
diff --git a/include/uapi/linux/tipc_sockets_diag.h b/include/uapi/linux/tipc_sockets_diag.h
new file mode 100644
index 000000000000..7678cf2f0dcc
--- /dev/null
+++ b/include/uapi/linux/tipc_sockets_diag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* AF_TIPC sock_diag interface for querying open sockets */
+
+#ifndef _UAPI__TIPC_SOCKETS_DIAG_H__
+#define _UAPI__TIPC_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+#include <linux/sock_diag.h>
+
+/* Request */
+struct tipc_sock_diag_req {
+	__u8	sdiag_family;	/* must be AF_TIPC */
+	__u8	sdiag_protocol;	/* must be 0 */
+	__u16	pad;		/* must be 0 */
+	__u32	tidiag_states;	/* query*/
+};
+#endif /* _UAPI__TIPC_SOCKETS_DIAG_H__ */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index c25a3a149dc4..e450212121d2 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -34,3 +34,11 @@ config TIPC_MEDIA_UDP
 	  Saying Y here will enable support for running TIPC over IP/UDP
 	bool
 	default y
+
+config TIPC_DIAG
+	tristate "TIPC: socket monitoring interface"
+	depends on TIPC
+	default y
+	---help---
+	Support for TIPC socket monitoring interface used by ss tool.
+	If unsure, say Y.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 1edb7192aa2f..aca168f2abb1 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -14,3 +14,8 @@ tipc-y	+= addr.o bcast.o bearer.o \
 tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
 tipc-$(CONFIG_SYSCTL)		+= sysctl.o
+
+
+obj-$(CONFIG_TIPC_DIAG)	+= diag.o
+
+tipc_diag-y	:= diag.o
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
new file mode 100644
index 000000000000..46d9cd62f781
--- /dev/null
+++ b/net/tipc/diag.c
@@ -0,0 +1,114 @@
+/*
+ * net/tipc/diag.c: TIPC socket diag
+ *
+ * Copyright (c) 2018, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "ASIS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "socket.h"
+#include <linux/sock_diag.h>
+#include <linux/tipc_sockets_diag.h>
+
+static u64 __tipc_diag_gen_cookie(struct sock *sk)
+{
+	u32 res[2];
+
+	sock_diag_save_cookie(sk, res);
+	return *((u64 *)res);
+}
+
+static int __tipc_add_sock_diag(struct sk_buff *skb,
+				struct netlink_callback *cb,
+				struct tipc_sock *tsk)
+{
+	struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh);
+	struct nlmsghdr *nlh;
+	int err;
+
+	nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0,
+			       NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states,
+				     __tipc_diag_gen_cookie);
+	if (err)
+		return err;
+
+	nlmsg_end(skb, nlh);
+	return 0;
+}
+
+static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag);
+}
+
+static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
+				       struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct tipc_sock_diag_req);
+	struct net *net = sock_net(skb->sk);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = tipc_diag_dump,
+		};
+		netlink_dump_start(net->diag_nlsk, skb, h, &c);
+		return 0;
+	}
+	return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler tipc_sock_diag_handler = {
+	.family = AF_TIPC,
+	.dump = tipc_sock_diag_handler_dump,
+};
+
+static int __init tipc_diag_init(void)
+{
+	return sock_diag_register(&tipc_sock_diag_handler);
+}
+
+static void __exit tipc_diag_exit(void)
+{
+	sock_diag_unregister(&tipc_sock_diag_handler);
+}
+
+module_init(tipc_diag_init);
+module_exit(tipc_diag_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 35ac0f5b9529..07559ce4b8ba 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3213,10 +3213,10 @@ msg_cancel:
 	return -EMSGSIZE;
 }
 
-static int __tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
-			     int (*skb_handler)(struct sk_buff *skb,
-						struct netlink_callback *cb,
-						struct tipc_sock *tsk))
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+		    int (*skb_handler)(struct sk_buff *skb,
+				       struct netlink_callback *cb,
+				       struct tipc_sock *tsk))
 {
 	struct net *net = sock_net(skb->sk);
 	struct tipc_net *tn = tipc_net(net);
@@ -3255,10 +3255,72 @@ out:
 
 	return skb->len;
 }
+EXPORT_SYMBOL(tipc_nl_sk_walk);
+
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+			   u32 sk_filter_state,
+			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
+{
+	struct sock *sk = &tsk->sk;
+	struct nlattr *attrs;
+	struct nlattr *stat;
+
+	/*filter response w.r.t sk_state*/
+	if (!(sk_filter_state & (1 << sk->sk_state)))
+		return 0;
+
+	attrs = nla_nest_start(skb, TIPC_NLA_SOCK);
+	if (!attrs)
+		goto msg_cancel;
+
+	if (__tipc_nl_add_sk_info(skb, tsk))
+		goto attr_msg_cancel;
+
+	if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_UID,
+			from_kuid_munged(sk_user_ns(sk), sock_i_uid(sk))) ||
+	    nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
+			      tipc_diag_gen_cookie(sk),
+			      TIPC_NLA_SOCK_PAD))
+		goto attr_msg_cancel;
+
+	stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT);
+	if (!stat)
+		goto attr_msg_cancel;
+
+	if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
+			skb_queue_len(&sk->sk_receive_queue)) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
+			skb_queue_len(&sk->sk_write_queue)))
+		goto stat_msg_cancel;
+
+	if (tsk->cong_link_cnt &&
+	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG))
+		goto stat_msg_cancel;
+
+	if (tsk_conn_cong(tsk) &&
+	    nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG))
+		goto stat_msg_cancel;
+
+	nla_nest_end(skb, stat);
+	nla_nest_end(skb, attrs);
+
+	return 0;
+
+stat_msg_cancel:
+	nla_nest_cancel(skb, stat);
+attr_msg_cancel:
+	nla_nest_cancel(skb, attrs);
+msg_cancel:
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL(tipc_sk_fill_sock_diag);
 
 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	return __tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
+	return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk);
 }
 
 /* Caller should hold socket lock for the passed tipc socket. */
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 06fb5944cf76..aae3fd4cd06c 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -49,6 +49,8 @@
 #define RCVBUF_DEF  (FLOWCTL_BLK_SZ * 1024 * 2)
 #define RCVBUF_MAX  (FLOWCTL_BLK_SZ * 1024 * 16)
 
+struct tipc_sock;
+
 int tipc_socket_init(void);
 void tipc_socket_stop(void);
 void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
@@ -59,5 +61,11 @@ int tipc_sk_rht_init(struct net *net);
 void tipc_sk_rht_destroy(struct net *net);
 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
+			   u32 sk_filter_state,
+			   u64 (*tipc_diag_gen_cookie)(struct sock *sk));
+int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
+		    int (*skb_handler)(struct sk_buff *skb,
+				       struct netlink_callback *cb,
+				       struct tipc_sock *tsk));
 #endif

From 872619d8cf810c17279335ef531a2a34f3b4e589 Mon Sep 17 00:00:00 2001
From: GhantaKrishnamurthy MohanKrishna
 <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Date: Wed, 21 Mar 2018 14:37:45 +0100
Subject: [PATCH 1090/1678] tipc: step sk->sk_drops when rcv buffer is full

Currently when tipc is unable to queue a received message on a
socket, the message is rejected back to the sender with error
TIPC_ERR_OVERLOAD. However, the application on this socket
has no knowledge about these discards.

In this commit, we try to step the sk_drops counter when tipc
is unable to queue a received message. Export sk_drops
using tipc socket diagnostics.

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: GhantaKrishnamurthy MohanKrishna <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h | 1 +
 net/tipc/socket.c                 | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index d7cec0480d70..d896ded51bcb 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -251,6 +251,7 @@ enum {
 	TIPC_NLA_SOCK_STAT_SENDQ,	/* u32 */
 	TIPC_NLA_SOCK_STAT_LINK_CONG,	/* flag */
 	TIPC_NLA_SOCK_STAT_CONN_CONG,	/* flag */
+	TIPC_NLA_SOCK_STAT_DROP,	/* u32 */
 
 	__TIPC_NLA_SOCK_STAT_MAX,
 	TIPC_NLA_SOCK_STAT_MAX = __TIPC_NLA_SOCK_STAT_MAX - 1
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 07559ce4b8ba..732ec894f69f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2122,8 +2122,10 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
 		    (!sk_conn && msg_connected(hdr)) ||
 		    (!grp && msg_in_group(hdr)))
 			err = TIPC_ERR_NO_PORT;
-		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+		else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) {
+			atomic_inc(&sk->sk_drops);
 			err = TIPC_ERR_OVERLOAD;
+		}
 
 		if (unlikely(err)) {
 			tipc_skb_reject(net, err, skb, xmitq);
@@ -2202,6 +2204,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 
 		/* Overload => reject message back to sender */
 		onode = tipc_own_addr(sock_net(sk));
+		atomic_inc(&sk->sk_drops);
 		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
 			__skb_queue_tail(xmitq, skb);
 		break;
@@ -3293,7 +3296,9 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
 	if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ,
 			skb_queue_len(&sk->sk_receive_queue)) ||
 	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ,
-			skb_queue_len(&sk->sk_write_queue)))
+			skb_queue_len(&sk->sk_write_queue)) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP,
+			atomic_read(&sk->sk_drops)))
 		goto stat_msg_cancel;
 
 	if (tsk->cong_link_cnt &&

From e2e031640b3a9482b137b68193b7d59b8308185c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 21 Mar 2018 17:31:15 +0000
Subject: [PATCH 1091/1678] net: mvpp2: use correct index on array mvpp2_pools

Array mvpp2_pools is being indexed by long_log_pool, however this
looks like a cut-n-paste bug and in fact should be short_log_pool.

Detected by CoverityScan, CID#1466113 ("Copy-paste error")

Fixes: 576193f2d579 ("net: mvpp2: jumbo frames support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9bd35f2291d6..f8bc3d4a39ff 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -4632,7 +4632,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
 	if (!port->pool_short) {
 		port->pool_short =
 			mvpp2_bm_pool_use(port, short_log_pool,
-					  mvpp2_pools[long_log_pool].pkt_size);
+					  mvpp2_pools[short_log_pool].pkt_size);
 		if (!port->pool_short)
 			return -ENOMEM;
 

From faf4db008163607e993279b7f7aa456cb4c11e9a Mon Sep 17 00:00:00 2001
From: Tal Gilboa <talgi@mellanox.com>
Date: Wed, 21 Mar 2018 20:33:45 +0200
Subject: [PATCH 1092/1678] Documentation/networking: Add net DIM documentation

Net DIM is a generic algorithm, purposed for dynamically
optimizing network devices interrupt moderation. This
document describes how it works and how to use it.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/net_dim.txt | 174 +++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 Documentation/networking/net_dim.txt

diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt
new file mode 100644
index 000000000000..9cb31c5e2dcd
--- /dev/null
+++ b/Documentation/networking/net_dim.txt
@@ -0,0 +1,174 @@
+Net DIM - Generic Network Dynamic Interrupt Moderation
+======================================================
+
+Author:
+	Tal Gilboa <talgi@mellanox.com>
+
+
+Contents
+=========
+
+- Assumptions
+- Introduction
+- The Net DIM Algorithm
+- Registering a Network Device to DIM
+- Example
+
+Part 0: Assumptions
+======================
+
+This document assumes the reader has basic knowledge in network drivers
+and in general interrupt moderation.
+
+
+Part I: Introduction
+======================
+
+Dynamic Interrupt Moderation (DIM) (in networking) refers to changing the
+interrupt moderation configuration of a channel in order to optimize packet
+processing. The mechanism includes an algorithm which decides if and how to
+change moderation parameters for a channel, usually by performing an analysis on
+runtime data sampled from the system. Net DIM is such a mechanism. In each
+iteration of the algorithm, it analyses a given sample of the data, compares it
+to the previous sample and if required, it can decide to change some of the
+interrupt moderation configuration fields. The data sample is composed of data
+bandwidth, the number of packets and the number of events. The time between
+samples is also measured. Net DIM compares the current and the previous data and
+returns an adjusted interrupt moderation configuration object. In some cases,
+the algorithm might decide not to change anything. The configuration fields are
+the minimum duration (microseconds) allowed between events and the maximum
+number of wanted packets per event. The Net DIM algorithm ascribes importance to
+increase bandwidth over reducing interrupt rate.
+
+
+Part II: The Net DIM Algorithm
+===============================
+
+Each iteration of the Net DIM algorithm follows these steps:
+1. Calculates new data sample.
+2. Compares it to previous sample.
+3. Makes a decision - suggests interrupt moderation configuration fields.
+4. Applies a schedule work function, which applies suggested configuration.
+
+The first two steps are straightforward, both the new and the previous data are
+supplied by the driver registered to Net DIM. The previous data is the new data
+supplied to the previous iteration. The comparison step checks the difference
+between the new and previous data and decides on the result of the last step.
+A step would result as "better" if bandwidth increases and as "worse" if
+bandwidth reduces. If there is no change in bandwidth, the packet rate is
+compared in a similar fashion - increase == "better" and decrease == "worse".
+In case there is no change in the packet rate as well, the interrupt rate is
+compared. Here the algorithm tries to optimize for lower interrupt rate so an
+increase in the interrupt rate is considered "worse" and a decrease is
+considered "better". Step #2 has an optimization for avoiding false results: it
+only considers a difference between samples as valid if it is greater than a
+certain percentage. Also, since Net DIM does not measure anything by itself, it
+assumes the data provided by the driver is valid.
+
+Step #3 decides on the suggested configuration based on the result from step #2
+and the internal state of the algorithm. The states reflect the "direction" of
+the algorithm: is it going left (reducing moderation), right (increasing
+moderation) or standing still. Another optimization is that if a decision
+to stay still is made multiple times, the interval between iterations of the
+algorithm would increase in order to reduce calculation overhead. Also, after
+"parking" on one of the most left or most right decisions, the algorithm may
+decide to verify this decision by taking a step in the other direction. This is
+done in order to avoid getting stuck in a "deep sleep" scenario. Once a
+decision is made, an interrupt moderation configuration is selected from
+the predefined profiles.
+
+The last step is to notify the registered driver that it should apply the
+suggested configuration. This is done by scheduling a work function, defined by
+the Net DIM API and provided by the registered driver.
+
+As you can see, Net DIM itself does not actively interact with the system. It
+would have trouble making the correct decisions if the wrong data is supplied to
+it and it would be useless if the work function would not apply the suggested
+configuration. This does, however, allow the registered driver some room for
+manoeuvre as it may provide partial data or ignore the algorithm suggestion
+under some conditions.
+
+
+Part III: Registering a Network Device to DIM
+==============================================
+
+Net DIM API exposes the main function net_dim(struct net_dim *dim,
+struct net_dim_sample end_sample). This function is the entry point to the Net
+DIM algorithm and has to be called every time the driver would like to check if
+it should change interrupt moderation parameters. The driver should provide two
+data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+describes the state of DIM for a specific object (RX queue, TX queue,
+other queues, etc.). This includes the current selected profile, previous data
+samples, the callback function provided by the driver and more.
+Struct net_dim_sample describes a data sample, which will be compared to the
+data sample stored in struct net_dim in order to decide on the algorithm's next
+step. The sample should include bytes, packets and interrupts, measured by
+the driver.
+
+In order to use Net DIM from a networking driver, the driver needs to call the
+main net_dim() function. The recommended method is to call net_dim() on each
+interrupt. Since Net DIM has a built-in moderation and it might decide to skip
+iterations under certain conditions, there is no need to moderate the net_dim()
+calls as well. As mentioned above, the driver needs to provide an object of type
+struct net_dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct net_dim as part of its data structure and use it
+as the main Net DIM API object. The struct net_dim_sample should hold the latest
+bytes, packets and interrupts count. No need to perform any calculations, just
+include the raw data.
+
+The net_dim() call itself does not return anything. Instead Net DIM relies on
+the driver to provide a callback function, which is called when the algorithm
+decides to make a change in the interrupt moderation parameters. This callback
+will be scheduled and run in a separate thread in order not to add overhead to
+the data flow. After the work is done, Net DIM algorithm needs to be set to
+the proper state in order to move to the next iteration.
+
+
+Part IV: Example
+=================
+
+The following code demonstrates how to register a driver to Net DIM. The actual
+usage is not complete but it should make the outline of the usage clear.
+
+my_driver.c:
+
+#include <linux/net_dim.h>
+
+/* Callback for net DIM to schedule on a decision to change moderation */
+void my_driver_do_dim_work(struct work_struct *work)
+{
+	/* Get struct net_dim from struct work_struct */
+	struct net_dim *dim = container_of(work, struct net_dim,
+					   work);
+	/* Do interrupt moderation related stuff */
+	...
+
+	/* Signal net DIM work is done and it should move to next iteration */
+	dim->state = NET_DIM_START_MEASURE;
+}
+
+/* My driver's interrupt handler */
+int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
+{
+	...
+	/* A struct to hold current measured data */
+	struct net_dim_sample dim_sample;
+	...
+	/* Initiate data sample struct with current data */
+	net_dim_sample(my_entity->events,
+		       my_entity->packets,
+		       my_entity->bytes,
+		       &dim_sample);
+	/* Call net DIM */
+	net_dim(&my_entity->dim, dim_sample);
+	...
+}
+
+/* My entity's initialization function (my_entity was already allocated) */
+int my_driver_init_my_entity(struct my_driver_entity *my_entity, ...)
+{
+	...
+	/* Initiate struct work_struct with my driver's callback function */
+	INIT_WORK(&my_entity->dim.work, my_driver_do_dim_work);
+	...
+}

From 1574639411b3ce311b846cae7fda56bf1af7d027 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 21 Mar 2018 19:34:58 +0000
Subject: [PATCH 1093/1678] gre: fix TUNNEL_SEQ bit check on sequence numbering

The current logic of flags | TUNNEL_SEQ is always non-zero and hence
sequence numbers are always incremented no matter the setting of the
TUNNEL_SEQ bit.  Fix this by using & instead of |.

Detected by CoverityScan, CID#1466039 ("Operands don't affect result")

Fixes: 77a5196a804e ("gre: add sequence number for collect md mode.")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c  | 2 +-
 net/ipv6/ip6_gre.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2fa2ef2e2af9..9ab1aa2f7660 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -550,7 +550,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 	gre_build_header(skb, tunnel_hlen, flags, proto,
 			 tunnel_id_to_key32(tun_info->key.tun_id),
-			 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 
 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7d8775c9570d..6adbcf40cf8c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -724,7 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		gre_build_header(skb, tunnel->tun_hlen,
 				 flags, protocol,
 				 tunnel_id_to_key32(tun_info->key.tun_id),
-				 (flags | TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
 						      : 0);
 
 	} else {

From 6e010dd9b16b1a320bbf8312359ac294d7e1d9a8 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 21 Mar 2018 19:48:11 -0600
Subject: [PATCH 1094/1678] net: qualcomm: rmnet: Fix casting issues

Fix warnings which were reported when running with sparse
(make C=1 CF=-D__CHECK_ENDIAN__)

drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c:81:15:
warning: cast to restricted __be16
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:271:37:
warning: incorrect type in assignment (different base types)
expected unsigned short [unsigned] [usertype] pkt_len
got restricted __be16 [usertype] <noident>
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:287:29:
warning: incorrect type in assignment (different base types)
expected unsigned short [unsigned] [usertype] pkt_len
got restricted __be16 [usertype] <noident>
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:310:22:
warning: cast to restricted __be16
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c:319:13:
warning: cast to restricted __be16
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c:49:18:
warning: cast to restricted __be16
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c:50:18:
warning: cast to restricted __be32
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c:74:21:
warning: cast to restricted __be16

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 6ce31e29136d..4f362dfb356e 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -23,8 +23,8 @@ struct rmnet_map_control_command {
 		struct {
 			u16 ip_family:2;
 			u16 reserved:14;
-			u16 flow_control_seq_num;
-			u32 qos_id;
+			__be16 flow_control_seq_num;
+			__be32 qos_id;
 		} flow_control;
 		u8 data[0];
 	};
@@ -44,7 +44,7 @@ struct rmnet_map_header {
 	u8  reserved_bit:1;
 	u8  cd_bit:1;
 	u8  mux_id;
-	u16 pkt_len;
+	__be16 pkt_len;
 }  __aligned(1);
 
 struct rmnet_map_dl_csum_trailer {

From 9dcaec042600dce169ae74527e5eead349f8eb90 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 21 Mar 2018 19:48:12 -0600
Subject: [PATCH 1095/1678] net: qualcomm: rmnet: Update copyright year to 2018

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c      | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h      | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c    | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h         | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c    | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h     | 2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c         | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index c4949183eef3..096301a31c4f 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 00e4634100d3..0b5b5da80198 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 601edec28c5f..c758248bf2cd 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 4f362dfb356e..884f1f52dcc2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index b0dbca070c00..afa2b862515f 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index c74a6c56d315..49e420e22776 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index de0143eaa05a..98365efc2d9a 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, 2016-2018 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 346d310914df..2ea16a088de8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2018, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and

From 378e25357ac78ad02fbc98bec9b4e3baaa916c5c Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 21 Mar 2018 19:48:13 -0600
Subject: [PATCH 1096/1678] net: qualcomm: rmnet: Remove unnecessary device
 assignment

Device of the de-aggregated skb is correctly assigned after inspecting
the mux_id, so remove the assignment in rmnet_map_deaggregate().

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 49e420e22776..e8f6c79157be 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -323,7 +323,6 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 	if (!skbn)
 		return NULL;
 
-	skbn->dev = skb->dev;
 	skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
 	skb_put(skbn, packet_len);
 	memcpy(skbn->data, skb->data, packet_len);

From 14452ca3b5ce304fb2fea96dbc9ca1e4e7978551 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 21 Mar 2018 19:48:14 -0600
Subject: [PATCH 1097/1678] net: qualcomm: rmnet: Export mux_id and flags to
 netlink

Define new netlink attributes for rmnet mux_id and flags. These
flags / mux_id were earlier using vlan flags / id respectively.
The flag bits are also moved to uapi and are renamed with
prefix RMNET_FLAG_*.

Also add the rmnet policy to handle the new netlink attributes.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_config.c    | 41 +++++++++++--------
 .../ethernet/qualcomm/rmnet/rmnet_handlers.c  | 10 ++---
 .../qualcomm/rmnet/rmnet_map_command.c        |  2 +-
 .../ethernet/qualcomm/rmnet/rmnet_map_data.c  |  2 +-
 .../ethernet/qualcomm/rmnet/rmnet_private.h   |  6 ---
 include/uapi/linux/if_link.h                  | 21 ++++++++++
 6 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 096301a31c4f..c5b7b2af25d8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -43,6 +43,11 @@
 
 /* Local Definitions and Declarations */
 
+static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = {
+	[IFLA_RMNET_MUX_ID]	= { .type = NLA_U16 },
+	[IFLA_RMNET_FLAGS]	= { .len = sizeof(struct ifla_rmnet_flags) },
+};
+
 static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
 {
 	return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler;
@@ -131,7 +136,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[],
 			 struct netlink_ext_ack *extack)
 {
-	u32 data_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
+	u32 data_format = RMNET_FLAGS_INGRESS_DEAGGREGATION;
 	struct net_device *real_dev;
 	int mode = RMNET_EPMODE_VND;
 	struct rmnet_endpoint *ep;
@@ -143,14 +148,14 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 	if (!real_dev || !dev)
 		return -ENODEV;
 
-	if (!data[IFLA_VLAN_ID])
+	if (!data[IFLA_RMNET_MUX_ID])
 		return -EINVAL;
 
 	ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
 	if (!ep)
 		return -ENOMEM;
 
-	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+	mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
 
 	err = rmnet_register_real_device(real_dev);
 	if (err)
@@ -165,10 +170,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 
 	hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
 
-	if (data[IFLA_VLAN_FLAGS]) {
-		struct ifla_vlan_flags *flags;
+	if (data[IFLA_RMNET_FLAGS]) {
+		struct ifla_rmnet_flags *flags;
 
-		flags = nla_data(data[IFLA_VLAN_FLAGS]);
+		flags = nla_data(data[IFLA_RMNET_FLAGS]);
 		data_format = flags->flags & flags->mask;
 	}
 
@@ -276,10 +281,10 @@ static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
 {
 	u16 mux_id;
 
-	if (!data || !data[IFLA_VLAN_ID])
+	if (!data || !data[IFLA_RMNET_MUX_ID])
 		return -EINVAL;
 
-	mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+	mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
 	if (mux_id > (RMNET_MAX_LOGICAL_EP - 1))
 		return -ERANGE;
 
@@ -304,8 +309,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 
 	port = rmnet_get_port_rtnl(real_dev);
 
-	if (data[IFLA_VLAN_ID]) {
-		mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+	if (data[IFLA_RMNET_MUX_ID]) {
+		mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
 		ep = rmnet_get_endpoint(port, priv->mux_id);
 
 		hlist_del_init_rcu(&ep->hlnode);
@@ -315,10 +320,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 		priv->mux_id = mux_id;
 	}
 
-	if (data[IFLA_VLAN_FLAGS]) {
-		struct ifla_vlan_flags *flags;
+	if (data[IFLA_RMNET_FLAGS]) {
+		struct ifla_rmnet_flags *flags;
 
-		flags = nla_data(data[IFLA_VLAN_FLAGS]);
+		flags = nla_data(data[IFLA_RMNET_FLAGS]);
 		port->data_format = flags->flags & flags->mask;
 	}
 
@@ -327,13 +332,16 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 
 static size_t rmnet_get_size(const struct net_device *dev)
 {
-	return nla_total_size(2) /* IFLA_VLAN_ID */ +
-	       nla_total_size(sizeof(struct ifla_vlan_flags)); /* IFLA_VLAN_FLAGS */
+	return
+		/* IFLA_RMNET_MUX_ID */
+		nla_total_size(2) +
+		/* IFLA_RMNET_FLAGS */
+		nla_total_size(sizeof(struct ifla_rmnet_flags));
 }
 
 struct rtnl_link_ops rmnet_link_ops __read_mostly = {
 	.kind		= "rmnet",
-	.maxtype	= __IFLA_VLAN_MAX,
+	.maxtype	= __IFLA_RMNET_MAX,
 	.priv_size	= sizeof(struct rmnet_priv),
 	.setup		= rmnet_vnd_setup,
 	.validate	= rmnet_rtnl_validate,
@@ -341,6 +349,7 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
 	.dellink	= rmnet_dellink,
 	.get_size	= rmnet_get_size,
 	.changelink     = rmnet_changelink,
+	.policy		= rmnet_policy,
 };
 
 /* Needs either rcu_read_lock() or rtnl lock */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index c758248bf2cd..6fcd586e9804 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -70,7 +70,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 	u8 mux_id;
 
 	if (RMNET_MAP_GET_CD_BIT(skb)) {
-		if (port->data_format & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+		if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
 			return rmnet_map_command(skb, port);
 
 		goto free_skb;
@@ -93,7 +93,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 	skb_pull(skb, sizeof(struct rmnet_map_header));
 	rmnet_set_skb_proto(skb);
 
-	if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
 		if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
@@ -121,7 +121,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb,
 		skb_push(skb, ETH_HLEN);
 	}
 
-	if (port->data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
+	if (port->data_format & RMNET_FLAGS_INGRESS_DEAGGREGATION) {
 		while ((skbn = rmnet_map_deaggregate(skb, port)) != NULL)
 			__rmnet_map_ingress_handler(skbn, port);
 
@@ -141,7 +141,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 	additional_header_len = 0;
 	required_headroom = sizeof(struct rmnet_map_header);
 
-	if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4) {
+	if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4) {
 		additional_header_len = sizeof(struct rmnet_map_ul_csum_header);
 		required_headroom += additional_header_len;
 	}
@@ -151,7 +151,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 			goto fail;
 	}
 
-	if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4)
+	if (port->data_format & RMNET_FLAGS_EGRESS_MAP_CKSUMV4)
 		rmnet_map_checksum_uplink_packet(skb, orig_dev);
 
 	map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index afa2b862515f..78fdad0c6f76 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -69,7 +69,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
 	struct rmnet_map_control_command *cmd;
 	int xmit_status;
 
-	if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) {
 		if (skb->len < sizeof(struct rmnet_map_header) +
 		    RMNET_MAP_GET_LENGTH(skb) +
 		    sizeof(struct rmnet_map_dl_csum_trailer)) {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index e8f6c79157be..a6ea09416f8d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -309,7 +309,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 	maph = (struct rmnet_map_header *)skb->data;
 	packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
 
-	if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4)
+	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
 		packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
 
 	if (((int)skb->len - (int)packet_len) < 0)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index 98365efc2d9a..b9cc4f85f229 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -18,12 +18,6 @@
 #define RMNET_NEEDED_HEADROOM      16
 #define RMNET_TX_QUEUE_LEN         1000
 
-/* Constants */
-#define RMNET_INGRESS_FORMAT_DEAGGREGATION      BIT(0)
-#define RMNET_INGRESS_FORMAT_MAP_COMMANDS       BIT(1)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4        BIT(2)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4         BIT(3)
-
 /* Replace skb->dev to a virtual rmnet device and pass up the stack */
 #define RMNET_EPMODE_VND (1)
 /* Pass the frame directly to another device with dev_queue_xmit() */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 11d0c0ea2bfa..68699f654118 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -959,4 +959,25 @@ enum {
 
 #define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
 
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION         (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+
+enum {
+	IFLA_RMNET_UNSPEC,
+	IFLA_RMNET_MUX_ID,
+	IFLA_RMNET_FLAGS,
+	__IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX	(__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+	__u32	flags;
+	__u32	mask;
+};
+
 #endif /* _UAPI_LINUX_IF_LINK_H */

From be81a85f5f87d3c7ae994b646d00b1d828e754a1 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Wed, 21 Mar 2018 19:48:15 -0600
Subject: [PATCH 1098/1678] net: qualcomm: rmnet: Implement fill_info

This is needed to query the mux_id and flags of a rmnet device.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/qualcomm/rmnet/rmnet_config.c    | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index c5b7b2af25d8..38d9356ebcc4 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -339,6 +339,35 @@ static size_t rmnet_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(struct ifla_rmnet_flags));
 }
 
+static int rmnet_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct rmnet_priv *priv = netdev_priv(dev);
+	struct net_device *real_dev;
+	struct ifla_rmnet_flags f;
+	struct rmnet_port *port;
+
+	real_dev = priv->real_dev;
+
+	if (!rmnet_is_real_dev_registered(real_dev))
+		return -ENODEV;
+
+	if (nla_put_u16(skb, IFLA_RMNET_MUX_ID, priv->mux_id))
+		goto nla_put_failure;
+
+	port = rmnet_get_port_rtnl(real_dev);
+
+	f.flags = port->data_format;
+	f.mask  = ~0;
+
+	if (nla_put(skb, IFLA_RMNET_FLAGS, sizeof(f), &f))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 struct rtnl_link_ops rmnet_link_ops __read_mostly = {
 	.kind		= "rmnet",
 	.maxtype	= __IFLA_RMNET_MAX,
@@ -350,6 +379,7 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
 	.get_size	= rmnet_get_size,
 	.changelink     = rmnet_changelink,
 	.policy		= rmnet_policy,
+	.fill_info	= rmnet_fill_info,
 };
 
 /* Needs either rcu_read_lock() or rtnl lock */

From f2d254fac13cc7c86871ea607c4ab1afa7f13e2e Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Wed, 21 Mar 2018 23:30:54 -0700
Subject: [PATCH 1099/1678] liquidio: Added support for trusted VF

When a VF is trusted, all promiscuous traffic will only be sent to that VF.
In normal operation promiscuous traffic is sent to the PF. There can be
only one trusted VF per PF

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_main.c   | 116 ++++++++++++++++++
 .../cavium/liquidio/liquidio_common.h         |   7 ++
 .../ethernet/cavium/liquidio/octeon_device.h  |   2 +
 3 files changed, 125 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 140085ba48cd..21280cb66550 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -91,6 +91,11 @@ static int octeon_console_debug_enabled(u32 console)
  */
 #define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000
 
+struct lio_trusted_vf_ctx {
+	struct completion complete;
+	int status;
+};
+
 struct liquidio_rx_ctl_context {
 	int octeon_id;
 
@@ -3265,10 +3270,120 @@ static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
 	ether_addr_copy(&ivi->mac[0], macaddr);
 	ivi->vlan = oct->sriov_info.vf_vlantci[vfidx] & VLAN_VID_MASK;
 	ivi->qos = oct->sriov_info.vf_vlantci[vfidx] >> VLAN_PRIO_SHIFT;
+	if (oct->sriov_info.trusted_vf.active &&
+	    oct->sriov_info.trusted_vf.id == vfidx)
+		ivi->trusted = true;
+	else
+		ivi->trusted = false;
 	ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
 	return 0;
 }
 
+static void trusted_vf_callback(struct octeon_device *oct_dev,
+				u32 status, void *ptr)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+	struct lio_trusted_vf_ctx *ctx;
+
+	ctx = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+	ctx->status = status;
+
+	complete(&ctx->complete);
+}
+
+static int liquidio_send_vf_trust_cmd(struct lio *lio, int vfidx, bool trusted)
+{
+	struct octeon_device *oct = lio->oct_dev;
+	struct lio_trusted_vf_ctx *ctx;
+	struct octeon_soft_command *sc;
+	int ctx_size, retval;
+
+	ctx_size = sizeof(struct lio_trusted_vf_ctx);
+	sc = octeon_alloc_soft_command(oct, 0, 0, ctx_size);
+
+	ctx  = (struct lio_trusted_vf_ctx *)sc->ctxptr;
+	init_completion(&ctx->complete);
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	/* vfidx is 0 based, but vf_num (param1) is 1 based */
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_SET_TRUSTED_VF, 0, vfidx + 1,
+				    trusted);
+
+	sc->callback = trusted_vf_callback;
+	sc->callback_arg = sc;
+	sc->wait_time = 1000;
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		retval = -1;
+	} else {
+		/* Wait for response or timeout */
+		if (wait_for_completion_timeout(&ctx->complete,
+						msecs_to_jiffies(2000)))
+			retval = ctx->status;
+		else
+			retval = -1;
+	}
+
+	octeon_free_soft_command(oct, sc);
+
+	return retval;
+}
+
+static int liquidio_set_vf_trust(struct net_device *netdev, int vfidx,
+				 bool setting)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if (strcmp(oct->fw_info.liquidio_firmware_version, "1.7.1") < 0) {
+		/* trusted vf is not supported by firmware older than 1.7.1 */
+		return -EOPNOTSUPP;
+	}
+
+	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced) {
+		netif_info(lio, drv, lio->netdev, "Invalid vfidx %d\n", vfidx);
+		return -EINVAL;
+	}
+
+	if (setting) {
+		/* Set */
+
+		if (oct->sriov_info.trusted_vf.active &&
+		    oct->sriov_info.trusted_vf.id == vfidx)
+			return 0;
+
+		if (oct->sriov_info.trusted_vf.active) {
+			netif_info(lio, drv, lio->netdev, "More than one trusted VF is not allowed\n");
+			return -EPERM;
+		}
+	} else {
+		/* Clear */
+
+		if (!oct->sriov_info.trusted_vf.active)
+			return 0;
+	}
+
+	if (!liquidio_send_vf_trust_cmd(lio, vfidx, setting)) {
+		if (setting) {
+			oct->sriov_info.trusted_vf.id = vfidx;
+			oct->sriov_info.trusted_vf.active = true;
+		} else {
+			oct->sriov_info.trusted_vf.active = false;
+		}
+
+		netif_info(lio, drv, lio->netdev, "VF %u is %strusted\n", vfidx,
+			   setting ? "" : "not ");
+	} else {
+		netif_info(lio, drv, lio->netdev, "Failed to set VF trusted\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
 				      int linkstate)
 {
@@ -3399,6 +3514,7 @@ static const struct net_device_ops lionetdevops = {
 	.ndo_set_vf_mac		= liquidio_set_vf_mac,
 	.ndo_set_vf_vlan	= liquidio_set_vf_vlan,
 	.ndo_get_vf_config	= liquidio_get_vf_config,
+	.ndo_set_vf_trust	= liquidio_set_vf_trust,
 	.ndo_set_vf_link_state  = liquidio_set_vf_link_state,
 };
 
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index ecc16824fc1b..82a783db5baf 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -84,6 +84,7 @@ enum octeon_tag_type {
 #define OPCODE_NIC_IF_CFG              0x09
 #define OPCODE_NIC_VF_DRV_NOTICE       0x0A
 #define OPCODE_NIC_INTRMOD_PARAMS      0x0B
+#define OPCODE_NIC_SET_TRUSTED_VF	0x13
 #define OPCODE_NIC_SYNC_OCTEON_TIME	0x14
 #define VF_DRV_LOADED                  1
 #define VF_DRV_REMOVED                -1
@@ -918,6 +919,12 @@ union oct_nic_if_cfg {
 	} s;
 };
 
+struct lio_trusted_vf {
+	uint64_t active: 1;
+	uint64_t id : 8;
+	uint64_t reserved: 55;
+};
+
 struct lio_time {
 	s64 sec;   /* seconds */
 	s64 nsec;  /* nanoseconds */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 63b0c758a0a6..91937cc5c1d7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -370,6 +370,8 @@ struct octeon_sriov_info {
 
 	u32	sriov_enabled;
 
+	struct lio_trusted_vf	trusted_vf;
+
 	/*lookup table that maps DPI ring number to VF pci_dev struct pointer*/
 	struct pci_dev *dpiring_to_vfpcidev_lut[MAX_POSSIBLE_VFS];
 

From 76d3e153d0d16032ab68e3b698c4df05acef48c5 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 12:45:02 +0300
Subject: [PATCH 1100/1678] net: Revert "ipv4: get rid of ip_ra_lock"

This reverts commit ba3f571d5dde. The commit was made
after 1215e51edad1 "ipv4: fix a deadlock in ip_ra_control",
and killed ip_ra_lock, which became useless after rtnl_lock()
made used to destroy every raw ipv4 socket. This scales
very bad, and next patch in series reverts 1215e51edad1.
ip_ra_lock will be used again.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 74c962b9b09c..be7c3b71914d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -334,6 +334,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
    sent to multicast group to reach destination designated router.
  */
 struct ip_ra_chain __rcu *ip_ra_chain;
+static DEFINE_SPINLOCK(ip_ra_lock);
 
 
 static void ip_ra_destroy_rcu(struct rcu_head *head)
@@ -355,17 +356,21 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
+	spin_lock_bh(&ip_ra_lock);
 	for (rap = &ip_ra_chain;
-	     (ra = rtnl_dereference(*rap)) != NULL;
+	     (ra = rcu_dereference_protected(*rap,
+			lockdep_is_held(&ip_ra_lock))) != NULL;
 	     rap = &ra->next) {
 		if (ra->sk == sk) {
 			if (on) {
+				spin_unlock_bh(&ip_ra_lock);
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
 			/* dont let ip_call_ra_chain() use sk again */
 			ra->sk = NULL;
 			RCU_INIT_POINTER(*rap, ra->next);
+			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
@@ -379,14 +384,17 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 			return 0;
 		}
 	}
-	if (!new_ra)
+	if (!new_ra) {
+		spin_unlock_bh(&ip_ra_lock);
 		return -ENOBUFS;
+	}
 	new_ra->sk = sk;
 	new_ra->destructor = destructor;
 
 	RCU_INIT_POINTER(new_ra->next, ra);
 	rcu_assign_pointer(*rap, new_ra);
 	sock_hold(sk);
+	spin_unlock_bh(&ip_ra_lock);
 
 	return 0;
 }

From 0526947f9dd019da8d550ed20177585a84b3460e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 12:45:12 +0300
Subject: [PATCH 1101/1678] net: Move IP_ROUTER_ALERT out of lock_sock(sk)

ip_ra_control() does not need sk_lock. Who are the another
users of ip_ra_chain? ip_mroute_setsockopt() doesn't take
sk_lock, while parallel IP_ROUTER_ALERT syscalls are
synchronized by ip_ra_lock. So, we may move this command
out of sk_lock.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index be7c3b71914d..dcbf6afe27e7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -647,6 +647,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 
 	/* If optlen==0, it is equivalent to val == 0 */
 
+	if (optname == IP_ROUTER_ALERT)
+		return ip_ra_control(sk, val ? 1 : 0, NULL);
 	if (ip_mroute_opt(optname))
 		return ip_mroute_setsockopt(sk, optname, optval, optlen);
 
@@ -1157,9 +1159,6 @@ mc_msf_out:
 			goto e_inval;
 		inet->mc_all = val;
 		break;
-	case IP_ROUTER_ALERT:
-		err = ip_ra_control(sk, val ? 1 : 0, NULL);
-		break;
 
 	case IP_FREEBIND:
 		if (optlen < 1)

From 128aaa98ad1422eacc4c74879840cb3e579cd934 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 12:45:22 +0300
Subject: [PATCH 1102/1678] net: Revert "ipv4: fix a deadlock in ip_ra_control"

This reverts commit 1215e51edad1.
Since raw_close() is used on every RAW socket destruction,
the changes made by 1215e51edad1 scale sadly. This clearly
seen on endless unshare(CLONE_NEWNET) test, and cleanup_net()
kwork spends a lot of time waiting for rtnl_lock() introduced
by this commit.

Previous patch moved IP_ROUTER_ALERT out of rtnl_lock(),
so we revert this patch.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c |  1 -
 net/ipv4/ipmr.c        | 11 +++++++++--
 net/ipv4/raw.c         |  2 --
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index dcbf6afe27e7..bf5f44b27b7e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -594,7 +594,6 @@ static bool setsockopt_needs_rtnl(int optname)
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_UNBLOCK_SOURCE:
-	case IP_ROUTER_ALERT:
 		return true;
 	}
 	return false;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d752a70855d8..f6be5db16da2 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1399,7 +1399,7 @@ static void mrtsock_destruct(struct sock *sk)
 	struct net *net = sock_net(sk);
 	struct mr_table *mrt;
 
-	ASSERT_RTNL();
+	rtnl_lock();
 	ipmr_for_each_table(mrt, net) {
 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1411,6 +1411,7 @@ static void mrtsock_destruct(struct sock *sk)
 			mroute_clean_tables(mrt, false);
 		}
 	}
+	rtnl_unlock();
 }
 
 /* Socket options and virtual interface manipulation. The whole
@@ -1475,8 +1476,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
 			ret = -EACCES;
 		} else {
+			/* We need to unlock here because mrtsock_destruct takes
+			 * care of rtnl itself and we can't change that due to
+			 * the IP_ROUTER_ALERT setsockopt which runs without it.
+			 */
+			rtnl_unlock();
 			ret = ip_ra_control(sk, 0, NULL);
-			goto out_unlock;
+			goto out;
 		}
 		break;
 	case MRT_ADD_VIF:
@@ -1588,6 +1594,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	}
 out_unlock:
 	rtnl_unlock();
+out:
 	return ret;
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 54648d20bf0f..720bef7da2f6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout)
 	/*
 	 * Raw sockets may have direct kernel references. Kill them.
 	 */
-	rtnl_lock();
 	ip_ra_control(sk, 0, NULL);
-	rtnl_unlock();
 
 	sk_common_release(sk);
 }

From 5796ef75ec7b6019eac88f66751d663d537a5cd3 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 12:45:32 +0300
Subject: [PATCH 1103/1678] net: Make ip_ra_chain per struct net

This is optimization, which makes ip_call_ra_chain()
iterate less sockets to find the sockets it's looking for.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h         | 13 +++++++++++--
 include/net/netns/ipv4.h |  1 +
 net/ipv4/ip_input.c      |  5 ++---
 net/ipv4/ip_sockglue.c   | 15 ++-------------
 4 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index fe63ba95d12b..d53b5a9eae34 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb)
 	return 0;
 }
 
+/* Special input handler for packets caught by router alert option.
+   They are selected only by protocol field, and then processed likely
+   local ones; but only if someone wants them! Otherwise, router
+   not running rsvpd will kill RSVP.
+
+   It is user level problem, what it will make with them.
+   I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
+   but receiver should be enough clever f.e. to forward mtrace requests,
+   sent to multicast group to reach destination designated router.
+ */
+
 struct ip_ra_chain {
 	struct ip_ra_chain __rcu *next;
 	struct sock		*sk;
@@ -101,8 +112,6 @@ struct ip_ra_chain {
 	struct rcu_head		rcu;
 };
 
-extern struct ip_ra_chain __rcu *ip_ra_chain;
-
 /* IP flags. */
 #define IP_CE		0x8000		/* Flag: "Congestion"		*/
 #define IP_DF		0x4000		/* Flag: "Don't Fragment"	*/
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 382bfd7583cf..97d7ee6667c7 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -49,6 +49,7 @@ struct netns_ipv4 {
 #endif
 	struct ipv4_devconf	*devconf_all;
 	struct ipv4_devconf	*devconf_dflt;
+	struct ip_ra_chain __rcu *ra_chain;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 57fc13c6ab2b..7582713dd18f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 	struct net *net = dev_net(dev);
 
-	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
+	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
 		struct sock *sk = ra->sk;
 
 		/* If socket is bound to an interface, only report
@@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
 		 */
 		if (sk && inet_sk(sk)->inet_num == protocol &&
 		    (!sk->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == dev->ifindex) &&
-		    net_eq(sock_net(sk), net)) {
+		     sk->sk_bound_dev_if == dev->ifindex)) {
 			if (ip_is_fragment(ip_hdr(skb))) {
 				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return true;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index bf5f44b27b7e..f36d35fe924b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -322,18 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 	return 0;
 }
 
-
-/* Special input handler for packets caught by router alert option.
-   They are selected only by protocol field, and then processed likely
-   local ones; but only if someone wants them! Otherwise, router
-   not running rsvpd will kill RSVP.
-
-   It is user level problem, what it will make with them.
-   I have no idea, how it will masquearde or NAT them (it is joke, joke :-)),
-   but receiver should be enough clever f.e. to forward mtrace requests,
-   sent to multicast group to reach destination designated router.
- */
-struct ip_ra_chain __rcu *ip_ra_chain;
 static DEFINE_SPINLOCK(ip_ra_lock);
 
 
@@ -350,6 +338,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 {
 	struct ip_ra_chain *ra, *new_ra;
 	struct ip_ra_chain __rcu **rap;
+	struct net *net = sock_net(sk);
 
 	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
 		return -EINVAL;
@@ -357,7 +346,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
 	spin_lock_bh(&ip_ra_lock);
-	for (rap = &ip_ra_chain;
+	for (rap = &net->ipv4.ra_chain;
 	     (ra = rcu_dereference_protected(*rap,
 			lockdep_is_held(&ip_ra_lock))) != NULL;
 	     rap = &ra->next) {

From d9ff3049739e349b5380b96226f9ad766741773d Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 12:45:40 +0300
Subject: [PATCH 1104/1678] net: Replace ip_ra_lock with per-net mutex

Since ra_chain is per-net, we may use per-net mutexes
to protect them in ip_ra_control(). This improves
scalability.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  1 +
 net/core/net_namespace.c |  1 +
 net/ipv4/ip_sockglue.c   | 15 ++++++---------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 97d7ee6667c7..8491bc9c86b1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -50,6 +50,7 @@ struct netns_ipv4 {
 	struct ipv4_devconf	*devconf_all;
 	struct ipv4_devconf	*devconf_dflt;
 	struct ip_ra_chain __rcu *ra_chain;
+	struct mutex		ra_mutex;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c340d5cfbdec..95ba2c53bd9a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -301,6 +301,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	net->user_ns = user_ns;
 	idr_init(&net->netns_ids);
 	spin_lock_init(&net->nsid_lock);
+	mutex_init(&net->ipv4.ra_mutex);
 
 	list_for_each_entry(ops, &pernet_list, list) {
 		error = ops_init(ops, net);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f36d35fe924b..5ad2d8ed3a3f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -322,9 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 	return 0;
 }
 
-static DEFINE_SPINLOCK(ip_ra_lock);
-
-
 static void ip_ra_destroy_rcu(struct rcu_head *head)
 {
 	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
@@ -345,21 +342,21 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
-	spin_lock_bh(&ip_ra_lock);
+	mutex_lock(&net->ipv4.ra_mutex);
 	for (rap = &net->ipv4.ra_chain;
 	     (ra = rcu_dereference_protected(*rap,
-			lockdep_is_held(&ip_ra_lock))) != NULL;
+			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
 	     rap = &ra->next) {
 		if (ra->sk == sk) {
 			if (on) {
-				spin_unlock_bh(&ip_ra_lock);
+				mutex_unlock(&net->ipv4.ra_mutex);
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
 			/* dont let ip_call_ra_chain() use sk again */
 			ra->sk = NULL;
 			RCU_INIT_POINTER(*rap, ra->next);
-			spin_unlock_bh(&ip_ra_lock);
+			mutex_unlock(&net->ipv4.ra_mutex);
 
 			if (ra->destructor)
 				ra->destructor(sk);
@@ -374,7 +371,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 		}
 	}
 	if (!new_ra) {
-		spin_unlock_bh(&ip_ra_lock);
+		mutex_unlock(&net->ipv4.ra_mutex);
 		return -ENOBUFS;
 	}
 	new_ra->sk = sk;
@@ -383,7 +380,7 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 	RCU_INIT_POINTER(new_ra->next, ra);
 	rcu_assign_pointer(*rap, new_ra);
 	sock_hold(sk);
-	spin_unlock_bh(&ip_ra_lock);
+	mutex_unlock(&net->ipv4.ra_mutex);
 
 	return 0;
 }

From 6d4c3981a8d815466de081138f2e31e9d044c669 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:52 +0000
Subject: [PATCH 1105/1678] net: hns3: Changes to make enet watchdog timeout
 func common for PF/VF

HNS3 drivers enet layer, used for the ring management and stack
interaction, is common to both VF and PF. PF already supports reset
functionality to handle the network stack watchdog timeout trigger
but the existing code is not generic enough to be used to support VF
reset as well.
This patch does following:
1. Makes the existing watchdog timeout handler in enet layer generic
   i.e. suitable for both VF and PF and
2. Introduces the new reset event handler for the VF code.
3. Changes existing reset event handler of PF code to initialize the
   reset level

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  7 ++--
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 30 ++++------------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  2 --
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 36 ++++++++++---------
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 14 ++++++++
 5 files changed, 46 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 9daa88d9041e..56f9e650d964 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -118,6 +118,7 @@ enum hnae3_reset_notify_type {
 };
 
 enum hnae3_reset_type {
+	HNAE3_VF_RESET,
 	HNAE3_FUNC_RESET,
 	HNAE3_CORE_RESET,
 	HNAE3_GLOBAL_RESET,
@@ -400,8 +401,7 @@ struct hnae3_ae_ops {
 	int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
 				  u16 vlan, u8 qos, __be16 proto);
 	int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
-	void (*reset_event)(struct hnae3_handle *handle,
-			    enum hnae3_reset_type reset);
+	void (*reset_event)(struct hnae3_handle *handle);
 	void (*get_channels)(struct hnae3_handle *handle,
 			     struct ethtool_channels *ch);
 	void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
@@ -495,6 +495,9 @@ struct hnae3_handle {
 	struct hnae3_ae_algo *ae_algo;  /* the class who provides this handle */
 	u64 flags; /* Indicate the capabilities for this handle*/
 
+	unsigned long last_reset_time;
+	enum hnae3_reset_type reset_level;
+
 	union {
 		struct net_device *netdev; /* first member */
 		struct hnae3_knic_private_info kinfo;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0b4a676999ca..40a3eb70629e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -320,7 +320,7 @@ static int hns3_nic_net_open(struct net_device *netdev)
 		return ret;
 	}
 
-	priv->last_reset_time = jiffies;
+	priv->ae_handle->last_reset_time = jiffies;
 	return 0;
 }
 
@@ -1543,7 +1543,6 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
 static void hns3_nic_net_timeout(struct net_device *ndev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(ndev);
-	unsigned long last_reset_time = priv->last_reset_time;
 	struct hnae3_handle *h = priv->ae_handle;
 
 	if (!hns3_get_tx_timeo_queue_info(ndev))
@@ -1551,24 +1550,12 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
 
 	priv->tx_timeout_count++;
 
-	/* This timeout is far away enough from last timeout,
-	 * if timeout again,set the reset type to PF reset
-	 */
-	if (time_after(jiffies, (last_reset_time + 20 * HZ)))
-		priv->reset_level = HNAE3_FUNC_RESET;
-
-	/* Don't do any new action before the next timeout */
-	else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo)))
+	if (time_before(jiffies, (h->last_reset_time + ndev->watchdog_timeo)))
 		return;
 
-	priv->last_reset_time = jiffies;
-
+	/* request the reset */
 	if (h->ae_algo->ops->reset_event)
-		h->ae_algo->ops->reset_event(h, priv->reset_level);
-
-	priv->reset_level++;
-	if (priv->reset_level > HNAE3_GLOBAL_RESET)
-		priv->reset_level = HNAE3_GLOBAL_RESET;
+		h->ae_algo->ops->reset_event(h);
 }
 
 static const struct net_device_ops hns3_nic_netdev_ops = {
@@ -3122,8 +3109,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	priv->dev = &pdev->dev;
 	priv->netdev = netdev;
 	priv->ae_handle = handle;
-	priv->last_reset_time = jiffies;
-	priv->reset_level = HNAE3_FUNC_RESET;
+	priv->ae_handle->reset_level = HNAE3_NONE_RESET;
+	priv->ae_handle->last_reset_time = jiffies;
 	priv->tx_timeout_count = 0;
 
 	handle->kinfo.netdev = netdev;
@@ -3355,7 +3342,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
-	struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
 	int ret = 0;
 
 	if (netif_running(kinfo->netdev)) {
@@ -3365,8 +3351,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle)
 				   "hns net up fail, ret=%d!\n", ret);
 			return ret;
 		}
-
-		priv->last_reset_time = jiffies;
+		handle->last_reset_time = jiffies;
 	}
 
 	return ret;
@@ -3378,7 +3363,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret;
 
-	priv->reset_level = 1;
 	hns3_init_mac_addr(netdev);
 	hns3_nic_set_rx_mode(netdev);
 	hns3_recover_hw_addr(netdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 39daa01f08d5..9e4cfbbf8dcd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -532,8 +532,6 @@ struct hns3_nic_priv {
 	/* The most recently read link state */
 	int link;
 	u64 tx_timeout_count;
-	enum hnae3_reset_type reset_level;
-	unsigned long last_reset_time;
 
 	unsigned long state;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 31e90b588e92..a3e00da3dff0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2845,27 +2845,31 @@ static void hclge_reset(struct hclge_dev *hdev)
 	hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 }
 
-static void hclge_reset_event(struct hnae3_handle *handle,
-			      enum hnae3_reset_type reset)
+static void hclge_reset_event(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 
-	dev_info(&hdev->pdev->dev,
-		 "Receive reset event , reset_type is %d", reset);
+	/* check if this is a new reset request and we are not here just because
+	 * last reset attempt did not succeed and watchdog hit us again. We will
+	 * know this if last reset request did not occur very recently (watchdog
+	 * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
+	 * In case of new request we reset the "reset level" to PF reset.
+	 */
+	if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
+		handle->reset_level = HNAE3_FUNC_RESET;
 
-	switch (reset) {
-	case HNAE3_FUNC_RESET:
-	case HNAE3_CORE_RESET:
-	case HNAE3_GLOBAL_RESET:
-		/* request reset & schedule reset task */
-		set_bit(reset, &hdev->reset_request);
-		hclge_reset_task_schedule(hdev);
-		break;
-	default:
-		dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset);
-		break;
-	}
+	dev_info(&hdev->pdev->dev, "received reset event , reset type is %d",
+		 handle->reset_level);
+
+	/* request reset & schedule reset task */
+	set_bit(handle->reset_level, &hdev->reset_request);
+	hclge_reset_task_schedule(hdev);
+
+	if (handle->reset_level < HNAE3_GLOBAL_RESET)
+		handle->reset_level++;
+
+	handle->last_reset_time = jiffies;
 }
 
 static void hclge_reset_subtask(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 906dfa3d0fc6..6c3881d5dbf2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -832,6 +832,19 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 			     2, true, NULL, 0);
 }
 
+static void hclgevf_reset_event(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
+
+	handle->reset_level = HNAE3_VF_RESET;
+
+	/* request VF reset here. Code added later */
+
+	handle->last_reset_time = jiffies;
+}
+
 static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -1526,6 +1539,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.get_tc_size = hclgevf_get_tc_size,
 	.get_fw_version = hclgevf_get_fw_version,
 	.set_vlan_filter = hclgevf_set_vlan_filter,
+	.reset_event = hclgevf_reset_event,
 	.get_channels = hclgevf_get_channels,
 	.get_tqps_and_rss_info = hclgevf_get_tqps_and_rss_info,
 	.get_status = hclgevf_get_status,

From 35a1e50343bdae07bffe911e2d9f7e825fafe4f1 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:53 +0000
Subject: [PATCH 1106/1678] net: hns3: Add VF Reset Service Task to support
 event handling

VF reset would involve handling of different reset related events
from the stack, physical function, mailbox etc. Reset service task
would be used in servicing such reset event requests and later
handling the hardware completions waits and initiating the stack
resets.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 31 +++++++++++++++++++
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |  4 +++
 2 files changed, 35 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 6c3881d5dbf2..cdb6e7ae3b70 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -867,6 +867,15 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
 	hdev->num_msi_used += 1;
 }
 
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
+{
+	if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+	    !test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+		set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+		schedule_work(&hdev->rst_service_task);
+	}
+}
+
 static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
 {
 	if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
@@ -889,6 +898,24 @@ static void hclgevf_service_timer(struct timer_list *t)
 	hclgevf_task_schedule(hdev);
 }
 
+static void hclgevf_reset_service_task(struct work_struct *work)
+{
+	struct hclgevf_dev *hdev =
+		container_of(work, struct hclgevf_dev, rst_service_task);
+
+	if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+		return;
+
+	clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+
+	/* body of the reset service task will constitute of hclge device
+	 * reset state handling. This code shall be added subsequently in
+	 * next patches.
+	 */
+
+	clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+}
+
 static void hclgevf_mailbox_service_task(struct work_struct *work)
 {
 	struct hclgevf_dev *hdev;
@@ -1097,6 +1124,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
 	INIT_WORK(&hdev->service_task, hclgevf_service_task);
 	clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
 
+	INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
+
 	mutex_init(&hdev->mbx_resp.mbx_mutex);
 
 	/* bring the device down */
@@ -1113,6 +1142,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
 		cancel_work_sync(&hdev->service_task);
 	if (hdev->mbx_service_task.func)
 		cancel_work_sync(&hdev->mbx_service_task);
+	if (hdev->rst_service_task.func)
+		cancel_work_sync(&hdev->rst_service_task);
 
 	mutex_destroy(&hdev->mbx_resp.mbx_mutex);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 0eaea063e7c5..8b5fa670ed18 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -52,6 +52,8 @@ enum hclgevf_states {
 	HCLGEVF_STATE_DISABLED,
 	/* task states */
 	HCLGEVF_STATE_SERVICE_SCHED,
+	HCLGEVF_STATE_RST_SERVICE_SCHED,
+	HCLGEVF_STATE_RST_HANDLING,
 	HCLGEVF_STATE_MBX_SERVICE_SCHED,
 	HCLGEVF_STATE_MBX_HANDLING,
 };
@@ -146,6 +148,7 @@ struct hclgevf_dev {
 
 	struct timer_list service_timer;
 	struct work_struct service_task;
+	struct work_struct rst_service_task;
 	struct work_struct mbx_service_task;
 
 	struct hclgevf_tqp *htqp;
@@ -165,4 +168,5 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
 void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
 void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
 				 u8 duplex);
+void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
 #endif

From 436667d2e1793995267915d2562f7b14f26c1f63 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:54 +0000
Subject: [PATCH 1107/1678] net: hns3: Add VF Reset device state and its
 handling

This introduces the hclge device reset states of "requested" and
"pending" and also its handling in context to Reset Service Task.

Device gets into requested state because of any VF reset request
asserted from upper layers, for example due to watchdog timeout
expiration. Requested state would result in eventually forwarding
the VF reset request to PF which would actually reset the VF.

Device will get into pending state if:
1. VF receives the acknowledgement from PF for the VF reset
   request it originally sent to PF.
2. Reset Service Task detects that after asserting VF reset for
   certain times the data-path is not working and device then
   decides to assert full VF reset(this means also resetting the
   PCIe interface).
3. PF intimates the VF that it has undergone reset.
Pending state would result in VF to poll for hardware reset
completion status and then resetting the stack/enet layer, which
in turn means reinitializing the ring management/enet layer.

Note: we would be adding support of 3. later as a separate patch.
This decision should not affect VF reset as its event handling
is generic in nature.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  1 +
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 67 +++++++++++++++++--
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |  5 ++
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 56f9e650d964..37ec1b3286c6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -119,6 +119,7 @@ enum hnae3_reset_notify_type {
 
 enum hnae3_reset_type {
 	HNAE3_VF_RESET,
+	HNAE3_VF_FULL_RESET,
 	HNAE3_FUNC_RESET,
 	HNAE3_CORE_RESET,
 	HNAE3_GLOBAL_RESET,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index cdb6e7ae3b70..0d204e2f6fcd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -840,7 +840,9 @@ static void hclgevf_reset_event(struct hnae3_handle *handle)
 
 	handle->reset_level = HNAE3_VF_RESET;
 
-	/* request VF reset here. Code added later */
+	/* reset of this VF requested */
+	set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
+	hclgevf_reset_task_schedule(hdev);
 
 	handle->last_reset_time = jiffies;
 }
@@ -889,6 +891,12 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
 		schedule_work(&hdev->service_task);
 }
 
+static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
+{
+	if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
+		hclgevf_reset_task_schedule(hdev);
+}
+
 static void hclgevf_service_timer(struct timer_list *t)
 {
 	struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
@@ -908,10 +916,57 @@ static void hclgevf_reset_service_task(struct work_struct *work)
 
 	clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
 
-	/* body of the reset service task will constitute of hclge device
-	 * reset state handling. This code shall be added subsequently in
-	 * next patches.
-	 */
+	if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
+			       &hdev->reset_state)) {
+		/* PF has initmated that it is about to reset the hardware.
+		 * We now have to poll & check if harware has actually completed
+		 * the reset sequence. On hardware reset completion, VF needs to
+		 * reset the client and ae device.
+		 */
+		hdev->reset_attempts = 0;
+
+		/* code to check/wait for hardware reset completion and the
+		 * further initiating software stack reset would be added here
+		 */
+
+	} else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
+				      &hdev->reset_state)) {
+		/* we could be here when either of below happens:
+		 * 1. reset was initiated due to watchdog timeout due to
+		 *    a. IMP was earlier reset and our TX got choked down and
+		 *       which resulted in watchdog reacting and inducing VF
+		 *       reset. This also means our cmdq would be unreliable.
+		 *    b. problem in TX due to other lower layer(example link
+		 *       layer not functioning properly etc.)
+		 * 2. VF reset might have been initiated due to some config
+		 *    change.
+		 *
+		 * NOTE: Theres no clear way to detect above cases than to react
+		 * to the response of PF for this reset request. PF will ack the
+		 * 1b and 2. cases but we will not get any intimation about 1a
+		 * from PF as cmdq would be in unreliable state i.e. mailbox
+		 * communication between PF and VF would be broken.
+		 */
+
+		/* if we are never geting into pending state it means either:
+		 * 1. PF is not receiving our request which could be due to IMP
+		 *    reset
+		 * 2. PF is screwed
+		 * We cannot do much for 2. but to check first we can try reset
+		 * our PCIe + stack and see if it alleviates the problem.
+		 */
+		if (hdev->reset_attempts > 3) {
+			/* prepare for full reset of stack + pcie interface */
+			hdev->nic.reset_level = HNAE3_VF_FULL_RESET;
+
+			/* "defer" schedule the reset task again */
+			set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+		} else {
+			hdev->reset_attempts++;
+
+			/* request PF for resetting this VF via mailbox */
+		}
+	}
 
 	clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
 }
@@ -943,6 +998,8 @@ static void hclgevf_service_task(struct work_struct *work)
 	 */
 	hclgevf_request_link_info(hdev);
 
+	hclgevf_deferred_task_schedule(hdev);
+
 	clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 8b5fa670ed18..1c9cf87a73c0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -124,6 +124,11 @@ struct hclgevf_dev {
 	struct hclgevf_rss_cfg rss_cfg;
 	unsigned long state;
 
+#define HCLGEVF_RESET_REQUESTED		0
+#define HCLGEVF_RESET_PENDING		1
+	unsigned long reset_state;	/* requested, pending */
+	u32 reset_attempts;
+
 	u32 fw_version;
 	u16 num_tqps;		/* num task queue pairs of this PF */
 

From a8dedb65926005e99ab23716b1c77586de83c4f4 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:55 +0000
Subject: [PATCH 1108/1678] net: hns3: Add support to request VF Reset to PF

VF driver depends upon PF to eventually reset the hardware. This
request is made using the mailbox command. This patch adds the
required function to acheive above.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 0d204e2f6fcd..b64831169253 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -832,6 +832,20 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 			     2, true, NULL, 0);
 }
 
+static int hclgevf_do_reset(struct hclgevf_dev *hdev)
+{
+	int status;
+	u8 respmsg;
+
+	status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
+				      0, false, &respmsg, sizeof(u8));
+	if (status)
+		dev_err(&hdev->pdev->dev,
+			"VF reset request to PF failed(=%d)\n", status);
+
+	return status;
+}
+
 static void hclgevf_reset_event(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
@@ -910,6 +924,7 @@ static void hclgevf_reset_service_task(struct work_struct *work)
 {
 	struct hclgevf_dev *hdev =
 		container_of(work, struct hclgevf_dev, rst_service_task);
+	int ret;
 
 	if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
 		return;
@@ -965,6 +980,10 @@ static void hclgevf_reset_service_task(struct work_struct *work)
 			hdev->reset_attempts++;
 
 			/* request PF for resetting this VF via mailbox */
+			ret = hclgevf_do_reset(hdev);
+			if (ret)
+				dev_warn(&hdev->pdev->dev,
+					 "VF rst fail, stack will call\n");
 		}
 	}
 

From 6988eb2a9b7772d57b1d09bdf769db4c697869ea Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:56 +0000
Subject: [PATCH 1109/1678] net: hns3: Add support to reset the enet/ring mgmt
 layer

After VF driver knows that hardware reset has been performed
successfully, it should proceed ahead and reset the enet layer.
This primarily consists of bringing down interface, clearing
TX/RX rings, disassociating vectors from ring etc.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 103 +++++++++++++++++-
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |   3 +
 2 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index b64831169253..bd45b1117446 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2,6 +2,7 @@
 // Copyright (c) 2016-2017 Hisilicon Limited.
 
 #include <linux/etherdevice.h>
+#include <net/rtnetlink.h>
 #include "hclgevf_cmd.h"
 #include "hclgevf_main.h"
 #include "hclge_mbx.h"
@@ -832,6 +833,101 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 			     2, true, NULL, 0);
 }
 
+static int hclgevf_notify_client(struct hclgevf_dev *hdev,
+				 enum hnae3_reset_notify_type type)
+{
+	struct hnae3_client *client = hdev->nic_client;
+	struct hnae3_handle *handle = &hdev->nic;
+
+	if (!client->ops->reset_notify)
+		return -EOPNOTSUPP;
+
+	return client->ops->reset_notify(handle, type);
+}
+
+static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_RESET_WAIT_MS	500
+#define HCLGEVF_RESET_WAIT_CNT	20
+	u32 val, cnt = 0;
+
+	/* wait to check the hardware reset completion status */
+	val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+	while (hnae_get_bit(val, HCLGEVF_FUN_RST_ING_B) &&
+			    (cnt < HCLGEVF_RESET_WAIT_CNT)) {
+		msleep(HCLGEVF_RESET_WAIT_MS);
+		val = hclgevf_read_dev(&hdev->hw, HCLGEVF_FUN_RST_ING);
+		cnt++;
+	}
+
+	/* hardware completion status should be available by this time */
+	if (cnt >= HCLGEVF_RESET_WAIT_CNT) {
+		dev_warn(&hdev->pdev->dev,
+			 "could'nt get reset done status from h/w, timeout!\n");
+		return -EBUSY;
+	}
+
+	/* we will wait a bit more to let reset of the stack to complete. This
+	 * might happen in case reset assertion was made by PF. Yes, this also
+	 * means we might end up waiting bit more even for VF reset.
+	 */
+	msleep(5000);
+
+	return 0;
+}
+
+static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
+{
+	/* uninitialize the nic client */
+	hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+	/* re-initialize the hclge device - add code here */
+
+	/* bring up the nic client again */
+	hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
+
+	return 0;
+}
+
+static int hclgevf_reset(struct hclgevf_dev *hdev)
+{
+	int ret;
+
+	rtnl_lock();
+
+	/* bring down the nic to stop any ongoing TX/RX */
+	hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
+
+	/* check if VF could successfully fetch the hardware reset completion
+	 * status from the hardware
+	 */
+	ret = hclgevf_reset_wait(hdev);
+	if (ret) {
+		/* can't do much in this situation, will disable VF */
+		dev_err(&hdev->pdev->dev,
+			"VF failed(=%d) to fetch H/W reset completion status\n",
+			ret);
+
+		dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+		hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
+
+		rtnl_unlock();
+		return ret;
+	}
+
+	/* now, re-initialize the nic client and ae device*/
+	ret = hclgevf_reset_stack(hdev);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
+
+	/* bring up the nic to enable TX/RX again */
+	hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
+
+	rtnl_unlock();
+
+	return ret;
+}
+
 static int hclgevf_do_reset(struct hclgevf_dev *hdev)
 {
 	int status;
@@ -940,10 +1036,9 @@ static void hclgevf_reset_service_task(struct work_struct *work)
 		 */
 		hdev->reset_attempts = 0;
 
-		/* code to check/wait for hardware reset completion and the
-		 * further initiating software stack reset would be added here
-		 */
-
+		ret = hclgevf_reset(hdev);
+		if (ret)
+			dev_err(&hdev->pdev->dev, "VF stack reset failed.\n");
 	} else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
 				      &hdev->reset_state)) {
 		/* we could be here when either of below happens:
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 1c9cf87a73c0..afdb15d54f03 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -34,6 +34,9 @@
 #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B	1
 
 #define HCLGEVF_TQP_RESET_TRY_TIMES	10
+/* Reset related Registers */
+#define HCLGEVF_FUN_RST_ING		0x20C00
+#define HCLGEVF_FUN_RST_ING_B		0
 
 #define HCLGEVF_RSS_IND_TBL_SIZE		512
 #define HCLGEVF_RSS_SET_BITMAP_MSK	0xffff

From 7a01c89723301c343f75862098e4fa0885b75b3b Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:57 +0000
Subject: [PATCH 1110/1678] net: hns3: Add support to re-initialize the hclge
 device

After the hardware reset we should re-fetch the configuration from
PF like queue info and tc info. This might have impact on allocations
made like that of TQPs. Hence, we should release all such allocations
and re-allocate fresh according to new fetched configuration after
reset.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 106 +++++++++++++++---
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |  14 +++
 2 files changed, 106 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index bd45b1117446..6dd75614cc67 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -10,6 +10,8 @@
 
 #define HCLGEVF_NAME	"hclgevf"
 
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev);
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev);
 static struct hnae3_ae_algo ae_algovf;
 
 static const struct pci_device_id ae_algovf_pci_tbl[] = {
@@ -209,6 +211,12 @@ static int hclgevf_alloc_tqps(struct hclgevf_dev *hdev)
 	struct hclgevf_tqp *tqp;
 	int i;
 
+	/* if this is on going reset then we need to re-allocate the TPQs
+	 * since we cannot assume we would get same number of TPQs back from PF
+	 */
+	if (hclgevf_dev_ongoing_reset(hdev))
+		devm_kfree(&hdev->pdev->dev, hdev->htqp);
+
 	hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
 				  sizeof(struct hclgevf_tqp), GFP_KERNEL);
 	if (!hdev->htqp)
@@ -252,6 +260,12 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
 	new_tqps = kinfo->rss_size * kinfo->num_tc;
 	kinfo->num_tqps = min(new_tqps, hdev->num_tqps);
 
+	/* if this is on going reset then we need to re-allocate the hnae queues
+	 * as well since number of TPQs from PF might have changed.
+	 */
+	if (hclgevf_dev_ongoing_reset(hdev))
+		devm_kfree(&hdev->pdev->dev, kinfo->tqp);
+
 	kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
 				  sizeof(struct hnae3_queue *), GFP_KERNEL);
 	if (!kinfo->tqp)
@@ -878,10 +892,18 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
 
 static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
 {
+	int ret;
+
 	/* uninitialize the nic client */
 	hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
 
-	/* re-initialize the hclge device - add code here */
+	/* re-initialize the hclge device */
+	ret = hclgevf_init_hdev(hdev);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"hclge device re-init failed, VF is disabled!\n");
+		return ret;
+	}
 
 	/* bring up the nic client again */
 	hclgevf_notify_client(hdev, HNAE3_INIT_CLIENT);
@@ -1179,6 +1201,22 @@ static int hclgevf_configure(struct hclgevf_dev *hdev)
 	return hclgevf_get_tc_info(hdev);
 }
 
+static int hclgevf_alloc_hdev(struct hnae3_ae_dev *ae_dev)
+{
+	struct pci_dev *pdev = ae_dev->pdev;
+	struct hclgevf_dev *hdev = ae_dev->priv;
+
+	hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+	if (!hdev)
+		return -ENOMEM;
+
+	hdev->pdev = pdev;
+	hdev->ae_dev = ae_dev;
+	ae_dev->priv = hdev;
+
+	return 0;
+}
+
 static int hclgevf_init_roce_base_info(struct hclgevf_dev *hdev)
 {
 	struct hnae3_handle *roce = &hdev->roce;
@@ -1284,6 +1322,10 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle)
 
 static void hclgevf_state_init(struct hclgevf_dev *hdev)
 {
+	/* if this is on going reset then skip this initialization */
+	if (hclgevf_dev_ongoing_reset(hdev))
+		return;
+
 	/* setup tasks for the MBX */
 	INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
 	clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
@@ -1325,6 +1367,10 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
 	int vectors;
 	int i;
 
+	/* if this is on going reset then skip this initialization */
+	if (hclgevf_dev_ongoing_reset(hdev))
+		return 0;
+
 	hdev->num_msi = HCLGEVF_MAX_VF_VECTOR_NUM;
 
 	vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
@@ -1375,6 +1421,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
 {
 	int ret = 0;
 
+	/* if this is on going reset then skip this initialization */
+	if (hclgevf_dev_ongoing_reset(hdev))
+		return 0;
+
 	hclgevf_get_misc_vector(hdev);
 
 	ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
@@ -1485,6 +1535,14 @@ static int hclgevf_pci_init(struct hclgevf_dev *hdev)
 	struct hclgevf_hw *hw;
 	int ret;
 
+	/* check if we need to skip initialization of pci. This will happen if
+	 * device is undergoing VF reset. Otherwise, we would need to
+	 * re-initialize pci interface again i.e. when device is not going
+	 * through *any* reset or actually undergoing full reset.
+	 */
+	if (hclgevf_dev_ongoing_reset(hdev))
+		return 0;
+
 	ret = pci_enable_device(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable PCI device\n");
@@ -1536,19 +1594,16 @@ static void hclgevf_pci_uninit(struct hclgevf_dev *hdev)
 	pci_set_drvdata(pdev, NULL);
 }
 
-static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 {
-	struct pci_dev *pdev = ae_dev->pdev;
-	struct hclgevf_dev *hdev;
+	struct pci_dev *pdev = hdev->pdev;
 	int ret;
 
-	hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
-	if (!hdev)
-		return -ENOMEM;
-
-	hdev->pdev = pdev;
-	hdev->ae_dev = ae_dev;
-	ae_dev->priv = hdev;
+	/* check if device is on-going full reset(i.e. pcie as well) */
+	if (hclgevf_dev_ongoing_full_reset(hdev)) {
+		dev_warn(&pdev->dev, "device is going full reset\n");
+		hclgevf_uninit_hdev(hdev);
+	}
 
 	ret = hclgevf_pci_init(hdev);
 	if (ret) {
@@ -1633,15 +1688,38 @@ err_irq_init:
 	return ret;
 }
 
-static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 {
-	struct hclgevf_dev *hdev = ae_dev->priv;
-
 	hclgevf_cmd_uninit(hdev);
 	hclgevf_misc_irq_uninit(hdev);
 	hclgevf_state_uninit(hdev);
 	hclgevf_uninit_msi(hdev);
 	hclgevf_pci_uninit(hdev);
+}
+
+static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	struct pci_dev *pdev = ae_dev->pdev;
+	int ret;
+
+	ret = hclgevf_alloc_hdev(ae_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "hclge device allocation failed\n");
+		return ret;
+	}
+
+	ret = hclgevf_init_hdev(ae_dev->priv);
+	if (ret)
+		dev_err(&pdev->dev, "hclge device initialization failed\n");
+
+	return ret;
+}
+
+static void hclgevf_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+	struct hclgevf_dev *hdev = ae_dev->priv;
+
+	hclgevf_uninit_hdev(hdev);
 	ae_dev->priv = NULL;
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index afdb15d54f03..8cdc6027671f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -169,6 +169,20 @@ struct hclgevf_dev {
 	u32 flag;
 };
 
+static inline bool hclgevf_dev_ongoing_reset(struct hclgevf_dev *hdev)
+{
+	return (hdev &&
+		(test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+		(hdev->nic.reset_level == HNAE3_VF_RESET));
+}
+
+static inline bool hclgevf_dev_ongoing_full_reset(struct hclgevf_dev *hdev)
+{
+	return (hdev &&
+		(test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) &&
+		(hdev->nic.reset_level == HNAE3_VF_FULL_RESET));
+}
+
 int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
 			 const u8 *msg_data, u8 msg_len, bool need_resp,
 			 u8 *resp_data, u16 resp_len);

From 07a0556a3a735f57060c274c55e895682e4055e6 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:58 +0000
Subject: [PATCH 1111/1678] net: hns3: Changes to support ARQ(Asynchronous
 Receive Queue)

Current mailbox CRQ could consists of both synchronous and async
responses from the PF. Synchronous responses are time critical
and should be handed over to the waiting tasks/context as quickly
as possible otherwise timeout occurs.

Above problem gets accentuated if CRQ consists of even single
async message. Hence, it is important to have quick handling of
synchronous messages and maybe deferred handling of async messages
This patch introduces separate ARQ(async receive queues) for the
async messages. These messages are processed later with repsect
to mailbox task while synchronous messages still gets processed
in context to mailbox interrupt.

ARQ is important as VF reset introduces some new async messages
like MBX_ASSERTING_RESET which adds up to the presssure on the
responses for synchronousmessages and they timeout even more
quickly.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hclge_mbx.h   | 15 ++++
 .../hisilicon/hns3/hns3vf/hclgevf_cmd.c       |  6 ++
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 16 ++--
 .../hisilicon/hns3/hns3vf/hclgevf_main.h      |  5 ++
 .../hisilicon/hns3/hns3vf/hclgevf_mbx.c       | 83 +++++++++++++++++--
 5 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index e6e1d221b5c8..f3e90c29958c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -85,6 +85,21 @@ struct hclge_mbx_pf_to_vf_cmd {
 	u16 msg[8];
 };
 
+/* used by VF to store the received Async responses from PF */
+struct hclgevf_mbx_arq_ring {
+#define HCLGE_MBX_MAX_ARQ_MSG_SIZE	8
+#define HCLGE_MBX_MAX_ARQ_MSG_NUM	1024
+	struct hclgevf_dev *hdev;
+	u32 head;
+	u32 tail;
+	u32 count;
+	u16 msg_q[HCLGE_MBX_MAX_ARQ_MSG_NUM][HCLGE_MBX_MAX_ARQ_MSG_SIZE];
+};
+
 #define hclge_mbx_ring_ptr_move_crq(crq) \
 	(crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num)
+#define hclge_mbx_tail_ptr_move_arq(arq) \
+	(arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
+#define hclge_mbx_head_ptr_move_arq(arq) \
+		(arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE)
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 85985e731311..1bbfe131b596 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -315,6 +315,12 @@ int hclgevf_cmd_init(struct hclgevf_dev *hdev)
 		goto err_csq;
 	}
 
+	/* initialize the pointers of async rx queue of mailbox */
+	hdev->arq.hdev = hdev;
+	hdev->arq.head = 0;
+	hdev->arq.tail = 0;
+	hdev->arq.count = 0;
+
 	/* get firmware version */
 	ret = hclgevf_cmd_query_firmware_version(&hdev->hw, &version);
 	if (ret) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 6dd75614cc67..2b8426412cc9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1010,10 +1010,13 @@ void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
 	}
 }
 
-static void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
 {
-	if (!test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+	if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
+	    !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
+		set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
 		schedule_work(&hdev->mbx_service_task);
+	}
 }
 
 static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
@@ -1025,6 +1028,10 @@ static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
 
 static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
 {
+	/* if we have any pending mailbox event then schedule the mbx task */
+	if (hdev->mbx_event_pending)
+		hclgevf_mbx_task_schedule(hdev);
+
 	if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
 		hclgevf_reset_task_schedule(hdev);
 }
@@ -1118,7 +1125,7 @@ static void hclgevf_mailbox_service_task(struct work_struct *work)
 
 	clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
 
-	hclgevf_mbx_handler(hdev);
+	hclgevf_mbx_async_handler(hdev);
 
 	clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
 }
@@ -1178,8 +1185,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
 	if (!hclgevf_check_event_cause(hdev, &clearval))
 		goto skip_sched;
 
-	/* schedule the VF mailbox service task, if not already scheduled */
-	hclgevf_mbx_task_schedule(hdev);
+	hclgevf_mbx_handler(hdev);
 
 	hclgevf_clear_event_cause(hdev, clearval);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 8cdc6027671f..a477a7c36bbd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -152,7 +152,9 @@ struct hclgevf_dev {
 	int *vector_irq;
 
 	bool accept_mta_mc; /* whether to accept mta filter multicast */
+	bool mbx_event_pending;
 	struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
+	struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
 
 	struct timer_list service_timer;
 	struct work_struct service_task;
@@ -187,8 +189,11 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode,
 			 const u8 *msg_data, u8 msg_len, bool need_resp,
 			 u8 *resp_data, u16 resp_len);
 void hclgevf_mbx_handler(struct hclgevf_dev *hdev);
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev);
+
 void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state);
 void hclgevf_update_speed_duplex(struct hclgevf_dev *hdev, u32 speed,
 				 u8 duplex);
 void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev);
+void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index a63ed3aa2c00..7687911d3eb8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -132,9 +132,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 	struct hclge_mbx_pf_to_vf_cmd *req;
 	struct hclgevf_cmq_ring *crq;
 	struct hclgevf_desc *desc;
-	u16 link_status, flag;
-	u32 speed;
-	u8 duplex;
+	u16 *msg_q;
+	u16 flag;
 	u8 *temp;
 	int i;
 
@@ -146,6 +145,12 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 		desc = &crq->desc[crq->next_to_use];
 		req = (struct hclge_mbx_pf_to_vf_cmd *)desc->data;
 
+		/* synchronous messages are time critical and need preferential
+		 * treatment. Therefore, we need to acknowledge all the sync
+		 * responses as quickly as possible so that waiting tasks do not
+		 * timeout and simultaneously queue the async messages for later
+		 * prcessing in context of mailbox task i.e. the slow path.
+		 */
 		switch (req->msg[0]) {
 		case HCLGE_MBX_PF_VF_RESP:
 			if (resp->received_resp)
@@ -165,13 +170,30 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			}
 			break;
 		case HCLGE_MBX_LINK_STAT_CHANGE:
-			link_status = le16_to_cpu(req->msg[1]);
-			memcpy(&speed, &req->msg[2], sizeof(speed));
-			duplex = (u8)le16_to_cpu(req->msg[4]);
+			/* set this mbx event as pending. This is required as we
+			 * might loose interrupt event when mbx task is busy
+			 * handling. This shall be cleared when mbx task just
+			 * enters handling state.
+			 */
+			hdev->mbx_event_pending = true;
 
-			/* update upper layer with new link link status */
-			hclgevf_update_link_status(hdev, link_status);
-			hclgevf_update_speed_duplex(hdev, speed, duplex);
+			/* we will drop the async msg if we find ARQ as full
+			 * and continue with next message
+			 */
+			if (hdev->arq.count >= HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+				dev_warn(&hdev->pdev->dev,
+					 "Async Q full, dropping msg(%d)\n",
+					 req->msg[1]);
+				break;
+			}
+
+			/* tail the async message in arq */
+			msg_q = hdev->arq.msg_q[hdev->arq.tail];
+			memcpy(&msg_q[0], req->msg, HCLGE_MBX_MAX_ARQ_MSG_SIZE);
+			hclge_mbx_tail_ptr_move_arq(hdev->arq);
+			hdev->arq.count++;
+
+			hclgevf_mbx_task_schedule(hdev);
 
 			break;
 		default:
@@ -189,3 +211,46 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 	hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CRQ_HEAD_REG,
 			  crq->next_to_use);
 }
+
+void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
+{
+	u16 link_status;
+	u16 *msg_q;
+	u8 duplex;
+	u32 speed;
+	u32 tail;
+
+	/* we can safely clear it now as we are at start of the async message
+	 * processing
+	 */
+	hdev->mbx_event_pending = false;
+
+	tail = hdev->arq.tail;
+
+	/* process all the async queue messages */
+	while (tail != hdev->arq.head) {
+		msg_q = hdev->arq.msg_q[hdev->arq.head];
+
+		switch (msg_q[0]) {
+		case HCLGE_MBX_LINK_STAT_CHANGE:
+			link_status = le16_to_cpu(msg_q[1]);
+			memcpy(&speed, &msg_q[2], sizeof(speed));
+			duplex = (u8)le16_to_cpu(msg_q[4]);
+
+			/* update upper layer with new link link status */
+			hclgevf_update_link_status(hdev, link_status);
+			hclgevf_update_speed_duplex(hdev, speed, duplex);
+
+			break;
+		default:
+			dev_err(&hdev->pdev->dev,
+				"fetched unsupported(%d) message from arq\n",
+				msg_q[0]);
+			break;
+		}
+
+		hclge_mbx_head_ptr_move_arq(hdev->arq);
+		hdev->arq.count--;
+		msg_q = hdev->arq.msg_q[hdev->arq.head];
+	}
+}

From a15fa7d43b0b625f018c4f0c5856f9061a6d4c82 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:28:59 +0000
Subject: [PATCH 1112/1678] net: hns3: Add *Asserting Reset* mailbox message &
 handling in VF

Reset Asserting message is forwarded by PF to inform VF about
the hardware reset which is about to happen. This might be due
to the earlier VF reset request received by the PF or because PF
for any reason decides to undergo reset. This message results in
VF to go in pending state in which it polls the hardware to
complete the reset and then further resets/tears its own stack.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h      |  1 +
 .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index f3e90c29958c..519e2bd6aa60 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -11,6 +11,7 @@
 
 enum HCLGE_MBX_OPCODE {
 	HCLGE_MBX_RESET = 0x01,		/* (VF -> PF) assert reset */
+	HCLGE_MBX_ASSERTING_RESET,	/* (PF -> VF) PF is asserting reset*/
 	HCLGE_MBX_SET_UNICAST,		/* (VF -> PF) set UC addr */
 	HCLGE_MBX_SET_MULTICAST,	/* (VF -> PF) set MC addr */
 	HCLGE_MBX_SET_VLAN,		/* (VF -> PF) set VLAN */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 7687911d3eb8..a28618428338 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -170,6 +170,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			}
 			break;
 		case HCLGE_MBX_LINK_STAT_CHANGE:
+		case HCLGE_MBX_ASSERTING_RESET:
 			/* set this mbx event as pending. This is required as we
 			 * might loose interrupt event when mbx task is busy
 			 * handling. This shall be cleared when mbx task just
@@ -241,6 +242,17 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 			hclgevf_update_link_status(hdev, link_status);
 			hclgevf_update_speed_duplex(hdev, speed, duplex);
 
+			break;
+		case HCLGE_MBX_ASSERTING_RESET:
+			/* PF has asserted reset hence VF should go in pending
+			 * state and poll for the hardware reset status till it
+			 * has been completely reset. After this stack should
+			 * eventually be re-initialized.
+			 */
+			hdev->nic.reset_level = HNAE3_VF_RESET;
+			set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
+			hclgevf_reset_task_schedule(hdev);
+
 			break;
 		default:
 			dev_err(&hdev->pdev->dev,

From 2bfbd35d8ecd97a4a7f1db1754908b54542fa7aa Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 22 Mar 2018 14:29:00 +0000
Subject: [PATCH 1113/1678] net: hns3: Changes required in PF mailbox to
 support VF reset

PF needs to assert the VF reset when it receives the request to
reset from VF. After receiving request PF ackknowledges the
request by replying back MBX_ASSERTING_RESET message to VF.
VF then goes to pending state and wait for hardware to complete
the reset.

This patch contains code to handle the received VF message, inform
the VF of assertion and reset the VF using cmdq interface.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        |  2 +-
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  1 +
 .../hisilicon/hns3/hns3pf/hclge_mbx.c         | 42 +++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a3e00da3dff0..bede4117bad9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2749,7 +2749,7 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
 	return 0;
 }
 
-static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id)
 {
 	struct hclge_desc desc;
 	struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 8c14d10c88cb..0f4157e71282 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -657,4 +657,5 @@ void hclge_mbx_handler(struct hclge_dev *hdev);
 void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
+int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 949da0c993cf..39013334a613 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -79,6 +79,18 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
 	return status;
 }
 
+int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+{
+	u8 msg_data[2];
+	u8 dest_vfid;
+
+	dest_vfid = (u8)vport->vport_id;
+
+	/* send this requested info to VF */
+	return hclge_send_mbx_msg(vport, msg_data, sizeof(u8),
+				  HCLGE_MBX_ASSERTING_RESET, dest_vfid);
+}
+
 static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head)
 {
 	struct hnae3_ring_chain_node *chain_tmp, *chain;
@@ -339,6 +351,33 @@ static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
 	hclge_gen_resp_to_vf(vport, mbx_req, 0, NULL, 0);
 }
 
+static void hclge_reset_vf(struct hclge_vport *vport,
+			   struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	dev_warn(&hdev->pdev->dev, "PF received VF reset request from VF %d!",
+		 mbx_req->mbx_src_vfid);
+
+	/* Acknowledge VF that PF is now about to assert the reset for the VF.
+	 * On receiving this message VF will get into pending state and will
+	 * start polling for the hardware reset completion status.
+	 */
+	ret = hclge_inform_reset_assert_to_vf(vport);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"PF fail(%d) to inform VF(%d)of reset, reset failed!\n",
+			ret, vport->vport_id);
+		return;
+	}
+
+	dev_warn(&hdev->pdev->dev, "PF is now resetting VF %d.\n",
+		 mbx_req->mbx_src_vfid);
+	/* reset this virtual function */
+	hclge_func_reset_cmd(hdev, mbx_req->mbx_src_vfid);
+}
+
 void hclge_mbx_handler(struct hclge_dev *hdev)
 {
 	struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
@@ -416,6 +455,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		case HCLGE_MBX_QUEUE_RESET:
 			hclge_mbx_reset_vf_queue(vport, req);
 			break;
+		case HCLGE_MBX_RESET:
+			hclge_reset_vf(vport, req);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
 				"un-supported mailbox message, code = %d\n",

From dcbe73ca55a42712bfd0e9966cd2d5a48355ace3 Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Date: Thu, 22 Mar 2018 12:18:03 -0700
Subject: [PATCH 1114/1678] mac80211: notify driver for change in multicast
 rates

With drivers implementing rate control in driver or firmware
rate_control_send_low() may not get called, and thus the
driver needs to know about changes in the multicast rate.

Add and use a new BSS change flag for this.

Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
[rewrite commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 3 +++
 net/mac80211/cfg.c     | 2 ++
 net/mac80211/ibss.c    | 2 +-
 net/mac80211/mesh.c    | 3 ++-
 net/mac80211/util.c    | 3 ++-
 5 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2fd59ed3be00..d39fd6838f41 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -302,6 +302,8 @@ struct ieee80211_vif_chanctx_switch {
  * @BSS_CHANGED_MU_GROUPS: VHT MU-MIMO group id or user position changed
  * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected
  *	keep alive) changed.
+ * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface
+ *
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -329,6 +331,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_OCB                 = 1<<22,
 	BSS_CHANGED_MU_GROUPS		= 1<<23,
 	BSS_CHANGED_KEEP_ALIVE		= 1<<24,
+	BSS_CHANGED_MCAST_RATE		= 1<<25,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fd68f6fb02d7..5c4b105ca398 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2313,6 +2313,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
 	memcpy(sdata->vif.bss_conf.mcast_rate, rate,
 	       sizeof(int) * NUM_NL80211_BANDS);
 
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MCAST_RATE);
+
 	return 0;
 }
 
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index db07e0de9a03..dc582aa35c89 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1839,7 +1839,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 		  IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED
 		| IEEE80211_HT_PARAM_RIFS_MODE;
 
-	changed |= BSS_CHANGED_HT;
+	changed |= BSS_CHANGED_HT | BSS_CHANGED_MCAST_RATE;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 6a381cbe1e33..d51da26e9c18 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -880,7 +880,8 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 		      BSS_CHANGED_BEACON_ENABLED |
 		      BSS_CHANGED_HT |
 		      BSS_CHANGED_BASIC_RATES |
-		      BSS_CHANGED_BEACON_INT;
+		      BSS_CHANGED_BEACON_INT |
+		      BSS_CHANGED_MCAST_RATE;
 
 	local->fif_other_bss++;
 	/* mesh ifaces must set allmulti to forward mcast traffic */
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1f82191ce601..55cd2922627a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1968,7 +1968,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 			  BSS_CHANGED_CQM |
 			  BSS_CHANGED_QOS |
 			  BSS_CHANGED_IDLE |
-			  BSS_CHANGED_TXPOWER;
+			  BSS_CHANGED_TXPOWER |
+			  BSS_CHANGED_MCAST_RATE;
 
 		if (sdata->vif.mu_mimo_owner)
 			changed |= BSS_CHANGED_MU_GROUPS;

From e9de0018d1fa97f8db9a39fcb69b55266c52835b Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 23 Mar 2018 08:09:48 -0700
Subject: [PATCH 1115/1678] devlink: Remove top_hierarchy arg for DEVLINK
 disabled path

Earlier change missed the path where CONFIG_NET_DEVLINK is disabled.
Thanks to Jiri for spotting.

Fixes: 145307460ba9 ("devlink: Remove top_hierarchy arg to devlink_resource_register")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/devlink.h b/include/net/devlink.h
index d5b707375e48..e21d8cadd480 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -559,7 +559,6 @@ devlink_dpipe_match_put(struct sk_buff *skb,
 static inline int
 devlink_resource_register(struct devlink *devlink,
 			  const char *resource_name,
-			  bool top_hierarchy,
 			  u64 resource_size,
 			  u64 resource_id,
 			  u64 parent_resource_id,

From bda7fab54828bbef2164bb23c0f6b1a7d05cc718 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Thu, 22 Mar 2018 14:42:41 +0000
Subject: [PATCH 1116/1678] virtio-net: Fix operstate for virtio when no
 VIRTIO_NET_F_STATUS

The operstate update logic will leave an interface in the
default UNKNOWN operstate if the interface carrier state never changes
from the default carrier up state set at creation.  This includes the
case of an explicit call to netif_carrier_on, as the carrier on to on
transition has no effect on operstate.

	This affects virtio-net for the case that the virtio peer does
not support VIRTIO_NET_F_STATUS (the feature that provides carrier state
updates).  Without this feature, the virtio specification states that
"the link should be assumed active," so, logically, the operstate should
be UP instead of UNKNOWN.  This has impact on user space applications
that use the operstate to make availability decisions for the interface.

	Resolve this by changing the virtio probe logic slightly to call
netif_carrier_off for both the "with" and "without" VIRTIO_NET_F_STATUS
cases, and then the existing call to netif_carrier_on for the "without"
case will cause an operstate transition.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 23374603e4d9..7b187ec7411e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2857,8 +2857,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	/* Assume link up if device can't report link status,
 	   otherwise get link status from config. */
+	netif_carrier_off(dev);
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
-		netif_carrier_off(dev);
 		schedule_work(&vi->config_work);
 	} else {
 		vi->status = VIRTIO_NET_S_LINK_UP;

From 419d14af9e07fb5ca32b1b1614793c6b1e242152 Mon Sep 17 00:00:00 2001
From: Chas Williams <3chas3@gmail.com>
Date: Thu, 22 Mar 2018 11:34:06 -0400
Subject: [PATCH 1117/1678] bridge: Allow max MTU when multiple VLANs present

If the bridge is allowing multiple VLANs, some VLANs may have
different MTUs.  Instead of choosing the minimum MTU for the
bridge interface, choose the maximum MTU of the bridge members.
With this the user only needs to set a larger MTU on the member
ports that are participating in the large MTU VLANS.

Signed-off-by: Chas Williams <3chas3@gmail.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c         |  2 +-
 net/bridge/br_device.c  |  2 +-
 net/bridge/br_if.c      | 26 ++++++++++++++++++++++----
 net/bridge/br_private.h |  2 +-
 4 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 7770481a6506..a3f95ab9d6a3 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 
 	switch (event) {
 	case NETDEV_CHANGEMTU:
-		dev_set_mtu(br->dev, br_min_mtu(br));
+		dev_set_mtu(br->dev, br_mtu(br));
 		break;
 
 	case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 1285ca30ab0a..278fc999d355 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,7 +224,7 @@ static void br_get_stats64(struct net_device *dev,
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	if (new_mtu > br_min_mtu(br))
+	if (new_mtu > br_mtu(br))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9ba4ed65c52b..48dc4d2e2be3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -424,8 +424,18 @@ int br_del_bridge(struct net *net, const char *name)
 	return ret;
 }
 
+static bool min_mtu(int a, int b)
+{
+	return a < b ? 1 : 0;
+}
+
+static bool max_mtu(int a, int b)
+{
+	return a > b ? 1 : 0;
+}
+
 /* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-int br_min_mtu(const struct net_bridge *br)
+static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
 {
 	const struct net_bridge_port *p;
 	int mtu = 0;
@@ -436,13 +446,21 @@ int br_min_mtu(const struct net_bridge *br)
 		mtu = ETH_DATA_LEN;
 	else {
 		list_for_each_entry(p, &br->port_list, list) {
-			if (!mtu  || p->dev->mtu < mtu)
+			if (!mtu || compare_fn(p->dev->mtu, mtu))
 				mtu = p->dev->mtu;
 		}
 	}
 	return mtu;
 }
 
+int br_mtu(const struct net_bridge *br)
+{
+	if (br->vlan_enabled)
+		return __br_mtu(br, max_mtu);
+	else
+		return __br_mtu(br, min_mtu);
+}
+
 static void br_set_gso_limits(struct net_bridge *br)
 {
 	unsigned int gso_max_size = GSO_MAX_SIZE;
@@ -594,7 +612,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	if (changed_addr)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
-	dev_set_mtu(br->dev, br_min_mtu(br));
+	dev_set_mtu(br->dev, br_mtu(br));
 	br_set_gso_limits(br);
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -641,7 +659,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	 */
 	del_nbp(p);
 
-	dev_set_mtu(br->dev, br_min_mtu(br));
+	dev_set_mtu(br->dev, br_mtu(br));
 	br_set_gso_limits(br);
 
 	spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8e13a64d8c99..048d5b51813b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -578,7 +578,7 @@ int br_del_bridge(struct net *net, const char *name);
 int br_add_if(struct net_bridge *br, struct net_device *dev,
 	      struct netlink_ext_ack *extack);
 int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_min_mtu(const struct net_bridge *br);
+int br_mtu(const struct net_bridge *br);
 netdev_features_t br_features_recompute(struct net_bridge *br,
 					netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);

From ae06c70b135886d7d6252f3090146f01a3f3b80c Mon Sep 17 00:00:00 2001
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 22 Mar 2018 10:08:48 -0700
Subject: [PATCH 1118/1678] intel: add SPDX identifiers to all the Intel
 drivers

Add the SPDX identifiers to all the Intel wired LAN driver files, as
outlined in Documentation/process/license-rules.rst.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e100.c                   | 1 +
 drivers/net/ethernet/intel/e1000/Makefile           | 1 +
 drivers/net/ethernet/intel/e1000/e1000.h            | 1 +
 drivers/net/ethernet/intel/e1000/e1000_ethtool.c    | 1 +
 drivers/net/ethernet/intel/e1000/e1000_hw.c         | 1 +
 drivers/net/ethernet/intel/e1000/e1000_hw.h         | 1 +
 drivers/net/ethernet/intel/e1000/e1000_main.c       | 1 +
 drivers/net/ethernet/intel/e1000/e1000_osdep.h      | 1 +
 drivers/net/ethernet/intel/e1000/e1000_param.c      | 1 +
 drivers/net/ethernet/intel/e1000e/80003es2lan.c     | 1 +
 drivers/net/ethernet/intel/e1000e/80003es2lan.h     | 1 +
 drivers/net/ethernet/intel/e1000e/82571.c           | 1 +
 drivers/net/ethernet/intel/e1000e/82571.h           | 1 +
 drivers/net/ethernet/intel/e1000e/Makefile          | 1 +
 drivers/net/ethernet/intel/e1000e/defines.h         | 1 +
 drivers/net/ethernet/intel/e1000e/e1000.h           | 1 +
 drivers/net/ethernet/intel/e1000e/ethtool.c         | 1 +
 drivers/net/ethernet/intel/e1000e/hw.h              | 1 +
 drivers/net/ethernet/intel/e1000e/ich8lan.c         | 1 +
 drivers/net/ethernet/intel/e1000e/ich8lan.h         | 1 +
 drivers/net/ethernet/intel/e1000e/mac.c             | 1 +
 drivers/net/ethernet/intel/e1000e/mac.h             | 1 +
 drivers/net/ethernet/intel/e1000e/manage.c          | 1 +
 drivers/net/ethernet/intel/e1000e/manage.h          | 1 +
 drivers/net/ethernet/intel/e1000e/netdev.c          | 1 +
 drivers/net/ethernet/intel/e1000e/nvm.c             | 1 +
 drivers/net/ethernet/intel/e1000e/nvm.h             | 1 +
 drivers/net/ethernet/intel/e1000e/param.c           | 1 +
 drivers/net/ethernet/intel/e1000e/phy.c             | 1 +
 drivers/net/ethernet/intel/e1000e/phy.h             | 1 +
 drivers/net/ethernet/intel/e1000e/ptp.c             | 1 +
 drivers/net/ethernet/intel/e1000e/regs.h            | 1 +
 drivers/net/ethernet/intel/fm10k/Makefile           | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k.h            | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_common.c     | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_common.h     | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c      | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c    | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c    | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_iov.c        | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_main.c       | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_mbx.c        | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_mbx.h        | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c     | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c        | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_pf.c         | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_pf.h         | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_tlv.c        | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_tlv.h        | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_type.h       | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_vf.c         | 1 +
 drivers/net/ethernet/intel/fm10k/fm10k_vf.h         | 1 +
 drivers/net/ethernet/intel/i40e/Makefile            | 1 +
 drivers/net/ethernet/intel/i40e/i40e.h              | 1 +
 drivers/net/ethernet/intel/i40e/i40e_adminq.c       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_adminq.h       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h   | 1 +
 drivers/net/ethernet/intel/i40e/i40e_alloc.h        | 1 +
 drivers/net/ethernet/intel/i40e/i40e_client.c       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_client.h       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_common.c       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_dcb.c          | 1 +
 drivers/net/ethernet/intel/i40e/i40e_dcb.h          | 1 +
 drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c      | 1 +
 drivers/net/ethernet/intel/i40e/i40e_devids.h       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_diag.c         | 1 +
 drivers/net/ethernet/intel/i40e/i40e_diag.h         | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c      | 1 +
 drivers/net/ethernet/intel/i40e/i40e_hmc.c          | 1 +
 drivers/net/ethernet/intel/i40e/i40e_hmc.h          | 1 +
 drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c      | 1 +
 drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h      | 1 +
 drivers/net/ethernet/intel/i40e/i40e_main.c         | 1 +
 drivers/net/ethernet/intel/i40e/i40e_nvm.c          | 1 +
 drivers/net/ethernet/intel/i40e/i40e_osdep.h        | 1 +
 drivers/net/ethernet/intel/i40e/i40e_prototype.h    | 1 +
 drivers/net/ethernet/intel/i40e/i40e_ptp.c          | 1 +
 drivers/net/ethernet/intel/i40e/i40e_register.h     | 1 +
 drivers/net/ethernet/intel/i40e/i40e_status.h       | 1 +
 drivers/net/ethernet/intel/i40e/i40e_trace.h        | 1 +
 drivers/net/ethernet/intel/i40e/i40e_txrx.c         | 1 +
 drivers/net/ethernet/intel/i40e/i40e_txrx.h         | 1 +
 drivers/net/ethernet/intel/i40e/i40e_type.h         | 1 +
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c  | 1 +
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h  | 1 +
 drivers/net/ethernet/intel/i40evf/Makefile          | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_adminq.c     | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_adminq.h     | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_alloc.h      | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_common.c     | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_devids.h     | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_hmc.h        | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h    | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_osdep.h      | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_prototype.h  | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_register.h   | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_status.h     | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_trace.h      | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c       | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_txrx.h       | 1 +
 drivers/net/ethernet/intel/i40evf/i40e_type.h       | 1 +
 drivers/net/ethernet/intel/i40evf/i40evf.h          | 1 +
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c  | 1 +
 drivers/net/ethernet/intel/i40evf/i40evf_main.c     | 1 +
 drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 1 +
 drivers/net/ethernet/intel/igb/Makefile             | 1 +
 drivers/net/ethernet/intel/igb/e1000_82575.c        | 1 +
 drivers/net/ethernet/intel/igb/e1000_82575.h        | 1 +
 drivers/net/ethernet/intel/igb/e1000_defines.h      | 1 +
 drivers/net/ethernet/intel/igb/e1000_hw.h           | 1 +
 drivers/net/ethernet/intel/igb/e1000_i210.c         | 1 +
 drivers/net/ethernet/intel/igb/e1000_i210.h         | 1 +
 drivers/net/ethernet/intel/igb/e1000_mac.c          | 1 +
 drivers/net/ethernet/intel/igb/e1000_mac.h          | 1 +
 drivers/net/ethernet/intel/igb/e1000_mbx.c          | 1 +
 drivers/net/ethernet/intel/igb/e1000_mbx.h          | 1 +
 drivers/net/ethernet/intel/igb/e1000_nvm.c          | 1 +
 drivers/net/ethernet/intel/igb/e1000_nvm.h          | 1 +
 drivers/net/ethernet/intel/igb/e1000_phy.c          | 1 +
 drivers/net/ethernet/intel/igb/e1000_phy.h          | 1 +
 drivers/net/ethernet/intel/igb/e1000_regs.h         | 1 +
 drivers/net/ethernet/intel/igb/igb.h                | 1 +
 drivers/net/ethernet/intel/igb/igb_ethtool.c        | 1 +
 drivers/net/ethernet/intel/igb/igb_hwmon.c          | 1 +
 drivers/net/ethernet/intel/igb/igb_main.c           | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c            | 1 +
 drivers/net/ethernet/intel/igbvf/Makefile           | 1 +
 drivers/net/ethernet/intel/igbvf/defines.h          | 1 +
 drivers/net/ethernet/intel/igbvf/ethtool.c          | 1 +
 drivers/net/ethernet/intel/igbvf/igbvf.h            | 1 +
 drivers/net/ethernet/intel/igbvf/mbx.c              | 1 +
 drivers/net/ethernet/intel/igbvf/mbx.h              | 1 +
 drivers/net/ethernet/intel/igbvf/netdev.c           | 1 +
 drivers/net/ethernet/intel/igbvf/regs.h             | 1 +
 drivers/net/ethernet/intel/igbvf/vf.c               | 1 +
 drivers/net/ethernet/intel/igbvf/vf.h               | 1 +
 drivers/net/ethernet/intel/ixgb/Makefile            | 1 +
 drivers/net/ethernet/intel/ixgb/ixgb.h              | 1 +
 drivers/net/ethernet/intel/ixgb/ixgb_ee.h           | 1 +
 drivers/net/ethernet/intel/ixgb/ixgb_hw.h           | 1 +
 drivers/net/ethernet/intel/ixgb/ixgb_ids.h          | 1 +
 drivers/net/ethernet/intel/ixgb/ixgb_osdep.h        | 1 +
 drivers/net/ethernet/intel/ixgbe/Makefile           | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe.h            | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c      | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c      | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c     | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.h     | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h        | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c  | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h  | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h  | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h       | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h      | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h        | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_model.h      | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h        | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h      | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h       | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h       | 1 +
 drivers/net/ethernet/intel/ixgbevf/Makefile         | 1 +
 drivers/net/ethernet/intel/ixgbevf/defines.h        | 1 +
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h        | 1 +
 drivers/net/ethernet/intel/ixgbevf/mbx.h            | 1 +
 drivers/net/ethernet/intel/ixgbevf/regs.h           | 1 +
 drivers/net/ethernet/intel/ixgbevf/vf.h             | 1 +
 168 files changed, 168 insertions(+)

diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 29486478836e..41ad56edfb96 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel PRO/100 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/Makefile b/drivers/net/ethernet/intel/e1000/Makefile
index 4a6ab1522451..c7caadd3c8af 100644
--- a/drivers/net/ethernet/intel/e1000/Makefile
+++ b/drivers/net/ethernet/intel/e1000/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
index 8fd2458060a0..3a0feea2df54 100644
--- a/drivers/net/ethernet/intel/e1000/e1000.h
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index dc71e87c3260..3e80ca170dd7 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  * Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2006 Intel Corporation.
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 3bac9df1c099..6e7e923d57bf 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 *
   Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.h b/drivers/net/ethernet/intel/e1000/e1000_hw.h
index 5cf7268cc4e1..f09c569ec19b 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.h
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 3dd4aeb2706d..d5eb19b86a0a 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_osdep.h b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
index 33e7c45a4fe4..ae0559b8b011 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_osdep.h
+++ b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c
index c9cde352b1c8..345f23927bcc 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_param.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_param.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index cd391376036c..953e99df420c 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.h b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
index a2162e11673e..ee6d1256fda4 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.h
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 6b03c8553e59..924f2c8dfa6c 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/82571.h b/drivers/net/ethernet/intel/e1000e/82571.h
index abc6a9abff98..9a24c645f726 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.h
+++ b/drivers/net/ethernet/intel/e1000e/82571.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile
index 106de493373c..24e391a4ac68 100644
--- a/drivers/net/ethernet/intel/e1000e/Makefile
+++ b/drivers/net/ethernet/intel/e1000e/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel PRO/1000 Linux driver
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 824fd44e25f0..22883015a695 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 2311b31bdcac..da88555ba1fd 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 003cbd605799..64dc0c11147f 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index d803b1a12349..21802396bed6 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 1dddfb7b2de6..1551d6ce5341 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
index 00a36df02a3f..3c4f82c21084 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.h
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index 5bdc3a2d4fd7..b293464a9f27 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/mac.h b/drivers/net/ethernet/intel/e1000e/mac.h
index 8284618af9ff..cb0abf6c76a5 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.h
+++ b/drivers/net/ethernet/intel/e1000e/mac.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index cc9b3befc2bc..e027660aeb92 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/manage.h b/drivers/net/ethernet/intel/e1000e/manage.h
index 0b9ea5952b07..3268f2e58593 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.h
+++ b/drivers/net/ethernet/intel/e1000e/manage.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index dc853b0863af..ec4a9759a6f2 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
index 2efd80dfd88e..68949bb41b7b 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.c
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.h b/drivers/net/ethernet/intel/e1000e/nvm.h
index 5d46967e0d1f..8e082028be7d 100644
--- a/drivers/net/ethernet/intel/e1000e/nvm.h
+++ b/drivers/net/ethernet/intel/e1000e/nvm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index 47da51864543..2def33eba9e6 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 86ff0969efb6..b8226ed0e338 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
index 3027f63ee793..d4180b5e9196 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.h
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index b366885487a8..f941e5085f44 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index 0cb4d365e5ad..16afc3c2a986 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel PRO/1000 Linux driver
  * Copyright(c) 1999 - 2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile
index cac645329cea..93277cb99cb7 100644
--- a/drivers/net/ethernet/intel/fm10k/Makefile
+++ b/drivers/net/ethernet/intel/fm10k/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel(R) Ethernet Switch Host Interface Driver
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 46973fb234c5..a9cdf763c59d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index c58a5377a287..e303d88720ef 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2018 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
index d51f9c7a47ff..2bdb24d2ca9d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index db4bd8bf9722..c4f733452ef2 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 14df09e2d964..43e8d839831f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index c7234f35f8ff..28b6b4e56487 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 760cfa52d02c..30395f5e5e87 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 2c93d719438f..df8607097e4a 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 244d3ad58ca7..c01bf30a0c9e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
index 35c1dbad1330..007e1dfa9b7a 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 75c99aed3c41..45793491d4ba 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2018 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 50f53e403ef5..cffcb187cb76 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2018 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index bee192fe2ffb..7ba54c534f8c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2018 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index e04d41f1a532..ae81f9a16602 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2017 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
index 9d0d31da426b..725ecb7abccd 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2018 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
index a1f1027fe184..5d2ee759507e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 6bb16c13d9d6..dd23af11e2c1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
index 337ba65a9411..f06913630b39 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
index 2662f33c0c71..66a66b73a2f1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Ethernet Switch Host Interface Driver
  * Copyright(c) 2013 - 2016 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index 3da482c3d68d..75437768a07c 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 271ab1a861b7..1d33a8b3ef54 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index e78971605e0b..843fc7781ef8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 2349fbe04bd2..0a8749ee9fd3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 0dfc52772c45..0244923edeb8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
index 926811ad44ac..abed0c52e782 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 704695a61645..999dea5a7c9e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index ba55c889e4c5..9d464d40bc17 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 4fa31d87d9d2..c0a3dae8a2db 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 55079fe3ed63..9fec728dc4b9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index 92d01042c1f6..4f806386cb22 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 886e667f2f1c..502818e3da78 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index b829fd365693..d494dcaf18d0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index 8e46098bad57..ad6a66ccb576 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
index 76ed56641864..df3e60470f8b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
index 0b5911652084..be8341763475 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0c7e7de595d3..846a9d597e01 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
index a7c7b1d9b7c8..6d4b590f851b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
index d90669211392..7b5fd33d70ae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index daa9204426d4..cd40dc487b38 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
index e74128db5be5..79e1396735d9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 4a4401c61089..536ed8e8a96f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 76a5cb04e4fe..ba9687c03795 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
index 80e66da6b145..9c3c3b0d3ac4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index eabb636f6a19..2ec24188d6e2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 97381238eb7c..5b47dd1f75a5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index c234758dad15..b3e206e49cc2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h
index afb72e711d43..10c86f63dc52 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_status.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index d3e55f54a05e..410ba13bcf21 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel(R) 40-10 Gigabit Ethernet Connection Network Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 97cfe944b568..7ccd05bf4b06 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 3c80ea784389..7f8220e65374 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 69ea15892a5b..bfb80092b352 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 321ab4badb68..35173cbe80f7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 6852599b2379..57f727bb9e36 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Driver
diff --git a/drivers/net/ethernet/intel/i40evf/Makefile b/drivers/net/ethernet/intel/i40evf/Makefile
index a393f4a07f06..1e89c5487676 100644
--- a/drivers/net/ethernet/intel/i40evf/Makefile
+++ b/drivers/net/ethernet/intel/i40evf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index d1aab6b8bfb1..6fd677efa9da 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
index e0bfaa3d4a21..a7137c165256 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 815de8d9c3fb..439e71882049 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
index 8e6a6dd9212b..7e0fddd8af36 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_alloc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 67bf5cebb76f..67140cdbcd7a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index 0469e4bfd3ec..352dd3f3eb6a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
index 00ed24bfce13..7432596164f4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
index a5d79877354c..ddac0e4908d3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_lan_hmc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index a90737786c34..8668ad6c1a65 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 47c429931a57..72501bd0f1a9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_register.h b/drivers/net/ethernet/intel/i40evf/i40e_register.h
index 10febcfd7cd8..c9c935659758 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_register.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
index 5b222246e08b..0d7993ecb99a 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_trace.h b/drivers/net/ethernet/intel/i40evf/i40e_trace.h
index 9a5100b2b7c7..ece01dd12a3c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel(R) 40-10 Gigabit Ethernet Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index e088d23eb083..12bd937861e7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 9129447d079b..5790897eae2e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 54951c84a481..449de4b0058e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 279dced87e47..3a7a1e77bf39 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index e6793255de0b..dc4cde274fb8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 7e7cd80abaf4..5f71532be7f1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 3c76c817ca1a..26a59890532f 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 5bcb2de75933..c48583e98ac1 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel 82575 PCI-Express Ethernet Linux driver
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index c37cc8bccf47..dd9b6cac220d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index acf06051e111..e53ebe97d709 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 83cabff1e0ab..98534f765e0e 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
index 6c9485ab4b57..ff835e1e853d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index 07d48f2e3369..6f548247e6d8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
index b2964a2a60b1..56f015ccb206 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.h
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 5eff82678f0b..298afa0d9159 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h
index 90c8893c3eed..04d80c765aee 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index bffd58f7b2a1..ef42f1689b3b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h
index a62b08e1572e..4f0ecd28354d 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
index 3582c5cf8843..e4596f151cd4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.c
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  * This program is free software; you can redistribute it and/or modify it
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h
index febc9cdb7391..dde68cd54a53 100644
--- a/drivers/net/ethernet/intel/igb/e1000_nvm.h
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 413025bdcb50..4ec61243da82 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2015 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
index 9b622b33bb5a..856d2cda0643 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.h
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 568c96842f28..e8fa8c6530e0 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 55d6f17d5799..8dbc399b345e 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 606e6761758f..e77ba0d5866d 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
index 44b6a68f1af7..41b306fb90f8 100644
--- a/drivers/net/ethernet/intel/igb/igb_hwmon.c
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 715bb32e6901..c1c0bc30a16d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Intel(R) Gigabit Ethernet Linux driver
  * Copyright(c) 2007-2014 Intel Corporation.
  *
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 0746b19ec6d3..7454b9895a65 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /* PTP Hardware Clock (PHC) driver for the Intel 82576 and 82580
  *
  * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com>
diff --git a/drivers/net/ethernet/intel/igbvf/Makefile b/drivers/net/ethernet/intel/igbvf/Makefile
index 044b0ad5fcb9..efe29dae384a 100644
--- a/drivers/net/ethernet/intel/igbvf/Makefile
+++ b/drivers/net/ethernet/intel/igbvf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h
index f1789d192e24..04bcfec0641b 100644
--- a/drivers/net/ethernet/intel/igbvf/defines.h
+++ b/drivers/net/ethernet/intel/igbvf/defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index a127688e83e6..ca39e3cccaeb 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h
index bf69f01f8467..f5bf248e22eb 100644
--- a/drivers/net/ethernet/intel/igbvf/igbvf.h
+++ b/drivers/net/ethernet/intel/igbvf/igbvf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
index c9a441632e9f..9195884096f8 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.c
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.h b/drivers/net/ethernet/intel/igbvf/mbx.h
index 30d58c4a444e..479b062fe9ee 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.h
+++ b/drivers/net/ethernet/intel/igbvf/mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 4214c1519a87..e2b7502f1953 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/regs.h b/drivers/net/ethernet/intel/igbvf/regs.h
index 86a7c120b574..614e52409f11 100644
--- a/drivers/net/ethernet/intel/igbvf/regs.h
+++ b/drivers/net/ethernet/intel/igbvf/regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c
index 9577ccf4b26a..bfe8d8297b2e 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.c
+++ b/drivers/net/ethernet/intel/igbvf/vf.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h
index d213eefb6169..193b50026246 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.h
+++ b/drivers/net/ethernet/intel/igbvf/vf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel(R) 82576 Virtual Function Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/Makefile b/drivers/net/ethernet/intel/ixgb/Makefile
index 0b20c5e62ffe..1b42dd554dd2 100644
--- a/drivers/net/ethernet/intel/ixgb/Makefile
+++ b/drivers/net/ethernet/intel/ixgb/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb.h b/drivers/net/ethernet/intel/ixgb/ixgb.h
index 1180cd59b570..92022841755f 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
index 5680f64314b8..475297a810fe 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
index 0bd5d72e1af5..19f36d87ef61 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
index 32c1b302d791..24e849902d60 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
index 8fc905192231..b1710379192e 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel PRO/10GbE Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 8319465eb38d..4cd96c88cb5d 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index c1e3a0039ea5..4f08c712e58e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index a0ebd9ecf243..cb0fe5fedb33 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 4dfc81dbee4b..66a74f4651e8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 61188f343955..633be93f3dbb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 4d4c02366cb3..67f304289fd9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
index fc0a2dd52499..73b6362d4327 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
index f94c7e82a30b..085130626330 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
index 3164f5453b8f..7edce607f901 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
index 90c370230e20..fa030f0abc18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
index 38385876effb..cf1919901514 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index 87d2800b94ab..4f099f516645 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index 811cb4f64a5b..c4628b663590 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
index 538a1c5475b6..72446644f9fa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel 10 Gigabit PCI Express Linux drive
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index b0cac961df3b..d6a7e77348c5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index cf67b9b18ed7..e30d1f07e891 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index ca45359686d3..2daa81e6e9b2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
index e21cd48491d3..182d640e9f7a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
  *
  * Intel 10 Gigabit PCI Express Linux driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index 4ce4c97ef5ad..bb47814cfa90 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 ################################################################################
 #
 # Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 8617cae2f801..71c828842b11 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f65ca156af2d..c06ea4dc49a0 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index bc0442acae78..5ec947fe3d09 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/regs.h b/drivers/net/ethernet/intel/ixgbevf/regs.h
index 2764fd16261f..278f73980501 100644
--- a/drivers/net/ethernet/intel/ixgbevf/regs.h
+++ b/drivers/net/ethernet/intel/ixgbevf/regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index c651fefcc3d2..194fbdaa4519 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver

From 69ca9293e8dd9323c6cde579e1855d6ce9489a46 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:09:53 -0700
Subject: [PATCH 1119/1678] tls: Generalize zerocopy_from_iter

Refactor zerocopy_from_iter to take arguments for pages and size,
such that it can be used for both tx and rx. RX will also support
zerocopy direct to output iter, as long as the full message can
be copied at once (a large enough userspace buffer was provided).

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 057a558ed6d7..ca1d20de3d2c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -226,23 +226,24 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
 }
 
 static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
-			      int length)
+			      int length, int *pages_used,
+			      unsigned int *size_used,
+			      struct scatterlist *to, int to_max_pages,
+			      bool charge)
 {
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
 	struct page *pages[MAX_SKB_FRAGS];
 
 	size_t offset;
 	ssize_t copied, use;
 	int i = 0;
-	unsigned int size = ctx->sg_plaintext_size;
-	int num_elem = ctx->sg_plaintext_num_elem;
+	unsigned int size = *size_used;
+	int num_elem = *pages_used;
 	int rc = 0;
 	int maxpages;
 
 	while (length > 0) {
 		i = 0;
-		maxpages = ARRAY_SIZE(ctx->sg_plaintext_data) - num_elem;
+		maxpages = to_max_pages - num_elem;
 		if (maxpages == 0) {
 			rc = -EFAULT;
 			goto out;
@@ -262,10 +263,11 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 		while (copied) {
 			use = min_t(int, copied, PAGE_SIZE - offset);
 
-			sg_set_page(&ctx->sg_plaintext_data[num_elem],
+			sg_set_page(&to[num_elem],
 				    pages[i], use, offset);
-			sg_unmark_end(&ctx->sg_plaintext_data[num_elem]);
-			sk_mem_charge(sk, use);
+			sg_unmark_end(&to[num_elem]);
+			if (charge)
+				sk_mem_charge(sk, use);
 
 			offset = 0;
 			copied -= use;
@@ -276,8 +278,9 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 	}
 
 out:
-	ctx->sg_plaintext_size = size;
-	ctx->sg_plaintext_num_elem = num_elem;
+	*size_used = size;
+	*pages_used = num_elem;
+
 	return rc;
 }
 
@@ -374,7 +377,11 @@ alloc_encrypted:
 
 		if (full_record || eor) {
 			ret = zerocopy_from_iter(sk, &msg->msg_iter,
-						 try_to_copy);
+				try_to_copy, &ctx->sg_plaintext_num_elem,
+				&ctx->sg_plaintext_size,
+				ctx->sg_plaintext_data,
+				ARRAY_SIZE(ctx->sg_plaintext_data),
+				true);
 			if (ret)
 				goto fallback_to_reg_send;
 

From dbe425599ba05c7415f632e6f5f018453098eb69 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:10:06 -0700
Subject: [PATCH 1120/1678] tls: Move cipher info to a separate struct

Separate tx crypto parameters to a separate cipher_context struct.
The same parameters will be used for rx using the same struct.

tls_advance_record_sn is modified to only take the cipher info.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h  | 26 ++++++++++++---------
 net/tls/tls_main.c |  8 +++----
 net/tls/tls_sw.c   | 58 ++++++++++++++++++++++++----------------------
 3 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 4913430ab807..019e52db1817 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -81,6 +81,16 @@ enum {
 	TLS_PENDING_CLOSED_RECORD
 };
 
+struct cipher_context {
+	u16 prepend_size;
+	u16 tag_size;
+	u16 overhead_size;
+	u16 iv_size;
+	char *iv;
+	u16 rec_seq_size;
+	char *rec_seq;
+};
+
 struct tls_context {
 	union {
 		struct tls_crypto_info crypto_send;
@@ -91,13 +101,7 @@ struct tls_context {
 
 	u8 tx_conf:2;
 
-	u16 prepend_size;
-	u16 tag_size;
-	u16 overhead_size;
-	u16 iv_size;
-	char *iv;
-	u16 rec_seq_size;
-	char *rec_seq;
+	struct cipher_context tx;
 
 	struct scatterlist *partially_sent_record;
 	u16 partially_sent_offset;
@@ -190,7 +194,7 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
 }
 
 static inline void tls_advance_record_sn(struct sock *sk,
-					 struct tls_context *ctx)
+					 struct cipher_context *ctx)
 {
 	if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
 		tls_err_abort(sk);
@@ -203,9 +207,9 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
 			     size_t plaintext_len,
 			     unsigned char record_type)
 {
-	size_t pkt_len, iv_size = ctx->iv_size;
+	size_t pkt_len, iv_size = ctx->tx.iv_size;
 
-	pkt_len = plaintext_len + iv_size + ctx->tag_size;
+	pkt_len = plaintext_len + iv_size + ctx->tx.tag_size;
 
 	/* we cover nonce explicit here as well, so buf should be of
 	 * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
@@ -217,7 +221,7 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
 	buf[3] = pkt_len >> 8;
 	buf[4] = pkt_len & 0xFF;
 	memcpy(buf + TLS_NONCE_OFFSET,
-	       ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
+	       ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
 }
 
 static inline void tls_make_aad(char *buf,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d824d548447e..c671560b832b 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -259,8 +259,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 		}
 	}
 
-	kfree(ctx->rec_seq);
-	kfree(ctx->iv);
+	kfree(ctx->tx.rec_seq);
+	kfree(ctx->tx.iv);
 
 	if (ctx->tx_conf == TLS_SW_TX)
 		tls_sw_free_tx_resources(sk);
@@ -319,9 +319,9 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
 		}
 		lock_sock(sk);
 		memcpy(crypto_info_aes_gcm_128->iv,
-		       ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+		       ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 		       TLS_CIPHER_AES_GCM_128_IV_SIZE);
-		memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->rec_seq,
+		memcpy(crypto_info_aes_gcm_128->rec_seq, ctx->tx.rec_seq,
 		       TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE);
 		release_sock(sk);
 		if (copy_to_user(optval,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index ca1d20de3d2c..338d743bcc21 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -79,7 +79,7 @@ static void trim_both_sgl(struct sock *sk, int target_size)
 		target_size);
 
 	if (target_size > 0)
-		target_size += tls_ctx->overhead_size;
+		target_size += tls_ctx->tx.overhead_size;
 
 	trim_sg(sk, ctx->sg_encrypted_data,
 		&ctx->sg_encrypted_num_elem,
@@ -152,21 +152,21 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
 	if (!aead_req)
 		return -ENOMEM;
 
-	ctx->sg_encrypted_data[0].offset += tls_ctx->prepend_size;
-	ctx->sg_encrypted_data[0].length -= tls_ctx->prepend_size;
+	ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
+	ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
 
 	aead_request_set_tfm(aead_req, ctx->aead_send);
 	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
 	aead_request_set_crypt(aead_req, ctx->sg_aead_in, ctx->sg_aead_out,
-			       data_len, tls_ctx->iv);
+			       data_len, tls_ctx->tx.iv);
 
 	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				  crypto_req_done, &ctx->async_wait);
 
 	rc = crypto_wait_req(crypto_aead_encrypt(aead_req), &ctx->async_wait);
 
-	ctx->sg_encrypted_data[0].offset -= tls_ctx->prepend_size;
-	ctx->sg_encrypted_data[0].length += tls_ctx->prepend_size;
+	ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
+	ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
 
 	kfree(aead_req);
 	return rc;
@@ -183,7 +183,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
 
 	tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size,
-		     tls_ctx->rec_seq, tls_ctx->rec_seq_size,
+		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
 		     record_type);
 
 	tls_fill_prepend(tls_ctx,
@@ -216,7 +216,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	if (rc < 0 && rc != -EAGAIN)
 		tls_err_abort(sk);
 
-	tls_advance_record_sn(sk, tls_ctx);
+	tls_advance_record_sn(sk, &tls_ctx->tx);
 	return rc;
 }
 
@@ -357,7 +357,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		}
 
 		required_size = ctx->sg_plaintext_size + try_to_copy +
-				tls_ctx->overhead_size;
+				tls_ctx->tx.overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
@@ -420,7 +420,7 @@ alloc_plaintext:
 				&ctx->sg_encrypted_num_elem,
 				&ctx->sg_encrypted_size,
 				ctx->sg_plaintext_size +
-				tls_ctx->overhead_size);
+				tls_ctx->tx.overhead_size);
 		}
 
 		ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
@@ -512,7 +512,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 			full_record = true;
 		}
 		required_size = ctx->sg_plaintext_size + copy +
-			      tls_ctx->overhead_size;
+			      tls_ctx->tx.overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
@@ -644,24 +644,26 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 		goto free_priv;
 	}
 
-	ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
-	ctx->tag_size = tag_size;
-	ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
-	ctx->iv_size = iv_size;
-	ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
-	if (!ctx->iv) {
+	ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
+	ctx->tx.tag_size = tag_size;
+	ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
+	ctx->tx.iv_size = iv_size;
+	ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+			     GFP_KERNEL);
+	if (!ctx->tx.iv) {
 		rc = -ENOMEM;
 		goto free_priv;
 	}
-	memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
-	memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
-	ctx->rec_seq_size = rec_seq_size;
-	ctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
-	if (!ctx->rec_seq) {
+	memcpy(ctx->tx.iv, gcm_128_info->salt,
+	       TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+	ctx->tx.rec_seq_size = rec_seq_size;
+	ctx->tx.rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+	if (!ctx->tx.rec_seq) {
 		rc = -ENOMEM;
 		goto free_iv;
 	}
-	memcpy(ctx->rec_seq, rec_seq, rec_seq_size);
+	memcpy(ctx->tx.rec_seq, rec_seq, rec_seq_size);
 
 	sg_init_table(sw_ctx->sg_encrypted_data,
 		      ARRAY_SIZE(sw_ctx->sg_encrypted_data));
@@ -697,7 +699,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 	if (rc)
 		goto free_aead;
 
-	rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
+	rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tx.tag_size);
 	if (!rc)
 		return 0;
 
@@ -705,11 +707,11 @@ free_aead:
 	crypto_free_aead(sw_ctx->aead_send);
 	sw_ctx->aead_send = NULL;
 free_rec_seq:
-	kfree(ctx->rec_seq);
-	ctx->rec_seq = NULL;
+	kfree(ctx->tx.rec_seq);
+	ctx->tx.rec_seq = NULL;
 free_iv:
-	kfree(ctx->iv);
-	ctx->iv = NULL;
+	kfree(ctx->tx.iv);
+	ctx->tx.iv = NULL;
 free_priv:
 	kfree(ctx->priv_ctx);
 	ctx->priv_ctx = NULL;

From f4a8e43f1f0abc0e93ed5ee132288ee4142afde1 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:10:15 -0700
Subject: [PATCH 1121/1678] tls: Pass error code explicitly to tls_err_abort

Pass EBADMSG explicitly to tls_err_abort.  Receive path will
pass additional codes - EMSGSIZE if framing is larger than max
TLS record size, EINVAL if TLS version mismatch.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 6 +++---
 net/tls/tls_sw.c  | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 019e52db1817..6b44875a78e5 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -174,9 +174,9 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 	return tls_ctx->pending_open_record_frags;
 }
 
-static inline void tls_err_abort(struct sock *sk)
+static inline void tls_err_abort(struct sock *sk, int err)
 {
-	sk->sk_err = EBADMSG;
+	sk->sk_err = err;
 	sk->sk_error_report(sk);
 }
 
@@ -197,7 +197,7 @@ static inline void tls_advance_record_sn(struct sock *sk,
 					 struct cipher_context *ctx)
 {
 	if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
-		tls_err_abort(sk);
+		tls_err_abort(sk, EBADMSG);
 	tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 			     ctx->iv_size);
 }
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 338d743bcc21..1c79d9ad1731 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -214,7 +214,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	/* Only pass through MSG_DONTWAIT and MSG_NOSIGNAL flags */
 	rc = tls_push_sg(sk, tls_ctx, ctx->sg_encrypted_data, 0, flags);
 	if (rc < 0 && rc != -EAGAIN)
-		tls_err_abort(sk);
+		tls_err_abort(sk, EBADMSG);
 
 	tls_advance_record_sn(sk, &tls_ctx->tx);
 	return rc;

From 583715853a25b4f2720b847e4fb8e37727299152 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:10:26 -0700
Subject: [PATCH 1122/1678] tls: Refactor variable names

Several config variables are prefixed with tx, drop the prefix
since these will be used for both tx and rx.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h  |  2 +-
 net/tls/tls_main.c | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 6b44875a78e5..095b72283861 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -99,7 +99,7 @@ struct tls_context {
 
 	void *priv_ctx;
 
-	u8 tx_conf:2;
+	u8 conf:2;
 
 	struct cipher_context tx;
 
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index c671560b832b..c405beeda765 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -52,7 +52,7 @@ enum {
 };
 
 enum {
-	TLS_BASE_TX,
+	TLS_BASE,
 	TLS_SW_TX,
 	TLS_NUM_CONFIG,
 };
@@ -65,7 +65,7 @@ static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
 
-	sk->sk_prot = &tls_prots[ip_ver][ctx->tx_conf];
+	sk->sk_prot = &tls_prots[ip_ver][ctx->conf];
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -238,7 +238,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	lock_sock(sk);
 	sk_proto_close = ctx->sk_proto_close;
 
-	if (ctx->tx_conf == TLS_BASE_TX) {
+	if (ctx->conf == TLS_BASE) {
 		kfree(ctx);
 		goto skip_tx_cleanup;
 	}
@@ -262,7 +262,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	kfree(ctx->tx.rec_seq);
 	kfree(ctx->tx.iv);
 
-	if (ctx->tx_conf == TLS_SW_TX)
+	if (ctx->conf == TLS_SW_TX)
 		tls_sw_free_tx_resources(sk);
 
 skip_tx_cleanup:
@@ -371,7 +371,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 	struct tls_crypto_info *crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
 	int rc = 0;
-	int tx_conf;
+	int conf;
 
 	if (!optval || (optlen < sizeof(*crypto_info))) {
 		rc = -EINVAL;
@@ -418,11 +418,11 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 
 	/* currently SW is default, we will have ethtool in future */
 	rc = tls_set_sw_offload(sk, ctx);
-	tx_conf = TLS_SW_TX;
+	conf = TLS_SW_TX;
 	if (rc)
 		goto err_crypto_info;
 
-	ctx->tx_conf = tx_conf;
+	ctx->conf = conf;
 	update_sk_prot(sk, ctx);
 	ctx->sk_write_space = sk->sk_write_space;
 	sk->sk_write_space = tls_write_space;
@@ -465,12 +465,12 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
 
 static void build_protos(struct proto *prot, struct proto *base)
 {
-	prot[TLS_BASE_TX] = *base;
-	prot[TLS_BASE_TX].setsockopt	= tls_setsockopt;
-	prot[TLS_BASE_TX].getsockopt	= tls_getsockopt;
-	prot[TLS_BASE_TX].close		= tls_sk_proto_close;
+	prot[TLS_BASE] = *base;
+	prot[TLS_BASE].setsockopt	= tls_setsockopt;
+	prot[TLS_BASE].getsockopt	= tls_getsockopt;
+	prot[TLS_BASE].close		= tls_sk_proto_close;
 
-	prot[TLS_SW_TX] = prot[TLS_BASE_TX];
+	prot[TLS_SW_TX] = prot[TLS_BASE];
 	prot[TLS_SW_TX].sendmsg		= tls_sw_sendmsg;
 	prot[TLS_SW_TX].sendpage	= tls_sw_sendpage;
 }
@@ -513,7 +513,7 @@ static int tls_init(struct sock *sk)
 		mutex_unlock(&tcpv6_prot_mutex);
 	}
 
-	ctx->tx_conf = TLS_BASE_TX;
+	ctx->conf = TLS_BASE;
 	update_sk_prot(sk, ctx);
 out:
 	return rc;

From c46234ebb4d1eee5e09819f49169e51cfc6eb909 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:10:35 -0700
Subject: [PATCH 1123/1678] tls: RX path for ktls

Add rx path for tls software implementation.

recvmsg, splice_read, and poll implemented.

An additional sockopt TLS_RX is added, with the same interface as
TLS_TX.  Either TLX_RX or TLX_TX may be provided separately, or
together (with two different setsockopt calls with appropriate keys).

Control messages are passed via CMSG in a similar way to transmit.
If no cmsg buffer is passed, then only application data records
will be passed to userspace, and EIO is returned for other types of
alerts.

EBADMSG is passed for decryption errors, and EMSGSIZE is passed for
framing too big, and EBADMSG for framing too small (matching openssl
semantics). EINVAL is returned for TLS versions that do not match the
original setsockopt call.  All are unrecoverable.

strparser is used to parse TLS framing.   Decryption is done directly
in to userspace buffers if they are large enough to support it, otherwise
sk_cow_data is called (similar to ipsec), and buffers are decrypted in
place and copied.  splice_read always decrypts in place, since no
buffers are provided to decrypt in to.

sk_poll is overridden, and only returns POLLIN if a full TLS message is
received.  Otherwise we wait for strparser to finish reading a full frame.
Actual decryption is only done during recvmsg or splice_read calls.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h        |  27 +-
 include/uapi/linux/tls.h |   2 +
 net/tls/Kconfig          |   1 +
 net/tls/tls_main.c       |  62 ++++-
 net/tls/tls_sw.c         | 581 +++++++++++++++++++++++++++++++++++----
 5 files changed, 606 insertions(+), 67 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 095b72283861..437a746300bf 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -40,6 +40,7 @@
 #include <linux/socket.h>
 #include <linux/tcp.h>
 #include <net/tcp.h>
+#include <net/strparser.h>
 
 #include <uapi/linux/tls.h>
 
@@ -58,8 +59,18 @@
 
 struct tls_sw_context {
 	struct crypto_aead *aead_send;
+	struct crypto_aead *aead_recv;
 	struct crypto_wait async_wait;
 
+	/* Receive context */
+	struct strparser strp;
+	void (*saved_data_ready)(struct sock *sk);
+	unsigned int (*sk_poll)(struct file *file, struct socket *sock,
+				struct poll_table_struct *wait);
+	struct sk_buff *recv_pkt;
+	u8 control;
+	bool decrypted;
+
 	/* Sending context */
 	char aad_space[TLS_AAD_SPACE_SIZE];
 
@@ -96,12 +107,17 @@ struct tls_context {
 		struct tls_crypto_info crypto_send;
 		struct tls12_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128;
 	};
+	union {
+		struct tls_crypto_info crypto_recv;
+		struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128;
+	};
 
 	void *priv_ctx;
 
 	u8 conf:2;
 
 	struct cipher_context tx;
+	struct cipher_context rx;
 
 	struct scatterlist *partially_sent_record;
 	u16 partially_sent_offset;
@@ -128,12 +144,19 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 		  unsigned int optlen);
 
 
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx);
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
-void tls_sw_free_tx_resources(struct sock *sk);
+void tls_sw_free_resources(struct sock *sk);
+int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		   int nonblock, int flags, int *addr_len);
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+			 struct poll_table_struct *wait);
+ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
+			   struct pipe_inode_info *pipe,
+			   size_t len, unsigned int flags);
 
 void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
 void tls_icsk_clean_acked(struct sock *sk);
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 293b2cdad88d..c6633e97eca4 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -38,6 +38,7 @@
 
 /* TLS socket options */
 #define TLS_TX			1	/* Set transmit parameters */
+#define TLS_RX			2	/* Set receive parameters */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver)	((ver) & 0xFF)
@@ -59,6 +60,7 @@
 #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE		8
 
 #define TLS_SET_RECORD_TYPE	1
+#define TLS_GET_RECORD_TYPE	2
 
 struct tls_crypto_info {
 	__u16 version;
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index eb583038c67e..89b8745a986f 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -7,6 +7,7 @@ config TLS
 	select CRYPTO
 	select CRYPTO_AES
 	select CRYPTO_GCM
+	select STREAM_PARSER
 	default n
 	---help---
 	Enable kernel support for TLS protocol. This allows symmetric
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index c405beeda765..6f5c1146da4a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -54,12 +54,15 @@ enum {
 enum {
 	TLS_BASE,
 	TLS_SW_TX,
+	TLS_SW_RX,
+	TLS_SW_RXTX,
 	TLS_NUM_CONFIG,
 };
 
 static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
 static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
+static struct proto_ops tls_sw_proto_ops;
 
 static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 {
@@ -261,9 +264,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 
 	kfree(ctx->tx.rec_seq);
 	kfree(ctx->tx.iv);
+	kfree(ctx->rx.rec_seq);
+	kfree(ctx->rx.iv);
 
-	if (ctx->conf == TLS_SW_TX)
-		tls_sw_free_tx_resources(sk);
+	if (ctx->conf == TLS_SW_TX ||
+	    ctx->conf == TLS_SW_RX ||
+	    ctx->conf == TLS_SW_RXTX) {
+		tls_sw_free_resources(sk);
+	}
 
 skip_tx_cleanup:
 	release_sock(sk);
@@ -365,8 +373,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
 	return do_tls_getsockopt(sk, optname, optval, optlen);
 }
 
-static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
-				unsigned int optlen)
+static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
+				  unsigned int optlen, int tx)
 {
 	struct tls_crypto_info *crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
@@ -378,7 +386,11 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 		goto out;
 	}
 
-	crypto_info = &ctx->crypto_send;
+	if (tx)
+		crypto_info = &ctx->crypto_send;
+	else
+		crypto_info = &ctx->crypto_recv;
+
 	/* Currently we don't support set crypto info more than one time */
 	if (TLS_CRYPTO_INFO_READY(crypto_info)) {
 		rc = -EBUSY;
@@ -417,15 +429,31 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 	}
 
 	/* currently SW is default, we will have ethtool in future */
-	rc = tls_set_sw_offload(sk, ctx);
-	conf = TLS_SW_TX;
+	if (tx) {
+		rc = tls_set_sw_offload(sk, ctx, 1);
+		if (ctx->conf == TLS_SW_RX)
+			conf = TLS_SW_RXTX;
+		else
+			conf = TLS_SW_TX;
+	} else {
+		rc = tls_set_sw_offload(sk, ctx, 0);
+		if (ctx->conf == TLS_SW_TX)
+			conf = TLS_SW_RXTX;
+		else
+			conf = TLS_SW_RX;
+	}
+
 	if (rc)
 		goto err_crypto_info;
 
 	ctx->conf = conf;
 	update_sk_prot(sk, ctx);
-	ctx->sk_write_space = sk->sk_write_space;
-	sk->sk_write_space = tls_write_space;
+	if (tx) {
+		ctx->sk_write_space = sk->sk_write_space;
+		sk->sk_write_space = tls_write_space;
+	} else {
+		sk->sk_socket->ops = &tls_sw_proto_ops;
+	}
 	goto out;
 
 err_crypto_info:
@@ -441,8 +469,10 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
 
 	switch (optname) {
 	case TLS_TX:
+	case TLS_RX:
 		lock_sock(sk);
-		rc = do_tls_setsockopt_tx(sk, optval, optlen);
+		rc = do_tls_setsockopt_conf(sk, optval, optlen,
+					    optname == TLS_TX);
 		release_sock(sk);
 		break;
 	default:
@@ -473,6 +503,14 @@ static void build_protos(struct proto *prot, struct proto *base)
 	prot[TLS_SW_TX] = prot[TLS_BASE];
 	prot[TLS_SW_TX].sendmsg		= tls_sw_sendmsg;
 	prot[TLS_SW_TX].sendpage	= tls_sw_sendpage;
+
+	prot[TLS_SW_RX] = prot[TLS_BASE];
+	prot[TLS_SW_RX].recvmsg		= tls_sw_recvmsg;
+	prot[TLS_SW_RX].close		= tls_sk_proto_close;
+
+	prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
+	prot[TLS_SW_RXTX].recvmsg	= tls_sw_recvmsg;
+	prot[TLS_SW_RXTX].close		= tls_sk_proto_close;
 }
 
 static int tls_init(struct sock *sk)
@@ -531,6 +569,10 @@ static int __init tls_register(void)
 {
 	build_protos(tls_prots[TLSV4], &tcp_prot);
 
+	tls_sw_proto_ops = inet_stream_ops;
+	tls_sw_proto_ops.poll = tls_sw_poll;
+	tls_sw_proto_ops.splice_read = tls_sw_splice_read;
+
 	tcp_register_ulp(&tcp_tls_ulp_ops);
 
 	return 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1c79d9ad1731..4dc766b03f00 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -34,11 +34,60 @@
  * SOFTWARE.
  */
 
+#include <linux/sched/signal.h>
 #include <linux/module.h>
 #include <crypto/aead.h>
 
+#include <net/strparser.h>
 #include <net/tls.h>
 
+static int tls_do_decryption(struct sock *sk,
+			     struct scatterlist *sgin,
+			     struct scatterlist *sgout,
+			     char *iv_recv,
+			     size_t data_len,
+			     struct sk_buff *skb,
+			     gfp_t flags)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct strp_msg *rxm = strp_msg(skb);
+	struct aead_request *aead_req;
+
+	int ret;
+	unsigned int req_size = sizeof(struct aead_request) +
+		crypto_aead_reqsize(ctx->aead_recv);
+
+	aead_req = kzalloc(req_size, flags);
+	if (!aead_req)
+		return -ENOMEM;
+
+	aead_request_set_tfm(aead_req, ctx->aead_recv);
+	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+	aead_request_set_crypt(aead_req, sgin, sgout,
+			       data_len + tls_ctx->rx.tag_size,
+			       (u8 *)iv_recv);
+	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				  crypto_req_done, &ctx->async_wait);
+
+	ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
+
+	if (ret < 0)
+		goto out;
+
+	rxm->offset += tls_ctx->rx.prepend_size;
+	rxm->full_len -= tls_ctx->rx.overhead_size;
+	tls_advance_record_sn(sk, &tls_ctx->rx);
+
+	ctx->decrypted = true;
+
+	ctx->saved_data_ready(sk);
+
+out:
+	kfree(aead_req);
+	return ret;
+}
+
 static void trim_sg(struct sock *sk, struct scatterlist *sg,
 		    int *sg_num_elem, unsigned int *sg_size, int target_size)
 {
@@ -581,13 +630,404 @@ sendpage_end:
 	return ret;
 }
 
-void tls_sw_free_tx_resources(struct sock *sk)
+static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
+				     long timeo, int *err)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct sk_buff *skb;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+	while (!(skb = ctx->recv_pkt)) {
+		if (sk->sk_err) {
+			*err = sock_error(sk);
+			return NULL;
+		}
+
+		if (sock_flag(sk, SOCK_DONE))
+			return NULL;
+
+		if ((flags & MSG_DONTWAIT) || !timeo) {
+			*err = -EAGAIN;
+			return NULL;
+		}
+
+		add_wait_queue(sk_sleep(sk), &wait);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+		sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+		remove_wait_queue(sk_sleep(sk), &wait);
+
+		/* Handle signals */
+		if (signal_pending(current)) {
+			*err = sock_intr_errno(timeo);
+			return NULL;
+		}
+	}
+
+	return skb;
+}
+
+static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+		       struct scatterlist *sgout)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + tls_ctx->rx.iv_size];
+	struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
+	struct scatterlist *sgin = &sgin_arr[0];
+	struct strp_msg *rxm = strp_msg(skb);
+	int ret, nsg = ARRAY_SIZE(sgin_arr);
+	char aad_recv[TLS_AAD_SPACE_SIZE];
+	struct sk_buff *unused;
+
+	ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+			    iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+			    tls_ctx->rx.iv_size);
+	if (ret < 0)
+		return ret;
+
+	memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	if (!sgout) {
+		nsg = skb_cow_data(skb, 0, &unused) + 1;
+		sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
+		if (!sgout)
+			sgout = sgin;
+	}
+
+	sg_init_table(sgin, nsg);
+	sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv));
+
+	nsg = skb_to_sgvec(skb, &sgin[1],
+			   rxm->offset + tls_ctx->rx.prepend_size,
+			   rxm->full_len - tls_ctx->rx.prepend_size);
+
+	tls_make_aad(aad_recv,
+		     rxm->full_len - tls_ctx->rx.overhead_size,
+		     tls_ctx->rx.rec_seq,
+		     tls_ctx->rx.rec_seq_size,
+		     ctx->control);
+
+	ret = tls_do_decryption(sk, sgin, sgout, iv,
+				rxm->full_len - tls_ctx->rx.overhead_size,
+				skb, sk->sk_allocation);
+
+	if (sgin != &sgin_arr[0])
+		kfree(sgin);
+
+	return ret;
+}
+
+static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
+			       unsigned int len)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct strp_msg *rxm = strp_msg(skb);
+
+	if (len < rxm->full_len) {
+		rxm->offset += len;
+		rxm->full_len -= len;
+
+		return false;
+	}
+
+	/* Finished with message */
+	ctx->recv_pkt = NULL;
+	kfree_skb(skb);
+	strp_unpause(&ctx->strp);
+
+	return true;
+}
+
+int tls_sw_recvmsg(struct sock *sk,
+		   struct msghdr *msg,
+		   size_t len,
+		   int nonblock,
+		   int flags,
+		   int *addr_len)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	unsigned char control;
+	struct strp_msg *rxm;
+	struct sk_buff *skb;
+	ssize_t copied = 0;
+	bool cmsg = false;
+	int err = 0;
+	long timeo;
+
+	flags |= nonblock;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
+
+	lock_sock(sk);
+
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	do {
+		bool zc = false;
+		int chunk = 0;
+
+		skb = tls_wait_data(sk, flags, timeo, &err);
+		if (!skb)
+			goto recv_end;
+
+		rxm = strp_msg(skb);
+		if (!cmsg) {
+			int cerr;
+
+			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+					sizeof(ctx->control), &ctx->control);
+			cmsg = true;
+			control = ctx->control;
+			if (ctx->control != TLS_RECORD_TYPE_DATA) {
+				if (cerr || msg->msg_flags & MSG_CTRUNC) {
+					err = -EIO;
+					goto recv_end;
+				}
+			}
+		} else if (control != ctx->control) {
+			goto recv_end;
+		}
+
+		if (!ctx->decrypted) {
+			int page_count;
+			int to_copy;
+
+			page_count = iov_iter_npages(&msg->msg_iter,
+						     MAX_SKB_FRAGS);
+			to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
+			if (to_copy <= len && page_count < MAX_SKB_FRAGS &&
+			    likely(!(flags & MSG_PEEK)))  {
+				struct scatterlist sgin[MAX_SKB_FRAGS + 1];
+				char unused[21];
+				int pages = 0;
+
+				zc = true;
+				sg_init_table(sgin, MAX_SKB_FRAGS + 1);
+				sg_set_buf(&sgin[0], unused, 13);
+
+				err = zerocopy_from_iter(sk, &msg->msg_iter,
+							 to_copy, &pages,
+							 &chunk, &sgin[1],
+							 MAX_SKB_FRAGS,	false);
+				if (err < 0)
+					goto fallback_to_reg_recv;
+
+				err = decrypt_skb(sk, skb, sgin);
+				for (; pages > 0; pages--)
+					put_page(sg_page(&sgin[pages]));
+				if (err < 0) {
+					tls_err_abort(sk, EBADMSG);
+					goto recv_end;
+				}
+			} else {
+fallback_to_reg_recv:
+				err = decrypt_skb(sk, skb, NULL);
+				if (err < 0) {
+					tls_err_abort(sk, EBADMSG);
+					goto recv_end;
+				}
+			}
+			ctx->decrypted = true;
+		}
+
+		if (!zc) {
+			chunk = min_t(unsigned int, rxm->full_len, len);
+			err = skb_copy_datagram_msg(skb, rxm->offset, msg,
+						    chunk);
+			if (err < 0)
+				goto recv_end;
+		}
+
+		copied += chunk;
+		len -= chunk;
+		if (likely(!(flags & MSG_PEEK))) {
+			u8 control = ctx->control;
+
+			if (tls_sw_advance_skb(sk, skb, chunk)) {
+				/* Return full control message to
+				 * userspace before trying to parse
+				 * another message type
+				 */
+				msg->msg_flags |= MSG_EOR;
+				if (control != TLS_RECORD_TYPE_DATA)
+					goto recv_end;
+			}
+		}
+	} while (len);
+
+recv_end:
+	release_sock(sk);
+	return copied ? : err;
+}
+
+ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
+			   struct pipe_inode_info *pipe,
+			   size_t len, unsigned int flags)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct strp_msg *rxm = NULL;
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	ssize_t copied = 0;
+	int err = 0;
+	long timeo;
+	int chunk;
+
+	lock_sock(sk);
+
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	skb = tls_wait_data(sk, flags, timeo, &err);
+	if (!skb)
+		goto splice_read_end;
+
+	/* splice does not support reading control messages */
+	if (ctx->control != TLS_RECORD_TYPE_DATA) {
+		err = -ENOTSUPP;
+		goto splice_read_end;
+	}
+
+	if (!ctx->decrypted) {
+		err = decrypt_skb(sk, skb, NULL);
+
+		if (err < 0) {
+			tls_err_abort(sk, EBADMSG);
+			goto splice_read_end;
+		}
+		ctx->decrypted = true;
+	}
+	rxm = strp_msg(skb);
+
+	chunk = min_t(unsigned int, rxm->full_len, len);
+	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
+	if (copied < 0)
+		goto splice_read_end;
+
+	if (likely(!(flags & MSG_PEEK)))
+		tls_sw_advance_skb(sk, skb, copied);
+
+splice_read_end:
+	release_sock(sk);
+	return copied ? : err;
+}
+
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+			 struct poll_table_struct *wait)
+{
+	unsigned int ret;
+	struct sock *sk = sock->sk;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+	/* Grab POLLOUT and POLLHUP from the underlying socket */
+	ret = ctx->sk_poll(file, sock, wait);
+
+	/* Clear POLLIN bits, and set based on recv_pkt */
+	ret &= ~(POLLIN | POLLRDNORM);
+	if (ctx->recv_pkt)
+		ret |= POLLIN | POLLRDNORM;
+
+	return ret;
+}
+
+static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	char header[tls_ctx->rx.prepend_size];
+	struct strp_msg *rxm = strp_msg(skb);
+	size_t cipher_overhead;
+	size_t data_len = 0;
+	int ret;
+
+	/* Verify that we have a full TLS header, or wait for more data */
+	if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+		return 0;
+
+	/* Linearize header to local buffer */
+	ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+
+	if (ret < 0)
+		goto read_failure;
+
+	ctx->control = header[0];
+
+	data_len = ((header[4] & 0xFF) | (header[3] << 8));
+
+	cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;
+
+	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
+		ret = -EMSGSIZE;
+		goto read_failure;
+	}
+	if (data_len < cipher_overhead) {
+		ret = -EBADMSG;
+		goto read_failure;
+	}
+
+	if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.version) ||
+	    header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.version)) {
+		ret = -EINVAL;
+		goto read_failure;
+	}
+
+	return data_len + TLS_HEADER_SIZE;
+
+read_failure:
+	tls_err_abort(strp->sk, ret);
+
+	return ret;
+}
+
+static void tls_queue(struct strparser *strp, struct sk_buff *skb)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+	struct strp_msg *rxm;
+
+	rxm = strp_msg(skb);
+
+	ctx->decrypted = false;
+
+	ctx->recv_pkt = skb;
+	strp_pause(strp);
+
+	strp->sk->sk_state_change(strp->sk);
+}
+
+static void tls_data_ready(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
+
+	strp_data_ready(&ctx->strp);
+}
+
+void tls_sw_free_resources(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx);
 
 	if (ctx->aead_send)
 		crypto_free_aead(ctx->aead_send);
+	if (ctx->aead_recv) {
+		if (ctx->recv_pkt) {
+			kfree_skb(ctx->recv_pkt);
+			ctx->recv_pkt = NULL;
+		}
+		crypto_free_aead(ctx->aead_recv);
+		strp_stop(&ctx->strp);
+		write_lock_bh(&sk->sk_callback_lock);
+		sk->sk_data_ready = ctx->saved_data_ready;
+		write_unlock_bh(&sk->sk_callback_lock);
+		release_sock(sk);
+		strp_done(&ctx->strp);
+		lock_sock(sk);
+	}
 
 	tls_free_both_sg(sk);
 
@@ -595,12 +1035,15 @@ void tls_sw_free_tx_resources(struct sock *sk)
 	kfree(tls_ctx);
 }
 
-int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
+int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
 	char keyval[TLS_CIPHER_AES_GCM_128_KEY_SIZE];
 	struct tls_crypto_info *crypto_info;
 	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
 	struct tls_sw_context *sw_ctx;
+	struct cipher_context *cctx;
+	struct crypto_aead **aead;
+	struct strp_callbacks cb;
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
 	char *iv, *rec_seq;
 	int rc = 0;
@@ -610,22 +1053,29 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 		goto out;
 	}
 
-	if (ctx->priv_ctx) {
-		rc = -EEXIST;
-		goto out;
+	if (!ctx->priv_ctx) {
+		sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
+		if (!sw_ctx) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		crypto_init_wait(&sw_ctx->async_wait);
+	} else {
+		sw_ctx = ctx->priv_ctx;
 	}
 
-	sw_ctx = kzalloc(sizeof(*sw_ctx), GFP_KERNEL);
-	if (!sw_ctx) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	crypto_init_wait(&sw_ctx->async_wait);
-
 	ctx->priv_ctx = (struct tls_offload_context *)sw_ctx;
 
-	crypto_info = &ctx->crypto_send;
+	if (tx) {
+		crypto_info = &ctx->crypto_send;
+		cctx = &ctx->tx;
+		aead = &sw_ctx->aead_send;
+	} else {
+		crypto_info = &ctx->crypto_recv;
+		cctx = &ctx->rx;
+		aead = &sw_ctx->aead_recv;
+	}
+
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128: {
 		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
@@ -644,48 +1094,49 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 		goto free_priv;
 	}
 
-	ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
-	ctx->tx.tag_size = tag_size;
-	ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
-	ctx->tx.iv_size = iv_size;
-	ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			     GFP_KERNEL);
-	if (!ctx->tx.iv) {
+	cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	cctx->tag_size = tag_size;
+	cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
+	cctx->iv_size = iv_size;
+	cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+			   GFP_KERNEL);
+	if (!cctx->iv) {
 		rc = -ENOMEM;
 		goto free_priv;
 	}
-	memcpy(ctx->tx.iv, gcm_128_info->salt,
-	       TLS_CIPHER_AES_GCM_128_SALT_SIZE);
-	memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
-	ctx->tx.rec_seq_size = rec_seq_size;
-	ctx->tx.rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
-	if (!ctx->tx.rec_seq) {
+	memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
+	cctx->rec_seq_size = rec_seq_size;
+	cctx->rec_seq = kmalloc(rec_seq_size, GFP_KERNEL);
+	if (!cctx->rec_seq) {
 		rc = -ENOMEM;
 		goto free_iv;
 	}
-	memcpy(ctx->tx.rec_seq, rec_seq, rec_seq_size);
+	memcpy(cctx->rec_seq, rec_seq, rec_seq_size);
 
-	sg_init_table(sw_ctx->sg_encrypted_data,
-		      ARRAY_SIZE(sw_ctx->sg_encrypted_data));
-	sg_init_table(sw_ctx->sg_plaintext_data,
-		      ARRAY_SIZE(sw_ctx->sg_plaintext_data));
+	if (tx) {
+		sg_init_table(sw_ctx->sg_encrypted_data,
+			      ARRAY_SIZE(sw_ctx->sg_encrypted_data));
+		sg_init_table(sw_ctx->sg_plaintext_data,
+			      ARRAY_SIZE(sw_ctx->sg_plaintext_data));
 
-	sg_init_table(sw_ctx->sg_aead_in, 2);
-	sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
-		   sizeof(sw_ctx->aad_space));
-	sg_unmark_end(&sw_ctx->sg_aead_in[1]);
-	sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
-	sg_init_table(sw_ctx->sg_aead_out, 2);
-	sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
-		   sizeof(sw_ctx->aad_space));
-	sg_unmark_end(&sw_ctx->sg_aead_out[1]);
-	sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+		sg_init_table(sw_ctx->sg_aead_in, 2);
+		sg_set_buf(&sw_ctx->sg_aead_in[0], sw_ctx->aad_space,
+			   sizeof(sw_ctx->aad_space));
+		sg_unmark_end(&sw_ctx->sg_aead_in[1]);
+		sg_chain(sw_ctx->sg_aead_in, 2, sw_ctx->sg_plaintext_data);
+		sg_init_table(sw_ctx->sg_aead_out, 2);
+		sg_set_buf(&sw_ctx->sg_aead_out[0], sw_ctx->aad_space,
+			   sizeof(sw_ctx->aad_space));
+		sg_unmark_end(&sw_ctx->sg_aead_out[1]);
+		sg_chain(sw_ctx->sg_aead_out, 2, sw_ctx->sg_encrypted_data);
+	}
 
-	if (!sw_ctx->aead_send) {
-		sw_ctx->aead_send = crypto_alloc_aead("gcm(aes)", 0, 0);
-		if (IS_ERR(sw_ctx->aead_send)) {
-			rc = PTR_ERR(sw_ctx->aead_send);
-			sw_ctx->aead_send = NULL;
+	if (!*aead) {
+		*aead = crypto_alloc_aead("gcm(aes)", 0, 0);
+		if (IS_ERR(*aead)) {
+			rc = PTR_ERR(*aead);
+			*aead = NULL;
 			goto free_rec_seq;
 		}
 	}
@@ -694,21 +1145,41 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 
 	memcpy(keyval, gcm_128_info->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE);
 
-	rc = crypto_aead_setkey(sw_ctx->aead_send, keyval,
+	rc = crypto_aead_setkey(*aead, keyval,
 				TLS_CIPHER_AES_GCM_128_KEY_SIZE);
 	if (rc)
 		goto free_aead;
 
-	rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tx.tag_size);
-	if (!rc)
-		return 0;
+	rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+	if (rc)
+		goto free_aead;
+
+	if (!tx) {
+		/* Set up strparser */
+		memset(&cb, 0, sizeof(cb));
+		cb.rcv_msg = tls_queue;
+		cb.parse_msg = tls_read_size;
+
+		strp_init(&sw_ctx->strp, sk, &cb);
+
+		write_lock_bh(&sk->sk_callback_lock);
+		sw_ctx->saved_data_ready = sk->sk_data_ready;
+		sk->sk_data_ready = tls_data_ready;
+		write_unlock_bh(&sk->sk_callback_lock);
+
+		sw_ctx->sk_poll = sk->sk_socket->ops->poll;
+
+		strp_check_rcv(&sw_ctx->strp);
+	}
+
+	goto out;
 
 free_aead:
-	crypto_free_aead(sw_ctx->aead_send);
-	sw_ctx->aead_send = NULL;
+	crypto_free_aead(*aead);
+	*aead = NULL;
 free_rec_seq:
-	kfree(ctx->tx.rec_seq);
-	ctx->tx.rec_seq = NULL;
+	kfree(cctx->rec_seq);
+	cctx->rec_seq = NULL;
 free_iv:
 	kfree(ctx->tx.iv);
 	ctx->tx.iv = NULL;

From b6c535b1638b32ff7817d78956b7232100570b5d Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Thu, 22 Mar 2018 10:10:44 -0700
Subject: [PATCH 1124/1678] tls: Add receive path documentation

Add documentation on rx path setup and cmsg interface.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tls.txt | 66 +++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/tls.txt b/Documentation/networking/tls.txt
index 77ed00631c12..58b5ef75f1b7 100644
--- a/Documentation/networking/tls.txt
+++ b/Documentation/networking/tls.txt
@@ -48,6 +48,9 @@ the transmit and the receive into the kernel.
 
   setsockopt(sock, SOL_TLS, TLS_TX, &crypto_info, sizeof(crypto_info));
 
+Transmit and receive are set separately, but the setup is the same, using either
+TLS_TX or TLS_RX.
+
 Sending TLS application data
 ----------------------------
 
@@ -79,6 +82,28 @@ for memory), or the encryption will always succeed.  If send() returns
 -ENOMEM and some data was left on the socket buffer from a previous
 call using MSG_MORE, the MSG_MORE data is left on the socket buffer.
 
+Receiving TLS application data
+------------------------------
+
+After setting the TLS_RX socket option, all recv family socket calls
+are decrypted using TLS parameters provided.  A full TLS record must
+be received before decryption can happen.
+
+  char buffer[16384];
+  recv(sock, buffer, 16384);
+
+Received data is decrypted directly in to the user buffer if it is
+large enough, and no additional allocations occur.  If the userspace
+buffer is too small, data is decrypted in the kernel and copied to
+userspace.
+
+EINVAL is returned if the TLS version in the received message does not
+match the version passed in setsockopt.
+
+EMSGSIZE is returned if the received message is too big.
+
+EBADMSG is returned if decryption failed for any other reason.
+
 Send TLS control messages
 -------------------------
 
@@ -118,6 +143,43 @@ using a record of type @record_type.
 Control message data should be provided unencrypted, and will be
 encrypted by the kernel.
 
+Receiving TLS control messages
+------------------------------
+
+TLS control messages are passed in the userspace buffer, with message
+type passed via cmsg.  If no cmsg buffer is provided, an error is
+returned if a control message is received.  Data messages may be
+received without a cmsg buffer set.
+
+  char buffer[16384];
+  char cmsg[CMSG_SPACE(sizeof(unsigned char))];
+  struct msghdr msg = {0};
+  msg.msg_control = cmsg;
+  msg.msg_controllen = sizeof(cmsg);
+
+  struct iovec msg_iov;
+  msg_iov.iov_base = buffer;
+  msg_iov.iov_len = 16384;
+
+  msg.msg_iov = &msg_iov;
+  msg.msg_iovlen = 1;
+
+  int ret = recvmsg(sock, &msg, 0 /* flags */);
+
+  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+  if (cmsg->cmsg_level == SOL_TLS &&
+      cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+      int record_type = *((unsigned char *)CMSG_DATA(cmsg));
+      // Do something with record_type, and control message data in
+      // buffer.
+      //
+      // Note that record_type may be == to application data (23).
+  } else {
+      // Buffer contains application data.
+  }
+
+recv will never return data from mixed types of TLS records.
+
 Integrating in to userspace TLS library
 ---------------------------------------
 
@@ -126,10 +188,10 @@ layer of a userspace TLS library.
 
 A patchset to OpenSSL to use ktls as the record layer is here:
 
-https://github.com/Mellanox/tls-openssl
+https://github.com/Mellanox/openssl/commits/tls_rx2
 
 An example of calling send directly after a handshake using
 gnutls.  Since it doesn't implement a full record layer, control
 messages are not supported:
 
-https://github.com/Mellanox/tls-af_ktls_tool
+https://github.com/ktls/af_ktls-tool/commits/RX

From abe0884011f1a569bc1254b70c41525b755e8037 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 23 Mar 2018 11:41:28 +0100
Subject: [PATCH 1125/1678] bpf: Remove struct bpf_verifier_env argument from
 print_bpf_insn

We use print_bpf_insn in user space (bpftool and soon perf),
so it'd be nice to keep it generic and strip it off the kernel
struct bpf_verifier_env argument.

This argument can be safely removed, because its users can
use the struct bpf_insn_cbs::private_data to pass it.

By changing the argument type  we can no longer have clean
'verbose' alias to 'bpf_verifier_log_write' in verifier.c.
Instead  we're adding the  'verbose' cb_print callback and
removing the alias.

This way we have new cb_print callback in place, and all
the 'verbose(env, ...) calls in verifier.c will cleanly
cast to 'verbose(void *, ...)' so no other change is
needed.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/disasm.c   | 52 +++++++++++++++++++++----------------------
 kernel/bpf/disasm.h   |  5 +----
 kernel/bpf/verifier.c | 44 ++++++++++++++++++++++--------------
 3 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 8740406df2cd..d6b76377cb6e 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -113,16 +113,16 @@ static const char *const bpf_jmp_string[16] = {
 };
 
 static void print_bpf_end_insn(bpf_insn_print_t verbose,
-			       struct bpf_verifier_env *env,
+			       void *private_data,
 			       const struct bpf_insn *insn)
 {
-	verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg,
+	verbose(private_data, "(%02x) r%d = %s%d r%d\n",
+		insn->code, insn->dst_reg,
 		BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le",
 		insn->imm, insn->dst_reg);
 }
 
 void print_bpf_insn(const struct bpf_insn_cbs *cbs,
-		    struct bpf_verifier_env *env,
 		    const struct bpf_insn *insn,
 		    bool allow_ptr_leaks)
 {
@@ -132,23 +132,23 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 	if (class == BPF_ALU || class == BPF_ALU64) {
 		if (BPF_OP(insn->code) == BPF_END) {
 			if (class == BPF_ALU64)
-				verbose(env, "BUG_alu64_%02x\n", insn->code);
+				verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code);
 			else
-				print_bpf_end_insn(verbose, env, insn);
+				print_bpf_end_insn(verbose, cbs->private_data, insn);
 		} else if (BPF_OP(insn->code) == BPF_NEG) {
-			verbose(env, "(%02x) r%d = %s-r%d\n",
+			verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
 				insn->code, insn->dst_reg,
 				class == BPF_ALU ? "(u32) " : "",
 				insn->dst_reg);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(env, "(%02x) %sr%d %s %sr%d\n",
+			verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
 				insn->code, class == BPF_ALU ? "(u32) " : "",
 				insn->dst_reg,
 				bpf_alu_string[BPF_OP(insn->code) >> 4],
 				class == BPF_ALU ? "(u32) " : "",
 				insn->src_reg);
 		} else {
-			verbose(env, "(%02x) %sr%d %s %s%d\n",
+			verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
 				insn->code, class == BPF_ALU ? "(u32) " : "",
 				insn->dst_reg,
 				bpf_alu_string[BPF_OP(insn->code) >> 4],
@@ -157,46 +157,46 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 		}
 	} else if (class == BPF_STX) {
 		if (BPF_MODE(insn->code) == BPF_MEM)
-			verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n",
+			verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg,
 				insn->off, insn->src_reg);
 		else if (BPF_MODE(insn->code) == BPF_XADD)
-			verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
+			verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->dst_reg, insn->off,
 				insn->src_reg);
 		else
-			verbose(env, "BUG_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_%02x\n", insn->code);
 	} else if (class == BPF_ST) {
 		if (BPF_MODE(insn->code) != BPF_MEM) {
-			verbose(env, "BUG_st_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
 			return;
 		}
-		verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n",
+		verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
 			insn->code,
 			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 			insn->dst_reg,
 			insn->off, insn->imm);
 	} else if (class == BPF_LDX) {
 		if (BPF_MODE(insn->code) != BPF_MEM) {
-			verbose(env, "BUG_ldx_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
 			return;
 		}
-		verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n",
+		verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n",
 			insn->code, insn->dst_reg,
 			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 			insn->src_reg, insn->off);
 	} else if (class == BPF_LD) {
 		if (BPF_MODE(insn->code) == BPF_ABS) {
-			verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n",
+			verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->imm);
 		} else if (BPF_MODE(insn->code) == BPF_IND) {
-			verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
+			verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
 				insn->code,
 				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
 				insn->src_reg, insn->imm);
@@ -212,12 +212,12 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			if (map_ptr && !allow_ptr_leaks)
 				imm = 0;
 
-			verbose(env, "(%02x) r%d = %s\n",
+			verbose(cbs->private_data, "(%02x) r%d = %s\n",
 				insn->code, insn->dst_reg,
 				__func_imm_name(cbs, insn, imm,
 						tmp, sizeof(tmp)));
 		} else {
-			verbose(env, "BUG_ld_%02x\n", insn->code);
+			verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
 			return;
 		}
 	} else if (class == BPF_JMP) {
@@ -227,35 +227,35 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
 			char tmp[64];
 
 			if (insn->src_reg == BPF_PSEUDO_CALL) {
-				verbose(env, "(%02x) call pc%s\n",
+				verbose(cbs->private_data, "(%02x) call pc%s\n",
 					insn->code,
 					__func_get_name(cbs, insn,
 							tmp, sizeof(tmp)));
 			} else {
 				strcpy(tmp, "unknown");
-				verbose(env, "(%02x) call %s#%d\n", insn->code,
+				verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code,
 					__func_get_name(cbs, insn,
 							tmp, sizeof(tmp)),
 					insn->imm);
 			}
 		} else if (insn->code == (BPF_JMP | BPF_JA)) {
-			verbose(env, "(%02x) goto pc%+d\n",
+			verbose(cbs->private_data, "(%02x) goto pc%+d\n",
 				insn->code, insn->off);
 		} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
-			verbose(env, "(%02x) exit\n", insn->code);
+			verbose(cbs->private_data, "(%02x) exit\n", insn->code);
 		} else if (BPF_SRC(insn->code) == BPF_X) {
-			verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n",
+			verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
 				insn->code, insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
 				insn->src_reg, insn->off);
 		} else {
-			verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n",
+			verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
 				insn->code, insn->dst_reg,
 				bpf_jmp_string[BPF_OP(insn->code) >> 4],
 				insn->imm, insn->off);
 		}
 	} else {
-		verbose(env, "(%02x) %s\n",
+		verbose(cbs->private_data, "(%02x) %s\n",
 			insn->code, bpf_class_string[class]);
 	}
 }
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index 266fe8ee542b..e1324a834a24 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -22,14 +22,12 @@
 #include <string.h>
 #endif
 
-struct bpf_verifier_env;
-
 extern const char *const bpf_alu_string[16];
 extern const char *const bpf_class_string[8];
 
 const char *func_id_name(int id);
 
-typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
+typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data,
 						const char *, ...);
 typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
 					      const struct bpf_insn *insn);
@@ -45,7 +43,6 @@ struct bpf_insn_cbs {
 };
 
 void print_bpf_insn(const struct bpf_insn_cbs *cbs,
-		    struct bpf_verifier_env *env,
 		    const struct bpf_insn *insn,
 		    bool allow_ptr_leaks);
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e9f7c20691c1..e93a6e48641b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -168,23 +168,16 @@ struct bpf_call_arg_meta {
 
 static DEFINE_MUTEX(bpf_verifier_lock);
 
-/* log_level controls verbosity level of eBPF verifier.
- * bpf_verifier_log_write() is used to dump the verification trace to the log,
- * so the user can figure out what's wrong with the program
- */
-__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
-					   const char *fmt, ...)
+static void log_write(struct bpf_verifier_env *env, const char *fmt,
+		      va_list args)
 {
 	struct bpf_verifer_log *log = &env->log;
 	unsigned int n;
-	va_list args;
 
 	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
 		return;
 
-	va_start(args, fmt);
 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
-	va_end(args);
 
 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 		  "verifier log line truncated - local buffer too short\n");
@@ -197,14 +190,30 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 	else
 		log->ubuf = NULL;
 }
-EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
-/* Historically bpf_verifier_log_write was called verbose, but the name was too
- * generic for symbol export. The function was renamed, but not the calls in
- * the verifier to avoid complicating backports. Hence the alias below.
+
+/* log_level controls verbosity level of eBPF verifier.
+ * bpf_verifier_log_write() is used to dump the verification trace to the log,
+ * so the user can figure out what's wrong with the program
  */
-static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
-				   const char *fmt, ...)
-	__attribute__((alias("bpf_verifier_log_write")));
+__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
+					   const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	log_write(env, fmt, args);
+	va_end(args);
+}
+EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
+
+__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	log_write(private_data, fmt, args);
+	va_end(args);
+}
 
 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 {
@@ -4600,10 +4609,11 @@ static int do_check(struct bpf_verifier_env *env)
 		if (env->log.level) {
 			const struct bpf_insn_cbs cbs = {
 				.cb_print	= verbose,
+				.private_data	= env,
 			};
 
 			verbose(env, "%d: ", insn_idx);
-			print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
+			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
 		}
 
 		if (bpf_prog_is_dev_bound(env->prog->aux)) {

From 337682ca7e3cdce81fe3436bf59782d071c967e6 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 23 Mar 2018 11:41:29 +0100
Subject: [PATCH 1126/1678] bpftool: Adjust to new print_bpf_insn interface

Change bpftool to skip the removed struct bpf_verifier_env
argument in print_bpf_insn. It was passed as NULL anyway.

No functional change intended.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/xlated_dumper.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 20da835e9e38..7a3173b76c16 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -114,7 +114,7 @@ static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
 		       sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
 }
 
-static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+static void print_insn(void *private_data, const char *fmt, ...)
 {
 	va_list args;
 
@@ -124,7 +124,7 @@ static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
 }
 
 static void
-print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
+print_insn_for_graph(void *private_data, const char *fmt, ...)
 {
 	char buf[64], *p;
 	va_list args;
@@ -154,7 +154,7 @@ print_insn_for_graph(struct bpf_verifier_env *env, const char *fmt, ...)
 	printf("%s", buf);
 }
 
-static void print_insn_json(struct bpf_verifier_env *env, const char *fmt, ...)
+static void print_insn_json(void *private_data, const char *fmt, ...)
 {
 	unsigned int l = strlen(fmt);
 	char chomped_fmt[l];
@@ -248,7 +248,7 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
 
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "disasm");
-		print_bpf_insn(&cbs, NULL, insn + i, true);
+		print_bpf_insn(&cbs, insn + i, true);
 
 		if (opcodes) {
 			jsonw_name(json_wtr, "opcodes");
@@ -302,7 +302,7 @@ void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
 		double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
 
 		printf("% 4d: ", i);
-		print_bpf_insn(&cbs, NULL, insn + i, true);
+		print_bpf_insn(&cbs, insn + i, true);
 
 		if (opcodes) {
 			printf("       ");
@@ -331,7 +331,7 @@ void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
 
 	for (; cur <= insn_end; cur++) {
 		printf("% 4d: ", (int)(cur - insn_start + start_idx));
-		print_bpf_insn(&cbs, NULL, cur, true);
+		print_bpf_insn(&cbs, cur, true);
 		if (cur != insn_end)
 			printf(" | ");
 	}

From 82792a070b168a9659c78e6a19efe5b6d1a7ce73 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 23 Mar 2018 18:27:06 +0200
Subject: [PATCH 1127/1678] net: bridge: fix direct access to bridge
 vlan_enabled and use helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to use br_vlan_enabled() helper otherwise we'll break builds
without bridge vlans:
net/bridge//br_if.c: In function ‘br_mtu’:
net/bridge//br_if.c:458:8: error: ‘const struct net_bridge’ has no
member named ‘vlan_enabled’
  if (br->vlan_enabled)
        ^
net/bridge//br_if.c:462:1: warning: control reaches end of non-void
function [-Wreturn-type]
 }
 ^
scripts/Makefile.build:324: recipe for target 'net/bridge//br_if.o'
failed

Fixes: 419d14af9e07 ("bridge: Allow max MTU when multiple VLANs present")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 48dc4d2e2be3..87b2afd455c7 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -455,7 +455,7 @@ static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
 
 int br_mtu(const struct net_bridge *br)
 {
-	if (br->vlan_enabled)
+	if (br_vlan_enabled(br->dev))
 		return __br_mtu(br, max_mtu);
 	else
 		return __br_mtu(br, min_mtu);

From 440ea4ae182820cfced77cee2b7f3e6eaa8ac5d3 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 22 Mar 2018 19:12:19 +0100
Subject: [PATCH 1128/1678] tc-testing: add selftests for 'bpf' action

Test d959: Add cBPF action with valid bytecode
Test f84a: Add cBPF action with invalid bytecode
Test e939: Add eBPF action with valid object-file
Test 282d: Add eBPF action with invalid object-file
Test d819: Replace cBPF bytecode and action control
Test 6ae3: Delete cBPF action
Test 3e0d: List cBPF actions
Test 55ce: Flush BPF actions
Test ccc3: Add cBPF action with duplicate index
Test 89c7: Add cBPF action with invalid index
Test 7ab9: Add cBPF action with cookie

Changes since v1:
 - use index=2^32-1 in test ccc3, add tests 7a89, 89c7 (thanks Roman Mashak)
 - added test 282d

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/bpf.json      | 289 ++++++++++++++++++
 1 file changed, 289 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
new file mode 100644
index 000000000000..5b012f4981d4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -0,0 +1,289 @@
+[
+    {
+        "id": "d959",
+        "name": "Add cBPF action with valid bytecode",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 100",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "f84a",
+        "name": "Add cBPF action with invalid bytecode",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 100",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "e939",
+        "name": "Add eBPF action with valid object-file",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 667",
+        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf",
+            "rm -f _b.o"
+        ]
+    },
+    {
+        "id": "282d",
+        "name": "Add eBPF action with invalid object-file",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 667",
+        "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf",
+            "rm -f _c.o"
+        ]
+    },
+    {
+        "id": "d819",
+        "name": "Replace cBPF bytecode and action control",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 555",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "6ae3",
+        "name": "Delete cBPF action ",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action bpf index 444",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 444",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "3e0d",
+        "name": "List cBPF actions",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+        ],
+        "cmdUnderTest": "$TC action list action bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "55ce",
+        "name": "Flush BPF actions",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+        ],
+        "cmdUnderTest": "$TC action flush action bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "ccc3",
+        "name": "Add cBPF action with duplicate index",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295"
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 4294967295",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "89c7",
+        "name": "Add cBPF action with invalid index",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action ls action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "7ab9",
+        "name": "Add cBPF action with cookie",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    }
+]

From 885b0d4375a102d002bc654b4b68da46ba060032 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Thu, 22 Mar 2018 20:14:47 +0200
Subject: [PATCH 1129/1678] mlxsw: spectrum_span: Fix initialization of struct
 mlxsw_sp_span_parms

Since the first element of struct mlxsw_sp_span_parms is a pointer,
to zero-initialize this structure the correct notation is not = {0}, but
rather = {NULL}, as reported by sparse.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index ae22a3daffbf..ac24e52d74db 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -730,7 +730,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 {
 	struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
 	const struct mlxsw_sp_span_entry_ops *ops;
-	struct mlxsw_sp_span_parms sparms = {0};
+	struct mlxsw_sp_span_parms sparms = {NULL};
 	struct mlxsw_sp_span_entry *span_entry;
 	int err;
 
@@ -787,7 +787,7 @@ void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
 	ASSERT_RTNL();
 	for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
-		struct mlxsw_sp_span_parms sparms = {0};
+		struct mlxsw_sp_span_parms sparms = {NULL};
 
 		if (!curr->ref_count)
 			continue;

From fc18999ed2a217f249e9880363b28a89c839737e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 21:34:46 +0300
Subject: [PATCH 1130/1678] net: Convert udp_sysctl_ops

These pernet_operations just initialize udp4 defaults.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 908fc02fb4f8..c6dc019bc64b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2842,7 +2842,8 @@ static int __net_init udp_sysctl_init(struct net *net)
 }
 
 static struct pernet_operations __net_initdata udp_sysctl_ops = {
-	.init       = udp_sysctl_init,
+	.init	= udp_sysctl_init,
+	.async	= true,
 };
 
 void __init udp_init(void)

From b2864fbdc5ab6bb4750b00580b67070bd0ab3762 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 22 Mar 2018 21:34:55 +0300
Subject: [PATCH 1131/1678] net: Convert rxrpc_net_ops

These pernet_operations modifies rxrpc_net_id-pointed
per-net entities. There is external link to AF_RXRPC
in fs/afs/Kconfig, but it seems there is no other
pernet_operations interested in that per-net entities.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/net_ns.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index f18c9248e0d4..5fd939dabf41 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -106,4 +106,5 @@ struct pernet_operations rxrpc_net_ops = {
 	.exit	= rxrpc_exit_net,
 	.id	= &rxrpc_net_id,
 	.size	= sizeof(struct rxrpc_net),
+	.async	= true,
 };

From 594619497f3d6d4b8d8440e6d380e8da9dcc9eeb Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Thu, 22 Mar 2018 13:44:56 -0500
Subject: [PATCH 1132/1678] net/mlx5: Fix use-after-free

_rule_ is being freed and then dereferenced by accessing rule->ctx

Fix this by copying the value returned by PTR_ERR(rule->ctx) into a local
variable for its safe use after freeing _rule_

Addresses-Coverity-ID: 1466041 ("Read from pointer after free")
Fixes: 05564d0ae075 ("net/mlx5: Add flow-steering commands for FPGA IPSec implementation")
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 4f1568528738..0f5da499a223 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -1061,8 +1061,9 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_core_dev *dev,
 
 	rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
 	if (IS_ERR(rule->ctx)) {
+		int err = PTR_ERR(rule->ctx);
 		kfree(rule);
-		return PTR_ERR(rule->ctx);
+		return err;
 	}
 
 	rule->fte = fte;

From cb30a63384bc91d5da06e1cede1115f666a29271 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:45 +0100
Subject: [PATCH 1133/1678] tipc: refactor function tipc_enable_bearer()

As a preparation for the next commits we try to reduce the footprint of
the function tipc_enable_bearer(), while hopefully making is simpler to
follow.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 136 ++++++++++++++++++++++++----------------------
 1 file changed, 70 insertions(+), 66 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index f3d2e83313e1..e18cb271b005 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -230,88 +230,90 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
  * tipc_enable_bearer - enable bearer with the given name
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
-			      u32 disc_domain, u32 priority,
+			      u32 disc_domain, u32 prio,
 			      struct nlattr *attr[])
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_bearer_names b_names;
+	u32 self = tipc_own_addr(net);
+	int with_this_prio = 1;
 	struct tipc_bearer *b;
 	struct tipc_media *m;
-	struct tipc_bearer_names b_names;
 	struct sk_buff *skb;
 	char addr_string[16];
-	u32 bearer_id;
-	u32 with_this_prio;
-	u32 i;
+	int bearer_id = 0;
 	int res = -EINVAL;
+	char *errstr = "";
 
-	if (!tn->own_addr) {
-		pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
-			name);
-		return -ENOPROTOOPT;
+	if (!self) {
+		errstr = "not supported in standalone mode";
+		res = -ENOPROTOOPT;
+		goto rejected;
 	}
+
 	if (!bearer_name_validate(name, &b_names)) {
-		pr_warn("Bearer <%s> rejected, illegal name\n", name);
-		return -EINVAL;
+		errstr = "illegal name";
+		goto rejected;
 	}
-	if (tipc_addr_domain_valid(disc_domain) &&
-	    (disc_domain != tn->own_addr)) {
-		if (tipc_in_scope(disc_domain, tn->own_addr)) {
-			disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
-			res = 0;   /* accept any node in own cluster */
-		} else if (in_own_cluster_exact(net, disc_domain))
-			res = 0;   /* accept specified node in own cluster */
+
+	if (tipc_addr_domain_valid(disc_domain) && disc_domain != self) {
+		if (tipc_in_scope(disc_domain, self)) {
+			/* Accept any node in own cluster */
+			disc_domain = self & TIPC_ZONE_CLUSTER_MASK;
+			res = 0;
+		} else if (in_own_cluster_exact(net, disc_domain)) {
+			/* Accept specified node in own cluster */
+			res = 0;
+		}
 	}
 	if (res) {
-		pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
-			name);
-		return -EINVAL;
+		errstr = "illegal discovery domain";
+		goto rejected;
 	}
-	if ((priority > TIPC_MAX_LINK_PRI) &&
-	    (priority != TIPC_MEDIA_LINK_PRI)) {
-		pr_warn("Bearer <%s> rejected, illegal priority\n", name);
-		return -EINVAL;
+
+	if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
+		errstr = "illegal priority";
+		goto rejected;
 	}
 
 	m = tipc_media_find(b_names.media_name);
 	if (!m) {
-		pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
-			name, b_names.media_name);
-		return -EINVAL;
+		errstr = "media not registered";
+		goto rejected;
 	}
 
-	if (priority == TIPC_MEDIA_LINK_PRI)
-		priority = m->priority;
+	if (prio == TIPC_MEDIA_LINK_PRI)
+		prio = m->priority;
 
-restart:
-	bearer_id = MAX_BEARERS;
-	with_this_prio = 1;
-	for (i = MAX_BEARERS; i-- != 0; ) {
-		b = rtnl_dereference(tn->bearer_list[i]);
-		if (!b) {
-			bearer_id = i;
-			continue;
-		}
+	/* Check new bearer vs existing ones and find free bearer id if any */
+	while (bearer_id < MAX_BEARERS) {
+		b = rtnl_dereference(tn->bearer_list[bearer_id]);
+		if (!b)
+			break;
 		if (!strcmp(name, b->name)) {
-			pr_warn("Bearer <%s> rejected, already enabled\n",
-				name);
-			return -EINVAL;
+			errstr = "already enabled";
+			goto rejected;
 		}
-		if ((b->priority == priority) &&
-		    (++with_this_prio > 2)) {
-			if (priority-- == 0) {
-				pr_warn("Bearer <%s> rejected, duplicate priority\n",
-					name);
-				return -EINVAL;
-			}
-			pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
-				name, priority + 1, priority);
-			goto restart;
+		bearer_id++;
+		if (b->priority != prio)
+			continue;
+		if (++with_this_prio <= 2)
+			continue;
+		pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+			name, prio);
+		if (prio == TIPC_MIN_LINK_PRI) {
+			errstr = "cannot adjust to lower";
+			goto rejected;
 		}
+		pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
+		prio--;
+		bearer_id = 0;
+		with_this_prio = 1;
 	}
+
 	if (bearer_id >= MAX_BEARERS) {
-		pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
-			name, MAX_BEARERS);
-		return -EINVAL;
+		errstr = "max 3 bearers permitted";
+		goto rejected;
 	}
 
 	b = kzalloc(sizeof(*b), GFP_ATOMIC);
@@ -322,10 +324,9 @@ restart:
 	b->media = m;
 	res = m->enable_media(net, b, attr);
 	if (res) {
-		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
-			name, -res);
 		kfree(b);
-		return -EINVAL;
+		errstr = "failed to enable media";
+		goto rejected;
 	}
 
 	b->identity = bearer_id;
@@ -333,15 +334,15 @@ restart:
 	b->window = m->window;
 	b->domain = disc_domain;
 	b->net_plane = bearer_id + 'A';
-	b->priority = priority;
+	b->priority = prio;
 	test_and_set_bit_lock(0, &b->up);
 
 	res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
 	if (res) {
 		bearer_disable(net, b);
-		pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
-			name);
-		return -EINVAL;
+		kfree(b);
+		errstr = "failed to create discoverer";
+		goto rejected;
 	}
 
 	rcu_assign_pointer(tn->bearer_list[bearer_id], b);
@@ -353,9 +354,12 @@ restart:
 		return -ENOMEM;
 	}
 
-	pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
-		name,
-		tipc_addr_string_fill(addr_string, disc_domain), priority);
+	tipc_addr_string_fill(addr_string, disc_domain);
+	pr_info("Enabled bearer <%s>, discovery scope %s, priority %u\n",
+		name, addr_string, prio);
+	return res;
+rejected:
+	pr_warn("Bearer <%s> rejected, %s\n", name, errstr);
 	return res;
 }
 

From b39e465e56ec38ca64b4c0affeb6411eb0ed7267 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:46 +0100
Subject: [PATCH 1134/1678] tipc: some cleanups in the file discover.c

To facilitate the coming changes in the neighbor discovery functionality
we make some renaming and refactoring of that code. The functional changes
in this commit are trivial, e.g., that we move the message sending call in
tipc_disc_timeout() outside the spinlock protected region.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c   |   8 +-
 net/tipc/bearer.h   |   2 +-
 net/tipc/discover.c | 321 +++++++++++++++++++++-----------------------
 net/tipc/discover.h |   8 +-
 4 files changed, 164 insertions(+), 175 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index e18cb271b005..76340b9e4851 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -210,7 +210,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
 	rcu_read_lock();
 	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
 	if (b)
-		tipc_disc_add_dest(b->link_req);
+		tipc_disc_add_dest(b->disc);
 	rcu_read_unlock();
 }
 
@@ -222,7 +222,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
 	rcu_read_lock();
 	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
 	if (b)
-		tipc_disc_remove_dest(b->link_req);
+		tipc_disc_remove_dest(b->disc);
 	rcu_read_unlock();
 }
 
@@ -389,8 +389,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
 	tipc_node_delete_links(net, bearer_id);
 	b->media->disable_media(b);
 	RCU_INIT_POINTER(b->media_ptr, NULL);
-	if (b->link_req)
-		tipc_disc_delete(b->link_req);
+	if (b->disc)
+		tipc_disc_delete(b->disc);
 	RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
 	kfree_rcu(b, rcu);
 	tipc_mon_delete(net, bearer_id);
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index a53613d95bc9..6efcee63a381 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -159,7 +159,7 @@ struct tipc_bearer {
 	u32 tolerance;
 	u32 domain;
 	u32 identity;
-	struct tipc_link_req *link_req;
+	struct tipc_discoverer *disc;
 	char net_plane;
 	unsigned long up;
 };
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 92e4828c6b09..09f75558d353 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -39,34 +39,34 @@
 #include "discover.h"
 
 /* min delay during bearer start up */
-#define TIPC_LINK_REQ_INIT	msecs_to_jiffies(125)
+#define TIPC_DISC_INIT	msecs_to_jiffies(125)
 /* max delay if bearer has no links */
-#define TIPC_LINK_REQ_FAST	msecs_to_jiffies(1000)
+#define TIPC_DISC_FAST	msecs_to_jiffies(1000)
 /* max delay if bearer has links */
-#define TIPC_LINK_REQ_SLOW	msecs_to_jiffies(60000)
+#define TIPC_DISC_SLOW	msecs_to_jiffies(60000)
 /* indicates no timer in use */
-#define TIPC_LINK_REQ_INACTIVE	0xffffffff
+#define TIPC_DISC_INACTIVE	0xffffffff
 
 /**
- * struct tipc_link_req - information about an ongoing link setup request
+ * struct tipc_discoverer - information about an ongoing link setup request
  * @bearer_id: identity of bearer issuing requests
  * @net: network namespace instance
  * @dest: destination address for request messages
  * @domain: network domain to which links can be established
  * @num_nodes: number of nodes currently discovered (i.e. with an active link)
  * @lock: spinlock for controlling access to requests
- * @buf: request message to be (repeatedly) sent
+ * @skb: request message to be (repeatedly) sent
  * @timer: timer governing period between requests
  * @timer_intv: current interval between requests (in ms)
  */
-struct tipc_link_req {
+struct tipc_discoverer {
 	u32 bearer_id;
 	struct tipc_media_addr dest;
 	struct net *net;
 	u32 domain;
 	int num_nodes;
 	spinlock_t lock;
-	struct sk_buff *buf;
+	struct sk_buff *skb;
 	struct timer_list timer;
 	unsigned long timer_intv;
 };
@@ -77,22 +77,35 @@ struct tipc_link_req {
  * @type: message type (request or response)
  * @b: ptr to bearer issuing message
  */
-static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type,
+static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
+			       u32 mtyp, struct tipc_bearer *b)
+{
+	struct tipc_net *tn = tipc_net(net);
+	u32 dest_domain = b->domain;
+	struct tipc_msg *hdr;
+
+	hdr = buf_msg(skb);
+	tipc_msg_init(tn->own_addr, hdr, LINK_CONFIG, mtyp,
+		      MAX_H_SIZE, dest_domain);
+	msg_set_non_seq(hdr, 1);
+	msg_set_node_sig(hdr, tn->random);
+	msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
+	msg_set_dest_domain(hdr, dest_domain);
+	msg_set_bc_netid(hdr, tn->net_id);
+	b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+}
+
+static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst, u32 src,
+			       struct tipc_media_addr *maddr,
 			       struct tipc_bearer *b)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct tipc_msg *msg;
-	u32 dest_domain = b->domain;
+	struct sk_buff *skb;
 
-	msg = buf_msg(buf);
-	tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type,
-		      MAX_H_SIZE, dest_domain);
-	msg_set_non_seq(msg, 1);
-	msg_set_node_sig(msg, tn->random);
-	msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES);
-	msg_set_dest_domain(msg, dest_domain);
-	msg_set_bc_netid(msg, tn->net_id);
-	b->media->addr2msg(msg_media_addr(msg), &b->addr);
+	skb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return;
+	tipc_disc_init_msg(net, skb, mtyp, b);
+	tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
 }
 
 /**
@@ -116,149 +129,123 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
 
 /**
  * tipc_disc_rcv - handle incoming discovery message (request or response)
- * @net: the applicable net namespace
- * @buf: buffer containing message
- * @bearer: bearer that message arrived on
+ * @net: applicable net namespace
+ * @skb: buffer containing message
+ * @b: bearer that message arrived on
  */
 void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
-		   struct tipc_bearer *bearer)
+		   struct tipc_bearer *b)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct tipc_media_addr maddr;
-	struct sk_buff *rskb;
+	struct tipc_net *tn = tipc_net(net);
 	struct tipc_msg *hdr = buf_msg(skb);
-	u32 ddom = msg_dest_domain(hdr);
-	u32 onode = msg_prevnode(hdr);
-	u32 net_id = msg_bc_netid(hdr);
-	u32 mtyp = msg_type(hdr);
-	u32 signature = msg_node_sig(hdr);
 	u16 caps = msg_node_capabilities(hdr);
-	bool respond = false;
+	u32 signature = msg_node_sig(hdr);
+	u32 dst = msg_dest_domain(hdr);
+	u32 net_id = msg_bc_netid(hdr);
+	u32 self = tipc_own_addr(net);
+	struct tipc_media_addr maddr;
+	u32 src = msg_prevnode(hdr);
+	u32 mtyp = msg_type(hdr);
 	bool dupl_addr = false;
+	bool respond = false;
 	int err;
 
-	err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
+	err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
 	kfree_skb(skb);
-	if (err)
+	if (err || maddr.broadcast) {
+		pr_warn_ratelimited("Rcv corrupt discovery message\n");
+		return;
+	}
+	/* Ignore discovery messages from own node */
+	if (!memcmp(&maddr, &b->addr, sizeof(maddr)))
 		return;
-
-	/* Ensure message from node is valid and communication is permitted */
 	if (net_id != tn->net_id)
 		return;
-	if (maddr.broadcast)
+	if (!tipc_addr_domain_valid(dst))
 		return;
-	if (!tipc_addr_domain_valid(ddom))
+	if (!tipc_addr_node_valid(src))
 		return;
-	if (!tipc_addr_node_valid(onode))
-		return;
-
-	if (in_own_node(net, onode)) {
-		if (memcmp(&maddr, &bearer->addr, sizeof(maddr)))
-			disc_dupl_alert(bearer, tn->own_addr, &maddr);
+	if (in_own_node(net, src)) {
+		disc_dupl_alert(b, self, &maddr);
 		return;
 	}
-	if (!tipc_in_scope(ddom, tn->own_addr))
+	if (!tipc_in_scope(dst, self))
 		return;
-	if (!tipc_in_scope(bearer->domain, onode))
+	if (!tipc_in_scope(b->domain, src))
 		return;
-
-	tipc_node_check_dest(net, onode, bearer, caps, signature,
+	tipc_node_check_dest(net, src, b, caps, signature,
 			     &maddr, &respond, &dupl_addr);
 	if (dupl_addr)
-		disc_dupl_alert(bearer, onode, &maddr);
+		disc_dupl_alert(b, src, &maddr);
+	if (!respond)
+		return;
+	if (mtyp != DSC_REQ_MSG)
+		return;
+	tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, &maddr, b);
+}
 
-	/* Send response, if necessary */
-	if (respond && (mtyp == DSC_REQ_MSG)) {
-		rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
-		if (!rskb)
-			return;
-		tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
-		tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr);
+/* tipc_disc_add_dest - increment set of discovered nodes
+ */
+void tipc_disc_add_dest(struct tipc_discoverer *d)
+{
+	spin_lock_bh(&d->lock);
+	d->num_nodes++;
+	spin_unlock_bh(&d->lock);
+}
+
+/* tipc_disc_remove_dest - decrement set of discovered nodes
+ */
+void tipc_disc_remove_dest(struct tipc_discoverer *d)
+{
+	int intv, num;
+
+	spin_lock_bh(&d->lock);
+	d->num_nodes--;
+	num = d->num_nodes;
+	intv = d->timer_intv;
+	if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST))  {
+		d->timer_intv = TIPC_DISC_INIT;
+		mod_timer(&d->timer, jiffies + d->timer_intv);
 	}
+	spin_unlock_bh(&d->lock);
 }
 
-/**
- * disc_update - update frequency of periodic link setup requests
- * @req: ptr to link request structure
- *
- * Reinitiates discovery process if discovery object has no associated nodes
- * and is either not currently searching or is searching at a slow rate
- */
-static void disc_update(struct tipc_link_req *req)
-{
-	if (!req->num_nodes) {
-		if ((req->timer_intv == TIPC_LINK_REQ_INACTIVE) ||
-		    (req->timer_intv > TIPC_LINK_REQ_FAST)) {
-			req->timer_intv = TIPC_LINK_REQ_INIT;
-			mod_timer(&req->timer, jiffies + req->timer_intv);
-		}
-	}
-}
-
-/**
- * tipc_disc_add_dest - increment set of discovered nodes
- * @req: ptr to link request structure
- */
-void tipc_disc_add_dest(struct tipc_link_req *req)
-{
-	spin_lock_bh(&req->lock);
-	req->num_nodes++;
-	spin_unlock_bh(&req->lock);
-}
-
-/**
- * tipc_disc_remove_dest - decrement set of discovered nodes
- * @req: ptr to link request structure
- */
-void tipc_disc_remove_dest(struct tipc_link_req *req)
-{
-	spin_lock_bh(&req->lock);
-	req->num_nodes--;
-	disc_update(req);
-	spin_unlock_bh(&req->lock);
-}
-
-/**
- * disc_timeout - send a periodic link setup request
- * @data: ptr to link request structure
- *
+/* tipc_disc_timeout - send a periodic link setup request
  * Called whenever a link setup request timer associated with a bearer expires.
+ * - Keep doubling time between sent request until limit is reached;
+ * - Hold at fast polling rate if we don't have any associated nodes
+ * - Otherwise hold at slow polling rate
  */
-static void disc_timeout(struct timer_list *t)
+static void tipc_disc_timeout(struct timer_list *t)
 {
-	struct tipc_link_req *req = from_timer(req, t, timer);
-	struct sk_buff *skb;
-	int max_delay;
+	struct tipc_discoverer *d = from_timer(d, t, timer);
+	struct tipc_media_addr maddr;
+	struct sk_buff *skb = NULL;
+	struct net *net;
+	u32 bearer_id;
 
-	spin_lock_bh(&req->lock);
+	spin_lock_bh(&d->lock);
 
 	/* Stop searching if only desired node has been found */
-	if (tipc_node(req->domain) && req->num_nodes) {
-		req->timer_intv = TIPC_LINK_REQ_INACTIVE;
+	if (tipc_node(d->domain) && d->num_nodes) {
+		d->timer_intv = TIPC_DISC_INACTIVE;
 		goto exit;
 	}
-
-	/*
-	 * Send discovery message, then update discovery timer
-	 *
-	 * Keep doubling time between requests until limit is reached;
-	 * hold at fast polling rate if don't have any associated nodes,
-	 * otherwise hold at slow polling rate
-	 */
-	skb = skb_clone(req->buf, GFP_ATOMIC);
-	if (skb)
-		tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest);
-	req->timer_intv *= 2;
-	if (req->num_nodes)
-		max_delay = TIPC_LINK_REQ_SLOW;
-	else
-		max_delay = TIPC_LINK_REQ_FAST;
-	if (req->timer_intv > max_delay)
-		req->timer_intv = max_delay;
-
-	mod_timer(&req->timer, jiffies + req->timer_intv);
+	/* Adjust timeout interval according to discovery phase */
+	d->timer_intv *= 2;
+	if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
+		d->timer_intv = TIPC_DISC_SLOW;
+	else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
+		d->timer_intv = TIPC_DISC_FAST;
+	mod_timer(&d->timer, jiffies + d->timer_intv);
+	memcpy(&maddr, &d->dest, sizeof(maddr));
+	skb = skb_clone(d->skb, GFP_ATOMIC);
+	net = d->net;
+	bearer_id = d->bearer_id;
 exit:
-	spin_unlock_bh(&req->lock);
+	spin_unlock_bh(&d->lock);
+	if (skb)
+		tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr);
 }
 
 /**
@@ -273,41 +260,41 @@ exit:
 int tipc_disc_create(struct net *net, struct tipc_bearer *b,
 		     struct tipc_media_addr *dest, struct sk_buff **skb)
 {
-	struct tipc_link_req *req;
+	struct tipc_discoverer *d;
 
-	req = kmalloc(sizeof(*req), GFP_ATOMIC);
-	if (!req)
+	d = kmalloc(sizeof(*d), GFP_ATOMIC);
+	if (!d)
 		return -ENOMEM;
-	req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
-	if (!req->buf) {
-		kfree(req);
+	d->skb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
+	if (!d->skb) {
+		kfree(d);
 		return -ENOMEM;
 	}
 
-	tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
-	memcpy(&req->dest, dest, sizeof(*dest));
-	req->net = net;
-	req->bearer_id = b->identity;
-	req->domain = b->domain;
-	req->num_nodes = 0;
-	req->timer_intv = TIPC_LINK_REQ_INIT;
-	spin_lock_init(&req->lock);
-	timer_setup(&req->timer, disc_timeout, 0);
-	mod_timer(&req->timer, jiffies + req->timer_intv);
-	b->link_req = req;
-	*skb = skb_clone(req->buf, GFP_ATOMIC);
+	tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+	memcpy(&d->dest, dest, sizeof(*dest));
+	d->net = net;
+	d->bearer_id = b->identity;
+	d->domain = b->domain;
+	d->num_nodes = 0;
+	d->timer_intv = TIPC_DISC_INIT;
+	spin_lock_init(&d->lock);
+	timer_setup(&d->timer, tipc_disc_timeout, 0);
+	mod_timer(&d->timer, jiffies + d->timer_intv);
+	b->disc = d;
+	*skb = skb_clone(d->skb, GFP_ATOMIC);
 	return 0;
 }
 
 /**
  * tipc_disc_delete - destroy object sending periodic link setup requests
- * @req: ptr to link request structure
+ * @d: ptr to link duest structure
  */
-void tipc_disc_delete(struct tipc_link_req *req)
+void tipc_disc_delete(struct tipc_discoverer *d)
 {
-	del_timer_sync(&req->timer);
-	kfree_skb(req->buf);
-	kfree(req);
+	del_timer_sync(&d->timer);
+	kfree_skb(d->skb);
+	kfree(d);
 }
 
 /**
@@ -318,19 +305,21 @@ void tipc_disc_delete(struct tipc_link_req *req)
  */
 void tipc_disc_reset(struct net *net, struct tipc_bearer *b)
 {
-	struct tipc_link_req *req = b->link_req;
+	struct tipc_discoverer *d = b->disc;
+	struct tipc_media_addr maddr;
 	struct sk_buff *skb;
 
-	spin_lock_bh(&req->lock);
-	tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b);
-	req->net = net;
-	req->bearer_id = b->identity;
-	req->domain = b->domain;
-	req->num_nodes = 0;
-	req->timer_intv = TIPC_LINK_REQ_INIT;
-	mod_timer(&req->timer, jiffies + req->timer_intv);
-	skb = skb_clone(req->buf, GFP_ATOMIC);
+	spin_lock_bh(&d->lock);
+	tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+	d->net = net;
+	d->bearer_id = b->identity;
+	d->domain = b->domain;
+	d->num_nodes = 0;
+	d->timer_intv = TIPC_DISC_INIT;
+	memcpy(&maddr, &d->dest, sizeof(maddr));
+	mod_timer(&d->timer, jiffies + d->timer_intv);
+	skb = skb_clone(d->skb, GFP_ATOMIC);
+	spin_unlock_bh(&d->lock);
 	if (skb)
-		tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest);
-	spin_unlock_bh(&req->lock);
+		tipc_bearer_xmit_skb(net, b->identity, skb, &maddr);
 }
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index b80a335389c0..521d96c41dfd 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,14 +37,14 @@
 #ifndef _TIPC_DISCOVER_H
 #define _TIPC_DISCOVER_H
 
-struct tipc_link_req;
+struct tipc_discoverer;
 
 int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr,
 		     struct tipc_media_addr *dest, struct sk_buff **skb);
-void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_delete(struct tipc_discoverer *req);
 void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr);
-void tipc_disc_add_dest(struct tipc_link_req *req);
-void tipc_disc_remove_dest(struct tipc_link_req *req);
+void tipc_disc_add_dest(struct tipc_discoverer *req);
+void tipc_disc_remove_dest(struct tipc_discoverer *req);
 void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
 		   struct tipc_bearer *b_ptr);
 

From 2026364149db36c6a2c0c8cae8362fe9a7f954dd Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:47 +0100
Subject: [PATCH 1135/1678] tipc: remove restrictions on node address values

Nominally, TIPC organizes network nodes into a three-level network
hierarchy consisting of the levels 'zone', 'cluster' and 'node'. This
hierarchy is reflected in the node address format, - it is sub-divided
into an 8-bit zone id, and 12 bit cluster id, and a 12-bit node id.

However, the 'zone' and 'cluster' levels have in reality never been
fully implemented,and never will be. The result of this has been
that the first 20 bits the node identity structure have been wasted,
and the usable node identity range within a cluster has been limited
to 12 bits. This is starting to become a problem.

In the following commits, we will need to be able to connect between
nodes which are using the whole 32-bit value space of the node address.
We therefore remove the restrictions on which values can be assigned
to node identity, -it is from now on only a 32-bit integer with no
assumed internal structure.

Isolation between clusters is now achieved only by setting different
values for the 'network id' field used during neighbor discovery, in
practice leading to the latter becoming the new cluster identity.

The rules for accepting discovery requests/responses from neighboring
nodes now become:

- If the user is using legacy address format on both peers, reception
  of discovery messages is subject to the legacy lookup domain check
  in addition to the cluster id check.

- Otherwise, the discovery request/response is always accepted, provided
  both peers have the same network id.

This secures backwards compatibility for users who have been using zone
or cluster identities as cluster separators, instead of the intended
'network id'.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c     | 50 +--------------------------------------------
 net/tipc/addr.h     | 11 ----------
 net/tipc/bearer.c   | 27 ++++--------------------
 net/tipc/discover.c | 15 +++++++-------
 net/tipc/link.c     |  6 ++----
 net/tipc/net.c      |  4 ++--
 net/tipc/node.c     |  8 ++------
 net/tipc/node.h     |  5 +++--
 8 files changed, 21 insertions(+), 105 deletions(-)

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 97cd857d7f43..dfc31a730ca5 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -38,21 +38,6 @@
 #include "addr.h"
 #include "core.h"
 
-/**
- * in_own_cluster - test for cluster inclusion; <0.0.0> always matches
- */
-int in_own_cluster(struct net *net, u32 addr)
-{
-	return in_own_cluster_exact(net, addr) || !addr;
-}
-
-int in_own_cluster_exact(struct net *net, u32 addr)
-{
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-
-	return !((addr ^ tn->own_addr) >> 12);
-}
-
 /**
  * in_own_node - test for node inclusion; <0.0.0> always matches
  */
@@ -63,46 +48,13 @@ int in_own_node(struct net *net, u32 addr)
 	return (addr == tn->own_addr) || !addr;
 }
 
-/**
- * tipc_addr_domain_valid - validates a network domain address
- *
- * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>,
- * where Z, C, and N are non-zero.
- *
- * Returns 1 if domain address is valid, otherwise 0
- */
-int tipc_addr_domain_valid(u32 addr)
-{
-	u32 n = tipc_node(addr);
-	u32 c = tipc_cluster(addr);
-	u32 z = tipc_zone(addr);
-
-	if (n && (!z || !c))
-		return 0;
-	if (c && !z)
-		return 0;
-	return 1;
-}
-
-/**
- * tipc_addr_node_valid - validates a proposed network address for this node
- *
- * Accepts <Z.C.N>, where Z, C, and N are non-zero.
- *
- * Returns 1 if address can be used, otherwise 0
- */
-int tipc_addr_node_valid(u32 addr)
-{
-	return tipc_addr_domain_valid(addr) && tipc_node(addr);
-}
-
 int tipc_in_scope(u32 domain, u32 addr)
 {
 	if (!domain || (domain == addr))
 		return 1;
 	if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
 		return 1;
-	if (domain == tipc_zone_mask(addr)) /* domain <Z.0.0> */
+	if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
 		return 1;
 	return 0;
 }
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 2ecf5a5d40dd..5ffde51b0e68 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -50,11 +50,6 @@ static inline u32 tipc_own_addr(struct net *net)
 	return tn->own_addr;
 }
 
-static inline u32 tipc_zone_mask(u32 addr)
-{
-	return addr & TIPC_ZONE_MASK;
-}
-
 static inline u32 tipc_cluster_mask(u32 addr)
 {
 	return addr & TIPC_ZONE_CLUSTER_MASK;
@@ -71,14 +66,8 @@ static inline int tipc_scope2node(struct net *net, int sc)
 }
 
 u32 tipc_own_addr(struct net *net);
-int in_own_cluster(struct net *net, u32 addr);
-int in_own_cluster_exact(struct net *net, u32 addr);
 int in_own_node(struct net *net, u32 addr);
-u32 addr_domain(struct net *net, u32 sc);
-int tipc_addr_domain_valid(u32);
-int tipc_addr_node_valid(u32 addr);
 int tipc_in_scope(u32 domain, u32 addr);
-int tipc_addr_scope(u32 domain);
 char *tipc_addr_string_fill(char *string, u32 addr);
 
 #endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 76340b9e4851..a71f31879cb3 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -240,7 +240,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	struct tipc_bearer *b;
 	struct tipc_media *m;
 	struct sk_buff *skb;
-	char addr_string[16];
 	int bearer_id = 0;
 	int res = -EINVAL;
 	char *errstr = "";
@@ -256,21 +255,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 		goto rejected;
 	}
 
-	if (tipc_addr_domain_valid(disc_domain) && disc_domain != self) {
-		if (tipc_in_scope(disc_domain, self)) {
-			/* Accept any node in own cluster */
-			disc_domain = self & TIPC_ZONE_CLUSTER_MASK;
-			res = 0;
-		} else if (in_own_cluster_exact(net, disc_domain)) {
-			/* Accept specified node in own cluster */
-			res = 0;
-		}
-	}
-	if (res) {
-		errstr = "illegal discovery domain";
-		goto rejected;
-	}
-
 	if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
 		errstr = "illegal priority";
 		goto rejected;
@@ -354,12 +338,11 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 		return -ENOMEM;
 	}
 
-	tipc_addr_string_fill(addr_string, disc_domain);
-	pr_info("Enabled bearer <%s>, discovery scope %s, priority %u\n",
-		name, addr_string, prio);
+	pr_info("Enabled bearer <%s>, priority %u\n", name, prio);
+
 	return res;
 rejected:
-	pr_warn("Bearer <%s> rejected, %s\n", name, errstr);
+	pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr);
 	return res;
 }
 
@@ -865,12 +848,10 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 	char *bearer;
 	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
 	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	u32 domain;
+	u32 domain = 0;
 	u32 prio;
 
 	prio = TIPC_MEDIA_LINK_PRI;
-	domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK;
 
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 09f75558d353..669af125b3de 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -161,18 +161,17 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 		return;
 	if (net_id != tn->net_id)
 		return;
-	if (!tipc_addr_domain_valid(dst))
-		return;
-	if (!tipc_addr_node_valid(src))
-		return;
 	if (in_own_node(net, src)) {
 		disc_dupl_alert(b, self, &maddr);
 		return;
 	}
-	if (!tipc_in_scope(dst, self))
-		return;
-	if (!tipc_in_scope(b->domain, src))
-		return;
+	/* Domain filter only works if both peers use legacy address format */
+	if (b->domain) {
+		if (!tipc_in_scope(dst, self))
+			return;
+		if (!tipc_in_scope(b->domain, src))
+			return;
+	}
 	tipc_node_check_dest(net, src, b, caps, signature,
 			     &maddr, &respond, &dupl_addr);
 	if (dupl_addr)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 3c230466804d..86fde005ea47 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -434,7 +434,7 @@ char *tipc_link_name(struct tipc_link *l)
  */
 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      int tolerance, char net_plane, u32 mtu, int priority,
-		      int window, u32 session, u32 ownnode, u32 peer,
+		      int window, u32 session, u32 self, u32 peer,
 		      u16 peer_caps,
 		      struct tipc_link *bc_sndlink,
 		      struct tipc_link *bc_rcvlink,
@@ -451,9 +451,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	l->session = session;
 
 	/* Note: peer i/f name is completed by reset/activate message */
-	sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
-		tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
-		if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+	sprintf(l->name, "%x:%s-%x:unknown", self, if_name, peer);
 	strcpy(l->if_name, if_name);
 	l->addr = peer;
 	l->peer_caps = peer_caps;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 5c4c4405b78e..a074f285e6ea 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -121,7 +121,7 @@ int tipc_net_start(struct net *net, u32 addr)
 			     TIPC_CLUSTER_SCOPE, 0, tn->own_addr);
 
 	pr_info("Started in network mode\n");
-	pr_info("Own node address %s, network identity %u\n",
+	pr_info("Own node address %s, cluster identity %u\n",
 		tipc_addr_string_fill(addr_string, tn->own_addr),
 		tn->net_id);
 	return 0;
@@ -238,7 +238,7 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 			return -EPERM;
 
 		addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
-		if (!tipc_addr_node_valid(addr))
+		if (!addr)
 			return -EINVAL;
 
 		tipc_net_start(net, addr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 389193d7cf67..8a4b04933ecc 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -233,9 +233,6 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
 	struct tipc_node *node;
 	unsigned int thash = tipc_hashfn(addr);
 
-	if (unlikely(!in_own_cluster_exact(net, addr)))
-		return NULL;
-
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
 		if (node->addr != addr)
@@ -836,10 +833,9 @@ void tipc_node_check_dest(struct net *net, u32 onode,
 
 	/* Now create new link if not already existing */
 	if (!l) {
-		if (n->link_cnt == 2) {
-			pr_warn("Cannot establish 3rd link to %x\n", n->addr);
+		if (n->link_cnt == 2)
 			goto exit;
-		}
+
 		if_name = strchr(b->name, ':') + 1;
 		if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
 				      b->net_plane, b->mtu, b->priority,
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 4ce5e3a185c0..5fb38cf0bb5c 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -49,13 +49,14 @@ enum {
 	TIPC_BCAST_STATE_NACK = (1 << 2),
 	TIPC_BLOCK_FLOWCTL    = (1 << 3),
 	TIPC_BCAST_RCAST      = (1 << 4),
-	TIPC_MCAST_GROUPS     = (1 << 5)
+	TIPC_NODE_ID32        = (1 << 5)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
 				TIPC_BCAST_STATE_NACK | \
 				TIPC_BCAST_RCAST | \
-				TIPC_BLOCK_FLOWCTL)
+				TIPC_BLOCK_FLOWCTL | \
+				TIPC_NODE_ID32)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);

From b89afb116ca2830cc982624f93e888860868a84b Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:48 +0100
Subject: [PATCH 1136/1678] tipc: allow closest-first lookup algorithm when
 legacy address is configured

The removal of an internal structure of the node address has an unwanted
side effect.
- Currently, if a user is sending an anycast message with destination
  domain 0, the tipc_namebl_translate() function will use the 'closest-
  first' algorithm to first look for a node local destination, and only
  when no such is found, will it resort to the cluster global 'round-
  robin' lookup algorithm.
- Current users can get around this, and enforce unconditional use of
  global round-robin by indicating a destination as Z.0.0 or Z.C.0.
- This option disappears when we make the node address flat, since the
  lookup algorithm has no way of recognizing this case. So, as long as
  there are node local destinations, the algorithm will always select
  one of those, and there is nothing the sender can do to change this.

We solve this by eliminating the 'closest-first' option, which was never
a good idea anyway, for non-legacy users, but only for those. To
distinguish between legacy users and non-legacy users we introduce a new
flag 'legacy_addr_format' in struct tipc_core, to be set when the user
configures a legacy-style Z.C.N node address. Hence, when a legacy user
indicates a zero lookup domain 'closest-first' is selected, and in all
other cases we use 'round-robin'.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c       | 12 +++++++-----
 net/tipc/addr.h       |  2 +-
 net/tipc/core.h       |  3 ++-
 net/tipc/discover.c   | 13 ++++++-------
 net/tipc/name_table.c |  8 +++++---
 net/tipc/net.c        |  2 +-
 6 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index dfc31a730ca5..19987994704f 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -48,15 +48,17 @@ int in_own_node(struct net *net, u32 addr)
 	return (addr == tn->own_addr) || !addr;
 }
 
-int tipc_in_scope(u32 domain, u32 addr)
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
 {
 	if (!domain || (domain == addr))
-		return 1;
+		return true;
+	if (!legacy_format)
+		return false;
 	if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
-		return 1;
+		return true;
 	if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
-		return 1;
-	return 0;
+		return true;
+	return false;
 }
 
 char *tipc_addr_string_fill(char *string, u32 addr)
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 5ffde51b0e68..97bdc0e1a369 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -67,7 +67,7 @@ static inline int tipc_scope2node(struct net *net, int sc)
 
 u32 tipc_own_addr(struct net *net);
 int in_own_node(struct net *net, u32 addr);
-int tipc_in_scope(u32 domain, u32 addr);
+bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
 char *tipc_addr_string_fill(char *string, u32 addr);
 
 #endif
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 347f850dc872..bd2b112680a4 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/core.h: Include file for TIPC global declarations
  *
- * Copyright (c) 2005-2006, 2013 Ericsson AB
+ * Copyright (c) 2005-2006, 2013-2018 Ericsson AB
  * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
  * All rights reserved.
  *
@@ -81,6 +81,7 @@ struct tipc_net {
 	u32 own_addr;
 	int net_id;
 	int random;
+	bool legacy_addr_format;
 
 	/* Node table and node list */
 	spinlock_t node_list_lock;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 669af125b3de..82556e19222d 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -139,6 +139,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_msg *hdr = buf_msg(skb);
 	u16 caps = msg_node_capabilities(hdr);
+	bool legacy = tn->legacy_addr_format;
 	u32 signature = msg_node_sig(hdr);
 	u32 dst = msg_dest_domain(hdr);
 	u32 net_id = msg_bc_netid(hdr);
@@ -165,13 +166,11 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 		disc_dupl_alert(b, self, &maddr);
 		return;
 	}
-	/* Domain filter only works if both peers use legacy address format */
-	if (b->domain) {
-		if (!tipc_in_scope(dst, self))
-			return;
-		if (!tipc_in_scope(b->domain, src))
-			return;
-	}
+	if (!tipc_in_scope(legacy, dst, self))
+		return;
+	if (!tipc_in_scope(legacy, b->domain, src))
+		return;
+
 	tipc_node_check_dest(net, src, b, caps, signature,
 			     &maddr, &respond, &dupl_addr);
 	if (dupl_addr)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bbbfc0702634..7478acb39096 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -499,7 +499,9 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 			   u32 *destnode)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_net *tn = tipc_net(net);
+	bool legacy = tn->legacy_addr_format;
+	u32 self = tipc_own_addr(net);
 	struct sub_seq *sseq;
 	struct name_info *info;
 	struct publication *publ;
@@ -507,7 +509,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	u32 port = 0;
 	u32 node = 0;
 
-	if (!tipc_in_scope(*destnode, tn->own_addr))
+	if (!tipc_in_scope(legacy, *destnode, self))
 		return 0;
 
 	rcu_read_lock();
@@ -521,7 +523,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	info = sseq->info;
 
 	/* Closest-First Algorithm */
-	if (likely(!*destnode)) {
+	if (legacy && !*destnode) {
 		if (!list_empty(&info->local_publ)) {
 			publ = list_first_entry(&info->local_publ,
 						struct publication,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a074f285e6ea..eb0d7a352e3f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -240,7 +240,7 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 		addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
 		if (!addr)
 			return -EINVAL;
-
+		tn->legacy_addr_format = true;
 		tipc_net_start(net, addr);
 	}
 

From 23fd3eace088ab1872ee59c19191a119ec779ac9 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:49 +0100
Subject: [PATCH 1137/1678] tipc: remove direct accesses to own_addr field in
 struct tipc_net

As a preparation to changing the addressing structure of TIPC we replace
all direct accesses to the tipc_net::own_addr field with the function
dedicated for this, tipc_own_addr().

There are no changes to program logics in this commit.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c       |  6 +++---
 net/tipc/addr.h       |  2 +-
 net/tipc/discover.c   |  3 ++-
 net/tipc/link.c       |  9 ++++-----
 net/tipc/name_distr.c | 11 ++++++-----
 net/tipc/name_table.c |  6 +++---
 net/tipc/net.c        | 31 +++++++++++++------------------
 net/tipc/socket.c     | 23 ++++++++++-------------
 8 files changed, 42 insertions(+), 49 deletions(-)

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 19987994704f..6e06b4d981f1 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -43,9 +43,7 @@
  */
 int in_own_node(struct net *net, u32 addr)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-
-	return (addr == tn->own_addr) || !addr;
+	return addr == tipc_own_addr(net) || !addr;
 }
 
 bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
@@ -56,6 +54,8 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
 		return false;
 	if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */
 		return true;
+	if (domain == (addr & TIPC_ZONE_CLUSTER_MASK)) /* domain <Z.C.0> */
+		return true;
 	if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */
 		return true;
 	return false;
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 97bdc0e1a369..6b48f0dc0205 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -45,7 +45,7 @@
 
 static inline u32 tipc_own_addr(struct net *net)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_net *tn = tipc_net(net);
 
 	return tn->own_addr;
 }
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 82556e19222d..94d524018ca5 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -81,11 +81,12 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
 			       u32 mtyp, struct tipc_bearer *b)
 {
 	struct tipc_net *tn = tipc_net(net);
+	u32 self = tipc_own_addr(net);
 	u32 dest_domain = b->domain;
 	struct tipc_msg *hdr;
 
 	hdr = buf_msg(skb);
-	tipc_msg_init(tn->own_addr, hdr, LINK_CONFIG, mtyp,
+	tipc_msg_init(self, hdr, LINK_CONFIG, mtyp,
 		      MAX_H_SIZE, dest_domain);
 	msg_set_non_seq(hdr, 1);
 	msg_set_node_sig(hdr, tn->random);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 86fde005ea47..4aa56e3bf4fc 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1936,11 +1936,11 @@ msg_full:
 int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
 		       struct tipc_link *link, int nlflags)
 {
-	int err;
-	void *hdr;
+	u32 self = tipc_own_addr(net);
 	struct nlattr *attrs;
 	struct nlattr *prop;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	void *hdr;
+	int err;
 
 	hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
 			  nlflags, TIPC_NL_LINK_GET);
@@ -1953,8 +1953,7 @@ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
 
 	if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name))
 		goto attr_msg_full;
-	if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST,
-			tipc_cluster_mask(tn->own_addr)))
+	if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self)))
 		goto attr_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu))
 		goto attr_msg_full;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 28d095a7d8bb..7e571f4f47bc 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -68,14 +68,14 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
 static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
 					 u32 dest)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
+	u32 self = tipc_own_addr(net);
 	struct tipc_msg *msg;
 
 	if (buf != NULL) {
 		msg = buf_msg(buf);
-		tipc_msg_init(tn->own_addr, msg, NAME_DISTRIBUTOR, type,
-			      INT_H_SIZE, dest);
+		tipc_msg_init(self, msg, NAME_DISTRIBUTOR,
+			      type, INT_H_SIZE, dest);
 		msg_set_size(msg, INT_H_SIZE + size);
 	}
 	return buf;
@@ -382,13 +382,14 @@ void tipc_named_reinit(struct net *net)
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
 	struct publication *publ;
+	u32 self = tipc_own_addr(net);
 
 	spin_lock_bh(&tn->nametbl_lock);
 
 	list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
-		publ->node = tn->own_addr;
+		publ->node = self;
 	list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
-		publ->node = tn->own_addr;
+		publ->node = self;
 
 	spin_unlock_bh(&tn->nametbl_lock);
 }
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 7478acb39096..4359605b1bec 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -540,7 +540,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	}
 
 	/* Round-Robin Algorithm */
-	else if (*destnode == tn->own_addr) {
+	else if (*destnode == tipc_own_addr(net)) {
 		if (list_empty(&info->local_publ))
 			goto no_match;
 		publ = list_first_entry(&info->local_publ, struct publication,
@@ -713,7 +713,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 	}
 
 	publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
-					tn->own_addr, port_ref, key);
+					tipc_own_addr(net), port_ref, key);
 	if (likely(publ)) {
 		tn->nametbl->local_publ_count++;
 		buf = tipc_named_publish(net, publ);
@@ -738,7 +738,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 
 	spin_lock_bh(&tn->nametbl_lock);
-	publ = tipc_nametbl_remove_publ(net, type, lower, tn->own_addr,
+	publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr(net),
 					port, key);
 	if (likely(publ)) {
 		tn->nametbl->local_publ_count--;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index eb0d7a352e3f..7f140a5308ee 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -106,7 +106,7 @@
 
 int tipc_net_start(struct net *net, u32 addr)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_net *tn = tipc_net(net);
 	char addr_string[16];
 
 	tn->own_addr = addr;
@@ -117,25 +117,24 @@ int tipc_net_start(struct net *net, u32 addr)
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
 
-	tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
-			     TIPC_CLUSTER_SCOPE, 0, tn->own_addr);
+	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+			     TIPC_CLUSTER_SCOPE, 0, addr);
 
 	pr_info("Started in network mode\n");
 	pr_info("Own node address %s, cluster identity %u\n",
-		tipc_addr_string_fill(addr_string, tn->own_addr),
+		tipc_addr_string_fill(addr_string, addr),
 		tn->net_id);
 	return 0;
 }
 
 void tipc_net_stop(struct net *net)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	u32 self = tipc_own_addr(net);
 
-	if (!tn->own_addr)
+	if (!self)
 		return;
 
-	tipc_nametbl_withdraw(net, TIPC_CFG_SRV, tn->own_addr, 0,
-			      tn->own_addr);
+	tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, 0, self);
 	rtnl_lock();
 	tipc_bearer_stop(net);
 	tipc_node_stop(net);
@@ -202,9 +201,9 @@ out:
 
 int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = tipc_net(net);
 	int err;
 
 	if (!info->attrs[TIPC_NLA_NET])
@@ -216,13 +215,13 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
+	/* Can't change net id once TIPC has joined a network */
+	if (tipc_own_addr(net))
+		return -EPERM;
+
 	if (attrs[TIPC_NLA_NET_ID]) {
 		u32 val;
 
-		/* Can't change net id once TIPC has joined a network */
-		if (tn->own_addr)
-			return -EPERM;
-
 		val = nla_get_u32(attrs[TIPC_NLA_NET_ID]);
 		if (val < 1 || val > 9999)
 			return -EINVAL;
@@ -233,10 +232,6 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 	if (attrs[TIPC_NLA_NET_ADDR]) {
 		u32 addr;
 
-		/* Can't change net addr once TIPC has joined a network */
-		if (tn->own_addr)
-			return -EPERM;
-
 		addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
 		if (!addr)
 			return -EINVAL;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 732ec894f69f..275b666f6231 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -289,10 +289,9 @@ static bool tipc_sk_type_connectionless(struct sock *sk)
 static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
 {
 	struct sock *sk = &tsk->sk;
-	struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
+	u32 self = tipc_own_addr(sock_net(sk));
 	u32 peer_port = tsk_peer_port(tsk);
-	u32 orig_node;
-	u32 peer_node;
+	u32 orig_node, peer_node;
 
 	if (unlikely(!tipc_sk_connected(sk)))
 		return false;
@@ -306,10 +305,10 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
 	if (likely(orig_node == peer_node))
 		return true;
 
-	if (!orig_node && (peer_node == tn->own_addr))
+	if (!orig_node && peer_node == self)
 		return true;
 
-	if (!peer_node && (orig_node == tn->own_addr))
+	if (!peer_node && orig_node == self)
 		return true;
 
 	return false;
@@ -461,8 +460,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	/* Ensure tsk is visible before we read own_addr. */
 	smp_mb();
 
-	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
-		      NAMED_H_SIZE, 0);
+	tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE,
+		      TIPC_NAMED_MSG, NAMED_H_SIZE, 0);
 
 	msg_set_origport(msg, tsk->portid);
 	timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
@@ -671,7 +670,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
 
 	memset(addr, 0, sizeof(*addr));
 	if (peer) {
@@ -682,7 +680,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
 		addr->addr.id.node = tsk_peer_node(tsk);
 	} else {
 		addr->addr.id.ref = tsk->portid;
-		addr->addr.id.node = tn->own_addr;
+		addr->addr.id.node = tipc_own_addr(sock_net(sk));
 	}
 
 	addr->addrtype = TIPC_ADDR_ID;
@@ -2667,8 +2665,8 @@ void tipc_sk_reinit(struct net *net)
 		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
 			spin_lock_bh(&tsk->sk.sk_lock.slock);
 			msg = &tsk->phdr;
-			msg_set_prevnode(msg, tn->own_addr);
-			msg_set_orignode(msg, tn->own_addr);
+			msg_set_prevnode(msg, tipc_own_addr(net));
+			msg_set_orignode(msg, tipc_own_addr(net));
 			spin_unlock_bh(&tsk->sk.sk_lock.slock);
 		}
 
@@ -3167,11 +3165,10 @@ static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock
 			  *tsk)
 {
 	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = tipc_net(net);
 	struct sock *sk = &tsk->sk;
 
 	if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) ||
-	    nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
+	    nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net)))
 		return -EMSGSIZE;
 
 	if (tipc_sk_connected(sk)) {

From d50ccc2d3909fc1b4d40e4af16b026f05dc68707 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:50 +0100
Subject: [PATCH 1138/1678] tipc: add 128-bit node identifier

We add a 128-bit node identity, as an alternative to the currently used
32-bit node address.

For the sake of compatibility and to minimize message header changes
we retain the existing 32-bit address field. When not set explicitly by
the user, this field will be filled with a hash value generated from the
much longer node identity, and be used as a shorthand value for the
latter.

We permit either the address or the identity to be set by configuration,
but not both, so when the address value is set by a legacy user the
corresponding 128-bit node identity is generated based on the that value.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |  2 +
 net/tipc/addr.c                   | 81 +++++++++++++++++++++++++------
 net/tipc/addr.h                   | 28 +++++++++--
 net/tipc/core.c                   |  4 +-
 net/tipc/core.h                   |  6 ++-
 net/tipc/discover.c               |  4 +-
 net/tipc/link.c                   |  6 ++-
 net/tipc/name_distr.c             |  6 +--
 net/tipc/net.c                    | 51 +++++++++++++------
 net/tipc/net.h                    |  4 +-
 net/tipc/node.c                   |  8 +--
 net/tipc/node.h                   |  4 +-
 12 files changed, 148 insertions(+), 56 deletions(-)

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index d896ded51bcb..0affb682e5e3 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -169,6 +169,8 @@ enum {
 	TIPC_NLA_NET_UNSPEC,
 	TIPC_NLA_NET_ID,		/* u32 */
 	TIPC_NLA_NET_ADDR,		/* u32 */
+	TIPC_NLA_NET_NODEID,		/* u64 */
+	TIPC_NLA_NET_NODEID_W1,		/* u64 */
 
 	__TIPC_NLA_NET_MAX,
 	TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 6e06b4d981f1..4841e98591d0 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/addr.c: TIPC address utility routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -34,18 +34,9 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/kernel.h>
 #include "addr.h"
 #include "core.h"
 
-/**
- * in_own_node - test for node inclusion; <0.0.0> always matches
- */
-int in_own_node(struct net *net, u32 addr)
-{
-	return addr == tipc_own_addr(net) || !addr;
-}
-
 bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
 {
 	if (!domain || (domain == addr))
@@ -61,9 +52,71 @@ bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr)
 	return false;
 }
 
-char *tipc_addr_string_fill(char *string, u32 addr)
+void tipc_set_node_id(struct net *net, u8 *id)
 {
-	snprintf(string, 16, "<%u.%u.%u>",
-		 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
-	return string;
+	struct tipc_net *tn = tipc_net(net);
+	u32 *tmp = (u32 *)id;
+
+	memcpy(tn->node_id, id, NODE_ID_LEN);
+	tipc_nodeid2string(tn->node_id_string, id);
+	tn->node_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
+	pr_info("Own node identity %s, cluster identity %u\n",
+		tipc_own_id_string(net), tn->net_id);
+}
+
+void tipc_set_node_addr(struct net *net, u32 addr)
+{
+	struct tipc_net *tn = tipc_net(net);
+	u8 node_id[NODE_ID_LEN] = {0,};
+
+	tn->node_addr = addr;
+	if (!tipc_own_id(net)) {
+		sprintf(node_id, "%x", addr);
+		tipc_set_node_id(net, node_id);
+	}
+	pr_info("32-bit node address hash set to %x\n", addr);
+}
+
+char *tipc_nodeid2string(char *str, u8 *id)
+{
+	int i;
+	u8 c;
+
+	/* Already a string ? */
+	for (i = 0; i < NODE_ID_LEN; i++) {
+		c = id[i];
+		if (c >= '0' && c <= '9')
+			continue;
+		if (c >= 'A' && c <= 'Z')
+			continue;
+		if (c >= 'a' && c <= 'z')
+			continue;
+		if (c == '.')
+			continue;
+		if (c == ':')
+			continue;
+		if (c == '_')
+			continue;
+		if (c == '-')
+			continue;
+		if (c == '@')
+			continue;
+		if (c != 0)
+			break;
+	}
+	if (i == NODE_ID_LEN) {
+		memcpy(str, id, NODE_ID_LEN);
+		str[NODE_ID_LEN] = 0;
+		return str;
+	}
+
+	/* Translate to hex string */
+	for (i = 0; i < NODE_ID_LEN; i++)
+		sprintf(&str[2 * i], "%02x", id[i]);
+
+	/* Strip off trailing zeroes */
+	for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--)
+		str[i] = 0;
+
+	return str;
 }
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 6b48f0dc0205..31bee0ea7b3e 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/addr.h: Include file for TIPC address utility routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
+ * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, Wind River Systems
  * All rights reserved.
  *
@@ -44,10 +44,22 @@
 #include "core.h"
 
 static inline u32 tipc_own_addr(struct net *net)
+{
+	return tipc_net(net)->node_addr;
+}
+
+static inline u8 *tipc_own_id(struct net *net)
 {
 	struct tipc_net *tn = tipc_net(net);
 
-	return tn->own_addr;
+	if (!strlen(tn->node_id_string))
+		return NULL;
+	return tn->node_id;
+}
+
+static inline char *tipc_own_id_string(struct net *net)
+{
+	return tipc_net(net)->node_id_string;
 }
 
 static inline u32 tipc_cluster_mask(u32 addr)
@@ -65,9 +77,15 @@ static inline int tipc_scope2node(struct net *net, int sc)
 	return sc != TIPC_NODE_SCOPE ? 0 : tipc_own_addr(net);
 }
 
-u32 tipc_own_addr(struct net *net);
-int in_own_node(struct net *net, u32 addr);
+static inline int in_own_node(struct net *net, u32 addr)
+{
+	return addr == tipc_own_addr(net) || !addr;
+}
+
 bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr);
-char *tipc_addr_string_fill(char *string, u32 addr);
+void tipc_set_node_id(struct net *net, u8 *id);
+void tipc_set_node_addr(struct net *net, u32 addr);
+char *tipc_nodeid2string(char *str, u8 *id);
+u32 tipc_node_id2hash(u8 *id128);
 
 #endif
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 04fd91bb11d7..e92fed49e095 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -56,7 +56,9 @@ static int __net_init tipc_init_net(struct net *net)
 	int err;
 
 	tn->net_id = 4711;
-	tn->own_addr = 0;
+	tn->node_addr = 0;
+	memset(tn->node_id, 0, sizeof(tn->node_id));
+	memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
 	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
 	get_random_bytes(&tn->random, sizeof(int));
 	INIT_LIST_HEAD(&tn->node_list);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index bd2b112680a4..eabad41cc832 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -72,13 +72,17 @@ struct tipc_monitor;
 #define NODE_HTABLE_SIZE       512
 #define MAX_BEARERS	         3
 #define TIPC_DEF_MON_THRESHOLD  32
+#define NODE_ID_LEN             16
+#define NODE_ID_STR_LEN        (NODE_ID_LEN * 2 + 1)
 
 extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
 struct tipc_net {
-	u32 own_addr;
+	u8  node_id[NODE_ID_LEN];
+	u32 node_addr;
+	char node_id_string[NODE_ID_STR_LEN];
 	int net_id;
 	int random;
 	bool legacy_addr_format;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 94d524018ca5..b4c4cd176b9b 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -118,13 +118,11 @@ static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst, u32 src,
 static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
 			    struct tipc_media_addr *media_addr)
 {
-	char node_addr_str[16];
 	char media_addr_str[64];
 
-	tipc_addr_string_fill(node_addr_str, node_addr);
 	tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str),
 			       media_addr);
-	pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
+	pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr,
 		media_addr_str, b->name);
 }
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 4aa56e3bf4fc..bcd76b1e440c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -442,6 +442,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      struct sk_buff_head *namedq,
 		      struct tipc_link **link)
 {
+	char *self_str = tipc_own_id_string(net);
 	struct tipc_link *l;
 
 	l = kzalloc(sizeof(*l), GFP_ATOMIC);
@@ -451,7 +452,10 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	l->session = session;
 
 	/* Note: peer i/f name is completed by reset/activate message */
-	sprintf(l->name, "%x:%s-%x:unknown", self, if_name, peer);
+	if (strlen(self_str) > 16)
+		sprintf(l->name, "%x:%s-%x:unknown", self, if_name, peer);
+	else
+		sprintf(l->name, "%s:%s-%x:unknown", self_str, if_name, peer);
 	strcpy(l->if_name, if_name);
 	l->addr = peer;
 	l->peer_caps = peer_caps;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 7e571f4f47bc..8240a85b0d0c 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -318,7 +318,6 @@ void tipc_named_process_backlog(struct net *net)
 {
 	struct distr_queue_item *e, *tmp;
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	char addr[16];
 	unsigned long now = get_jiffies_64();
 
 	list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
@@ -326,12 +325,11 @@ void tipc_named_process_backlog(struct net *net)
 			if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype))
 				continue;
 		} else {
-			tipc_addr_string_fill(addr, e->node);
-			pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %s key=%u\n",
+			pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %x key=%u\n",
 					    e->dtype, ntohl(e->i.type),
 					    ntohl(e->i.lower),
 					    ntohl(e->i.upper),
-					    addr, ntohl(e->i.key));
+					    e->node, ntohl(e->i.key));
 		}
 		list_del(&e->next);
 		kfree(e);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7f140a5308ee..e78674891166 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,27 +104,31 @@
  *     - A local spin_lock protecting the queue of subscriber events.
 */
 
-int tipc_net_start(struct net *net, u32 addr)
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 {
-	struct tipc_net *tn = tipc_net(net);
-	char addr_string[16];
+	if (tipc_own_id(net)) {
+		pr_info("Cannot configure node identity twice\n");
+		return -1;
+	}
+	pr_info("Started in network mode\n");
 
-	tn->own_addr = addr;
+	if (node_id) {
+		tipc_set_node_id(net, node_id);
+		tipc_net_finalize(net, tipc_own_addr(net));
+	}
+	if (addr)
+		tipc_net_finalize(net, addr);
+	return 0;
+}
 
-	/* Ensure that the new address is visible before we reinit. */
+void tipc_net_finalize(struct net *net, u32 addr)
+{
+	tipc_set_node_addr(net, addr);
 	smp_mb();
-
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
-
 	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
 			     TIPC_CLUSTER_SCOPE, 0, addr);
-
-	pr_info("Started in network mode\n");
-	pr_info("Own node address %s, cluster identity %u\n",
-		tipc_addr_string_fill(addr_string, addr),
-		tn->net_id);
-	return 0;
 }
 
 void tipc_net_stop(struct net *net)
@@ -146,8 +150,10 @@ void tipc_net_stop(struct net *net)
 static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	void *hdr;
+	u64 *w0 = (u64 *)&tn->node_id[0];
+	u64 *w1 = (u64 *)&tn->node_id[8];
 	struct nlattr *attrs;
+	void *hdr;
 
 	hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
 			  NLM_F_MULTI, TIPC_NL_NET_GET);
@@ -160,7 +166,10 @@ static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg)
 
 	if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id))
 		goto attr_msg_full;
-
+	if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0))
+		goto attr_msg_full;
+	if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0))
+		goto attr_msg_full;
 	nla_nest_end(msg->skb, attrs);
 	genlmsg_end(msg->skb, hdr);
 
@@ -212,6 +221,7 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 	err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
 			       info->attrs[TIPC_NLA_NET], tipc_nl_net_policy,
 			       info->extack);
+
 	if (err)
 		return err;
 
@@ -236,9 +246,18 @@ int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
 		if (!addr)
 			return -EINVAL;
 		tn->legacy_addr_format = true;
-		tipc_net_start(net, addr);
+		tipc_net_init(net, NULL, addr);
 	}
 
+	if (attrs[TIPC_NLA_NET_NODEID]) {
+		u8 node_id[NODE_ID_LEN];
+		u64 *w0 = (u64 *)&node_id[0];
+		u64 *w1 = (u64 *)&node_id[8];
+
+		*w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]);
+		*w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]);
+		tipc_net_init(net, node_id, 0);
+	}
 	return 0;
 }
 
diff --git a/net/tipc/net.h b/net/tipc/net.h
index c0306aa2374b..08efa6010022 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -41,10 +41,8 @@
 
 extern const struct nla_policy tipc_nl_net_policy[];
 
-int tipc_net_start(struct net *net, u32 addr);
-
+void tipc_net_finalize(struct net *net, u32 addr);
 void tipc_net_stop(struct net *net);
-
 int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
 int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8a4b04933ecc..7b0c99347406 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -883,11 +883,9 @@ void tipc_node_delete_links(struct net *net, int bearer_id)
 
 static void tipc_node_reset_links(struct tipc_node *n)
 {
-	char addr_string[16];
 	int i;
 
-	pr_warn("Resetting all links to %s\n",
-		tipc_addr_string_fill(addr_string, n->addr));
+	pr_warn("Resetting all links to %x\n", n->addr);
 
 	for (i = 0; i < MAX_BEARERS; i++) {
 		tipc_node_link_down(n, i, false);
@@ -1074,15 +1072,13 @@ illegal_evt:
 static void node_lost_contact(struct tipc_node *n,
 			      struct sk_buff_head *inputq)
 {
-	char addr_string[16];
 	struct tipc_sock_conn *conn, *safe;
 	struct tipc_link *l;
 	struct list_head *conns = &n->conn_sks;
 	struct sk_buff *skb;
 	uint i;
 
-	pr_debug("Lost contact with %s\n",
-		 tipc_addr_string_fill(addr_string, n->addr));
+	pr_debug("Lost contact with %x\n", n->addr);
 
 	/* Clean up broadcast state */
 	tipc_bcast_remove_peer(n->net, n->bc_entry.link);
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 5fb38cf0bb5c..e06faf4fa55e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -49,14 +49,14 @@ enum {
 	TIPC_BCAST_STATE_NACK = (1 << 2),
 	TIPC_BLOCK_FLOWCTL    = (1 << 3),
 	TIPC_BCAST_RCAST      = (1 << 4),
-	TIPC_NODE_ID32        = (1 << 5)
+	TIPC_NODE_ID128       = (1 << 5)
 };
 
 #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
 				TIPC_BCAST_STATE_NACK | \
 				TIPC_BCAST_RCAST | \
 				TIPC_BLOCK_FLOWCTL | \
-				TIPC_NODE_ID32)
+				TIPC_NODE_ID128)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);

From 25b0b9c4e835ffaa65b61c3efe2e28acf84d0259 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:51 +0100
Subject: [PATCH 1139/1678] tipc: handle collisions of 32-bit node address hash
 values

When a 32-bit node address is generated from a 128-bit identifier,
there is a risk of collisions which must be discovered and handled.

We do this as follows:
- We don't apply the generated address immediately to the node, but do
  instead initiate a 1 sec trial period to allow other cluster members
  to discover and handle such collisions.

- During the trial period the node periodically sends out a new type
  of message, DSC_TRIAL_MSG, using broadcast or emulated broadcast,
  to all the other nodes in the cluster.

- When a node is receiving such a message, it must check that the
  presented 32-bit identifier either is unused, or was used by the very
  same peer in a previous session. In both cases it accepts the request
  by not responding to it.

- If it finds that the same node has been up before using a different
  address, it responds with a DSC_TRIAL_FAIL_MSG containing that
  address.

- If it finds that the address has already been taken by some other
  node, it generates a new, unused address and returns it to the
  requester.

- During the trial period the requesting node must always be prepared
  to accept a failure message, i.e., a message where a peer suggests a
  different (or equal)  address to the one tried. In those cases it
  must apply the suggested value as trial address and restart the trial
  period.

This algorithm ensures that in the vast majority of cases a node will
have the same address before and after a reboot. If a legacy user
configures the address explicitly, there will be no trial period and
messages, so this protocol addition is completely backwards compatible.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c     |   3 +-
 net/tipc/bearer.c   |   3 +-
 net/tipc/core.c     |   2 +
 net/tipc/core.h     |   2 +
 net/tipc/discover.c | 126 +++++++++++++++++++++++++++++++++++++-------
 net/tipc/link.c     |  26 +++++----
 net/tipc/link.h     |   4 +-
 net/tipc/msg.h      |  23 +++++++-
 net/tipc/net.c      |   4 +-
 net/tipc/node.c     |  85 +++++++++++++++++++++++++++---
 net/tipc/node.h     |   3 +-
 11 files changed, 236 insertions(+), 45 deletions(-)

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 4841e98591d0..b88d48d00913 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -59,7 +59,7 @@ void tipc_set_node_id(struct net *net, u8 *id)
 
 	memcpy(tn->node_id, id, NODE_ID_LEN);
 	tipc_nodeid2string(tn->node_id_string, id);
-	tn->node_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
+	tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3];
 	pr_info("Own node identity %s, cluster identity %u\n",
 		tipc_own_id_string(net), tn->net_id);
 }
@@ -74,6 +74,7 @@ void tipc_set_node_addr(struct net *net, u32 addr)
 		sprintf(node_id, "%x", addr);
 		tipc_set_node_id(net, node_id);
 	}
+	tn->trial_addr = addr;
 	pr_info("32-bit node address hash set to %x\n", addr);
 }
 
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a71f31879cb3..ae5b44ca1c1e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -235,7 +235,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 {
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_bearer_names b_names;
-	u32 self = tipc_own_addr(net);
 	int with_this_prio = 1;
 	struct tipc_bearer *b;
 	struct tipc_media *m;
@@ -244,7 +243,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	int res = -EINVAL;
 	char *errstr = "";
 
-	if (!self) {
+	if (!tipc_own_id(net)) {
 		errstr = "not supported in standalone mode";
 		res = -ENOPROTOOPT;
 		goto rejected;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e92fed49e095..52dfc51ac4d5 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -57,6 +57,8 @@ static int __net_init tipc_init_net(struct net *net)
 
 	tn->net_id = 4711;
 	tn->node_addr = 0;
+	tn->trial_addr = 0;
+	tn->addr_trial_end = 0;
 	memset(tn->node_id, 0, sizeof(tn->node_id));
 	memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
 	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index eabad41cc832..d0f64ca62d02 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -82,6 +82,8 @@ extern int sysctl_tipc_named_timeout __read_mostly;
 struct tipc_net {
 	u8  node_id[NODE_ID_LEN];
 	u32 node_addr;
+	u32 trial_addr;
+	unsigned long addr_trial_end;
 	char node_id_string[NODE_ID_STR_LEN];
 	int net_id;
 	int random;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index b4c4cd176b9b..e7655736abed 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -1,7 +1,7 @@
 /*
  * net/tipc/discover.c
  *
- * Copyright (c) 2003-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2003-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2005-2006, 2010-2011, Wind River Systems
  * All rights reserved.
  *
@@ -78,34 +78,40 @@ struct tipc_discoverer {
  * @b: ptr to bearer issuing message
  */
 static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb,
-			       u32 mtyp, struct tipc_bearer *b)
+			       u32 mtyp,  struct tipc_bearer *b)
 {
 	struct tipc_net *tn = tipc_net(net);
-	u32 self = tipc_own_addr(net);
 	u32 dest_domain = b->domain;
 	struct tipc_msg *hdr;
 
 	hdr = buf_msg(skb);
-	tipc_msg_init(self, hdr, LINK_CONFIG, mtyp,
+	tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp,
 		      MAX_H_SIZE, dest_domain);
+	msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN);
 	msg_set_non_seq(hdr, 1);
 	msg_set_node_sig(hdr, tn->random);
 	msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES);
 	msg_set_dest_domain(hdr, dest_domain);
 	msg_set_bc_netid(hdr, tn->net_id);
 	b->media->addr2msg(msg_media_addr(hdr), &b->addr);
+	msg_set_node_id(hdr, tipc_own_id(net));
 }
 
-static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst, u32 src,
+static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst,
+			       u32 src, u32 sugg_addr,
 			       struct tipc_media_addr *maddr,
 			       struct tipc_bearer *b)
 {
+	struct tipc_msg *hdr;
 	struct sk_buff *skb;
 
-	skb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
+	skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
 	if (!skb)
 		return;
+	hdr = buf_msg(skb);
 	tipc_disc_init_msg(net, skb, mtyp, b);
+	msg_set_sugg_node_addr(hdr, sugg_addr);
+	msg_set_dest_domain(hdr, dst);
 	tipc_bearer_xmit_skb(net, b->identity, skb, maddr);
 }
 
@@ -126,6 +132,52 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
 		media_addr_str, b->name);
 }
 
+/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
+ */
+bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+			      struct tipc_media_addr *maddr,
+			      struct tipc_bearer *b,
+			      u32 dst, u32 src,
+			      u32 sugg_addr,
+			      u8 *peer_id,
+			      int mtyp)
+{
+	struct net *net = d->net;
+	struct tipc_net *tn = tipc_net(net);
+	bool trial = time_before(jiffies, tn->addr_trial_end);
+	u32 self = tipc_own_addr(net);
+
+	if (mtyp == DSC_TRIAL_FAIL_MSG) {
+		if (!trial)
+			return true;
+
+		/* Ignore if somebody else already gave new suggestion */
+		if (dst != tn->trial_addr)
+			return true;
+
+		/* Otherwise update trial address and restart trial period */
+		tn->trial_addr = sugg_addr;
+		msg_set_prevnode(buf_msg(d->skb), sugg_addr);
+		tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+		return true;
+	}
+
+	/* Apply trial address if we just left trial period */
+	if (!trial && !self) {
+		tipc_net_finalize(net, tn->trial_addr);
+		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+	}
+
+	if (mtyp != DSC_TRIAL_MSG)
+		return false;
+
+	sugg_addr = tipc_node_try_addr(net, peer_id, src);
+	if (sugg_addr)
+		tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src,
+				   self, sugg_addr, maddr, b);
+	return true;
+}
+
 /**
  * tipc_disc_rcv - handle incoming discovery message (request or response)
  * @net: applicable net namespace
@@ -139,17 +191,27 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 	struct tipc_msg *hdr = buf_msg(skb);
 	u16 caps = msg_node_capabilities(hdr);
 	bool legacy = tn->legacy_addr_format;
+	u32 sugg = msg_sugg_node_addr(hdr);
 	u32 signature = msg_node_sig(hdr);
+	u8 peer_id[NODE_ID_LEN] = {0,};
 	u32 dst = msg_dest_domain(hdr);
 	u32 net_id = msg_bc_netid(hdr);
-	u32 self = tipc_own_addr(net);
 	struct tipc_media_addr maddr;
 	u32 src = msg_prevnode(hdr);
 	u32 mtyp = msg_type(hdr);
 	bool dupl_addr = false;
 	bool respond = false;
+	u32 self;
 	int err;
 
+	skb_linearize(skb);
+	hdr = buf_msg(skb);
+
+	if (caps & TIPC_NODE_ID128)
+		memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN);
+	else
+		sprintf(peer_id, "%x", src);
+
 	err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr));
 	kfree_skb(skb);
 	if (err || maddr.broadcast) {
@@ -161,6 +223,12 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 		return;
 	if (net_id != tn->net_id)
 		return;
+	if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst,
+				     src, sugg, peer_id, mtyp))
+		return;
+	self = tipc_own_addr(net);
+
+	/* Message from somebody using this node's address */
 	if (in_own_node(net, src)) {
 		disc_dupl_alert(b, self, &maddr);
 		return;
@@ -169,8 +237,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 		return;
 	if (!tipc_in_scope(legacy, b->domain, src))
 		return;
-
-	tipc_node_check_dest(net, src, b, caps, signature,
+	tipc_node_check_dest(net, src, peer_id, b, caps, signature,
 			     &maddr, &respond, &dupl_addr);
 	if (dupl_addr)
 		disc_dupl_alert(b, src, &maddr);
@@ -178,7 +245,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
 		return;
 	if (mtyp != DSC_REQ_MSG)
 		return;
-	tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, &maddr, b);
+	tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b);
 }
 
 /* tipc_disc_add_dest - increment set of discovered nodes
@@ -216,9 +283,11 @@ void tipc_disc_remove_dest(struct tipc_discoverer *d)
 static void tipc_disc_timeout(struct timer_list *t)
 {
 	struct tipc_discoverer *d = from_timer(d, t, timer);
+	struct tipc_net *tn = tipc_net(d->net);
+	u32 self = tipc_own_addr(d->net);
 	struct tipc_media_addr maddr;
 	struct sk_buff *skb = NULL;
-	struct net *net;
+	struct net *net = d->net;
 	u32 bearer_id;
 
 	spin_lock_bh(&d->lock);
@@ -228,16 +297,29 @@ static void tipc_disc_timeout(struct timer_list *t)
 		d->timer_intv = TIPC_DISC_INACTIVE;
 		goto exit;
 	}
+
+	/* Did we just leave the address trial period ? */
+	if (!self && !time_before(jiffies, tn->addr_trial_end)) {
+		self = tn->trial_addr;
+		tipc_net_finalize(net, self);
+		msg_set_prevnode(buf_msg(d->skb), self);
+		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+	}
+
 	/* Adjust timeout interval according to discovery phase */
-	d->timer_intv *= 2;
-	if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
-		d->timer_intv = TIPC_DISC_SLOW;
-	else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
-		d->timer_intv = TIPC_DISC_FAST;
+	if (time_before(jiffies, tn->addr_trial_end)) {
+		d->timer_intv = TIPC_DISC_INIT;
+	} else {
+		d->timer_intv *= 2;
+		if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW)
+			d->timer_intv = TIPC_DISC_SLOW;
+		else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
+			d->timer_intv = TIPC_DISC_FAST;
+	}
+
 	mod_timer(&d->timer, jiffies + d->timer_intv);
 	memcpy(&maddr, &d->dest, sizeof(maddr));
 	skb = skb_clone(d->skb, GFP_ATOMIC);
-	net = d->net;
 	bearer_id = d->bearer_id;
 exit:
 	spin_unlock_bh(&d->lock);
@@ -257,18 +339,24 @@ exit:
 int tipc_disc_create(struct net *net, struct tipc_bearer *b,
 		     struct tipc_media_addr *dest, struct sk_buff **skb)
 {
+	struct tipc_net *tn = tipc_net(net);
 	struct tipc_discoverer *d;
 
 	d = kmalloc(sizeof(*d), GFP_ATOMIC);
 	if (!d)
 		return -ENOMEM;
-	d->skb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
+	d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC);
 	if (!d->skb) {
 		kfree(d);
 		return -ENOMEM;
 	}
-
 	tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b);
+
+	/* Do we need an address trial period first ? */
+	if (!tipc_own_addr(net)) {
+		tn->addr_trial_end = jiffies + msecs_to_jiffies(1000);
+		msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG);
+	}
 	memcpy(&d->dest, dest, sizeof(*dest));
 	d->net = net;
 	d->bearer_id = b->identity;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bcd76b1e440c..1289b4ba404f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -434,15 +434,16 @@ char *tipc_link_name(struct tipc_link *l)
  */
 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      int tolerance, char net_plane, u32 mtu, int priority,
-		      int window, u32 session, u32 self, u32 peer,
-		      u16 peer_caps,
+		      int window, u32 session, u32 self,
+		      u32 peer, u8 *peer_id, u16 peer_caps,
 		      struct tipc_link *bc_sndlink,
 		      struct tipc_link *bc_rcvlink,
 		      struct sk_buff_head *inputq,
 		      struct sk_buff_head *namedq,
 		      struct tipc_link **link)
 {
-	char *self_str = tipc_own_id_string(net);
+	char peer_str[NODE_ID_STR_LEN] = {0,};
+	char self_str[NODE_ID_STR_LEN] = {0,};
 	struct tipc_link *l;
 
 	l = kzalloc(sizeof(*l), GFP_ATOMIC);
@@ -451,11 +452,18 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	*link = l;
 	l->session = session;
 
-	/* Note: peer i/f name is completed by reset/activate message */
-	if (strlen(self_str) > 16)
-		sprintf(l->name, "%x:%s-%x:unknown", self, if_name, peer);
-	else
-		sprintf(l->name, "%s:%s-%x:unknown", self_str, if_name, peer);
+	/* Set link name for unicast links only */
+	if (peer_id) {
+		tipc_nodeid2string(self_str, tipc_own_id(net));
+		if (strlen(self_str) > 16)
+			sprintf(self_str, "%x", self);
+		tipc_nodeid2string(peer_str, peer_id);
+		if (strlen(peer_str) > 16)
+			sprintf(peer_str, "%x", peer);
+	}
+	/* Peer i/f name will be completed by reset/activate message */
+	sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);
+
 	strcpy(l->if_name, if_name);
 	l->addr = peer;
 	l->peer_caps = peer_caps;
@@ -503,7 +511,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
 	struct tipc_link *l;
 
 	if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
-			      0, ownnode, peer, peer_caps, bc_sndlink,
+			      0, ownnode, peer, NULL, peer_caps, bc_sndlink,
 			      NULL, inputq, namedq, link))
 		return false;
 
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d1bd1787a768..ec59348a81e8 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -73,8 +73,8 @@ enum {
 
 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      int tolerance, char net_plane, u32 mtu, int priority,
-		      int window, u32 session, u32 ownnode, u32 peer,
-		      u16 peer_caps,
+		      int window, u32 session, u32 ownnode,
+		      u32 peer, u8 *peer_id, u16 peer_caps,
 		      struct tipc_link *bc_sndlink,
 		      struct tipc_link *bc_rcvlink,
 		      struct sk_buff_head *inputq,
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index b4ba1b4f9ae7..a4e944d59394 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -550,6 +550,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
  */
 #define DSC_REQ_MSG		0
 #define DSC_RESP_MSG		1
+#define DSC_TRIAL_MSG		2
+#define DSC_TRIAL_FAIL_MSG	3
 
 /*
  * Group protocol message types
@@ -627,7 +629,6 @@ static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 2, 0, 0xffff, n);
 }
 
-
 /*
  * Word 4
  */
@@ -925,6 +926,26 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
 	return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
 }
 
+static inline u32 msg_sugg_node_addr(struct tipc_msg *m)
+{
+	return msg_word(m, 14);
+}
+
+static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n)
+{
+	msg_set_word(m, 14, n);
+}
+
+static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id)
+{
+	memcpy(msg_data(hdr), id, 16);
+}
+
+static inline u8 *msg_node_id(struct tipc_msg *hdr)
+{
+	return (u8 *)msg_data(hdr);
+}
+
 struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
 bool tipc_msg_validate(struct sk_buff **_skb);
 bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index e78674891166..29538dc00857 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,10 +112,8 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 	}
 	pr_info("Started in network mode\n");
 
-	if (node_id) {
+	if (node_id)
 		tipc_set_node_id(net, node_id);
-		tipc_net_finalize(net, tipc_own_addr(net));
-	}
 	if (addr)
 		tipc_net_finalize(net, addr);
 	return 0;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7b0c99347406..4a95c8c155c6 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -115,6 +115,7 @@ struct tipc_node {
 	u16 capabilities;
 	u32 signature;
 	u32 link_id;
+	u8 peer_id[16];
 	struct list_head publ_list;
 	struct list_head conn_sks;
 	unsigned long keepalive_intv;
@@ -156,6 +157,7 @@ static void tipc_node_delete(struct tipc_node *node);
 static void tipc_node_timeout(struct timer_list *t);
 static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
 static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
 static void tipc_node_put(struct tipc_node *node);
 static bool node_is_up(struct tipc_node *n);
 
@@ -245,6 +247,30 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
 	return node;
 }
 
+/* tipc_node_find_by_id - locate specified node object by its 128-bit id
+ * Note: this function is called only when a discovery request failed
+ * to find the node by its 32-bit id, and is not time critical
+ */
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_node *n;
+	bool found = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(n, &tn->node_list, list) {
+		read_lock_bh(&n->lock);
+		if (!memcmp(id, n->peer_id, 16) &&
+		    kref_get_unless_zero(&n->kref))
+			found = true;
+		read_unlock_bh(&n->lock);
+		if (found)
+			break;
+	}
+	rcu_read_unlock();
+	return found ? n : NULL;
+}
+
 static void tipc_node_read_lock(struct tipc_node *n)
 {
 	read_lock_bh(&n->lock);
@@ -307,7 +333,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	}
 }
 
-struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
+struct tipc_node *tipc_node_create(struct net *net, u32 addr,
+				   u8 *peer_id, u16 capabilities)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_node *n, *temp_node;
@@ -326,6 +353,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
 		goto exit;
 	}
 	n->addr = addr;
+	memcpy(&n->peer_id, peer_id, 16);
 	n->net = net;
 	n->capabilities = capabilities;
 	kref_init(&n->kref);
@@ -344,8 +372,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
 	n->signature = INVALID_NODE_SIG;
 	n->active_links[0] = INVALID_BEARER_ID;
 	n->active_links[1] = INVALID_BEARER_ID;
-	if (!tipc_link_bc_create(net, tipc_own_addr(net), n->addr,
-				 U16_MAX,
+	if (!tipc_link_bc_create(net, tipc_own_addr(net),
+				 addr, U16_MAX,
 				 tipc_link_window(tipc_bc_sndlink(net)),
 				 n->capabilities,
 				 &n->bc_entry.inputq1,
@@ -735,8 +763,51 @@ bool tipc_node_is_up(struct net *net, u32 addr)
 	return retval;
 }
 
-void tipc_node_check_dest(struct net *net, u32 onode,
-			  struct tipc_bearer *b,
+static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
+{
+	struct tipc_node *n;
+
+	addr ^= tipc_net(net)->random;
+	while ((n = tipc_node_find(net, addr))) {
+		tipc_node_put(n);
+		addr++;
+	}
+	return addr;
+}
+
+/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ */
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_node *n;
+
+	/* Suggest new address if some other peer is using this one */
+	n = tipc_node_find(net, addr);
+	if (n) {
+		if (!memcmp(n->peer_id, id, NODE_ID_LEN))
+			addr = 0;
+		tipc_node_put(n);
+		if (!addr)
+			return 0;
+		return tipc_node_suggest_addr(net, addr);
+	}
+
+	/* Suggest previously used address if peer is known */
+	n = tipc_node_find_by_id(net, id);
+	if (n) {
+		addr = n->addr;
+		tipc_node_put(n);
+	}
+	/* Even this node may be in trial phase */
+	if (tn->trial_addr == addr)
+		return tipc_node_suggest_addr(net, addr);
+
+	return addr;
+}
+
+void tipc_node_check_dest(struct net *net, u32 addr,
+			  u8 *peer_id, struct tipc_bearer *b,
 			  u16 capabilities, u32 signature,
 			  struct tipc_media_addr *maddr,
 			  bool *respond, bool *dupl_addr)
@@ -755,7 +826,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
 	*dupl_addr = false;
 	*respond = false;
 
-	n = tipc_node_create(net, onode, capabilities);
+	n = tipc_node_create(net, addr, peer_id, capabilities);
 	if (!n)
 		return;
 
@@ -840,7 +911,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
 		if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
 				      b->net_plane, b->mtu, b->priority,
 				      b->window, mod(tipc_net(net)->random),
-				      tipc_own_addr(net), onode,
+				      tipc_own_addr(net), addr, peer_id,
 				      n->capabilities,
 				      tipc_bc_sndlink(n->net), n->bc_entry.link,
 				      &le->inputq,
diff --git a/net/tipc/node.h b/net/tipc/node.h
index e06faf4fa55e..f24b83500df1 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -60,7 +60,8 @@ enum {
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
-void tipc_node_check_dest(struct net *net, u32 onode,
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
+void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
 			  struct tipc_bearer *bearer,
 			  u16 capabilities, u32 signature,
 			  struct tipc_media_addr *maddr,

From 52dfae5c85a4c1078e9f1d5e8947d4a25f73dd81 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 22 Mar 2018 20:42:52 +0100
Subject: [PATCH 1140/1678] tipc: obtain node identity from interface by
 default

Selecting and explicitly configuring a TIPC node identity may be
unwanted in some cases.

In this commit we introduce a default setting if the identity has not
been set at the moment the first bearer is enabled. We do this by
using a raw copy of a unique identifier from the used interface: MAC
address in the case of an L2 bearer, IPv4/IPv6 address in the case
of a UDP bearer.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c    | 24 +++++++++++++++---------
 net/tipc/net.h       |  1 +
 net/tipc/udp_media.c | 13 +++++++++++++
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index ae5b44ca1c1e..f7d47c89d658 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -243,12 +243,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	int res = -EINVAL;
 	char *errstr = "";
 
-	if (!tipc_own_id(net)) {
-		errstr = "not supported in standalone mode";
-		res = -ENOPROTOOPT;
-		goto rejected;
-	}
-
 	if (!bearer_name_validate(name, &b_names)) {
 		errstr = "illegal name";
 		goto rejected;
@@ -381,11 +375,13 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
 int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 			 struct nlattr *attr[])
 {
+	char *dev_name = strchr((const char *)b->name, ':') + 1;
+	int hwaddr_len = b->media->hwaddr_len;
+	u8 node_id[NODE_ID_LEN] = {0,};
 	struct net_device *dev;
-	char *driver_name = strchr((const char *)b->name, ':') + 1;
 
 	/* Find device with specified name */
-	dev = dev_get_by_name(net, driver_name);
+	dev = dev_get_by_name(net, dev_name);
 	if (!dev)
 		return -ENODEV;
 	if (tipc_mtu_bad(dev, 0)) {
@@ -393,6 +389,16 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 		return -EINVAL;
 	}
 
+	/* Autoconfigure own node identity if needed */
+	if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) {
+		memcpy(node_id, dev->dev_addr, hwaddr_len);
+		tipc_net_init(net, node_id, 0);
+	}
+	if (!tipc_own_id(net)) {
+		pr_warn("Failed to obtain node identity\n");
+		return -EINVAL;
+	}
+
 	/* Associate TIPC bearer with L2 bearer */
 	rcu_assign_pointer(b->media_ptr, dev);
 	b->pt.dev = dev;
@@ -400,7 +406,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 	b->pt.func = tipc_l2_rcv_msg;
 	dev_add_pack(&b->pt);
 	memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
-	memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
+	memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len);
 	b->bcast_addr.media_id = b->media->type_id;
 	b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
 	b->mtu = dev->mtu;
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 08efa6010022..09ad02b50bb1 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -41,6 +41,7 @@
 
 extern const struct nla_policy tipc_nl_net_policy[];
 
+int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
 void tipc_net_finalize(struct net *net, u32 addr);
 void tipc_net_stop(struct net *net);
 int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 3deabcab4882..2c13b18426d9 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -47,6 +47,8 @@
 #include <net/addrconf.h>
 #include <linux/tipc_netlink.h>
 #include "core.h"
+#include "addr.h"
+#include "net.h"
 #include "bearer.h"
 #include "netlink.h"
 #include "msg.h"
@@ -647,6 +649,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 	struct udp_port_cfg udp_conf = {0};
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
 	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	u8 node_id[NODE_ID_LEN] = {0,};
 
 	ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
 	if (!ub)
@@ -677,6 +680,16 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 	if (err)
 		goto err;
 
+	/* Autoconfigure own node identity if needed */
+	if (!tipc_own_id(net)) {
+		memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16);
+		tipc_net_init(net, node_id, 0);
+	}
+	if (!tipc_own_id(net)) {
+		pr_warn("Failed to set node id, please configure manually\n");
+		return -EINVAL;
+	}
+
 	b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
 	b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT;
 	rcu_assign_pointer(b->media_ptr, ub);

From 6e3e764b5bc83d36c6a634ebe465feddab3f7066 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Thu, 22 Mar 2018 14:59:27 -0500
Subject: [PATCH 1141/1678] dpaa_eth: use true and false for boolean values

Assign true or false to boolean variables instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 85306d1b2acf..2f933b6b2f4e 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -344,7 +344,7 @@ static void dpaa_get_ethtool_stats(struct net_device *net_dev,
 
 	/* gather congestion related counters */
 	cg_num    = 0;
-	cg_status = 0;
+	cg_status = false;
 	cg_time   = jiffies_to_msecs(priv->cgr_data.congested_jiffies);
 	if (qman_query_cgr_congested(&priv->cgr_data.cgr, &cg_status) == 0) {
 		cg_num    = priv->cgr_data.cgr_congested_count;

From c7281d591332b9dd0b48a0db3007fa7f99ee8ecc Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Thu, 22 Mar 2018 15:08:49 -0500
Subject: [PATCH 1142/1678] qed: Use true and false for boolean values

Assign true or false to boolean variables instead of an integer value.

This issue was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Sudarsana Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 553a6d17260e..cdb3eec0f68c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -298,8 +298,8 @@ static void qed_init_qm_params(struct qed_hwfn *p_hwfn)
 	qm_info->start_vport = (u8) RESC_START(p_hwfn, QED_VPORT);
 
 	/* rate limiting and weighted fair queueing are always enabled */
-	qm_info->vport_rl_en = 1;
-	qm_info->vport_wfq_en = 1;
+	qm_info->vport_rl_en = true;
+	qm_info->vport_wfq_en = true;
 
 	/* TC config is different for AH 4 port */
 	four_port = p_hwfn->cdev->num_ports_in_engine == MAX_NUM_PORTS_K2;
@@ -1276,9 +1276,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 
 	if (p_hwfn->mcp_info) {
 		if (p_hwfn->mcp_info->func_info.bandwidth_max)
-			qm_info->pf_rl_en = 1;
+			qm_info->pf_rl_en = true;
 		if (p_hwfn->mcp_info->func_info.bandwidth_min)
-			qm_info->pf_wfq_en = 1;
+			qm_info->pf_wfq_en = true;
 	}
 
 	memset(&params, 0, sizeof(params));
@@ -1630,7 +1630,7 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn,
 		qed_vf_pf_tunnel_param_update(p_hwfn, p_params->p_tunn);
 	}
 
-	p_hwfn->b_int_enabled = 1;
+	p_hwfn->b_int_enabled = true;
 
 	return 0;
 }

From d5eabf0c8faf8ce8f8c5cfcba67c40592cbb498d Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Fri, 23 Mar 2018 05:31:07 +0800
Subject: [PATCH 1143/1678] net: hns3: hclge_inform_reset_assert_to_vf() can be
 static

Fixes: 2bfbd35d8ecd ("net: hns3: Changes required in PF mailbox to support VF reset")
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 39013334a613..a6f7ffa9c259 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -79,7 +79,7 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
 	return status;
 }
 
-int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
+static int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
 {
 	u8 msg_data[2];
 	u8 dest_vfid;

From 5b73d9955fb4b0e3c37f8f6c71910293246c89dc Mon Sep 17 00:00:00 2001
From: Mathias Kresin <dev@kresin.me>
Date: Thu, 22 Mar 2018 23:31:38 +0100
Subject: [PATCH 1144/1678] net: phy: intel-xway: add VR9 version number

The VR9 phy ids are matching only for the SoC version 1.2. Rename the
macros and change the names to take this into account.

Signed-off-by: Mathias Kresin <dev@kresin.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/intel-xway.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index a11f80cb5388..c7eff6774bb1 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -149,8 +149,8 @@
 #define PHY_ID_PHY22F_1_4		0xD565A410
 #define PHY_ID_PHY11G_1_5		0xD565A401
 #define PHY_ID_PHY22F_1_5		0xD565A411
-#define PHY_ID_PHY11G_VR9		0xD565A409
-#define PHY_ID_PHY22F_VR9		0xD565A419
+#define PHY_ID_PHY11G_VR9_1_2		0xD565A409
+#define PHY_ID_PHY22F_VR9_1_2		0xD565A419
 
 static int xway_gphy_config_init(struct phy_device *phydev)
 {
@@ -312,9 +312,9 @@ static struct phy_driver xway_gphy[] = {
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 	}, {
-		.phy_id		= PHY_ID_PHY11G_VR9,
+		.phy_id		= PHY_ID_PHY11G_VR9_1_2,
 		.phy_id_mask	= 0xffffffff,
-		.name		= "Intel XWAY PHY11G (xRX integrated)",
+		.name		= "Intel XWAY PHY11G (xRX v1.2 integrated)",
 		.features	= PHY_GBIT_FEATURES,
 		.flags		= PHY_HAS_INTERRUPT,
 		.config_init	= xway_gphy_config_init,
@@ -324,9 +324,9 @@ static struct phy_driver xway_gphy[] = {
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 	}, {
-		.phy_id		= PHY_ID_PHY22F_VR9,
+		.phy_id		= PHY_ID_PHY22F_VR9_1_2,
 		.phy_id_mask	= 0xffffffff,
-		.name		= "Intel XWAY PHY22F (xRX integrated)",
+		.name		= "Intel XWAY PHY22F (xRX v1.2 integrated)",
 		.features	= PHY_BASIC_FEATURES,
 		.flags		= PHY_HAS_INTERRUPT,
 		.config_init	= xway_gphy_config_init,
@@ -346,8 +346,8 @@ static struct mdio_device_id __maybe_unused xway_gphy_tbl[] = {
 	{ PHY_ID_PHY22F_1_4, 0xffffffff },
 	{ PHY_ID_PHY11G_1_5, 0xffffffff },
 	{ PHY_ID_PHY22F_1_5, 0xffffffff },
-	{ PHY_ID_PHY11G_VR9, 0xffffffff },
-	{ PHY_ID_PHY22F_VR9, 0xffffffff },
+	{ PHY_ID_PHY11G_VR9_1_2, 0xffffffff },
+	{ PHY_ID_PHY22F_VR9_1_2, 0xffffffff },
 	{ }
 };
 MODULE_DEVICE_TABLE(mdio, xway_gphy_tbl);

From f452518c982e57538e6d49da0a2c80eef22087ab Mon Sep 17 00:00:00 2001
From: Mathias Kresin <dev@kresin.me>
Date: Thu, 22 Mar 2018 23:31:39 +0100
Subject: [PATCH 1145/1678] net: phy: intel-xway: add VR9 v1.1 phy ids

The phys embedded into the v1.1 of the VR9 SoC are using different phy
ids. Add the phy ids to use the driver for this VR9 version as well.

Signed-off-by: Mathias Kresin <dev@kresin.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/intel-xway.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index c7eff6774bb1..7d936fb61c22 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -149,6 +149,8 @@
 #define PHY_ID_PHY22F_1_4		0xD565A410
 #define PHY_ID_PHY11G_1_5		0xD565A401
 #define PHY_ID_PHY22F_1_5		0xD565A411
+#define PHY_ID_PHY11G_VR9_1_1		0xD565A408
+#define PHY_ID_PHY22F_VR9_1_1		0xD565A418
 #define PHY_ID_PHY11G_VR9_1_2		0xD565A409
 #define PHY_ID_PHY22F_VR9_1_2		0xD565A419
 
@@ -311,6 +313,30 @@ static struct phy_driver xway_gphy[] = {
 		.config_intr	= xway_gphy_config_intr,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
+	}, {
+		.phy_id		= PHY_ID_PHY11G_VR9_1_1,
+		.phy_id_mask	= 0xffffffff,
+		.name		= "Intel XWAY PHY11G (xRX v1.1 integrated)",
+		.features	= PHY_GBIT_FEATURES,
+		.flags		= PHY_HAS_INTERRUPT,
+		.config_init	= xway_gphy_config_init,
+		.ack_interrupt	= xway_gphy_ack_interrupt,
+		.did_interrupt	= xway_gphy_did_interrupt,
+		.config_intr	= xway_gphy_config_intr,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+	}, {
+		.phy_id		= PHY_ID_PHY22F_VR9_1_1,
+		.phy_id_mask	= 0xffffffff,
+		.name		= "Intel XWAY PHY22F (xRX v1.1 integrated)",
+		.features	= PHY_BASIC_FEATURES,
+		.flags		= PHY_HAS_INTERRUPT,
+		.config_init	= xway_gphy_config_init,
+		.ack_interrupt	= xway_gphy_ack_interrupt,
+		.did_interrupt	= xway_gphy_did_interrupt,
+		.config_intr	= xway_gphy_config_intr,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
 	}, {
 		.phy_id		= PHY_ID_PHY11G_VR9_1_2,
 		.phy_id_mask	= 0xffffffff,
@@ -346,6 +372,8 @@ static struct mdio_device_id __maybe_unused xway_gphy_tbl[] = {
 	{ PHY_ID_PHY22F_1_4, 0xffffffff },
 	{ PHY_ID_PHY11G_1_5, 0xffffffff },
 	{ PHY_ID_PHY22F_1_5, 0xffffffff },
+	{ PHY_ID_PHY11G_VR9_1_1, 0xffffffff },
+	{ PHY_ID_PHY22F_VR9_1_1, 0xffffffff },
 	{ PHY_ID_PHY11G_VR9_1_2, 0xffffffff },
 	{ PHY_ID_PHY22F_VR9_1_2, 0xffffffff },
 	{ }

From 1aa37845f7601ce9159cd08fdf381cfb5f494c12 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Mon, 12 Mar 2018 09:22:55 -0400
Subject: [PATCH 1146/1678] ixgbe: add status reg reads to ixgbe_check_remove

Add status register reads and delay between reads to ixgbe_check_remove.
Registers can read 0xFFFFFFFF during PCI reset, which causes the driver
to remove the adapter. The additional status register reads can reduce the
chance of this race condition.

If the status register is not 0xFFFFFFFF, then ixgbe_check_remove returns
the value of the register being read.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbe/ixgbe_common.h   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 31 ++++++++++++-------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 67f304289fd9..2b311382167a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -154,6 +154,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
 void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
 				      ixgbe_link_speed speed);
 
+#define IXGBE_FAILED_READ_RETRIES 5
 #define IXGBE_FAILED_READ_REG 0xffffffffU
 #define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU
 #define IXGBE_FAILED_READ_CFG_WORD 0xffffU
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 85369423452d..faf368b14389 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -353,23 +353,32 @@ static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
 		ixgbe_service_event_schedule(adapter);
 }
 
-static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
+static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
 {
+	u8 __iomem *reg_addr;
 	u32 value;
+	int i;
 
-	/* The following check not only optimizes a bit by not
-	 * performing a read on the status register when the
-	 * register just read was a status register read that
-	 * returned IXGBE_FAILED_READ_REG. It also blocks any
-	 * potential recursion.
+	reg_addr = READ_ONCE(hw->hw_addr);
+	if (ixgbe_removed(reg_addr))
+		return IXGBE_FAILED_READ_REG;
+
+	/* Register read of 0xFFFFFFF can indicate the adapter has been removed,
+	 * so perform several status register reads to determine if the adapter
+	 * has been removed.
 	 */
-	if (reg == IXGBE_STATUS) {
-		ixgbe_remove_adapter(hw);
-		return;
+	for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
+		value = readl(reg_addr + IXGBE_STATUS);
+		if (value != IXGBE_FAILED_READ_REG)
+			break;
+		mdelay(3);
 	}
-	value = ixgbe_read_reg(hw, IXGBE_STATUS);
+
 	if (value == IXGBE_FAILED_READ_REG)
 		ixgbe_remove_adapter(hw);
+	else
+		value = readl(reg_addr + reg);
+	return value;
 }
 
 /**
@@ -415,7 +424,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
 writes_completed:
 	value = readl(reg_addr + reg);
 	if (unlikely(value == IXGBE_FAILED_READ_REG))
-		ixgbe_check_remove(hw, reg);
+		value = ixgbe_check_remove(hw, reg);
 	return value;
 }
 

From 9bf1e20f65796382e3b5ef28d0881f7e8fae0c92 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Thu, 15 Mar 2018 08:22:07 -0400
Subject: [PATCH 1147/1678] ixgbe: fix read-modify-write in x550 phy setup

Replaced an assignment operation with an OR operation.

The variable assignment was overwriting the value read from the PHY
register. The OR operation sets only the intended register bits.

The bits that were being overwritten are reserved, so the assignment had no
functional impact.

Reported by: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index f470d0204771..3123267dfba9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1847,9 +1847,9 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
 			 (IXGBE_CS4227_EDC_MODE_SR << 1));
 
 	if (setup_linear)
-		reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+		reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
 	else
-		reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+		reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
 
 	ret_val = hw->phy.ops.write_reg(hw, reg_slice,
 					IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);

From 2137aec017fa1fd3ddbd3b01f59e506abef2fef0 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Fri, 16 Mar 2018 11:09:04 -0700
Subject: [PATCH 1148/1678] ixgbe: no need for ipsec csum feature check

With the patch
commit f8aa2696b4af ("esp: check the NETIF_F_HW_ESP_TX_CSUM bit before segmenting")
we no longer need to protect ourself from checksum
offload requests on IPsec packets, so we can remove
the check in our .ndo_features_check callback.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index faf368b14389..0eb45d1cd0b2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9917,12 +9917,6 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
 		features &= ~NETIF_F_TSO;
 
-#ifdef CONFIG_XFRM_OFFLOAD
-	/* IPsec offload doesn't get along well with others *yet* */
-	if (skb->sp)
-		features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
-#endif
-
 	return features;
 }
 

From 871dd09bdb02957e259ff672876b04891f356d10 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Fri, 16 Mar 2018 11:09:05 -0700
Subject: [PATCH 1149/1678] ixgbe: remove unneeded ipsec test in TX path

Since the ipsec data fields will be zero anyway in the non-ipsec
case, we can remove the conditional jump.

Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0eb45d1cd0b2..74da310378da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7873,10 +7873,8 @@ no_csum:
 	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
-		fceof_saidx |= itd->sa_idx;
-		type_tucmd |= itd->flags | itd->trailer_len;
-	}
+	fceof_saidx |= itd->sa_idx;
+	type_tucmd |= itd->flags | itd->trailer_len;
 
 	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
 }

From 1db685e6766da5b9929043f4d60dfbb69f7c57c5 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Fri, 16 Mar 2018 11:09:06 -0700
Subject: [PATCH 1150/1678] ixgbe: no need for esp trailer if GSO

There is no need to calculate the trailer length if we're doing
a GSO/TSO, as there is no trailer added to the packet data.
Also, don't bother clearing the flags field as it was already
cleared earlier.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c    | 35 +++++++++++--------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index f2254528dcfc..5ddea43a21c2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -774,11 +774,7 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 
 	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
 
-	itd->flags = 0;
 	if (xs->id.proto == IPPROTO_ESP) {
-		struct sk_buff *skb = first->skb;
-		int ret, authlen, trailerlen;
-		u8 padlen;
 
 		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
 			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
@@ -790,19 +786,28 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 		 * padlen bytes of padding.  This ends up not the same
 		 * as the static value found in xs->props.trailer_len (21).
 		 *
-		 * The "correct" way to get the auth length would be to use
-		 *    authlen = crypto_aead_authsize(xs->data);
-		 * but since we know we only have one size to worry about
-		 * we can let the compiler use the constant and save us a
-		 * few CPU cycles.
+		 * ... but if we're doing GSO, don't bother as the stack
+		 * doesn't add a trailer for those.
 		 */
-		authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+		if (!skb_is_gso(first->skb)) {
+			/* The "correct" way to get the auth length would be
+			 * to use
+			 *    authlen = crypto_aead_authsize(xs->data);
+			 * but since we know we only have one size to worry
+			 * about * we can let the compiler use the constant
+			 * and save us a few CPU cycles.
+			 */
+			const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+			struct sk_buff *skb = first->skb;
+			u8 padlen;
+			int ret;
 
-		ret = skb_copy_bits(skb, skb->len - (authlen + 2), &padlen, 1);
-		if (unlikely(ret))
-			return 0;
-		trailerlen = authlen + 2 + padlen;
-		itd->trailer_len = trailerlen;
+			ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+					    &padlen, 1);
+			if (unlikely(ret))
+				return 0;
+			itd->trailer_len = authlen + 2 + padlen;
+		}
 	}
 	if (tsa->encrypt)
 		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;

From 70da6824c3db2b0ab3088a5451eda6aa2302c51f Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@oracle.com>
Date: Fri, 16 Mar 2018 11:09:07 -0700
Subject: [PATCH 1151/1678] ixgbe: enable TSO with IPsec offload

Fix things up to support TSO offload in conjunction
with IPsec hw offload.  This raises throughput with
IPsec offload on to nearly line rate.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c    |  9 ++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 31 +++++++++++++------
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 5ddea43a21c2..68af127987bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -929,8 +929,13 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
 	ixgbe_ipsec_clear_hw_tables(adapter);
 
 	adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
-	adapter->netdev->features |= NETIF_F_HW_ESP;
-	adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+#define IXGBE_ESP_FEATURES	(NETIF_F_HW_ESP | \
+				 NETIF_F_HW_ESP_TX_CSUM | \
+				 NETIF_F_GSO_ESP)
+
+	adapter->netdev->features |= IXGBE_ESP_FEATURES;
+	adapter->netdev->hw_enc_features |= IXGBE_ESP_FEATURES;
 
 	return;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 74da310378da..c0d8d7238f71 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7730,7 +7730,8 @@ static void ixgbe_service_task(struct work_struct *work)
 
 static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 		     struct ixgbe_tx_buffer *first,
-		     u8 *hdr_len)
+		     u8 *hdr_len,
+		     struct ixgbe_ipsec_tx_data *itd)
 {
 	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
 	struct sk_buff *skb = first->skb;
@@ -7744,6 +7745,7 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 		unsigned char *hdr;
 	} l4;
 	u32 paylen, l4_offset;
+	u32 fceof_saidx = 0;
 	int err;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -7769,13 +7771,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 	if (ip.v4->version == 4) {
 		unsigned char *csum_start = skb_checksum_start(skb);
 		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+		int len = csum_start - trans_start;
 
 		/* IP header will have to cancel out any data that
-		 * is not a part of the outer IP header
+		 * is not a part of the outer IP header, so set to
+		 * a reverse csum if needed, else init check to 0.
 		 */
-		ip.v4->check = csum_fold(csum_partial(trans_start,
-						      csum_start - trans_start,
-						      0));
+		ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+					   csum_fold(csum_partial(trans_start,
+								  len, 0)) : 0;
 		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -7806,12 +7810,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 	mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
 
+	fceof_saidx |= itd->sa_idx;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
 	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
 	vlan_macip_lens = l4.hdr - ip.hdr;
 	vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd,
+	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
 			  mss_l4len_idx);
 
 	return 1;
@@ -8502,7 +8509,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 	if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
 		goto out_drop;
 #endif
-	tso = ixgbe_tso(tx_ring, first, &hdr_len);
+	tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
 	if (tso < 0)
 		goto out_drop;
 	else if (!tso)
@@ -9911,9 +9918,15 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 
 	/* We can only support IPV4 TSO in tunnels if we can mangle the
 	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 * IPsec offoad sets skb->encapsulation but still can handle
+	 * the TSO, so it's the exception.
 	 */
-	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
-		features &= ~NETIF_F_TSO;
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
+#ifdef CONFIG_XFRM
+		if (!skb->sp)
+#endif
+			features &= ~NETIF_F_TSO;
+	}
 
 	return features;
 }

From c7aec59657b60f3a29fc7d3274ebefd698879301 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 16 Mar 2018 15:34:02 -0700
Subject: [PATCH 1152/1678] ixgbevf: Add XDP support for pass and drop actions

Implement XDP_PASS and XDP_DROP based on the ixgbe implementation.

Based largely on commit 924708081629 ("ixgbe: add XDP support for pass and
drop actions").

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ethtool.c  |   9 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  |  10 +-
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 201 ++++++++++++++----
 3 files changed, 178 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index e7623fed42da..4946a62c70a4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
+  Copyright(c) 1999 - 2018 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -336,8 +336,13 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 		for (i = 0; i < adapter->num_rx_queues; i++) {
 			/* clone ring and setup updated count */
 			rx_ring[i] = *adapter->rx_ring[i];
+
+			/* Clear copied XDP RX-queue info */
+			memset(&rx_ring[i].xdp_rxq, 0,
+			       sizeof(rx_ring[i].xdp_rxq));
+
 			rx_ring[i].count = new_rx_count;
-			err = ixgbevf_setup_rx_resources(&rx_ring[i]);
+			err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]);
 			if (err) {
 				while (i) {
 					i--;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index c06ea4dc49a0..d41365b1b674 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -2,7 +2,7 @@
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
+  Copyright(c) 1999 - 2018 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -35,6 +35,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
 
 #include "vf.h"
 
@@ -100,6 +101,7 @@ struct ixgbevf_ring {
 	struct ixgbevf_ring *next;
 	struct ixgbevf_q_vector *q_vector;	/* backpointer to q_vector */
 	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
 	struct device *dev;
 	void *desc;			/* descriptor ring memory */
 	dma_addr_t dma;			/* phys. address of descriptor ring */
@@ -120,7 +122,7 @@ struct ixgbevf_ring {
 		struct ixgbevf_tx_queue_stats tx_stats;
 		struct ixgbevf_rx_queue_stats rx_stats;
 	};
-
+	struct xdp_rxq_info xdp_rxq;
 	u64 hw_csum_rx_error;
 	u8 __iomem *tail;
 	struct sk_buff *skb;
@@ -357,6 +359,7 @@ struct ixgbevf_adapter {
 
 	/* OS defined structs */
 	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
 	struct pci_dev *pdev;
 
 	/* structs defined in ixgbe_vf.h */
@@ -443,7 +446,8 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter);
 void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);
 void ixgbevf_reset(struct ixgbevf_adapter *adapter);
 void ixgbevf_set_ethtool_ops(struct net_device *netdev);
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *);
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+			       struct ixgbevf_ring *rx_ring);
 int ixgbevf_setup_tx_resources(struct ixgbevf_ring *);
 void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
 void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4da449e0a4ba..2696b5a6806f 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
 
   Intel 82599 Virtual Function driver
-  Copyright(c) 1999 - 2015 Intel Corporation.
+  Copyright(c) 1999 - 2018 Intel Corporation.
 
   This program is free software; you can redistribute it and/or modify it
   under the terms and conditions of the GNU General Public License,
@@ -50,6 +50,9 @@
 #include <linux/if_vlan.h>
 #include <linux/prefetch.h>
 #include <net/mpls.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
 
 #include "ixgbevf.h"
 
@@ -552,19 +555,21 @@ struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
 }
 
 static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
-				  struct ixgbevf_rx_buffer *rx_buffer)
+				  struct ixgbevf_rx_buffer *rx_buffer,
+				  struct sk_buff *skb)
 {
 	if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
 		/* hand second half of page back to the ring */
 		ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
 	} else {
-		/* We are not reusing the buffer so unmap it and free
-		 * any references we are holding to it
-		 */
-		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
-				     ixgbevf_rx_pg_size(rx_ring),
-				     DMA_FROM_DEVICE,
-				     IXGBEVF_RX_DMA_ATTR);
+		if (IS_ERR(skb))
+			/* We are not reusing the buffer so unmap it and free
+			 * any references we are holding to it
+			 */
+			dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+					     ixgbevf_rx_pg_size(rx_ring),
+					     DMA_FROM_DEVICE,
+					     IXGBEVF_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buffer->page,
 					rx_buffer->pagecnt_bias);
 	}
@@ -737,6 +742,10 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
 				    union ixgbe_adv_rx_desc *rx_desc,
 				    struct sk_buff *skb)
 {
+	/* XDP packets use error pointer so abort at this point */
+	if (IS_ERR(skb))
+		return true;
+
 	/* verify that the packet does not have any known errors */
 	if (unlikely(ixgbevf_test_staterr(rx_desc,
 					  IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@@ -853,22 +862,23 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
 static
 struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
 				      struct ixgbevf_rx_buffer *rx_buffer,
-				      union ixgbe_adv_rx_desc *rx_desc,
-				      unsigned int size)
+				      struct xdp_buff *xdp,
+				      union ixgbe_adv_rx_desc *rx_desc)
 {
-	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	unsigned int size = xdp->data_end - xdp->data;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
 #endif
 	unsigned int headlen;
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
+	prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
+	prefetch(xdp->data + L1_CACHE_BYTES);
 #endif
 
 	/* allocate a skb to store the frags */
@@ -879,16 +889,18 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > IXGBEVF_RX_HDR_SIZE)
-		headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
+		headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
-	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+	memcpy(__skb_put(skb, headlen), xdp->data,
+	       ALIGN(headlen, sizeof(long)));
 
 	/* update all of the pointers */
 	size -= headlen;
 	if (size) {
 		skb_add_rx_frag(skb, 0, rx_buffer->page,
-				(va + headlen) - page_address(rx_buffer->page),
+				(xdp->data + headlen) -
+					page_address(rx_buffer->page),
 				size, truesize);
 #if (PAGE_SIZE < 8192)
 		rx_buffer->page_offset ^= truesize;
@@ -912,32 +924,32 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
 
 static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 					 struct ixgbevf_rx_buffer *rx_buffer,
-					 union ixgbe_adv_rx_desc *rx_desc,
-					 unsigned int size)
+					 struct xdp_buff *xdp,
+					 union ixgbe_adv_rx_desc *rx_desc)
 {
-	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
 	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
-				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size);
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
 #endif
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
+	prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-	prefetch(va + L1_CACHE_BYTES);
+	prefetch(xdp->data + L1_CACHE_BYTES);
 #endif
 
-	/* build an skb to around the page buffer */
-	skb = build_skb(va - IXGBEVF_SKB_PAD, truesize);
+	/* build an skb around the page buffer */
+	skb = build_skb(xdp->data_hard_start, truesize);
 	if (unlikely(!skb))
 		return NULL;
 
 	/* update pointers within the skb to store the data */
-	skb_reserve(skb, IXGBEVF_SKB_PAD);
-	__skb_put(skb, size);
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
 
 	/* update buffer offset */
 #if (PAGE_SIZE < 8192)
@@ -948,6 +960,43 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 
 	return skb;
 }
+
+#define IXGBEVF_XDP_PASS 0
+#define IXGBEVF_XDP_CONSUMED 1
+
+static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_ring  *rx_ring,
+				       struct xdp_buff *xdp)
+{
+	int result = IXGBEVF_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	u32 act;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	if (!xdp_prog)
+		goto xdp_out;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fallthrough */
+	case XDP_TX:
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping packet */
+	case XDP_DROP:
+		result = IXGBEVF_XDP_CONSUMED;
+		break;
+	}
+xdp_out:
+	rcu_read_unlock();
+	return ERR_PTR(-result);
+}
+
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 				struct ixgbevf_ring *rx_ring,
 				int budget)
@@ -955,10 +1004,13 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
 	struct sk_buff *skb = rx_ring->skb;
+	struct xdp_buff xdp;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
 
 	while (likely(total_rx_packets < budget)) {
-		union ixgbe_adv_rx_desc *rx_desc;
 		struct ixgbevf_rx_buffer *rx_buffer;
+		union ixgbe_adv_rx_desc *rx_desc;
 		unsigned int size;
 
 		/* return some buffers to hardware, one at a time is too slow */
@@ -981,14 +1033,30 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
 
 		/* retrieve a buffer from the ring */
-		if (skb)
+		if (!skb) {
+			xdp.data = page_address(rx_buffer->page) +
+				   rx_buffer->page_offset;
+			xdp_set_data_meta_invalid(&xdp);
+			xdp.data_hard_start = xdp.data -
+					      ixgbevf_rx_offset(rx_ring);
+			xdp.data_end = xdp.data + size;
+
+			skb = ixgbevf_run_xdp(rx_ring, &xdp);
+		}
+
+		if (IS_ERR(skb)) {
+			total_rx_packets++;
+			total_rx_bytes += size;
+			rx_buffer->pagecnt_bias++;
+		} else if (skb) {
 			ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
-		else if (ring_uses_build_skb(rx_ring))
+		} else if (ring_uses_build_skb(rx_ring)) {
 			skb = ixgbevf_build_skb(rx_ring, rx_buffer,
-						rx_desc, size);
-		else
+						&xdp, rx_desc);
+		} else {
 			skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
-						    rx_desc, size);
+						    &xdp, rx_desc);
+		}
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
@@ -997,7 +1065,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 			break;
 		}
 
-		ixgbevf_put_rx_buffer(rx_ring, rx_buffer);
+		ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb);
 		cleaned_count++;
 
 		/* fetch next buffer in frame if non-eop */
@@ -3159,11 +3227,13 @@ err_setup_tx:
 
 /**
  * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
  *
  * Returns 0 on success, negative on failure
  **/
-int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+			       struct ixgbevf_ring *rx_ring)
 {
 	int size;
 
@@ -3184,6 +3254,13 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
 	if (!rx_ring->desc)
 		goto err;
 
+	/* XDP RX-queue info */
+	if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+			     rx_ring->queue_index) < 0)
+		goto err;
+
+	rx_ring->xdp_prog = adapter->xdp_prog;
+
 	return 0;
 err:
 	vfree(rx_ring->rx_buffer_info);
@@ -3207,7 +3284,7 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
 	int i, err = 0;
 
 	for (i = 0; i < adapter->num_rx_queues; i++) {
-		err = ixgbevf_setup_rx_resources(adapter->rx_ring[i]);
+		err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]);
 		if (!err)
 			continue;
 		hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
@@ -3232,6 +3309,8 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring)
 {
 	ixgbevf_clean_rx_ring(rx_ring);
 
+	rx_ring->xdp_prog = NULL;
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 
@@ -3918,6 +3997,12 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
 	int ret;
 
+	/* prevent MTU being changed to a size unsupported by XDP */
+	if (adapter->xdp_prog) {
+		dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n");
+		return -EPERM;
+	}
+
 	spin_lock_bh(&adapter->mbx_lock);
 	/* notify the PF of our intent to use this size of frame */
 	ret = hw->mac.ops.set_rlpml(hw, max_frame);
@@ -4101,6 +4186,47 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev,
 	return features;
 }
 
+static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+
+	/* verify ixgbevf ring attributes are sufficient for XDP */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbevf_ring *ring = adapter->rx_ring[i];
+
+		if (frame_size > ixgbevf_rx_bufsz(ring))
+			return -EINVAL;
+	}
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ixgbevf_xdp_setup(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = !!(adapter->xdp_prog);
+		xdp->prog_id = adapter->xdp_prog ?
+			       adapter->xdp_prog->aux->id : 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops ixgbevf_netdev_ops = {
 	.ndo_open		= ixgbevf_open,
 	.ndo_stop		= ixgbevf_close,
@@ -4117,6 +4243,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = {
 	.ndo_poll_controller	= ixgbevf_netpoll,
 #endif
 	.ndo_features_check	= ixgbevf_features_check,
+	.ndo_bpf		= ixgbevf_xdp,
 };
 
 static void ixgbevf_assign_netdev_ops(struct net_device *dev)

From 21092e9ce8b1395f45b2648c839a6d60b21c46e8 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 16 Mar 2018 15:34:03 -0700
Subject: [PATCH 1153/1678] ixgbevf: Add support for XDP_TX action

This implements the XDP_TX action which is modeled on the ixgbe
implementation. However instead of using CPU id to determine which XDP
queue to use, this uses the received RX queue index, which is similar
to i40e. Doing this eliminates the restriction that number of CPUs not
exceed number of XDP queues that ixgbe has.

Also, based on the number of queues available, the number of TX queues
may be reduced when an XDP program is loaded in order to accommodate the
XDP queues.

Based largely on
commit 33fdc82f0883 ("ixgbe: add support for XDP_TX action")

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ethtool.c  |  35 ++-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  |  20 +-
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 271 ++++++++++++++++--
 3 files changed, 294 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 4946a62c70a4..da8c0e299a37 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -269,7 +269,7 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL;
 	u32 new_rx_count, new_tx_count;
-	int i, err = 0;
+	int i, j, err = 0;
 
 	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
 		return -EINVAL;
@@ -293,15 +293,19 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 	if (!netif_running(adapter->netdev)) {
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			adapter->xdp_ring[i]->count = new_tx_count;
 		for (i = 0; i < adapter->num_rx_queues; i++)
 			adapter->rx_ring[i]->count = new_rx_count;
 		adapter->tx_ring_count = new_tx_count;
+		adapter->xdp_ring_count = new_tx_count;
 		adapter->rx_ring_count = new_rx_count;
 		goto clear_reset;
 	}
 
 	if (new_tx_count != adapter->tx_ring_count) {
-		tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring));
+		tx_ring = vmalloc((adapter->num_tx_queues +
+				   adapter->num_xdp_queues) * sizeof(*tx_ring));
 		if (!tx_ring) {
 			err = -ENOMEM;
 			goto clear_reset;
@@ -324,6 +328,24 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 				goto clear_reset;
 			}
 		}
+
+		for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+			/* clone ring and setup updated count */
+			tx_ring[i] = *adapter->xdp_ring[j];
+			tx_ring[i].count = new_tx_count;
+			err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbevf_free_tx_resources(&tx_ring[i]);
+				}
+
+				vfree(tx_ring);
+				tx_ring = NULL;
+
+				goto clear_reset;
+			}
+		}
 	}
 
 	if (new_rx_count != adapter->rx_ring_count) {
@@ -368,6 +390,12 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 		}
 		adapter->tx_ring_count = new_tx_count;
 
+		for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+			ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+			*adapter->xdp_ring[j] = tx_ring[i];
+		}
+		adapter->xdp_ring_count = new_tx_count;
+
 		vfree(tx_ring);
 		tx_ring = NULL;
 	}
@@ -390,7 +418,8 @@ static int ixgbevf_set_ringparam(struct net_device *netdev,
 clear_reset:
 	/* free Tx resources if Rx error is encountered */
 	if (tx_ring) {
-		for (i = 0; i < adapter->num_tx_queues; i++)
+		for (i = 0;
+		     i < adapter->num_tx_queues + adapter->num_xdp_queues; i++)
 			ixgbevf_free_tx_resources(&tx_ring[i]);
 		vfree(tx_ring);
 	}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index d41365b1b674..447ce1d5e0e3 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -52,7 +52,11 @@
 struct ixgbevf_tx_buffer {
 	union ixgbe_adv_tx_desc *next_to_watch;
 	unsigned long time_stamp;
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		/* XDP uses address ptr on irq_clean */
+		void *data;
+	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	__be16 protocol;
@@ -95,8 +99,16 @@ enum ixgbevf_ring_state_t {
 	__IXGBEVF_RX_BUILD_SKB_ENABLED,
 	__IXGBEVF_TX_DETECT_HANG,
 	__IXGBEVF_HANG_CHECK_ARMED,
+	__IXGBEVF_TX_XDP_RING,
 };
 
+#define ring_is_xdp(ring) \
+		test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+		set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+		clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+
 struct ixgbevf_ring {
 	struct ixgbevf_ring *next;
 	struct ixgbevf_q_vector *q_vector;	/* backpointer to q_vector */
@@ -139,6 +151,7 @@ struct ixgbevf_ring {
 
 #define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES
 #define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES
+#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES
 #define IXGBEVF_MAX_RSS_QUEUES		2
 #define IXGBEVF_82599_RETA_SIZE		128	/* 128 entries */
 #define IXGBEVF_X550_VFRETA_SIZE	64	/* 64 entries */
@@ -339,6 +352,10 @@ struct ixgbevf_adapter {
 	u32 eims_enable_mask;
 	u32 eims_other;
 
+	/* XDP */
+	int num_xdp_queues;
+	struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES];
+
 	/* TX */
 	int num_tx_queues;
 	struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
@@ -373,6 +390,7 @@ struct ixgbevf_adapter {
 	unsigned long state;
 	u64 tx_busy;
 	unsigned int tx_ring_count;
+	unsigned int xdp_ring_count;
 	unsigned int rx_ring_count;
 
 	u8 __iomem *io_addr; /* Mainly for iounmap use */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 2696b5a6806f..309f549808e4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -324,7 +324,10 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 		total_packets += tx_buffer->gso_segs;
 
 		/* free the skb */
-		napi_consume_skb(tx_buffer->skb, napi_budget);
+		if (ring_is_xdp(tx_ring))
+			page_frag_free(tx_buffer->data);
+		else
+			napi_consume_skb(tx_buffer->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -388,7 +391,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 
 		eop_desc = tx_ring->tx_buffer_info[i].next_to_watch;
 
-		pr_err("Detected Tx Unit Hang\n"
+		pr_err("Detected Tx Unit Hang%s\n"
 		       "  Tx Queue             <%d>\n"
 		       "  TDH, TDT             <%x>, <%x>\n"
 		       "  next_to_use          <%x>\n"
@@ -398,6 +401,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 		       "  eop_desc->wb.status  <%x>\n"
 		       "  time_stamp           <%lx>\n"
 		       "  jiffies              <%lx>\n",
+		       ring_is_xdp(tx_ring) ? " XDP" : "",
 		       tx_ring->queue_index,
 		       IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)),
 		       IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)),
@@ -405,7 +409,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 		       eop_desc, (eop_desc ? eop_desc->wb.status : 0),
 		       tx_ring->tx_buffer_info[i].time_stamp, jiffies);
 
-		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+		if (!ring_is_xdp(tx_ring))
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
 
 		/* schedule immediate reset if we believe we hung */
 		ixgbevf_tx_timeout_reset(adapter);
@@ -413,6 +419,9 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 		return true;
 	}
 
+	if (ring_is_xdp(tx_ring))
+		return !!budget;
+
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
 		     (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
@@ -963,11 +972,78 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 
 #define IXGBEVF_XDP_PASS 0
 #define IXGBEVF_XDP_CONSUMED 1
+#define IXGBEVF_XDP_TX 2
 
-static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_ring  *rx_ring,
+static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
+				 struct xdp_buff *xdp)
+{
+	struct ixgbevf_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	u32 len, cmd_type;
+	dma_addr_t dma;
+	u16 i;
+
+	len = xdp->data_end - xdp->data;
+
+	if (unlikely(!ixgbevf_desc_unused(ring)))
+		return IXGBEVF_XDP_CONSUMED;
+
+	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring->dev, dma))
+		return IXGBEVF_XDP_CONSUMED;
+
+	/* record the location of the first descriptor for this packet */
+	tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
+	tx_buffer->bytecount = len;
+	tx_buffer->gso_segs = 1;
+	tx_buffer->protocol = 0;
+
+	i = ring->next_to_use;
+	tx_desc = IXGBEVF_TX_DESC(ring, i);
+
+	dma_unmap_len_set(tx_buffer, len, len);
+	dma_unmap_addr_set(tx_buffer, dma, dma);
+	tx_buffer->data = xdp->data;
+	tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+	/* put descriptor type bits */
+	cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+		   IXGBE_ADVTXD_DCMD_DEXT |
+		   IXGBE_ADVTXD_DCMD_IFCS;
+	cmd_type |= len | IXGBE_TXD_CMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	tx_desc->read.olinfo_status =
+			cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) |
+				    IXGBE_ADVTXD_CC);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	i++;
+	if (i == ring->count)
+		i = 0;
+
+	tx_buffer->next_to_watch = tx_desc;
+	ring->next_to_use = i;
+
+	/* notify HW of packet */
+	ixgbevf_write_tail(ring, i);
+	return IXGBEVF_XDP_TX;
+}
+
+static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+				       struct ixgbevf_ring  *rx_ring,
 				       struct xdp_buff *xdp)
 {
 	int result = IXGBEVF_XDP_PASS;
+	struct ixgbevf_ring *xdp_ring;
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
@@ -981,10 +1057,13 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_ring  *rx_ring,
 	switch (act) {
 	case XDP_PASS:
 		break;
+	case XDP_TX:
+		xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
+		result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		/* fallthrough */
-	case XDP_TX:
 	case XDP_ABORTED:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		/* fallthrough -- handle aborts by dropping packet */
@@ -997,11 +1076,29 @@ xdp_out:
 	return ERR_PTR(-result);
 }
 
+static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
+				   struct ixgbevf_rx_buffer *rx_buffer,
+				   unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+
+	rx_buffer->page_offset ^= truesize;
+#else
+	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+				SKB_DATA_ALIGN(size);
+
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 				struct ixgbevf_ring *rx_ring,
 				int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	struct ixgbevf_adapter *adapter = q_vector->adapter;
 	u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
 	struct sk_buff *skb = rx_ring->skb;
 	struct xdp_buff xdp;
@@ -1041,13 +1138,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 					      ixgbevf_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
 
-			skb = ixgbevf_run_xdp(rx_ring, &xdp);
+			skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
 		}
 
 		if (IS_ERR(skb)) {
+			if (PTR_ERR(skb) == -IXGBEVF_XDP_TX)
+				ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
+						       size);
+			else
+				rx_buffer->pagecnt_bias++;
 			total_rx_packets++;
 			total_rx_bytes += size;
-			rx_buffer->pagecnt_bias++;
 		} else if (skb) {
 			ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
 		} else if (ring_uses_build_skb(rx_ring)) {
@@ -1608,6 +1709,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
 	/* Setup the HW Tx Head and Tail descriptor pointers */
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]);
 }
 
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT	2
@@ -2239,7 +2342,10 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
 		union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
 
 		/* Free all the Tx ring sk_buffs */
-		dev_kfree_skb_any(tx_buffer->skb);
+		if (ring_is_xdp(tx_ring))
+			page_frag_free(tx_buffer->data);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -2307,6 +2413,8 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter)
 
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		ixgbevf_clean_tx_ring(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbevf_clean_tx_ring(adapter->xdp_ring[i]);
 }
 
 void ixgbevf_down(struct ixgbevf_adapter *adapter)
@@ -2345,6 +2453,13 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter)
 				IXGBE_TXDCTL_SWFLSH);
 	}
 
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
+
+		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx),
+				IXGBE_TXDCTL_SWFLSH);
+	}
+
 	if (!pci_channel_offline(adapter->pdev))
 		ixgbevf_reset(adapter);
 
@@ -2442,6 +2557,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 	/* Start with base case */
 	adapter->num_rx_queues = 1;
 	adapter->num_tx_queues = 1;
+	adapter->num_xdp_queues = 0;
 
 	spin_lock_bh(&adapter->mbx_lock);
 
@@ -2463,8 +2579,13 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 		case ixgbe_mbox_api_11:
 		case ixgbe_mbox_api_12:
 		case ixgbe_mbox_api_13:
+			if (adapter->xdp_prog &&
+			    hw->mac.max_tx_queues == rss)
+				rss = rss > 3 ? 2 : 1;
+
 			adapter->num_rx_queues = rss;
 			adapter->num_tx_queues = rss;
+			adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0;
 		default:
 			break;
 		}
@@ -2521,6 +2642,8 @@ static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
  * @v_idx: index of vector in adapter struct
  * @txr_count: number of Tx rings for q vector
  * @txr_idx: index of first Tx ring to assign
+ * @xdp_count: total number of XDP rings to allocate
+ * @xdp_idx: index of first XDP ring to allocate
  * @rxr_count: number of Rx rings for q vector
  * @rxr_idx: index of first Rx ring to assign
  *
@@ -2528,13 +2651,15 @@ static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
  **/
 static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
 				  int txr_count, int txr_idx,
+				  int xdp_count, int xdp_idx,
 				  int rxr_count, int rxr_idx)
 {
 	struct ixgbevf_q_vector *q_vector;
+	int reg_idx = txr_idx + xdp_idx;
 	struct ixgbevf_ring *ring;
 	int ring_count, size;
 
-	ring_count = txr_count + rxr_count;
+	ring_count = txr_count + xdp_count + rxr_count;
 	size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
 
 	/* allocate q_vector and rings */
@@ -2567,7 +2692,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
 		/* apply Tx specific ring traits */
 		ring->count = adapter->tx_ring_count;
 		ring->queue_index = txr_idx;
-		ring->reg_idx = txr_idx;
+		ring->reg_idx = reg_idx;
 
 		/* assign ring to adapter */
 		 adapter->tx_ring[txr_idx] = ring;
@@ -2575,6 +2700,36 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
 		/* update count and index */
 		txr_count--;
 		txr_idx++;
+		reg_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (xdp_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbevf_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = xdp_idx;
+		ring->reg_idx = reg_idx;
+		set_ring_xdp(ring);
+
+		/* assign ring to adapter */
+		adapter->xdp_ring[xdp_idx] = ring;
+
+		/* update count and index */
+		xdp_count--;
+		xdp_idx++;
+		reg_idx++;
 
 		/* push pointer to next ring */
 		ring++;
@@ -2624,8 +2779,12 @@ static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
 	struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
 	struct ixgbevf_ring *ring;
 
-	ixgbevf_for_each_ring(ring, q_vector->tx)
-		adapter->tx_ring[ring->queue_index] = NULL;
+	ixgbevf_for_each_ring(ring, q_vector->tx) {
+		if (ring_is_xdp(ring))
+			adapter->xdp_ring[ring->queue_index] = NULL;
+		else
+			adapter->tx_ring[ring->queue_index] = NULL;
+	}
 
 	ixgbevf_for_each_ring(ring, q_vector->rx)
 		adapter->rx_ring[ring->queue_index] = NULL;
@@ -2651,15 +2810,16 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
 	int rxr_remaining = adapter->num_rx_queues;
 	int txr_remaining = adapter->num_tx_queues;
-	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int xdp_remaining = adapter->num_xdp_queues;
+	int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
 	int err;
 
-	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+	if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
 		for (; rxr_remaining; v_idx++, q_vectors--) {
 			int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
 
 			err = ixgbevf_alloc_q_vector(adapter, v_idx,
-						     0, 0, rqpv, rxr_idx);
+						     0, 0, 0, 0, rqpv, rxr_idx);
 			if (err)
 				goto err_out;
 
@@ -2672,9 +2832,11 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 	for (; q_vectors; v_idx++, q_vectors--) {
 		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
 		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
+		int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors);
 
 		err = ixgbevf_alloc_q_vector(adapter, v_idx,
 					     tqpv, txr_idx,
+					     xqpv, xdp_idx,
 					     rqpv, rxr_idx);
 
 		if (err)
@@ -2685,6 +2847,8 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
 		rxr_idx += rqpv;
 		txr_remaining -= tqpv;
 		txr_idx += tqpv;
+		xdp_remaining -= xqpv;
+		xdp_idx += xqpv;
 	}
 
 	return 0;
@@ -2756,9 +2920,10 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
 		goto err_alloc_q_vectors;
 	}
 
-	hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
-	       (adapter->num_rx_queues > 1) ? "Enabled" :
-	       "Disabled", adapter->num_rx_queues, adapter->num_tx_queues);
+	hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count %u\n",
+	       (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
+	       adapter->num_rx_queues, adapter->num_tx_queues,
+	       adapter->num_xdp_queues);
 
 	set_bit(__IXGBEVF_DOWN, &adapter->state);
 
@@ -2779,6 +2944,7 @@ err_set_interrupt:
 static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
 {
 	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_queues = 0;
 
 	ixgbevf_free_q_vectors(adapter);
@@ -2986,6 +3152,8 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter)
 	if (netif_carrier_ok(adapter->netdev)) {
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			set_check_for_tx_hang(adapter->tx_ring[i]);
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			set_check_for_tx_hang(adapter->xdp_ring[i]);
 	}
 
 	/* get one bit for every active Tx/Rx interrupt vector */
@@ -3157,6 +3325,9 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter)
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		if (adapter->tx_ring[i]->desc)
 			ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		if (adapter->xdp_ring[i]->desc)
+			ixgbevf_free_tx_resources(adapter->xdp_ring[i]);
 }
 
 /**
@@ -3207,7 +3378,7 @@ err:
  **/
 static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
 {
-	int i, err = 0;
+	int i, j = 0, err = 0;
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]);
@@ -3217,11 +3388,22 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
 		goto err_setup_tx;
 	}
 
+	for (j = 0; j < adapter->num_xdp_queues; j++) {
+		err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]);
+		if (!err)
+			continue;
+		hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j);
+		break;
+	}
+
 	return 0;
 err_setup_tx:
 	/* rewind the index freeing the rings as we go */
+	while (j--)
+		ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
 	while (i--)
 		ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+
 	return err;
 }
 
@@ -4114,6 +4296,23 @@ static void ixgbevf_shutdown(struct pci_dev *pdev)
 	ixgbevf_suspend(pdev, PMSG_SUSPEND);
 }
 
+static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats,
+				      const struct ixgbevf_ring *ring)
+{
+	u64 bytes, packets;
+	unsigned int start;
+
+	if (ring) {
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->syncp);
+			bytes = ring->stats.bytes;
+			packets = ring->stats.packets;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+		stats->tx_bytes += bytes;
+		stats->tx_packets += packets;
+	}
+}
+
 static void ixgbevf_get_stats(struct net_device *netdev,
 			      struct rtnl_link_stats64 *stats)
 {
@@ -4141,13 +4340,12 @@ static void ixgbevf_get_stats(struct net_device *netdev,
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		ring = adapter->tx_ring[i];
-		do {
-			start = u64_stats_fetch_begin_irq(&ring->syncp);
-			bytes = ring->stats.bytes;
-			packets = ring->stats.packets;
-		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
-		stats->tx_bytes += bytes;
-		stats->tx_packets += packets;
+		ixgbevf_get_tx_ring_stats(stats, ring);
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		ring = adapter->xdp_ring[i];
+		ixgbevf_get_tx_ring_stats(stats, ring);
 	}
 	rcu_read_unlock();
 }
@@ -4201,8 +4399,25 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 	}
 
 	old_prog = xchg(&adapter->xdp_prog, prog);
-	for (i = 0; i < adapter->num_rx_queues; i++)
-		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+
+	/* If transitioning XDP modes reconfigure rings */
+	if (!!prog != !!old_prog) {
+		/* Hardware has to reinitialize queues and interrupts to
+		 * match packet buffer alignment. Unfortunately, the
+		 * hardware is not flexible enough to do this dynamically.
+		 */
+		if (netif_running(dev))
+			ixgbevf_close(dev);
+
+		ixgbevf_clear_interrupt_scheme(adapter);
+		ixgbevf_init_interrupt_scheme(adapter);
+
+		if (netif_running(dev))
+			ixgbevf_open(dev);
+	} else {
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+	}
 
 	if (old_prog)
 		bpf_prog_put(old_prog);

From efecfd5f803d5957ccf003310bff432c6ebd653b Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 16 Mar 2018 15:34:04 -0700
Subject: [PATCH 1154/1678] ixgbevf: Delay tail write for XDP packets

Current XDP implementation hits the tail on every XDP_TX; change the
driver to only hit the tail after packet processing is complete.

Based on
commit 7379f97a4fce ("ixgbe: delay tail write to every 'n' packets")

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 309f549808e4..5167e81e0cf1 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1016,14 +1016,8 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
 			cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) |
 				    IXGBE_ADVTXD_CC);
 
-	/* Force memory writes to complete before letting h/w know there
-	 * are new descriptors to fetch.  (Only applicable for weak-ordered
-	 * memory model archs, such as IA-64).
-	 *
-	 * We also need this memory barrier to make certain all of the
-	 * status bits have been updated before next_to_watch is written.
-	 */
-	wmb();
+	/* Avoid any potential race with cleanup */
+	smp_wmb();
 
 	/* set next_to_watch value indicating a packet is present */
 	i++;
@@ -1033,8 +1027,6 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
 	tx_buffer->next_to_watch = tx_desc;
 	ring->next_to_use = i;
 
-	/* notify HW of packet */
-	ixgbevf_write_tail(ring, i);
 	return IXGBEVF_XDP_TX;
 }
 
@@ -1101,6 +1093,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 	struct ixgbevf_adapter *adapter = q_vector->adapter;
 	u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
 	struct sk_buff *skb = rx_ring->skb;
+	bool xdp_xmit = false;
 	struct xdp_buff xdp;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
@@ -1142,11 +1135,13 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		}
 
 		if (IS_ERR(skb)) {
-			if (PTR_ERR(skb) == -IXGBEVF_XDP_TX)
+			if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+				xdp_xmit = true;
 				ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
 						       size);
-			else
+			} else {
 				rx_buffer->pagecnt_bias++;
+			}
 			total_rx_packets++;
 			total_rx_bytes += size;
 		} else if (skb) {
@@ -1208,6 +1203,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 	/* place incomplete frames back on ring for completion */
 	rx_ring->skb = skb;
 
+	if (xdp_xmit) {
+		struct ixgbevf_ring *xdp_ring =
+			adapter->xdp_ring[rx_ring->queue_index];
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.
+		 */
+		wmb();
+		ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use);
+	}
+
 	u64_stats_update_begin(&rx_ring->syncp);
 	rx_ring->stats.packets += total_rx_packets;
 	rx_ring->stats.bytes += total_rx_bytes;

From be8333322effadce697bcee749c827d6e14b4d31 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 16 Mar 2018 15:34:05 -0700
Subject: [PATCH 1155/1678] ixgbevf: Add support for meta data

Add support for XDP meta data when using build skb.

Based on commit 366a88fe2f40 ("bpf, ixgbe: add meta data support")

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 5167e81e0cf1..3d9033f26eff 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -889,6 +889,20 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
 #if L1_CACHE_BYTES < 128
 	prefetch(xdp->data + L1_CACHE_BYTES);
 #endif
+	/* Note, we get here by enabling legacy-rx via:
+	 *
+	 *    ethtool --set-priv-flags <dev> legacy-rx on
+	 *
+	 * In this mode, we currently get 0 extra XDP headroom as
+	 * opposed to having legacy-rx off, where we process XDP
+	 * packets going to stack via ixgbevf_build_skb().
+	 *
+	 * For ixgbevf_construct_skb() mode it means that the
+	 * xdp->data_meta will always point to xdp->data, since
+	 * the helper cannot expand the head. Should this ever
+	 * changed in future for legacy-rx mode on, then lets also
+	 * add xdp->data_meta handling here.
+	 */
 
 	/* allocate a skb to store the frags */
 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
@@ -936,6 +950,7 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 					 struct xdp_buff *xdp,
 					 union ixgbe_adv_rx_desc *rx_desc)
 {
+	unsigned int metasize = xdp->data - xdp->data_meta;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
 #else
@@ -945,10 +960,14 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 #endif
 	struct sk_buff *skb;
 
-	/* prefetch first cache line of first page */
-	prefetch(xdp->data);
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points to xdp->data, otherwise, we likely
+	 * have a consumer accessing first few bytes of meta data,
+	 * and then actual data.
+	 */
+	prefetch(xdp->data_meta);
 #if L1_CACHE_BYTES < 128
-	prefetch(xdp->data + L1_CACHE_BYTES);
+	prefetch(xdp->data_meta + L1_CACHE_BYTES);
 #endif
 
 	/* build an skb around the page buffer */
@@ -959,6 +978,8 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
 	/* update pointers within the skb to store the data */
 	skb_reserve(skb, xdp->data - xdp->data_hard_start);
 	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
 
 	/* update buffer offset */
 #if (PAGE_SIZE < 8192)
@@ -1126,7 +1147,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		if (!skb) {
 			xdp.data = page_address(rx_buffer->page) +
 				   rx_buffer->page_offset;
-			xdp_set_data_meta_invalid(&xdp);
+			xdp.data_meta = xdp.data;
 			xdp.data_hard_start = xdp.data -
 					      ixgbevf_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;

From 2eb34bafb352d2ddd02b184c73f3ec4706a4cc12 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 16 Mar 2018 15:34:06 -0700
Subject: [PATCH 1156/1678] ixgbevf: Add XDP queue stats reporting

XDP stats are included in TX stats, however, they are not
reported in TX queue stats since they are setup on different
queues.  Add reporting for XDP queue stats to provide
consistency between the total stats and per queue stats.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ethtool.c | 24 ++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index da8c0e299a37..8e7d6c6f5c92 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -82,6 +82,7 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
 
 #define IXGBEVF_QUEUE_STATS_LEN ( \
 	(((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \
+	 ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \
 	 ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \
 	 (sizeof(struct ixgbevf_stats) / sizeof(u64)))
 #define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats)
@@ -491,6 +492,23 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev,
 		i += 2;
 	}
 
+	/* populate XDP queue data */
+	for (j = 0; j < adapter->num_xdp_queues; j++) {
+		ring = adapter->xdp_ring[j];
+		if (!ring) {
+			data[i++] = 0;
+			data[i++] = 0;
+			continue;
+		}
+
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->syncp);
+			data[i] = ring->stats.packets;
+			data[i + 1] = ring->stats.bytes;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+		i += 2;
+	}
+
 	/* populate Rx queue data */
 	for (j = 0; j < adapter->num_rx_queues; j++) {
 		ring = adapter->rx_ring[j];
@@ -534,6 +552,12 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
 			sprintf(p, "tx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
+		for (i = 0; i < adapter->num_xdp_queues; i++) {
+			sprintf(p, "xdp_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "xdp_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
 		for (i = 0; i < adapter->num_rx_queues; i++) {
 			sprintf(p, "rx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;

From ed93a39871282fc0dbc2fecd0d04ea0ddad54353 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Thu, 22 Mar 2018 10:02:36 +0100
Subject: [PATCH 1157/1678] ixgbe: tweak page counting for XDP_REDIRECT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current page counting scheme assumes that the reference count
cannot decrease until the received frame is sent to the upper layers
of the networking stack. This assumption does not hold for the
XDP_REDIRECT action, since a page (pointed out by xdp_buff) can have
its reference count decreased via the xdp_do_redirect call.

To work around that, we now start off by a large page count and then
don't allow a refcount less than two.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c0d8d7238f71..afadba99f7b8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1629,7 +1629,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
 	bi->dma = dma;
 	bi->page = page;
 	bi->page_offset = ixgbe_rx_offset(rx_ring);
-	bi->pagecnt_bias = 1;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
 	rx_ring->rx_stats.alloc_rx_page++;
 
 	return true;
@@ -2039,8 +2040,8 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
 	 * the pagecnt_bias and page count so that we fully restock the
 	 * number of references the driver holds.
 	 */
-	if (unlikely(!pagecnt_bias)) {
-		page_ref_add(page, USHRT_MAX);
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
 		rx_buffer->pagecnt_bias = USHRT_MAX;
 	}
 

From 353def80c3d37a8e4a196b6856df4bc8c67304bf Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 23 Mar 2018 21:03:58 +0300
Subject: [PATCH 1158/1678] mlxsw: spectrum_span: Prevent duplicate mirrors

In net commit 8175f7c4736f ("mlxsw: spectrum: Prevent duplicate
mirrors") we prevented the user from mirroring more than once from a
single binding point (port-direction pair).

The fix was essentially reverted in a merge conflict resolution when net
was merged into net-next. Restore it.

Fixes: 03fe2debbb27 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_span.c   | 28 ++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index ac24e52d74db..65a77708ff61 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -600,13 +600,17 @@ int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
 }
 
 static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port,
-				    struct mlxsw_sp_span_entry *span_entry)
+mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
+				    enum mlxsw_sp_span_type type,
+				    struct mlxsw_sp_port *port,
+				    bool bind)
 {
 	struct mlxsw_sp_span_inspected_port *p;
 
 	list_for_each_entry(p, &span_entry->bound_ports_list, list)
-		if (port->local_port == p->local_port)
+		if (type == p->type &&
+		    port->local_port == p->local_port &&
+		    bind == p->bound)
 			return p;
 	return NULL;
 }
@@ -636,8 +640,22 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
 	struct mlxsw_sp_span_inspected_port *inspected_port;
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
 	char sbib_pl[MLXSW_REG_SBIB_LEN];
+	int i;
 	int err;
 
+	/* A given (source port, direction) can only be bound to one analyzer,
+	 * so if a binding is requested, check for conflicts.
+	 */
+	if (bind)
+		for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+			struct mlxsw_sp_span_entry *curr =
+				&mlxsw_sp->span.entries[i];
+
+			if (mlxsw_sp_span_entry_bound_port_find(curr, type,
+								port, bind))
+				return -EEXIST;
+		}
+
 	/* if it is an egress SPAN, bind a shared buffer to it */
 	if (type == MLXSW_SP_SPAN_EGRESS) {
 		u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp,
@@ -665,6 +683,7 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
 	}
 	inspected_port->local_port = port->local_port;
 	inspected_port->type = type;
+	inspected_port->bound = bind;
 	list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
 
 	return 0;
@@ -691,7 +710,8 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
 	char sbib_pl[MLXSW_REG_SBIB_LEN];
 
-	inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry);
+	inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
+							     port, bind);
 	if (!inspected_port)
 		return;
 

From affaa0c724c14c914625647efe7b95dfbe8d08f2 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 23 Mar 2018 19:09:39 +0100
Subject: [PATCH 1159/1678] net/sched: remove tcf_idr_cleanup()

tcf_idr_cleanup() is no more used, so remove it.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 1 -
 net/sched/act_api.c   | 8 --------
 2 files changed, 9 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index e0a9c2003b24..9e59ebfded62 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -149,7 +149,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   struct tc_action **a, const struct tc_action_ops *ops,
 		   int bind, bool cpustats);
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 57cf37145282..7bd1b964f021 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -296,14 +296,6 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 }
 EXPORT_SYMBOL(tcf_idr_check);
 
-void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
-{
-	if (est)
-		gen_kill_estimator(&a->tcfa_rate_est);
-	free_tcf(a);
-}
-EXPORT_SYMBOL(tcf_idr_cleanup);
-
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   struct tc_action **a, const struct tc_action_ops *ops,
 		   int bind, bool cpustats)

From 94cb5492409219ee3f9468616dd58af314029f76 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 23 Mar 2018 19:31:30 +0100
Subject: [PATCH 1160/1678] net/sched: act_vlan: declare push_vid with host
 byte order

use u16 in place of __be16 to suppress the following sparse warnings:

 net/sched/act_vlan.c:150:26: warning: incorrect type in assignment (different base types)
 net/sched/act_vlan.c:150:26: expected restricted __be16 [usertype] push_vid
 net/sched/act_vlan.c:150:26: got unsigned short
 net/sched/act_vlan.c:151:21: warning: restricted __be16 degrades to integer
 net/sched/act_vlan.c:208:26: warning: incorrect type in assignment (different base types)
 net/sched/act_vlan.c:208:26: expected unsigned short [unsigned] [usertype] tcfv_push_vid
 net/sched/act_vlan.c:208:26: got restricted __be16 [usertype] push_vid

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_vlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 4595391c2129..41a66effeb5f 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -117,7 +117,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	struct tc_vlan *parm;
 	struct tcf_vlan *v;
 	int action;
-	__be16 push_vid = 0;
+	u16 push_vid = 0;
 	__be16 push_proto = 0;
 	u8 push_prio = 0;
 	bool exists = false;

From a28a47f1203a7708ab001b3cb7962d391cc14ce8 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:36:56 -0700
Subject: [PATCH 1161/1678] liquidio: Moved common function txqs_stop to
 octeon_network.h

Moving common function txqs_stop to octeon_network.h

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c  | 16 ----------------
 .../net/ethernet/cavium/liquidio/lio_vf_main.c   | 16 ----------------
 .../ethernet/cavium/liquidio/octeon_network.h    | 16 ++++++++++++++++
 3 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 21280cb66550..205a298db010 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -513,22 +513,6 @@ static void liquidio_deinit_pci(void)
 	pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief Stop Tx queues
- * @param netdev network device
- */
-static inline void txqs_stop(struct net_device *netdev)
-{
-	if (netif_is_multiqueue(netdev)) {
-		int i;
-
-		for (i = 0; i < netdev->num_tx_queues; i++)
-			netif_stop_subqueue(netdev, i);
-	} else {
-		netif_stop_queue(netdev);
-	}
-}
-
 /**
  * \brief Start Tx queues
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 3342d64b7081..2e3144661464 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -284,22 +284,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 	.err_handler	= &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * \brief Stop Tx queues
- * @param netdev network device
- */
-static void txqs_stop(struct net_device *netdev)
-{
-	if (netif_is_multiqueue(netdev)) {
-		int i;
-
-		for (i = 0; i < netdev->num_tx_queues; i++)
-			netif_stop_subqueue(netdev, i);
-	} else {
-		netif_stop_queue(netdev);
-	}
-}
-
 /**
  * \brief Start Tx queues
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 76803a569794..d8079e35ef6e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -506,4 +506,20 @@ static inline int wait_for_pending_requests(struct octeon_device *oct)
 	return 0;
 }
 
+/**
+ * \brief Stop Tx queues
+ * @param netdev network device
+ */
+static inline void txqs_stop(struct net_device *netdev)
+{
+	if (netif_is_multiqueue(netdev)) {
+		int i;
+
+		for (i = 0; i < netdev->num_tx_queues; i++)
+			netif_stop_subqueue(netdev, i);
+	} else {
+		netif_stop_queue(netdev);
+	}
+}
+
 #endif

From 95fbba1846d751cccd49c0a896230b3093254b71 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:36:58 -0700
Subject: [PATCH 1162/1678] liquidio: Moved common function txqs_wake to
 octeon_network.h

Moving common function txqs_wake to octeon_network.h

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_main.c   | 28 -------------------
 .../ethernet/cavium/liquidio/lio_vf_main.c    | 27 ------------------
 .../ethernet/cavium/liquidio/octeon_network.h | 27 ++++++++++++++++++
 3 files changed, 27 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 205a298db010..8a809b4f6626 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -529,34 +529,6 @@ static inline void txqs_start(struct net_device *netdev)
 	}
 }
 
-/**
- * \brief Wake Tx queues
- * @param netdev network device
- */
-static inline void txqs_wake(struct net_device *netdev)
-{
-	struct lio *lio = GET_LIO(netdev);
-
-	if (netif_is_multiqueue(netdev)) {
-		int i;
-
-		for (i = 0; i < netdev->num_tx_queues; i++) {
-			int qno = lio->linfo.txpciq[i %
-				lio->oct_dev->num_iqs].s.q_no;
-
-			if (__netif_subqueue_stopped(netdev, i)) {
-				INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
-							  tx_restart, 1);
-				netif_wake_subqueue(netdev, i);
-			}
-		}
-	} else {
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-					  tx_restart, 1);
-		netif_wake_queue(netdev);
-	}
-}
-
 /**
  * \brief Stop Tx queue
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 2e3144661464..60743c37b4e4 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -300,33 +300,6 @@ static void txqs_start(struct net_device *netdev)
 	}
 }
 
-/**
- * \brief Wake Tx queues
- * @param netdev network device
- */
-static void txqs_wake(struct net_device *netdev)
-{
-	struct lio *lio = GET_LIO(netdev);
-
-	if (netif_is_multiqueue(netdev)) {
-		int i;
-
-		for (i = 0; i < netdev->num_tx_queues; i++) {
-			int qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs]
-				      .s.q_no;
-			if (__netif_subqueue_stopped(netdev, i)) {
-				INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
-							  tx_restart, 1);
-				netif_wake_subqueue(netdev, i);
-			}
-		}
-	} else {
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-					  tx_restart, 1);
-		netif_wake_queue(netdev);
-	}
-}
-
 /**
  * \brief Start Tx queue
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index d8079e35ef6e..2d2f49b7f563 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -522,4 +522,31 @@ static inline void txqs_stop(struct net_device *netdev)
 	}
 }
 
+/**
+ * \brief Wake Tx queues
+ * @param netdev network device
+ */
+static inline void txqs_wake(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (netif_is_multiqueue(netdev)) {
+		int i;
+
+		for (i = 0; i < netdev->num_tx_queues; i++) {
+			int qno = lio->linfo.txpciq[i %
+				lio->oct_dev->num_iqs].s.q_no;
+
+			if (__netif_subqueue_stopped(netdev, i)) {
+				INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
+							  tx_restart, 1);
+				netif_wake_subqueue(netdev, i);
+			}
+		}
+	} else {
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+					  tx_restart, 1);
+		netif_wake_queue(netdev);
+	}
+}
 #endif

From 5f8baa7a8e7e3f6db640187ef3fa2799107cd1c9 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:07 -0700
Subject: [PATCH 1163/1678] liquidio: Moved common function txqs_start to
 octeon_network.h

Moving common function txqs_start to octeon_network.h

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 16 ----------------
 .../net/ethernet/cavium/liquidio/lio_vf_main.c  | 16 ----------------
 .../ethernet/cavium/liquidio/octeon_network.h   | 17 +++++++++++++++++
 3 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 8a809b4f6626..a0c4c0682ece 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -513,22 +513,6 @@ static void liquidio_deinit_pci(void)
 	pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief Start Tx queues
- * @param netdev network device
- */
-static inline void txqs_start(struct net_device *netdev)
-{
-	if (netif_is_multiqueue(netdev)) {
-		int i;
-
-		for (i = 0; i < netdev->num_tx_queues; i++)
-			netif_start_subqueue(netdev, i);
-	} else {
-		netif_start_queue(netdev);
-	}
-}
-
 /**
  * \brief Stop Tx queue
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 60743c37b4e4..82d70e4752a5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -284,22 +284,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 	.err_handler	= &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * \brief Start Tx queues
- * @param netdev network device
- */
-static void txqs_start(struct net_device *netdev)
-{
-	if (netif_is_multiqueue(netdev)) {
-		int i;
-
-		for (i = 0; i < netdev->num_tx_queues; i++)
-			netif_start_subqueue(netdev, i);
-	} else {
-		netif_start_queue(netdev);
-	}
-}
-
 /**
  * \brief Start Tx queue
  * @param netdev network device
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 2d2f49b7f563..0a14d772f4b2 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -549,4 +549,21 @@ static inline void txqs_wake(struct net_device *netdev)
 		netif_wake_queue(netdev);
 	}
 }
+
+/**
+ * \brief Start Tx queues
+ * @param netdev network device
+ */
+static inline void txqs_start(struct net_device *netdev)
+{
+	if (netif_is_multiqueue(netdev)) {
+		int i;
+
+		for (i = 0; i < netdev->num_tx_queues; i++)
+			netif_start_subqueue(netdev, i);
+	} else {
+		netif_start_queue(netdev);
+	}
+}
+
 #endif

From 5da052a6bd92afb76b226da4afeb1afb66edf080 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:17 -0700
Subject: [PATCH 1164/1678] liquidio: Moved common function skb_iq to to
 octeon_network.h

Moving common function skb_iq to to octeon_network.h

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c       | 10 ----------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c    | 10 ----------
 drivers/net/ethernet/cavium/liquidio/octeon_network.h | 10 ++++++++++
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index a0c4c0682ece..f9e2a0615174 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1692,16 +1692,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 	return 0;
 }
 
-static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
-{
-	int q = 0;
-
-	if (netif_is_multiqueue(lio->netdev))
-		q = skb->queue_mapping % lio->linfo.num_txpciq;
-
-	return q;
-}
-
 /**
  * \brief Check Tx queue state for a given network buffer
  * @param lio per-network private data
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 82d70e4752a5..5f0114d9c881 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -993,16 +993,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 	return 0;
 }
 
-static int skb_iq(struct lio *lio, struct sk_buff *skb)
-{
-	int q = 0;
-
-	if (netif_is_multiqueue(lio->netdev))
-		q = skb->queue_mapping % lio->linfo.num_txpciq;
-
-	return q;
-}
-
 /**
  * \brief Check Tx queue state for a given network buffer
  * @param lio per-network private data
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 0a14d772f4b2..cd334a74a275 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -566,4 +566,14 @@ static inline void txqs_start(struct net_device *netdev)
 	}
 }
 
+static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+	int q = 0;
+
+	if (netif_is_multiqueue(lio->netdev))
+		q = skb->queue_mapping % lio->linfo.num_txpciq;
+
+	return q;
+}
+
 #endif

From c5662c8c6dffd97b347e52d79025e38a88e8a077 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:20 -0700
Subject: [PATCH 1165/1678] liquidio: Removed one line function stop_txq

Removing one line function stop_txq

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index f9e2a0615174..c0df7be6e0c1 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -513,15 +513,6 @@ static void liquidio_deinit_pci(void)
 	pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief Stop Tx queue
- * @param netdev network device
- */
-static void stop_txq(struct net_device *netdev)
-{
-	txqs_stop(netdev);
-}
-
 /**
  * \brief Start Tx queue
  * @param netdev network device
@@ -844,7 +835,7 @@ static inline void update_link_status(struct net_device *netdev,
 		} else {
 			dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
 			netif_carrier_off(netdev);
-			stop_txq(netdev);
+			txqs_stop(netdev);
 		}
 		if (lio->linfo.link.s.mtu != current_max_mtu) {
 			netif_info(lio, probe, lio->netdev, "Max MTU changed from %d to %d\n",

From a8c4a79264b690465b859495cfb42d41d0283264 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:25 -0700
Subject: [PATCH 1166/1678] liquidio: Removed start_txq function

Removing start_txq function from VF and PF files

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c  | 16 +---------------
 .../net/ethernet/cavium/liquidio/lio_vf_main.c   | 16 +---------------
 .../ethernet/cavium/liquidio/octeon_network.h    | 16 ++++++++++------
 3 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index c0df7be6e0c1..424795a3a6d1 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -513,20 +513,6 @@ static void liquidio_deinit_pci(void)
 	pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief Start Tx queue
- * @param netdev network device
- */
-static void start_txq(struct net_device *netdev)
-{
-	struct lio *lio = GET_LIO(netdev);
-
-	if (lio->linfo.link.s.link_up) {
-		txqs_start(netdev);
-		return;
-	}
-}
-
 /**
  * \brief Wake a queue
  * @param netdev network device
@@ -2145,7 +2131,7 @@ static int liquidio_open(struct net_device *netdev)
 			return -1;
 	}
 
-	start_txq(netdev);
+	txqs_start(netdev);
 
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 5f0114d9c881..57b6ee578210 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -284,20 +284,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 	.err_handler	= &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * \brief Start Tx queue
- * @param netdev network device
- */
-static void start_txq(struct net_device *netdev)
-{
-	struct lio *lio = GET_LIO(netdev);
-
-	if (lio->linfo.link.s.link_up) {
-		txqs_start(netdev);
-		return;
-	}
-}
-
 /**
  * \brief Wake a queue
  * @param netdev network device
@@ -1189,7 +1175,7 @@ static int liquidio_open(struct net_device *netdev)
 	lio->intf_open = 1;
 
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-	start_txq(netdev);
+	txqs_start(netdev);
 
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index cd334a74a275..72a581aaa320 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -556,13 +556,17 @@ static inline void txqs_wake(struct net_device *netdev)
  */
 static inline void txqs_start(struct net_device *netdev)
 {
-	if (netif_is_multiqueue(netdev)) {
-		int i;
+	struct lio *lio = GET_LIO(netdev);
 
-		for (i = 0; i < netdev->num_tx_queues; i++)
-			netif_start_subqueue(netdev, i);
-	} else {
-		netif_start_queue(netdev);
+	if (lio->linfo.link.s.link_up) {
+		if (netif_is_multiqueue(netdev)) {
+			int i;
+
+			for (i = 0; i < netdev->num_tx_queues; i++)
+				netif_start_subqueue(netdev, i);
+		} else {
+			netif_start_queue(netdev);
+		}
 	}
 }
 

From 2a2fabaf35fca27b35eaf16331b2c1a1377c0516 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:28 -0700
Subject: [PATCH 1167/1678] liquidio: Removed netif_is_multiqueue check

Removing checks for netif_is_multiqueue.
Configuring single queue will be a multiqueue netdev with one queues.

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_core.c   | 18 +---
 .../net/ethernet/cavium/liquidio/lio_main.c   | 93 ++++++-------------
 .../ethernet/cavium/liquidio/lio_vf_main.c    | 55 +++--------
 .../ethernet/cavium/liquidio/octeon_network.h | 50 +++-------
 4 files changed, 64 insertions(+), 152 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 666cf7e9cd09..73e70e076e61 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -377,20 +377,12 @@ static void lio_update_txq_status(struct octeon_device *oct, int iq_num)
 		return;
 
 	lio = GET_LIO(netdev);
-	if (netif_is_multiqueue(netdev)) {
-		if (__netif_subqueue_stopped(netdev, iq->q_index) &&
-		    lio->linfo.link.s.link_up &&
-		    (!octnet_iq_is_full(oct, iq_num))) {
-			netif_wake_subqueue(netdev, iq->q_index);
-			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
-						  tx_restart, 1);
-		}
-	} else if (netif_queue_stopped(netdev) &&
-		   lio->linfo.link.s.link_up &&
-		   (!octnet_iq_is_full(oct, lio->txq))) {
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+	if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+	    lio->linfo.link.s.link_up &&
+	    (!octnet_iq_is_full(oct, iq_num))) {
+		netif_wake_subqueue(netdev, iq->q_index);
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
 					  tx_restart, 1);
-		netif_wake_queue(netdev);
 	}
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 424795a3a6d1..28575ed4652e 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -520,10 +520,7 @@ static void liquidio_deinit_pci(void)
  */
 static inline void wake_q(struct net_device *netdev, int q)
 {
-	if (netif_is_multiqueue(netdev))
-		netif_wake_subqueue(netdev, q);
-	else
-		netif_wake_queue(netdev);
+	netif_wake_subqueue(netdev, q);
 }
 
 /**
@@ -533,10 +530,7 @@ static inline void wake_q(struct net_device *netdev, int q)
  */
 static inline void stop_q(struct net_device *netdev, int q)
 {
-	if (netif_is_multiqueue(netdev))
-		netif_stop_subqueue(netdev, q);
-	else
-		netif_stop_queue(netdev);
+	netif_stop_subqueue(netdev, q);
 }
 
 /**
@@ -546,33 +540,24 @@ static inline void stop_q(struct net_device *netdev, int q)
  */
 static inline int check_txq_status(struct lio *lio)
 {
+	int numqs = lio->netdev->num_tx_queues;
 	int ret_val = 0;
+	int q, iq;
 
-	if (netif_is_multiqueue(lio->netdev)) {
-		int numqs = lio->netdev->num_tx_queues;
-		int q, iq = 0;
-
-		/* check each sub-queue state */
-		for (q = 0; q < numqs; q++) {
-			iq = lio->linfo.txpciq[q %
-				lio->oct_dev->num_iqs].s.q_no;
-			if (octnet_iq_is_full(lio->oct_dev, iq))
-				continue;
-			if (__netif_subqueue_stopped(lio->netdev, q)) {
-				wake_q(lio->netdev, q);
-				INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
-							  tx_restart, 1);
-				ret_val++;
-			}
+	/* check each sub-queue state */
+	for (q = 0; q < numqs; q++) {
+		iq = lio->linfo.txpciq[q %
+			lio->oct_dev->num_iqs].s.q_no;
+		if (octnet_iq_is_full(lio->oct_dev, iq))
+			continue;
+		if (__netif_subqueue_stopped(lio->netdev, q)) {
+			wake_q(lio->netdev, q);
+			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
+						  tx_restart, 1);
+			ret_val++;
 		}
-	} else {
-		if (octnet_iq_is_full(lio->oct_dev, lio->txq))
-			return 0;
-		wake_q(lio->netdev, lio->txq);
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-					  tx_restart, 1);
-		ret_val = 1;
 	}
+
 	return ret_val;
 }
 
@@ -1676,15 +1661,10 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
  */
 static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
 {
-	int q = 0, iq = 0;
+	int q, iq;
 
-	if (netif_is_multiqueue(lio->netdev)) {
-		q = skb->queue_mapping;
-		iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
-	} else {
-		iq = lio->txq;
-		q = iq;
-	}
+	q = skb->queue_mapping;
+	iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
 
 	if (octnet_iq_is_full(lio->oct_dev, iq))
 		return 0;
@@ -2573,14 +2553,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	lio = GET_LIO(netdev);
 	oct = lio->oct_dev;
 
-	if (netif_is_multiqueue(netdev)) {
-		q_idx = skb->queue_mapping;
-		q_idx = (q_idx % (lio->linfo.num_txpciq));
-		tag = q_idx;
-		iq_no = lio->linfo.txpciq[q_idx].s.q_no;
-	} else {
-		iq_no = lio->txq;
-	}
+	q_idx = skb->queue_mapping;
+	q_idx = (q_idx % (lio->linfo.num_txpciq));
+	tag = q_idx;
+	iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
 	stats = &oct->instr_queue[iq_no]->stats;
 
@@ -2611,23 +2587,14 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	ndata.q_no = iq_no;
 
-	if (netif_is_multiqueue(netdev)) {
-		if (octnet_iq_is_full(oct, ndata.q_no)) {
-			/* defer sending if queue is full */
-			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-				   ndata.q_no);
-			stats->tx_iq_busy++;
-			return NETDEV_TX_BUSY;
-		}
-	} else {
-		if (octnet_iq_is_full(oct, lio->txq)) {
-			/* defer sending if queue is full */
-			stats->tx_iq_busy++;
-			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-				   lio->txq);
-			return NETDEV_TX_BUSY;
-		}
+	if (octnet_iq_is_full(oct, ndata.q_no)) {
+		/* defer sending if queue is full */
+		netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+			   ndata.q_no);
+		stats->tx_iq_busy++;
+		return NETDEV_TX_BUSY;
 	}
+
 	/* pr_info(" XMIT - valid Qs: %d, 1st Q no: %d, cpu:  %d, q_no:%d\n",
 	 *	lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no);
 	 */
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 57b6ee578210..f46289de1322 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -291,10 +291,7 @@ static struct pci_driver liquidio_vf_pci_driver = {
  */
 static void wake_q(struct net_device *netdev, int q)
 {
-	if (netif_is_multiqueue(netdev))
-		netif_wake_subqueue(netdev, q);
-	else
-		netif_wake_queue(netdev);
+	netif_wake_subqueue(netdev, q);
 }
 
 /**
@@ -304,10 +301,7 @@ static void wake_q(struct net_device *netdev, int q)
  */
 static void stop_q(struct net_device *netdev, int q)
 {
-	if (netif_is_multiqueue(netdev))
-		netif_stop_subqueue(netdev, q);
-	else
-		netif_stop_queue(netdev);
+	netif_stop_subqueue(netdev, q);
 }
 
 /**
@@ -986,15 +980,10 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
  */
 static int check_txq_state(struct lio *lio, struct sk_buff *skb)
 {
-	int q = 0, iq = 0;
+	int q, iq;
 
-	if (netif_is_multiqueue(lio->netdev)) {
-		q = skb->queue_mapping;
-		iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
-	} else {
-		iq = lio->txq;
-		q = iq;
-	}
+	q = skb->queue_mapping;
+	iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
 
 	if (octnet_iq_is_full(lio->oct_dev, iq))
 		return 0;
@@ -1635,14 +1624,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	lio = GET_LIO(netdev);
 	oct = lio->oct_dev;
 
-	if (netif_is_multiqueue(netdev)) {
-		q_idx = skb->queue_mapping;
-		q_idx = (q_idx % (lio->linfo.num_txpciq));
-		tag = q_idx;
-		iq_no = lio->linfo.txpciq[q_idx].s.q_no;
-	} else {
-		iq_no = lio->txq;
-	}
+	q_idx = skb->queue_mapping;
+	q_idx = (q_idx % (lio->linfo.num_txpciq));
+	tag = q_idx;
+	iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
 	stats = &oct->instr_queue[iq_no]->stats;
 
@@ -1671,22 +1656,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	ndata.q_no = iq_no;
 
-	if (netif_is_multiqueue(netdev)) {
-		if (octnet_iq_is_full(oct, ndata.q_no)) {
-			/* defer sending if queue is full */
-			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-				   ndata.q_no);
-			stats->tx_iq_busy++;
-			return NETDEV_TX_BUSY;
-		}
-	} else {
-		if (octnet_iq_is_full(oct, lio->txq)) {
-			/* defer sending if queue is full */
-			stats->tx_iq_busy++;
-			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
-				   ndata.q_no);
-			return NETDEV_TX_BUSY;
-		}
+	if (octnet_iq_is_full(oct, ndata.q_no)) {
+		/* defer sending if queue is full */
+		netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+			   ndata.q_no);
+		stats->tx_iq_busy++;
+		return NETDEV_TX_BUSY;
 	}
 
 	ndata.datasize = skb->len;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 72a581aaa320..7922a698f8b3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -512,14 +512,10 @@ static inline int wait_for_pending_requests(struct octeon_device *oct)
  */
 static inline void txqs_stop(struct net_device *netdev)
 {
-	if (netif_is_multiqueue(netdev)) {
-		int i;
+	int i;
 
-		for (i = 0; i < netdev->num_tx_queues; i++)
-			netif_stop_subqueue(netdev, i);
-	} else {
-		netif_stop_queue(netdev);
-	}
+	for (i = 0; i < netdev->num_tx_queues; i++)
+		netif_stop_subqueue(netdev, i);
 }
 
 /**
@@ -529,24 +525,16 @@ static inline void txqs_stop(struct net_device *netdev)
 static inline void txqs_wake(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
+	int i, qno;
 
-	if (netif_is_multiqueue(netdev)) {
-		int i;
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs].s.q_no;
 
-		for (i = 0; i < netdev->num_tx_queues; i++) {
-			int qno = lio->linfo.txpciq[i %
-				lio->oct_dev->num_iqs].s.q_no;
-
-			if (__netif_subqueue_stopped(netdev, i)) {
-				INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
-							  tx_restart, 1);
-				netif_wake_subqueue(netdev, i);
-			}
+		if (__netif_subqueue_stopped(netdev, i)) {
+			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
+						  tx_restart, 1);
+			netif_wake_subqueue(netdev, i);
 		}
-	} else {
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
-					  tx_restart, 1);
-		netif_wake_queue(netdev);
 	}
 }
 
@@ -557,27 +545,17 @@ static inline void txqs_wake(struct net_device *netdev)
 static inline void txqs_start(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
+	int i;
 
 	if (lio->linfo.link.s.link_up) {
-		if (netif_is_multiqueue(netdev)) {
-			int i;
-
-			for (i = 0; i < netdev->num_tx_queues; i++)
-				netif_start_subqueue(netdev, i);
-		} else {
-			netif_start_queue(netdev);
-		}
+		for (i = 0; i < netdev->num_tx_queues; i++)
+			netif_start_subqueue(netdev, i);
 	}
 }
 
 static inline int skb_iq(struct lio *lio, struct sk_buff *skb)
 {
-	int q = 0;
-
-	if (netif_is_multiqueue(lio->netdev))
-		q = skb->queue_mapping % lio->linfo.num_txpciq;
-
-	return q;
+	return skb->queue_mapping % lio->linfo.num_txpciq;
 }
 
 #endif

From 78a202f00d0970544ee96803b5b1c1fe65f8a433 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:30 -0700
Subject: [PATCH 1168/1678] liquidio: Removed one line function stop_q

Removing one line function stop_q

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 12 +-----------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 12 +-----------
 2 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 28575ed4652e..69a396d47eb8 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -523,16 +523,6 @@ static inline void wake_q(struct net_device *netdev, int q)
 	netif_wake_subqueue(netdev, q);
 }
 
-/**
- * \brief Stop a queue
- * @param netdev network device
- * @param q which queue to stop
- */
-static inline void stop_q(struct net_device *netdev, int q)
-{
-	netif_stop_subqueue(netdev, q);
-}
-
 /**
  * \brief Check Tx queue status, and take appropriate action
  * @param lio per-network private data
@@ -2750,7 +2740,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
 
 	if (status == IQ_SEND_STOP)
-		stop_q(netdev, q_idx);
+		netif_stop_subqueue(netdev, q_idx);
 
 	netif_trans_update(netdev);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index f46289de1322..3120aed30c61 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -294,16 +294,6 @@ static void wake_q(struct net_device *netdev, int q)
 	netif_wake_subqueue(netdev, q);
 }
 
-/**
- * \brief Stop a queue
- * @param netdev network device
- * @param q which queue to stop
- */
-static void stop_q(struct net_device *netdev, int q)
-{
-	netif_stop_subqueue(netdev, q);
-}
-
 /**
  * Remove the node at the head of the list. The list would be empty at
  * the end of this call if there are no more nodes in the list.
@@ -1803,7 +1793,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if (status == IQ_SEND_STOP) {
 		dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
 			iq_no);
-		stop_q(netdev, q_idx);
+		netif_stop_subqueue(netdev, q_idx);
 	}
 
 	netif_trans_update(netdev);

From dd69debce4a13df39aad6d218a743c4d9a0cf6f3 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:33 -0700
Subject: [PATCH 1169/1678] liquidio: Removed one line function wake_q

Removing one line function wake_q

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 14 ++------------
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 12 +-----------
 2 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 69a396d47eb8..15129d0d45e9 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -513,16 +513,6 @@ static void liquidio_deinit_pci(void)
 	pci_unregister_driver(&liquidio_pci_driver);
 }
 
-/**
- * \brief Wake a queue
- * @param netdev network device
- * @param q which queue to wake
- */
-static inline void wake_q(struct net_device *netdev, int q)
-{
-	netif_wake_subqueue(netdev, q);
-}
-
 /**
  * \brief Check Tx queue status, and take appropriate action
  * @param lio per-network private data
@@ -541,7 +531,7 @@ static inline int check_txq_status(struct lio *lio)
 		if (octnet_iq_is_full(lio->oct_dev, iq))
 			continue;
 		if (__netif_subqueue_stopped(lio->netdev, q)) {
-			wake_q(lio->netdev, q);
+			netif_wake_subqueue(lio->netdev, q);
 			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq,
 						  tx_restart, 1);
 			ret_val++;
@@ -1661,7 +1651,7 @@ static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
 
 	if (__netif_subqueue_stopped(lio->netdev, q)) {
 		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
-		wake_q(lio->netdev, q);
+		netif_wake_subqueue(lio->netdev, q);
 	}
 	return 1;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 3120aed30c61..5ab0831d6b53 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -284,16 +284,6 @@ static struct pci_driver liquidio_vf_pci_driver = {
 	.err_handler	= &liquidio_vf_err_handler,    /* For AER */
 };
 
-/**
- * \brief Wake a queue
- * @param netdev network device
- * @param q which queue to wake
- */
-static void wake_q(struct net_device *netdev, int q)
-{
-	netif_wake_subqueue(netdev, q);
-}
-
 /**
  * Remove the node at the head of the list. The list would be empty at
  * the end of this call if there are no more nodes in the list.
@@ -980,7 +970,7 @@ static int check_txq_state(struct lio *lio, struct sk_buff *skb)
 
 	if (__netif_subqueue_stopped(lio->netdev, q)) {
 		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
-		wake_q(lio->netdev, q);
+		netif_wake_subqueue(lio->netdev, q);
 	}
 
 	return 1;

From fc756d0f51c855719ec6a26f6b4d8bdf24ee2e94 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:36 -0700
Subject: [PATCH 1170/1678] liquidio: Function call skb_iq for deriving queue
 from skb

Using skb_iq function for deriving queue from skb

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c    | 3 +--
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 15129d0d45e9..75131dcac221 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2533,8 +2533,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	lio = GET_LIO(netdev);
 	oct = lio->oct_dev;
 
-	q_idx = skb->queue_mapping;
-	q_idx = (q_idx % (lio->linfo.num_txpciq));
+	q_idx = skb_iq(lio, skb);
 	tag = q_idx;
 	iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 5ab0831d6b53..478c20a7e5cf 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1604,8 +1604,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
 	lio = GET_LIO(netdev);
 	oct = lio->oct_dev;
 
-	q_idx = skb->queue_mapping;
-	q_idx = (q_idx % (lio->linfo.num_txpciq));
+	q_idx = skb_iq(lio, skb);
 	tag = q_idx;
 	iq_no = lio->linfo.txpciq[q_idx].s.q_no;
 

From a96d8ad31dd92735b88cfcc5db666d3c57a17b52 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:39 -0700
Subject: [PATCH 1171/1678] liquidio: Renamed txqs_wake to wake_txqs

For consistency renaming txqs_wake to wake_txqs

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c       | 4 ++--
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c    | 4 ++--
 drivers/net/ethernet/cavium/liquidio/octeon_network.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 75131dcac221..d0359f5e9154 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -782,7 +782,7 @@ static inline void update_link_status(struct net_device *netdev,
 		if (lio->linfo.link.s.link_up) {
 			dev_dbg(&oct->pci_dev->dev, "%s: link_up", __func__);
 			netif_carrier_on(netdev);
-			txqs_wake(netdev);
+			wake_txqs(netdev);
 		} else {
 			dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
 			netif_carrier_off(netdev);
@@ -2768,7 +2768,7 @@ static void liquidio_tx_timeout(struct net_device *netdev)
 		   "Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
 		   netdev->stats.tx_dropped);
 	netif_trans_update(netdev);
-	txqs_wake(netdev);
+	wake_txqs(netdev);
 }
 
 static int liquidio_vlan_rx_add_vid(struct net_device *netdev,
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 478c20a7e5cf..288096b39f46 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -515,7 +515,7 @@ static void update_link_status(struct net_device *netdev,
 
 		if (lio->linfo.link.s.link_up) {
 			netif_carrier_on(netdev);
-			txqs_wake(netdev);
+			wake_txqs(netdev);
 		} else {
 			netif_carrier_off(netdev);
 			txqs_stop(netdev);
@@ -1822,7 +1822,7 @@ static void liquidio_tx_timeout(struct net_device *netdev)
 		   "Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
 		   netdev->stats.tx_dropped);
 	netif_trans_update(netdev);
-	txqs_wake(netdev);
+	wake_txqs(netdev);
 }
 
 static int
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 7922a698f8b3..3cbc65ae2acb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -522,7 +522,7 @@ static inline void txqs_stop(struct net_device *netdev)
  * \brief Wake Tx queues
  * @param netdev network device
  */
-static inline void txqs_wake(struct net_device *netdev)
+static inline void wake_txqs(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 	int i, qno;

From 736b7ea555d05b3febf3ee56afd17834f2d20bc2 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:41 -0700
Subject: [PATCH 1172/1678] liquidio: Renamed txqs_stop to stop_txqs

For consistency renaming txqs_stop to stop_txqs

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c       | 2 +-
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c    | 4 ++--
 drivers/net/ethernet/cavium/liquidio/octeon_network.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index d0359f5e9154..a474a46f1e4a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -786,7 +786,7 @@ static inline void update_link_status(struct net_device *netdev,
 		} else {
 			dev_dbg(&oct->pci_dev->dev, "%s: link_off", __func__);
 			netif_carrier_off(netdev);
-			txqs_stop(netdev);
+			stop_txqs(netdev);
 		}
 		if (lio->linfo.link.s.mtu != current_max_mtu) {
 			netif_info(lio, probe, lio->netdev, "Max MTU changed from %d to %d\n",
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 288096b39f46..4d7a0ae418c6 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -518,7 +518,7 @@ static void update_link_status(struct net_device *netdev,
 			wake_txqs(netdev);
 		} else {
 			netif_carrier_off(netdev);
-			txqs_stop(netdev);
+			stop_txqs(netdev);
 		}
 
 		if (lio->linfo.link.s.mtu != current_max_mtu) {
@@ -1186,7 +1186,7 @@ static int liquidio_stop(struct net_device *netdev)
 
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
-	txqs_stop(netdev);
+	stop_txqs(netdev);
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 3cbc65ae2acb..1b4c85ae98c3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -510,7 +510,7 @@ static inline int wait_for_pending_requests(struct octeon_device *oct)
  * \brief Stop Tx queues
  * @param netdev network device
  */
-static inline void txqs_stop(struct net_device *netdev)
+static inline void stop_txqs(struct net_device *netdev)
 {
 	int i;
 

From c9614a166338664153cbf1de9f329a03a5748466 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Fri, 23 Mar 2018 17:37:44 -0700
Subject: [PATCH 1173/1678] liquidio: Renamed txqs_start to start_txqs

For consistency renaming txqs_start to start_txqs

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c       | 2 +-
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c    | 2 +-
 drivers/net/ethernet/cavium/liquidio/octeon_network.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index a474a46f1e4a..58b5c75fd2ee 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2091,7 +2091,7 @@ static int liquidio_open(struct net_device *netdev)
 			return -1;
 	}
 
-	txqs_start(netdev);
+	start_txqs(netdev);
 
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 4d7a0ae418c6..d5f5c9a693ee 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1144,7 +1144,7 @@ static int liquidio_open(struct net_device *netdev)
 	lio->intf_open = 1;
 
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
-	txqs_start(netdev);
+	start_txqs(netdev);
 
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 1b4c85ae98c3..8782206271b6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -542,7 +542,7 @@ static inline void wake_txqs(struct net_device *netdev)
  * \brief Start Tx queues
  * @param netdev network device
  */
-static inline void txqs_start(struct net_device *netdev)
+static inline void start_txqs(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 	int i;

From 13acc94eff122b260a387d68668bf9d670738e6a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 21 Mar 2018 16:31:03 -0700
Subject: [PATCH 1174/1678] net: permit skb_segment on head_frag frag_list skb

One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.

3472 struct sk_buff *skb_segment(struct sk_buff *head_skb,
3473                             netdev_features_t features)
3474 {
3475         struct sk_buff *segs = NULL;
3476         struct sk_buff *tail = NULL;
...
3665                 while (pos < offset + len) {
3666                         if (i >= nfrags) {
3667                                 BUG_ON(skb_headlen(list_skb));
3668
3669                                 i = 0;
3670                                 nfrags = skb_shinfo(list_skb)->nr_frags;
3671                                 frag = skb_shinfo(list_skb)->frags;
3672                                 frag_skb = list_skb;
...

call stack:
...
 #1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525
 #2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc
 #3 [ffff883ffef03640] oops_end at ffffffff8101d7e7
 #4 [ffff883ffef03668] die at ffffffff8101deb2
 #5 [ffff883ffef03698] do_trap at ffffffff8101a700
 #6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe
 #7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0
 #8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab
    [exception RIP: skb_segment+3044]
    RIP: ffffffff817e4dd4  RSP: ffff883ffef03860  RFLAGS: 00010216
    RAX: 0000000000002bf6  RBX: ffff883feb7aaa00  RCX: 0000000000000011
    RDX: ffff883fb87910c0  RSI: 0000000000000011  RDI: ffff883feb7ab500
    RBP: ffff883ffef03928   R8: 0000000000002ce2   R9: 00000000000027da
    R10: 000001ea00000000  R11: 0000000000002d82  R12: ffff883f90a1ee80
    R13: ffff883fb8791120  R14: ffff883feb7abc00  R15: 0000000000002ce2
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 46cb22215ff4..b5c75d4fcf37 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 }
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
+static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
+{
+	skb_frag_t head_frag;
+	struct page *page;
+
+	page = virt_to_head_page(frag_skb->head);
+	head_frag.page.p = page;
+	head_frag.page_offset = frag_skb->data -
+		(unsigned char *)page_address(page);
+	head_frag.size = skb_headlen(frag_skb);
+	return head_frag;
+}
+
 /**
  *	skb_segment - Perform protocol segmentation on skb.
  *	@head_skb: buffer to segment
@@ -3664,15 +3677,19 @@ normal:
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
-				BUG_ON(skb_headlen(list_skb));
-
 				i = 0;
 				nfrags = skb_shinfo(list_skb)->nr_frags;
 				frag = skb_shinfo(list_skb)->frags;
 				frag_skb = list_skb;
+				if (!skb_headlen(list_skb)) {
+					BUG_ON(!nfrags);
+				} else {
+					BUG_ON(!list_skb->head_frag);
 
-				BUG_ON(!nfrags);
-
+					/* to make room for head_frag. */
+					i--;
+					frag--;
+				}
 				if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
 				    skb_zerocopy_clone(nskb, frag_skb,
 						       GFP_ATOMIC))
@@ -3689,7 +3706,7 @@ normal:
 				goto err;
 			}
 
-			*nskb_frag = *frag;
+			*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
 			__skb_frag_ref(nskb_frag);
 			size = skb_frag_size(nskb_frag);
 

From 76db8087c4c991dcd17f5ea8ac0eafd0696ab450 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 21 Mar 2018 16:31:04 -0700
Subject: [PATCH 1175/1678] net: bpf: add a test for skb_segment in test_bpf
 module

Without the previous commit,
"modprobe test_bpf" will have the following errors:
...
[   98.149165] ------------[ cut here ]------------
[   98.159362] kernel BUG at net/core/skbuff.c:3667!
[   98.169756] invalid opcode: 0000 [#1] SMP PTI
[   98.179370] Modules linked in:
[   98.179371]  test_bpf(+)
...
which triggers the bug the previous commit intends to fix.

The skbs are constructed to mimic what mlx5 may generate.
The packet size/header may not mimic real cases in production. But
the processing flow is similar.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 2 deletions(-)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3e9335493fe4..b2badf6b23cd 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id)
 	return test_id < test_range[0] || test_id > test_range[1];
 }
 
+static __init struct sk_buff *build_test_skb(void)
+{
+	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+	struct sk_buff *skb[2];
+	struct page *page[2];
+	int i, data_size = 8;
+
+	for (i = 0; i < 2; i++) {
+		page[i] = alloc_page(GFP_KERNEL);
+		if (!page[i]) {
+			if (i == 0)
+				goto err_page0;
+			else
+				goto err_page1;
+		}
+
+		/* this will set skb[i]->head_frag */
+		skb[i] = dev_alloc_skb(headroom + data_size);
+		if (!skb[i]) {
+			if (i == 0)
+				goto err_skb0;
+			else
+				goto err_skb1;
+		}
+
+		skb_reserve(skb[i], headroom);
+		skb_put(skb[i], data_size);
+		skb[i]->protocol = htons(ETH_P_IP);
+		skb_reset_network_header(skb[i]);
+		skb_set_mac_header(skb[i], -ETH_HLEN);
+
+		skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
+		// skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
+	}
+
+	/* setup shinfo */
+	skb_shinfo(skb[0])->gso_size = 1448;
+	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb[0])->gso_segs = 0;
+	skb_shinfo(skb[0])->frag_list = skb[1];
+
+	/* adjust skb[0]'s len */
+	skb[0]->len += skb[1]->len;
+	skb[0]->data_len += skb[1]->data_len;
+	skb[0]->truesize += skb[1]->truesize;
+
+	return skb[0];
+
+err_skb1:
+	__free_page(page[1]);
+err_page1:
+	kfree_skb(skb[0]);
+err_skb0:
+	__free_page(page[0]);
+err_page0:
+	return NULL;
+}
+
+static __init int test_skb_segment(void)
+{
+	netdev_features_t features;
+	struct sk_buff *skb, *segs;
+	int ret = -1;
+
+	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+		   NETIF_F_IPV6_CSUM;
+	features |= NETIF_F_RXCSUM;
+	skb = build_test_skb();
+	if (!skb) {
+		pr_info("%s: failed to build_test_skb", __func__);
+		goto done;
+	}
+
+	segs = skb_segment(skb, features);
+	if (segs) {
+		kfree_skb_list(segs);
+		ret = 0;
+		pr_info("%s: success in skb_segment!", __func__);
+	} else {
+		pr_info("%s: failed in skb_segment!", __func__);
+	}
+	kfree_skb(skb);
+done:
+	return ret;
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
@@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void)
 		return ret;
 
 	ret = test_bpf();
-
 	destroy_bpf_tests();
-	return ret;
+	if (ret)
+		return ret;
+
+	return test_skb_segment();
 }
 
 static void __exit test_bpf_exit(void)

From 5c71dadbb45970a8f0544a27ae8f1cbd9750e516 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 22 Mar 2018 12:01:13 -0700
Subject: [PATCH 1176/1678] hv_netvsc: Fix the return status in RX path

As defined in hyperv_net.h, the NVSP_STAT_SUCCESS is one not zero.
Some functions returns 0 when it actually means NVSP_STAT_SUCCESS.
This patch fixes them.

In netvsc_receive(), it puts the last RNDIS packet's receive status
for all packets in a vmxferpage which may contain multiple RNDIS
packets.
This patch puts NVSP_STAT_FAIL in the receive completion if one of
the packets in a vmxferpage fails.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c       | 8 ++++++--
 drivers/net/hyperv/netvsc_drv.c   | 2 +-
 drivers/net/hyperv/rndis_filter.c | 4 ++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 4123d081b1c7..58bb2dcbc9f0 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1098,12 +1098,16 @@ static int netvsc_receive(struct net_device *ndev,
 		void *data = recv_buf
 			+ vmxferpage_packet->ranges[i].byte_offset;
 		u32 buflen = vmxferpage_packet->ranges[i].byte_count;
+		int ret;
 
 		trace_rndis_recv(ndev, q_idx, data);
 
 		/* Pass it to the upper layer */
-		status = rndis_filter_receive(ndev, net_device,
-					      channel, data, buflen);
+		ret = rndis_filter_receive(ndev, net_device,
+					   channel, data, buflen);
+
+		if (unlikely(ret != NVSP_STAT_SUCCESS))
+			status = NVSP_STAT_FAIL;
 	}
 
 	enq_receive_complete(ndev, net_device, q_idx,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f28c85d212ce..5d716750ae22 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -831,7 +831,7 @@ int netvsc_recv_callback(struct net_device *net,
 	u64_stats_update_end(&rx_stats->syncp);
 
 	napi_gro_receive(&nvchan->napi, skb);
-	return 0;
+	return NVSP_STAT_SUCCESS;
 }
 
 static void netvsc_get_drvinfo(struct net_device *net,
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 020f8bc54386..4a4952363e8a 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -437,10 +437,10 @@ int rndis_filter_receive(struct net_device *ndev,
 			"unhandled rndis message (type %u len %u)\n",
 			   rndis_msg->ndis_msg_type,
 			   rndis_msg->msg_len);
-		break;
+		return NVSP_STAT_FAIL;
 	}
 
-	return 0;
+	return NVSP_STAT_SUCCESS;
 }
 
 static int rndis_filter_query_device(struct rndis_device *dev,

From c5d24bdd29cc6373331967b5034da21c12805f72 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 22 Mar 2018 12:01:14 -0700
Subject: [PATCH 1177/1678] hv_netvsc: Add range checking for rx packet offset
 and length

This patch adds range checking for rx packet offset and length.
It may only happen if there is a host side bug.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h |  1 +
 drivers/net/hyperv/netvsc.c     | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 32861036c3fc..960f06141472 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -793,6 +793,7 @@ struct netvsc_device {
 
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
+	u32 recv_buf_size; /* allocated bytes */
 	u32 recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
 	u32 recv_section_size;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 58bb2dcbc9f0..c9910c33e671 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -282,6 +282,8 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
+	net_device->recv_buf_size = buf_size;
+
 	/*
 	 * Establish the gpadl handle for this buffer on this
 	 * channel.  Note: This call uses the vmbus connection rather
@@ -1095,11 +1097,22 @@ static int netvsc_receive(struct net_device *ndev,
 
 	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
 	for (i = 0; i < count; i++) {
-		void *data = recv_buf
-			+ vmxferpage_packet->ranges[i].byte_offset;
+		u32 offset = vmxferpage_packet->ranges[i].byte_offset;
 		u32 buflen = vmxferpage_packet->ranges[i].byte_count;
+		void *data;
 		int ret;
 
+		if (unlikely(offset + buflen > net_device->recv_buf_size)) {
+			status = NVSP_STAT_FAIL;
+			netif_err(net_device_ctx, rx_err, ndev,
+				  "Packet offset:%u + len:%u too big\n",
+				  offset, buflen);
+
+			continue;
+		}
+
+		data = recv_buf + offset;
+
 		trace_rndis_recv(ndev, q_idx, data);
 
 		/* Pass it to the upper layer */

From 808679e7fa8018321ccfb227628175b84d2a5ff9 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Sun, 25 Mar 2018 17:20:06 -0400
Subject: [PATCH 1178/1678] tc-testing: updated police, mirred, skbedit and
 skbmod with more tests

Added extra test cases for control actions (reclassify, pipe etc.),
cookies, max index value and police args sanity check.

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/mirred.json   | 192 ++++++++++++++++++
 .../tc-testing/tc-tests/actions/police.json   | 144 +++++++++++++
 .../tc-testing/tc-tests/actions/skbedit.json  | 168 +++++++++++++++
 .../tc-testing/tc-tests/actions/skbmod.json   |  24 +++
 4 files changed, 528 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 0fcccf18399b..443c9b3c8664 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -170,6 +170,198 @@
             "$TC actions flush action mirred"
         ]
     },
+    {
+        "id": "8917",
+        "name": "Add mirred mirror action with control pass",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 1",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "1054",
+        "name": "Add mirred mirror action with control pipe",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 15",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "9887",
+        "name": "Add mirred mirror action with control continue",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 15",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "e4aa",
+        "name": "Add mirred mirror action with control reclassify",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 150",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "ece9",
+        "name": "Add mirred mirror action with control drop",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 99",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "0031",
+        "name": "Add mirred mirror action with control jump",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 99",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "407c",
+        "name": "Add mirred mirror action with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "8b69",
+        "name": "Add mirred mirror action with maximum index",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 4294967295",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
     {
         "id": "a70e",
         "name": "Delete mirred mirror action",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index 0e602a3f9393..38d85a1d7492 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -264,6 +264,150 @@
             "$TC actions flush action police"
         ]
     },
+    {
+        "id": "ddd6",
+        "name": "Add police action with invalid rate value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f61c",
+        "name": "Add police action with invalid burst value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "c26f",
+        "name": "Add police action with invalid peakrate value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "db04",
+        "name": "Add police action with invalid mtu value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 10Kbit burst 1Kb mtu 2Pb",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f3c9",
+        "name": "Add police action with cookie",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "d190",
+        "name": "Add police action with maximum index",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 4294967295",
+        "matchPattern": "action order [0-9]*:  police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
     {
         "id": "336e",
         "name": "Delete police action",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index 99635ea4722e..37ecc2716fee 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -215,6 +215,174 @@
             "$TC actions flush action skbedit"
         ]
     },
+    {
+        "id": "464a",
+        "name": "Add skbedit action with control pipe",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 11",
+        "matchPattern": "action order [0-9]*:  skbedit ptype host pipe.*index 11 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "212f",
+        "name": "Add skbedit action with control reclassify",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 90",
+        "matchPattern": "action order [0-9]*:  skbedit mark 56789 reclassify.*index 90 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "0651",
+        "name": "Add skbedit action with control pass",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 271",
+        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 3 pass.*index 271 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "cc53",
+        "name": "Add skbedit action with control drop",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 271",
+        "matchPattern": "action order [0-9]*:  skbedit queue_mapping 3 drop.*index 271 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "ec16",
+        "name": "Add skbedit action with control jump",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 2",
+        "matchPattern": "action order [0-9]*:  skbedit priority :8 jump 9.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "db54",
+        "name": "Add skbedit action with control continue",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 32",
+        "matchPattern": "action order [0-9]*:  skbedit priority :16 continue.*index 32 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "1055",
+        "name": "Add skbedit action with cookie",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 32",
+        "matchPattern": "action order [0-9]*:  skbedit priority :16 continue.*index 32 ref.*cookie deadbeef",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
     {
         "id": "5172",
         "name": "List skbedit actions",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index 90bba48c3f07..fe3326e939c1 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -263,6 +263,30 @@
             "$TC actions flush action skbmod"
         ]
     },
+    {
+        "id": "6046",
+        "name": "Add skbmod action with control reclassify and cookie",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 1",
+        "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
     {
         "id": "58cb",
         "name": "List skbmod actions",

From 422a9fd604a0c2206d40e3eb34f726172a147ccf Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 25 Mar 2018 23:43:14 +0200
Subject: [PATCH 1179/1678] net: dsa: mv88e6xxx: Use the DT IRQ trigger mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By calling request_threaded_irq() with the flag IRQF_TRIGGER_FALLING
we override the trigger mode provided in device tree. And the
interrupt is actually active low, which is what all the current device
tree descriptions use.

Suggested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index fd78378ad6b1..3ba77067a3dc 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -425,7 +425,7 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
 
 	err = request_threaded_irq(chip->irq, NULL,
 				   mv88e6xxx_g1_irq_thread_fn,
-				   IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
+				   IRQF_ONESHOT,
 				   dev_name(chip->dev), chip);
 	if (err)
 		mv88e6xxx_g1_irq_free_common(chip);

From 71f74ae48c7fd08ffa0c447dfcea6c7fd3dfefc6 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 25 Mar 2018 23:43:15 +0200
Subject: [PATCH 1180/1678] net: dsa: mv88e6xxx: Call the common IRQ free code

When free'ing the polled IRQs, call the common irq free code.
Otherwise the interrupts are left registered, and when we come to load
the driver a second time, we get an Opps.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 3ba77067a3dc..9a5d786b4885 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -467,6 +467,8 @@ static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip)
 
 static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip)
 {
+	mv88e6xxx_g1_irq_free_common(chip);
+
 	kthread_cancel_delayed_work_sync(&chip->irq_poll_work);
 	kthread_destroy_worker(chip->kworker);
 }

From b6e0e875421ef6debfbe05d3aa99ac788d886074 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 22 Mar 2018 18:19:32 -0700
Subject: [PATCH 1181/1678] net: systemport: Implement adaptive interrupt
 coalescing

Implement support for adaptive RX and TX interrupt coalescing using
net_dim. We have each of our TX ring and our single RX ring implement a
bcm_sysport_net_dim structure which holds an interrupt counter, number
of packets, bytes, and a container for a net_dim instance.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 141 ++++++++++++++++++---
 drivers/net/ethernet/broadcom/bcmsysport.h |  14 ++
 2 files changed, 140 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 3fc549b88c43..4e26f606a7f2 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/net_dim.h>
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
@@ -574,21 +575,55 @@ static int bcm_sysport_set_wol(struct net_device *dev,
 	return 0;
 }
 
+static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
+{
+	u32 reg;
+
+	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
+	reg &= ~(RDMA_INTR_THRESH_MASK |
+		 RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
+	reg |= priv->dim.coal_pkts;
+	reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
+			    RDMA_TIMEOUT_SHIFT;
+	rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+}
+
+static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring)
+{
+	struct bcm_sysport_priv *priv = ring->priv;
+	u32 reg;
+
+	reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+	reg &= ~(RING_INTR_THRESH_MASK |
+		 RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
+	reg |= ring->dim.coal_pkts;
+	reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
+			    RING_TIMEOUT_SHIFT;
+	tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index));
+}
+
 static int bcm_sysport_get_coalesce(struct net_device *dev,
 				    struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct bcm_sysport_tx_ring *ring;
+	unsigned int i;
 	u32 reg;
 
 	reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
 
 	ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000;
 	ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		ring = &priv->tx_rings[i];
+		ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
+	}
 
 	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
 
 	ec->rx_coalesce_usecs = (reg >> RDMA_TIMEOUT_SHIFT) * 8192 / 1000;
 	ec->rx_max_coalesced_frames = reg & RDMA_INTR_THRESH_MASK;
+	ec->use_adaptive_rx_coalesce = priv->dim.use_dim;
 
 	return 0;
 }
@@ -597,8 +632,8 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
 				    struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct bcm_sysport_tx_ring *ring;
 	unsigned int i;
-	u32 reg;
 
 	/* Base system clock is 125Mhz, DMA timeout is this reference clock
 	 * divided by 1024, which yield roughly 8.192 us, our maximum value has
@@ -615,22 +650,26 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
 		return -EINVAL;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
-		reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(i));
-		reg &= ~(RING_INTR_THRESH_MASK |
-			 RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
-		reg |= ec->tx_max_coalesced_frames;
-		reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
-			 RING_TIMEOUT_SHIFT;
-		tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(i));
+		ring = &priv->tx_rings[i];
+		ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
+		ring->dim.coal_usecs = ec->tx_coalesce_usecs;
+		if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
+			ring->dim.coal_pkts = 1;
+			ring->dim.coal_usecs = 0;
+		}
+		ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
+		bcm_sysport_set_tx_coalesce(ring);
 	}
 
-	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
-	reg &= ~(RDMA_INTR_THRESH_MASK |
-		 RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
-	reg |= ec->rx_max_coalesced_frames;
-	reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192) <<
-			    RDMA_TIMEOUT_SHIFT;
-	rdma_writel(priv, reg, RDMA_MBDONE_INTR);
+	priv->dim.coal_usecs = ec->rx_coalesce_usecs;
+	priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
+
+	if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
+		priv->dim.coal_pkts = 1;
+		priv->dim.coal_usecs = 0;
+	}
+	priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
+	bcm_sysport_set_rx_coalesce(priv);
 
 	return 0;
 }
@@ -709,6 +748,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
 	struct bcm_sysport_stats64 *stats64 = &priv->stats64;
 	struct net_device *ndev = priv->netdev;
 	unsigned int processed = 0, to_process;
+	unsigned int processed_bytes = 0;
 	struct bcm_sysport_cb *cb;
 	struct sk_buff *skb;
 	unsigned int p_index;
@@ -800,6 +840,7 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
 		 */
 		skb_pull(skb, sizeof(*rsb) + 2);
 		len -= (sizeof(*rsb) + 2);
+		processed_bytes += len;
 
 		/* UniMAC may forward CRC */
 		if (priv->crc_fwd) {
@@ -824,6 +865,9 @@ next:
 			priv->rx_read_ptr = 0;
 	}
 
+	priv->dim.packets = processed;
+	priv->dim.bytes = processed_bytes;
+
 	return processed;
 }
 
@@ -896,6 +940,8 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 	ring->packets += pkts_compl;
 	ring->bytes += bytes_compl;
 	u64_stats_update_end(&priv->syncp);
+	ring->dim.packets = pkts_compl;
+	ring->dim.bytes = bytes_compl;
 
 	ring->c_index = c_index;
 
@@ -941,6 +987,7 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
 {
 	struct bcm_sysport_tx_ring *ring =
 		container_of(napi, struct bcm_sysport_tx_ring, napi);
+	struct net_dim_sample dim_sample;
 	unsigned int work_done = 0;
 
 	work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
@@ -957,6 +1004,12 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
 		return 0;
 	}
 
+	if (ring->dim.use_dim) {
+		net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+			       ring->dim.bytes, &dim_sample);
+		net_dim(&ring->dim.dim, dim_sample);
+	}
+
 	return budget;
 }
 
@@ -972,6 +1025,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
 {
 	struct bcm_sysport_priv *priv =
 		container_of(napi, struct bcm_sysport_priv, napi);
+	struct net_dim_sample dim_sample;
 	unsigned int work_done = 0;
 
 	work_done = bcm_sysport_desc_rx(priv, budget);
@@ -994,6 +1048,12 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
 		intrl2_0_mask_clear(priv, INTRL2_0_RDMA_MBDONE);
 	}
 
+	if (priv->dim.use_dim) {
+		net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
+			       priv->dim.bytes, &dim_sample);
+		net_dim(&priv->dim.dim, dim_sample);
+	}
+
 	return work_done;
 }
 
@@ -1012,6 +1072,40 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
 	netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n");
 }
 
+static void bcm_sysport_dim_work(struct work_struct *work)
+{
+	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct bcm_sysport_net_dim *ndim =
+			container_of(dim, struct bcm_sysport_net_dim, dim);
+	struct bcm_sysport_priv *priv =
+			container_of(ndim, struct bcm_sysport_priv, dim);
+	struct net_dim_cq_moder cur_profile =
+				net_dim_get_profile(dim->mode, dim->profile_ix);
+
+	priv->dim.coal_usecs = cur_profile.usec;
+	priv->dim.coal_pkts = cur_profile.pkts;
+
+	bcm_sysport_set_rx_coalesce(priv);
+	dim->state = NET_DIM_START_MEASURE;
+}
+
+static void bcm_sysport_dim_tx_work(struct work_struct *work)
+{
+	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct bcm_sysport_net_dim *ndim =
+			container_of(dim, struct bcm_sysport_net_dim, dim);
+	struct bcm_sysport_tx_ring *ring =
+			container_of(ndim, struct bcm_sysport_tx_ring, dim);
+	struct net_dim_cq_moder cur_profile =
+				net_dim_get_profile(dim->mode, dim->profile_ix);
+
+	ring->dim.coal_usecs = cur_profile.usec;
+	ring->dim.coal_pkts = cur_profile.pkts;
+
+	bcm_sysport_set_tx_coalesce(ring);
+	dim->state = NET_DIM_START_MEASURE;
+}
+
 /* RX and misc interrupt routine */
 static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
 {
@@ -1030,6 +1124,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
 	}
 
 	if (priv->irq0_stat & INTRL2_0_RDMA_MBDONE) {
+		priv->dim.event_ctr++;
 		if (likely(napi_schedule_prep(&priv->napi))) {
 			/* disable RX interrupts */
 			intrl2_0_mask_set(priv, INTRL2_0_RDMA_MBDONE);
@@ -1057,6 +1152,7 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
 			continue;
 
 		txr = &priv->tx_rings[ring];
+		txr->dim.event_ctr++;
 
 		if (likely(napi_schedule_prep(&txr->napi))) {
 			intrl2_0_mask_set(priv, ring_bit);
@@ -1089,6 +1185,7 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id)
 			continue;
 
 		txr = &priv->tx_rings[ring];
+		txr->dim.event_ctr++;
 
 		if (likely(napi_schedule_prep(&txr->napi))) {
 			intrl2_1_mask_set(priv, BIT(ring));
@@ -1354,6 +1451,16 @@ out:
 		phy_print_status(phydev);
 }
 
+static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
+				 void (*cb)(struct work_struct *work))
+{
+	INIT_WORK(&dim->dim.work, cb);
+	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	dim->event_ctr = 0;
+	dim->packets = 0;
+	dim->bytes = 0;
+}
+
 static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
 				    unsigned int index)
 {
@@ -1444,6 +1551,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
 	reg |= (1 << index);
 	tdma_writel(priv, reg, TDMA_TIER1_ARB_0_QUEUE_EN);
 
+	bcm_sysport_init_dim(&ring->dim, bcm_sysport_dim_tx_work);
 	napi_enable(&ring->napi);
 
 	netif_dbg(priv, hw, priv->netdev,
@@ -1474,6 +1582,7 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
 		return;
 
 	napi_disable(&ring->napi);
+	cancel_work_sync(&ring->dim.dim.work);
 	netif_napi_del(&ring->napi);
 
 	bcm_sysport_tx_clean(priv, ring);
@@ -1763,6 +1872,7 @@ static void bcm_sysport_netif_start(struct net_device *dev)
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 
 	/* Enable NAPI */
+	bcm_sysport_init_dim(&priv->dim, bcm_sysport_dim_work);
 	napi_enable(&priv->napi);
 
 	/* Enable RX interrupt and TX ring full interrupt */
@@ -1948,6 +2058,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev)
 	/* stop all software from updating hardware */
 	netif_tx_stop_all_queues(dev);
 	napi_disable(&priv->napi);
+	cancel_work_sync(&priv->dim.dim.work);
 	phy_stop(dev->phydev);
 
 	/* mask all interrupts */
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 19c91c76e327..e1c97d4a82b4 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -12,6 +12,7 @@
 #define __BCM_SYSPORT_H
 
 #include <linux/if_vlan.h>
+#include <linux/net_dim.h>
 
 /* Receive/transmit descriptor format */
 #define DESC_ADDR_HI_STATUS_LEN	0x00
@@ -695,6 +696,16 @@ struct bcm_sysport_hw_params {
 	unsigned int	num_rx_desc_words;
 };
 
+struct bcm_sysport_net_dim {
+	u16			use_dim;
+	u16			event_ctr;
+	unsigned long		packets;
+	unsigned long		bytes;
+	u32			coal_usecs;
+	u32			coal_pkts;
+	struct net_dim		dim;
+};
+
 /* Software view of the TX ring */
 struct bcm_sysport_tx_ring {
 	spinlock_t	lock;		/* Ring lock for tx reclaim/xmit */
@@ -712,6 +723,7 @@ struct bcm_sysport_tx_ring {
 	struct bcm_sysport_priv *priv;	/* private context backpointer */
 	unsigned long	packets;	/* packets statistics */
 	unsigned long	bytes;		/* bytes statistics */
+	struct bcm_sysport_net_dim dim;	/* Net DIM context */
 	unsigned int	switch_queue;	/* switch port queue number */
 	unsigned int	switch_port;	/* switch port queue number */
 	bool		inspect;	/* inspect switch port and queue */
@@ -743,6 +755,8 @@ struct bcm_sysport_priv {
 	unsigned int		rx_read_ptr;
 	unsigned int		rx_c_index;
 
+	struct bcm_sysport_net_dim	dim;
+
 	/* PHY device */
 	struct device_node	*phy_dn;
 	phy_interface_t		phy_interface;

From 9f4ca05827a27ec73bf63a42732f1978b8a779af Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 22 Mar 2018 18:19:33 -0700
Subject: [PATCH 1182/1678] net: bcmgenet: Add support for adaptive RX
 coalescing

Unlike the moder modern SYSTEMPORT hardware, we do not have a
configurable TDMA timeout, which limits us to implement adaptive RX
interrupt coalescing only. We have each of our RX rings implement a
bcmgenet_net_dim structure which holds an interrupt counter, number of
packets, bytes, and a container for a net_dim instance.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/genet/bcmgenet.c    | 109 +++++++++++++++---
 .../net/ethernet/broadcom/genet/bcmgenet.h    |  12 ++
 2 files changed, 103 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index b1e35a9accf1..7db8edc643ec 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -603,6 +603,8 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
 				 struct ethtool_coalesce *ec)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct bcmgenet_rx_ring *ring;
+	unsigned int i;
 
 	ec->tx_max_coalesced_frames =
 		bcmgenet_tdma_ring_readl(priv, DESC_INDEX,
@@ -613,15 +615,37 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
 	ec->rx_coalesce_usecs =
 		bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT) * 8192 / 1000;
 
+	for (i = 0; i < priv->hw_params->rx_queues; i++) {
+		ring = &priv->rx_rings[i];
+		ec->use_adaptive_rx_coalesce |= ring->dim.use_dim;
+	}
+	ring = &priv->rx_rings[DESC_INDEX];
+	ec->use_adaptive_rx_coalesce |= ring->dim.use_dim;
+
 	return 0;
 }
 
+static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring)
+{
+	struct bcmgenet_priv *priv = ring->priv;
+	unsigned int i = ring->index;
+	u32 reg;
+
+	bcmgenet_rdma_ring_writel(priv, i, ring->dim.coal_pkts,
+				  DMA_MBUF_DONE_THRESH);
+
+	reg = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + i);
+	reg &= ~DMA_TIMEOUT_MASK;
+	reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192);
+	bcmgenet_rdma_writel(priv, reg, DMA_RING0_TIMEOUT + i);
+}
+
 static int bcmgenet_set_coalesce(struct net_device *dev,
 				 struct ethtool_coalesce *ec)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct bcmgenet_rx_ring *ring;
 	unsigned int i;
-	u32 reg;
 
 	/* Base system clock is 125Mhz, DMA timeout is this reference clock
 	 * divided by 1024, which yields roughly 8.192us, our maximum value
@@ -641,7 +665,8 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
 	 * transmitted, or when the ring is empty.
 	 */
 	if (ec->tx_coalesce_usecs || ec->tx_coalesce_usecs_high ||
-	    ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low)
+	    ec->tx_coalesce_usecs_irq || ec->tx_coalesce_usecs_low ||
+	    ec->use_adaptive_tx_coalesce)
 		return -EOPNOTSUPP;
 
 	/* Program all TX queues with the same values, as there is no
@@ -656,24 +681,26 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
 				  DMA_MBUF_DONE_THRESH);
 
 	for (i = 0; i < priv->hw_params->rx_queues; i++) {
-		bcmgenet_rdma_ring_writel(priv, i,
-					  ec->rx_max_coalesced_frames,
-					  DMA_MBUF_DONE_THRESH);
-
-		reg = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + i);
-		reg &= ~DMA_TIMEOUT_MASK;
-		reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192);
-		bcmgenet_rdma_writel(priv, reg, DMA_RING0_TIMEOUT + i);
+		ring = &priv->rx_rings[i];
+		ring->dim.coal_usecs = ec->rx_coalesce_usecs;
+		ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
+		if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
+			ring->dim.coal_pkts = 1;
+			ring->dim.coal_usecs = 0;
+		}
+		ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+		bcmgenet_set_rx_coalesce(ring);
 	}
 
-	bcmgenet_rdma_ring_writel(priv, DESC_INDEX,
-				  ec->rx_max_coalesced_frames,
-				  DMA_MBUF_DONE_THRESH);
-
-	reg = bcmgenet_rdma_readl(priv, DMA_RING16_TIMEOUT);
-	reg &= ~DMA_TIMEOUT_MASK;
-	reg |= DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000, 8192);
-	bcmgenet_rdma_writel(priv, reg, DMA_RING16_TIMEOUT);
+	ring = &priv->rx_rings[DESC_INDEX];
+	ring->dim.coal_usecs = ec->rx_coalesce_usecs;
+	ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
+	if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
+		ring->dim.coal_pkts = 1;
+		ring->dim.coal_usecs = 0;
+	}
+	ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+	bcmgenet_set_rx_coalesce(ring);
 
 	return 0;
 }
@@ -1713,6 +1740,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
 	unsigned long dma_flag;
 	int len;
 	unsigned int rxpktprocessed = 0, rxpkttoprocess;
+	unsigned int bytes_processed = 0;
 	unsigned int p_index, mask;
 	unsigned int discards;
 	unsigned int chksum_ok = 0;
@@ -1832,6 +1860,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
 			len -= ETH_FCS_LEN;
 		}
 
+		bytes_processed += len;
+
 		/*Finish setting up the received SKB and send it to the kernel*/
 		skb->protocol = eth_type_trans(skb, priv->dev);
 		ring->packets++;
@@ -1854,6 +1884,9 @@ next:
 		bcmgenet_rdma_ring_writel(priv, ring->index, ring->c_index, RDMA_CONS_INDEX);
 	}
 
+	ring->dim.bytes = bytes_processed;
+	ring->dim.packets = rxpktprocessed;
+
 	return rxpktprocessed;
 }
 
@@ -1862,6 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
 {
 	struct bcmgenet_rx_ring *ring = container_of(napi,
 			struct bcmgenet_rx_ring, napi);
+	struct net_dim_sample dim_sample;
 	unsigned int work_done;
 
 	work_done = bcmgenet_desc_rx(ring, budget);
@@ -1871,9 +1905,32 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
 		ring->int_enable(ring);
 	}
 
+	if (ring->dim.use_dim) {
+		net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
+			       ring->dim.bytes, &dim_sample);
+		net_dim(&ring->dim.dim, dim_sample);
+	}
+
 	return work_done;
 }
 
+static void bcmgenet_dim_work(struct work_struct *work)
+{
+	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct bcmgenet_net_dim *ndim =
+			container_of(dim, struct bcmgenet_net_dim, dim);
+	struct bcmgenet_rx_ring *ring =
+			container_of(ndim, struct bcmgenet_rx_ring, dim);
+	struct net_dim_cq_moder cur_profile =
+			net_dim_get_profile(dim->mode, dim->profile_ix);
+
+	ring->dim.coal_usecs = cur_profile.usec;
+	ring->dim.coal_pkts = cur_profile.pkts;
+
+	bcmgenet_set_rx_coalesce(ring);
+	dim->state = NET_DIM_START_MEASURE;
+}
+
 /* Assign skb to RX DMA descriptor. */
 static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
 				     struct bcmgenet_rx_ring *ring)
@@ -2022,6 +2079,16 @@ static void init_umac(struct bcmgenet_priv *priv)
 	dev_dbg(kdev, "done init umac\n");
 }
 
+static void bcmgenet_init_dim(struct bcmgenet_net_dim *dim,
+			      void (*cb)(struct work_struct *work))
+{
+	INIT_WORK(&dim->dim.work, cb);
+	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	dim->event_ctr = 0;
+	dim->packets = 0;
+	dim->bytes = 0;
+}
+
 /* Initialize a Tx ring along with corresponding hardware registers */
 static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
 				  unsigned int index, unsigned int size,
@@ -2111,6 +2178,8 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
 	if (ret)
 		return ret;
 
+	bcmgenet_init_dim(&ring->dim, bcmgenet_dim_work);
+
 	/* Initialize Rx NAPI */
 	netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll,
 		       NAPI_POLL_WEIGHT);
@@ -2276,10 +2345,12 @@ static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv)
 	for (i = 0; i < priv->hw_params->rx_queues; ++i) {
 		ring = &priv->rx_rings[i];
 		napi_disable(&ring->napi);
+		cancel_work_sync(&ring->dim.dim.work);
 	}
 
 	ring = &priv->rx_rings[DESC_INDEX];
 	napi_disable(&ring->napi);
+	cancel_work_sync(&ring->dim.dim.work);
 }
 
 static void bcmgenet_fini_rx_napi(struct bcmgenet_priv *priv)
@@ -2557,6 +2628,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
 			continue;
 
 		rx_ring = &priv->rx_rings[index];
+		rx_ring->dim.event_ctr++;
 
 		if (likely(napi_schedule_prep(&rx_ring->napi))) {
 			rx_ring->int_disable(rx_ring);
@@ -2601,6 +2673,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 
 	if (status & UMAC_IRQ_RXDMA_DONE) {
 		rx_ring = &priv->rx_rings[DESC_INDEX];
+		rx_ring->dim.event_ctr++;
 
 		if (likely(napi_schedule_prep(&rx_ring->napi))) {
 			rx_ring->int_disable(rx_ring);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 3c50431ccd2a..22c41e0430fb 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -16,6 +16,7 @@
 #include <linux/mii.h>
 #include <linux/if_vlan.h>
 #include <linux/phy.h>
+#include <linux/net_dim.h>
 
 /* total number of Buffer Descriptors, same for Rx/Tx */
 #define TOTAL_DESC				256
@@ -572,6 +573,16 @@ struct bcmgenet_tx_ring {
 	struct bcmgenet_priv *priv;
 };
 
+struct bcmgenet_net_dim {
+	u16		use_dim;
+	u16		event_ctr;
+	unsigned long	packets;
+	unsigned long	bytes;
+	u32		coal_usecs;
+	u32		coal_pkts;
+	struct net_dim	dim;
+};
+
 struct bcmgenet_rx_ring {
 	struct napi_struct napi;	/* Rx NAPI struct */
 	unsigned long	bytes;
@@ -586,6 +597,7 @@ struct bcmgenet_rx_ring {
 	unsigned int	cb_ptr;		/* Rx ring initial CB ptr */
 	unsigned int	end_ptr;	/* Rx ring end CB ptr */
 	unsigned int	old_discards;
+	struct bcmgenet_net_dim dim;
 	void (*int_enable)(struct bcmgenet_rx_ring *);
 	void (*int_disable)(struct bcmgenet_rx_ring *);
 	struct bcmgenet_priv *priv;

From 843bd7db79c861b49e2912d723625f5fa8e94502 Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Fri, 23 Mar 2018 15:25:10 +0530
Subject: [PATCH 1183/1678] cxgb4: Setup FW queues before registering netdev

When NetworkManager is enabled, there are chances that interface up
is called even before probe completes. This means we have not yet
allocated the FW sge queues, hence rest of ingress queue allocation
wont be proper. Fix this by calling setup_fw_sge_queues() before
register_netdev().

Fixes: 0fbc81b3ad51 ('chcr/cxgb4i/cxgbit/RDMA/cxgb4: Allocate resources dynamically for all cxgb4 ULD's')
Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e880be8e3c45..30dfb9f24f73 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -841,8 +841,6 @@ static int setup_fw_sge_queues(struct adapter *adap)
 
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
 			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
-	if (err)
-		t4_free_sge_resources(adap);
 	return err;
 }
 
@@ -5739,6 +5737,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto out_free_dev;
 
+	err = setup_fw_sge_queues(adapter);
+	if (err) {
+		dev_err(adapter->pdev_dev,
+			"FW sge queue allocation failed, err %d", err);
+		goto out_free_dev;
+	}
+
 	/*
 	 * The card is now ready to go.  If any errors occur during device
 	 * registration we do not fail the whole card but rather proceed only
@@ -5787,10 +5792,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		cxgb4_ptp_init(adapter);
 
 	print_adapter_info(adapter);
-	setup_fw_sge_queues(adapter);
 	return 0;
 
  out_free_dev:
+	t4_free_sge_resources(adapter);
 	free_some_resources(adapter);
 	if (adapter->flags & USING_MSIX)
 		free_msix_info(adapter);

From bd79aceed53b22766c87c1cf5ddf4639a488c3f4 Mon Sep 17 00:00:00 2001
From: Arjun Vynipadath <arjun@chelsio.com>
Date: Fri, 23 Mar 2018 15:48:46 +0530
Subject: [PATCH 1184/1678] cxgb4: copy vlan_id in ndo_get_vf_config

Copy vlan_id to get it displayed in vf info.

Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudhar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 30dfb9f24f73..57d38f8ed455 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2695,13 +2695,17 @@ static int cxgb4_mgmt_get_vf_config(struct net_device *dev,
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adap = pi->adapter;
+	struct vf_info *vfinfo;
 
 	if (vf >= adap->num_vfs)
 		return -EINVAL;
+	vfinfo = &adap->vfinfo[vf];
+
 	ivi->vf = vf;
-	ivi->max_tx_rate = adap->vfinfo[vf].tx_rate;
+	ivi->max_tx_rate = vfinfo->tx_rate;
 	ivi->min_tx_rate = 0;
-	ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+	ether_addr_copy(ivi->mac, vfinfo->vf_mac_addr);
+	ivi->vlan = vfinfo->vlan;
 	return 0;
 }
 

From b46f7da1f360b755c9e20118a54b8dde9c4d8b5c Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 23 Mar 2018 17:03:10 +0530
Subject: [PATCH 1185/1678] cxgb4: depend on firmware event for link status

Depend on the firmware sending us link status changes,
rather than assuming that the link goes down upon L1
configuration.

Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 38e38dcfff91..b862477f00a7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4066,8 +4066,6 @@ int t4_link_l1cfg(struct adapter *adapter, unsigned int mbox,
 	unsigned int fw_mdi = FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO);
 	fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
 
-	lc->link_ok = 0;
-
 	/* Convert driver coding of Pause Frame Flow Control settings into the
 	 * Firmware's API.
 	 */

From 0f1417f9f0ed3f367502011b187a293cea0e2435 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 23 Mar 2018 17:05:49 +0530
Subject: [PATCH 1186/1678] cxgb4: support new ISSI flash parts

Add support for new 32MB and 64MB ISSI (Integrated Silicon
Solution, Inc.) FLASH parts.

Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index b862477f00a7..7cb3ef466cc7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -8604,6 +8604,25 @@ static int t4_get_flash_params(struct adapter *adap)
 		}
 		break;
 	}
+	case 0x9d: { /* ISSI -- Integrated Silicon Solution, Inc. */
+		/* This Density -> Size decoding table is taken from ISSI
+		 * Data Sheets.
+		 */
+		density = (flashid >> 16) & 0xff;
+		switch (density) {
+		case 0x16: /* 32 MB */
+			size = 1 << 25;
+			break;
+		case 0x17: /* 64MB */
+			size = 1 << 26;
+			break;
+		default:
+			dev_err(adap->pdev_dev, "ISSI Flash Part has bad size, ID = %#x, Density code = %#x\n",
+				flashid, density);
+			return -EINVAL;
+		}
+		break;
+	}
 	case 0xc2: { /* Macronix */
 		/* This Density -> Size decoding table is taken from Macronix
 		 * Data Sheets.

From 050e85c97f6151be1689a22810a7240fa7457557 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Mar 2018 14:36:15 +0300
Subject: [PATCH 1187/1678] ibmvnic: Potential NULL dereference in
 clean_one_tx_pool()

There is an && vs || typo here, which potentially leads to a NULL
dereference.

Fixes: e9e1e97884b7 ("ibmvnic: Update TX pool cleaning routine")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5632c030811b..0389a7a52152 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1135,7 +1135,7 @@ static void clean_one_tx_pool(struct ibmvnic_adapter *adapter,
 	u64 tx_entries;
 	int i;
 
-	if (!tx_pool && !tx_pool->tx_buff)
+	if (!tx_pool || !tx_pool->tx_buff)
 		return;
 
 	tx_entries = tx_pool->num_buffers;

From da18ab32d78b6414267d3e5c8c9b5ab34a6d3321 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Sat, 24 Mar 2018 03:47:42 +0800
Subject: [PATCH 1188/1678] tipc: tipc_disc_addr_trial_msg() can be static

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Acked-by: Jon Maloy jon.maloy@ericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/discover.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index e7655736abed..9f666e0650e2 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -134,13 +134,13 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
 
 /* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
  */
-bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
-			      struct tipc_media_addr *maddr,
-			      struct tipc_bearer *b,
-			      u32 dst, u32 src,
-			      u32 sugg_addr,
-			      u8 *peer_id,
-			      int mtyp)
+static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
+				     struct tipc_media_addr *maddr,
+				     struct tipc_bearer *b,
+				     u32 dst, u32 src,
+				     u32 sugg_addr,
+				     u8 *peer_id,
+				     int mtyp)
 {
 	struct net *net = d->net;
 	struct tipc_net *tn = tipc_net(net);

From b9193c1b61ddb97da4713155b0d580e41fb544ac Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 24 Mar 2018 11:44:22 -0700
Subject: [PATCH 1189/1678] bpf: Rename bpf_verifer_log

bpf_verifer_log =>
bpf_verifier_log

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 6 +++---
 kernel/bpf/verifier.c        | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6b66cd1aa0b9..c30668414b22 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -153,7 +153,7 @@ struct bpf_insn_aux_data {
 
 #define BPF_VERIFIER_TMP_LOG_SIZE	1024
 
-struct bpf_verifer_log {
+struct bpf_verifier_log {
 	u32 level;
 	char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
 	char __user *ubuf;
@@ -161,7 +161,7 @@ struct bpf_verifer_log {
 	u32 len_total;
 };
 
-static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
+static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 {
 	return log->len_used >= log->len_total - 1;
 }
@@ -185,7 +185,7 @@ struct bpf_verifier_env {
 	bool allow_ptr_leaks;
 	bool seen_direct_write;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
-	struct bpf_verifer_log log;
+	struct bpf_verifier_log log;
 	u32 subprog_starts[BPF_MAX_SUBPROGS];
 	/* computes the stack depth of each bpf function */
 	u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e93a6e48641b..1e84e02ff733 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -171,7 +171,7 @@ static DEFINE_MUTEX(bpf_verifier_lock);
 static void log_write(struct bpf_verifier_env *env, const char *fmt,
 		      va_list args)
 {
-	struct bpf_verifer_log *log = &env->log;
+	struct bpf_verifier_log *log = &env->log;
 	unsigned int n;
 
 	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
@@ -5611,7 +5611,7 @@ static void free_states(struct bpf_verifier_env *env)
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	struct bpf_verifier_env *env;
-	struct bpf_verifer_log *log;
+	struct bpf_verifier_log *log;
 	int ret = -EINVAL;
 
 	/* no program is valid */

From 77d2e05abd45886dcad2b632c738cf46b9f7c19e Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 24 Mar 2018 11:44:23 -0700
Subject: [PATCH 1190/1678] bpf: Add bpf_verifier_vlog() and
 bpf_verifier_log_needed()

The BTF (BPF Type Format) verifier needs to reuse the current
BPF verifier log.  Hence, it requires the following changes:

(1) Expose log_write() in verifier.c for other users.
    Its name is renamed to bpf_verifier_vlog().

(2) The BTF verifier also needs to check
'log->level && log->ubuf && !bpf_verifier_log_full(log);'
independently outside of the current log_write().  It is
because the BTF verifier will do one-check before
making multiple calls to btf_verifier_vlog to log
the details of a type.

Hence, this check is also re-factored to a new function
bpf_verifier_log_needed().  Since it is re-factored,
we can check it before va_start() in the current
bpf_verifier_log_write() and verbose().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h |  7 +++++++
 kernel/bpf/verifier.c        | 19 +++++++++++--------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c30668414b22..7e61c395fddf 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -166,6 +166,11 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
 	return log->len_used >= log->len_total - 1;
 }
 
+static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
+{
+	return log->level && log->ubuf && !bpf_verifier_log_full(log);
+}
+
 #define BPF_MAX_SUBPROGS 256
 
 /* single container for all structs
@@ -192,6 +197,8 @@ struct bpf_verifier_env {
 	u32 subprog_cnt;
 };
 
+void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+		       va_list args);
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 					   const char *fmt, ...);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1e84e02ff733..8acd2207e412 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -168,15 +168,11 @@ struct bpf_call_arg_meta {
 
 static DEFINE_MUTEX(bpf_verifier_lock);
 
-static void log_write(struct bpf_verifier_env *env, const char *fmt,
-		      va_list args)
+void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
+		       va_list args)
 {
-	struct bpf_verifier_log *log = &env->log;
 	unsigned int n;
 
-	if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
-		return;
-
 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 
 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
@@ -200,18 +196,25 @@ __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 {
 	va_list args;
 
+	if (!bpf_verifier_log_needed(&env->log))
+		return;
+
 	va_start(args, fmt);
-	log_write(env, fmt, args);
+	bpf_verifier_vlog(&env->log, fmt, args);
 	va_end(args);
 }
 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 
 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 {
+	struct bpf_verifier_env *env = private_data;
 	va_list args;
 
+	if (!bpf_verifier_log_needed(&env->log))
+		return;
+
 	va_start(args, fmt);
-	log_write(private_data, fmt, args);
+	bpf_verifier_vlog(&env->log, fmt, args);
 	va_end(args);
 }
 

From caee728ab761fa1255ff636aec13c87a3d01364d Mon Sep 17 00:00:00 2001
From: Vasanthakumar Thiagarajan <vthiagar@codeaurora.org>
Date: Wed, 14 Mar 2018 12:14:06 +0200
Subject: [PATCH 1191/1678] ath10k: add sta rx packet stats per tid

Added per tid sta counters for the following

- Total number MSDUs received from firmware
- Number of MSDUs received with errors like decryption, crc, mic ,etc.
- Number of MSDUs dropped in the driver
- A-MPDU/A-MSDU subframe stats
- Number of MSDUS passed to mac80211

All stats other than A-MPDU stats are only for received data frames.
A-MPDU stats might have stats for management frames when monitor
interface is active where management frames are notified both in wmi
and HTT interfaces.

These per tid stats can be enabled with tid bitmask through a debugfs
like below

 echo <tid_bitmask> > /sys/kernel/debug/ieee80211/phyX/ath10k/sta_tid_stats_mask

 tid 16 (tid_bitmask 0x10000) is used for non-qos data/management frames

The stats are read from
/sys/kernel/debug/ieee80211/phyX/netdev\:wlanX/stations/<sta_mac>/dump_tid_stats

Sample output:

 To enable rx stats for tid 0, 5 and 6,

 echo 0x00000061 > /sys/kernel/debug/ieee80211/phy0/ath10k/sta_tid_stats_mask

cat /sys/kernel/debug/ieee80211/phy0/netdev\:wlan15/stations/8c\:fd\:f0\:0a\:8e\:df/dump_tid_stats

  		Driver Rx pkt stats per tid, ([tid] count)
                ------------------------------------------
MSDUs from FW                   [00] 2567        [05] 3178        [06] 1089
MSDUs unchained                 [00] 0           [05] 0           [06] 0
MSDUs locally dropped:chained   [00] 0           [05] 0           [06] 0
MSDUs locally dropped:filtered  [00] 0           [05] 0           [06] 0
MSDUs queued for mac80211       [00] 2567        [05] 3178        [06] 1089
MSDUs with error:fcs_err        [00] 0           [05] 0           [06] 2
MSDUs with error:tkip_err       [00] 0           [05] 0           [06] 0
MSDUs with error:crypt_err      [00] 0           [05] 0           [06] 0
MSDUs with error:peer_idx_inval [00] 0           [05] 0           [06] 0

A-MPDU num subframes upto 10    [00] 2567        [05] 3178        [06] 1087
A-MPDU num subframes 11-20      [00] 0           [05] 0           [06] 0
A-MPDU num subframes 21-30      [00] 0           [05] 0           [06] 0
A-MPDU num subframes 31-40      [00] 0           [05] 0           [06] 0
A-MPDU num subframes 41-50      [00] 0           [05] 0           [06] 0
A-MPDU num subframes 51-60      [00] 0           [05] 0           [06] 0
A-MPDU num subframes >60        [00] 0           [05] 0           [06] 0

A-MSDU num subframes 1          [00] 2567        [05] 3178        [06] 1089
A-MSDU num subframes 2          [00] 0           [05] 0           [06] 0
A-MSDU num subframes 3          [00] 0           [05] 0           [06] 0
A-MSDU num subframes 4          [00] 0           [05] 0           [06] 0
A-MSDU num subframes >4         [00] 0           [05] 0           [06] 0

Signed-off-by: Vasanthakumar Thiagarajan <vthiagar@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/core.h        |  45 +++
 drivers/net/wireless/ath/ath10k/debug.c       |  47 +++
 drivers/net/wireless/ath/ath10k/debug.h       |  31 ++
 drivers/net/wireless/ath/ath10k/debugfs_sta.c | 286 ++++++++++++++++++
 drivers/net/wireless/ath/ath10k/htt_rx.c      |  71 ++++-
 5 files changed, 470 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index fe6b30356d3b..c624b96f8b84 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -354,6 +355,45 @@ struct ath10k_txq {
 	unsigned long num_push_allowed;
 };
 
+enum ath10k_pkt_rx_err {
+	ATH10K_PKT_RX_ERR_FCS,
+	ATH10K_PKT_RX_ERR_TKIP,
+	ATH10K_PKT_RX_ERR_CRYPT,
+	ATH10K_PKT_RX_ERR_PEER_IDX_INVAL,
+	ATH10K_PKT_RX_ERR_MAX,
+};
+
+enum ath10k_ampdu_subfrm_num {
+	ATH10K_AMPDU_SUBFRM_NUM_10,
+	ATH10K_AMPDU_SUBFRM_NUM_20,
+	ATH10K_AMPDU_SUBFRM_NUM_30,
+	ATH10K_AMPDU_SUBFRM_NUM_40,
+	ATH10K_AMPDU_SUBFRM_NUM_50,
+	ATH10K_AMPDU_SUBFRM_NUM_60,
+	ATH10K_AMPDU_SUBFRM_NUM_MORE,
+	ATH10K_AMPDU_SUBFRM_NUM_MAX,
+};
+
+enum ath10k_amsdu_subfrm_num {
+	ATH10K_AMSDU_SUBFRM_NUM_1,
+	ATH10K_AMSDU_SUBFRM_NUM_2,
+	ATH10K_AMSDU_SUBFRM_NUM_3,
+	ATH10K_AMSDU_SUBFRM_NUM_4,
+	ATH10K_AMSDU_SUBFRM_NUM_MORE,
+	ATH10K_AMSDU_SUBFRM_NUM_MAX,
+};
+
+struct ath10k_sta_tid_stats {
+	unsigned long int rx_pkt_from_fw;
+	unsigned long int rx_pkt_unchained;
+	unsigned long int rx_pkt_drop_chained;
+	unsigned long int rx_pkt_drop_filter;
+	unsigned long int rx_pkt_err[ATH10K_PKT_RX_ERR_MAX];
+	unsigned long int rx_pkt_queued_for_mac;
+	unsigned long int rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_MAX];
+	unsigned long int rx_pkt_amsdu[ATH10K_AMSDU_SUBFRM_NUM_MAX];
+};
+
 struct ath10k_sta {
 	struct ath10k_vif *arvif;
 
@@ -371,6 +411,9 @@ struct ath10k_sta {
 #ifdef CONFIG_MAC80211_DEBUGFS
 	/* protected by conf_mutex */
 	bool aggr_mode;
+
+	/* Protected with ar->data_lock */
+	struct ath10k_sta_tid_stats tid_stats[IEEE80211_NUM_TIDS + 1];
 #endif
 };
 
@@ -1019,6 +1062,8 @@ struct ath10k {
 
 	void *ce_priv;
 
+	u32 sta_tid_stats_mask;
+
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
 };
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 554cd7856cb6..1b9c092d210f 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -2143,6 +2143,48 @@ static const struct file_operations fops_fw_checksums = {
 	.llseek = default_llseek,
 };
 
+static ssize_t ath10k_sta_tid_stats_mask_read(struct file *file,
+					      char __user *user_buf,
+					      size_t count, loff_t *ppos)
+{
+	struct ath10k *ar = file->private_data;
+	char buf[32];
+	size_t len;
+
+	len = scnprintf(buf, sizeof(buf), "0x%08x\n", ar->sta_tid_stats_mask);
+	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static ssize_t ath10k_sta_tid_stats_mask_write(struct file *file,
+					       const char __user *user_buf,
+					       size_t count, loff_t *ppos)
+{
+	struct ath10k *ar = file->private_data;
+	char buf[32];
+	ssize_t len;
+	u32 mask;
+
+	len = min(count, sizeof(buf) - 1);
+	if (copy_from_user(buf, user_buf, len))
+		return -EFAULT;
+
+	buf[len] = '\0';
+	if (kstrtoint(buf, 0, &mask))
+		return -EINVAL;
+
+	ar->sta_tid_stats_mask = mask;
+
+	return len;
+}
+
+static const struct file_operations fops_sta_tid_stats_mask = {
+	.read = ath10k_sta_tid_stats_mask_read,
+	.write = ath10k_sta_tid_stats_mask_write,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 int ath10k_debug_create(struct ath10k *ar)
 {
 	ar->debug.cal_data = vzalloc(ATH10K_DEBUG_CAL_DATA_LEN);
@@ -2258,6 +2300,11 @@ int ath10k_debug_register(struct ath10k *ar)
 	debugfs_create_file("fw_checksums", 0400, ar->debug.debugfs_phy, ar,
 			    &fops_fw_checksums);
 
+	if (IS_ENABLED(CONFIG_MAC80211_DEBUGFS))
+		debugfs_create_file("sta_tid_stats_mask", 0600,
+				    ar->debug.debugfs_phy,
+				    ar, &fops_sta_tid_stats_mask);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index e54308889e59..7ebb9b1e7e69 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -191,12 +192,42 @@ void ath10k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir);
 void ath10k_sta_update_rx_duration(struct ath10k *ar,
 				   struct ath10k_fw_stats *stats);
+void ath10k_sta_update_rx_tid_stats(struct ath10k *ar, u8 *first_hdr,
+				    unsigned long int num_msdus,
+				    enum ath10k_pkt_rx_err err,
+				    unsigned long int unchain_cnt,
+				    unsigned long int drop_cnt,
+				    unsigned long int drop_cnt_filter,
+				    unsigned long int queued_msdus);
+void ath10k_sta_update_rx_tid_stats_ampdu(struct ath10k *ar,
+					  u16 peer_id, u8 tid,
+					  struct htt_rx_indication_mpdu_range *ranges,
+					  int num_ranges);
 #else
 static inline
 void ath10k_sta_update_rx_duration(struct ath10k *ar,
 				   struct ath10k_fw_stats *stats)
 {
 }
+
+static inline
+void ath10k_sta_update_rx_tid_stats(struct ath10k *ar, u8 *first_hdr,
+				    unsigned long int num_msdus,
+				    enum ath10k_pkt_rx_err err,
+				    unsigned long int unchain_cnt,
+				    unsigned long int drop_cnt,
+				    unsigned long int drop_cnt_filter,
+				    unsigned long int queued_msdus)
+{
+}
+
+static inline
+void ath10k_sta_update_rx_tid_stats_ampdu(struct ath10k *ar,
+					  u16 peer_id, u8 tid,
+					  struct htt_rx_indication_mpdu_range *ranges,
+					  int num_ranges)
+{
+}
 #endif /* CONFIG_MAC80211_DEBUGFS */
 
 #ifdef CONFIG_ATH10K_DEBUG
diff --git a/drivers/net/wireless/ath/ath10k/debugfs_sta.c b/drivers/net/wireless/ath/ath10k/debugfs_sta.c
index b260b09dd4d3..8f688f136c22 100644
--- a/drivers/net/wireless/ath/ath10k/debugfs_sta.c
+++ b/drivers/net/wireless/ath/ath10k/debugfs_sta.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2014-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -16,8 +17,125 @@
 
 #include "core.h"
 #include "wmi-ops.h"
+#include "txrx.h"
 #include "debug.h"
 
+static void ath10k_rx_stats_update_amsdu_subfrm(struct ath10k *ar,
+						struct ath10k_sta_tid_stats *stats,
+						u32 msdu_count)
+{
+	if (msdu_count == 1)
+		stats->rx_pkt_amsdu[ATH10K_AMSDU_SUBFRM_NUM_1]++;
+	else if (msdu_count == 2)
+		stats->rx_pkt_amsdu[ATH10K_AMSDU_SUBFRM_NUM_2]++;
+	else if (msdu_count == 3)
+		stats->rx_pkt_amsdu[ATH10K_AMSDU_SUBFRM_NUM_3]++;
+	else if (msdu_count == 4)
+		stats->rx_pkt_amsdu[ATH10K_AMSDU_SUBFRM_NUM_4]++;
+	else if (msdu_count > 4)
+		stats->rx_pkt_amsdu[ATH10K_AMSDU_SUBFRM_NUM_MORE]++;
+}
+
+static void ath10k_rx_stats_update_ampdu_subfrm(struct ath10k *ar,
+						struct ath10k_sta_tid_stats *stats,
+						u32 mpdu_count)
+{
+	if (mpdu_count <= 10)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_10]++;
+	else if (mpdu_count <= 20)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_20]++;
+	else if (mpdu_count <= 30)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_30]++;
+	else if (mpdu_count <= 40)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_40]++;
+	else if (mpdu_count <= 50)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_50]++;
+	else if (mpdu_count <= 60)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_60]++;
+	else if (mpdu_count > 60)
+		stats->rx_pkt_ampdu[ATH10K_AMPDU_SUBFRM_NUM_MORE]++;
+}
+
+void ath10k_sta_update_rx_tid_stats_ampdu(struct ath10k *ar, u16 peer_id, u8 tid,
+					  struct htt_rx_indication_mpdu_range *ranges,
+					  int num_ranges)
+{
+	struct ath10k_sta *arsta;
+	struct ath10k_peer *peer;
+	int i;
+
+	if (tid > IEEE80211_NUM_TIDS || !(ar->sta_tid_stats_mask & BIT(tid)))
+		return;
+
+	rcu_read_lock();
+	spin_lock_bh(&ar->data_lock);
+
+	peer = ath10k_peer_find_by_id(ar, peer_id);
+	if (!peer)
+		goto out;
+
+	arsta = (struct ath10k_sta *)peer->sta->drv_priv;
+
+	for (i = 0; i < num_ranges; i++)
+		ath10k_rx_stats_update_ampdu_subfrm(ar,
+						    &arsta->tid_stats[tid],
+						    ranges[i].mpdu_count);
+
+out:
+	spin_unlock_bh(&ar->data_lock);
+	rcu_read_unlock();
+}
+
+void ath10k_sta_update_rx_tid_stats(struct ath10k *ar, u8 *first_hdr,
+				    unsigned long int num_msdus,
+				    enum ath10k_pkt_rx_err err,
+				    unsigned long int unchain_cnt,
+				    unsigned long int drop_cnt,
+				    unsigned long int drop_cnt_filter,
+				    unsigned long int queued_msdus)
+{
+	struct ieee80211_sta *sta;
+	struct ath10k_sta *arsta;
+	struct ieee80211_hdr *hdr;
+	struct ath10k_sta_tid_stats *stats;
+	u8 tid = IEEE80211_NUM_TIDS;
+	bool non_data_frm = false;
+
+	hdr = (struct ieee80211_hdr *)first_hdr;
+	if (!ieee80211_is_data(hdr->frame_control))
+		non_data_frm = true;
+
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+
+	if (!(ar->sta_tid_stats_mask & BIT(tid)) || non_data_frm)
+		return;
+
+	rcu_read_lock();
+
+	sta = ieee80211_find_sta_by_ifaddr(ar->hw, hdr->addr2, NULL);
+	if (!sta)
+		goto exit;
+
+	arsta = (struct ath10k_sta *)sta->drv_priv;
+
+	spin_lock_bh(&ar->data_lock);
+	stats = &arsta->tid_stats[tid];
+	stats->rx_pkt_from_fw += num_msdus;
+	stats->rx_pkt_unchained += unchain_cnt;
+	stats->rx_pkt_drop_chained += drop_cnt;
+	stats->rx_pkt_drop_filter += drop_cnt_filter;
+	if (err != ATH10K_PKT_RX_ERR_MAX)
+		stats->rx_pkt_err[err] += queued_msdus;
+	stats->rx_pkt_queued_for_mac += queued_msdus;
+	ath10k_rx_stats_update_amsdu_subfrm(ar, &arsta->tid_stats[tid],
+					    num_msdus);
+	spin_unlock_bh(&ar->data_lock);
+
+exit:
+	rcu_read_unlock();
+}
+
 static void ath10k_sta_update_extd_stats_rx_duration(struct ath10k *ar,
 						     struct ath10k_fw_stats *stats)
 {
@@ -342,6 +460,172 @@ static const struct file_operations fops_peer_debug_trigger = {
 	.llseek = default_llseek,
 };
 
+static char *get_err_str(enum ath10k_pkt_rx_err i)
+{
+	switch (i) {
+	case ATH10K_PKT_RX_ERR_FCS:
+		return "fcs_err";
+	case ATH10K_PKT_RX_ERR_TKIP:
+		return "tkip_err";
+	case ATH10K_PKT_RX_ERR_CRYPT:
+		return "crypt_err";
+	case ATH10K_PKT_RX_ERR_PEER_IDX_INVAL:
+		return "peer_idx_inval";
+	case ATH10K_PKT_RX_ERR_MAX:
+		return "unknown";
+	}
+
+	return "unknown";
+}
+
+static char *get_num_ampdu_subfrm_str(enum ath10k_ampdu_subfrm_num i)
+{
+	switch (i) {
+	case ATH10K_AMPDU_SUBFRM_NUM_10:
+		return "upto 10";
+	case ATH10K_AMPDU_SUBFRM_NUM_20:
+		return "11-20";
+	case ATH10K_AMPDU_SUBFRM_NUM_30:
+		return "21-30";
+	case ATH10K_AMPDU_SUBFRM_NUM_40:
+		return "31-40";
+	case ATH10K_AMPDU_SUBFRM_NUM_50:
+		return "41-50";
+	case ATH10K_AMPDU_SUBFRM_NUM_60:
+		return "51-60";
+	case ATH10K_AMPDU_SUBFRM_NUM_MORE:
+		return ">60";
+	case ATH10K_AMPDU_SUBFRM_NUM_MAX:
+		return "0";
+	}
+
+	return "0";
+}
+
+static char *get_num_amsdu_subfrm_str(enum ath10k_amsdu_subfrm_num i)
+{
+	switch (i) {
+	case ATH10K_AMSDU_SUBFRM_NUM_1:
+		return "1";
+	case ATH10K_AMSDU_SUBFRM_NUM_2:
+		return "2";
+	case ATH10K_AMSDU_SUBFRM_NUM_3:
+		return "3";
+	case ATH10K_AMSDU_SUBFRM_NUM_4:
+		return "4";
+	case ATH10K_AMSDU_SUBFRM_NUM_MORE:
+		return ">4";
+	case ATH10K_AMSDU_SUBFRM_NUM_MAX:
+		return "0";
+	}
+
+	return "0";
+}
+
+#define PRINT_TID_STATS(_field, _tabs) \
+	do { \
+		int k = 0; \
+		for (j = 0; j <= IEEE80211_NUM_TIDS; j++) { \
+			if (ar->sta_tid_stats_mask & BIT(j))  { \
+				len += scnprintf(buf + len, buf_len - len, \
+						 "[%02d] %-10lu  ", \
+						 j, stats[j]._field); \
+				k++; \
+				if (k % 8 == 0)  { \
+					len += scnprintf(buf + len, \
+							 buf_len - len, "\n"); \
+					len += scnprintf(buf + len, \
+							 buf_len - len, \
+							 _tabs); \
+				} \
+			} \
+		} \
+		len += scnprintf(buf + len, buf_len - len, "\n"); \
+	} while (0)
+
+static ssize_t ath10k_dbg_sta_read_tid_stats(struct file *file,
+					     char __user *user_buf,
+					     size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv;
+	struct ath10k *ar = arsta->arvif->ar;
+	struct ath10k_sta_tid_stats *stats = arsta->tid_stats;
+	size_t len = 0, buf_len = 1048 * IEEE80211_NUM_TIDS;
+	char *buf;
+	int i, j;
+	ssize_t ret;
+
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	mutex_lock(&ar->conf_mutex);
+
+	spin_lock_bh(&ar->data_lock);
+
+	len += scnprintf(buf + len, buf_len - len,
+			 "\n\t\tDriver Rx pkt stats per tid, ([tid] count)\n");
+	len += scnprintf(buf + len, buf_len - len,
+			 "\t\t------------------------------------------\n");
+	len += scnprintf(buf + len, buf_len - len, "MSDUs from FW\t\t\t");
+	PRINT_TID_STATS(rx_pkt_from_fw, "\t\t\t\t");
+
+	len += scnprintf(buf + len, buf_len - len, "MSDUs unchained\t\t\t");
+	PRINT_TID_STATS(rx_pkt_unchained, "\t\t\t\t");
+
+	len += scnprintf(buf + len, buf_len - len,
+			 "MSDUs locally dropped:chained\t");
+	PRINT_TID_STATS(rx_pkt_drop_chained, "\t\t\t\t");
+
+	len += scnprintf(buf + len, buf_len - len,
+			 "MSDUs locally dropped:filtered\t");
+	PRINT_TID_STATS(rx_pkt_drop_filter, "\t\t\t\t");
+
+	len += scnprintf(buf + len, buf_len - len,
+			 "MSDUs queued for mac80211\t");
+	PRINT_TID_STATS(rx_pkt_queued_for_mac, "\t\t\t\t");
+
+	for (i = 0; i < ATH10K_PKT_RX_ERR_MAX; i++) {
+		len += scnprintf(buf + len, buf_len - len,
+				 "MSDUs with error:%s\t", get_err_str(i));
+		PRINT_TID_STATS(rx_pkt_err[i], "\t\t\t\t");
+	}
+
+	len += scnprintf(buf + len, buf_len - len, "\n");
+	for (i = 0; i < ATH10K_AMPDU_SUBFRM_NUM_MAX; i++) {
+		len += scnprintf(buf + len, buf_len - len,
+				 "A-MPDU num subframes %s\t",
+				 get_num_ampdu_subfrm_str(i));
+		PRINT_TID_STATS(rx_pkt_ampdu[i], "\t\t\t\t");
+	}
+
+	len += scnprintf(buf + len, buf_len - len, "\n");
+	for (i = 0; i < ATH10K_AMSDU_SUBFRM_NUM_MAX; i++) {
+		len += scnprintf(buf + len, buf_len - len,
+				 "A-MSDU num subframes %s\t\t",
+				 get_num_amsdu_subfrm_str(i));
+		PRINT_TID_STATS(rx_pkt_amsdu[i], "\t\t\t\t");
+	}
+
+	spin_unlock_bh(&ar->data_lock);
+
+	ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
+
+	kfree(buf);
+
+	mutex_unlock(&ar->conf_mutex);
+
+	return ret;
+}
+
+static const struct file_operations fops_tid_stats_dump = {
+	.open = simple_open,
+	.read = ath10k_dbg_sta_read_tid_stats,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 void ath10k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir)
 {
@@ -351,4 +635,6 @@ void ath10k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	debugfs_create_file("delba", 0200, dir, sta, &fops_delba);
 	debugfs_create_file("peer_debug_trigger", 0600, dir, sta,
 			    &fops_peer_debug_trigger);
+	debugfs_create_file("dump_tid_stats", 0400, dir, sta,
+			    &fops_tid_stats_dump);
 }
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 6d96f9560950..64996aba1485 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -1502,7 +1503,9 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu)
 static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 				 struct sk_buff_head *amsdu,
 				 struct ieee80211_rx_status *status,
-				 bool fill_crypt_header)
+				 bool fill_crypt_header,
+				 u8 *rx_hdr,
+				 enum ath10k_pkt_rx_err *err)
 {
 	struct sk_buff *first;
 	struct sk_buff *last;
@@ -1538,6 +1541,9 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	hdr = (void *)rxd->rx_hdr_status;
 	memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN);
 
+	if (rx_hdr)
+		memcpy(rx_hdr, hdr, RX_HTT_HDR_STATUS_LEN);
+
 	/* Each A-MSDU subframe will use the original header as the base and be
 	 * reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl.
 	 */
@@ -1581,6 +1587,17 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	if (has_tkip_err)
 		status->flag |= RX_FLAG_MMIC_ERROR;
 
+	if (err) {
+		if (has_fcs_err)
+			*err = ATH10K_PKT_RX_ERR_FCS;
+		else if (has_tkip_err)
+			*err = ATH10K_PKT_RX_ERR_TKIP;
+		else if (has_crypto_err)
+			*err = ATH10K_PKT_RX_ERR_CRYPT;
+		else if (has_peer_idx_invalid)
+			*err = ATH10K_PKT_RX_ERR_PEER_IDX_INVAL;
+	}
+
 	/* Firmware reports all necessary management frames via WMI already.
 	 * They are not reported to monitor interfaces at all so pass the ones
 	 * coming via HTT to monitor interfaces instead. This simplifies
@@ -1651,11 +1668,13 @@ static void ath10k_htt_rx_h_enqueue(struct ath10k *ar,
 	}
 }
 
-static int ath10k_unchain_msdu(struct sk_buff_head *amsdu)
+static int ath10k_unchain_msdu(struct sk_buff_head *amsdu,
+			       unsigned long int *unchain_cnt)
 {
 	struct sk_buff *skb, *first;
 	int space;
 	int total_len = 0;
+	int amsdu_len = skb_queue_len(amsdu);
 
 	/* TODO:  Might could optimize this by using
 	 * skb_try_coalesce or similar method to
@@ -1691,11 +1710,16 @@ static int ath10k_unchain_msdu(struct sk_buff_head *amsdu)
 	}
 
 	__skb_queue_head(amsdu, first);
+
+	*unchain_cnt += amsdu_len - 1;
+
 	return 0;
 }
 
 static void ath10k_htt_rx_h_unchain(struct ath10k *ar,
-				    struct sk_buff_head *amsdu)
+				    struct sk_buff_head *amsdu,
+				    unsigned long int *drop_cnt,
+				    unsigned long int *unchain_cnt)
 {
 	struct sk_buff *first;
 	struct htt_rx_desc *rxd;
@@ -1713,11 +1737,12 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar,
 	 */
 	if (decap != RX_MSDU_DECAP_RAW ||
 	    skb_queue_len(amsdu) != 1 + rxd->frag_info.ring2_more_count) {
+		*drop_cnt += skb_queue_len(amsdu);
 		__skb_queue_purge(amsdu);
 		return;
 	}
 
-	ath10k_unchain_msdu(amsdu);
+	ath10k_unchain_msdu(amsdu, unchain_cnt);
 }
 
 static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
@@ -1743,7 +1768,8 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 
 static void ath10k_htt_rx_h_filter(struct ath10k *ar,
 				   struct sk_buff_head *amsdu,
-				   struct ieee80211_rx_status *rx_status)
+				   struct ieee80211_rx_status *rx_status,
+				   unsigned long int *drop_cnt)
 {
 	if (skb_queue_empty(amsdu))
 		return;
@@ -1751,6 +1777,9 @@ static void ath10k_htt_rx_h_filter(struct ath10k *ar,
 	if (ath10k_htt_rx_amsdu_allowed(ar, amsdu, rx_status))
 		return;
 
+	if (drop_cnt)
+		*drop_cnt += skb_queue_len(amsdu);
+
 	__skb_queue_purge(amsdu);
 }
 
@@ -1760,6 +1789,12 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 	struct ieee80211_rx_status *rx_status = &htt->rx_status;
 	struct sk_buff_head amsdu;
 	int ret;
+	unsigned long int drop_cnt = 0;
+	unsigned long int unchain_cnt = 0;
+	unsigned long int drop_cnt_filter = 0;
+	unsigned long int msdus_to_queue, num_msdus;
+	enum ath10k_pkt_rx_err err = ATH10K_PKT_RX_ERR_MAX;
+	u8 first_hdr[RX_HTT_HDR_STATUS_LEN];
 
 	__skb_queue_head_init(&amsdu);
 
@@ -1781,16 +1816,23 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 		return ret;
 	}
 
+	num_msdus = skb_queue_len(&amsdu);
+
 	ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff);
 
 	/* only for ret = 1 indicates chained msdus */
 	if (ret > 0)
-		ath10k_htt_rx_h_unchain(ar, &amsdu);
+		ath10k_htt_rx_h_unchain(ar, &amsdu, &drop_cnt, &unchain_cnt);
 
-	ath10k_htt_rx_h_filter(ar, &amsdu, rx_status);
-	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true);
+	ath10k_htt_rx_h_filter(ar, &amsdu, rx_status, &drop_cnt_filter);
+	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err);
+	msdus_to_queue = skb_queue_len(&amsdu);
 	ath10k_htt_rx_h_enqueue(ar, &amsdu, rx_status);
 
+	ath10k_sta_update_rx_tid_stats(ar, first_hdr, num_msdus, err,
+				       unchain_cnt, drop_cnt, drop_cnt_filter,
+				       msdus_to_queue);
+
 	return 0;
 }
 
@@ -1801,9 +1843,14 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
 	struct htt_rx_indication_mpdu_range *mpdu_ranges;
 	int num_mpdu_ranges;
 	int i, mpdu_count = 0;
+	u16 peer_id;
+	u8 tid;
 
 	num_mpdu_ranges = MS(__le32_to_cpu(rx->hdr.info1),
 			     HTT_RX_INDICATION_INFO1_NUM_MPDU_RANGES);
+	peer_id = __le16_to_cpu(rx->hdr.peer_id);
+	tid =  MS(rx->hdr.info0, HTT_RX_INDICATION_INFO0_EXT_TID);
+
 	mpdu_ranges = htt_rx_ind_get_mpdu_ranges(rx);
 
 	ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt rx ind: ",
@@ -1815,6 +1862,9 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
 		mpdu_count += mpdu_ranges[i].mpdu_count;
 
 	atomic_add(mpdu_count, &htt->num_mpdus_ready);
+
+	ath10k_sta_update_rx_tid_stats_ampdu(ar, peer_id, tid, mpdu_ranges,
+					     num_mpdu_ranges);
 }
 
 static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar,
@@ -2124,8 +2174,9 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			 * should still give an idea about rx rate to the user.
 			 */
 			ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
-			ath10k_htt_rx_h_filter(ar, &amsdu, status);
-			ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false);
+			ath10k_htt_rx_h_filter(ar, &amsdu, status, NULL);
+			ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false, NULL,
+					     NULL);
 			ath10k_htt_rx_h_enqueue(ar, &amsdu, status);
 			break;
 		case -EAGAIN:

From bc64d05220f3e34cf432a166b83c8fff14cd7a3d Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Wed, 14 Mar 2018 12:14:08 +0200
Subject: [PATCH 1192/1678] ath10k: debugfs support to get final TPC stats for
 10.4 variants

Export the final Transmit Power Control (TPC) value, which is the
minimum of control power and existing TPC value to user space via
a new debugfs file "tpc_stats_final" to help with debugging.
It works with the new wmi cmd and event introduced in 10.4 firmware
branch.

WMI command ID: WMI_PDEV_GET_TPC_TABLE_CMDID
WMI event ID: WMI_PDEV_TPC_TABLE_EVENTID

cat /sys/kernel/debug/ieee80211/phyX/ath10k/tpc_stats_final

$ cat /sys/kernel/debug/ieee80211/phyX/ath10k/tpc_stats_final

TPC config for channel 5180 mode 10

CTL             =  0x 0 Reg. Domain             = 58
Antenna Gain    =  0 Reg. Max Antenna Gain      =   0
Power Limit     = 60 Reg. Max Power             = 60
Num tx chains   =  2 Num supported rates        = 109

******************* CDD POWER TABLE ****************

No.  Preamble Rate_code tpc_value1 tpc_value2 tpc_value3
0    CCK      0x40        0          0
1    CCK      0x41        0          0
[...]
107  HTCUP    0x 0       46          46
108  HTCUP    0x 0       46          46

******************* STBC POWER TABLE ****************

No.  Preamble Rate_code tpc_value1 tpc_value2 tpc_value3
0    CCK      0x40        0          0
1    CCK      0x41        0          0
[...]
107  HTCUP    0x 0        46         46
108  HTCUP    0x 0        46         46

***********************************
TXBF not supported
**********************************

The existing tpc_stats debugfs file provides the dump
which is minimum of target power and regulatory domain.

cat /sys/kernel/debug/ieee80211/phyX/ath10k/tpc_stats

Hardware_used: QCA4019
Firmware version: firmware-5.bin_10.4-3.0-00209

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/core.h    |  22 ++
 drivers/net/wireless/ath/ath10k/debug.c   | 107 ++++++++
 drivers/net/wireless/ath/ath10k/debug.h   |  10 +
 drivers/net/wireless/ath/ath10k/wmi-ops.h |  20 ++
 drivers/net/wireless/ath/ath10k/wmi.c     | 308 ++++++++++++++++++++--
 drivers/net/wireless/ath/ath10k/wmi.h     |  66 +++++
 6 files changed, 518 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index c624b96f8b84..73712c830be7 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -325,6 +325,27 @@ struct ath10k_tpc_stats {
 	struct ath10k_tpc_table tpc_table[WMI_TPC_FLAG];
 };
 
+struct ath10k_tpc_table_final {
+	u32 pream_idx[WMI_TPC_FINAL_RATE_MAX];
+	u8 rate_code[WMI_TPC_FINAL_RATE_MAX];
+	char tpc_value[WMI_TPC_FINAL_RATE_MAX][WMI_TPC_TX_N_CHAIN * WMI_TPC_BUF_SIZE];
+};
+
+struct ath10k_tpc_stats_final {
+	u32 reg_domain;
+	u32 chan_freq;
+	u32 phy_mode;
+	u32 twice_antenna_reduction;
+	u32 twice_max_rd_power;
+	s32 twice_antenna_gain;
+	u32 power_limit;
+	u32 num_tx_chain;
+	u32 ctl;
+	u32 rate_max;
+	u8 flag[WMI_TPC_FLAG];
+	struct ath10k_tpc_table_final tpc_table_final[WMI_TPC_FLAG];
+};
+
 struct ath10k_dfs_stats {
 	u32 phy_errors;
 	u32 pulses_total;
@@ -530,6 +551,7 @@ struct ath10k_debug {
 
 	/* used for tpc-dump storage, protected by data-lock */
 	struct ath10k_tpc_stats *tpc_stats;
+	struct ath10k_tpc_stats_final *tpc_stats_final;
 
 	struct completion tpc_complete;
 
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 1b9c092d210f..bac832ce1873 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -1480,6 +1480,19 @@ void ath10k_debug_tpc_stats_process(struct ath10k *ar,
 	spin_unlock_bh(&ar->data_lock);
 }
 
+void
+ath10k_debug_tpc_stats_final_process(struct ath10k *ar,
+				     struct ath10k_tpc_stats_final *tpc_stats)
+{
+	spin_lock_bh(&ar->data_lock);
+
+	kfree(ar->debug.tpc_stats_final);
+	ar->debug.tpc_stats_final = tpc_stats;
+	complete(&ar->debug.tpc_complete);
+
+	spin_unlock_bh(&ar->data_lock);
+}
+
 static void ath10k_tpc_stats_print(struct ath10k_tpc_stats *tpc_stats,
 				   unsigned int j, char *buf, size_t *len)
 {
@@ -2185,6 +2198,95 @@ static const struct file_operations fops_sta_tid_stats_mask = {
 	.llseek = default_llseek,
 };
 
+static int ath10k_debug_tpc_stats_final_request(struct ath10k *ar)
+{
+	int ret;
+	unsigned long time_left;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	reinit_completion(&ar->debug.tpc_complete);
+
+	ret = ath10k_wmi_pdev_get_tpc_table_cmdid(ar, WMI_TPC_CONFIG_PARAM);
+	if (ret) {
+		ath10k_warn(ar, "failed to request tpc table cmdid: %d\n", ret);
+		return ret;
+	}
+
+	time_left = wait_for_completion_timeout(&ar->debug.tpc_complete,
+						1 * HZ);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int ath10k_tpc_stats_final_open(struct inode *inode, struct file *file)
+{
+	struct ath10k *ar = inode->i_private;
+	void *buf;
+	int ret;
+
+	mutex_lock(&ar->conf_mutex);
+
+	if (ar->state != ATH10K_STATE_ON) {
+		ret = -ENETDOWN;
+		goto err_unlock;
+	}
+
+	buf = vmalloc(ATH10K_TPC_CONFIG_BUF_SIZE);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto err_unlock;
+	}
+
+	ret = ath10k_debug_tpc_stats_final_request(ar);
+	if (ret) {
+		ath10k_warn(ar, "failed to request tpc stats final: %d\n",
+			    ret);
+		goto err_free;
+	}
+
+	ath10k_tpc_stats_fill(ar, ar->debug.tpc_stats, buf);
+	file->private_data = buf;
+
+	mutex_unlock(&ar->conf_mutex);
+	return 0;
+
+err_free:
+	vfree(buf);
+
+err_unlock:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static int ath10k_tpc_stats_final_release(struct inode *inode,
+					  struct file *file)
+{
+	vfree(file->private_data);
+
+	return 0;
+}
+
+static ssize_t ath10k_tpc_stats_final_read(struct file *file,
+					   char __user *user_buf,
+					   size_t count, loff_t *ppos)
+{
+	const char *buf = file->private_data;
+	unsigned int len = strlen(buf);
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static const struct file_operations fops_tpc_stats_final = {
+	.open = ath10k_tpc_stats_final_open,
+	.release = ath10k_tpc_stats_final_release,
+	.read = ath10k_tpc_stats_final_read,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 int ath10k_debug_create(struct ath10k *ar)
 {
 	ar->debug.cal_data = vzalloc(ATH10K_DEBUG_CAL_DATA_LEN);
@@ -2305,6 +2407,11 @@ int ath10k_debug_register(struct ath10k *ar)
 				    ar->debug.debugfs_phy,
 				    ar, &fops_sta_tid_stats_mask);
 
+	if (test_bit(WMI_SERVICE_TPC_STATS_FINAL, ar->wmi.svc_map))
+		debugfs_create_file("tpc_stats_final", 0400,
+				    ar->debug.debugfs_phy, ar,
+				    &fops_tpc_stats_final);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index 7ebb9b1e7e69..0afca5c106b6 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -102,6 +102,9 @@ void ath10k_debug_unregister(struct ath10k *ar);
 void ath10k_debug_fw_stats_process(struct ath10k *ar, struct sk_buff *skb);
 void ath10k_debug_tpc_stats_process(struct ath10k *ar,
 				    struct ath10k_tpc_stats *tpc_stats);
+void
+ath10k_debug_tpc_stats_final_process(struct ath10k *ar,
+				     struct ath10k_tpc_stats_final *tpc_stats);
 void ath10k_debug_dbglog_add(struct ath10k *ar, u8 *buffer, int len);
 
 #define ATH10K_DFS_STAT_INC(ar, c) (ar->debug.dfs_stats.c++)
@@ -165,6 +168,13 @@ static inline void ath10k_debug_tpc_stats_process(struct ath10k *ar,
 	kfree(tpc_stats);
 }
 
+static inline void
+ath10k_debug_tpc_stats_final_process(struct ath10k *ar,
+				     struct ath10k_tpc_stats_final *tpc_stats)
+{
+	kfree(tpc_stats);
+}
+
 static inline void ath10k_debug_dbglog_add(struct ath10k *ar, u8 *buffer,
 					   int len)
 {
diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h
index 89d230bc9f6e..c35e45340b4f 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-ops.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h
@@ -201,6 +201,9 @@ struct wmi_ops {
 					(struct ath10k *ar,
 					 enum wmi_bss_survey_req_type type);
 	struct sk_buff *(*gen_echo)(struct ath10k *ar, u32 value);
+	struct sk_buff *(*gen_pdev_get_tpc_table_cmdid)(struct ath10k *ar,
+							u32 param);
+
 };
 
 int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id);
@@ -1445,4 +1448,21 @@ ath10k_wmi_echo(struct ath10k *ar, u32 value)
 	return ath10k_wmi_cmd_send(ar, skb, wmi->cmd->echo_cmdid);
 }
 
+static inline int
+ath10k_wmi_pdev_get_tpc_table_cmdid(struct ath10k *ar, u32 param)
+{
+	struct sk_buff *skb;
+
+	if (!ar->wmi.ops->gen_pdev_get_tpc_table_cmdid)
+		return -EOPNOTSUPP;
+
+	skb = ar->wmi.ops->gen_pdev_get_tpc_table_cmdid(ar, param);
+
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	return ath10k_wmi_cmd_send(ar, skb,
+				   ar->wmi.cmd->pdev_get_tpc_table_cmdid);
+}
+
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 58dc2189ba49..77c6bc671f32 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -196,6 +197,7 @@ static struct wmi_cmd_map wmi_cmd_map = {
 	.mu_cal_start_cmdid = WMI_CMD_UNSUPPORTED,
 	.set_cca_params_cmdid = WMI_CMD_UNSUPPORTED,
 	.pdev_bss_chan_info_request_cmdid = WMI_CMD_UNSUPPORTED,
+	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
 };
 
 /* 10.X WMI cmd track */
@@ -362,6 +364,7 @@ static struct wmi_cmd_map wmi_10x_cmd_map = {
 	.mu_cal_start_cmdid = WMI_CMD_UNSUPPORTED,
 	.set_cca_params_cmdid = WMI_CMD_UNSUPPORTED,
 	.pdev_bss_chan_info_request_cmdid = WMI_CMD_UNSUPPORTED,
+	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
 };
 
 /* 10.2.4 WMI cmd track */
@@ -528,6 +531,7 @@ static struct wmi_cmd_map wmi_10_2_4_cmd_map = {
 	.set_cca_params_cmdid = WMI_CMD_UNSUPPORTED,
 	.pdev_bss_chan_info_request_cmdid =
 		WMI_10_2_PDEV_BSS_CHAN_INFO_REQUEST_CMDID,
+	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
 };
 
 /* 10.4 WMI cmd track */
@@ -1480,6 +1484,7 @@ static struct wmi_cmd_map wmi_10_2_cmd_map = {
 	.pdev_get_ani_cck_config_cmdid = WMI_CMD_UNSUPPORTED,
 	.pdev_get_ani_ofdm_config_cmdid = WMI_CMD_UNSUPPORTED,
 	.pdev_reserve_ast_entry_cmdid = WMI_CMD_UNSUPPORTED,
+	.pdev_get_tpc_table_cmdid = WMI_CMD_UNSUPPORTED,
 };
 
 static struct wmi_pdev_param_map wmi_10_4_pdev_param_map = {
@@ -4313,19 +4318,11 @@ static void ath10k_tpc_config_disp_tables(struct ath10k *ar,
 	}
 }
 
-void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
+void ath10k_wmi_tpc_config_get_rate_code(u8 *rate_code, u16 *pream_table,
+					 u32 num_tx_chain)
 {
-	u32 i, j, pream_idx, num_tx_chain;
-	u8 rate_code[WMI_TPC_RATE_MAX], rate_idx;
-	u16 pream_table[WMI_TPC_PREAM_TABLE_MAX];
-	struct wmi_pdev_tpc_config_event *ev;
-	struct ath10k_tpc_stats *tpc_stats;
-
-	ev = (struct wmi_pdev_tpc_config_event *)skb->data;
-
-	tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC);
-	if (!tpc_stats)
-		return;
+	u32 i, j, pream_idx;
+	u8 rate_idx;
 
 	/* Create the rate code table based on the chains supported */
 	rate_idx = 0;
@@ -4349,8 +4346,6 @@ void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
 	pream_table[pream_idx] = rate_idx;
 	pream_idx++;
 
-	num_tx_chain = __le32_to_cpu(ev->num_tx_chain);
-
 	/* Fill HT20 rate code */
 	for (i = 0; i < num_tx_chain; i++) {
 		for (j = 0; j < 8; j++) {
@@ -4374,7 +4369,7 @@ void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
 	pream_idx++;
 
 	/* Fill VHT20 rate code */
-	for (i = 0; i < __le32_to_cpu(ev->num_tx_chain); i++) {
+	for (i = 0; i < num_tx_chain; i++) {
 		for (j = 0; j < 10; j++) {
 			rate_code[rate_idx] =
 			ATH10K_HW_RATECODE(j, i, WMI_RATE_PREAMBLE_VHT);
@@ -4418,6 +4413,26 @@ void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
 		ATH10K_HW_RATECODE(0, 0, WMI_RATE_PREAMBLE_OFDM);
 
 	pream_table[pream_idx] = ATH10K_TPC_PREAM_TABLE_END;
+}
+
+void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
+{
+	u32 num_tx_chain;
+	u8 rate_code[WMI_TPC_RATE_MAX];
+	u16 pream_table[WMI_TPC_PREAM_TABLE_MAX];
+	struct wmi_pdev_tpc_config_event *ev;
+	struct ath10k_tpc_stats *tpc_stats;
+
+	ev = (struct wmi_pdev_tpc_config_event *)skb->data;
+
+	tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC);
+	if (!tpc_stats)
+		return;
+
+	num_tx_chain = __le32_to_cpu(ev->num_tx_chain);
+
+	ath10k_wmi_tpc_config_get_rate_code(rate_code, pream_table,
+					    num_tx_chain);
 
 	tpc_stats->chan_freq = __le32_to_cpu(ev->chan_freq);
 	tpc_stats->phy_mode = __le32_to_cpu(ev->phy_mode);
@@ -4457,6 +4472,246 @@ void ath10k_wmi_event_pdev_tpc_config(struct ath10k *ar, struct sk_buff *skb)
 		   __le32_to_cpu(ev->rate_max));
 }
 
+static u8
+ath10k_wmi_tpc_final_get_rate(struct ath10k *ar,
+			      struct wmi_pdev_tpc_final_table_event *ev,
+			      u32 rate_idx, u32 num_chains,
+			      u32 rate_code, u8 type, u32 pream_idx)
+{
+	u8 tpc, num_streams, preamble, ch, stm_idx;
+	s8 pow_agcdd, pow_agstbc, pow_agtxbf;
+	int pream;
+
+	num_streams = ATH10K_HW_NSS(rate_code);
+	preamble = ATH10K_HW_PREAMBLE(rate_code);
+	ch = num_chains - 1;
+	stm_idx = num_streams - 1;
+	pream = -1;
+
+	if (__le32_to_cpu(ev->chan_freq) <= 2483) {
+		switch (pream_idx) {
+		case WMI_TPC_PREAM_2GHZ_CCK:
+			pream = 0;
+			break;
+		case WMI_TPC_PREAM_2GHZ_OFDM:
+			pream = 1;
+			break;
+		case WMI_TPC_PREAM_2GHZ_HT20:
+		case WMI_TPC_PREAM_2GHZ_VHT20:
+			pream = 2;
+			break;
+		case WMI_TPC_PREAM_2GHZ_HT40:
+		case WMI_TPC_PREAM_2GHZ_VHT40:
+			pream = 3;
+			break;
+		case WMI_TPC_PREAM_2GHZ_VHT80:
+			pream = 4;
+			break;
+		default:
+			pream = -1;
+			break;
+		}
+	}
+
+	if (__le32_to_cpu(ev->chan_freq) >= 5180) {
+		switch (pream_idx) {
+		case WMI_TPC_PREAM_5GHZ_OFDM:
+			pream = 0;
+			break;
+		case WMI_TPC_PREAM_5GHZ_HT20:
+		case WMI_TPC_PREAM_5GHZ_VHT20:
+			pream = 1;
+			break;
+		case WMI_TPC_PREAM_5GHZ_HT40:
+		case WMI_TPC_PREAM_5GHZ_VHT40:
+			pream = 2;
+			break;
+		case WMI_TPC_PREAM_5GHZ_VHT80:
+			pream = 3;
+			break;
+		case WMI_TPC_PREAM_5GHZ_HTCUP:
+			pream = 4;
+			break;
+		default:
+			pream = -1;
+			break;
+		}
+	}
+
+	if (pream == 4)
+		tpc = min_t(u8, ev->rates_array[rate_idx],
+			    ev->max_reg_allow_pow[ch]);
+	else
+		tpc = min_t(u8, min_t(u8, ev->rates_array[rate_idx],
+				      ev->max_reg_allow_pow[ch]),
+			    ev->ctl_power_table[0][pream][stm_idx]);
+
+	if (__le32_to_cpu(ev->num_tx_chain) <= 1)
+		goto out;
+
+	if (preamble == WMI_RATE_PREAMBLE_CCK)
+		goto out;
+
+	if (num_chains <= num_streams)
+		goto out;
+
+	switch (type) {
+	case WMI_TPC_TABLE_TYPE_STBC:
+		pow_agstbc = ev->max_reg_allow_pow_agstbc[ch - 1][stm_idx];
+		if (pream == 4)
+			tpc = min_t(u8, tpc, pow_agstbc);
+		else
+			tpc = min_t(u8, min_t(u8, tpc, pow_agstbc),
+				    ev->ctl_power_table[0][pream][stm_idx]);
+		break;
+	case WMI_TPC_TABLE_TYPE_TXBF:
+		pow_agtxbf = ev->max_reg_allow_pow_agtxbf[ch - 1][stm_idx];
+		if (pream == 4)
+			tpc = min_t(u8, tpc, pow_agtxbf);
+		else
+			tpc = min_t(u8, min_t(u8, tpc, pow_agtxbf),
+				    ev->ctl_power_table[1][pream][stm_idx]);
+		break;
+	case WMI_TPC_TABLE_TYPE_CDD:
+		pow_agcdd = ev->max_reg_allow_pow_agcdd[ch - 1][stm_idx];
+		if (pream == 4)
+			tpc = min_t(u8, tpc, pow_agcdd);
+		else
+			tpc = min_t(u8, min_t(u8, tpc, pow_agcdd),
+				    ev->ctl_power_table[0][pream][stm_idx]);
+		break;
+	default:
+		ath10k_warn(ar, "unknown wmi tpc final table type: %d\n", type);
+		tpc = 0;
+		break;
+	}
+
+out:
+	return tpc;
+}
+
+static void
+ath10k_wmi_tpc_stats_final_disp_tables(struct ath10k *ar,
+				       struct wmi_pdev_tpc_final_table_event *ev,
+				       struct ath10k_tpc_stats_final *tpc_stats,
+				       u8 *rate_code, u16 *pream_table, u8 type)
+{
+	u32 i, j, pream_idx, flags;
+	u8 tpc[WMI_TPC_TX_N_CHAIN];
+	char tpc_value[WMI_TPC_TX_N_CHAIN * WMI_TPC_BUF_SIZE];
+	char buff[WMI_TPC_BUF_SIZE];
+
+	flags = __le32_to_cpu(ev->flags);
+
+	switch (type) {
+	case WMI_TPC_TABLE_TYPE_CDD:
+		if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_CDD)) {
+			ath10k_dbg(ar, ATH10K_DBG_WMI, "CDD not supported\n");
+			tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG;
+			return;
+		}
+		break;
+	case WMI_TPC_TABLE_TYPE_STBC:
+		if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_STBC)) {
+			ath10k_dbg(ar, ATH10K_DBG_WMI, "STBC not supported\n");
+			tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG;
+			return;
+		}
+		break;
+	case WMI_TPC_TABLE_TYPE_TXBF:
+		if (!(flags & WMI_TPC_CONFIG_EVENT_FLAG_TABLE_TXBF)) {
+			ath10k_dbg(ar, ATH10K_DBG_WMI, "TXBF not supported\n");
+			tpc_stats->flag[type] = ATH10K_TPC_TABLE_TYPE_FLAG;
+			return;
+		}
+		break;
+	default:
+		ath10k_dbg(ar, ATH10K_DBG_WMI,
+			   "invalid table type in wmi tpc event: %d\n", type);
+		return;
+	}
+
+	pream_idx = 0;
+	for (i = 0; i < __le32_to_cpu(ev->rate_max); i++) {
+		memset(tpc_value, 0, sizeof(tpc_value));
+		memset(buff, 0, sizeof(buff));
+		if (i == pream_table[pream_idx])
+			pream_idx++;
+
+		for (j = 0; j < WMI_TPC_TX_N_CHAIN; j++) {
+			if (j >= __le32_to_cpu(ev->num_tx_chain))
+				break;
+
+			tpc[j] = ath10k_wmi_tpc_final_get_rate(ar, ev, i, j + 1,
+							       rate_code[i],
+							       type, pream_idx);
+			snprintf(buff, sizeof(buff), "%8d ", tpc[j]);
+			strncat(tpc_value, buff, strlen(buff));
+		}
+		tpc_stats->tpc_table_final[type].pream_idx[i] = pream_idx;
+		tpc_stats->tpc_table_final[type].rate_code[i] = rate_code[i];
+		memcpy(tpc_stats->tpc_table_final[type].tpc_value[i],
+		       tpc_value, sizeof(tpc_value));
+	}
+}
+
+void ath10k_wmi_event_tpc_final_table(struct ath10k *ar, struct sk_buff *skb)
+{
+	u32 num_tx_chain;
+	u8 rate_code[WMI_TPC_FINAL_RATE_MAX];
+	u16 pream_table[WMI_TPC_PREAM_TABLE_MAX];
+	struct wmi_pdev_tpc_final_table_event *ev;
+	struct ath10k_tpc_stats_final *tpc_stats;
+
+	ev = (struct wmi_pdev_tpc_final_table_event *)skb->data;
+
+	tpc_stats = kzalloc(sizeof(*tpc_stats), GFP_ATOMIC);
+	if (!tpc_stats)
+		return;
+
+	num_tx_chain = __le32_to_cpu(ev->num_tx_chain);
+
+	ath10k_wmi_tpc_config_get_rate_code(rate_code, pream_table,
+					    num_tx_chain);
+
+	tpc_stats->chan_freq = __le32_to_cpu(ev->chan_freq);
+	tpc_stats->phy_mode = __le32_to_cpu(ev->phy_mode);
+	tpc_stats->ctl = __le32_to_cpu(ev->ctl);
+	tpc_stats->reg_domain = __le32_to_cpu(ev->reg_domain);
+	tpc_stats->twice_antenna_gain = a_sle32_to_cpu(ev->twice_antenna_gain);
+	tpc_stats->twice_antenna_reduction =
+		__le32_to_cpu(ev->twice_antenna_reduction);
+	tpc_stats->power_limit = __le32_to_cpu(ev->power_limit);
+	tpc_stats->twice_max_rd_power = __le32_to_cpu(ev->twice_max_rd_power);
+	tpc_stats->num_tx_chain = __le32_to_cpu(ev->num_tx_chain);
+	tpc_stats->rate_max = __le32_to_cpu(ev->rate_max);
+
+	ath10k_wmi_tpc_stats_final_disp_tables(ar, ev, tpc_stats,
+					       rate_code, pream_table,
+					       WMI_TPC_TABLE_TYPE_CDD);
+	ath10k_wmi_tpc_stats_final_disp_tables(ar, ev,  tpc_stats,
+					       rate_code, pream_table,
+					       WMI_TPC_TABLE_TYPE_STBC);
+	ath10k_wmi_tpc_stats_final_disp_tables(ar, ev, tpc_stats,
+					       rate_code, pream_table,
+					       WMI_TPC_TABLE_TYPE_TXBF);
+
+	ath10k_debug_tpc_stats_final_process(ar, tpc_stats);
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi event tpc final table channel %d mode %d ctl %d regd %d gain %d %d limit %d max_power %d tx_chanins %d rates %d\n",
+		   __le32_to_cpu(ev->chan_freq),
+		   __le32_to_cpu(ev->phy_mode),
+		   __le32_to_cpu(ev->ctl),
+		   __le32_to_cpu(ev->reg_domain),
+		   a_sle32_to_cpu(ev->twice_antenna_gain),
+		   __le32_to_cpu(ev->twice_antenna_reduction),
+		   __le32_to_cpu(ev->power_limit),
+		   __le32_to_cpu(ev->twice_max_rd_power) / 2,
+		   __le32_to_cpu(ev->num_tx_chain),
+		   __le32_to_cpu(ev->rate_max));
+}
+
 static void
 ath10k_wmi_handle_tdls_peer_event(struct ath10k *ar, struct sk_buff *skb)
 {
@@ -5549,6 +5804,9 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb)
 	case WMI_10_4_TDLS_PEER_EVENTID:
 		ath10k_wmi_handle_tdls_peer_event(ar, skb);
 		break;
+	case WMI_10_4_PDEV_TPC_TABLE_EVENTID:
+		ath10k_wmi_event_tpc_final_table(ar, skb);
+		break;
 	default:
 		ath10k_warn(ar, "Unknown eventid: %d\n", id);
 		break;
@@ -7989,6 +8247,24 @@ static u32 ath10k_wmi_prepare_peer_qos(u8 uapsd_queues, u8 sp)
 	return peer_qos;
 }
 
+static struct sk_buff *
+ath10k_wmi_10_4_op_gen_pdev_get_tpc_table_cmdid(struct ath10k *ar, u32 param)
+{
+	struct wmi_pdev_get_tpc_table_cmd *cmd;
+	struct sk_buff *skb;
+
+	skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	cmd = (struct wmi_pdev_get_tpc_table_cmd *)skb->data;
+	cmd->param = __cpu_to_le32(param);
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi pdev get tpc table param:%d\n", param);
+	return skb;
+}
+
 static struct sk_buff *
 ath10k_wmi_10_4_gen_tdls_peer_update(struct ath10k *ar,
 				     const struct wmi_tdls_peer_update_cmd_arg *arg,
@@ -8430,6 +8706,8 @@ static const struct wmi_ops wmi_10_4_ops = {
 	.ext_resource_config = ath10k_wmi_10_4_ext_resource_config,
 	.gen_update_fw_tdls_state = ath10k_wmi_10_4_gen_update_fw_tdls_state,
 	.gen_tdls_peer_update = ath10k_wmi_10_4_gen_tdls_peer_update,
+	.gen_pdev_get_tpc_table_cmdid =
+			ath10k_wmi_10_4_op_gen_pdev_get_tpc_table_cmdid,
 
 	/* shared with 10.2 */
 	.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index c7b30ed9015d..c8fc45d8090e 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005-2011 Atheros Communications Inc.
  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -197,6 +198,9 @@ enum wmi_service {
 	WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY,
 	WMI_SERVICE_MGMT_TX_WMI,
 	WMI_SERVICE_TDLS_WIDER_BANDWIDTH,
+	WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS,
+	WMI_SERVICE_HOST_DFS_CHECK_SUPPORT,
+	WMI_SERVICE_TPC_STATS_FINAL,
 
 	/* keep last */
 	WMI_SERVICE_MAX,
@@ -339,6 +343,9 @@ enum wmi_10_4_service {
 	WMI_10_4_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE,
 	WMI_10_4_SERVICE_TDLS_EXPLICIT_MODE_ONLY,
 	WMI_10_4_SERVICE_TDLS_WIDER_BANDWIDTH,
+	WMI_10_4_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS,
+	WMI_10_4_SERVICE_HOST_DFS_CHECK_SUPPORT,
+	WMI_10_4_SERVICE_TPC_STATS_FINAL,
 };
 
 static inline char *wmi_service_name(int service_id)
@@ -448,6 +455,9 @@ static inline char *wmi_service_name(int service_id)
 	SVCSTR(WMI_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE);
 	SVCSTR(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY);
 	SVCSTR(WMI_SERVICE_TDLS_WIDER_BANDWIDTH);
+	SVCSTR(WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS);
+	SVCSTR(WMI_SERVICE_HOST_DFS_CHECK_SUPPORT);
+	SVCSTR(WMI_SERVICE_TPC_STATS_FINAL);
 	default:
 		return NULL;
 	}
@@ -746,6 +756,12 @@ static inline void wmi_10_4_svc_map(const __le32 *in, unsigned long *out,
 	       WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, len);
 	SVCMAP(WMI_10_4_SERVICE_TDLS_WIDER_BANDWIDTH,
 	       WMI_SERVICE_TDLS_WIDER_BANDWIDTH, len);
+	SVCMAP(WMI_10_4_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS,
+	       WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS, len);
+	SVCMAP(WMI_10_4_SERVICE_HOST_DFS_CHECK_SUPPORT,
+	       WMI_SERVICE_HOST_DFS_CHECK_SUPPORT, len);
+	SVCMAP(WMI_10_4_SERVICE_TPC_STATS_FINAL,
+	       WMI_SERVICE_TPC_STATS_FINAL, len);
 }
 
 #undef SVCMAP
@@ -3993,10 +4009,12 @@ struct wmi_pdev_get_tpc_config_cmd {
 
 #define WMI_TPC_CONFIG_PARAM		1
 #define WMI_TPC_RATE_MAX		160
+#define WMI_TPC_FINAL_RATE_MAX		240
 #define WMI_TPC_TX_N_CHAIN		4
 #define WMI_TPC_PREAM_TABLE_MAX		10
 #define WMI_TPC_FLAG			3
 #define WMI_TPC_BUF_SIZE		10
+#define WMI_TPC_BEAMFORMING		2
 
 enum wmi_tpc_table_type {
 	WMI_TPC_TABLE_TYPE_CDD = 0,
@@ -4039,6 +4057,51 @@ enum wmi_tp_scale {
 	WMI_TP_SCALE_SIZE   = 5,	/* max num of enum     */
 };
 
+struct wmi_pdev_tpc_final_table_event {
+	__le32 reg_domain;
+	__le32 chan_freq;
+	__le32 phy_mode;
+	__le32 twice_antenna_reduction;
+	__le32 twice_max_rd_power;
+	a_sle32 twice_antenna_gain;
+	__le32 power_limit;
+	__le32 rate_max;
+	__le32 num_tx_chain;
+	__le32 ctl;
+	__le32 flags;
+	s8 max_reg_allow_pow[WMI_TPC_TX_N_CHAIN];
+	s8 max_reg_allow_pow_agcdd[WMI_TPC_TX_N_CHAIN][WMI_TPC_TX_N_CHAIN];
+	s8 max_reg_allow_pow_agstbc[WMI_TPC_TX_N_CHAIN][WMI_TPC_TX_N_CHAIN];
+	s8 max_reg_allow_pow_agtxbf[WMI_TPC_TX_N_CHAIN][WMI_TPC_TX_N_CHAIN];
+	u8 rates_array[WMI_TPC_FINAL_RATE_MAX];
+	u8 ctl_power_table[WMI_TPC_BEAMFORMING][WMI_TPC_TX_N_CHAIN]
+	   [WMI_TPC_TX_N_CHAIN];
+} __packed;
+
+struct wmi_pdev_get_tpc_table_cmd {
+	__le32 param;
+} __packed;
+
+enum wmi_tpc_pream_2ghz {
+	WMI_TPC_PREAM_2GHZ_CCK = 0,
+	WMI_TPC_PREAM_2GHZ_OFDM,
+	WMI_TPC_PREAM_2GHZ_HT20,
+	WMI_TPC_PREAM_2GHZ_HT40,
+	WMI_TPC_PREAM_2GHZ_VHT20,
+	WMI_TPC_PREAM_2GHZ_VHT40,
+	WMI_TPC_PREAM_2GHZ_VHT80,
+};
+
+enum wmi_tpc_pream_5ghz {
+	WMI_TPC_PREAM_5GHZ_OFDM = 1,
+	WMI_TPC_PREAM_5GHZ_HT20,
+	WMI_TPC_PREAM_5GHZ_HT40,
+	WMI_TPC_PREAM_5GHZ_VHT20,
+	WMI_TPC_PREAM_5GHZ_VHT40,
+	WMI_TPC_PREAM_5GHZ_VHT80,
+	WMI_TPC_PREAM_5GHZ_HTCUP,
+};
+
 struct wmi_pdev_chanlist_update_event {
 	/* number of channels */
 	__le32 num_chan;
@@ -6979,5 +7042,8 @@ void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar,
 int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar,
 				   enum wmi_vdev_subtype subtype);
 int ath10k_wmi_barrier(struct ath10k *ar);
+void ath10k_wmi_tpc_config_get_rate_code(u8 *rate_code, u16 *pream_table,
+					 u32 num_tx_chain);
+void ath10k_wmi_event_tpc_final_table(struct ath10k *ar, struct sk_buff *skb);
 
 #endif /* _WMI_H_ */

From ba21ac6cdaef2d0dd51bccbd86547b7f34f7bdc1 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Wed, 14 Mar 2018 12:14:10 +0200
Subject: [PATCH 1193/1678] ath: fix false radar detection in JP region

This fixes false radar detection (of radar type 7)
in Japan region by correcting the radar pulse type
to Chirp as per specification.

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/dfs_pattern_detector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c
index 4100ffd42a43..448b83eea810 100644
--- a/drivers/net/wireless/ath/dfs_pattern_detector.c
+++ b/drivers/net/wireless/ath/dfs_pattern_detector.c
@@ -115,7 +115,7 @@ static const struct radar_detector_specs jp_radar_ref_types[] = {
 	JP_PATTERN(4, 0, 5, 150, 230, 1, 23, 50, false),
 	JP_PATTERN(5, 6, 10, 200, 500, 1, 16, 50, false),
 	JP_PATTERN(6, 11, 20, 200, 500, 1, 12, 50, false),
-	JP_PATTERN(7, 50, 100, 1000, 2000, 1, 3, 50, false),
+	JP_PATTERN(7, 50, 100, 1000, 2000, 1, 3, 50, true),
 	JP_PATTERN(5, 0, 1, 333, 333, 1, 9, 50, false),
 };
 

From 6b8a127bf66d395705153fdaf3fc0b52bedd2e9f Mon Sep 17 00:00:00 2001
From: Ramon Fried <rfried@codeaurora.org>
Date: Wed, 14 Mar 2018 12:14:11 +0200
Subject: [PATCH 1194/1678] wcn36xx: reduce verbosity of drivers messages

Whenever the WLAN interface is started the FW
version and caps are printed.
The caps now will be displayed only in debug mode.
Firmware version will be displayed only once on first
startup of the interface.

Change-Id: I4db6ea7f384fe15eebe4c3ddb1d1ccab00094332
Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/main.c    |  3 ++-
 drivers/net/wireless/ath/wcn36xx/smd.c     | 18 ++++++++++--------
 drivers/net/wireless/ath/wcn36xx/wcn36xx.h |  2 ++
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index fcc98d4f9f9e..621e72b6ec99 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -261,7 +261,7 @@ static void wcn36xx_feat_caps_info(struct wcn36xx *wcn)
 
 	for (i = 0; i < MAX_FEATURE_SUPPORTED; i++) {
 		if (get_feat_caps(wcn->fw_feat_caps, i))
-			wcn36xx_info("FW Cap %s\n", wcn36xx_get_cap_name(i));
+			wcn36xx_dbg(WCN36XX_DBG_MAC, "FW Cap %s\n", wcn36xx_get_cap_name(i));
 	}
 }
 
@@ -1280,6 +1280,7 @@ static int wcn36xx_probe(struct platform_device *pdev)
 	wcn = hw->priv;
 	wcn->hw = hw;
 	wcn->dev = &pdev->dev;
+	wcn->first_boot = true;
 	mutex_init(&wcn->conf_mutex);
 	mutex_init(&wcn->hal_mutex);
 	mutex_init(&wcn->scan_lock);
diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 7cc29285e052..def6b23b777f 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -409,15 +409,17 @@ static int wcn36xx_smd_start_rsp(struct wcn36xx *wcn, void *buf, size_t len)
 	wcn->fw_minor = rsp->start_rsp_params.version.minor;
 	wcn->fw_major = rsp->start_rsp_params.version.major;
 
-	wcn36xx_info("firmware WLAN version '%s' and CRM version '%s'\n",
-		     wcn->wlan_version, wcn->crm_version);
-
-	wcn36xx_info("firmware API %u.%u.%u.%u, %u stations, %u bssids\n",
-		     wcn->fw_major, wcn->fw_minor,
-		     wcn->fw_version, wcn->fw_revision,
-		     rsp->start_rsp_params.stations,
-		     rsp->start_rsp_params.bssids);
+	if (wcn->first_boot) {
+		wcn->first_boot = false;
+		wcn36xx_info("firmware WLAN version '%s' and CRM version '%s'\n",
+			     wcn->wlan_version, wcn->crm_version);
 
+		wcn36xx_info("firmware API %u.%u.%u.%u, %u stations, %u bssids\n",
+			     wcn->fw_major, wcn->fw_minor,
+			     wcn->fw_version, wcn->fw_revision,
+			     rsp->start_rsp_params.stations,
+			     rsp->start_rsp_params.bssids);
+	}
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
index 81017e6703b4..5854adf43f3a 100644
--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
@@ -192,6 +192,8 @@ struct wcn36xx {
 	u8			crm_version[WCN36XX_HAL_VERSION_LENGTH + 1];
 	u8			wlan_version[WCN36XX_HAL_VERSION_LENGTH + 1];
 
+	bool		first_boot;
+
 	/* IRQs */
 	int			tx_irq;
 	int			rx_irq;

From 9ef0f58ed7b4a55da4a64641d538e0d9e46579ac Mon Sep 17 00:00:00 2001
From: Carl Huang <cjhuang@codeaurora.org>
Date: Mon, 5 Mar 2018 14:44:02 +0800
Subject: [PATCH 1195/1678] ath10k: fix use-after-free in
 ath10k_wmi_cmd_send_nowait

The skb may be freed in tx completion context before
trace_ath10k_wmi_cmd is called. This can be easily captured when
KASAN(Kernel Address Sanitizer) is enabled. The fix is to move
trace_ath10k_wmi_cmd before the send operation. As the ret has no
meaning in trace_ath10k_wmi_cmd then, so remove this parameter too.

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
Tested-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/trace.h | 12 ++++--------
 drivers/net/wireless/ath/ath10k/wmi.c   |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h
index e40edced1d82..7d2fac342150 100644
--- a/drivers/net/wireless/ath/ath10k/trace.h
+++ b/drivers/net/wireless/ath/ath10k/trace.h
@@ -152,10 +152,9 @@ TRACE_EVENT(ath10k_log_dbg_dump,
 );
 
 TRACE_EVENT(ath10k_wmi_cmd,
-	TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len,
-		 int ret),
+	TP_PROTO(struct ath10k *ar, int id, const void *buf, size_t buf_len),
 
-	TP_ARGS(ar, id, buf, buf_len, ret),
+	TP_ARGS(ar, id, buf, buf_len),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(ar->dev))
@@ -163,7 +162,6 @@ TRACE_EVENT(ath10k_wmi_cmd,
 		__field(unsigned int, id)
 		__field(size_t, buf_len)
 		__dynamic_array(u8, buf, buf_len)
-		__field(int, ret)
 	),
 
 	TP_fast_assign(
@@ -171,17 +169,15 @@ TRACE_EVENT(ath10k_wmi_cmd,
 		__assign_str(driver, dev_driver_string(ar->dev));
 		__entry->id = id;
 		__entry->buf_len = buf_len;
-		__entry->ret = ret;
 		memcpy(__get_dynamic_array(buf), buf, buf_len);
 	),
 
 	TP_printk(
-		"%s %s id %d len %zu ret %d",
+		"%s %s id %d len %zu",
 		__get_str(driver),
 		__get_str(device),
 		__entry->id,
-		__entry->buf_len,
-		__entry->ret
+		__entry->buf_len
 	)
 );
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 77c6bc671f32..9649bb752bbd 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1747,8 +1747,8 @@ int ath10k_wmi_cmd_send_nowait(struct ath10k *ar, struct sk_buff *skb,
 	cmd_hdr->cmd_id = __cpu_to_le32(cmd);
 
 	memset(skb_cb, 0, sizeof(*skb_cb));
+	trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len);
 	ret = ath10k_htc_send(&ar->htc, ar->wmi.eid, skb);
-	trace_ath10k_wmi_cmd(ar, cmd_id, skb->data, skb->len, ret);
 
 	if (ret)
 		goto err_pull;

From 8b2d93dd22615cb7f3046a5a2083a6f8bb8052ed Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Mon, 12 Mar 2018 17:09:40 +0530
Subject: [PATCH 1196/1678] ath10k: Fix kernel panic while using worker
 (ath10k_sta_rc_update_wk)

When attempt to run worker (ath10k_sta_rc_update_wk) after the station object
(ieee80211_sta) delete will trigger the kernel panic.

This problem arise in AP + Mesh configuration, Where the current node AP VAP
and neighbor node mesh VAP MAC address are same. When the current mesh node
try to establish the mesh link with neighbor node, driver peer creation for
the neighbor mesh node fails due to duplication MAC address. Already the AP
VAP created with same MAC address.

It is caused by the following scenario steps.

Steps:
1. In above condition, ath10k driver sta_state callback (ath10k_sta_state)
   fails to do the state change for a station from IEEE80211_STA_NOTEXIST
   to IEEE80211_STA_NONE due to peer creation fails. Sta_state callback is
   called from ieee80211_add_station() to handle the new station
   (neighbor mesh node) request from the wpa_supplicant.
2. Concurrently ath10k receive the sta_rc_update callback notification from
   the mesh_neighbour_update() to handle the beacon frames of the above
   neighbor mesh node. since its atomic callback, ath10k driver queue the
   work (ath10k_sta_rc_update_wk) to handle rc update.
3. Due to driver sta_state callback fails (step 1), mac80211 free the station
   object.
4. When the worker (ath10k_sta_rc_update_wk) scheduled to run, it will access
   the station object which is already deleted. so it will trigger kernel
   panic.

Added the peer exist check in sta_rc_update callback before queue the work.

Kernel Panic log:

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0204000
[00000000] *pgd=00000000
Internal error: Oops: 17 [#1] PREEMPT SMP ARM
CPU: 1 PID: 1833 Comm: kworker/u4:2 Not tainted 3.14.77 #1
task: dcef0000 ti: d72b6000 task.ti: d72b6000
PC is at pwq_activate_delayed_work+0x10/0x40
LR is at pwq_activate_delayed_work+0xc/0x40
pc : [<c023f988>]    lr : [<c023f984>]    psr: 40000193
sp : d72b7f18  ip : 0000007a  fp : d72b6000
r10: 00000000  r9 : dd404414  r8 : d8c31998
r7 : d72b6038  r6 : 00000004  r5 : d4907ec8  r4 : dcee1300
r3 : ffffffe0  r2 : 00000000  r1 : 00000001  r0 : 00000000
Flags: nZcv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 10c5787d  Table: 595bc06a  DAC: 00000015
...
Process kworker/u4:2 (pid: 1833, stack limit = 0xd72b6238)
Stack: (0xd72b7f18 to 0xd72b8000)
7f00:                                                       00000001 dcee1300
7f20: 00000001 c02410dc d8c31980 dd404400 dd404400 c0242790 d8c31980 00000089
7f40: 00000000 d93e1340 00000000 d8c31980 c0242568 00000000 00000000 00000000
7f60: 00000000 c02474dc 00000000 00000000 000000f8 d8c31980 00000000 00000000
7f80: d72b7f80 d72b7f80 00000000 00000000 d72b7f90 d72b7f90 d72b7fac d93e1340
7fa0: c0247404 00000000 00000000 c0208d20 00000000 00000000 00000000 00000000
7fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
7fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
[<c023f988>] (pwq_activate_delayed_work) from [<c02410dc>] (pwq_dec_nr_in_flight+0x58/0xc4)
[<c02410dc>] (pwq_dec_nr_in_flight) from [<c0242790>] (worker_thread+0x228/0x360)
[<c0242790>] (worker_thread) from [<c02474dc>] (kthread+0xd8/0xec)
[<c02474dc>] (kthread) from [<c0208d20>] (ret_from_fork+0x14/0x34)
Code: e92d4038 e1a05000 ebffffbc[69210.619376] SMP: failed to stop secondary CPUs
Rebooting in 3 seconds..

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 7e02ca02b28e..02674ee04435 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -7103,10 +7103,20 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw,
 {
 	struct ath10k *ar = hw->priv;
 	struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv;
+	struct ath10k_vif *arvif = (void *)vif->drv_priv;
+	struct ath10k_peer *peer;
 	u32 bw, smps;
 
 	spin_lock_bh(&ar->data_lock);
 
+	peer = ath10k_peer_find(ar, arvif->vdev_id, sta->addr);
+	if (!peer) {
+		spin_unlock_bh(&ar->data_lock);
+		ath10k_warn(ar, "mac sta rc update failed to find peer %pM on vdev %i\n",
+			    sta->addr, arvif->vdev_id);
+		return;
+	}
+
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
 		   "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n",
 		   sta->addr, changed, sta->bandwidth, sta->rx_nss,

From 221b6ec69ed9c56b6cd9a124a387a9472f14284e Mon Sep 17 00:00:00 2001
From: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Date: Sat, 3 Mar 2018 05:10:44 +0100
Subject: [PATCH 1197/1678] ath9k: fix crash in spectral scan

Fixes crash seen on arm smp systems (gateworks ventana imx6):

Unable to handle kernel NULL pointer dereference at virtual address 00000014
pgd = 80004000
[00000014] *pgd=00000000
Internal error: Oops - BUG: 17 [#1] PREEMPT SMP ARM
Modules linked in: ip6table_filter nf_conntrack_ipv6 ip6_tables nf_log_ipv6 nf_defrag_ipv6 shortcut_fe ipcomp6 xfrm_ipcomp xfrm6_tunnel xfrm6_mode_tunnel xfrm6_mode_transport xfrm6_mode_ro xfrm6_mode_beet ip6_tunnel tunnel6 mip6 ah6 esp6 xfrm_algo sit ip_tunnel tunnel4 ipv6 ath10k_pci ath10k_core ath9k ath mac80211 cfg80211 compat ath_pci ath_hal(P) caamalg authencesn authenc caamrng caamhash caam_jr caam cdc_ncm usbnet usbcore sky2 imx2_wdt
CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: P                4.9.85 #19
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
task: bf064980 task.stack: bf07c000
PC is at relay_buf_full+0xc/0x30
LR is at _674+0x740/0xf10 [ath9k]
pc : [<8018bce0>]    lr : [<7f1aa604>]    psr: 80000013
sp : bf07dbf0  ip : bf07dc00  fp : bf07dbfc
r10: 0000003f  r9 : bf130e00  r8 : 809044b0
r7 : 00000000  r6 : be67a9f0  r5 : 00000000  r4 : 809043e4
r3 : c0864c24  r2 : 00000000  r1 : 00000004  r0 : 00000000
Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
Control: 10c5387d  Table: 4e6a004a  DAC: 00000055
Process ksoftirqd/0 (pid: 3, stack limit = 0xbf07c210)
Stack: (0xbf07dbf0 to 0xbf07e000)
dbe0:                                     bf07dd04 bf07dc00 7f1aa604 8018bce0
dc00: 00004014 be59e010 bf07dc34 bf07dc18 7f1a7084 7f19c07c be59c010 be6470a0
dc20: 0000096c be648954 bf07dc6c bf07dc38 7f1c286c bf07dd90 bf07dc5c bf07dc48
dc40: 8029ea4c 0000003c 00000001 be59c010 00000094 00000000 00000000 00000000
dc60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
dc80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
dca0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
dcc0: 00000000 00000000 00000000 00000000 00000000 00000000 8010ef24 00000030
dce0: be94f5e8 be6485a0 bddf0200 be59c010 be6465a0 be6415a0 bf07ddf4 bf07dd08
dd00: 7f1cf800 7f1aa55c 1fc38c4c 00000000 bf07dd58 cccccccd 66666667 be640bc0
dd20: bf07dd54 be6415a0 1fc38c4c 00000000 00000000 be59c038 be67a9c0 be59e010
dd40: be67a9f0 be647170 8090c904 be59c010 00000000 00000001 1fc38e84 00000000
dd60: be640bc0 bddf0200 00000200 00000010 0000003f 00000002 20000013 be59c010
dd80: 8092d940 bf7ca2c0 bf07ddb4 bf07dd98 1fc38c4c 2602003f 0100ff1b 80ff1b00
dda0: 00808080 00000000 00000000 80808080 80808080 80808080 80808080 00008080
ddc0: 00000000 00000000 7f1b62b8 00000002 be6470ec be6470f0 00000000 bf07de98
dde0: 8092d940 be6415a0 bf07de94 bf07ddf8 7f1d1ed8 7f1cf1fc 00000000 00000000
de00: bf7cc4c0 00000400 be6470f0 bf07de18 8015165c be59c010 8090453c 8090453c
de20: bf07dec4 be6465a0 8014f614 80148884 0000619a 00000001 bf07c000 00000100
de40: bf07de78 00000001 7f327850 00000002 afb50401 bf064980 bf07de9c bf07de68
de60: bf064a00 803cc668 bf064a00 be6470b4 be6470b8 80844180 00000000 bf07de98
de80: 8092d940 bf07c000 bf07dec4 bf07de98 80124d18 7f1d1c44 80124c94 00000000
dea0: 00000006 80902098 80902080 40000006 00000100 bf07c000 bf07df24 bf07dec8
dec0: 8012501c 80124ca0 bf7cc4c0 bf064980 be95e1c0 04208040 80902d00 000061c7
dee0: 0000000a 80600b54 8092d940 808441f8 80902080 bf07dec8 bf03b200 bf07c000
df00: bf03b200 8090fe54 00000000 00000000 00000000 00000000 bf07df34 bf07df28
df20: 80125148 80124f28 bf07df5c bf07df38 8013deb4 8012511c 00000000 bf03b240
df40: bf03b200 8013dc90 00000000 00000000 bf07dfac bf07df60 8013ad40 8013dc9c
df60: 70448040 00000001 00000000 bf03b200 00000000 00030003 bf07df78 bf07df78
df80: 00000000 00000000 bf07df88 bf07df88 bf03b240 8013ac48 00000000 00000000
dfa0: 00000000 bf07dfb0 80107760 8013ac54 00000000 00000000 00000000 00000000
dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 8c120004 1190ad04
Backtrace:
[<8018bcd4>] (relay_buf_full) from [<7f1aa604>] (_674+0x740/0xf10 [ath9k])
[<7f1aa550>] (_674 [ath9k]) from [<7f1cf800>] (_582+0x14b4/0x3708 [ath9k])
 r10:be6415a0 r9:be6465a0 r8:be59c010 r7:bddf0200 r6:be6485a0 r5:be94f5e8
 r4:00000030
[<7f1cf1f0>] (_582 [ath9k]) from [<7f1d1ed8>] (_735+0x2a0/0xec4 [ath9k])
 r10:be6415a0 r9:8092d940 r8:bf07de98 r7:00000000 r6:be6470f0 r5:be6470ec
 r4:00000002
[<7f1d1c38>] (_735 [ath9k]) from [<80124d18>] (tasklet_action+0x84/0xf8)
 r10:bf07c000 r9:8092d940 r8:bf07de98 r7:00000000 r6:80844180 r5:be6470b8
 r4:be6470b4
[<80124c94>] (tasklet_action) from [<8012501c>] (__do_softirq+0x100/0x1f4)
 r10:bf07c000 r9:00000100 r8:40000006 r7:80902080 r6:80902098 r5:00000006
 r4:00000000 r3:80124c94
[<80124f1c>] (__do_softirq) from [<80125148>] (run_ksoftirqd+0x38/0x4c)
 r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:8090fe54 r5:bf03b200
 r4:bf07c000
[<80125110>] (run_ksoftirqd) from [<8013deb4>] (smpboot_thread_fn+0x224/0x260)
[<8013dc90>] (smpboot_thread_fn) from [<8013ad40>] (kthread+0xf8/0x100)
 r9:00000000 r8:00000000 r7:8013dc90 r6:bf03b200 r5:bf03b240 r4:00000000
[<8013ac48>] (kthread) from [<80107760>] (ret_from_fork+0x14/0x34)
 r7:00000000 r6:00000000 r5:8013ac48 r4:bf03b240
Code: e89da800 e1a0c00d e92dd800 e24cb004 (e5901014)
---[ end trace dddf11ac9111b272 ]---
Kernel panic - not syncing: Fatal exception in interrupt
CPU1: stopping
CPU: 1 PID: 0 Comm: swapper/1 Tainted: P      D         4.9.85 #19
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
Backtrace:
[<8010a708>] (dump_backtrace) from [<8010a99c>] (show_stack+0x18/0x1c)
 r7:bf093f58 r6:20000193 r5:809168e8 r4:00000000
[<8010a984>] (show_stack) from [<802a09c4>] (dump_stack+0x94/0xa8)
[<802a0930>] (dump_stack) from [<8010d184>] (handle_IPI+0xe8/0x180)
 r7:bf093f58 r6:00000000 r5:00000001 r4:808478c4
[<8010d09c>] (handle_IPI) from [<801013e8>] (gic_handle_irq+0x78/0x7c)
 r7:f4000100 r6:bf093f58 r5:f400010c r4:8090467c
[<80101370>] (gic_handle_irq) from [<8010b378>] (__irq_svc+0x58/0x8c)
Exception stack(0xbf093f58 to 0xbf093fa0)
3f40:                                                       bf7d62a0 00000000
3f60: 0010a5f4 80113460 bf092000 809043e4 00000002 80904434 bf092008 412fc09a
3f80: 00000000 bf093fb4 bf093fb8 bf093fa8 8010804c 80108050 60000013 ffffffff
 r9:bf092000 r8:bf092008 r7:bf093f8c r6:ffffffff r5:60000013 r4:80108050
[<80108014>] (arch_cpu_idle) from [<80553c2c>] (default_idle_call+0x30/0x34)
[<80553bfc>] (default_idle_call) from [<80158394>] (cpu_startup_entry+0xc4/0xfc)
[<801582d0>] (cpu_startup_entry) from [<8010ce40>] (secondary_start_kernel+0x168/0x174)
 r7:8092d2f8 r4:80913568
[<8010ccd8>] (secondary_start_kernel) from [<10101488>] (0x10101488)
 r5:00000055 r4:4f07806a
Rebooting in 10 seconds..
Reboot failed -- System halted

Signed-off-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/common-spectral.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c
index 5e77fe1f5b0d..a41bcbda1d9e 100644
--- a/drivers/net/wireless/ath/ath9k/common-spectral.c
+++ b/drivers/net/wireless/ath/ath9k/common-spectral.c
@@ -479,14 +479,16 @@ ath_cmn_is_fft_buf_full(struct ath_spec_scan_priv *spec_priv)
 {
 	int i = 0;
 	int ret = 0;
+	struct rchan_buf *buf;
 	struct rchan *rc = spec_priv->rfs_chan_spec_scan;
 
-	for_each_online_cpu(i)
-		ret += relay_buf_full(*per_cpu_ptr(rc->buf, i));
+	for_each_possible_cpu(i) {
+		if ((buf = *per_cpu_ptr(rc->buf, i))) {
+			ret += relay_buf_full(buf);
+		}
+	}
 
-	i = num_online_cpus();
-
-	if (ret == i)
+	if (ret)
 		return 1;
 	else
 		return 0;

From 1fb148f51e6ceb367e3b5e1fccbd3756bf48e9fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Gro=C3=9Fe?= <pegro@friiks.de>
Date: Tue, 6 Mar 2018 15:57:18 +0100
Subject: [PATCH 1198/1678] ath9k: spelling s/premble/preamble/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Peter Große <pegro@friiks.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/common-init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/common-init.c b/drivers/net/wireless/ath/ath9k/common-init.c
index 8b4f7fdabf58..82de0fadbc95 100644
--- a/drivers/net/wireless/ath/ath9k/common-init.c
+++ b/drivers/net/wireless/ath/ath9k/common-init.c
@@ -88,7 +88,7 @@ static const struct ieee80211_channel ath9k_5ghz_chantable[] = {
 	CHAN5G(5825, 37), /* Channel 165 */
 };
 
-/* Atheros hardware rate code addition for short premble */
+/* Atheros hardware rate code addition for short preamble */
 #define SHPCHECK(__hw_rate, __flags) \
 	((__flags & IEEE80211_RATE_SHORT_PREAMBLE) ? (__hw_rate | 0x04 ) : 0)
 

From 6ced7958168fa77bdf9cca2ee8c7dcef5965a5cf Mon Sep 17 00:00:00 2001
From: Ramon Fried <rfried@codeaurora.org>
Date: Sun, 4 Mar 2018 18:31:34 +0200
Subject: [PATCH 1199/1678] wcn36xx: calculate DXE control registers values

DXE descriptor control registers used hardcoded magic values.  Added bit
definitions of the control register and calculate this values in compilation
for clarity. No functional changes.

Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/dxe.c |   6 +-
 drivers/net/wireless/ath/wcn36xx/dxe.h | 112 ++++++++++++++++++++++---
 2 files changed, 103 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c
index a3f1f7d042a4..8e4d6d9ea277 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.c
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.c
@@ -376,7 +376,7 @@ static void reap_tx_dxes(struct wcn36xx *wcn, struct wcn36xx_dxe_ch *ch)
 	spin_lock_irqsave(&ch->lock, flags);
 	ctl = ch->tail_blk_ctl;
 	do {
-		if (ctl->desc->ctrl & WCN36XX_DXE_CTRL_VALID_MASK)
+		if (ctl->desc->ctrl & WCN36xx_DXE_CTRL_VLD)
 			break;
 		if (ctl->skb) {
 			dma_unmap_single(wcn->dev, ctl->desc->src_addr_l,
@@ -397,7 +397,7 @@ static void reap_tx_dxes(struct wcn36xx *wcn, struct wcn36xx_dxe_ch *ch)
 		}
 		ctl = ctl->next;
 	} while (ctl != ch->head_blk_ctl &&
-	       !(ctl->desc->ctrl & WCN36XX_DXE_CTRL_VALID_MASK));
+	       !(ctl->desc->ctrl & WCN36xx_DXE_CTRL_VLD));
 
 	ch->tail_blk_ctl = ctl;
 	spin_unlock_irqrestore(&ch->lock, flags);
@@ -503,7 +503,7 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn,
 		int_mask = WCN36XX_DXE_INT_CH3_MASK;
 	}
 
-	while (!(dxe->ctrl & WCN36XX_DXE_CTRL_VALID_MASK)) {
+	while (!(dxe->ctrl & WCN36xx_DXE_CTRL_VLD)) {
 		skb = ctl->skb;
 		dma_addr = dxe->dst_addr_l;
 		ret = wcn36xx_dxe_fill_skb(wcn->dev, ctl);
diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.h b/drivers/net/wireless/ath/wcn36xx/dxe.h
index c012e807753b..73a14953920d 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.h
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.h
@@ -33,15 +33,106 @@ H2H_TEST_RX_TX = DMA2
 #define WCN36XX_CCU_DXE_INT_SELECT_RIVA		0x310
 #define WCN36XX_CCU_DXE_INT_SELECT_PRONTO	0x10dc
 
-/* TODO This must calculated properly but not hardcoded */
-#define WCN36XX_DXE_CTRL_TX_L			0x328a44
-#define WCN36XX_DXE_CTRL_TX_H			0x32ce44
-#define WCN36XX_DXE_CTRL_RX_L			0x12ad2f
-#define WCN36XX_DXE_CTRL_RX_H			0x12d12f
-#define WCN36XX_DXE_CTRL_TX_H_BD		0x30ce45
-#define WCN36XX_DXE_CTRL_TX_H_SKB		0x32ce4d
-#define WCN36XX_DXE_CTRL_TX_L_BD		0x308a45
-#define WCN36XX_DXE_CTRL_TX_L_SKB		0x328a4d
+/* Descriptor valid */
+#define WCN36xx_DXE_CTRL_VLD		BIT(0)
+/* End of packet */
+#define WCN36xx_DXE_CTRL_EOP		BIT(3)
+/* BD handling bit */
+#define WCN36xx_DXE_CTRL_BDH		BIT(4)
+/* Source is a queue */
+#define WCN36xx_DXE_CTRL_SIQ		BIT(5)
+/* Destination is a queue */
+#define WCN36xx_DXE_CTRL_DIQ		BIT(6)
+/* Pointer address is a queue */
+#define WCN36xx_DXE_CTRL_PIQ		BIT(7)
+/* Release PDU when done */
+#define WCN36xx_DXE_CTRL_PDU_REL	BIT(8)
+/* STOP channel processing */
+#define WCN36xx_DXE_CTRL_STOP		BIT(16)
+/* INT on descriptor done */
+#define WCN36xx_DXE_CTRL_INT		BIT(17)
+/* Endian byte swap enable */
+#define WCN36xx_DXE_CTRL_SWAP		BIT(20)
+/* Master endianness */
+#define WCN36xx_DXE_CTRL_ENDIANNESS	BIT(21)
+
+/* Transfer type */
+#define WCN36xx_DXE_CTRL_XTYPE_SHIFT 1
+#define WCN36xx_DXE_CTRL_XTYPE_MASK GENMASK(2, WCN36xx_DXE_CTRL_XTYPE_SHIFT)
+#define WCN36xx_DXE_CTRL_XTYPE_SET(x)	((x) << WCN36xx_DXE_CTRL_XTYPE_SHIFT)
+
+/* BMU Threshold select */
+#define WCN36xx_DXE_CTRL_BTHLD_SEL_SHIFT 9
+#define WCN36xx_DXE_CTRL_BTHLD_SEL_MASK GENMASK(12, WCN36xx_DXE_CTRL_BTHLD_SEL_SHIFT)
+#define WCN36xx_DXE_CTRL_BTHLD_SEL_SET(x) ((x) << WCN36xx_DXE_CTRL_BTHLD_SEL_SHIFT)
+
+/* Priority */
+#define WCN36xx_DXE_CTRL_PRIO_SHIFT 13
+#define WCN36xx_DXE_CTRL_PRIO_MASK GENMASK(15, WCN36xx_DXE_CTRL_PRIO_SHIFT)
+#define WCN36xx_DXE_CTRL_PRIO_SET(x) ((x) << WCN36xx_DXE_CTRL_PRIO_SHIFT)
+
+/* BD Template index */
+#define WCN36xx_DXE_CTRL_BDT_IDX_SHIFT 18
+#define WCN36xx_DXE_CTRL_BDT_IDX_MASK GENMASK(19, WCN36xx_DXE_CTRL_BDT_IDX_SHIFT)
+#define WCN36xx_DXE_CTRL_BDT_IDX_SET(x) ((x) << WCN36xx_DXE_CTRL_BDT_IDX_SHIFT)
+
+/* Transfer types: */
+/* Host to host */
+#define WCN36xx_DXE_XTYPE_H2H (0)
+/* Host to BMU */
+#define WCN36xx_DXE_XTYPE_H2B (2)
+/* BMU to host */
+#define WCN36xx_DXE_XTYPE_B2H (3)
+
+#define WCN36XX_DXE_CTRL_TX_L	(WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+	WCN36xx_DXE_CTRL_DIQ | WCN36xx_DXE_CTRL_BTHLD_SEL_SET(5) | \
+	WCN36xx_DXE_CTRL_PRIO_SET(4) | WCN36xx_DXE_CTRL_INT | \
+	WCN36xx_DXE_CTRL_SWAP | WCN36xx_DXE_CTRL_ENDIANNESS)
+
+#define WCN36XX_DXE_CTRL_TX_H	 (WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+	WCN36xx_DXE_CTRL_DIQ | WCN36xx_DXE_CTRL_BTHLD_SEL_SET(7) | \
+	WCN36xx_DXE_CTRL_PRIO_SET(6) | WCN36xx_DXE_CTRL_INT | \
+	WCN36xx_DXE_CTRL_SWAP | WCN36xx_DXE_CTRL_ENDIANNESS)
+
+#define WCN36XX_DXE_CTRL_RX_L	(WCN36xx_DXE_CTRL_VLD | \
+	WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_B2H) | \
+	WCN36xx_DXE_CTRL_EOP | WCN36xx_DXE_CTRL_SIQ | \
+	WCN36xx_DXE_CTRL_PDU_REL | WCN36xx_DXE_CTRL_BTHLD_SEL_SET(6) | \
+	WCN36xx_DXE_CTRL_PRIO_SET(5) | WCN36xx_DXE_CTRL_INT | \
+	WCN36xx_DXE_CTRL_SWAP)
+
+#define WCN36XX_DXE_CTRL_RX_H	(WCN36xx_DXE_CTRL_VLD | \
+	WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_B2H) | \
+	WCN36xx_DXE_CTRL_EOP | WCN36xx_DXE_CTRL_SIQ | \
+	WCN36xx_DXE_CTRL_PDU_REL |  WCN36xx_DXE_CTRL_BTHLD_SEL_SET(8) | \
+	WCN36xx_DXE_CTRL_PRIO_SET(6) | WCN36xx_DXE_CTRL_INT | \
+	WCN36xx_DXE_CTRL_SWAP)
+
+#define WCN36XX_DXE_CTRL_TX_H_BD	(WCN36xx_DXE_CTRL_VLD | \
+	WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+	WCN36xx_DXE_CTRL_DIQ | WCN36xx_DXE_CTRL_BTHLD_SEL_SET(7) | \
+	WCN36xx_DXE_CTRL_PRIO_SET(6) | WCN36xx_DXE_CTRL_SWAP | \
+	WCN36xx_DXE_CTRL_ENDIANNESS)
+
+#define WCN36XX_DXE_CTRL_TX_H_SKB	(WCN36xx_DXE_CTRL_VLD | \
+	WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+	WCN36xx_DXE_CTRL_EOP | WCN36xx_DXE_CTRL_DIQ | \
+	WCN36xx_DXE_CTRL_BTHLD_SEL_SET(7) | WCN36xx_DXE_CTRL_PRIO_SET(6) | \
+	WCN36xx_DXE_CTRL_INT | WCN36xx_DXE_CTRL_SWAP | \
+	WCN36xx_DXE_CTRL_ENDIANNESS)
+
+#define WCN36XX_DXE_CTRL_TX_L_BD	 (WCN36xx_DXE_CTRL_VLD | \
+	WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+	WCN36xx_DXE_CTRL_DIQ | WCN36xx_DXE_CTRL_BTHLD_SEL_SET(5) | \
+	WCN36xx_DXE_CTRL_PRIO_SET(4) | WCN36xx_DXE_CTRL_SWAP | \
+	WCN36xx_DXE_CTRL_ENDIANNESS)
+
+#define WCN36XX_DXE_CTRL_TX_L_SKB	(WCN36xx_DXE_CTRL_VLD | \
+	WCN36xx_DXE_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+	WCN36xx_DXE_CTRL_EOP | WCN36xx_DXE_CTRL_DIQ | \
+	WCN36xx_DXE_CTRL_BTHLD_SEL_SET(5) | WCN36xx_DXE_CTRL_PRIO_SET(4) | \
+	WCN36xx_DXE_CTRL_INT | WCN36xx_DXE_CTRL_SWAP | \
+	WCN36xx_DXE_CTRL_ENDIANNESS)
 
 /* TODO This must calculated properly but not hardcoded */
 #define WCN36XX_DXE_WQ_TX_L			0x17
@@ -49,9 +140,6 @@ H2H_TEST_RX_TX = DMA2
 #define WCN36XX_DXE_WQ_RX_L			0xB
 #define WCN36XX_DXE_WQ_RX_H			0x4
 
-/* DXE descriptor control filed */
-#define WCN36XX_DXE_CTRL_VALID_MASK (0x00000001)
-
 /* TODO This must calculated properly but not hardcoded */
 /* DXE default control register values */
 #define WCN36XX_DXE_CH_DEFAULT_CTL_RX_L		0x847EAD2F

From e5d04670904ffe21591573e1aa76a0158f939241 Mon Sep 17 00:00:00 2001
From: Ramon Fried <rfried@codeaurora.org>
Date: Sun, 11 Mar 2018 14:01:18 +0200
Subject: [PATCH 1200/1678] wcn36xx: calculate DXE default channel values

DXE channel defaults used hardcoded magic values.
Added bit definitions of the control register and
calculate this values in compilation for clarity.

Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/dxe.h | 104 +++++++++++++++++++++++--
 1 file changed, 99 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.h b/drivers/net/wireless/ath/wcn36xx/dxe.h
index 73a14953920d..feb3cb7ee81f 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.h
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.h
@@ -140,12 +140,106 @@ H2H_TEST_RX_TX = DMA2
 #define WCN36XX_DXE_WQ_RX_L			0xB
 #define WCN36XX_DXE_WQ_RX_H			0x4
 
-/* TODO This must calculated properly but not hardcoded */
+/* Channel enable or restart */
+#define WCN36xx_DXE_CH_CTRL_EN			BIT(0)
+/* End of packet bit */
+#define WCN36xx_DXE_CH_CTRL_EOP			BIT(3)
+/* BD Handling bit */
+#define WCN36xx_DXE_CH_CTRL_BDH			BIT(4)
+/* Source is queue */
+#define WCN36xx_DXE_CH_CTRL_SIQ			BIT(5)
+/* Destination is queue */
+#define WCN36xx_DXE_CH_CTRL_DIQ			BIT(6)
+/* Pointer descriptor is queue */
+#define WCN36xx_DXE_CH_CTRL_PIQ			BIT(7)
+/* Relase PDU when done */
+#define WCN36xx_DXE_CH_CTRL_PDU_REL		BIT(8)
+/* Stop channel processing */
+#define WCN36xx_DXE_CH_CTRL_STOP		BIT(16)
+/* Enable external descriptor interrupt */
+#define WCN36xx_DXE_CH_CTRL_INE_ED		BIT(17)
+/* Enable channel interrupt on errors */
+#define WCN36xx_DXE_CH_CTRL_INE_ERR		BIT(18)
+/* Enable Channel interrupt when done */
+#define WCN36xx_DXE_CH_CTRL_INE_DONE	BIT(19)
+/* External descriptor enable */
+#define WCN36xx_DXE_CH_CTRL_EDEN		BIT(20)
+/* Wait for valid bit */
+#define WCN36xx_DXE_CH_CTRL_EDVEN		BIT(21)
+/* Endianness is little endian*/
+#define WCN36xx_DXE_CH_CTRL_ENDIANNESS	BIT(26)
+/* Abort transfer */
+#define WCN36xx_DXE_CH_CTRL_ABORT		BIT(27)
+/* Long descriptor format */
+#define WCN36xx_DXE_CH_CTRL_DFMT		BIT(28)
+/* Endian byte swap enable */
+#define WCN36xx_DXE_CH_CTRL_SWAP		BIT(31)
+
+/* Transfer type */
+#define WCN36xx_DXE_CH_CTRL_XTYPE_SHIFT 1
+#define WCN36xx_DXE_CH_CTRL_XTYPE_MASK GENMASK(2, WCN36xx_DXE_CH_CTRL_XTYPE_SHIFT)
+#define WCN36xx_DXE_CH_CTRL_XTYPE_SET(x)	((x) << WCN36xx_DXE_CH_CTRL_XTYPE_SHIFT)
+
+/* Channel BMU Threshold select */
+#define WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SHIFT 9
+#define WCN36xx_DXE_CH_CTRL_BTHLD_SEL_MASK GENMASK(12, WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SHIFT)
+#define WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SET(x) ((x) << WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SHIFT)
+
+/* Channel Priority */
+#define WCN36xx_DXE_CH_CTRL_PRIO_SHIFT 13
+#define WCN36xx_DXE_CH_CTRL_PRIO_MASK GENMASK(15, WCN36xx_DXE_CH_CTRL_PRIO_SHIFT)
+#define WCN36xx_DXE_CH_CTRL_PRIO_SET(x) ((x) << WCN36xx_DXE_CH_CTRL_PRIO_SHIFT)
+
+/* Counter select */
+#define WCN36xx_DXE_CH_CTRL_SEL_SHIFT 22
+#define WCN36xx_DXE_CH_CTRL_SEL_MASK GENMASK(25, WCN36xx_DXE_CH_CTRL_SEL_SHIFT)
+#define WCN36xx_DXE_CH_CTRL_SEL_SET(x)	((x) << WCN36xx_DXE_CH_CTRL_SEL_SHIFT)
+
+/* Channel BD template index */
+#define WCN36xx_DXE_CH_CTRL_BDT_IDX_SHIFT 29
+#define WCN36xx_DXE_CH_CTRL_BDT_IDX_MASK GENMASK(30, WCN36xx_DXE_CH_CTRL_BDT_IDX_SHIFT)
+#define WCN36xx_DXE_CH_CTRL_BDT_IDX_SET(x)	((x) << WCN36xx_DXE_CH_CTRL_BDT_IDX_SHIFT)
+
 /* DXE default control register values */
-#define WCN36XX_DXE_CH_DEFAULT_CTL_RX_L		0x847EAD2F
-#define WCN36XX_DXE_CH_DEFAULT_CTL_RX_H		0x84FED12F
-#define WCN36XX_DXE_CH_DEFAULT_CTL_TX_H		0x853ECF4D
-#define WCN36XX_DXE_CH_DEFAULT_CTL_TX_L		0x843e8b4d
+#define WCN36XX_DXE_CH_DEFAULT_CTL_RX_L (WCN36xx_DXE_CH_CTRL_EN | \
+		WCN36xx_DXE_CH_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_B2H) | \
+		WCN36xx_DXE_CH_CTRL_EOP | WCN36xx_DXE_CH_CTRL_SIQ | \
+		WCN36xx_DXE_CH_CTRL_PDU_REL | WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SET(6) | \
+		WCN36xx_DXE_CH_CTRL_PRIO_SET(5) | WCN36xx_DXE_CH_CTRL_INE_ED | \
+		WCN36xx_DXE_CH_CTRL_INE_ERR | WCN36xx_DXE_CH_CTRL_INE_DONE | \
+		WCN36xx_DXE_CH_CTRL_EDEN | WCN36xx_DXE_CH_CTRL_EDVEN | \
+		WCN36xx_DXE_CH_CTRL_SEL_SET(1) | WCN36xx_DXE_CH_CTRL_ENDIANNESS | \
+		WCN36xx_DXE_CH_CTRL_SWAP)
+
+#define WCN36XX_DXE_CH_DEFAULT_CTL_RX_H (WCN36xx_DXE_CH_CTRL_EN | \
+		WCN36xx_DXE_CH_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_B2H) | \
+		WCN36xx_DXE_CH_CTRL_EOP | WCN36xx_DXE_CH_CTRL_SIQ | \
+		WCN36xx_DXE_CH_CTRL_PDU_REL | WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SET(8) | \
+		WCN36xx_DXE_CH_CTRL_PRIO_SET(6) | WCN36xx_DXE_CH_CTRL_INE_ED | \
+		WCN36xx_DXE_CH_CTRL_INE_ERR | WCN36xx_DXE_CH_CTRL_INE_DONE | \
+		WCN36xx_DXE_CH_CTRL_EDEN | WCN36xx_DXE_CH_CTRL_EDVEN | \
+		WCN36xx_DXE_CH_CTRL_SEL_SET(3) | WCN36xx_DXE_CH_CTRL_ENDIANNESS | \
+		WCN36xx_DXE_CH_CTRL_SWAP)
+
+#define WCN36XX_DXE_CH_DEFAULT_CTL_TX_H	(WCN36xx_DXE_CH_CTRL_EN | \
+		WCN36xx_DXE_CH_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+		WCN36xx_DXE_CH_CTRL_EOP | WCN36xx_DXE_CH_CTRL_DIQ | \
+		WCN36xx_DXE_CH_CTRL_PDU_REL | WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SET(7) | \
+		WCN36xx_DXE_CH_CTRL_PRIO_SET(6) | WCN36xx_DXE_CH_CTRL_INE_ED | \
+		WCN36xx_DXE_CH_CTRL_INE_ERR | WCN36xx_DXE_CH_CTRL_INE_DONE | \
+		WCN36xx_DXE_CH_CTRL_EDEN | WCN36xx_DXE_CH_CTRL_EDVEN | \
+		WCN36xx_DXE_CH_CTRL_SEL_SET(4) | WCN36xx_DXE_CH_CTRL_ENDIANNESS | \
+		WCN36xx_DXE_CH_CTRL_SWAP)
+
+#define WCN36XX_DXE_CH_DEFAULT_CTL_TX_L (WCN36xx_DXE_CH_CTRL_EN | \
+		WCN36xx_DXE_CH_CTRL_XTYPE_SET(WCN36xx_DXE_XTYPE_H2B) | \
+		WCN36xx_DXE_CH_CTRL_EOP | WCN36xx_DXE_CH_CTRL_DIQ | \
+		WCN36xx_DXE_CH_CTRL_PDU_REL | WCN36xx_DXE_CH_CTRL_BTHLD_SEL_SET(5) | \
+		WCN36xx_DXE_CH_CTRL_PRIO_SET(4) | WCN36xx_DXE_CH_CTRL_INE_ED | \
+		WCN36xx_DXE_CH_CTRL_INE_ERR | WCN36xx_DXE_CH_CTRL_INE_DONE | \
+		WCN36xx_DXE_CH_CTRL_EDEN | WCN36xx_DXE_CH_CTRL_EDVEN | \
+		WCN36xx_DXE_CH_CTRL_SEL_SET(0) | WCN36xx_DXE_CH_CTRL_ENDIANNESS | \
+		WCN36xx_DXE_CH_CTRL_SWAP)
 
 /* Common DXE registers */
 #define WCN36XX_DXE_MEM_CSR			(WCN36XX_DXE_MEM_REG + 0x00)

From 6767b302e1c961f0775abaa068495aa137331191 Mon Sep 17 00:00:00 2001
From: Ramon Fried <rfried@codeaurora.org>
Date: Sun, 11 Mar 2018 14:01:43 +0200
Subject: [PATCH 1201/1678] wcn36xx: Check DXE IRQ reason

IRQ reason was not cheked for errors.
Although error handing is not currently supported, it
will be nice to output an error value to the log if the
DMA operation failed.

Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/dxe.c | 50 ++++++++++++++++++++++----
 drivers/net/wireless/ath/wcn36xx/dxe.h |  4 +++
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c
index 8e4d6d9ea277..7d5ecaf02288 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.c
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.c
@@ -415,14 +415,31 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev)
 					  WCN36XX_DXE_CH_STATUS_REG_ADDR_TX_H,
 					  &int_reason);
 
-		/* TODO: Check int_reason */
-
 		wcn36xx_dxe_write_register(wcn,
 					   WCN36XX_DXE_0_INT_CLR,
 					   WCN36XX_INT_MASK_CHAN_TX_H);
 
-		wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_0_INT_ED_CLR,
-					   WCN36XX_INT_MASK_CHAN_TX_H);
+		if (int_reason & WCN36XX_CH_STAT_INT_ERR_MASK ) {
+			wcn36xx_dxe_write_register(wcn,
+						   WCN36XX_DXE_0_INT_ERR_CLR,
+						   WCN36XX_INT_MASK_CHAN_TX_H);
+
+			wcn36xx_err("DXE IRQ reported error: 0x%x in high TX channel\n",
+					int_src);
+		}
+
+		if (int_reason & WCN36XX_CH_STAT_INT_DONE_MASK) {
+			wcn36xx_dxe_write_register(wcn,
+						   WCN36XX_DXE_0_INT_DONE_CLR,
+						   WCN36XX_INT_MASK_CHAN_TX_H);
+		}
+
+		if (int_reason & WCN36XX_CH_STAT_INT_ED_MASK) {
+			wcn36xx_dxe_write_register(wcn,
+						   WCN36XX_DXE_0_INT_ED_CLR,
+						   WCN36XX_INT_MASK_CHAN_TX_H);
+		}
+
 		wcn36xx_dbg(WCN36XX_DBG_DXE, "dxe tx ready high\n");
 		reap_tx_dxes(wcn, &wcn->dxe_tx_h_ch);
 	}
@@ -431,14 +448,33 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev)
 		wcn36xx_dxe_read_register(wcn,
 					  WCN36XX_DXE_CH_STATUS_REG_ADDR_TX_L,
 					  &int_reason);
-		/* TODO: Check int_reason */
 
 		wcn36xx_dxe_write_register(wcn,
 					   WCN36XX_DXE_0_INT_CLR,
 					   WCN36XX_INT_MASK_CHAN_TX_L);
 
-		wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_0_INT_ED_CLR,
-					   WCN36XX_INT_MASK_CHAN_TX_L);
+
+		if (int_reason & WCN36XX_CH_STAT_INT_ERR_MASK ) {
+			wcn36xx_dxe_write_register(wcn,
+						   WCN36XX_DXE_0_INT_ERR_CLR,
+						   WCN36XX_INT_MASK_CHAN_TX_L);
+
+			wcn36xx_err("DXE IRQ reported error: 0x%x in low TX channel\n",
+					int_src);
+		}
+
+		if (int_reason & WCN36XX_CH_STAT_INT_DONE_MASK) {
+			wcn36xx_dxe_write_register(wcn,
+						   WCN36XX_DXE_0_INT_DONE_CLR,
+						   WCN36XX_INT_MASK_CHAN_TX_L);
+		}
+
+		if (int_reason & WCN36XX_CH_STAT_INT_ED_MASK) {
+			wcn36xx_dxe_write_register(wcn,
+						   WCN36XX_DXE_0_INT_ED_CLR,
+						   WCN36XX_INT_MASK_CHAN_TX_L);
+		}
+
 		wcn36xx_dbg(WCN36XX_DBG_DXE, "dxe tx ready low\n");
 		reap_tx_dxes(wcn, &wcn->dxe_tx_l_ch);
 	}
diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.h b/drivers/net/wireless/ath/wcn36xx/dxe.h
index feb3cb7ee81f..2bc376c5391b 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.h
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.h
@@ -262,6 +262,10 @@ H2H_TEST_RX_TX = DMA2
 #define WCN36XX_DXE_0_INT_DONE_CLR		(WCN36XX_DXE_MEM_REG + 0x38)
 #define WCN36XX_DXE_0_INT_ERR_CLR		(WCN36XX_DXE_MEM_REG + 0x3C)
 
+#define WCN36XX_CH_STAT_INT_DONE_MASK   0x00008000
+#define WCN36XX_CH_STAT_INT_ERR_MASK    0x00004000
+#define WCN36XX_CH_STAT_INT_ED_MASK     0x00002000
+
 #define WCN36XX_DXE_0_CH0_STATUS		(WCN36XX_DXE_MEM_REG + 0x404)
 #define WCN36XX_DXE_0_CH1_STATUS		(WCN36XX_DXE_MEM_REG + 0x444)
 #define WCN36XX_DXE_0_CH2_STATUS		(WCN36XX_DXE_MEM_REG + 0x484)

From 837f08fdecbe4b2ffc7725624342e73b886665a8 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:05 -0700
Subject: [PATCH 1202/1678] ice: Add basic driver framework for Intel(R) E800
 Series

This patch adds a basic driver framework for the Intel(R) E800 Ethernet
Series of network devices. There is no functionality right now other than
the ability to load.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 Documentation/networking/ice.txt            |  39 +++++
 MAINTAINERS                                 |   1 +
 drivers/net/ethernet/intel/Kconfig          |  14 ++
 drivers/net/ethernet/intel/Makefile         |   1 +
 drivers/net/ethernet/intel/ice/Makefile     |  10 ++
 drivers/net/ethernet/intel/ice/ice.h        |  34 +++++
 drivers/net/ethernet/intel/ice/ice_devids.h |  19 +++
 drivers/net/ethernet/intel/ice/ice_main.c   | 158 ++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_type.h   |  28 ++++
 9 files changed, 304 insertions(+)
 create mode 100644 Documentation/networking/ice.txt
 create mode 100644 drivers/net/ethernet/intel/ice/Makefile
 create mode 100644 drivers/net/ethernet/intel/ice/ice.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_devids.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_main.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_type.h

diff --git a/Documentation/networking/ice.txt b/Documentation/networking/ice.txt
new file mode 100644
index 000000000000..6261c46378e1
--- /dev/null
+++ b/Documentation/networking/ice.txt
@@ -0,0 +1,39 @@
+Intel(R) Ethernet Connection E800 Series Linux Driver
+===================================================================
+
+Intel ice Linux driver.
+Copyright(c) 2018 Intel Corporation.
+
+Contents
+========
+- Enabling the driver
+- Support
+
+The driver in this release supports Intel's E800 Series of products. For
+more information, visit Intel's support page at http://support.intel.com.
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command:
+
+     Make oldconfig/silentoldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+	-> Device Drivers
+	  -> Network device support (NETDEVICES [=y])
+	    -> Ethernet driver support
+	      -> Intel devices
+	        -> Intel(R) Ethernet Connection E800 Series Support
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+    http://support.intel.com
+
+If an issue is identified with the released source code, please email
+the maintainer listed in the MAINTAINERS file.
diff --git a/MAINTAINERS b/MAINTAINERS
index b3ea844cf228..9107d9241564 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7063,6 +7063,7 @@ F:	Documentation/networking/ixgbe.txt
 F:	Documentation/networking/ixgbevf.txt
 F:	Documentation/networking/i40e.txt
 F:	Documentation/networking/i40evf.txt
+F:	Documentation/networking/ice.txt
 F:	drivers/net/ethernet/intel/
 F:	drivers/net/ethernet/intel/*/
 F:	include/linux/avf/virtchnl.h
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 1feb54b6d92e..14d287bed33c 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -251,6 +251,20 @@ config I40EVF
 	  will be called i40evf.  MSI-X interrupt support is required
 	  for this driver to work correctly.
 
+config ICE
+	tristate "Intel(R) Ethernet Connection E800 Series Support"
+	default n
+	depends on PCI_MSI
+	---help---
+	  This driver supports Intel(R) Ethernet Connection E800 Series of
+	  devices.  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called ice.
+
 config FM10K
 	tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
 	default n
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index 90af7757a885..807a4f8c7e4e 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_I40E) += i40e/
 obj-$(CONFIG_IXGB) += ixgb/
 obj-$(CONFIG_I40EVF) += i40evf/
 obj-$(CONFIG_FM10K) += fm10k/
+obj-$(CONFIG_ICE) += ice/
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
new file mode 100644
index 000000000000..7ea6295d58fb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver
+#
+
+obj-$(CONFIG_ICE) += ice.o
+
+ice-y := ice_main.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
new file mode 100644
index 000000000000..e3c1672d4976
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_H_
+#define _ICE_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/compiler.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/bitmap.h>
+#include "ice_devids.h"
+#include "ice_type.h"
+
+#define ICE_BAR0		0
+
+#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+enum ice_state {
+	__ICE_DOWN,
+	__ICE_STATE_NBITS		/* must be last */
+};
+
+struct ice_pf {
+	struct pci_dev *pdev;
+	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	u32 msg_enable;
+	struct ice_hw hw;
+};
+#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
new file mode 100644
index 000000000000..0e14d7215a6e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_DEVIDS_H_
+#define _ICE_DEVIDS_H_
+
+/* Device IDs */
+/* Intel(R) Ethernet Controller C810 for backplane */
+#define ICE_DEV_ID_C810_BACKPLANE	0x1591
+/* Intel(R) Ethernet Controller C810 for QSFP */
+#define ICE_DEV_ID_C810_QSFP		0x1592
+/* Intel(R) Ethernet Controller C810 for SFP */
+#define ICE_DEV_ID_C810_SFP		0x1593
+/* Intel(R) Ethernet Controller C810/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_C810_10G_BASE_T	0x1594
+/* Intel(R) Ethernet Controller C810 1GbE */
+#define ICE_DEV_ID_C810_SGMII		0x1595
+
+#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
new file mode 100644
index 000000000000..3ada230a3fc7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Intel(R) Ethernet Connection E800 Series Linux Driver */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ice.h"
+
+#define DRV_VERSION	"ice-0.0.1-k"
+#define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
+static const char ice_drv_ver[] = DRV_VERSION;
+static const char ice_driver_string[] = DRV_SUMMARY;
+static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static int debug = -1;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "netif message level (0=none,...,0x7FFF=all)");
+
+/**
+ * ice_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ice_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_probe(struct pci_dev *pdev,
+		     const struct pci_device_id __always_unused *ent)
+{
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int err;
+
+	/* this driver uses devres, see Documentation/driver-model/devres.txt */
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
+	if (err) {
+		dev_err(&pdev->dev, "I/O map error %d\n", err);
+		return err;
+	}
+
+	pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL);
+	if (!pf)
+		return -ENOMEM;
+
+	/* set up for high or low dma */
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err)
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (err) {
+		dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err);
+		return err;
+	}
+
+	pci_enable_pcie_error_reporting(pdev);
+	pci_set_master(pdev);
+
+	pf->pdev = pdev;
+	pci_set_drvdata(pdev, pf);
+	set_bit(__ICE_DOWN, pf->state);
+
+	hw = &pf->hw;
+	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+	hw->back = pf;
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+
+	return 0;
+}
+
+/**
+ * ice_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void ice_remove(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf)
+		return;
+
+	set_bit(__ICE_DOWN, pf->state);
+	pci_disable_pcie_error_reporting(pdev);
+}
+
+/* ice_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ice_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SGMII), 0 },
+	/* required last entry */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
+
+static struct pci_driver ice_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = ice_pci_tbl,
+	.probe = ice_probe,
+	.remove = ice_remove,
+};
+
+/**
+ * ice_module_init - Driver registration routine
+ *
+ * ice_module_init is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init ice_module_init(void)
+{
+	int status;
+
+	pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver);
+	pr_info("%s\n", ice_copyright);
+
+	status = pci_register_driver(&ice_driver);
+	if (status)
+		pr_err("failed to register pci driver, err %d\n", status);
+
+	return status;
+}
+module_init(ice_module_init);
+
+/**
+ * ice_module_exit - Driver exit cleanup routine
+ *
+ * ice_module_exit is called just before the driver is removed
+ * from memory.
+ */
+static void __exit ice_module_exit(void)
+{
+	pci_unregister_driver(&ice_driver);
+	pr_info("module unloaded\n");
+}
+module_exit(ice_module_exit);
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
new file mode 100644
index 000000000000..cd018f81b5c7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TYPE_H_
+#define _ICE_TYPE_H_
+
+/* Bus parameters */
+struct ice_bus_info {
+	u16 device;
+	u8 func;
+};
+
+/* Port hardware description */
+struct ice_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+
+	struct ice_bus_info bus;
+};
+
+#endif /* _ICE_TYPE_H_ */

From 8396764154c6365bfeb9cc851b9746511a151255 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Fri, 23 Mar 2018 21:30:36 +0900
Subject: [PATCH 1203/1678] dt-bindings: net: ave: add PXs3 support

Add a compatible string for ethernet controller on UniPhier PXs3 SoC.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/socionext,uniphier-ave4.txt          | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
index 270ea4efff13..96398cc2982f 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -9,6 +9,7 @@ Required properties:
 	- "socionext,uniphier-pxs2-ave4" : for PXs2 SoC
 	- "socionext,uniphier-ld11-ave4" : for LD11 SoC
 	- "socionext,uniphier-ld20-ave4" : for LD20 SoC
+	- "socionext,uniphier-pxs3-ave4" : for PXs3 SoC
  - reg: Address where registers are mapped and size of region.
  - interrupts: Should contain the MAC interrupt.
  - phy-mode: See ethernet.txt in the same directory. Allow to choose

From 469e3d79b85caf4e218ad55a65c18f7e81541f55 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Fri, 23 Mar 2018 21:30:37 +0900
Subject: [PATCH 1204/1678] net: ethernet: ave: add UniPhier PXs3 support

Add a compatible string and SoC data for ethernet controller on
UniPhier PXs3 SoC.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/socionext/sni_ave.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index f5c5984afefb..0b3b7a460641 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1701,6 +1701,10 @@ static const struct ave_soc_data ave_ld20_data = {
 	.is_desc_64bit = true,
 };
 
+static const struct ave_soc_data ave_pxs3_data = {
+	.is_desc_64bit = false,
+};
+
 static const struct of_device_id of_ave_match[] = {
 	{
 		.compatible = "socionext,uniphier-pro4-ave4",
@@ -1718,6 +1722,10 @@ static const struct of_device_id of_ave_match[] = {
 		.compatible = "socionext,uniphier-ld20-ave4",
 		.data = &ave_ld20_data,
 	},
+	{
+		.compatible = "socionext,uniphier-pxs3-ave4",
+		.data = &ave_pxs3_data,
+	},
 	{ /* Sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, of_ave_match);

From c2b72e8e10abfb54099828f8fdc68b8b0174e4e4 Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Fri, 23 Mar 2018 09:52:25 -0500
Subject: [PATCH 1205/1678] fsl/fman: remove unnecessary set_dma_ops() call and
 HAS_DMA dependency

The platform device is no longer used for DMA mapping so the
(questionable) setting of the DMA ops done here is no longer
needed. Removing it together with the HAS_DMA dependency that
it required.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fman/Kconfig | 1 -
 drivers/net/ethernet/freescale/fman/mac.c   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig
index 8870a9a798ca..dc0850b3b517 100644
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,7 +2,6 @@ config FSL_FMAN
 	tristate "FMan support"
 	depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
 	select GENERIC_ALLOCATOR
-	depends on HAS_DMA
 	select PHYLIB
 	default n
 	help
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 4829dcd9e077..7b5b95f52c09 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -567,7 +567,6 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
 	}
 
 	pdev->dev.parent = priv->dev;
-	set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
 
 	ret = platform_device_add_data(pdev, &data, sizeof(data));
 	if (ret)

From ede2762d93ff16e0974f7446516b46b1022db213 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 23 Mar 2018 19:47:19 +0300
Subject: [PATCH 1206/1678] net: Make NETDEV_XXX commands enum { }

This patch is preparation to drop NETDEV_UNREGISTER_FINAL.
Since the cmd is used in usnic_ib_netdev_event_to_string()
to get cmd name, after plain removing NETDEV_UNREGISTER_FINAL
from everywhere, we'd have holes in event2str[] in this
function.

Instead of that, let's make NETDEV_XXX commands names
available for everyone, and to define netdev_cmd_to_name()
in the way we won't have to shaffle names after their
numbers are changed.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/usnic/usnic_ib_main.c | 15 +----
 include/linux/netdevice.h                   | 69 +++++++++++----------
 net/core/dev.c                              | 20 ++++++
 3 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index f45e99a938e0..5bf3b20eba25 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -97,20 +97,7 @@ void usnic_ib_log_vf(struct usnic_ib_vf *vf)
 /* Start of netdev section */
 static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
 {
-	const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN",
-		"NETDEV_REBOOT", "NETDEV_CHANGE",
-		"NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU",
-		"NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE",
-		"NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP",
-		"NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE",
-		"NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE",
-		"NETDEV_NOTIFY_PEERS", "NETDEV_JOIN"
-	};
-
-	if (event >= ARRAY_SIZE(event2str))
-		return "UNKNOWN_NETDEV_EVENT";
-	else
-		return event2str[event];
+	return netdev_cmd_to_name(event);
 }
 
 static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 913b1cc882cf..dd5a04c971d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2312,43 +2312,46 @@ struct netdev_lag_lower_state_info {
 
 #include <linux/notifier.h>
 
-/* netdevice notifier chain. Please remember to update the rtnetlink
- * notification exclusion list in rtnetlink_event() when adding new
- * types.
+/* netdevice notifier chain. Please remember to update netdev_cmd_to_name()
+ * and the rtnetlink notification exclusion list in rtnetlink_event() when
+ * adding new types.
  */
-#define NETDEV_UP	0x0001	/* For now you can't veto a device up/down */
-#define NETDEV_DOWN	0x0002
-#define NETDEV_REBOOT	0x0003	/* Tell a protocol stack a network interface
+enum netdev_cmd {
+	NETDEV_UP	= 1,	/* For now you can't veto a device up/down */
+	NETDEV_DOWN,
+	NETDEV_REBOOT,		/* Tell a protocol stack a network interface
 				   detected a hardware crash and restarted
 				   - we can use this eg to kick tcp sessions
 				   once done */
-#define NETDEV_CHANGE	0x0004	/* Notify device state change */
-#define NETDEV_REGISTER 0x0005
-#define NETDEV_UNREGISTER	0x0006
-#define NETDEV_CHANGEMTU	0x0007 /* notify after mtu change happened */
-#define NETDEV_CHANGEADDR	0x0008
-#define NETDEV_GOING_DOWN	0x0009
-#define NETDEV_CHANGENAME	0x000A
-#define NETDEV_FEAT_CHANGE	0x000B
-#define NETDEV_BONDING_FAILOVER 0x000C
-#define NETDEV_PRE_UP		0x000D
-#define NETDEV_PRE_TYPE_CHANGE	0x000E
-#define NETDEV_POST_TYPE_CHANGE	0x000F
-#define NETDEV_POST_INIT	0x0010
-#define NETDEV_UNREGISTER_FINAL 0x0011
-#define NETDEV_RELEASE		0x0012
-#define NETDEV_NOTIFY_PEERS	0x0013
-#define NETDEV_JOIN		0x0014
-#define NETDEV_CHANGEUPPER	0x0015
-#define NETDEV_RESEND_IGMP	0x0016
-#define NETDEV_PRECHANGEMTU	0x0017 /* notify before mtu change happened */
-#define NETDEV_CHANGEINFODATA	0x0018
-#define NETDEV_BONDING_INFO	0x0019
-#define NETDEV_PRECHANGEUPPER	0x001A
-#define NETDEV_CHANGELOWERSTATE	0x001B
-#define NETDEV_UDP_TUNNEL_PUSH_INFO	0x001C
-#define NETDEV_UDP_TUNNEL_DROP_INFO	0x001D
-#define NETDEV_CHANGE_TX_QUEUE_LEN	0x001E
+	NETDEV_CHANGE,		/* Notify device state change */
+	NETDEV_REGISTER,
+	NETDEV_UNREGISTER,
+	NETDEV_CHANGEMTU,	/* notify after mtu change happened */
+	NETDEV_CHANGEADDR,
+	NETDEV_GOING_DOWN,
+	NETDEV_CHANGENAME,
+	NETDEV_FEAT_CHANGE,
+	NETDEV_BONDING_FAILOVER,
+	NETDEV_PRE_UP,
+	NETDEV_PRE_TYPE_CHANGE,
+	NETDEV_POST_TYPE_CHANGE,
+	NETDEV_POST_INIT,
+	NETDEV_UNREGISTER_FINAL,
+	NETDEV_RELEASE,
+	NETDEV_NOTIFY_PEERS,
+	NETDEV_JOIN,
+	NETDEV_CHANGEUPPER,
+	NETDEV_RESEND_IGMP,
+	NETDEV_PRECHANGEMTU,	/* notify before mtu change happened */
+	NETDEV_CHANGEINFODATA,
+	NETDEV_BONDING_INFO,
+	NETDEV_PRECHANGEUPPER,
+	NETDEV_CHANGELOWERSTATE,
+	NETDEV_UDP_TUNNEL_PUSH_INFO,
+	NETDEV_UDP_TUNNEL_DROP_INFO,
+	NETDEV_CHANGE_TX_QUEUE_LEN,
+};
+const char *netdev_cmd_to_name(enum netdev_cmd cmd);
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
diff --git a/net/core/dev.c b/net/core/dev.c
index f9c28f44286c..055e7ae12759 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1571,6 +1571,26 @@ static void dev_disable_gro_hw(struct net_device *dev)
 		netdev_WARN(dev, "failed to disable GRO_HW!\n");
 }
 
+const char *netdev_cmd_to_name(enum netdev_cmd cmd)
+{
+#define N(val) 						\
+	case NETDEV_##val:				\
+		return "NETDEV_" __stringify(val);
+	switch (cmd) {
+	N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
+	N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
+	N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
+	N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
+	N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
+	N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
+	N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+	N(UNREGISTER_FINAL)
+	};
+#undef N
+	return "UNKNOWN_NETDEV_EVENT";
+}
+EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
+
 static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
 				   struct net_device *dev)
 {

From 3e0c2dbfea28dd29a3316a4c9774947425a9facd Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 23 Mar 2018 19:47:29 +0300
Subject: [PATCH 1207/1678] infiniband: Replace
 usnic_ib_netdev_event_to_string() with netdev_cmd_to_name()

This function just calls netdev_cmd_to_name().

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/usnic/usnic_ib_main.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 5bf3b20eba25..ca5638091b55 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -95,11 +95,6 @@ void usnic_ib_log_vf(struct usnic_ib_vf *vf)
 }
 
 /* Start of netdev section */
-static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
-{
-	return netdev_cmd_to_name(event);
-}
-
 static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
 {
 	struct usnic_ib_ucontext *ctx;
@@ -172,7 +167,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 			ib_dispatch_event(&ib_event);
 		} else {
 			usnic_dbg("Ignoring %s on %s\n",
-					usnic_ib_netdev_event_to_string(event),
+					netdev_cmd_to_name(event),
 					us_ibdev->ib_dev.name);
 		}
 		break;
@@ -209,7 +204,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
 		break;
 	default:
 		usnic_dbg("Ignoring event %s on %s",
-				usnic_ib_netdev_event_to_string(event),
+				netdev_cmd_to_name(event),
 				us_ibdev->ib_dev.name);
 	}
 	mutex_unlock(&us_ibdev->usdev_lock);
@@ -251,7 +246,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
 	switch (event) {
 	case NETDEV_DOWN:
 		usnic_info("%s via ip notifiers",
-				usnic_ib_netdev_event_to_string(event));
+				netdev_cmd_to_name(event));
 		usnic_fwd_del_ipaddr(us_ibdev->ufdev);
 		usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
 		ib_event.event = IB_EVENT_GID_CHANGE;
@@ -262,7 +257,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
 	case NETDEV_UP:
 		usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
 		usnic_info("%s via ip notifiers: ip %pI4",
-				usnic_ib_netdev_event_to_string(event),
+				netdev_cmd_to_name(event),
 				&us_ibdev->ufdev->inaddr);
 		ib_event.event = IB_EVENT_GID_CHANGE;
 		ib_event.device = &us_ibdev->ib_dev;
@@ -271,7 +266,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
 		break;
 	default:
 		usnic_info("Ignoring event %s on %s",
-				usnic_ib_netdev_event_to_string(event),
+				netdev_cmd_to_name(event),
 				us_ibdev->ib_dev.name);
 	}
 	mutex_unlock(&us_ibdev->usdev_lock);

From 070f2d7e264acd6316fc24092b7f51a18c75ac9c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 23 Mar 2018 19:47:39 +0300
Subject: [PATCH 1208/1678] net: Drop NETDEV_UNREGISTER_FINAL

Last user is gone after bdf5bd7f2132 "rds: tcp: remove
register_netdevice_notifier infrastructure.", so we can
remove this netdevice command. This allows to delete
rtnl_lock() in netdev_run_todo(), which is hot path for
net namespace unregistration.

dev_change_net_namespace() and netdev_wait_allrefs()
have rcu_barrier() before NETDEV_UNREGISTER_FINAL call,
and the source commits say they were introduced to
delemit the call with NETDEV_UNREGISTER, but this patch
leaves them on the places, since they require additional
analysis, whether we need in them for something else.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/main.c | 4 ++--
 include/linux/netdevice.h         | 1 -
 include/rdma/ib_verbs.h           | 4 ++--
 net/core/dev.c                    | 7 -------
 4 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index db4bf97c0e15..eb32abb0099a 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -90,8 +90,8 @@ static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num)
 	dev_hold(qdev->ndev);
 
 	/* The HW vendor's device driver must guarantee
-	 * that this function returns NULL before the net device reaches
-	 * NETDEV_UNREGISTER_FINAL state.
+	 * that this function returns NULL before the net device has finished
+	 * NETDEV_UNREGISTER state.
 	 */
 	return qdev->ndev;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dd5a04c971d5..2a2d9cf50aa2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2336,7 +2336,6 @@ enum netdev_cmd {
 	NETDEV_PRE_TYPE_CHANGE,
 	NETDEV_POST_TYPE_CHANGE,
 	NETDEV_POST_INIT,
-	NETDEV_UNREGISTER_FINAL,
 	NETDEV_RELEASE,
 	NETDEV_NOTIFY_PEERS,
 	NETDEV_JOIN,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ff3ed435701f..6eb174753acf 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2122,8 +2122,8 @@ struct ib_device {
 	 * net device of device @device at port @port_num or NULL if such
 	 * a net device doesn't exist. The vendor driver should call dev_hold
 	 * on this net device. The HW vendor's device driver must guarantee
-	 * that this function returns NULL before the net device reaches
-	 * NETDEV_UNREGISTER_FINAL state.
+	 * that this function returns NULL before the net device has finished
+	 * NETDEV_UNREGISTER state.
 	 */
 	struct net_device	  *(*get_netdev)(struct ib_device *device,
 						 u8 port_num);
diff --git a/net/core/dev.c b/net/core/dev.c
index 055e7ae12759..97a96df4b6da 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1584,7 +1584,6 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
 	N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
 	N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
 	N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
-	N(UNREGISTER_FINAL)
 	};
 #undef N
 	return "UNKNOWN_NETDEV_EVENT";
@@ -8097,7 +8096,6 @@ static void netdev_wait_allrefs(struct net_device *dev)
 			rcu_barrier();
 			rtnl_lock();
 
-			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
 				     &dev->state)) {
 				/* We must not have linkwatch events
@@ -8169,10 +8167,6 @@ void netdev_run_todo(void)
 			= list_first_entry(&list, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
-		rtnl_lock();
-		call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
-		__rtnl_unlock();
-
 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
 			pr_err("network todo '%s' but state %d\n",
 			       dev->name, dev->reg_state);
@@ -8614,7 +8608,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 */
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	rcu_barrier();
-	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
 
 	new_nsid = peernet2id_alloc(dev_net(dev), net);
 	/* If there is an ifindex conflict assign a new one */

From d6444062f8f07c346a21bd815af4a3dc8b231574 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 23 Mar 2018 15:54:38 -0700
Subject: [PATCH 1209/1678] net: Use octal not symbolic permissions

Prefer the direct use of octal for permissions.

Done with checkpatch -f --types=SYMBOLIC_PERMS --fix-inplace
and some typing.

Miscellanea:

o Whitespace neatening around these conversions.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlanproc.c                       |  6 ++--
 net/appletalk/atalk_proc.c                 |  8 ++---
 net/atm/atm_sysfs.c                        | 12 +++----
 net/atm/clip.c                             |  2 +-
 net/atm/lec.c                              |  2 +-
 net/atm/proc.c                             |  2 +-
 net/ax25/af_ax25.c                         |  6 ++--
 net/bluetooth/rfcomm/tty.c                 |  4 +--
 net/bridge/br_sysfs_br.c                   |  2 +-
 net/bridge/br_sysfs_if.c                   | 36 +++++++++----------
 net/can/af_can.c                           |  2 +-
 net/can/gw.c                               |  2 +-
 net/ceph/ceph_common.c                     |  2 +-
 net/core/net-procfs.c                      |  6 ++--
 net/core/net-sysfs.c                       | 12 +++----
 net/core/sock.c                            |  2 +-
 net/decnet/af_decnet.c                     |  2 +-
 net/decnet/dn_dev.c                        |  2 +-
 net/decnet/dn_neigh.c                      |  2 +-
 net/decnet/dn_route.c                      |  2 +-
 net/dns_resolver/dns_key.c                 |  2 +-
 net/ipv4/arp.c                             |  2 +-
 net/ipv4/fib_trie.c                        |  6 ++--
 net/ipv4/igmp.c                            |  4 +--
 net/ipv4/ipconfig.c                        |  2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c         |  2 +-
 net/ipv4/ping.c                            |  2 +-
 net/ipv4/proc.c                            |  6 ++--
 net/ipv4/raw.c                             |  2 +-
 net/ipv4/route.c                           |  4 +--
 net/ipv4/tcp_ipv4.c                        |  2 +-
 net/ipv4/udp.c                             |  2 +-
 net/ipv6/addrconf.c                        |  2 +-
 net/ipv6/anycast.c                         |  2 +-
 net/ipv6/ip6_flowlabel.c                   |  2 +-
 net/ipv6/mcast.c                           |  4 +--
 net/ipv6/proc.c                            |  6 ++--
 net/ipv6/raw.c                             |  2 +-
 net/ipv6/route.c                           |  2 +-
 net/kcm/kcmproc.c                          |  4 +--
 net/l2tp/l2tp_ppp.c                        |  2 +-
 net/llc/llc_proc.c                         |  4 +--
 net/mac80211/rc80211_minstrel.c            |  2 +-
 net/mac80211/rc80211_minstrel_debugfs.c    |  8 ++---
 net/mac80211/rc80211_minstrel_ht_debugfs.c |  8 ++---
 net/netfilter/nf_conntrack_netbios_ns.c    |  2 +-
 net/netfilter/nf_conntrack_snmp.c          |  2 +-
 net/netfilter/nf_conntrack_standalone.c    |  2 +-
 net/netfilter/nf_log.c                     |  2 +-
 net/netfilter/nf_synproxy_core.c           |  2 +-
 net/netfilter/xt_IDLETIMER.c               |  2 +-
 net/netfilter/xt_recent.c                  |  4 +--
 net/netrom/af_netrom.c                     |  6 ++--
 net/rose/af_rose.c                         |  8 ++---
 net/rxrpc/af_rxrpc.c                       |  2 +-
 net/sctp/proc.c                            | 16 ++++-----
 net/sunrpc/auth_gss/svcauth_gss.c          |  2 +-
 net/sunrpc/cache.c                         | 10 +++---
 net/sunrpc/debugfs.c                       |  6 ++--
 net/sunrpc/rpc_pipe.c                      | 42 +++++++++++-----------
 net/wireless/wext-proc.c                   |  2 +-
 net/x25/x25_proc.c                         | 12 +++----
 net/xfrm/xfrm_proc.c                       |  2 +-
 63 files changed, 161 insertions(+), 161 deletions(-)

diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a662ccc166df..a627a5db2125 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -148,8 +148,8 @@ int __net_init vlan_proc_init(struct net *net)
 	if (!vn->proc_vlan_dir)
 		goto err;
 
-	vn->proc_vlan_conf = proc_create(name_conf, S_IFREG|S_IRUSR|S_IWUSR,
-				     vn->proc_vlan_dir, &vlan_fops);
+	vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
+					 vn->proc_vlan_dir, &vlan_fops);
 	if (!vn->proc_vlan_conf)
 		goto err;
 	return 0;
@@ -172,7 +172,7 @@ int vlan_proc_add_dev(struct net_device *vlandev)
 	if (!strcmp(vlandev->name, name_conf))
 		return -EINVAL;
 	vlan->dent =
-		proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
+		proc_create_data(vlandev->name, S_IFREG | 0600,
 				 vn->proc_vlan_dir, &vlandev_fops, vlandev);
 	if (!vlan->dent)
 		return -ENOBUFS;
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index a3bf9d519193..7214aea14cb3 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -257,22 +257,22 @@ int __init atalk_proc_init(void)
 	if (!atalk_proc_dir)
 		goto out;
 
-	p = proc_create("interface", S_IRUGO, atalk_proc_dir,
+	p = proc_create("interface", 0444, atalk_proc_dir,
 			&atalk_seq_interface_fops);
 	if (!p)
 		goto out_interface;
 
-	p = proc_create("route", S_IRUGO, atalk_proc_dir,
+	p = proc_create("route", 0444, atalk_proc_dir,
 			&atalk_seq_route_fops);
 	if (!p)
 		goto out_route;
 
-	p = proc_create("socket", S_IRUGO, atalk_proc_dir,
+	p = proc_create("socket", 0444, atalk_proc_dir,
 			&atalk_seq_socket_fops);
 	if (!p)
 		goto out_socket;
 
-	p = proc_create("arp", S_IRUGO, atalk_proc_dir, &atalk_seq_arp_fops);
+	p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
 	if (!p)
 		goto out_arp;
 
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 5d2fed9f5710..39b94ca5f65d 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -96,12 +96,12 @@ static ssize_t show_link_rate(struct device *cdev,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
 }
 
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(atmaddress, S_IRUGO, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, S_IRUGO, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, S_IRUGO, show_carrier, NULL);
-static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static DEVICE_ATTR(link_rate, S_IRUGO, show_link_rate, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
+static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
+static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
 
 static struct device_attribute *atm_attrs[] = {
 	&dev_attr_atmaddress,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index d4f6029d5109..f07dbc632222 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -893,7 +893,7 @@ static int __init atm_clip_init(void)
 	{
 		struct proc_dir_entry *p;
 
-		p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
+		p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
 		if (!p) {
 			pr_err("Unable to initialize /proc/net/atm/arp\n");
 			atm_clip_exit_noproc();
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 09a1f056712a..01d5d20a6eb1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1042,7 +1042,7 @@ static int __init lane_module_init(void)
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *p;
 
-	p = proc_create("lec", S_IRUGO, atm_proc_root, &lec_seq_fops);
+	p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
 	if (!p) {
 		pr_err("Unable to initialize /proc/net/atm/lec\n");
 		return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index edc48edc95c1..55410c00c7e2 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -474,7 +474,7 @@ int __init atm_proc_init(void)
 	for (e = atm_proc_ents; e->name; e++) {
 		struct proc_dir_entry *dirent;
 
-		dirent = proc_create(e->name, S_IRUGO,
+		dirent = proc_create(e->name, 0444,
 				     atm_proc_root, e->proc_fops);
 		if (!dirent)
 			goto err_out_remove;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c8319ed48485..2b41366fcad2 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1989,10 +1989,10 @@ static int __init ax25_init(void)
 	dev_add_pack(&ax25_packet_type);
 	register_netdevice_notifier(&ax25_dev_notifier);
 
-	proc_create("ax25_route", S_IRUGO, init_net.proc_net,
+	proc_create("ax25_route", 0444, init_net.proc_net,
 		    &ax25_route_fops);
-	proc_create("ax25", S_IRUGO, init_net.proc_net, &ax25_info_fops);
-	proc_create("ax25_calls", S_IRUGO, init_net.proc_net, &ax25_uid_fops);
+	proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
+	proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
 out:
 	return rc;
 }
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 5f3074cb6b4d..5e44d842cc5d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -210,8 +210,8 @@ static ssize_t show_channel(struct device *tty_dev, struct device_attribute *att
 	return sprintf(buf, "%d\n", dev->channel);
 }
 
-static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static DEVICE_ATTR(channel, S_IRUGO, show_channel, NULL);
+static DEVICE_ATTR(address, 0444, show_address, NULL);
+static DEVICE_ATTR(channel, 0444, show_channel, NULL);
 
 static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
 					   struct rfcomm_dlc *dlc)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index b1be0dcfba6b..0318a69888d4 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -893,7 +893,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
 
 static struct bin_attribute bridge_forward = {
 	.attr = { .name = SYSFS_BRIDGE_FDB,
-		  .mode = S_IRUGO, },
+		  .mode = 0444, },
 	.read = brforward_read,
 };
 
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 126a8ea73c96..fd31ad83ec7b 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -44,7 +44,7 @@ static int store_##_name(struct net_bridge_port *p, unsigned long v) \
 {								\
 	return store_flag(p, v, _mask);				\
 }								\
-static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR,			\
+static BRPORT_ATTR(_name, 0644,					\
 		   show_##_name, store_##_name)
 
 static int store_flag(struct net_bridge_port *p, unsigned long v,
@@ -71,7 +71,7 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
 	return sprintf(buf, "%d\n", p->path_cost);
 }
 
-static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(path_cost, 0644,
 		   show_path_cost, br_stp_set_path_cost);
 
 static ssize_t show_priority(struct net_bridge_port *p, char *buf)
@@ -79,91 +79,91 @@ static ssize_t show_priority(struct net_bridge_port *p, char *buf)
 	return sprintf(buf, "%d\n", p->priority);
 }
 
-static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
+static BRPORT_ATTR(priority, 0644,
 			 show_priority, br_stp_set_port_priority);
 
 static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
 {
 	return br_show_bridge_id(buf, &p->designated_root);
 }
-static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);
+static BRPORT_ATTR(designated_root, 0444, show_designated_root, NULL);
 
 static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
 {
 	return br_show_bridge_id(buf, &p->designated_bridge);
 }
-static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);
+static BRPORT_ATTR(designated_bridge, 0444, show_designated_bridge, NULL);
 
 static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "%d\n", p->designated_port);
 }
-static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);
+static BRPORT_ATTR(designated_port, 0444, show_designated_port, NULL);
 
 static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "%d\n", p->designated_cost);
 }
-static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);
+static BRPORT_ATTR(designated_cost, 0444, show_designated_cost, NULL);
 
 static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "0x%x\n", p->port_id);
 }
-static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);
+static BRPORT_ATTR(port_id, 0444, show_port_id, NULL);
 
 static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "0x%x\n", p->port_no);
 }
 
-static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);
+static BRPORT_ATTR(port_no, 0444, show_port_no, NULL);
 
 static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "%d\n", p->topology_change_ack);
 }
-static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);
+static BRPORT_ATTR(change_ack, 0444, show_change_ack, NULL);
 
 static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "%d\n", p->config_pending);
 }
-static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);
+static BRPORT_ATTR(config_pending, 0444, show_config_pending, NULL);
 
 static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
 {
 	return sprintf(buf, "%d\n", p->state);
 }
-static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);
+static BRPORT_ATTR(state, 0444, show_port_state, NULL);
 
 static ssize_t show_message_age_timer(struct net_bridge_port *p,
 					    char *buf)
 {
 	return sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
 }
-static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);
+static BRPORT_ATTR(message_age_timer, 0444, show_message_age_timer, NULL);
 
 static ssize_t show_forward_delay_timer(struct net_bridge_port *p,
 					    char *buf)
 {
 	return sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
 }
-static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);
+static BRPORT_ATTR(forward_delay_timer, 0444, show_forward_delay_timer, NULL);
 
 static ssize_t show_hold_timer(struct net_bridge_port *p,
 					    char *buf)
 {
 	return sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
 }
-static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
+static BRPORT_ATTR(hold_timer, 0444, show_hold_timer, NULL);
 
 static int store_flush(struct net_bridge_port *p, unsigned long v)
 {
 	br_fdb_delete_by_port(p->br, p, 0, 0); // Don't delete local entry
 	return 0;
 }
-static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static BRPORT_ATTR(flush, 0200, NULL, store_flush);
 
 static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
 {
@@ -179,7 +179,7 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
 
 	return 0;
 }
-static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
 		   store_group_fwd_mask);
 
 BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
@@ -204,7 +204,7 @@ static int store_multicast_router(struct net_bridge_port *p,
 {
 	return br_multicast_set_port_router(p, v);
 }
-static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
+static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
 		   store_multicast_router);
 
 BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index e899970398a1..2f0d0a72e4b5 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -72,7 +72,7 @@ MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>, "
 MODULE_ALIAS_NETPROTO(PF_CAN);
 
 static int stats_timer __read_mostly = 1;
-module_param(stats_timer, int, S_IRUGO);
+module_param(stats_timer, int, 0444);
 MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
 
 static struct kmem_cache *rcv_cache __read_mostly;
diff --git a/net/can/gw.c b/net/can/gw.c
index 08e97668d5cf..8d71e199d5b3 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -72,7 +72,7 @@ MODULE_ALIAS(CAN_GW_NAME);
 #define CGW_DEFAULT_HOPS 1
 
 static unsigned int max_hops __read_mostly = CGW_DEFAULT_HOPS;
-module_param(max_hops, uint, S_IRUGO);
+module_param(max_hops, uint, 0444);
 MODULE_PARM_DESC(max_hops,
 		 "maximum " CAN_GW_NAME " routing hops for CAN frames "
 		 "(valid values: " __stringify(CGW_MIN_HOPS) "-"
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4d4c82229e9e..4adf07826f4a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -54,7 +54,7 @@ static const struct kernel_param_ops param_ops_supported_features = {
 	.get = param_get_supported_features,
 };
 module_param_cb(supported_features, &param_ops_supported_features, NULL,
-		S_IRUGO);
+		0444);
 
 const char *ceph_msg_type_name(int type)
 {
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 65b51e778782..dd6ae431d038 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -315,12 +315,12 @@ static int __net_init dev_proc_net_init(struct net *net)
 {
 	int rc = -ENOMEM;
 
-	if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+	if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
 		goto out;
-	if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+	if (!proc_create("softnet_stat", 0444, net->proc_net,
 			 &softnet_seq_fops))
 		goto out_dev;
-	if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+	if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
 		goto out_softnet;
 
 	if (wext_proc_init(net))
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 60a5ad2c33ee..c476f0794132 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -431,7 +431,7 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
 	return netdev_store(dev, attr, buf, len, change_group);
 }
 NETDEVICE_SHOW(group, fmt_dec);
-static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);
 
 static int change_proto_down(struct net_device *dev, unsigned long proto_down)
 {
@@ -854,10 +854,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 }
 
 static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
-	= __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+	= __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);
 
 static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
-	= __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+	= __ATTR(rps_flow_cnt, 0644,
 		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
 #endif /* CONFIG_RPS */
 
@@ -1154,7 +1154,7 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
 }
 
 static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
-	= __ATTR(hold_time, S_IRUGO | S_IWUSR,
+	= __ATTR(hold_time, 0644,
 		 bql_show_hold_time, bql_set_hold_time);
 
 static ssize_t bql_show_inflight(struct netdev_queue *queue,
@@ -1166,7 +1166,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
 }
 
 static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
-	__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
+	__ATTR(inflight, 0444, bql_show_inflight, NULL);
 
 #define BQL_ATTR(NAME, FIELD)						\
 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
@@ -1182,7 +1182,7 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
 }									\
 									\
 static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
-	= __ATTR(NAME, S_IRUGO | S_IWUSR,				\
+	= __ATTR(NAME, 0644,				\
 		 bql_show_ ## NAME, bql_set_ ## NAME)
 
 BQL_ATTR(limit, limit);
diff --git a/net/core/sock.c b/net/core/sock.c
index e689496dfd8a..8cee2920a47f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3455,7 +3455,7 @@ static const struct file_operations proto_seq_fops = {
 
 static __net_init int proto_init_net(struct net *net)
 {
-	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
+	if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2ee8306c23e3..32751602767f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2383,7 +2383,7 @@ static int __init decnet_init(void)
 	dev_add_pack(&dn_dix_packet_type);
 	register_netdevice_notifier(&dn_dev_notifier);
 
-	proc_create("decnet", S_IRUGO, init_net.proc_net, &dn_socket_seq_fops);
+	proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
 	dn_register_sysctl();
 out:
 	return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c9f5e1ebb9c8..c03b046478c3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1424,7 +1424,7 @@ void __init dn_dev_init(void)
 	rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
 			     NULL, dn_nl_dump_ifaddr, 0);
 
-	proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
+	proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
 
 #ifdef CONFIG_SYSCTL
 	{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 6e37d9e6345e..13156165afa3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -608,7 +608,7 @@ static const struct file_operations dn_neigh_seq_fops = {
 void __init dn_neigh_init(void)
 {
 	neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
-	proc_create("decnet_neigh", S_IRUGO, init_net.proc_net,
+	proc_create("decnet_neigh", 0444, init_net.proc_net,
 		    &dn_neigh_seq_fops);
 }
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ef20b8e31669..eca0cc6b761f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1918,7 +1918,7 @@ void __init dn_route_init(void)
 
 	dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
 
-	proc_create("decnet_cache", S_IRUGO, init_net.proc_net,
+	proc_create("decnet_cache", 0444, init_net.proc_net,
 		    &dn_rt_cache_seq_fops);
 
 #ifdef CONFIG_DECNET_ROUTER
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index e1d4d898a007..8396705deffc 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -38,7 +38,7 @@ MODULE_AUTHOR("Wang Lei");
 MODULE_LICENSE("GPL");
 
 unsigned int dns_resolver_debug;
-module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, dns_resolver_debug, uint, 0644);
 MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
 
 const struct cred *dns_resolver_cache;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7dc9de8444a9..4c6ba0fb2630 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1434,7 +1434,7 @@ static const struct file_operations arp_seq_fops = {
 
 static int __net_init arp_net_init(struct net *net)
 {
-	if (!proc_create("arp", S_IRUGO, net->proc_net, &arp_seq_fops))
+	if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 62243a8abf92..fac0b73e24d1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2722,14 +2722,14 @@ static const struct file_operations fib_route_fops = {
 
 int __net_init fib_proc_init(struct net *net)
 {
-	if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
+	if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
 		goto out1;
 
-	if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
+	if (!proc_create("fib_triestat", 0444, net->proc_net,
 			 &fib_triestat_fops))
 		goto out2;
 
-	if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
+	if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
 		goto out3;
 
 	return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c2743763777e..f17cd83ba164 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2993,10 +2993,10 @@ static int __net_init igmp_net_init(struct net *net)
 	struct proc_dir_entry *pde;
 	int err;
 
-	pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
+	pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
 	if (!pde)
 		goto out_igmp;
-	pde = proc_create("mcfilter", S_IRUGO, net->proc_net,
+	pde = proc_create("mcfilter", 0444, net->proc_net,
 			  &igmp_mcf_seq_fops);
 	if (!pde)
 		goto out_mcfilter;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f75802ad960f..43f620feb1c4 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1369,7 +1369,7 @@ static int __init ip_auto_config(void)
 	unsigned int i;
 
 #ifdef CONFIG_PROC_FS
-	proc_create("pnp", S_IRUGO, init_net.proc_net, &pnp_seq_fops);
+	proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
 #endif /* CONFIG_PROC_FS */
 
 	if (!ic_enable)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0fc88fa7a4dc..31b4cca588d0 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -250,7 +250,7 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
 
 		/* create proc dir entry */
 		sprintf(buffer, "%pI4", &ip);
-		c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+		c->pde = proc_create_data(buffer, 0600,
 					  cn->procdir,
 					  &clusterip_proc_fops, c);
 		if (!c->pde) {
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 0164def9c808..1f24bc8273a0 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1177,7 +1177,7 @@ static struct ping_seq_afinfo ping_v4_seq_afinfo = {
 int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
 {
 	struct proc_dir_entry *p;
-	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(afinfo->name, 0444, net->proc_net,
 			     afinfo->seq_fops, afinfo);
 	if (!p)
 		return -ENOMEM;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d97e83b2dd33..80de2e659dcb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -521,12 +521,12 @@ static const struct file_operations netstat_seq_fops = {
 
 static __net_init int ip_proc_init_net(struct net *net)
 {
-	if (!proc_create("sockstat", S_IRUGO, net->proc_net,
+	if (!proc_create("sockstat", 0444, net->proc_net,
 			 &sockstat_seq_fops))
 		goto out_sockstat;
-	if (!proc_create("netstat", S_IRUGO, net->proc_net, &netstat_seq_fops))
+	if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
 		goto out_netstat;
-	if (!proc_create("snmp", S_IRUGO, net->proc_net, &snmp_seq_fops))
+	if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
 		goto out_snmp;
 
 	return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 720bef7da2f6..0ee2501a9027 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1140,7 +1140,7 @@ static const struct file_operations raw_seq_fops = {
 
 static __net_init int raw_init_net(struct net *net)
 {
-	if (!proc_create("raw", S_IRUGO, net->proc_net, &raw_seq_fops))
+	if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4ac5728689f5..ce9bd5380d21 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -379,12 +379,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
 
-	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
+	pde = proc_create("rt_cache", 0444, net->proc_net,
 			  &rt_cache_seq_fops);
 	if (!pde)
 		goto err1;
 
-	pde = proc_create("rt_cache", S_IRUGO,
+	pde = proc_create("rt_cache", 0444,
 			  net->proc_net_stat, &rt_cpu_seq_fops);
 	if (!pde)
 		goto err2;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c6aec2643e8..fec8b1fd7b63 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2215,7 +2215,7 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
 	afinfo->seq_ops.next		= tcp_seq_next;
 	afinfo->seq_ops.stop		= tcp_seq_stop;
 
-	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(afinfo->name, 0444, net->proc_net,
 			     afinfo->seq_fops, afinfo);
 	if (!p)
 		rc = -ENOMEM;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c6dc019bc64b..fb8f3a36bd14 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2673,7 +2673,7 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
 	afinfo->seq_ops.next		= udp_seq_next;
 	afinfo->seq_ops.stop		= udp_seq_stop;
 
-	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(afinfo->name, 0444, net->proc_net,
 			     afinfo->seq_fops, afinfo);
 	if (!p)
 		rc = -ENOMEM;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6fd4bbdc444f..9ca90cd9fba4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4281,7 +4281,7 @@ static const struct file_operations if6_fops = {
 
 static int __net_init if6_proc_net_init(struct net *net)
 {
-	if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+	if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index d580d4d456a5..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -544,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
 
 int __net_init ac6_proc_init(struct net *net)
 {
-	if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+	if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 6ddf52282894..f75b06ba8325 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
 
 static int __net_init ip6_flowlabel_proc_init(struct net *net)
 {
-	if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+	if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
 			 &ip6fl_seq_fops))
 		return -ENOMEM;
 	return 0;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d1a0cefac273..1e4c2b6ebd78 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
 	int err;
 
 	err = -ENOMEM;
-	if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+	if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
 		goto out;
-	if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+	if (!proc_create("mcfilter6", 0444, net->proc_net,
 			 &igmp6_mcf_seq_fops))
 		goto out_proc_net_igmp6;
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 1678cf037688..f9891fa672f3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -290,7 +290,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
 	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 
-	p = proc_create_data(idev->dev->name, S_IRUGO,
+	p = proc_create_data(idev->dev->name, 0444,
 			     net->mib.proc_net_devsnmp6,
 			     &snmp6_dev_seq_fops, idev);
 	if (!p)
@@ -314,11 +314,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
 
 static int __net_init ipv6_proc_init_net(struct net *net)
 {
-	if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+	if (!proc_create("sockstat6", 0444, net->proc_net,
 			 &sockstat6_seq_fops))
 		return -ENOMEM;
 
-	if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+	if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
 		goto proc_snmp6_fail;
 
 	net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 10a4ac4933b7..b5e5de732494 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
 
 static int __net_init raw6_init_net(struct net *net)
 {
-	if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+	if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a2ed9fdd58d4..1d0eaa69874d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5067,7 +5067,7 @@ static int __net_init ip6_route_net_init_late(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
-	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
+	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
 #endif
 	return 0;
 }
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 2c1c8b3e4452..4c2e9907f254 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -269,7 +269,7 @@ static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
 	struct proc_dir_entry *p;
 	int rc = 0;
 
-	p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net,
+	p = proc_create_data(muxinfo->name, 0444, net->proc_net,
 			     muxinfo->seq_fops, muxinfo);
 	if (!p)
 		rc = -ENOMEM;
@@ -406,7 +406,7 @@ static int kcm_proc_init_net(struct net *net)
 {
 	int err;
 
-	if (!proc_create("kcm_stats", S_IRUGO, net->proc_net,
+	if (!proc_create("kcm_stats", 0444, net->proc_net,
 			 &kcm_stats_seq_fops)) {
 		err = -ENOMEM;
 		goto out_kcm_stats;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 977bca659787..f24504efe729 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1742,7 +1742,7 @@ static __net_init int pppol2tp_init_net(struct net *net)
 	struct proc_dir_entry *pde;
 	int err = 0;
 
-	pde = proc_create("pppol2tp", S_IRUGO, net->proc_net,
+	pde = proc_create("pppol2tp", 0444, net->proc_net,
 			  &pppol2tp_proc_fops);
 	if (!pde) {
 		err = -ENOMEM;
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 66821e8a2b7a..62ea0aed94b4 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -249,11 +249,11 @@ int __init llc_proc_init(void)
 	if (!llc_proc_dir)
 		goto out;
 
-	p = proc_create("socket", S_IRUGO, llc_proc_dir, &llc_seq_socket_fops);
+	p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops);
 	if (!p)
 		goto out_socket;
 
-	p = proc_create("core", S_IRUGO, llc_proc_dir, &llc_seq_core_fops);
+	p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops);
 	if (!p)
 		goto out_core;
 
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 9766c1cc4b0a..8221bc5582ab 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -690,7 +690,7 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 #ifdef CONFIG_MAC80211_DEBUGFS
 	mp->fixed_rate_idx = (u32) -1;
 	mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx",
-			S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx);
+			0666, debugfsdir, &mp->fixed_rate_idx);
 #endif
 
 	minstrel_init_cck_rates(mp);
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 36fc971deb86..9ad7d63d3e5b 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -214,11 +214,11 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
 {
 	struct minstrel_sta_info *mi = priv_sta;
 
-	mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi,
-			&minstrel_stat_fops);
+	mi->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, mi,
+					    &minstrel_stat_fops);
 
-	mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, dir,
-			mi, &minstrel_stat_csv_fops);
+	mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, mi,
+						&minstrel_stat_csv_fops);
 }
 
 void
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 7d969e300fb3..bfcc03152dc6 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -303,10 +303,10 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
 {
 	struct minstrel_ht_sta_priv *msp = priv_sta;
 
-	msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp,
-			&minstrel_ht_stat_fops);
-	msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO,
-			     dir, msp, &minstrel_ht_stat_csv_fops);
+	msp->dbg_stats = debugfs_create_file("rc_stats", 0444, dir, msp,
+					     &minstrel_ht_stat_fops);
+	msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", 0444, dir, msp,
+						 &minstrel_ht_stat_csv_fops);
 }
 
 void
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 496ce173f0c1..cc11bf890eb9 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -33,7 +33,7 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
 MODULE_ALIAS_NFCT_HELPER("netbios_ns");
 
 static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, S_IRUSR);
+module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
 static struct nf_conntrack_expect_policy exp_policy = {
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
index 87b95a2c270c..1b18f43ad226 100644
--- a/net/netfilter/nf_conntrack_snmp.c
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -26,7 +26,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS_NFCT_HELPER("snmp");
 
 static unsigned int timeout __read_mostly = 30;
-module_param(timeout, uint, S_IRUSR);
+module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
 int (*nf_nat_snmp_hook)(struct sk_buff *skb,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3cdce391362e..98844c87d01e 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -495,7 +495,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net)
 	if (uid_valid(root_uid) && gid_valid(root_gid))
 		proc_set_user(pde, root_uid, root_gid);
 
-	pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
+	pde = proc_create("nf_conntrack", 0444, net->proc_net_stat,
 			  &ct_cpu_seq_fops);
 	if (!pde)
 		goto out_stat_nf_conntrack;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 1ba3da51050d..a964e4d356cc 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -549,7 +549,7 @@ static int __net_init nf_log_net_init(struct net *net)
 	int ret = -ENOMEM;
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("nf_log", S_IRUGO,
+	if (!proc_create("nf_log", 0444,
 			 net->nf.proc_netfilter, &nflog_file_ops))
 		return ret;
 #endif
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 64b875e452ca..8f16fd27132d 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -325,7 +325,7 @@ static const struct file_operations synproxy_cpu_seq_fops = {
 
 static int __net_init synproxy_proc_init(struct net *net)
 {
-	if (!proc_create("synproxy", S_IRUGO, net->proc_net_stat,
+	if (!proc_create("synproxy", 0444, net->proc_net_stat,
 			 &synproxy_cpu_seq_fops))
 		return -ENOMEM;
 	return 0;
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 1ac6600bfafd..5ee859193783 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -132,7 +132,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
 		ret = -ENOMEM;
 		goto out_free_timer;
 	}
-	info->timer->attr.attr.mode = S_IRUGO;
+	info->timer->attr.attr.mode = 0444;
 	info->timer->attr.show = idletimer_tg_show;
 
 	ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 486dd24da78b..434e35ce940b 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -51,8 +51,8 @@ static unsigned int ip_list_gid __read_mostly;
 module_param(ip_list_tot, uint, 0400);
 module_param(ip_list_hash_size, uint, 0400);
 module_param(ip_list_perms, uint, 0400);
-module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR);
-module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR);
+module_param(ip_list_uid, uint, 0644);
+module_param(ip_list_gid, uint, 0644);
 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 35bb6807927f..4221d98a314b 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1450,9 +1450,9 @@ static int __init nr_proto_init(void)
 
 	nr_loopback_init();
 
-	proc_create("nr", S_IRUGO, init_net.proc_net, &nr_info_fops);
-	proc_create("nr_neigh", S_IRUGO, init_net.proc_net, &nr_neigh_fops);
-	proc_create("nr_nodes", S_IRUGO, init_net.proc_net, &nr_nodes_fops);
+	proc_create("nr", 0444, init_net.proc_net, &nr_info_fops);
+	proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops);
+	proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops);
 out:
 	return rc;
 fail:
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5170373b797c..9ff5e0a76593 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1567,12 +1567,12 @@ static int __init rose_proto_init(void)
 
 	rose_add_loopback_neigh();
 
-	proc_create("rose", S_IRUGO, init_net.proc_net, &rose_info_fops);
-	proc_create("rose_neigh", S_IRUGO, init_net.proc_net,
+	proc_create("rose", 0444, init_net.proc_net, &rose_info_fops);
+	proc_create("rose_neigh", 0444, init_net.proc_net,
 		    &rose_neigh_fops);
-	proc_create("rose_nodes", S_IRUGO, init_net.proc_net,
+	proc_create("rose_nodes", 0444, init_net.proc_net,
 		    &rose_nodes_fops);
-	proc_create("rose_routes", S_IRUGO, init_net.proc_net,
+	proc_create("rose_routes", 0444, init_net.proc_net,
 		    &rose_routes_fops);
 out:
 	return rc;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0c9c18aa7c77..9e1c2c6b6a67 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -32,7 +32,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_RXRPC);
 
 unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
-module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+module_param_named(debug, rxrpc_debug, uint, 0644);
 MODULE_PARM_DESC(debug, "RxRPC debugging mask");
 
 static struct proto rxrpc_proto;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 17d0155d9de3..1d9ccc6dab2b 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -450,17 +450,17 @@ int __net_init sctp_proc_init(struct net *net)
 	net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
 	if (!net->sctp.proc_net_sctp)
 		return -ENOMEM;
-	if (!proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_snmp_seq_fops))
+	if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp,
+			 &sctp_snmp_seq_fops))
 		goto cleanup;
-	if (!proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_eps_seq_fops))
+	if (!proc_create("eps", 0444, net->sctp.proc_net_sctp,
+			 &sctp_eps_seq_fops))
 		goto cleanup;
-	if (!proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_assocs_seq_fops))
+	if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp,
+			 &sctp_assocs_seq_fops))
 		goto cleanup;
-	if (!proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
-			&sctp_remaddr_seq_fops))
+	if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp,
+			 &sctp_remaddr_seq_fops))
 		goto cleanup;
 	return 0;
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 26531193fce4..5089dbb96d58 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1375,7 +1375,7 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
 	struct proc_dir_entry **p = &sn->use_gssp_proc;
 
 	sn->use_gss_proxy = -1;
-	*p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
+	*p = proc_create_data("use-gss-proxy", S_IFREG | 0600,
 			      sn->proc_net_rpc,
 			      &use_gss_proxy_ops, net);
 	if (!*p)
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8a7e1c774f9c..c536cc24b3d1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1621,20 +1621,20 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 	if (cd->procfs == NULL)
 		goto out_nomem;
 
-	p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
+	p = proc_create_data("flush", S_IFREG | 0600,
 			     cd->procfs, &cache_flush_operations_procfs, cd);
 	if (p == NULL)
 		goto out_nomem;
 
 	if (cd->cache_request || cd->cache_parse) {
-		p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
-				cd->procfs, &cache_file_operations_procfs, cd);
+		p = proc_create_data("channel", S_IFREG | 0600, cd->procfs,
+				     &cache_file_operations_procfs, cd);
 		if (p == NULL)
 			goto out_nomem;
 	}
 	if (cd->cache_show) {
-		p = proc_create_data("content", S_IFREG|S_IRUSR,
-				cd->procfs, &content_file_operations_procfs, cd);
+		p = proc_create_data("content", S_IFREG | 0400, cd->procfs,
+				     &content_file_operations_procfs, cd);
 		if (p == NULL)
 			goto out_nomem;
 	}
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e980d2a493de..45a033329cd4 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -139,7 +139,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 		return;
 
 	/* make tasks file */
-	if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
+	if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
 				 clnt, &tasks_fops))
 		goto out_err;
 
@@ -241,7 +241,7 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 		return;
 
 	/* make tasks file */
-	if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
+	if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
 				 xprt, &xprt_info_fops)) {
 		debugfs_remove_recursive(xprt->debugfs);
 		xprt->debugfs = NULL;
@@ -317,7 +317,7 @@ inject_fault_dir(struct dentry *topdir)
 	if (!faultdir)
 		return NULL;
 
-	if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+	if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
 				 NULL, &fault_disconnect_fops))
 		return NULL;
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fc97fc3ed637..0f08934b2cea 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -820,13 +820,13 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
 {
 	struct dentry *dentry;
 	struct inode *dir = d_inode(parent);
-	umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
+	umode_t umode = S_IFIFO | 0600;
 	int err;
 
 	if (pipe->ops->upcall == NULL)
-		umode &= ~S_IRUGO;
+		umode &= ~0444;
 	if (pipe->ops->downcall == NULL)
-		umode &= ~S_IWUGO;
+		umode &= ~0222;
 
 	inode_lock_nested(dir, I_MUTEX_PARENT);
 	dentry = __rpc_lookup_create_exclusive(parent, name);
@@ -1035,7 +1035,7 @@ static const struct rpc_filelist authfiles[] = {
 	[RPCAUTH_info] = {
 		.name = "info",
 		.i_fop = &rpc_info_operations,
-		.mode = S_IFREG | S_IRUSR,
+		.mode = S_IFREG | 0400,
 	},
 };
 
@@ -1068,8 +1068,8 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
 {
 	struct dentry *ret;
 
-	ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
-			rpc_clntdir_populate, rpc_client);
+	ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
+				 rpc_clntdir_populate, rpc_client);
 	if (!IS_ERR(ret)) {
 		rpc_client->cl_pipedir_objects.pdh_dentry = ret;
 		rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
@@ -1096,17 +1096,17 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
 	[0] = {
 		.name = "channel",
 		.i_fop = &cache_file_operations_pipefs,
-		.mode = S_IFREG|S_IRUSR|S_IWUSR,
+		.mode = S_IFREG | 0600,
 	},
 	[1] = {
 		.name = "content",
 		.i_fop = &content_file_operations_pipefs,
-		.mode = S_IFREG|S_IRUSR,
+		.mode = S_IFREG | 0400,
 	},
 	[2] = {
 		.name = "flush",
 		.i_fop = &cache_flush_operations_pipefs,
-		.mode = S_IFREG|S_IRUSR|S_IWUSR,
+		.mode = S_IFREG | 0600,
 	},
 };
 
@@ -1164,39 +1164,39 @@ enum {
 static const struct rpc_filelist files[] = {
 	[RPCAUTH_lockd] = {
 		.name = "lockd",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_mount] = {
 		.name = "mount",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_nfs] = {
 		.name = "nfs",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_portmap] = {
 		.name = "portmap",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_statd] = {
 		.name = "statd",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_nfsd4_cb] = {
 		.name = "nfsd4_cb",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_cache] = {
 		.name = "cache",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_nfsd] = {
 		.name = "nfsd",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 	[RPCAUTH_gssd] = {
 		.name = "gssd",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 };
 
@@ -1261,7 +1261,7 @@ EXPORT_SYMBOL_GPL(rpc_put_sb_net);
 static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
 	[0] = {
 		.name = "clntXX",
-		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+		.mode = S_IFDIR | 0555,
 	},
 };
 
@@ -1310,7 +1310,7 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
 	[0] = {
 		.name = "info",
 		.i_fop = &rpc_dummy_info_operations,
-		.mode = S_IFREG | S_IRUSR,
+		.mode = S_IFREG | 0400,
 	},
 };
 
@@ -1397,7 +1397,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_d_op = &simple_dentry_operations;
 	sb->s_time_gran = 1;
 
-	inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
+	inode = rpc_get_inode(sb, S_IFDIR | 0555);
 	sb->s_root = root = d_make_root(inode);
 	if (!root)
 		return -ENOMEM;
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index 5511f989ef47..b4c464594a5e 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -142,7 +142,7 @@ static const struct file_operations wireless_seq_fops = {
 int __net_init wext_proc_init(struct net *net)
 {
 	/* Create /proc/net/wireless entry */
-	if (!proc_create("wireless", S_IRUGO, net->proc_net,
+	if (!proc_create("wireless", 0444, net->proc_net,
 			 &wireless_seq_fops))
 		return -ENOMEM;
 
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 0917f047f2cf..64b415e93f6a 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -212,16 +212,16 @@ int __init x25_proc_init(void)
 	if (!proc_mkdir("x25", init_net.proc_net))
 		return -ENOMEM;
 
-	if (!proc_create("x25/route", S_IRUGO, init_net.proc_net,
-			&x25_seq_route_fops))
+	if (!proc_create("x25/route", 0444, init_net.proc_net,
+			 &x25_seq_route_fops))
 		goto out;
 
-	if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net,
-			&x25_seq_socket_fops))
+	if (!proc_create("x25/socket", 0444, init_net.proc_net,
+			 &x25_seq_socket_fops))
 		goto out;
 
-	if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net,
-			&x25_seq_forward_fops))
+	if (!proc_create("x25/forward", 0444, init_net.proc_net,
+			 &x25_seq_forward_fops))
 		goto out;
 	return 0;
 
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 6d5f85f4e672..ed06903cd84d 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -79,7 +79,7 @@ static const struct file_operations xfrm_statistics_seq_fops = {
 
 int __net_init xfrm_proc_init(struct net *net)
 {
-	if (!proc_create("xfrm_stat", S_IRUGO, net->proc_net,
+	if (!proc_create("xfrm_stat", 0444, net->proc_net,
 			 &xfrm_statistics_seq_fops))
 		return -ENOMEM;
 	return 0;

From d61e40385655fbba659fc3d81df9bdf1b848e263 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 23 Mar 2018 15:54:39 -0700
Subject: [PATCH 1210/1678] drivers/net: Use octal not symbolic permissions

Prefer the direct use of octal for permissions.

Done with checkpatch -f --types=SYMBOLIC_PERMS --fix-inplace
and some typing.

Miscellanea:

o Whitespace neatening around these conversions.

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Wei Liu <wei.liu2@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_procfs.c      |  2 +-
 drivers/net/bonding/bond_sysfs.c       | 73 +++++++++++++-------------
 drivers/net/bonding/bond_sysfs_slave.c |  4 +-
 drivers/net/caif/caif_serial.c         | 32 +++++------
 drivers/net/caif/caif_spi.c            | 16 +++---
 drivers/net/caif/caif_virtio.c         | 16 +++---
 drivers/net/can/at91_can.c             |  3 +-
 drivers/net/can/cc770/cc770.c          |  4 +-
 drivers/net/can/cc770/cc770_isa.c      | 16 +++---
 drivers/net/can/grcan.c                |  4 +-
 drivers/net/can/janz-ican3.c           |  6 +--
 drivers/net/can/sja1000/sja1000_isa.c  | 14 ++---
 drivers/net/can/softing/softing_main.c |  4 +-
 drivers/net/can/spi/mcp251x.c          |  2 +-
 drivers/net/can/usb/esd_usb2.c         |  6 +--
 drivers/net/can/vcan.c                 |  2 +-
 drivers/net/hamradio/bpqether.c        |  3 +-
 drivers/net/hamradio/yam.c             |  2 +-
 drivers/net/hyperv/netvsc_drv.c        |  4 +-
 drivers/net/ieee802154/at86rf230.c     |  2 +-
 drivers/net/phy/spi_ks8995.c           |  2 +-
 drivers/net/ppp/ppp_generic.c          |  2 +-
 drivers/net/ppp/pppoe.c                |  2 +-
 drivers/net/usb/cdc_ncm.c              | 12 ++---
 drivers/net/usb/hso.c                  |  8 +--
 drivers/net/xen-netback/xenbus.c       |  4 +-
 drivers/net/xen-netfront.c             |  6 +--
 27 files changed, 124 insertions(+), 127 deletions(-)

diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index f7799321dffb..01059f1a7bca 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -287,7 +287,7 @@ void bond_create_proc_entry(struct bonding *bond)
 
 	if (bn->proc_dir) {
 		bond->proc_entry = proc_create_data(bond_dev->name,
-						    S_IRUGO, bn->proc_dir,
+						    0444, bn->proc_dir,
 						    &bond_info_fops, bond);
 		if (bond->proc_entry == NULL)
 			netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n",
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 040b493f60ae..6096440e96ea 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -147,7 +147,7 @@ err_no_cmd:
 static const struct class_attribute class_attr_bonding_masters = {
 	.attr = {
 		.name = "bonding_masters",
-		.mode = S_IWUSR | S_IRUGO,
+		.mode = 0644,
 	},
 	.show = bonding_show_bonds,
 	.store = bonding_store_bonds,
@@ -202,7 +202,7 @@ static ssize_t bonding_show_slaves(struct device *d,
 
 	return res;
 }
-static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves,
+static DEVICE_ATTR(slaves, 0644, bonding_show_slaves,
 		   bonding_sysfs_store_option);
 
 /* Show the bonding mode. */
@@ -216,8 +216,7 @@ static ssize_t bonding_show_mode(struct device *d,
 
 	return sprintf(buf, "%s %d\n", val->string, BOND_MODE(bond));
 }
-static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR,
-		   bonding_show_mode, bonding_sysfs_store_option);
+static DEVICE_ATTR(mode, 0644, bonding_show_mode, bonding_sysfs_store_option);
 
 /* Show the bonding transmit hash method. */
 static ssize_t bonding_show_xmit_hash(struct device *d,
@@ -231,7 +230,7 @@ static ssize_t bonding_show_xmit_hash(struct device *d,
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.xmit_policy);
 }
-static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(xmit_hash_policy, 0644,
 		   bonding_show_xmit_hash, bonding_sysfs_store_option);
 
 /* Show arp_validate. */
@@ -247,7 +246,7 @@ static ssize_t bonding_show_arp_validate(struct device *d,
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.arp_validate);
 }
-static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
+static DEVICE_ATTR(arp_validate, 0644, bonding_show_arp_validate,
 		   bonding_sysfs_store_option);
 
 /* Show arp_all_targets. */
@@ -263,7 +262,7 @@ static ssize_t bonding_show_arp_all_targets(struct device *d,
 	return sprintf(buf, "%s %d\n",
 		       val->string, bond->params.arp_all_targets);
 }
-static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(arp_all_targets, 0644,
 		   bonding_show_arp_all_targets, bonding_sysfs_store_option);
 
 /* Show fail_over_mac. */
@@ -279,7 +278,7 @@ static ssize_t bonding_show_fail_over_mac(struct device *d,
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.fail_over_mac);
 }
-static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(fail_over_mac, 0644,
 		   bonding_show_fail_over_mac, bonding_sysfs_store_option);
 
 /* Show the arp timer interval. */
@@ -291,7 +290,7 @@ static ssize_t bonding_show_arp_interval(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.arp_interval);
 }
-static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(arp_interval, 0644,
 		   bonding_show_arp_interval, bonding_sysfs_store_option);
 
 /* Show the arp targets. */
@@ -312,7 +311,7 @@ static ssize_t bonding_show_arp_targets(struct device *d,
 
 	return res;
 }
-static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(arp_ip_target, 0644,
 		   bonding_show_arp_targets, bonding_sysfs_store_option);
 
 /* Show the up and down delays. */
@@ -324,7 +323,7 @@ static ssize_t bonding_show_downdelay(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);
 }
-static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(downdelay, 0644,
 		   bonding_show_downdelay, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_updelay(struct device *d,
@@ -336,7 +335,7 @@ static ssize_t bonding_show_updelay(struct device *d,
 	return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);
 
 }
-static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(updelay, 0644,
 		   bonding_show_updelay, bonding_sysfs_store_option);
 
 /* Show the LACP interval. */
@@ -351,7 +350,7 @@ static ssize_t bonding_show_lacp(struct device *d,
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
 }
-static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(lacp_rate, 0644,
 		   bonding_show_lacp, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_min_links(struct device *d,
@@ -362,7 +361,7 @@ static ssize_t bonding_show_min_links(struct device *d,
 
 	return sprintf(buf, "%u\n", bond->params.min_links);
 }
-static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(min_links, 0644,
 		   bonding_show_min_links, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_ad_select(struct device *d,
@@ -376,7 +375,7 @@ static ssize_t bonding_show_ad_select(struct device *d,
 
 	return sprintf(buf, "%s %d\n", val->string, bond->params.ad_select);
 }
-static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_select, 0644,
 		   bonding_show_ad_select, bonding_sysfs_store_option);
 
 /* Show the number of peer notifications to send after a failover event. */
@@ -387,9 +386,9 @@ static ssize_t bonding_show_num_peer_notif(struct device *d,
 	struct bonding *bond = to_bond(d);
 	return sprintf(buf, "%d\n", bond->params.num_peer_notif);
 }
-static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(num_grat_arp, 0644,
 		   bonding_show_num_peer_notif, bonding_sysfs_store_option);
-static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(num_unsol_na, 0644,
 		   bonding_show_num_peer_notif, bonding_sysfs_store_option);
 
 /* Show the MII monitor interval. */
@@ -401,7 +400,7 @@ static ssize_t bonding_show_miimon(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.miimon);
 }
-static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(miimon, 0644,
 		   bonding_show_miimon, bonding_sysfs_store_option);
 
 /* Show the primary slave. */
@@ -421,7 +420,7 @@ static ssize_t bonding_show_primary(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(primary, 0644,
 		   bonding_show_primary, bonding_sysfs_store_option);
 
 /* Show the primary_reselect flag. */
@@ -438,7 +437,7 @@ static ssize_t bonding_show_primary_reselect(struct device *d,
 	return sprintf(buf, "%s %d\n",
 		       val->string, bond->params.primary_reselect);
 }
-static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(primary_reselect, 0644,
 		   bonding_show_primary_reselect, bonding_sysfs_store_option);
 
 /* Show the use_carrier flag. */
@@ -450,7 +449,7 @@ static ssize_t bonding_show_carrier(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.use_carrier);
 }
-static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(use_carrier, 0644,
 		   bonding_show_carrier, bonding_sysfs_store_option);
 
 
@@ -471,7 +470,7 @@ static ssize_t bonding_show_active_slave(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(active_slave, 0644,
 		   bonding_show_active_slave, bonding_sysfs_store_option);
 
 /* Show link status of the bond interface. */
@@ -484,7 +483,7 @@ static ssize_t bonding_show_mii_status(struct device *d,
 
 	return sprintf(buf, "%s\n", active ? "up" : "down");
 }
-static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL);
+static DEVICE_ATTR(mii_status, 0444, bonding_show_mii_status, NULL);
 
 /* Show current 802.3ad aggregator ID. */
 static ssize_t bonding_show_ad_aggregator(struct device *d,
@@ -503,7 +502,7 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL);
+static DEVICE_ATTR(ad_aggregator, 0444, bonding_show_ad_aggregator, NULL);
 
 
 /* Show number of active 802.3ad ports. */
@@ -523,7 +522,7 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL);
+static DEVICE_ATTR(ad_num_ports, 0444, bonding_show_ad_num_ports, NULL);
 
 
 /* Show current 802.3ad actor key. */
@@ -543,7 +542,7 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL);
+static DEVICE_ATTR(ad_actor_key, 0444, bonding_show_ad_actor_key, NULL);
 
 
 /* Show current 802.3ad partner key. */
@@ -563,7 +562,7 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL);
+static DEVICE_ATTR(ad_partner_key, 0444, bonding_show_ad_partner_key, NULL);
 
 
 /* Show current 802.3ad partner mac. */
@@ -582,7 +581,7 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,
 
 	return count;
 }
-static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL);
+static DEVICE_ATTR(ad_partner_mac, 0444, bonding_show_ad_partner_mac, NULL);
 
 /* Show the queue_ids of the slaves in the current bond. */
 static ssize_t bonding_show_queue_id(struct device *d,
@@ -615,7 +614,7 @@ static ssize_t bonding_show_queue_id(struct device *d,
 
 	return res;
 }
-static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id,
+static DEVICE_ATTR(queue_id, 0644, bonding_show_queue_id,
 		   bonding_sysfs_store_option);
 
 
@@ -628,7 +627,7 @@ static ssize_t bonding_show_slaves_active(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.all_slaves_active);
 }
-static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(all_slaves_active, 0644,
 		   bonding_show_slaves_active, bonding_sysfs_store_option);
 
 /* Show the number of IGMP membership reports to send on link failure */
@@ -640,7 +639,7 @@ static ssize_t bonding_show_resend_igmp(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.resend_igmp);
 }
-static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(resend_igmp, 0644,
 		   bonding_show_resend_igmp, bonding_sysfs_store_option);
 
 
@@ -652,7 +651,7 @@ static ssize_t bonding_show_lp_interval(struct device *d,
 
 	return sprintf(buf, "%d\n", bond->params.lp_interval);
 }
-static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(lp_interval, 0644,
 		   bonding_show_lp_interval, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
@@ -662,7 +661,7 @@ static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
 	struct bonding *bond = to_bond(d);
 	return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);
 }
-static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(tlb_dynamic_lb, 0644,
 		   bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_packets_per_slave(struct device *d,
@@ -674,7 +673,7 @@ static ssize_t bonding_show_packets_per_slave(struct device *d,
 
 	return sprintf(buf, "%u\n", packets_per_slave);
 }
-static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(packets_per_slave, 0644,
 		   bonding_show_packets_per_slave, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_ad_actor_sys_prio(struct device *d,
@@ -688,7 +687,7 @@ static ssize_t bonding_show_ad_actor_sys_prio(struct device *d,
 
 	return 0;
 }
-static DEVICE_ATTR(ad_actor_sys_prio, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_actor_sys_prio, 0644,
 		   bonding_show_ad_actor_sys_prio, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_ad_actor_system(struct device *d,
@@ -703,7 +702,7 @@ static ssize_t bonding_show_ad_actor_system(struct device *d,
 	return 0;
 }
 
-static DEVICE_ATTR(ad_actor_system, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_actor_system, 0644,
 		   bonding_show_ad_actor_system, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_ad_user_port_key(struct device *d,
@@ -717,7 +716,7 @@ static ssize_t bonding_show_ad_user_port_key(struct device *d,
 
 	return 0;
 }
-static DEVICE_ATTR(ad_user_port_key, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(ad_user_port_key, 0644,
 		   bonding_show_ad_user_port_key, bonding_sysfs_store_option);
 
 static struct attribute *per_bond_attrs[] = {
diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
index 7d16c51e6913..2f120b2ffef0 100644
--- a/drivers/net/bonding/bond_sysfs_slave.c
+++ b/drivers/net/bonding/bond_sysfs_slave.c
@@ -25,8 +25,8 @@ const struct slave_attribute slave_attr_##_name = {		\
 		 .mode = _mode },				\
 	.show	= _show,					\
 };
-#define SLAVE_ATTR_RO(_name) \
-	SLAVE_ATTR(_name, S_IRUGO, _name##_show)
+#define SLAVE_ATTR_RO(_name)					\
+	SLAVE_ATTR(_name, 0444, _name##_show)
 
 static ssize_t state_show(struct slave *slave, char *buf)
 {
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 709838e4c062..a0f954f36c09 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -40,20 +40,20 @@ static LIST_HEAD(ser_list);
 static LIST_HEAD(ser_release_list);
 
 static bool ser_loop;
-module_param(ser_loop, bool, S_IRUGO);
+module_param(ser_loop, bool, 0444);
 MODULE_PARM_DESC(ser_loop, "Run in simulated loopback mode.");
 
 static bool ser_use_stx = true;
-module_param(ser_use_stx, bool, S_IRUGO);
+module_param(ser_use_stx, bool, 0444);
 MODULE_PARM_DESC(ser_use_stx, "STX enabled or not.");
 
 static bool ser_use_fcs = true;
 
-module_param(ser_use_fcs, bool, S_IRUGO);
+module_param(ser_use_fcs, bool, 0444);
 MODULE_PARM_DESC(ser_use_fcs, "FCS enabled or not.");
 
 static int ser_write_chunk = MAX_WRITE_CHUNK;
-module_param(ser_write_chunk, int, S_IRUGO);
+module_param(ser_write_chunk, int, 0444);
 
 MODULE_PARM_DESC(ser_write_chunk, "Maximum size of data written to UART.");
 
@@ -97,21 +97,21 @@ static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
 	ser->debugfs_tty_dir =
 			debugfs_create_dir(tty->name, debugfsdir);
 	if (!IS_ERR(ser->debugfs_tty_dir)) {
-		debugfs_create_blob("last_tx_msg", S_IRUSR,
-				ser->debugfs_tty_dir,
-				&ser->tx_blob);
+		debugfs_create_blob("last_tx_msg", 0400,
+				    ser->debugfs_tty_dir,
+				    &ser->tx_blob);
 
-		debugfs_create_blob("last_rx_msg", S_IRUSR,
-				ser->debugfs_tty_dir,
-				&ser->rx_blob);
+		debugfs_create_blob("last_rx_msg", 0400,
+				    ser->debugfs_tty_dir,
+				    &ser->rx_blob);
 
-		debugfs_create_x32("ser_state", S_IRUSR,
-				ser->debugfs_tty_dir,
-				(u32 *)&ser->state);
+		debugfs_create_x32("ser_state", 0400,
+				   ser->debugfs_tty_dir,
+				   (u32 *)&ser->state);
 
-		debugfs_create_x8("tty_status", S_IRUSR,
-				ser->debugfs_tty_dir,
-				&ser->tty_status);
+		debugfs_create_x8("tty_status", 0400,
+				  ser->debugfs_tty_dir,
+				  &ser->tty_status);
 
 	}
 	ser->tx_blob.data = ser->tx_data;
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index 980eace53d44..d28a1398c091 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -35,27 +35,27 @@ MODULE_DESCRIPTION("CAIF SPI driver");
 #define PAD_POW2(x, pow) ((((x)&((pow)-1))==0) ? 0 : (((pow)-((x)&((pow)-1)))))
 
 static bool spi_loop;
-module_param(spi_loop, bool, S_IRUGO);
+module_param(spi_loop, bool, 0444);
 MODULE_PARM_DESC(spi_loop, "SPI running in loopback mode.");
 
 /* SPI frame alignment. */
-module_param(spi_frm_align, int, S_IRUGO);
+module_param(spi_frm_align, int, 0444);
 MODULE_PARM_DESC(spi_frm_align, "SPI frame alignment.");
 
 /*
  * SPI padding options.
  * Warning: must be a base of 2 (& operation used) and can not be zero !
  */
-module_param(spi_up_head_align, int, S_IRUGO);
+module_param(spi_up_head_align, int, 0444);
 MODULE_PARM_DESC(spi_up_head_align, "SPI uplink head alignment.");
 
-module_param(spi_up_tail_align, int, S_IRUGO);
+module_param(spi_up_tail_align, int, 0444);
 MODULE_PARM_DESC(spi_up_tail_align, "SPI uplink tail alignment.");
 
-module_param(spi_down_head_align, int, S_IRUGO);
+module_param(spi_down_head_align, int, 0444);
 MODULE_PARM_DESC(spi_down_head_align, "SPI downlink head alignment.");
 
-module_param(spi_down_tail_align, int, S_IRUGO);
+module_param(spi_down_tail_align, int, 0444);
 MODULE_PARM_DESC(spi_down_tail_align, "SPI downlink tail alignment.");
 
 #ifdef CONFIG_ARM
@@ -250,10 +250,10 @@ static const struct file_operations dbgfs_frame_fops = {
 static inline void dev_debugfs_add(struct cfspi *cfspi)
 {
 	cfspi->dbgfs_dir = debugfs_create_dir(cfspi->pdev->name, dbgfs_root);
-	cfspi->dbgfs_state = debugfs_create_file("state", S_IRUGO,
+	cfspi->dbgfs_state = debugfs_create_file("state", 0444,
 						 cfspi->dbgfs_dir, cfspi,
 						 &dbgfs_state_fops);
-	cfspi->dbgfs_frame = debugfs_create_file("frame", S_IRUGO,
+	cfspi->dbgfs_frame = debugfs_create_file("frame", 0444,
 						 cfspi->dbgfs_dir, cfspi,
 						 &dbgfs_frame_fops);
 }
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
index c3d104feee13..2814e0dee4bb 100644
--- a/drivers/net/caif/caif_virtio.c
+++ b/drivers/net/caif/caif_virtio.c
@@ -629,21 +629,21 @@ static inline void debugfs_init(struct cfv_info *cfv)
 	if (IS_ERR(cfv->debugfs))
 		return;
 
-	debugfs_create_u32("rx-napi-complete", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("rx-napi-complete", 0400, cfv->debugfs,
 			   &cfv->stats.rx_napi_complete);
-	debugfs_create_u32("rx-napi-resched", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("rx-napi-resched", 0400, cfv->debugfs,
 			   &cfv->stats.rx_napi_resched);
-	debugfs_create_u32("rx-nomem", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("rx-nomem", 0400, cfv->debugfs,
 			   &cfv->stats.rx_nomem);
-	debugfs_create_u32("rx-kicks", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("rx-kicks", 0400, cfv->debugfs,
 			   &cfv->stats.rx_kicks);
-	debugfs_create_u32("tx-full-ring", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("tx-full-ring", 0400, cfv->debugfs,
 			   &cfv->stats.tx_full_ring);
-	debugfs_create_u32("tx-no-mem", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("tx-no-mem", 0400, cfv->debugfs,
 			   &cfv->stats.tx_no_mem);
-	debugfs_create_u32("tx-kicks", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("tx-kicks", 0400, cfv->debugfs,
 			   &cfv->stats.tx_kicks);
-	debugfs_create_u32("tx-flow-on", S_IRUSR, cfv->debugfs,
+	debugfs_create_u32("tx-flow-on", 0400, cfv->debugfs,
 			   &cfv->stats.tx_flow_on);
 }
 
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index f37ce0e1b603..d98c69045b17 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1224,8 +1224,7 @@ static ssize_t at91_sysfs_set_mb0_id(struct device *dev,
 	return ret;
 }
 
-static DEVICE_ATTR(mb0_id, S_IWUSR | S_IRUGO,
-	at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id);
+static DEVICE_ATTR(mb0_id, 0644, at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id);
 
 static struct attribute *at91_sysfs_attrs[] = {
 	&dev_attr_mb0_id.attr,
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
index 6da69af103e6..d4dd4da23997 100644
--- a/drivers/net/can/cc770/cc770.c
+++ b/drivers/net/can/cc770/cc770.c
@@ -67,12 +67,12 @@ MODULE_DESCRIPTION(KBUILD_MODNAME "CAN netdevice driver");
  * otherwise 11 bit SFF messages.
  */
 static int msgobj15_eff;
-module_param(msgobj15_eff, int, S_IRUGO);
+module_param(msgobj15_eff, int, 0444);
 MODULE_PARM_DESC(msgobj15_eff, "Extended 29-bit frames for message object 15 "
 		 "(default: 11-bit standard frames)");
 
 static int i82527_compat;
-module_param(i82527_compat, int, S_IRUGO);
+module_param(i82527_compat, int, 0444);
 MODULE_PARM_DESC(i82527_compat, "Strict Intel 82527 comptibility mode "
 		 "without using additional functions");
 
diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c
index 3a30fd3b4498..fcd34698074f 100644
--- a/drivers/net/can/cc770/cc770_isa.c
+++ b/drivers/net/can/cc770/cc770_isa.c
@@ -82,29 +82,29 @@ static u8 cor[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static u8 bcr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
 
-module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "I/O port number");
 
-module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, 0444);
 MODULE_PARM_DESC(mem, "I/O memory address");
 
-module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
 
-module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ number");
 
-module_param_array(clk, int, NULL, S_IRUGO);
+module_param_array(clk, int, NULL, 0444);
 MODULE_PARM_DESC(clk, "External oscillator clock frequency "
 		 "(default=16000000 [16 MHz])");
 
-module_param_array(cir, byte, NULL, S_IRUGO);
+module_param_array(cir, byte, NULL, 0444);
 MODULE_PARM_DESC(cir, "CPU interface register (default=0x40 [DSC])");
 
-module_param_array(cor, byte, NULL, S_IRUGO);
+module_param_array(cor, byte, NULL, 0444);
 MODULE_PARM_DESC(cor, "Clockout register (default=0x00)");
 
-module_param_array(bcr, byte, NULL, S_IRUGO);
+module_param_array(bcr, byte, NULL, 0444);
 MODULE_PARM_DESC(bcr, "Bus configuration register (default=0x40 [CBY])");
 
 #define CC770_IOSIZE          0x20
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index 897c6b113d3f..2d3046afa80d 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -1484,7 +1484,7 @@ static netdev_tx_t grcan_start_xmit(struct sk_buff *skb,
 		}							\
 	}								\
 	module_param_named(name, grcan_module_config.name,		\
-			   mtype, S_IRUGO);				\
+			   mtype, 0444);				\
 	MODULE_PARM_DESC(name, desc)
 
 #define GRCAN_CONFIG_ATTR(name, desc)					\
@@ -1513,7 +1513,7 @@ static netdev_tx_t grcan_start_xmit(struct sk_buff *skb,
 		struct grcan_priv *priv = netdev_priv(dev);		\
 		return sprintf(buf, "%d\n", priv->config.name);		\
 	}								\
-	static DEVICE_ATTR(name, S_IRUGO | S_IWUSR,			\
+	static DEVICE_ATTR(name, 0644,					\
 			   grcan_show_##name,				\
 			   grcan_store_##name);				\
 	GRCAN_MODULE_PARAM(name, ushort, GRCAN_NOT_BOOL, desc)
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index 12a53c8e8e1d..adfdb66a486e 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1865,9 +1865,9 @@ static ssize_t ican3_sysfs_show_fwinfo(struct device *dev,
 	return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo);
 }
 
-static DEVICE_ATTR(termination, S_IWUSR | S_IRUGO, ican3_sysfs_show_term,
-						   ican3_sysfs_set_term);
-static DEVICE_ATTR(fwinfo, S_IRUSR | S_IRUGO, ican3_sysfs_show_fwinfo, NULL);
+static DEVICE_ATTR(termination, 0644, ican3_sysfs_show_term,
+		   ican3_sysfs_set_term);
+static DEVICE_ATTR(fwinfo, 0444, ican3_sysfs_show_fwinfo, NULL);
 
 static struct attribute *ican3_sysfs_attrs[] = {
 	&dev_attr_termination.attr,
diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
index a89c1e92554d..1a2ae6ce8d87 100644
--- a/drivers/net/can/sja1000/sja1000_isa.c
+++ b/drivers/net/can/sja1000/sja1000_isa.c
@@ -48,27 +48,27 @@ static unsigned char ocr[MAXDEV] = {[0 ... (MAXDEV - 1)] = 0xff};
 static int indirect[MAXDEV] = {[0 ... (MAXDEV - 1)] = -1};
 static spinlock_t indirect_lock[MAXDEV];  /* lock for indirect access mode */
 
-module_param_hw_array(port, ulong, ioport, NULL, S_IRUGO);
+module_param_hw_array(port, ulong, ioport, NULL, 0444);
 MODULE_PARM_DESC(port, "I/O port number");
 
-module_param_hw_array(mem, ulong, iomem, NULL, S_IRUGO);
+module_param_hw_array(mem, ulong, iomem, NULL, 0444);
 MODULE_PARM_DESC(mem, "I/O memory address");
 
-module_param_hw_array(indirect, int, ioport, NULL, S_IRUGO);
+module_param_hw_array(indirect, int, ioport, NULL, 0444);
 MODULE_PARM_DESC(indirect, "Indirect access via address and data port");
 
-module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, 0444);
 MODULE_PARM_DESC(irq, "IRQ number");
 
-module_param_array(clk, int, NULL, S_IRUGO);
+module_param_array(clk, int, NULL, 0444);
 MODULE_PARM_DESC(clk, "External oscillator clock frequency "
 		 "(default=16000000 [16 MHz])");
 
-module_param_array(cdr, byte, NULL, S_IRUGO);
+module_param_array(cdr, byte, NULL, 0444);
 MODULE_PARM_DESC(cdr, "Clock divider register "
 		 "(default=0x48 [CDR_CBP | CDR_CLK_OFF])");
 
-module_param_array(ocr, byte, NULL, S_IRUGO);
+module_param_array(ocr, byte, NULL, 0444);
 MODULE_PARM_DESC(ocr, "Output control register "
 		 "(default=0x18 [OCR_TX0_PUSHPULL])");
 
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 5f64deec9f6c..e22696190583 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -601,8 +601,8 @@ static ssize_t store_output(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static const DEVICE_ATTR(chip, S_IRUGO, show_chip, NULL);
-static const DEVICE_ATTR(output, S_IRUGO | S_IWUSR, show_output, store_output);
+static const DEVICE_ATTR(chip, 0444, show_chip, NULL);
+static const DEVICE_ATTR(output, 0644, show_output, store_output);
 
 static const struct attribute *const netdev_sysfs_attrs[] = {
 	&dev_attr_chip.attr,
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 98d118b3aaf4..e90817608645 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -220,7 +220,7 @@
 #define DEVICE_NAME "mcp251x"
 
 static int mcp251x_enable_dma; /* Enable SPI DMA. Default: 0 (Off) */
-module_param(mcp251x_enable_dma, int, S_IRUGO);
+module_param(mcp251x_enable_dma, int, 0444);
 MODULE_PARM_DESC(mcp251x_enable_dma, "Enable SPI DMA. Default: 0 (Off)");
 
 static const struct can_bittiming_const mcp251x_bittiming_const = {
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index c6dcf93675c0..5820fd5b69b5 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -496,7 +496,7 @@ static ssize_t show_firmware(struct device *d,
 		       (dev->version >> 8) & 0xf,
 		       dev->version & 0xff);
 }
-static DEVICE_ATTR(firmware, S_IRUGO, show_firmware, NULL);
+static DEVICE_ATTR(firmware, 0444, show_firmware, NULL);
 
 static ssize_t show_hardware(struct device *d,
 			     struct device_attribute *attr, char *buf)
@@ -509,7 +509,7 @@ static ssize_t show_hardware(struct device *d,
 		       (dev->version >> 24) & 0xf,
 		       (dev->version >> 16) & 0xff);
 }
-static DEVICE_ATTR(hardware, S_IRUGO, show_hardware, NULL);
+static DEVICE_ATTR(hardware, 0444, show_hardware, NULL);
 
 static ssize_t show_nets(struct device *d,
 			 struct device_attribute *attr, char *buf)
@@ -519,7 +519,7 @@ static ssize_t show_nets(struct device *d,
 
 	return sprintf(buf, "%d", dev->net_count);
 }
-static DEVICE_ATTR(nets, S_IRUGO, show_nets, NULL);
+static DEVICE_ATTR(nets, 0444, show_nets, NULL);
 
 static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg)
 {
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index c2b04f505e16..d200a5b0651c 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -65,7 +65,7 @@ MODULE_ALIAS_RTNL_LINK(DRV_NAME);
  */
 
 static bool echo; /* echo testing. Default: 0 (Off) */
-module_param(echo, bool, S_IRUGO);
+module_param(echo, bool, 0444);
 MODULE_PARM_DESC(echo, "Echo sent frames (for testing). Default: 0 (Off)");
 
 
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index 78a6414c5fd9..dfabbae72efd 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -590,8 +590,7 @@ static int bpq_device_event(struct notifier_block *this,
 static int __init bpq_init_driver(void)
 {
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("bpqether", S_IRUGO, init_net.proc_net,
-			 &bpq_info_fops)) {
+	if (!proc_create("bpqether", 0444, init_net.proc_net, &bpq_info_fops)) {
 		printk(KERN_ERR
 			"bpq: cannot create /proc/net/bpqether entry.\n");
 		return -ENOENT;
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 14c3632b8cde..83034eb7ed4f 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -1168,7 +1168,7 @@ static int __init yam_init_driver(void)
 	yam_timer.expires = jiffies + HZ / 100;
 	add_timer(&yam_timer);
 
-	proc_create("yam", S_IRUGO, init_net.proc_net, &yam_info_fops);
+	proc_create("yam", 0444, init_net.proc_net, &yam_info_fops);
 	return 0;
  error:
 	while (--i >= 0) {
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5d716750ae22..ecc84954c511 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -55,7 +55,7 @@
 #define VF_TAKEOVER_INT (HZ / 10)
 
 static unsigned int ring_size __ro_after_init = 128;
-module_param(ring_size, uint, S_IRUGO);
+module_param(ring_size, uint, 0444);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
 unsigned int netvsc_ring_bytes __ro_after_init;
 struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
@@ -66,7 +66,7 @@ static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
 				NETIF_MSG_TX_ERR;
 
 static int debug = -1;
-module_param(debug, int, S_IRUGO);
+module_param(debug, int, 0444);
 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
 
 static void netvsc_change_rx_flags(struct net_device *net, int change)
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 548d9d026a85..77abedf0b524 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1661,7 +1661,7 @@ static int at86rf230_debugfs_init(struct at86rf230_local *lp)
 	if (!at86rf230_debugfs_root)
 		return -ENOMEM;
 
-	stats = debugfs_create_file("trac_stats", S_IRUGO,
+	stats = debugfs_create_file("trac_stats", 0444,
 				    at86rf230_debugfs_root, lp,
 				    &at86rf230_stats_fops);
 	if (!stats)
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 1e2d4f1179da..f17b3441779b 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -417,7 +417,7 @@ static void ks8995_parse_dt(struct ks8995_switch *ks)
 static const struct bin_attribute ks8995_registers_attr = {
 	.attr = {
 		.name   = "registers",
-		.mode   = S_IRUSR | S_IWUSR,
+		.mode   = 0600,
 	},
 	.size   = KS8995_REGS_SIZE,
 	.read   = ks8995_registers_read,
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 926c2c322d43..22fcff3c7a9a 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1687,7 +1687,7 @@ ppp_push(struct ppp *ppp)
 
 #ifdef CONFIG_PPP_MULTILINK
 static bool mp_protocol_compress __read_mostly = true;
-module_param(mp_protocol_compress, bool, S_IRUGO | S_IWUSR);
+module_param(mp_protocol_compress, bool, 0644);
 MODULE_PARM_DESC(mp_protocol_compress,
 		 "compress protocol id in multilink fragments");
 
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index c10e6181a2f0..f9552a400271 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1142,7 +1142,7 @@ static __net_init int pppoe_init_net(struct net *net)
 
 	rwlock_init(&pn->hash_lock);
 
-	pde = proc_create("pppoe", S_IRUGO, net->proc_net, &pppoe_seq_fops);
+	pde = proc_create("pppoe", 0444, net->proc_net, &pppoe_seq_fops);
 #ifdef CONFIG_PROC_FS
 	if (!pde)
 		return -ENOMEM;
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9e1b74590682..90d07ed224d5 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -58,7 +58,7 @@ static bool prefer_mbim = true;
 #else
 static bool prefer_mbim;
 #endif
-module_param(prefer_mbim, bool, S_IRUGO | S_IWUSR);
+module_param(prefer_mbim, bool, 0644);
 MODULE_PARM_DESC(prefer_mbim, "Prefer MBIM setting on dual NCM/MBIM functions");
 
 static void cdc_ncm_txpath_bh(unsigned long param);
@@ -281,10 +281,10 @@ static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d,  struct device_att
 	return len;
 }
 
-static DEVICE_ATTR(min_tx_pkt, S_IRUGO | S_IWUSR, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt);
-static DEVICE_ATTR(rx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max);
-static DEVICE_ATTR(tx_max, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max);
-static DEVICE_ATTR(tx_timer_usecs, S_IRUGO | S_IWUSR, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs);
+static DEVICE_ATTR(min_tx_pkt, 0644, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt);
+static DEVICE_ATTR(rx_max, 0644, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max);
+static DEVICE_ATTR(tx_max, 0644, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max);
+static DEVICE_ATTR(tx_timer_usecs, 0644, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs);
 
 static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf)
 {
@@ -335,7 +335,7 @@ static ssize_t cdc_ncm_show_##name(struct device *d, struct device_attribute *at
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; \
 	return sprintf(buf, format "\n", tocpu(ctx->ncm_parm.name));	\
 } \
-static DEVICE_ATTR(name, S_IRUGO, cdc_ncm_show_##name, NULL)
+static DEVICE_ATTR(name, 0444, cdc_ncm_show_##name, NULL)
 
 NCM_PARM_ATTR(bmNtbFormatsSupported, "0x%04x", le16_to_cpu);
 NCM_PARM_ATTR(dwNtbInMaxSize, "%u", le32_to_cpu);
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 981c931a7a1f..e53883ad6107 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -519,7 +519,7 @@ static ssize_t hso_sysfs_show_porttype(struct device *dev,
 
 	return sprintf(buf, "%s\n", port_name);
 }
-static DEVICE_ATTR(hsotype, S_IRUGO, hso_sysfs_show_porttype, NULL);
+static DEVICE_ATTR(hsotype, 0444, hso_sysfs_show_porttype, NULL);
 
 static struct attribute *hso_serial_dev_attrs[] = {
 	&dev_attr_hsotype.attr,
@@ -3289,12 +3289,12 @@ MODULE_LICENSE("GPL");
 
 /* change the debug level (eg: insmod hso.ko debug=0x04) */
 MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
 
 /* set the major tty number (eg: insmod hso.ko tty_major=245) */
 MODULE_PARM_DESC(tty_major, "Set the major tty number");
-module_param(tty_major, int, S_IRUGO | S_IWUSR);
+module_param(tty_major, int, 0644);
 
 /* disable network interface (eg: insmod hso.ko disable_net=1) */
 MODULE_PARM_DESC(disable_net, "Disable the network interface");
-module_param(disable_net, int, S_IRUGO | S_IWUSR);
+module_param(disable_net, int, 0644);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index a56d3eab35dd..e1aef253601e 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -224,7 +224,7 @@ static void xenvif_debugfs_addif(struct xenvif *vif)
 
 			snprintf(filename, sizeof(filename), "io_ring_q%d", i);
 			pfile = debugfs_create_file(filename,
-						    S_IRUSR | S_IWUSR,
+						    0600,
 						    vif->xenvif_dbg_root,
 						    &vif->queues[i],
 						    &xenvif_dbg_io_ring_ops_fops);
@@ -235,7 +235,7 @@ static void xenvif_debugfs_addif(struct xenvif *vif)
 
 		if (vif->ctrl_irq) {
 			pfile = debugfs_create_file("ctrl",
-						    S_IRUSR,
+						    0400,
 						    vif->xenvif_dbg_root,
 						    vif,
 						    &xenvif_dbg_ctrl_ops_fops);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 3127bc8633ca..4dd0668003e7 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2113,9 +2113,9 @@ static ssize_t store_rxbuf(struct device *dev,
 	return len;
 }
 
-static DEVICE_ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
-static DEVICE_ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
-static DEVICE_ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL);
+static DEVICE_ATTR(rxbuf_min, 0644, show_rxbuf, store_rxbuf);
+static DEVICE_ATTR(rxbuf_max, 0644, show_rxbuf, store_rxbuf);
+static DEVICE_ATTR(rxbuf_cur, 0444, show_rxbuf, NULL);
 
 static struct attribute *xennet_dev_attrs[] = {
 	&dev_attr_rxbuf_min.attr,

From d3757ba4c1421d2ad00d2bf97119005e37ad2902 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 23 Mar 2018 16:34:44 -0700
Subject: [PATCH 1211/1678] ethernet: Use octal not symbolic permissions

Prefer the direct use of octal for permissions.

Done with checkpatch -f --types=SYMBOLIC_PERMS --fix-inplace
and some typing.

Miscellanea:

o Whitespace neatening around these conversions.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/apne.c              |   2 +-
 drivers/net/ethernet/8390/lib8390.c           |   2 +-
 drivers/net/ethernet/8390/ne.c                |   2 +-
 drivers/net/ethernet/8390/ne2k-pci.c          |   2 +-
 drivers/net/ethernet/8390/smc-ultra.c         |   2 +-
 drivers/net/ethernet/8390/stnic.c             |   2 +-
 drivers/net/ethernet/8390/wd.c                |   2 +-
 drivers/net/ethernet/altera/altera_tse_main.c |   6 +-
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c      |  10 +-
 drivers/net/ethernet/amd/xgbe/xgbe-main.c     |   2 +-
 drivers/net/ethernet/broadcom/bnx2.c          |   2 +-
 .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  |  12 +-
 drivers/net/ethernet/broadcom/sb1250-mac.c    |  10 +-
 drivers/net/ethernet/broadcom/tg3.c           |   6 +-
 drivers/net/ethernet/brocade/bna/bnad.c       |   2 +-
 .../net/ethernet/brocade/bna/bnad_debugfs.c   |  10 +-
 .../net/ethernet/cavium/thunder/nicvf_main.c  |   2 +-
 .../net/ethernet/chelsio/cxgb3/cxgb3_main.c   |   6 +-
 .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c    | 112 +++++++++---------
 .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c   |  10 +-
 drivers/net/ethernet/ec_bhf.c                 |   2 +-
 drivers/net/ethernet/emulex/benet/be_main.c   |   6 +-
 drivers/net/ethernet/ibm/ehea/ehea_main.c     |   7 +-
 drivers/net/ethernet/ibm/ibmveth.c            |   2 +-
 drivers/net/ethernet/intel/igb/igb_hwmon.c    |   2 +-
 .../net/ethernet/intel/ixgbe/ixgbe_sysfs.c    |   2 +-
 drivers/net/ethernet/marvell/mvneta.c         |   8 +-
 drivers/net/ethernet/marvell/skge.c           |   2 +-
 drivers/net/ethernet/marvell/sky2.c           |   2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c     |  16 +--
 .../net/ethernet/mellanox/mlxsw/core_hwmon.c  |  10 +-
 .../net/ethernet/myricom/myri10ge/myri10ge.c  |  32 ++---
 .../ethernet/netronome/nfp/nfp_net_debugfs.c  |   6 +-
 .../ethernet/qlogic/netxen/netxen_nic_main.c  |  14 +--
 .../net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c |  30 ++---
 drivers/net/ethernet/qualcomm/qca_debug.c     |   2 +-
 .../net/ethernet/samsung/sxgbe/sxgbe_main.c   |   4 +-
 drivers/net/ethernet/sfc/mcdi_mon.c           |   2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  26 ++--
 drivers/net/ethernet/sun/niu.c                |  10 +-
 40 files changed, 194 insertions(+), 195 deletions(-)

diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c
index c56ac9ebc08f..fe6c834c422e 100644
--- a/drivers/net/ethernet/8390/apne.c
+++ b/drivers/net/ethernet/8390/apne.c
@@ -117,7 +117,7 @@ static const char version[] =
 static int apne_owned;	/* signal if card already owned */
 
 static u32 apne_msg_enable;
-module_param_named(msg_enable, apne_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, apne_msg_enable, uint, 0444);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
 
 struct net_device * __init apne_probe(int unit)
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index 5d9bbde9fe68..c9c55c9eab9f 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -113,7 +113,7 @@ static void __NS8390_init(struct net_device *dev, int startp);
 
 static unsigned version_printed;
 static u32 msg_enable;
-module_param(msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param(msg_enable, uint, 0444);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
 
 /*
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 4cdff6e6af89..99a2453eb34f 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -77,7 +77,7 @@ static u32 ne_msg_enable;
 module_param_hw_array(io, int, ioport, NULL, 0);
 module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(bad, int, NULL, 0);
-module_param_named(msg_enable, ne_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, ne_msg_enable, uint, 0444);
 MODULE_PARM_DESC(io, "I/O base address(es),required");
 MODULE_PARM_DESC(irq, "IRQ number(s)");
 MODULE_PARM_DESC(bad, "Accept card(s) with bad signatures");
diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c
index 1bdea746926c..42985a82321a 100644
--- a/drivers/net/ethernet/8390/ne2k-pci.c
+++ b/drivers/net/ethernet/8390/ne2k-pci.c
@@ -76,7 +76,7 @@ MODULE_AUTHOR("Donald Becker / Paul Gortmaker");
 MODULE_DESCRIPTION("PCI NE2000 clone driver");
 MODULE_LICENSE("GPL");
 
-module_param_named(msg_enable, ne2k_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, ne2k_msg_enable, uint, 0444);
 module_param_array(options, int, NULL, 0);
 module_param_array(full_duplex, int, NULL, 0);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 4e02f6a23575..3fe3b4dfa7c5 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -563,7 +563,7 @@ static int irq[MAX_ULTRA_CARDS];
 
 module_param_hw_array(io, int, ioport, NULL, 0);
 module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_named(msg_enable, ultra_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, ultra_msg_enable, uint, 0444);
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (assigned)");
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c
index aca957d4e121..1f0670cd3ea3 100644
--- a/drivers/net/ethernet/8390/stnic.c
+++ b/drivers/net/ethernet/8390/stnic.c
@@ -71,7 +71,7 @@ static void stnic_init (struct net_device *dev);
 
 static u32 stnic_msg_enable;
 
-module_param_named(msg_enable, stnic_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, stnic_msg_enable, uint, 0444);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
 
 /* SH7750 specific read/write io. */
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index fb17c2c7e1dd..c834123560f1 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -507,7 +507,7 @@ module_param_hw_array(io, int, ioport, NULL, 0);
 module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_hw_array(mem, int, iomem, NULL, 0);
 module_param_hw_array(mem_end, int, iomem, NULL, 0);
-module_param_named(msg_enable, wd_msg_enable, uint, (S_IRUSR|S_IRGRP|S_IROTH));
+module_param_named(msg_enable, wd_msg_enable, uint, 0444);
 MODULE_PARM_DESC(io, "I/O base address(es)");
 MODULE_PARM_DESC(irq, "IRQ number(s) (ignored for PureData boards)");
 MODULE_PARM_DESC(mem, "memory base address(es)(ignored for PureData boards)");
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 527908c7e384..baca8f704a45 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -56,7 +56,7 @@
 static atomic_t instance_count = ATOMIC_INIT(~0);
 /* Module parameters */
 static int debug = -1;
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)");
 
 static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -65,12 +65,12 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 
 #define RX_DESCRIPTORS 64
 static int dma_rx_num = RX_DESCRIPTORS;
-module_param(dma_rx_num, int, S_IRUGO | S_IWUSR);
+module_param(dma_rx_num, int, 0644);
 MODULE_PARM_DESC(dma_rx_num, "Number of descriptors in the RX list");
 
 #define TX_DESCRIPTORS 64
 static int dma_tx_num = TX_DESCRIPTORS;
-module_param(dma_tx_num, int, S_IRUGO | S_IWUSR);
+module_param(dma_tx_num, int, 0644);
 MODULE_PARM_DESC(dma_tx_num, "Number of descriptors in the TX list");
 
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 100adee778df..7c204f05b418 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -137,21 +137,21 @@ static unsigned int ecc_ded_period = 600;
 
 #ifdef CONFIG_AMD_XGBE_HAVE_ECC
 /* Only expose the ECC parameters if supported */
-module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_sec_info_threshold, uint, 0644);
 MODULE_PARM_DESC(ecc_sec_info_threshold,
 		 " ECC corrected error informational threshold setting");
 
-module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_sec_warn_threshold, uint, 0644);
 MODULE_PARM_DESC(ecc_sec_warn_threshold,
 		 " ECC corrected error warning threshold setting");
 
-module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_sec_period, uint, 0644);
 MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)");
 
-module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_ded_threshold, uint, 0644);
 MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting");
 
-module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO);
+module_param(ecc_ded_period, uint, 0644);
 MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)");
 #endif
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index d91fa595be98..795e556d4a3f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -131,7 +131,7 @@ MODULE_VERSION(XGBE_DRV_VERSION);
 MODULE_DESCRIPTION(XGBE_DRV_DESC);
 
 static int debug = -1;
-module_param(debug, int, S_IWUSR | S_IRUGO);
+module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, " Network interface message level setting");
 
 static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 5e34b34f7740..9ffc4a8c5fc7 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -87,7 +87,7 @@ MODULE_FIRMWARE(FW_RV2P_FILE_09_Ax);
 
 static int disable_msi = 0;
 
-module_param(disable_msi, int, S_IRUGO);
+module_param(disable_msi, int, 0444);
 MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
 
 typedef enum {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index b8388e93520a..fd97250d6371 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -97,29 +97,29 @@ MODULE_FIRMWARE(FW_FILE_NAME_E1H);
 MODULE_FIRMWARE(FW_FILE_NAME_E2);
 
 int bnx2x_num_queues;
-module_param_named(num_queues, bnx2x_num_queues, int, S_IRUGO);
+module_param_named(num_queues, bnx2x_num_queues, int, 0444);
 MODULE_PARM_DESC(num_queues,
 		 " Set number of queues (default is as a number of CPUs)");
 
 static int disable_tpa;
-module_param(disable_tpa, int, S_IRUGO);
+module_param(disable_tpa, int, 0444);
 MODULE_PARM_DESC(disable_tpa, " Disable the TPA (LRO) feature");
 
 static int int_mode;
-module_param(int_mode, int, S_IRUGO);
+module_param(int_mode, int, 0444);
 MODULE_PARM_DESC(int_mode, " Force interrupt mode other than MSI-X "
 				"(1 INT#x; 2 MSI)");
 
 static int dropless_fc;
-module_param(dropless_fc, int, S_IRUGO);
+module_param(dropless_fc, int, 0444);
 MODULE_PARM_DESC(dropless_fc, " Pause on exhausted host ring");
 
 static int mrrs = -1;
-module_param(mrrs, int, S_IRUGO);
+module_param(mrrs, int, 0444);
 MODULE_PARM_DESC(mrrs, " Force Max Read Req Size (0..3) (for debug)");
 
 static int debug;
-module_param(debug, int, S_IRUGO);
+module_param(debug, int, 0444);
 MODULE_PARM_DESC(debug, " Default debug msglevel");
 
 static struct workqueue_struct *bnx2x_wq;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index ecdef42f0ae6..ef4a0c326736 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -63,24 +63,24 @@ MODULE_DESCRIPTION("Broadcom SiByte SOC GB Ethernet driver");
 
 /* 1 normal messages, 0 quiet .. 7 verbose. */
 static int debug = 1;
-module_param(debug, int, S_IRUGO);
+module_param(debug, int, 0444);
 MODULE_PARM_DESC(debug, "Debug messages");
 
 #ifdef CONFIG_SBMAC_COALESCE
 static int int_pktcnt_tx = 255;
-module_param(int_pktcnt_tx, int, S_IRUGO);
+module_param(int_pktcnt_tx, int, 0444);
 MODULE_PARM_DESC(int_pktcnt_tx, "TX packet count");
 
 static int int_timeout_tx = 255;
-module_param(int_timeout_tx, int, S_IRUGO);
+module_param(int_timeout_tx, int, 0444);
 MODULE_PARM_DESC(int_timeout_tx, "TX timeout value");
 
 static int int_pktcnt_rx = 64;
-module_param(int_pktcnt_rx, int, S_IRUGO);
+module_param(int_pktcnt_rx, int, 0444);
 MODULE_PARM_DESC(int_pktcnt_rx, "RX packet count");
 
 static int int_timeout_rx = 64;
-module_param(int_timeout_rx, int, S_IRUGO);
+module_param(int_timeout_rx, int, 0444);
 MODULE_PARM_DESC(int_timeout_rx, "RX timeout value");
 #endif
 
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index f2593978ae75..08bbb639be1a 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -10799,11 +10799,11 @@ static ssize_t tg3_show_temp(struct device *dev,
 }
 
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, tg3_show_temp, NULL,
+static SENSOR_DEVICE_ATTR(temp1_input, 0444, tg3_show_temp, NULL,
 			  TG3_TEMP_SENSOR_OFFSET);
-static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, tg3_show_temp, NULL,
+static SENSOR_DEVICE_ATTR(temp1_crit, 0444, tg3_show_temp, NULL,
 			  TG3_TEMP_CAUTION_OFFSET);
-static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, tg3_show_temp, NULL,
+static SENSOR_DEVICE_ATTR(temp1_max, 0444, tg3_show_temp, NULL,
 			  TG3_TEMP_MAX_OFFSET);
 
 static struct attribute *tg3_attrs[] = {
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index a843076597ec..69cc3e0119d6 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -46,7 +46,7 @@ module_param(bnad_ioc_auto_recover, uint, 0444);
 MODULE_PARM_DESC(bnad_ioc_auto_recover, "Enable / Disable auto recovery");
 
 static uint bna_debugfs_enable = 1;
-module_param(bna_debugfs_enable, uint, S_IRUGO | S_IWUSR);
+module_param(bna_debugfs_enable, uint, 0644);
 MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1,"
 		 " Range[false:0|true:1]");
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index cebfe3bd086e..933799be0471 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -486,11 +486,11 @@ struct bnad_debugfs_entry {
 };
 
 static const struct bnad_debugfs_entry bnad_debugfs_files[] = {
-	{ "fwtrc",  S_IFREG|S_IRUGO, &bnad_debugfs_op_fwtrc, },
-	{ "fwsave", S_IFREG|S_IRUGO, &bnad_debugfs_op_fwsave, },
-	{ "regrd",  S_IFREG|S_IRUGO|S_IWUSR, &bnad_debugfs_op_regrd, },
-	{ "regwr",  S_IFREG|S_IWUSR, &bnad_debugfs_op_regwr, },
-	{ "drvinfo", S_IFREG|S_IRUGO, &bnad_debugfs_op_drvinfo, },
+	{ "fwtrc",  S_IFREG | 0444, &bnad_debugfs_op_fwtrc, },
+	{ "fwsave", S_IFREG | 0444, &bnad_debugfs_op_fwsave, },
+	{ "regrd",  S_IFREG | 0644, &bnad_debugfs_op_regrd, },
+	{ "regwr",  S_IFREG | 0200, &bnad_debugfs_op_regwr, },
+	{ "drvinfo", S_IFREG | 0444, &bnad_debugfs_op_drvinfo, },
 };
 
 static struct dentry *bna_debugfs_root;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 7d9c5ffbd041..73fe3881414b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -63,7 +63,7 @@ module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Debug message level bitmap");
 
 static int cpi_alg = CPI_ALG_NONE;
-module_param(cpi_alg, int, S_IRUGO);
+module_param(cpi_alg, int, 0444);
 MODULE_PARM_DESC(cpi_alg,
 		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
 
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 185fe8df7628..2edfdbdaae48 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -776,11 +776,11 @@ static ssize_t store_nservers(struct device *d, struct device_attribute *attr,
 
 #define CXGB3_ATTR_R(name, val_expr) \
 CXGB3_SHOW(name, val_expr) \
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 #define CXGB3_ATTR_RW(name, val_expr, store_method) \
 CXGB3_SHOW(name, val_expr) \
-static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_method)
+static DEVICE_ATTR(name, 0644, show_##name, store_method)
 
 CXGB3_ATTR_R(cam_size, t3_mc5_size(&adap->mc5));
 CXGB3_ATTR_RW(nfilters, adap->params.mc5.nfilters, store_nfilters);
@@ -859,7 +859,7 @@ static ssize_t store_##name(struct device *d, struct device_attribute *attr, \
 { \
 	return tm_attr_store(d, buf, len, sched); \
 } \
-static DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_##name, store_##name)
+static DEVICE_ATTR(name, 0644, show_##name, store_##name)
 
 TM_ATTR(sched0, 0);
 TM_ATTR(sched1, 1);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index de2ba86eccfd..251d5bdc972f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2752,7 +2752,7 @@ DEFINE_SIMPLE_DEBUGFS_FILE(tid_info);
 static void add_debugfs_mem(struct adapter *adap, const char *name,
 			    unsigned int idx, unsigned int size_mb)
 {
-	debugfs_create_file_size(name, S_IRUSR, adap->debugfs_root,
+	debugfs_create_file_size(name, 0400, adap->debugfs_root,
 				 (void *)adap + idx, &mem_debugfs_fops,
 				 size_mb << 20);
 }
@@ -2947,65 +2947,65 @@ int t4_setup_debugfs(struct adapter *adap)
 	struct dentry *de;
 
 	static struct t4_debugfs_entry t4_debugfs_files[] = {
-		{ "cim_la", &cim_la_fops, S_IRUSR, 0 },
-		{ "cim_pif_la", &cim_pif_la_fops, S_IRUSR, 0 },
-		{ "cim_ma_la", &cim_ma_la_fops, S_IRUSR, 0 },
-		{ "cim_qcfg", &cim_qcfg_fops, S_IRUSR, 0 },
-		{ "clk", &clk_debugfs_fops, S_IRUSR, 0 },
-		{ "devlog", &devlog_fops, S_IRUSR, 0 },
-		{ "mboxlog", &mboxlog_fops, S_IRUSR, 0 },
-		{ "mbox0", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 0 },
-		{ "mbox1", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 1 },
-		{ "mbox2", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 2 },
-		{ "mbox3", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 3 },
-		{ "mbox4", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 4 },
-		{ "mbox5", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 5 },
-		{ "mbox6", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 6 },
-		{ "mbox7", &mbox_debugfs_fops, S_IRUSR | S_IWUSR, 7 },
-		{ "trace0", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 0 },
-		{ "trace1", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 1 },
-		{ "trace2", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 2 },
-		{ "trace3", &mps_trc_debugfs_fops, S_IRUSR | S_IWUSR, 3 },
-		{ "l2t", &t4_l2t_fops, S_IRUSR, 0},
-		{ "mps_tcam", &mps_tcam_debugfs_fops, S_IRUSR, 0 },
-		{ "rss", &rss_debugfs_fops, S_IRUSR, 0 },
-		{ "rss_config", &rss_config_debugfs_fops, S_IRUSR, 0 },
-		{ "rss_key", &rss_key_debugfs_fops, S_IRUSR, 0 },
-		{ "rss_pf_config", &rss_pf_config_debugfs_fops, S_IRUSR, 0 },
-		{ "rss_vf_config", &rss_vf_config_debugfs_fops, S_IRUSR, 0 },
-		{ "sge_qinfo", &sge_qinfo_debugfs_fops, S_IRUSR, 0 },
-		{ "ibq_tp0",  &cim_ibq_fops, S_IRUSR, 0 },
-		{ "ibq_tp1",  &cim_ibq_fops, S_IRUSR, 1 },
-		{ "ibq_ulp",  &cim_ibq_fops, S_IRUSR, 2 },
-		{ "ibq_sge0", &cim_ibq_fops, S_IRUSR, 3 },
-		{ "ibq_sge1", &cim_ibq_fops, S_IRUSR, 4 },
-		{ "ibq_ncsi", &cim_ibq_fops, S_IRUSR, 5 },
-		{ "obq_ulp0", &cim_obq_fops, S_IRUSR, 0 },
-		{ "obq_ulp1", &cim_obq_fops, S_IRUSR, 1 },
-		{ "obq_ulp2", &cim_obq_fops, S_IRUSR, 2 },
-		{ "obq_ulp3", &cim_obq_fops, S_IRUSR, 3 },
-		{ "obq_sge",  &cim_obq_fops, S_IRUSR, 4 },
-		{ "obq_ncsi", &cim_obq_fops, S_IRUSR, 5 },
-		{ "tp_la", &tp_la_fops, S_IRUSR, 0 },
-		{ "ulprx_la", &ulprx_la_fops, S_IRUSR, 0 },
-		{ "sensors", &sensors_debugfs_fops, S_IRUSR, 0 },
-		{ "pm_stats", &pm_stats_debugfs_fops, S_IRUSR, 0 },
-		{ "tx_rate", &tx_rate_debugfs_fops, S_IRUSR, 0 },
-		{ "cctrl", &cctrl_tbl_debugfs_fops, S_IRUSR, 0 },
+		{ "cim_la", &cim_la_fops, 0400, 0 },
+		{ "cim_pif_la", &cim_pif_la_fops, 0400, 0 },
+		{ "cim_ma_la", &cim_ma_la_fops, 0400, 0 },
+		{ "cim_qcfg", &cim_qcfg_fops, 0400, 0 },
+		{ "clk", &clk_debugfs_fops, 0400, 0 },
+		{ "devlog", &devlog_fops, 0400, 0 },
+		{ "mboxlog", &mboxlog_fops, 0400, 0 },
+		{ "mbox0", &mbox_debugfs_fops, 0600, 0 },
+		{ "mbox1", &mbox_debugfs_fops, 0600, 1 },
+		{ "mbox2", &mbox_debugfs_fops, 0600, 2 },
+		{ "mbox3", &mbox_debugfs_fops, 0600, 3 },
+		{ "mbox4", &mbox_debugfs_fops, 0600, 4 },
+		{ "mbox5", &mbox_debugfs_fops, 0600, 5 },
+		{ "mbox6", &mbox_debugfs_fops, 0600, 6 },
+		{ "mbox7", &mbox_debugfs_fops, 0600, 7 },
+		{ "trace0", &mps_trc_debugfs_fops, 0600, 0 },
+		{ "trace1", &mps_trc_debugfs_fops, 0600, 1 },
+		{ "trace2", &mps_trc_debugfs_fops, 0600, 2 },
+		{ "trace3", &mps_trc_debugfs_fops, 0600, 3 },
+		{ "l2t", &t4_l2t_fops, 0400, 0},
+		{ "mps_tcam", &mps_tcam_debugfs_fops, 0400, 0 },
+		{ "rss", &rss_debugfs_fops, 0400, 0 },
+		{ "rss_config", &rss_config_debugfs_fops, 0400, 0 },
+		{ "rss_key", &rss_key_debugfs_fops, 0400, 0 },
+		{ "rss_pf_config", &rss_pf_config_debugfs_fops, 0400, 0 },
+		{ "rss_vf_config", &rss_vf_config_debugfs_fops, 0400, 0 },
+		{ "sge_qinfo", &sge_qinfo_debugfs_fops, 0400, 0 },
+		{ "ibq_tp0",  &cim_ibq_fops, 0400, 0 },
+		{ "ibq_tp1",  &cim_ibq_fops, 0400, 1 },
+		{ "ibq_ulp",  &cim_ibq_fops, 0400, 2 },
+		{ "ibq_sge0", &cim_ibq_fops, 0400, 3 },
+		{ "ibq_sge1", &cim_ibq_fops, 0400, 4 },
+		{ "ibq_ncsi", &cim_ibq_fops, 0400, 5 },
+		{ "obq_ulp0", &cim_obq_fops, 0400, 0 },
+		{ "obq_ulp1", &cim_obq_fops, 0400, 1 },
+		{ "obq_ulp2", &cim_obq_fops, 0400, 2 },
+		{ "obq_ulp3", &cim_obq_fops, 0400, 3 },
+		{ "obq_sge",  &cim_obq_fops, 0400, 4 },
+		{ "obq_ncsi", &cim_obq_fops, 0400, 5 },
+		{ "tp_la", &tp_la_fops, 0400, 0 },
+		{ "ulprx_la", &ulprx_la_fops, 0400, 0 },
+		{ "sensors", &sensors_debugfs_fops, 0400, 0 },
+		{ "pm_stats", &pm_stats_debugfs_fops, 0400, 0 },
+		{ "tx_rate", &tx_rate_debugfs_fops, 0400, 0 },
+		{ "cctrl", &cctrl_tbl_debugfs_fops, 0400, 0 },
 #if IS_ENABLED(CONFIG_IPV6)
-		{ "clip_tbl", &clip_tbl_debugfs_fops, S_IRUSR, 0 },
+		{ "clip_tbl", &clip_tbl_debugfs_fops, 0400, 0 },
 #endif
-		{ "tids", &tid_info_debugfs_fops, S_IRUSR, 0},
-		{ "blocked_fl", &blocked_fl_fops, S_IRUSR | S_IWUSR, 0 },
-		{ "meminfo", &meminfo_fops, S_IRUSR, 0 },
-		{ "crypto", &chcr_stats_debugfs_fops, S_IRUSR, 0 },
+		{ "tids", &tid_info_debugfs_fops, 0400, 0},
+		{ "blocked_fl", &blocked_fl_fops, 0600, 0 },
+		{ "meminfo", &meminfo_fops, 0400, 0 },
+		{ "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
 	};
 
 	/* Debug FS nodes common to all T5 and later adapters.
 	 */
 	static struct t4_debugfs_entry t5_debugfs_files[] = {
-		{ "obq_sge_rx_q0", &cim_obq_fops, S_IRUSR, 6 },
-		{ "obq_sge_rx_q1", &cim_obq_fops, S_IRUSR, 7 },
+		{ "obq_sge_rx_q0", &cim_obq_fops, 0400, 6 },
+		{ "obq_sge_rx_q1", &cim_obq_fops, 0400, 7 },
 	};
 
 	add_debugfs_files(adap,
@@ -3050,11 +3050,11 @@ int t4_setup_debugfs(struct adapter *adap)
 		}
 	}
 
-	de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
+	de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap,
 				      &flash_debugfs_fops, adap->params.sf_size);
-	debugfs_create_bool("use_backdoor", S_IWUSR | S_IRUSR,
+	debugfs_create_bool("use_backdoor", 0600,
 			    adap->debugfs_root, &adap->use_bd);
-	debugfs_create_bool("trace_rss", S_IWUSR | S_IRUSR,
+	debugfs_create_bool("trace_rss", 0600,
 			    adap->debugfs_root, &adap->trace_rss);
 
 	return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 7bd8497fd9be..9a81b52307a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2402,11 +2402,11 @@ struct cxgb4vf_debugfs_entry {
 };
 
 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
-	{ "mboxlog",    S_IRUGO, &mboxlog_fops },
-	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
-	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
-	{ "resources",  S_IRUGO, &resources_proc_fops },
-	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
+	{ "mboxlog",    0444, &mboxlog_fops },
+	{ "sge_qinfo",  0444, &sge_qinfo_debugfs_fops },
+	{ "sge_qstats", 0444, &sge_qstats_proc_fops },
+	{ "resources",  0444, &resources_proc_fops },
+	{ "interfaces", 0444, &interfaces_proc_fops },
 };
 
 /*
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 1b79a6defd56..d71cba0842c5 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -602,7 +602,7 @@ static struct pci_driver pci_driver = {
 };
 module_pci_driver(pci_driver);
 
-module_param(polling_frequency, long, S_IRUGO);
+module_param(polling_frequency, long, 0444);
 MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns");
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 5774fb6f8aa0..c697e79e491e 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -34,11 +34,11 @@ MODULE_LICENSE("GPL");
  * Use sysfs method to enable/disable VFs.
  */
 static unsigned int num_vfs;
-module_param(num_vfs, uint, S_IRUGO);
+module_param(num_vfs, uint, 0444);
 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
 
 static ushort rx_frag_size = 2048;
-module_param(rx_frag_size, ushort, S_IRUGO);
+module_param(rx_frag_size, ushort, 0444);
 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
 
 /* Per-module error detection/recovery workq shared across all functions.
@@ -5788,7 +5788,7 @@ static ssize_t be_hwmon_show_temp(struct device *dev,
 			       adapter->hwmon_info.be_on_die_temp * 1000);
 }
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+static SENSOR_DEVICE_ATTR(temp1_input, 0444,
 			  be_hwmon_show_temp, NULL, 1);
 
 static struct attribute *be_hwmon_attrs[] = {
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 4878b7169e0f..ba580bfae512 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2903,8 +2903,7 @@ static ssize_t ehea_show_port_id(struct device *dev,
 	return sprintf(buf, "%d", port->logical_port_id);
 }
 
-static DEVICE_ATTR(log_port_id, S_IRUSR | S_IRGRP | S_IROTH, ehea_show_port_id,
-		   NULL);
+static DEVICE_ATTR(log_port_id, 0444, ehea_show_port_id, NULL);
 
 static void logical_port_release(struct device *dev)
 {
@@ -3235,8 +3234,8 @@ static ssize_t ehea_remove_port(struct device *dev,
 	return (ssize_t) count;
 }
 
-static DEVICE_ATTR(probe_port, S_IWUSR, NULL, ehea_probe_port);
-static DEVICE_ATTR(remove_port, S_IWUSR, NULL, ehea_remove_port);
+static DEVICE_ATTR(probe_port, 0200, NULL, ehea_probe_port);
+static DEVICE_ATTR(remove_port, 0200, NULL, ehea_remove_port);
 
 static int ehea_create_device_sysfs(struct platform_device *dev)
 {
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index f210398200ec..c1b51edaaf62 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -82,7 +82,7 @@ module_param(rx_flush, uint, 0644);
 MODULE_PARM_DESC(rx_flush, "Flush receive buffers before use");
 
 static bool old_large_send __read_mostly;
-module_param(old_large_send, bool, S_IRUGO);
+module_param(old_large_send, bool, 0444);
 MODULE_PARM_DESC(old_large_send,
 	"Use old large send method on firmware that supports the new method");
 
diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
index 41b306fb90f8..bebe43b3a836 100644
--- a/drivers/net/ethernet/intel/igb/igb_hwmon.c
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c
@@ -148,7 +148,7 @@ static int igb_add_hwmon_attr(struct igb_adapter *adapter,
 		&adapter->hw.mac.thermal_sensor_data.sensor[offset];
 	igb_attr->hw = &adapter->hw;
 	igb_attr->dev_attr.store = NULL;
-	igb_attr->dev_attr.attr.mode = S_IRUGO;
+	igb_attr->dev_attr.attr.mode = 0444;
 	igb_attr->dev_attr.attr.name = igb_attr->name;
 	sysfs_attr_init(&igb_attr->dev_attr.attr);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
index ef6df3d6437e..24766e125592 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
@@ -146,7 +146,7 @@ static int ixgbe_add_hwmon_attr(struct ixgbe_adapter *adapter,
 		&adapter->hw.mac.thermal_sensor_data.sensor[offset];
 	ixgbe_attr->hw = &adapter->hw;
 	ixgbe_attr->dev_attr.store = NULL;
-	ixgbe_attr->dev_attr.attr.mode = S_IRUGO;
+	ixgbe_attr->dev_attr.attr.mode = 0444;
 	ixgbe_attr->dev_attr.attr.name = ixgbe_attr->name;
 	sysfs_attr_init(&ixgbe_attr->dev_attr.attr);
 
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 25e9a551cc8c..eaa4bb80f1c9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4655,8 +4655,8 @@ MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com");
 MODULE_AUTHOR("Rami Rosen <rosenr@marvell.com>, Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
 MODULE_LICENSE("GPL");
 
-module_param(rxq_number, int, S_IRUGO);
-module_param(txq_number, int, S_IRUGO);
+module_param(rxq_number, int, 0444);
+module_param(txq_number, int, 0444);
 
-module_param(rxq_def, int, S_IRUGO);
-module_param(rx_copybreak, int, S_IRUGO | S_IWUSR);
+module_param(rxq_def, int, 0444);
+module_param(rx_copybreak, int, 0644);
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 31efc47c847e..9c08c3650c02 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -3783,7 +3783,7 @@ static int skge_device_event(struct notifier_block *unused,
 		break;
 
 	case NETDEV_UP:
-		d = debugfs_create_file(dev->name, S_IRUGO,
+		d = debugfs_create_file(dev->name, 0444,
 					skge_debug, dev,
 					&skge_debug_fops);
 		if (!d || IS_ERR(d))
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 9fe85300e7b6..9b77db7c13d0 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4667,7 +4667,7 @@ static int sky2_device_event(struct notifier_block *unused,
 		break;
 
 	case NETDEV_UP:
-		sky2->debugfs = debugfs_create_file(dev->name, S_IRUGO,
+		sky2->debugfs = debugfs_create_file(dev->name, 0444,
 						    sky2_debug, dev,
 						    &sky2_debug_fops);
 		if (IS_ERR(sky2->debugfs))
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 4d84cab77105..5a26851b4ffd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2993,10 +2993,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 
 	sprintf(info->dev_name, "mlx4_port%d", port);
 	info->port_attr.attr.name = info->dev_name;
-	if (mlx4_is_mfunc(dev))
-		info->port_attr.attr.mode = S_IRUGO;
-	else {
-		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (mlx4_is_mfunc(dev)) {
+		info->port_attr.attr.mode = 0444;
+	} else {
+		info->port_attr.attr.mode = 0644;
 		info->port_attr.store     = set_port_type;
 	}
 	info->port_attr.show      = show_port_type;
@@ -3011,10 +3011,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 
 	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
 	info->port_mtu_attr.attr.name = info->dev_mtu_name;
-	if (mlx4_is_mfunc(dev))
-		info->port_mtu_attr.attr.mode = S_IRUGO;
-	else {
-		info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (mlx4_is_mfunc(dev)) {
+		info->port_mtu_attr.attr.mode = 0444;
+	} else {
+		info->port_mtu_attr.attr.mode = 0644;
 		info->port_mtu_attr.store     = set_port_ib_mtu;
 	}
 	info->port_mtu_attr.show      = show_port_ib_mtu;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index ab710e37af99..84185f8dfbae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -218,32 +218,32 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
 	switch (attr_type) {
 	case MLXSW_HWMON_ATTR_TYPE_TEMP:
 		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show;
-		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "temp%u_input", num + 1);
 		break;
 	case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX:
 		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show;
-		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "temp%u_highest", num + 1);
 		break;
 	case MLXSW_HWMON_ATTR_TYPE_TEMP_RST:
 		mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_temp_rst_store;
-		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0200;
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "temp%u_reset_history", num + 1);
 		break;
 	case MLXSW_HWMON_ATTR_TYPE_FAN_RPM:
 		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show;
-		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "fan%u_input", num + 1);
 		break;
 	case MLXSW_HWMON_ATTR_TYPE_PWM:
 		mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show;
 		mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store;
-		mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO;
+		mlxsw_hwmon_attr->dev_attr.attr.mode = 0644;
 		snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
 			 "pwm%u", num + 1);
 		break;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 2521c8c40015..b2d2ec8c11e2 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -266,7 +266,7 @@ MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat");
 
 /* Careful: must be accessed under kernel_param_lock() */
 static char *myri10ge_fw_name = NULL;
-module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR);
+module_param(myri10ge_fw_name, charp, 0644);
 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name");
 
 #define MYRI10GE_MAX_BOARDS 8
@@ -277,49 +277,49 @@ module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL,
 MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board");
 
 static int myri10ge_ecrc_enable = 1;
-module_param(myri10ge_ecrc_enable, int, S_IRUGO);
+module_param(myri10ge_ecrc_enable, int, 0444);
 MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E");
 
 static int myri10ge_small_bytes = -1;	/* -1 == auto */
-module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_small_bytes, int, 0644);
 MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets");
 
 static int myri10ge_msi = 1;	/* enable msi by default */
-module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_msi, int, 0644);
 MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts");
 
 static int myri10ge_intr_coal_delay = 75;
-module_param(myri10ge_intr_coal_delay, int, S_IRUGO);
+module_param(myri10ge_intr_coal_delay, int, 0444);
 MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay");
 
 static int myri10ge_flow_control = 1;
-module_param(myri10ge_flow_control, int, S_IRUGO);
+module_param(myri10ge_flow_control, int, 0444);
 MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter");
 
 static int myri10ge_deassert_wait = 1;
-module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_deassert_wait, int, 0644);
 MODULE_PARM_DESC(myri10ge_deassert_wait,
 		 "Wait when deasserting legacy interrupts");
 
 static int myri10ge_force_firmware = 0;
-module_param(myri10ge_force_firmware, int, S_IRUGO);
+module_param(myri10ge_force_firmware, int, 0444);
 MODULE_PARM_DESC(myri10ge_force_firmware,
 		 "Force firmware to assume aligned completions");
 
 static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
-module_param(myri10ge_initial_mtu, int, S_IRUGO);
+module_param(myri10ge_initial_mtu, int, 0444);
 MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU");
 
 static int myri10ge_napi_weight = 64;
-module_param(myri10ge_napi_weight, int, S_IRUGO);
+module_param(myri10ge_napi_weight, int, 0444);
 MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight");
 
 static int myri10ge_watchdog_timeout = 1;
-module_param(myri10ge_watchdog_timeout, int, S_IRUGO);
+module_param(myri10ge_watchdog_timeout, int, 0444);
 MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout");
 
 static int myri10ge_max_irq_loops = 1048576;
-module_param(myri10ge_max_irq_loops, int, S_IRUGO);
+module_param(myri10ge_max_irq_loops, int, 0444);
 MODULE_PARM_DESC(myri10ge_max_irq_loops,
 		 "Set stuck legacy IRQ detection threshold");
 
@@ -330,21 +330,21 @@ module_param(myri10ge_debug, int, 0);
 MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)");
 
 static int myri10ge_fill_thresh = 256;
-module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR);
+module_param(myri10ge_fill_thresh, int, 0644);
 MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed");
 
 static int myri10ge_reset_recover = 1;
 
 static int myri10ge_max_slices = 1;
-module_param(myri10ge_max_slices, int, S_IRUGO);
+module_param(myri10ge_max_slices, int, 0444);
 MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues");
 
 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
-module_param(myri10ge_rss_hash, int, S_IRUGO);
+module_param(myri10ge_rss_hash, int, 0444);
 MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do");
 
 static int myri10ge_dca = 1;
-module_param(myri10ge_dca, int, S_IRUGO);
+module_param(myri10ge_dca, int, 0444);
 MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible");
 
 #define MYRI10GE_FW_OFFSET 1024*1024
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index cf81cf95d1d8..67cdd8330c59 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -231,15 +231,15 @@ void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id)
 
 	for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) {
 		sprintf(name, "%d", i);
-		debugfs_create_file(name, S_IRUSR, rx,
+		debugfs_create_file(name, 0400, rx,
 				    &nn->r_vecs[i], &nfp_rx_q_fops);
-		debugfs_create_file(name, S_IRUSR, xdp,
+		debugfs_create_file(name, 0400, xdp,
 				    &nn->r_vecs[i], &nfp_xdp_q_fops);
 	}
 
 	for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) {
 		sprintf(name, "%d", i);
-		debugfs_create_file(name, S_IRUSR, tx,
+		debugfs_create_file(name, 0400, tx,
 				    &nn->r_vecs[i], &nfp_tx_q_fops);
 	}
 }
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index f2e8de607119..8259e8309320 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -2829,9 +2829,9 @@ netxen_show_bridged_mode(struct device *dev,
 }
 
 static const struct device_attribute dev_attr_bridged_mode = {
-       .attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)},
-       .show = netxen_show_bridged_mode,
-       .store = netxen_store_bridged_mode,
+	.attr = { .name = "bridged_mode", .mode = 0644 },
+	.show = netxen_show_bridged_mode,
+	.store = netxen_store_bridged_mode,
 };
 
 static ssize_t
@@ -2861,7 +2861,7 @@ netxen_show_diag_mode(struct device *dev,
 }
 
 static const struct device_attribute dev_attr_diag_mode = {
-	.attr = {.name = "diag_mode", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "diag_mode", .mode = 0644 },
 	.show = netxen_show_diag_mode,
 	.store = netxen_store_diag_mode,
 };
@@ -3006,14 +3006,14 @@ static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj,
 
 
 static const struct bin_attribute bin_attr_crb = {
-	.attr = {.name = "crb", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "crb", .mode = 0644 },
 	.size = 0,
 	.read = netxen_sysfs_read_crb,
 	.write = netxen_sysfs_write_crb,
 };
 
 static const struct bin_attribute bin_attr_mem = {
-	.attr = {.name = "mem", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "mem", .mode = 0644 },
 	.size = 0,
 	.read = netxen_sysfs_read_mem,
 	.write = netxen_sysfs_write_mem,
@@ -3142,7 +3142,7 @@ out:
 }
 
 static const struct bin_attribute bin_attr_dimm = {
-	.attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) },
+	.attr = { .name = "dimm", .mode = 0644 },
 	.size = sizeof(struct netxen_dimm_cfg),
 	.read = netxen_sysfs_read_dimm,
 };
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 287d89dd086f..891f03a7a33d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -1175,81 +1175,81 @@ static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp,
 }
 
 static const struct device_attribute dev_attr_bridged_mode = {
-       .attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)},
-       .show = qlcnic_show_bridged_mode,
-       .store = qlcnic_store_bridged_mode,
+	.attr = { .name = "bridged_mode", .mode = 0644 },
+	.show = qlcnic_show_bridged_mode,
+	.store = qlcnic_store_bridged_mode,
 };
 
 static const struct device_attribute dev_attr_diag_mode = {
-	.attr = {.name = "diag_mode", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "diag_mode", .mode = 0644 },
 	.show = qlcnic_show_diag_mode,
 	.store = qlcnic_store_diag_mode,
 };
 
 static const struct device_attribute dev_attr_beacon = {
-	.attr = {.name = "beacon", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "beacon", .mode = 0644 },
 	.show = qlcnic_show_beacon,
 	.store = qlcnic_store_beacon,
 };
 
 static const struct bin_attribute bin_attr_crb = {
-	.attr = {.name = "crb", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "crb", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_read_crb,
 	.write = qlcnic_sysfs_write_crb,
 };
 
 static const struct bin_attribute bin_attr_mem = {
-	.attr = {.name = "mem", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "mem", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_read_mem,
 	.write = qlcnic_sysfs_write_mem,
 };
 
 static const struct bin_attribute bin_attr_npar_config = {
-	.attr = {.name = "npar_config", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "npar_config", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_read_npar_config,
 	.write = qlcnic_sysfs_write_npar_config,
 };
 
 static const struct bin_attribute bin_attr_pci_config = {
-	.attr = {.name = "pci_config", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "pci_config", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_read_pci_config,
 	.write = NULL,
 };
 
 static const struct bin_attribute bin_attr_port_stats = {
-	.attr = {.name = "port_stats", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "port_stats", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_get_port_stats,
 	.write = qlcnic_sysfs_clear_port_stats,
 };
 
 static const struct bin_attribute bin_attr_esw_stats = {
-	.attr = {.name = "esw_stats", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "esw_stats", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_get_esw_stats,
 	.write = qlcnic_sysfs_clear_esw_stats,
 };
 
 static const struct bin_attribute bin_attr_esw_config = {
-	.attr = {.name = "esw_config", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "esw_config", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_read_esw_config,
 	.write = qlcnic_sysfs_write_esw_config,
 };
 
 static const struct bin_attribute bin_attr_pm_config = {
-	.attr = {.name = "pm_config", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "pm_config", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_sysfs_read_pm_config,
 	.write = qlcnic_sysfs_write_pm_config,
 };
 
 static const struct bin_attribute bin_attr_flash = {
-	.attr = {.name = "flash", .mode = (S_IRUGO | S_IWUSR)},
+	.attr = { .name = "flash", .mode = 0644 },
 	.size = 0,
 	.read = qlcnic_83xx_sysfs_flash_read_handler,
 	.write = qlcnic_83xx_sysfs_flash_write_handler,
@@ -1276,7 +1276,7 @@ static ssize_t qlcnic_hwmon_show_temp(struct device *dev,
 }
 
 /* hwmon-sysfs attributes */
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+static SENSOR_DEVICE_ATTR(temp1_input, 0444,
 			  qlcnic_hwmon_show_temp, NULL, 1);
 
 static struct attribute *qlcnic_hwmon_attrs[] = {
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c b/drivers/net/ethernet/qualcomm/qca_debug.c
index 92b6be9c4429..51d89c86e60f 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -151,7 +151,7 @@ qcaspi_init_device_debugfs(struct qcaspi *qca)
 			dev_name(&qca->net_dev->dev));
 		return;
 	}
-	debugfs_create_file("info", S_IFREG | S_IRUGO, device_root, qca,
+	debugfs_create_file("info", S_IFREG | 0444, device_root, qca,
 			    &qcaspi_info_ops);
 }
 
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index fd35d8004a78..a9da1ad4b4f2 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -57,9 +57,9 @@
 static int debug = -1;
 static int eee_timer = SXGBE_DEFAULT_LPI_TIMER;
 
-module_param(eee_timer, int, S_IRUGO | S_IWUSR);
+module_param(eee_timer, int, 0644);
 
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
 static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 				      NETIF_MSG_LINK | NETIF_MSG_IFUP |
 				      NETIF_MSG_IFDOWN | NETIF_MSG_TIMER);
diff --git a/drivers/net/ethernet/sfc/mcdi_mon.c b/drivers/net/ethernet/sfc/mcdi_mon.c
index f97da05952c7..f17751559ccc 100644
--- a/drivers/net/ethernet/sfc/mcdi_mon.c
+++ b/drivers/net/ethernet/sfc/mcdi_mon.c
@@ -298,7 +298,7 @@ efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name,
 	attr->limit_value = limit_value;
 	sysfs_attr_init(&attr->dev_attr.attr);
 	attr->dev_attr.attr.name = attr->name;
-	attr->dev_attr.attr.mode = S_IRUGO;
+	attr->dev_attr.attr.mode = 0444;
 	attr->dev_attr.show = reader;
 	hwmon->group.attrs[hwmon->n_attrs++] = &attr->dev_attr.attr;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a9856a8bf8ad..9f983dd069d5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -57,36 +57,36 @@
 /* Module parameters */
 #define TX_TIMEO	5000
 static int watchdog = TX_TIMEO;
-module_param(watchdog, int, S_IRUGO | S_IWUSR);
+module_param(watchdog, int, 0644);
 MODULE_PARM_DESC(watchdog, "Transmit timeout in milliseconds (default 5s)");
 
 static int debug = -1;
-module_param(debug, int, S_IRUGO | S_IWUSR);
+module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)");
 
 static int phyaddr = -1;
-module_param(phyaddr, int, S_IRUGO);
+module_param(phyaddr, int, 0444);
 MODULE_PARM_DESC(phyaddr, "Physical device address");
 
 #define STMMAC_TX_THRESH	(DMA_TX_SIZE / 4)
 #define STMMAC_RX_THRESH	(DMA_RX_SIZE / 4)
 
 static int flow_ctrl = FLOW_OFF;
-module_param(flow_ctrl, int, S_IRUGO | S_IWUSR);
+module_param(flow_ctrl, int, 0644);
 MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
 
 static int pause = PAUSE_TIME;
-module_param(pause, int, S_IRUGO | S_IWUSR);
+module_param(pause, int, 0644);
 MODULE_PARM_DESC(pause, "Flow Control Pause Time");
 
 #define TC_DEFAULT 64
 static int tc = TC_DEFAULT;
-module_param(tc, int, S_IRUGO | S_IWUSR);
+module_param(tc, int, 0644);
 MODULE_PARM_DESC(tc, "DMA threshold control value");
 
 #define	DEFAULT_BUFSIZE	1536
 static int buf_sz = DEFAULT_BUFSIZE;
-module_param(buf_sz, int, S_IRUGO | S_IWUSR);
+module_param(buf_sz, int, 0644);
 MODULE_PARM_DESC(buf_sz, "DMA buffer size");
 
 #define	STMMAC_RX_COPYBREAK	256
@@ -97,7 +97,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 
 #define STMMAC_DEFAULT_LPI_TIMER	1000
 static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
-module_param(eee_timer, int, S_IRUGO | S_IWUSR);
+module_param(eee_timer, int, 0644);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
 
@@ -105,7 +105,7 @@ MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
  * but allow user to force to use the chain instead of the ring
  */
 static unsigned int chain_mode;
-module_param(chain_mode, int, S_IRUGO);
+module_param(chain_mode, int, 0444);
 MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode");
 
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
@@ -4001,7 +4001,7 @@ static int stmmac_init_fs(struct net_device *dev)
 
 	/* Entry to report DMA RX/TX rings */
 	priv->dbgfs_rings_status =
-		debugfs_create_file("descriptors_status", S_IRUGO,
+		debugfs_create_file("descriptors_status", 0444,
 				    priv->dbgfs_dir, dev,
 				    &stmmac_rings_status_fops);
 
@@ -4013,9 +4013,9 @@ static int stmmac_init_fs(struct net_device *dev)
 	}
 
 	/* Entry to report the DMA HW features */
-	priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", S_IRUGO,
-					    priv->dbgfs_dir,
-					    dev, &stmmac_dma_cap_fops);
+	priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", 0444,
+						  priv->dbgfs_dir,
+						  dev, &stmmac_dma_cap_fops);
 
 	if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
 		netdev_err(priv->dev, "ERROR creating stmmac MMC debugfs file\n");
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 8dd545fed30d..f081de4f38d7 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9437,11 +9437,11 @@ static ssize_t show_num_ports(struct device *dev,
 }
 
 static struct device_attribute niu_parent_attributes[] = {
-	__ATTR(port_phy, S_IRUGO, show_port_phy, NULL),
-	__ATTR(plat_type, S_IRUGO, show_plat_type, NULL),
-	__ATTR(rxchan_per_port, S_IRUGO, show_rxchan_per_port, NULL),
-	__ATTR(txchan_per_port, S_IRUGO, show_txchan_per_port, NULL),
-	__ATTR(num_ports, S_IRUGO, show_num_ports, NULL),
+	__ATTR(port_phy, 0444, show_port_phy, NULL),
+	__ATTR(plat_type, 0444, show_plat_type, NULL),
+	__ATTR(rxchan_per_port, 0444, show_rxchan_per_port, NULL),
+	__ATTR(txchan_per_port, 0444, show_txchan_per_port, NULL),
+	__ATTR(num_ports, 0444, show_num_ports, NULL),
 	{}
 };
 

From 0c29ba1b43df1eb7d8beb03fc929d2dac4c15f7e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Mar 2018 23:51:57 +0000
Subject: [PATCH 1212/1678] net: qualcomm: rmnet: check for null ep to avoid
 null pointer dereference

The call to rmnet_get_endpoint can potentially return NULL so check
for this to avoid any subsequent null pointer dereferences on a NULL
ep.

Detected by CoverityScan, CID#1465385 ("Dereference null return value")

Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 38d9356ebcc4..d33988570217 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -312,6 +312,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 	if (data[IFLA_RMNET_MUX_ID]) {
 		mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
 		ep = rmnet_get_endpoint(port, priv->mux_id);
+		if (!ep)
+			return -ENODEV;
 
 		hlist_del_init_rcu(&ep->hlnode);
 		hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);

From 3bd6d258b1d5f76744567855d1376358a94f127d Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 24 Mar 2018 11:32:43 +0800
Subject: [PATCH 1213/1678] net: hns3: fix for returning wrong value problem in
 hns3_get_rss_key_size

The return type of hns3_get_rss_key_size is u32. But a negative value is
returned. This patch fixes it by replacing the negative value with zero.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 9d07116a4426..ac523c90d604 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -638,7 +638,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev)
 
 	if (!h->ae_algo || !h->ae_algo->ops ||
 	    !h->ae_algo->ops->get_rss_key_size)
-		return -EOPNOTSUPP;
+		return 0;
 
 	return h->ae_algo->ops->get_rss_key_size(h);
 }

From da44a00f06df1f823ea449065e79581ee624de4b Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 24 Mar 2018 11:32:44 +0800
Subject: [PATCH 1214/1678] net: hns3: fix for returning wrong value problem in
 hns3_get_rss_indir_size

The return type of hns3_get_rss_indir_size is u32. But a negative value is
returned. This patch fixes it by replacing the negative value with zero.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index ac523c90d604..eb3c34f3cf87 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -649,7 +649,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev)
 
 	if (!h->ae_algo || !h->ae_algo->ops ||
 	    !h->ae_algo->ops->get_rss_indir_size)
-		return -EOPNOTSUPP;
+		return 0;
 
 	return h->ae_algo->ops->get_rss_indir_size(h);
 }

From 3c8f5c0339515202e8662b6e3ae36a7b16610caf Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 24 Mar 2018 11:32:45 +0800
Subject: [PATCH 1215/1678] net: hns3: fix for the wrong shift problem in
 hns3_set_txbd_baseinfo

Third parameter of hnae_set_field is shift, But a mask is given. This
patch fixes it by replacing HNS3_TXD_BDTYPE_M with HNS3_TXD_BDTYPE_S.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 40a3eb70629e..a31b4adf6e6a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -764,7 +764,7 @@ static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
 {
 	/* Config bd buffer end */
 	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
-		       HNS3_TXD_BDTYPE_M, 0);
+		       HNS3_TXD_BDTYPE_S, 0);
 	hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
 	hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
 	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0);

From ea739c90650ae39307c7eb2bc6851ab78deed5df Mon Sep 17 00:00:00 2001
From: Fuyun Liang <liangfuyun1@huawei.com>
Date: Sat, 24 Mar 2018 11:32:46 +0800
Subject: [PATCH 1216/1678] net: hns3: fix for not initializing VF rss_hash_key
 problem

Default rss_hash_key value should be given to all vports. But just the
PF rss_hash_key has the default value here. This patch adds rss_hash_key
Initialization for all vports.

Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bede4117bad9..ce093c350316 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3466,8 +3466,6 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev)
 	struct hclge_vport *vport = hdev->vport;
 	int i;
 
-	netdev_rss_key_fill(vport->rss_hash_key, HCLGE_RSS_KEY_SIZE);
-
 	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 		vport[i].rss_tuple_sets.ipv4_tcp_en =
 			HCLGE_RSS_INPUT_TUPLE_OTHER;
@@ -3487,6 +3485,8 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev)
 			HCLGE_RSS_INPUT_TUPLE_OTHER;
 
 		vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+
+		netdev_rss_key_fill(vport[i].rss_hash_key, HCLGE_RSS_KEY_SIZE);
 	}
 
 	hclge_rss_indir_init_cfg(hdev);

From b50ae26c57cb2fe8811574f6c1f92b16968e0dca Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Sat, 24 Mar 2018 11:32:47 +0800
Subject: [PATCH 1217/1678] net: hns3: never send command queue message to IMP
 when reset

IMP will not handle and command queue message any more when it is
in core/global, driver should not send command queue message to
IMP until reinitialize the NIC HW.

This patch checks the status and avoid the message sent to IMP when
reset.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 23 +++++++++++++++----
 .../hisilicon/hns3/hns3pf/hclge_mdio.c        |  6 +++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index ce093c350316..2066dd734444 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3584,6 +3584,9 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle,
 	struct hclge_dev *hdev = vport->back;
 	int vector_id, ret;
 
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		return 0;
+
 	vector_id = hclge_get_vector_index(hdev, vector);
 	if (vector_id < 0) {
 		dev_err(&handle->pdev->dev,
@@ -3781,13 +3784,16 @@ static int hclge_ae_start(struct hnae3_handle *handle)
 	clear_bit(HCLGE_STATE_DOWN, &hdev->state);
 	mod_timer(&hdev->service_timer, jiffies + HZ);
 
+	/* reset tqp stats */
+	hclge_reset_tqp_stats(handle);
+
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		return 0;
+
 	ret = hclge_mac_start_phy(hdev);
 	if (ret)
 		return ret;
 
-	/* reset tqp stats */
-	hclge_reset_tqp_stats(handle);
-
 	return 0;
 }
 
@@ -3797,6 +3803,12 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 	struct hclge_dev *hdev = vport->back;
 	int i;
 
+	del_timer_sync(&hdev->service_timer);
+	cancel_work_sync(&hdev->service_task);
+
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		return;
+
 	for (i = 0; i < vport->alloc_tqps; i++)
 		hclge_tqp_enable(hdev, i, 0, false);
 
@@ -3807,8 +3819,6 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
 	/* reset tqp stats */
 	hclge_reset_tqp_stats(handle);
-	del_timer_sync(&hdev->service_timer);
-	cancel_work_sync(&hdev->service_task);
 	hclge_update_link_status(hdev);
 }
 
@@ -4940,6 +4950,9 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 	u16 queue_gid;
 	int ret;
 
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		return;
+
 	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
 
 	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index c1dea3a47bdd..682c2d6618e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -60,6 +60,9 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
 	struct hclge_desc desc;
 	int ret;
 
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		return 0;
+
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
 
 	mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
@@ -95,6 +98,9 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
 	struct hclge_desc desc;
 	int ret;
 
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		return 0;
+
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
 
 	mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;

From 93e9ad985038f269838c4c5deab4c84d87c7b5a7 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Fri, 23 Mar 2018 21:24:02 -0700
Subject: [PATCH 1218/1678] ptp: Fix documentation to match code.

Ever since commit 3a06c7ac24f9 ("posix-clocks: Remove interval timer
facility and mmap/fasync callbacks") the possibility of PHC based
posix timers has been removed.  In addition it will probably never
make sense to implement this functionality.

This patch removes the misleading text which seems to suggest that
posix timers for PHC devices will ever be a thing.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ptp/ptp.txt | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt
index ae8fef86b832..11e904ee073f 100644
--- a/Documentation/ptp/ptp.txt
+++ b/Documentation/ptp/ptp.txt
@@ -18,7 +18,6 @@
     - Adjust clock frequency
 
   + Ancillary clock features
-    - One short or periodic alarms, with signal delivery to user program
     - Time stamp external events
     - Period output signals configurable from user space
     - Synchronization of the Linux system time via the PPS subsystem
@@ -48,9 +47,7 @@
    User space programs may control the clock using standardized
    ioctls. A program may query, enable, configure, and disable the
    ancillary clock features. User space can receive time stamped
-   events via blocking read() and poll(). One shot and periodic
-   signals may be configured via the POSIX timer_settime() system
-   call.
+   events via blocking read() and poll().
 
 ** Writing clock drivers
 

From 4ceedeb1b68e376ccfec9f8d2e46bcd541702aba Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Mar 2018 23:07:41 +0300
Subject: [PATCH 1219/1678] sh_eth: add sh_eth_cpu_data::soft_reset() method

sh_eth_reset() performs a software reset which is implemented in a
completely different way for the GEther-like controllers vs the other
controllers due to a different layout of EDMR (and other factors) --
it therefore makes sense to convert this function to a mandatory
sh_eth_cpu_data::soft_reset() method and thus get rid of the runtime
controller type check via sh_eth_is_{gether|rz_fast_ether}().

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 135 +++++++++++++++-----------
 drivers/net/ethernet/renesas/sh_eth.h |   3 +
 2 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 3557fe3f2bb5..e7d980f9df7d 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -501,6 +501,62 @@ static void sh_eth_chip_reset(struct net_device *ndev)
 	mdelay(1);
 }
 
+static int sh_eth_soft_reset(struct net_device *ndev)
+{
+	sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
+	mdelay(3);
+	sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);
+
+	return 0;
+}
+
+static int sh_eth_check_soft_reset(struct net_device *ndev)
+{
+	int cnt;
+
+	for (cnt = 100; cnt > 0; cnt--) {
+		if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
+			return 0;
+		mdelay(1);
+	}
+
+	netdev_err(ndev, "Device reset failed\n");
+	return -ETIMEDOUT;
+}
+
+static int sh_eth_soft_reset_gether(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	int ret;
+
+	sh_eth_write(ndev, EDSR_ENALL, EDSR);
+	sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);
+
+	ret = sh_eth_check_soft_reset(ndev);
+	if (ret)
+		return ret;
+
+	/* Table Init */
+	sh_eth_write(ndev, 0, TDLAR);
+	sh_eth_write(ndev, 0, TDFAR);
+	sh_eth_write(ndev, 0, TDFXR);
+	sh_eth_write(ndev, 0, TDFFR);
+	sh_eth_write(ndev, 0, RDLAR);
+	sh_eth_write(ndev, 0, RDFAR);
+	sh_eth_write(ndev, 0, RDFXR);
+	sh_eth_write(ndev, 0, RDFFR);
+
+	/* Reset HW CRC register */
+	if (mdp->cd->hw_checksum)
+		sh_eth_write(ndev, 0, CSMR);
+
+	/* Select MII mode */
+	if (mdp->cd->select_mii)
+		sh_eth_select_mii(ndev);
+
+	return ret;
+}
+
 static void sh_eth_set_rate_gether(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -521,6 +577,8 @@ static void sh_eth_set_rate_gether(struct net_device *ndev)
 #ifdef CONFIG_OF
 /* R7S72100 */
 static struct sh_eth_cpu_data r7s72100_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset,
 	.set_duplex	= sh_eth_set_duplex,
 
@@ -565,6 +623,8 @@ static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
 
 /* R8A7740 */
 static struct sh_eth_cpu_data r8a7740_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset_r8a7740,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_gether,
@@ -620,6 +680,8 @@ static void sh_eth_set_rate_rcar(struct net_device *ndev)
 
 /* R-Car Gen1 */
 static struct sh_eth_cpu_data rcar_gen1_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_rcar,
 
@@ -647,6 +709,8 @@ static struct sh_eth_cpu_data rcar_gen1_data = {
 
 /* R-Car Gen2 and RZ/G1 */
 static struct sh_eth_cpu_data rcar_gen2_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_rcar,
 
@@ -694,6 +758,8 @@ static void sh_eth_set_rate_sh7724(struct net_device *ndev)
 
 /* SH7724 */
 static struct sh_eth_cpu_data sh7724_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_sh7724,
 
@@ -736,6 +802,8 @@ static void sh_eth_set_rate_sh7757(struct net_device *ndev)
 
 /* SH7757 */
 static struct sh_eth_cpu_data sh7757_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_sh7757,
 
@@ -808,6 +876,8 @@ static void sh_eth_set_rate_giga(struct net_device *ndev)
 
 /* SH7757(GETHERC) */
 static struct sh_eth_cpu_data sh7757_data_giga = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset_giga,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_giga,
@@ -847,6 +917,8 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
 
 /* SH7734 */
 static struct sh_eth_cpu_data sh7734_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_gether,
@@ -883,6 +955,8 @@ static struct sh_eth_cpu_data sh7734_data = {
 
 /* SH7763 */
 static struct sh_eth_cpu_data sh7763_data = {
+	.soft_reset	= sh_eth_soft_reset_gether,
+
 	.chip_reset	= sh_eth_chip_reset,
 	.set_duplex	= sh_eth_set_duplex,
 	.set_rate	= sh_eth_set_rate_gether,
@@ -917,6 +991,8 @@ static struct sh_eth_cpu_data sh7763_data = {
 };
 
 static struct sh_eth_cpu_data sh7619_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -935,6 +1011,8 @@ static struct sh_eth_cpu_data sh7619_data = {
 };
 
 static struct sh_eth_cpu_data sh771x_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -974,59 +1052,6 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd)
 		cd->trscer_err_mask = DEFAULT_TRSCER_ERR_MASK;
 }
 
-static int sh_eth_check_reset(struct net_device *ndev)
-{
-	int cnt;
-
-	for (cnt = 100; cnt > 0; cnt--) {
-		if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER))
-			return 0;
-		mdelay(1);
-	}
-
-	netdev_err(ndev, "Device reset failed\n");
-	return -ETIMEDOUT;
-}
-
-static int sh_eth_reset(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-	int ret = 0;
-
-	if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp)) {
-		sh_eth_write(ndev, EDSR_ENALL, EDSR);
-		sh_eth_modify(ndev, EDMR, EDMR_SRST_GETHER, EDMR_SRST_GETHER);
-
-		ret = sh_eth_check_reset(ndev);
-		if (ret)
-			return ret;
-
-		/* Table Init */
-		sh_eth_write(ndev, 0x0, TDLAR);
-		sh_eth_write(ndev, 0x0, TDFAR);
-		sh_eth_write(ndev, 0x0, TDFXR);
-		sh_eth_write(ndev, 0x0, TDFFR);
-		sh_eth_write(ndev, 0x0, RDLAR);
-		sh_eth_write(ndev, 0x0, RDFAR);
-		sh_eth_write(ndev, 0x0, RDFXR);
-		sh_eth_write(ndev, 0x0, RDFFR);
-
-		/* Reset HW CRC register */
-		if (mdp->cd->hw_checksum)
-			sh_eth_write(ndev, 0x0, CSMR);
-
-		/* Select MII mode */
-		if (mdp->cd->select_mii)
-			sh_eth_select_mii(ndev);
-	} else {
-		sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, EDMR_SRST_ETHER);
-		mdelay(3);
-		sh_eth_modify(ndev, EDMR, EDMR_SRST_ETHER, 0);
-	}
-
-	return ret;
-}
-
 static void sh_eth_set_receive_align(struct sk_buff *skb)
 {
 	uintptr_t reserve = (uintptr_t)skb->data & (SH_ETH_RX_ALIGN - 1);
@@ -1362,7 +1387,7 @@ static int sh_eth_dev_init(struct net_device *ndev)
 	int ret;
 
 	/* Soft Reset */
-	ret = sh_eth_reset(ndev);
+	ret = mdp->cd->soft_reset(ndev);
 	if (ret)
 		return ret;
 
@@ -1463,7 +1488,7 @@ static void sh_eth_dev_exit(struct net_device *ndev)
 	 */
 	msleep(2); /* max frame time at 10 Mbps < 1250 us */
 	sh_eth_get_stats(ndev);
-	sh_eth_reset(ndev);
+	mdp->cd->soft_reset(ndev);
 
 	/* Set MAC address again */
 	update_mac_address(ndev);
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 21047d58a93f..ff3520b0d86f 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -469,6 +469,9 @@ struct sh_eth_rxdesc {
 
 /* This structure is used by each CPU dependency handling. */
 struct sh_eth_cpu_data {
+	/* mandatory functions */
+	int (*soft_reset)(struct net_device *ndev);
+
 	/* optional functions */
 	void (*chip_reset)(struct net_device *ndev);
 	void (*set_duplex)(struct net_device *ndev);

From 3e416992e2fae2c3177bed157534503691dc3510 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Mar 2018 23:08:42 +0300
Subject: [PATCH 1220/1678] sh_eth: add sh_eth_cpu_data::edtrr_trns value

sh_eth_get_edtrr_trns() returns the value to be written to EDTRR in order
to start TX DMA -- this value is different between the GEther-like and
the other controllers. We can replace this function (and thus get rid of
the calls to sh_eth_is_{gether|rz_fast_ether}() by it) with a new field
'edtrr_trns' in the 'struct sh_eth_cpu_data'.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 27 +++++++++++++++------------
 drivers/net/ethernet/renesas/sh_eth.h |  1 +
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index e7d980f9df7d..d4a11ff24496 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -584,6 +584,7 @@ static struct sh_eth_cpu_data r7s72100_data = {
 
 	.register_type	= SH_ETH_REG_FAST_RZ,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD,
 	.ecsipr_value	= ECSIPR_ICDIP,
 	.eesipr_value	= EESIPR_TWB1IP | EESIPR_TWBIP | EESIPR_TC1IP |
@@ -631,6 +632,7 @@ static struct sh_eth_cpu_data r8a7740_data = {
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -687,6 +689,7 @@ static struct sh_eth_cpu_data rcar_gen1_data = {
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
 	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ADEIP | EESIPR_ECIIP |
@@ -716,6 +719,7 @@ static struct sh_eth_cpu_data rcar_gen2_data = {
 
 	.register_type	= SH_ETH_REG_FAST_RCAR,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP |
 			  ECSIPR_MPDIP,
@@ -765,6 +769,7 @@ static struct sh_eth_cpu_data sh7724_data = {
 
 	.register_type	= SH_ETH_REG_FAST_SH4,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
 	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ADEIP | EESIPR_ECIIP |
@@ -809,6 +814,7 @@ static struct sh_eth_cpu_data sh7757_data = {
 
 	.register_type	= SH_ETH_REG_FAST_SH4,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
 			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
 			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -884,6 +890,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -925,6 +932,7 @@ static struct sh_eth_cpu_data sh7734_data = {
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -963,6 +971,7 @@ static struct sh_eth_cpu_data sh7763_data = {
 
 	.register_type	= SH_ETH_REG_GIGABIT,
 
+	.edtrr_trns	= EDTRR_TRNS_GETHER,
 	.ecsr_value	= ECSR_ICD | ECSR_MPD,
 	.ecsipr_value	= ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
@@ -995,6 +1004,7 @@ static struct sh_eth_cpu_data sh7619_data = {
 
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
 			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
 			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -1015,6 +1025,7 @@ static struct sh_eth_cpu_data sh771x_data = {
 
 	.register_type	= SH_ETH_REG_FAST_SH3_SH2,
 
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
 	.eesipr_value	= EESIPR_RFCOFIP | EESIPR_ECIIP |
 			  EESIPR_FTCIP | EESIPR_TDEIP | EESIPR_TFUFIP |
 			  EESIPR_FRIP | EESIPR_RDEIP | EESIPR_RFOFIP |
@@ -1094,14 +1105,6 @@ static void read_mac_address(struct net_device *ndev, unsigned char *mac)
 	}
 }
 
-static u32 sh_eth_get_edtrr_trns(struct sh_eth_private *mdp)
-{
-	if (sh_eth_is_gether(mdp) || sh_eth_is_rz_fast_ether(mdp))
-		return EDTRR_TRNS_GETHER;
-	else
-		return EDTRR_TRNS_ETHER;
-}
-
 struct bb_info {
 	void (*set_gate)(void *addr);
 	struct mdiobb_ctrl ctrl;
@@ -1741,9 +1744,9 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status)
 		sh_eth_tx_free(ndev, true);
 
 		/* SH7712 BUG */
-		if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
+		if (edtrr ^ mdp->cd->edtrr_trns) {
 			/* tx dma start */
-			sh_eth_write(ndev, sh_eth_get_edtrr_trns(mdp), EDTRR);
+			sh_eth_write(ndev, mdp->cd->edtrr_trns, EDTRR);
 		}
 		/* wakeup */
 		netif_wake_queue(ndev);
@@ -2502,8 +2505,8 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	mdp->cur_tx++;
 
-	if (!(sh_eth_read(ndev, EDTRR) & sh_eth_get_edtrr_trns(mdp)))
-		sh_eth_write(ndev, sh_eth_get_edtrr_trns(mdp), EDTRR);
+	if (!(sh_eth_read(ndev, EDTRR) & mdp->cd->edtrr_trns))
+		sh_eth_write(ndev, mdp->cd->edtrr_trns, EDTRR);
 
 	return NETDEV_TX_OK;
 }
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index ff3520b0d86f..aa3c45153c9a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -479,6 +479,7 @@ struct sh_eth_cpu_data {
 
 	/* mandatory initialize value */
 	int register_type;
+	u32 edtrr_trns;
 	u32 eesipr_value;
 
 	/* optional initialize value */

From 246e30cc4fd8db8390e83617fa5ff29ad5280ad9 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Mar 2018 23:09:55 +0300
Subject: [PATCH 1221/1678] sh_eth: add sh_eth_cpu_data::xdfar_rw flag

The GEther-like controllers have writeable RDFAR/TDFAR, on the others
they are read-only or just absent (on R-Car). Currently we are calling
sh_eth_is_{gether|rz_fast_ether}() in order to check if these registers
can be written to, however it would be simpler to check the new 'xdfar_rw'
bitfield in the 'struct sh_eth_cpu_data'...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 11 +++++++----
 drivers/net/ethernet/renesas/sh_eth.h |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d4a11ff24496..294fecd42901 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -611,6 +611,7 @@ static struct sh_eth_cpu_data r7s72100_data = {
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.hw_checksum	= 1,
 	.tsu		= 1,
 };
@@ -659,6 +660,7 @@ static struct sh_eth_cpu_data r8a7740_data = {
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.hw_checksum	= 1,
 	.tsu		= 1,
 	.select_mii	= 1,
@@ -918,6 +920,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
 	.rpadir_value   = 2 << 16,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.tsu		= 1,
 	.dual_port	= 1,
 };
@@ -955,6 +958,7 @@ static struct sh_eth_cpu_data sh7734_data = {
 	.hw_swap	= 1,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.tsu		= 1,
 	.hw_checksum	= 1,
 	.select_mii	= 1,
@@ -993,6 +997,7 @@ static struct sh_eth_cpu_data sh7763_data = {
 	.hw_swap	= 1,
 	.no_trimd	= 1,
 	.no_ade		= 1,
+	.xdfar_rw	= 1,
 	.tsu		= 1,
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
@@ -1301,8 +1306,7 @@ static void sh_eth_ring_format(struct net_device *ndev)
 		/* Rx descriptor address set */
 		if (i == 0) {
 			sh_eth_write(ndev, mdp->rx_desc_dma, RDLAR);
-			if (sh_eth_is_gether(mdp) ||
-			    sh_eth_is_rz_fast_ether(mdp))
+			if (mdp->cd->xdfar_rw)
 				sh_eth_write(ndev, mdp->rx_desc_dma, RDFAR);
 		}
 	}
@@ -1324,8 +1328,7 @@ static void sh_eth_ring_format(struct net_device *ndev)
 		if (i == 0) {
 			/* Tx descriptor address set */
 			sh_eth_write(ndev, mdp->tx_desc_dma, TDLAR);
-			if (sh_eth_is_gether(mdp) ||
-			    sh_eth_is_rz_fast_ether(mdp))
+			if (mdp->cd->xdfar_rw)
 				sh_eth_write(ndev, mdp->tx_desc_dma, TDFAR);
 		}
 	}
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index aa3c45153c9a..25c0b1db060a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -508,6 +508,7 @@ struct sh_eth_cpu_data {
 	unsigned rpadir:1;	/* E-DMAC have RPADIR */
 	unsigned no_trimd:1;	/* E-DMAC DO NOT have TRIMD */
 	unsigned no_ade:1;	/* E-DMAC DO NOT have ADE bit in EESR */
+	unsigned xdfar_rw:1;	/* E-DMAC has writeable RDFAR/TDFAR */
 	unsigned hw_checksum:1;	/* E-DMAC has CSMR */
 	unsigned select_mii:1;	/* EtherC have RMII_MII (MII select register) */
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */

From ce9134dff6d90c9f9c6bfd3d32dffdadf8a8e668 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Mar 2018 23:11:19 +0300
Subject: [PATCH 1222/1678] sh_eth: add sh_eth_cpu_data::no_tx_cntrs flag

RZ/A1H (R7S72100) Ether controller doesn't  seem to have the TX counter
registers like TROCR/CDCR/LCCR (or at least they are still undocumented
like some TSU registers), so we bail out of sh_eth_get_stats() early in
this case.  Currently we are calling sh_eth_is_rz_fast_ether() in order
to check for this, but it would be simpler to check the new 'no_tx_cntrs'
bitfield in the 'struct sh_eth_cpu_data'; then we'd be able  to remove
sh_eth_is_rz_fast_ether() as there would be no callers left...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 8 ++------
 drivers/net/ethernet/renesas/sh_eth.h | 1 +
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 294fecd42901..f1cd354b8f1c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -455,11 +455,6 @@ static bool sh_eth_is_gether(struct sh_eth_private *mdp)
 	return mdp->reg_offset == sh_eth_offset_gigabit;
 }
 
-static bool sh_eth_is_rz_fast_ether(struct sh_eth_private *mdp)
-{
-	return mdp->reg_offset == sh_eth_offset_fast_rz;
-}
-
 static void sh_eth_select_mii(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -614,6 +609,7 @@ static struct sh_eth_cpu_data r7s72100_data = {
 	.xdfar_rw	= 1,
 	.hw_checksum	= 1,
 	.tsu		= 1,
+	.no_tx_cntrs	= 1,
 };
 
 static void sh_eth_chip_reset_r8a7740(struct net_device *ndev)
@@ -2534,7 +2530,7 @@ static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 
-	if (sh_eth_is_rz_fast_ether(mdp))
+	if (mdp->cd->no_tx_cntrs)
 		return &ndev->stats;
 
 	if (!mdp->is_opened)
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 25c0b1db060a..7ea162a56c77 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -514,6 +514,7 @@ struct sh_eth_cpu_data {
 	unsigned rmiimode:1;	/* EtherC has RMIIMODE register */
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
 	unsigned magic:1;	/* EtherC has ECMR.MPDE and ECSR.MPD */
+	unsigned no_tx_cntrs:1;	/* EtherC DOES NOT have TX error counters */
 	unsigned dual_port:1;	/* Dual EtherC/E-DMAC */
 };
 

From 4c1d45850d5327049bce461743179b46adc33f9e Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 24 Mar 2018 23:12:54 +0300
Subject: [PATCH 1223/1678] sh_eth: add sh_eth_cpu_data::cexcr flag

GEther controllers have CERCR/CEECR instead of CNDCR on the others.
Currently we are calling sh_eth_is_gether() in order to check for this,
however it would be simpler  to check the new 'cexcr' bitfield in the
'struct sh_eth_cpu_data';  then we'd be able to remove sh_eth_is_gether()
as there would be no callers left...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 11 +++++------
 drivers/net/ethernet/renesas/sh_eth.h |  1 +
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f1cd354b8f1c..306558ef36b5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -450,11 +450,6 @@ static u32 sh_eth_tsu_read(struct sh_eth_private *mdp, int enum_index)
 	return ioread32(mdp->tsu_addr + mdp->reg_offset[enum_index]);
 }
 
-static bool sh_eth_is_gether(struct sh_eth_private *mdp)
-{
-	return mdp->reg_offset == sh_eth_offset_gigabit;
-}
-
 static void sh_eth_select_mii(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -661,6 +656,7 @@ static struct sh_eth_cpu_data r8a7740_data = {
 	.tsu		= 1,
 	.select_mii	= 1,
 	.magic		= 1,
+	.cexcr		= 1,
 };
 
 /* There is CPU dependent code */
@@ -918,6 +914,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = {
 	.no_ade		= 1,
 	.xdfar_rw	= 1,
 	.tsu		= 1,
+	.cexcr		= 1,
 	.dual_port	= 1,
 };
 
@@ -959,6 +956,7 @@ static struct sh_eth_cpu_data sh7734_data = {
 	.hw_checksum	= 1,
 	.select_mii	= 1,
 	.magic		= 1,
+	.cexcr		= 1,
 };
 
 /* SH7763 */
@@ -997,6 +995,7 @@ static struct sh_eth_cpu_data sh7763_data = {
 	.tsu		= 1,
 	.irq_flags	= IRQF_SHARED,
 	.magic		= 1,
+	.cexcr		= 1,
 	.dual_port	= 1,
 };
 
@@ -2540,7 +2539,7 @@ static struct net_device_stats *sh_eth_get_stats(struct net_device *ndev)
 	sh_eth_update_stat(ndev, &ndev->stats.collisions, CDCR);
 	sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors, LCCR);
 
-	if (sh_eth_is_gether(mdp)) {
+	if (mdp->cd->cexcr) {
 		sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors,
 				   CERCR);
 		sh_eth_update_stat(ndev, &ndev->stats.tx_carrier_errors,
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 7ea162a56c77..a0416e04306a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -515,6 +515,7 @@ struct sh_eth_cpu_data {
 	unsigned rtrate:1;	/* EtherC has RTRATE register */
 	unsigned magic:1;	/* EtherC has ECMR.MPDE and ECSR.MPD */
 	unsigned no_tx_cntrs:1;	/* EtherC DOES NOT have TX error counters */
+	unsigned cexcr:1;	/* EtherC has CERCR/CEECR */
 	unsigned dual_port:1;	/* Dual EtherC/E-DMAC */
 };
 

From 7ec59eeac804a59ea6fff81e89b6e584e9955e5b Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:06 -0700
Subject: [PATCH 1224/1678] ice: Add support for control queues

A control queue is a hardware interface which is used by the driver
to interact with other subsystems (like firmware, PHY, etc.). It is
implemented as a producer-consumer ring. More specifically, an
"admin queue" is a type of control queue used to interact with the
firmware.

This patch introduces data structures and functions to initialize
and teardown control/admin queues. Once the admin queue is initialized,
the driver uses it to get the firmware version.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile       |   4 +-
 drivers/net/ethernet/intel/ice/ice.h          |   1 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  94 ++
 drivers/net/ethernet/intel/ice/ice_common.c   | 130 +++
 drivers/net/ethernet/intel/ice/ice_common.h   |  25 +
 drivers/net/ethernet/intel/ice/ice_controlq.c | 965 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_controlq.h |  83 ++
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  32 +
 drivers/net/ethernet/intel/ice/ice_main.c     |  11 +-
 drivers/net/ethernet/intel/ice/ice_osdep.h    |  72 ++
 drivers/net/ethernet/intel/ice/ice_status.h   |  21 +
 drivers/net/ethernet/intel/ice/ice_type.h     |  22 +
 12 files changed, 1458 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_common.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_common.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_controlq.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_controlq.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_hw_autogen.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_osdep.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_status.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 7ea6295d58fb..562836a8f18a 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -7,4 +7,6 @@
 
 obj-$(CONFIG_ICE) += ice.o
 
-ice-y := ice_main.o
+ice-y := ice_main.o	\
+	 ice_controlq.o	\
+	 ice_common.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index e3c1672d4976..59b1400f9e81 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -12,6 +12,7 @@
 #include <linux/compiler.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
+#include <linux/delay.h>
 #include <linux/bitmap.h>
 #include "ice_devids.h"
 #include "ice_type.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
new file mode 100644
index 000000000000..e2fa8e2ea6ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_ADMINQ_CMD_H_
+#define _ICE_ADMINQ_CMD_H_
+
+/* This header file defines the Admin Queue commands, error codes and
+ * descriptor format.  It is shared between Firmware and Software.
+ */
+
+struct ice_aqc_generic {
+	__le32 param0;
+	__le32 param1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get version (direct 0x0001) */
+struct ice_aqc_get_ver {
+	__le32 rom_ver;
+	__le32 fw_build;
+	u8 fw_branch;
+	u8 fw_major;
+	u8 fw_minor;
+	u8 fw_patch;
+	u8 api_branch;
+	u8 api_major;
+	u8 api_minor;
+	u8 api_patch;
+};
+
+/* Queue Shutdown (direct 0x0003) */
+struct ice_aqc_q_shutdown {
+#define ICE_AQC_DRIVER_UNLOADING	BIT(0)
+	__le32 driver_unloading;
+	u8 reserved[12];
+};
+
+/**
+ * struct ice_aq_desc - Admin Queue (AQ) descriptor
+ * @flags: ICE_AQ_FLAG_* flags
+ * @opcode: AQ command opcode
+ * @datalen: length in bytes of indirect/external data buffer
+ * @retval: return value from firmware
+ * @cookie_h: opaque data high-half
+ * @cookie_l: opaque data low-half
+ * @params: command-specific parameters
+ *
+ * Descriptor format for commands the driver posts on the Admin Transmit Queue
+ * (ATQ).  The firmware writes back onto the command descriptor and returns
+ * the result of the command.  Asynchronous events that are not an immediate
+ * result of the command are written to the Admin Receive Queue (ARQ) using
+ * the same descriptor format.  Descriptors are in little-endian notation with
+ * 32-bit words.
+ */
+struct ice_aq_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 retval;
+	__le32 cookie_high;
+	__le32 cookie_low;
+	union {
+		u8 raw[16];
+		struct ice_aqc_generic generic;
+		struct ice_aqc_get_ver get_ver;
+		struct ice_aqc_q_shutdown q_shutdown;
+	} params;
+};
+
+/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
+#define ICE_AQ_LG_BUF	512
+
+#define ICE_AQ_FLAG_LB_S	9
+#define ICE_AQ_FLAG_BUF_S	12
+#define ICE_AQ_FLAG_SI_S	13
+
+#define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
+#define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
+#define ICE_AQ_FLAG_SI		BIT(ICE_AQ_FLAG_SI_S)  /* 0x2000 */
+
+/* error codes */
+enum ice_aq_err {
+	ICE_AQ_RC_OK		= 0,  /* success */
+};
+
+/* Admin Queue command opcodes */
+enum ice_adminq_opc {
+	/* AQ commands */
+	ice_aqc_opc_get_ver				= 0x0001,
+	ice_aqc_opc_q_shutdown				= 0x0003,
+};
+
+#endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
new file mode 100644
index 000000000000..78452b92cae6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_adminq_cmd.h"
+
+/**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @mask: debug mask
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc,
+		  void *buf, u16 buf_len)
+{
+	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+	u16 len;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (!(mask & hw->debug_mask))
+		return;
+#endif
+
+	if (!desc)
+		return;
+
+	len = le16_to_cpu(cq_desc->datalen);
+
+	ice_debug(hw, mask,
+		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		  le16_to_cpu(cq_desc->opcode),
+		  le16_to_cpu(cq_desc->flags),
+		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+	ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->cookie_high),
+		  le32_to_cpu(cq_desc->cookie_low));
+	ice_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.param0),
+		  le32_to_cpu(cq_desc->params.generic.param1));
+	ice_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.addr_high),
+		  le32_to_cpu(cq_desc->params.generic.addr_low));
+	if (buf && cq_desc->datalen != 0) {
+		ice_debug(hw, mask, "Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+
+		ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len);
+	}
+}
+
+/* FW Admin Queue command wrappers */
+
+/**
+ * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
+ * @hw: pointer to the hw struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Helper function to send FW Admin Queue commands to the FW Admin Queue.
+ */
+enum ice_status
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
+		u16 buf_size, struct ice_sq_cd *cd)
+{
+	return ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_aq_get_fw_ver
+ * @hw: pointer to the hw struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the firmware version (0x0001) from the admin queue commands
+ */
+enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_ver *resp;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	resp = &desc.params.get_ver;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	if (!status) {
+		hw->fw_branch = resp->fw_branch;
+		hw->fw_maj_ver = resp->fw_major;
+		hw->fw_min_ver = resp->fw_minor;
+		hw->fw_patch = resp->fw_patch;
+		hw->fw_build = le32_to_cpu(resp->fw_build);
+		hw->api_branch = resp->api_branch;
+		hw->api_maj_ver = resp->api_major;
+		hw->api_min_ver = resp->api_minor;
+		hw->api_patch = resp->api_patch;
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_q_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well (0x0003).
+ */
+enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
+{
+	struct ice_aqc_q_shutdown *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.q_shutdown;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
+
+	if (unloading)
+		cmd->driver_unloading = cpu_to_le32(ICE_AQC_DRIVER_UNLOADING);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
new file mode 100644
index 000000000000..ba9347459db1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_COMMON_H_
+#define _ICE_COMMON_H_
+
+#include "ice.h"
+#include "ice_type.h"
+
+void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
+		  u16 buf_len);
+enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
+void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+enum ice_status
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		struct ice_sq_cd *cd);
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
+enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
+enum ice_status
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
+		void *buf, u16 buf_size, struct ice_sq_cd *cd);
+enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
new file mode 100644
index 000000000000..b32c0fc04bc9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_adminq_init_regs - Initialize AdminQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_adminq_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->adminq;
+
+	cq->sq.head = PF_FW_ATQH;
+	cq->sq.tail = PF_FW_ATQT;
+	cq->sq.len = PF_FW_ATQLEN;
+	cq->sq.bah = PF_FW_ATQBAH;
+	cq->sq.bal = PF_FW_ATQBAL;
+	cq->sq.len_mask = PF_FW_ATQLEN_ATQLEN_M;
+	cq->sq.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M;
+	cq->sq.head_mask = PF_FW_ATQH_ATQH_M;
+
+	cq->rq.head = PF_FW_ARQH;
+	cq->rq.tail = PF_FW_ARQT;
+	cq->rq.len = PF_FW_ARQLEN;
+	cq->rq.bah = PF_FW_ARQBAH;
+	cq->rq.bal = PF_FW_ARQBAL;
+	cq->rq.len_mask = PF_FW_ARQLEN_ARQLEN_M;
+	cq->rq.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M;
+	cq->rq.head_mask = PF_FW_ARQH_ARQH_M;
+}
+
+/**
+ * ice_check_sq_alive
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if Queue is enabled else false.
+ */
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* check both queue-length and queue-enable fields */
+	if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask)
+		return (rd32(hw, cq->sq.len) & (cq->sq.len_mask |
+						cq->sq.len_ena_mask)) ==
+			(cq->num_sq_entries | cq->sq.len_ena_mask);
+
+	return false;
+}
+
+/**
+ * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc);
+
+	cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+						 &cq->sq.desc_buf.pa,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!cq->sq.desc_buf.va)
+		return ICE_ERR_NO_MEMORY;
+	cq->sq.desc_buf.size = size;
+
+	cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+				      sizeof(struct ice_sq_cd), GFP_KERNEL);
+	if (!cq->sq.cmd_buf) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+				   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+		cq->sq.desc_buf.va = NULL;
+		cq->sq.desc_buf.pa = 0;
+		cq->sq.desc_buf.size = 0;
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc);
+
+	cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+						 &cq->rq.desc_buf.pa,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!cq->rq.desc_buf.va)
+		return ICE_ERR_NO_MEMORY;
+	cq->rq.desc_buf.size = size;
+	return 0;
+}
+
+/**
+ * ice_free_ctrlq_sq_ring - Free Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+			   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+	cq->sq.desc_buf.va = NULL;
+	cq->sq.desc_buf.pa = 0;
+	cq->sq.desc_buf.size = 0;
+}
+
+/**
+ * ice_free_ctrlq_rq_ring - Free Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted receive buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.desc_buf.size,
+			   cq->rq.desc_buf.va, cq->rq.desc_buf.pa);
+	cq->rq.desc_buf.va = NULL;
+	cq->rq.desc_buf.pa = 0;
+	cq->rq.desc_buf.size = 0;
+}
+
+/**
+ * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* We'll be allocating the buffer info memory first, then we can
+	 * allocate the mapped buffers for the event processing
+	 */
+	cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries,
+				       sizeof(cq->rq.desc_buf), GFP_KERNEL);
+	if (!cq->rq.dma_head)
+		return ICE_ERR_NO_MEMORY;
+	cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < cq->num_rq_entries; i++) {
+		struct ice_aq_desc *desc;
+		struct ice_dma_mem *bi;
+
+		bi = &cq->rq.r.rq_bi[i];
+		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+					     cq->rq_buf_size, &bi->pa,
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!bi->va)
+			goto unwind_alloc_rq_bufs;
+		bi->size = cq->rq_buf_size;
+
+		/* now configure the descriptors for use */
+		desc = ICE_CTL_Q_DESC(cq->rq, i);
+
+		desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+		if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+		desc->opcode = 0;
+		/* This is in accordance with Admin queue design, there is no
+		 * register for buffer size configuration
+		 */
+		desc->datalen = cpu_to_le16(bi->size);
+		desc->retval = 0;
+		desc->cookie_high = 0;
+		desc->cookie_low = 0;
+		desc->params.generic.addr_high =
+			cpu_to_le32(upper_32_bits(bi->pa));
+		desc->params.generic.addr_low =
+			cpu_to_le32(lower_32_bits(bi->pa));
+		desc->params.generic.param0 = 0;
+		desc->params.generic.param1 = 0;
+	}
+	return 0;
+
+unwind_alloc_rq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+		cq->rq.r.rq_bi[i].va = NULL;
+		cq->rq.r.rq_bi[i].pa = 0;
+		cq->rq.r.rq_bi[i].size = 0;
+	}
+	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+
+	return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static enum ice_status
+ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* No mapped memory needed yet, just the buffer info structures */
+	cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+				       sizeof(cq->sq.desc_buf), GFP_KERNEL);
+	if (!cq->sq.dma_head)
+		return ICE_ERR_NO_MEMORY;
+	cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < cq->num_sq_entries; i++) {
+		struct ice_dma_mem *bi;
+
+		bi = &cq->sq.r.sq_bi[i];
+		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+					     cq->sq_buf_size, &bi->pa,
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!bi->va)
+			goto unwind_alloc_sq_bufs;
+		bi->size = cq->sq_buf_size;
+	}
+	return 0;
+
+unwind_alloc_sq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size,
+				   cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa);
+		cq->sq.r.sq_bi[i].va = NULL;
+		cq->sq.r.sq_bi[i].pa = 0;
+		cq->sq.r.sq_bi[i].size = 0;
+	}
+	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+
+	return ICE_ERR_NO_MEMORY;
+}
+
+/**
+ * ice_free_rq_bufs - Free ARQ buffer info elements
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static void ice_free_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* free descriptors */
+	for (i = 0; i < cq->num_rq_entries; i++) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+		cq->rq.r.rq_bi[i].va = NULL;
+		cq->rq.r.rq_bi[i].pa = 0;
+		cq->rq.r.rq_bi[i].size = 0;
+	}
+
+	/* free the dma header */
+	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+}
+
+/**
+ * ice_free_sq_bufs - Free ATQ buffer info elements
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static void ice_free_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* only unmap if the address is non-NULL */
+	for (i = 0; i < cq->num_sq_entries; i++)
+		if (cq->sq.r.sq_bi[i].pa) {
+			dmam_free_coherent(ice_hw_to_dev(hw),
+					   cq->sq.r.sq_bi[i].size,
+					   cq->sq.r.sq_bi[i].va,
+					   cq->sq.r.sq_bi[i].pa);
+			cq->sq.r.sq_bi[i].va = NULL;
+			cq->sq.r.sq_bi[i].pa = 0;
+			cq->sq.r.sq_bi[i].size = 0;
+		}
+
+	/* free the buffer info list */
+	devm_kfree(ice_hw_to_dev(hw), cq->sq.cmd_buf);
+
+	/* free the dma header */
+	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+}
+
+/**
+ * ice_cfg_sq_regs - configure Control ATQ registers
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the transmit queue
+ */
+static enum ice_status
+ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	u32 reg = 0;
+
+	/* Clear Head and Tail */
+	wr32(hw, cq->sq.head, 0);
+	wr32(hw, cq->sq.tail, 0);
+
+	/* set starting point */
+	wr32(hw, cq->sq.len, (cq->num_sq_entries | cq->sq.len_ena_mask));
+	wr32(hw, cq->sq.bal, lower_32_bits(cq->sq.desc_buf.pa));
+	wr32(hw, cq->sq.bah, upper_32_bits(cq->sq.desc_buf.pa));
+
+	/* Check one register to verify that config was applied */
+	reg = rd32(hw, cq->sq.bal);
+	if (reg != lower_32_bits(cq->sq.desc_buf.pa))
+		return ICE_ERR_AQ_ERROR;
+
+	return 0;
+}
+
+/**
+ * ice_cfg_rq_regs - configure Control ARQ register
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the receive (event q)
+ */
+static enum ice_status
+ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	u32 reg = 0;
+
+	/* Clear Head and Tail */
+	wr32(hw, cq->rq.head, 0);
+	wr32(hw, cq->rq.tail, 0);
+
+	/* set starting point */
+	wr32(hw, cq->rq.len, (cq->num_rq_entries | cq->rq.len_ena_mask));
+	wr32(hw, cq->rq.bal, lower_32_bits(cq->rq.desc_buf.pa));
+	wr32(hw, cq->rq.bah, upper_32_bits(cq->rq.desc_buf.pa));
+
+	/* Update tail in the HW to post pre-allocated buffers */
+	wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
+
+	/* Check one register to verify that config was applied */
+	reg = rd32(hw, cq->rq.bal);
+	if (reg != lower_32_bits(cq->rq.desc_buf.pa))
+		return ICE_ERR_AQ_ERROR;
+
+	return 0;
+}
+
+/**
+ * ice_init_sq - main initialization routine for Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This is the main initialization routine for the Control Send Queue
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_sq_entries
+ *     - cq->sq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code;
+
+	if (cq->sq.count > 0) {
+		/* queue already initialized */
+		ret_code = ICE_ERR_NOT_READY;
+		goto init_ctrlq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if (!cq->num_sq_entries || !cq->sq_buf_size) {
+		ret_code = ICE_ERR_CFG;
+		goto init_ctrlq_exit;
+	}
+
+	cq->sq.next_to_use = 0;
+	cq->sq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = ice_alloc_ctrlq_sq_ring(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = ice_alloc_sq_bufs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* initialize base registers */
+	ret_code = ice_cfg_sq_regs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* success! */
+	cq->sq.count = cq->num_sq_entries;
+	goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+	ice_free_ctrlq_sq_ring(hw, cq);
+
+init_ctrlq_exit:
+	return ret_code;
+}
+
+/**
+ * ice_init_rq - initialize ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main initialization routine for the Admin Receive (Event) Queue.
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code;
+
+	if (cq->rq.count > 0) {
+		/* queue already initialized */
+		ret_code = ICE_ERR_NOT_READY;
+		goto init_ctrlq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if (!cq->num_rq_entries || !cq->rq_buf_size) {
+		ret_code = ICE_ERR_CFG;
+		goto init_ctrlq_exit;
+	}
+
+	cq->rq.next_to_use = 0;
+	cq->rq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = ice_alloc_ctrlq_rq_ring(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = ice_alloc_rq_bufs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* initialize base registers */
+	ret_code = ice_cfg_rq_regs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* success! */
+	cq->rq.count = cq->num_rq_entries;
+	goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+	ice_free_ctrlq_rq_ring(hw, cq);
+
+init_ctrlq_exit:
+	return ret_code;
+}
+
+/**
+ * ice_shutdown_sq - shutdown the Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Transmit Queue
+ */
+static enum ice_status
+ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code = 0;
+
+	mutex_lock(&cq->sq_lock);
+
+	if (!cq->sq.count) {
+		ret_code = ICE_ERR_NOT_READY;
+		goto shutdown_sq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, cq->sq.head, 0);
+	wr32(hw, cq->sq.tail, 0);
+	wr32(hw, cq->sq.len, 0);
+	wr32(hw, cq->sq.bal, 0);
+	wr32(hw, cq->sq.bah, 0);
+
+	cq->sq.count = 0;	/* to indicate uninitialized queue */
+
+	/* free ring buffers and the ring itself */
+	ice_free_sq_bufs(hw, cq);
+	ice_free_ctrlq_sq_ring(hw, cq);
+
+shutdown_sq_out:
+	mutex_unlock(&cq->sq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_aq_ver_check - Check the reported AQ API version.
+ * @fw_branch: The "branch" of FW, typically describes the device type
+ * @fw_major: The major version of the FW API
+ * @fw_minor: The minor version increment of the FW API
+ *
+ * Checks if the driver should load on a given AQ API version.
+ *
+ * Return: 'true' iff the driver should attempt to load. 'false' otherwise.
+ */
+static bool ice_aq_ver_check(u8 fw_branch, u8 fw_major, u8 fw_minor)
+{
+	if (fw_branch != EXP_FW_API_VER_BRANCH)
+		return false;
+	if (fw_major != EXP_FW_API_VER_MAJOR)
+		return false;
+	if (fw_minor != EXP_FW_API_VER_MINOR)
+		return false;
+	return true;
+}
+
+/**
+ * ice_shutdown_rq - shutdown Control ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Receive Queue
+ */
+static enum ice_status
+ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	enum ice_status ret_code = 0;
+
+	mutex_lock(&cq->rq_lock);
+
+	if (!cq->rq.count) {
+		ret_code = ICE_ERR_NOT_READY;
+		goto shutdown_rq_out;
+	}
+
+	/* Stop Control Queue processing */
+	wr32(hw, cq->rq.head, 0);
+	wr32(hw, cq->rq.tail, 0);
+	wr32(hw, cq->rq.len, 0);
+	wr32(hw, cq->rq.bal, 0);
+	wr32(hw, cq->rq.bah, 0);
+
+	/* set rq.count to 0 to indicate uninitialized queue */
+	cq->rq.count = 0;
+
+	/* free ring buffers and the ring itself */
+	ice_free_rq_bufs(hw, cq);
+	ice_free_ctrlq_rq_ring(hw, cq);
+
+shutdown_rq_out:
+	mutex_unlock(&cq->rq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_init_check_adminq - Check version for Admin Queue to know if its alive
+ * @hw: pointer to the hardware structure
+ */
+static enum ice_status ice_init_check_adminq(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->adminq;
+	enum ice_status status;
+
+	status = ice_aq_get_fw_ver(hw, NULL);
+	if (status)
+		goto init_ctrlq_free_rq;
+
+	if (!ice_aq_ver_check(hw->api_branch, hw->api_maj_ver,
+			      hw->api_min_ver)) {
+		status = ICE_ERR_FW_API_VER;
+		goto init_ctrlq_free_rq;
+	}
+
+	return 0;
+
+init_ctrlq_free_rq:
+	ice_shutdown_rq(hw, cq);
+	ice_shutdown_sq(hw, cq);
+	mutex_destroy(&cq->sq_lock);
+	mutex_destroy(&cq->rq_lock);
+	return status;
+}
+
+/**
+ * ice_init_ctrlq - main initialization routine for any control Queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ */
+static enum ice_status ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+	enum ice_status ret_code;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		ice_adminq_init_regs(hw);
+		cq = &hw->adminq;
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	cq->qtype = q_type;
+
+	/* verify input for valid configuration */
+	if (!cq->num_rq_entries || !cq->num_sq_entries ||
+	    !cq->rq_buf_size || !cq->sq_buf_size) {
+		return ICE_ERR_CFG;
+	}
+	mutex_init(&cq->sq_lock);
+	mutex_init(&cq->rq_lock);
+
+	/* setup SQ command write back timeout */
+	cq->sq_cmd_timeout = ICE_CTL_Q_SQ_CMD_TIMEOUT;
+
+	/* allocate the ATQ */
+	ret_code = ice_init_sq(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_destroy_locks;
+
+	/* allocate the ARQ */
+	ret_code = ice_init_rq(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_sq;
+
+	/* success! */
+	return 0;
+
+init_ctrlq_free_sq:
+	ice_shutdown_sq(hw, cq);
+init_ctrlq_destroy_locks:
+	mutex_destroy(&cq->sq_lock);
+	mutex_destroy(&cq->rq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_init_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ */
+enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
+{
+	enum ice_status ret_code;
+
+	/* Init FW admin queue */
+	ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	if (ret_code)
+		return ret_code;
+
+	return ice_init_check_adminq(hw);
+}
+
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		if (ice_check_sq_alive(hw, cq))
+			ice_aq_q_shutdown(hw, true);
+		break;
+	default:
+		return;
+	}
+
+	ice_shutdown_sq(hw, cq);
+	ice_shutdown_rq(hw, cq);
+	mutex_destroy(&cq->sq_lock);
+	mutex_destroy(&cq->rq_lock);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+	/* Shutdown FW admin queue */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+}
+
+/**
+ * ice_clean_sq - cleans Admin send queue (ATQ)
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * returns the number of free desc
+ */
+static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	struct ice_ctl_q_ring *sq = &cq->sq;
+	u16 ntc = sq->next_to_clean;
+	struct ice_sq_cd *details;
+	struct ice_aq_desc *desc;
+
+	desc = ICE_CTL_Q_DESC(*sq, ntc);
+	details = ICE_CTL_Q_DETAILS(*sq, ntc);
+
+	while (rd32(hw, cq->sq.head) != ntc) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head));
+		memset(desc, 0, sizeof(*desc));
+		memset(details, 0, sizeof(*details));
+		ntc++;
+		if (ntc == sq->count)
+			ntc = 0;
+		desc = ICE_CTL_Q_DESC(*sq, ntc);
+		details = ICE_CTL_Q_DETAILS(*sq, ntc);
+	}
+
+	sq->next_to_clean = ntc;
+
+	return ICE_CTL_Q_DESC_UNUSED(sq);
+}
+
+/**
+ * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if the firmware has processed all descriptors on the
+ * admin send queue. Returns false if there are still requests pending.
+ */
+static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* AQ designers suggest use of head for better
+	 * timing reliability than DD bit
+	 */
+	return rd32(hw, cq->sq.head) == cq->sq.next_to_use;
+}
+
+/**
+ * ice_sq_send_cmd - send command to Control Queue (ATQ)
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * This is the main send command routine for the ATQ.  It runs the q,
+ * cleans the queue, etc.
+ */
+enum ice_status
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		struct ice_sq_cd *cd)
+{
+	struct ice_dma_mem *dma_buf = NULL;
+	struct ice_aq_desc *desc_on_ring;
+	bool cmd_completed = false;
+	enum ice_status status = 0;
+	struct ice_sq_cd *details;
+	u32 total_delay = 0;
+	u16 retval = 0;
+	u32 val = 0;
+
+	mutex_lock(&cq->sq_lock);
+
+	cq->sq_last_status = ICE_AQ_RC_OK;
+
+	if (!cq->sq.count) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Send queue not initialized.\n");
+		status = ICE_ERR_AQ_EMPTY;
+		goto sq_send_command_error;
+	}
+
+	if ((buf && !buf_size) || (!buf && buf_size)) {
+		status = ICE_ERR_PARAM;
+		goto sq_send_command_error;
+	}
+
+	if (buf) {
+		if (buf_size > cq->sq_buf_size) {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Invalid buffer size for Control Send queue: %d.\n",
+				  buf_size);
+			status = ICE_ERR_INVAL_SIZE;
+			goto sq_send_command_error;
+		}
+
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+		if (buf_size > ICE_AQ_LG_BUF)
+			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+	}
+
+	val = rd32(hw, cq->sq.head);
+	if (val >= cq->num_sq_entries) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "head overrun at %d in the Control Send Queue ring\n",
+			  val);
+		status = ICE_ERR_AQ_EMPTY;
+		goto sq_send_command_error;
+	}
+
+	details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
+	if (cd)
+		memcpy(details, cd, sizeof(*details));
+	else
+		memset(details, 0, sizeof(*details));
+
+	/* Call clean and check queue available function to reclaim the
+	 * descriptors that were processed by FW/MBX; the function returns the
+	 * number of desc available. The clean function called here could be
+	 * called in a separate thread in case of asynchronous completions.
+	 */
+	if (ice_clean_sq(hw, cq) == 0) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Error: Control Send Queue is full.\n");
+		status = ICE_ERR_AQ_FULL;
+		goto sq_send_command_error;
+	}
+
+	/* initialize the temp desc pointer with the right desc */
+	desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use);
+
+	/* if the desc is available copy the temp desc to the right place */
+	memcpy(desc_on_ring, desc, sizeof(*desc_on_ring));
+
+	/* if buf is not NULL assume indirect command */
+	if (buf) {
+		dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use];
+		/* copy the user buf into the respective DMA buf */
+		memcpy(dma_buf->va, buf, buf_size);
+		desc_on_ring->datalen = cpu_to_le16(buf_size);
+
+		/* Update the address values in the desc with the pa value
+		 * for respective buffer
+		 */
+		desc_on_ring->params.generic.addr_high =
+			cpu_to_le32(upper_32_bits(dma_buf->pa));
+		desc_on_ring->params.generic.addr_low =
+			cpu_to_le32(lower_32_bits(dma_buf->pa));
+	}
+
+	/* Debug desc and buffer */
+	ice_debug(hw, ICE_DBG_AQ_MSG,
+		  "ATQ: Control Send queue desc and buffer:\n");
+
+	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size);
+
+	(cq->sq.next_to_use)++;
+	if (cq->sq.next_to_use == cq->sq.count)
+		cq->sq.next_to_use = 0;
+	wr32(hw, cq->sq.tail, cq->sq.next_to_use);
+
+	do {
+		if (ice_sq_done(hw, cq))
+			break;
+
+		mdelay(1);
+		total_delay++;
+	} while (total_delay < cq->sq_cmd_timeout);
+
+	/* if ready, copy the desc back to temp */
+	if (ice_sq_done(hw, cq)) {
+		memcpy(desc, desc_on_ring, sizeof(*desc));
+		if (buf) {
+			/* get returned length to copy */
+			u16 copy_size = le16_to_cpu(desc->datalen);
+
+			if (copy_size > buf_size) {
+				ice_debug(hw, ICE_DBG_AQ_MSG,
+					  "Return len %d > than buf len %d\n",
+					  copy_size, buf_size);
+				status = ICE_ERR_AQ_ERROR;
+			} else {
+				memcpy(buf, dma_buf->va, copy_size);
+			}
+		}
+		retval = le16_to_cpu(desc->retval);
+		if (retval) {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Control Send Queue command completed with error 0x%x\n",
+				  retval);
+
+			/* strip off FW internal code */
+			retval &= 0xff;
+		}
+		cmd_completed = true;
+		if (!status && retval != ICE_AQ_RC_OK)
+			status = ICE_ERR_AQ_ERROR;
+		cq->sq_last_status = (enum ice_aq_err)retval;
+	}
+
+	ice_debug(hw, ICE_DBG_AQ_MSG,
+		  "ATQ: desc and buffer writeback:\n");
+
+	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size);
+
+	/* save writeback AQ if requested */
+	if (details->wb_desc)
+		memcpy(details->wb_desc, desc_on_ring,
+		       sizeof(*details->wb_desc));
+
+	/* update the error if time out occurred */
+	if (!cmd_completed) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Send Queue Writeback timeout.\n");
+		status = ICE_ERR_AQ_TIMEOUT;
+	}
+
+sq_send_command_error:
+	mutex_unlock(&cq->sq_lock);
+	return status;
+}
+
+/**
+ * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function
+ * @desc: pointer to the temp descriptor (non DMA mem)
+ * @opcode: the opcode can be used to decide which flags to turn off or on
+ *
+ * Fill the desc with default values
+ */
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
+{
+	/* zero out the desc */
+	memset(desc, 0, sizeof(*desc));
+	desc->opcode = cpu_to_le16(opcode);
+	desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
new file mode 100644
index 000000000000..6133d38525e0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_CONTROLQ_H_
+#define _ICE_CONTROLQ_H_
+
+#include "ice_adminq_cmd.h"
+
+#define ICE_CTL_Q_DESC(R, i) \
+	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
+
+#define ICE_CTL_Q_DESC_UNUSED(R) \
+	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->next_to_clean - (R)->next_to_use - 1)
+
+/* Defines that help manage the driver vs FW API checks.
+ * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
+ *
+ */
+#define EXP_FW_API_VER_BRANCH		0x00
+#define EXP_FW_API_VER_MAJOR		0x00
+#define EXP_FW_API_VER_MINOR		0x01
+
+/* Different control queue types: These are mainly for SW consumption. */
+enum ice_ctl_q {
+	ICE_CTL_Q_UNKNOWN = 0,
+	ICE_CTL_Q_ADMIN,
+};
+
+/* Control Queue default settings */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT	250  /* msecs */
+
+struct ice_ctl_q_ring {
+	void *dma_head;			/* Virtual address to dma head */
+	struct ice_dma_mem desc_buf;	/* descriptor ring memory */
+	void *cmd_buf;			/* command buffer memory */
+
+	union {
+		struct ice_dma_mem *sq_bi;
+		struct ice_dma_mem *rq_bi;
+	} r;
+
+	u16 count;		/* Number of descriptors */
+
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	u32 len;
+	u32 bah;
+	u32 bal;
+	u32 len_mask;
+	u32 len_ena_mask;
+	u32 head_mask;
+};
+
+/* sq transaction details */
+struct ice_sq_cd {
+	struct ice_aq_desc *wb_desc;
+};
+
+#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
+
+/* Control Queue information */
+struct ice_ctl_q_info {
+	enum ice_ctl_q qtype;
+	struct ice_ctl_q_ring rq;	/* receive queue */
+	struct ice_ctl_q_ring sq;	/* send queue */
+	u32 sq_cmd_timeout;		/* send queue cmd write back timeout */
+	u16 num_rq_entries;		/* receive queue depth */
+	u16 num_sq_entries;		/* send queue depth */
+	u16 rq_buf_size;		/* receive queue buffer size */
+	u16 sq_buf_size;		/* send queue buffer size */
+	struct mutex sq_lock;		/* Send queue lock */
+	struct mutex rq_lock;		/* Receive queue lock */
+	enum ice_aq_err sq_last_status;	/* last status on send queue */
+	enum ice_aq_err rq_last_status;	/* last status on receive queue */
+};
+
+#endif /* _ICE_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
new file mode 100644
index 000000000000..66a982b55eaf
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Machine-generated file */
+
+#ifndef _ICE_HW_AUTOGEN_H_
+#define _ICE_HW_AUTOGEN_H_
+
+#define PF_FW_ARQBAH			0x00080180
+#define PF_FW_ARQBAL			0x00080080
+#define PF_FW_ARQH			0x00080380
+#define PF_FW_ARQH_ARQH_S		0
+#define PF_FW_ARQH_ARQH_M		ICE_M(0x3FF, PF_FW_ARQH_ARQH_S)
+#define PF_FW_ARQLEN			0x00080280
+#define PF_FW_ARQLEN_ARQLEN_S		0
+#define PF_FW_ARQLEN_ARQLEN_M		ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S)
+#define PF_FW_ARQLEN_ARQENABLE_S	31
+#define PF_FW_ARQLEN_ARQENABLE_M	BIT(PF_FW_ARQLEN_ARQENABLE_S)
+#define PF_FW_ARQT			0x00080480
+#define PF_FW_ATQBAH			0x00080100
+#define PF_FW_ATQBAL			0x00080000
+#define PF_FW_ATQH			0x00080300
+#define PF_FW_ATQH_ATQH_S		0
+#define PF_FW_ATQH_ATQH_M		ICE_M(0x3FF, PF_FW_ATQH_ATQH_S)
+#define PF_FW_ATQLEN			0x00080200
+#define PF_FW_ATQLEN_ATQLEN_S		0
+#define PF_FW_ATQLEN_ATQLEN_M		ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S)
+#define PF_FW_ATQLEN_ATQENABLE_S	31
+#define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
+#define PF_FW_ATQT			0x00080400
+
+#endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 3ada230a3fc7..8b63c25fda8b 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -20,7 +20,11 @@ MODULE_VERSION(DRV_VERSION);
 
 static int debug = -1;
 module_param(debug, int, 0644);
-MODULE_PARM_DESC(debug, "netif message level (0=none,...,0x7FFF=all)");
+#ifndef CONFIG_DYNAMIC_DEBUG
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
+#else
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
+#endif /* !CONFIG_DYNAMIC_DEBUG */
 
 /**
  * ice_probe - Device initialization routine
@@ -79,6 +83,11 @@ static int ice_probe(struct pci_dev *pdev,
 	hw->bus.func = PCI_FUNC(pdev->devfn);
 	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
 
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (debug < -1)
+		hw->debug_mask = debug;
+#endif
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
new file mode 100644
index 000000000000..5c992aa1f150
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_OSDEP_H_
+#define _ICE_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/io.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+
+#define ICE_M(m, s)		((m) << (s))
+
+struct ice_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	size_t size;
+};
+
+#define ice_hw_to_dev(ptr)	\
+	(&(container_of((ptr), struct ice_pf, hw))->pdev->dev)
+
+#ifdef CONFIG_DYNAMIC_DEBUG
+#define ice_debug(hw, type, fmt, args...) \
+	dev_dbg(ice_hw_to_dev(hw), fmt, ##args)
+
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+	print_hex_dump_debug(KBUILD_MODNAME " ",		\
+			     DUMP_PREFIX_OFFSET, rowsize,	\
+			     groupsize, buf, len, false)
+#else
+#define ice_debug(hw, type, fmt, args...)			\
+do {								\
+	if ((type) & (hw)->debug_mask)				\
+		dev_info(ice_hw_to_dev(hw), fmt, ##args);	\
+} while (0)
+
+#ifdef DEBUG
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do {								\
+	if ((type) & (hw)->debug_mask)				\
+		print_hex_dump_debug(KBUILD_MODNAME,		\
+				     DUMP_PREFIX_OFFSET,	\
+				     rowsize, groupsize, buf,	\
+				     len, false);		\
+} while (0)
+#else
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do {								\
+	struct ice_hw *hw_l = hw;				\
+	if ((type) & (hw_l)->debug_mask) {			\
+		u16 len_l = len;				\
+		u8 *buf_l = buf;				\
+		int i;						\
+		for (i = 0; i < (len_l - 16); i += 16)		\
+			ice_debug(hw_l, type, "0x%04X  %16ph\n",\
+				  i, ((buf_l) + i));		\
+		if (i < len_l)					\
+			ice_debug(hw_l, type, "0x%04X  %*ph\n", \
+				  i, ((len_l) - i), ((buf_l) + i));\
+	}							\
+} while (0)
+#endif /* DEBUG */
+#endif /* CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _ICE_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
new file mode 100644
index 000000000000..978c6dff3c71
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_STATUS_H_
+#define _ICE_STATUS_H_
+
+/* Error Codes */
+enum ice_status {
+	ICE_ERR_PARAM				= -1,
+	ICE_ERR_NOT_READY			= -3,
+	ICE_ERR_INVAL_SIZE			= -6,
+	ICE_ERR_FW_API_VER			= -10,
+	ICE_ERR_NO_MEMORY			= -11,
+	ICE_ERR_CFG				= -12,
+	ICE_ERR_AQ_ERROR			= -100,
+	ICE_ERR_AQ_TIMEOUT			= -101,
+	ICE_ERR_AQ_FULL				= -102,
+	ICE_ERR_AQ_EMPTY			= -104,
+};
+
+#endif /* _ICE_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index cd018f81b5c7..212ab6cc0a42 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -4,6 +4,15 @@
 #ifndef _ICE_TYPE_H_
 #define _ICE_TYPE_H_
 
+#include "ice_status.h"
+#include "ice_hw_autogen.h"
+#include "ice_osdep.h"
+#include "ice_controlq.h"
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+#define ICE_DBG_AQ_MSG		BIT_ULL(24)
+#define ICE_DBG_AQ_CMD		BIT_ULL(27)
+
 /* Bus parameters */
 struct ice_bus_info {
 	u16 device;
@@ -14,6 +23,7 @@ struct ice_bus_info {
 struct ice_hw {
 	u8 __iomem *hw_addr;
 	void *back;
+	u64 debug_mask;		/* bitmap for debug mask */
 
 	/* pci info */
 	u16 device_id;
@@ -23,6 +33,18 @@ struct ice_hw {
 	u8 revision_id;
 
 	struct ice_bus_info bus;
+	/* Control Queue info */
+	struct ice_ctl_q_info adminq;
+
+	u8 api_branch;		/* API branch version */
+	u8 api_maj_ver;		/* API major version */
+	u8 api_min_ver;		/* API minor version */
+	u8 api_patch;		/* API patch version */
+	u8 fw_branch;		/* firmware branch version */
+	u8 fw_maj_ver;		/* firmware major version */
+	u8 fw_min_ver;		/* firmware minor version */
+	u8 fw_patch;		/* firmware patch version */
+	u32 fw_build;		/* firmware build number */
 };
 
 #endif /* _ICE_TYPE_H_ */

From 3c95c567322e1dfaa7e0348171181aaf63aa6049 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:15 -0400
Subject: [PATCH 1225/1678] net: qla3xxx: Eliminate duplicate barriers on
 weakly-ordered archs

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing
the register write.

Since code already has an explicit barrier call, changing code to

wmb()
writel_relaxed()
mmiowb()

for multi-arch support.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qla3xxx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 9e5264d8773b..b48f76182049 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -1858,8 +1858,9 @@ static void ql_update_small_bufq_prod_index(struct ql3_adapter *qdev)
 			qdev->small_buf_release_cnt -= 8;
 		}
 		wmb();
-		writel(qdev->small_buf_q_producer_index,
-			&port_regs->CommonRegs.rxSmallQProducerIndex);
+		writel_relaxed(qdev->small_buf_q_producer_index,
+			       &port_regs->CommonRegs.rxSmallQProducerIndex);
+		mmiowb();
 	}
 }
 

From f9442ac498a86b1b0ec383a332be2635fbb1ed98 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:16 -0400
Subject: [PATCH 1226/1678] qlcnic: Eliminate duplicate barriers on
 weakly-ordered archs

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing
the register write.

Since code already has an explicit barrier call, changing writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Acked-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 46b0372dd032..97c146e7698a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -478,7 +478,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter)
 	wmb();
 
 	/* clear the interrupt trigger control register */
-	writel(0, adapter->isr_int_vec);
+	writel_relaxed(0, adapter->isr_int_vec);
 	intr_val = readl(adapter->isr_int_vec);
 	do {
 		intr_val = readl(adapter->tgt_status_reg);

From edd874235ac20a04483f15b27251203e9508dec5 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:17 -0400
Subject: [PATCH 1227/1678] bnx2x: Replace doorbell barrier() with wmb()

barrier() doesn't guarantee memory writes to be observed by the hardware on
all architectures. barrier() only tells compiler not to move this code
with respect to other read/writes.

If memory write needs to be observed by the HW, wmb() is the right choice.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c     | 3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index d7c98e807ca8..0f86f1850de8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4153,7 +4153,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	wmb();
 
 	txdata->tx_db.data.prod += nbd;
-	barrier();
+	/* make sure descriptor update is observed by HW */
+	wmb();
 
 	DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 1e33abde4a3e..39af4f85379d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2591,7 +2591,8 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
 	wmb();
 
 	txdata->tx_db.data.prod += 2;
-	barrier();
+	/* make sure descriptor update is observed by the HW */
+	wmb();
 	DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
 
 	mmiowb();

From 7f883c774e1bad34d5474d8a74989336631ce178 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:18 -0400
Subject: [PATCH 1228/1678] bnx2x: Eliminate duplicate barriers on
 weakly-ordered archs

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing
the register write.

Since code already has an explicit barrier call, changing writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h         | 12 ++++++++----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c     |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h     |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c    |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c    |  4 +++-
 6 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 352beff796ae..d847e1b9c37b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -166,6 +166,12 @@ do {						\
 #define REG_RD8(bp, offset)		readb(REG_ADDR(bp, offset))
 #define REG_RD16(bp, offset)		readw(REG_ADDR(bp, offset))
 
+#define REG_WR_RELAXED(bp, offset, val)	\
+	writel_relaxed((u32)val, REG_ADDR(bp, offset))
+
+#define REG_WR16_RELAXED(bp, offset, val) \
+	writew_relaxed((u16)val, REG_ADDR(bp, offset))
+
 #define REG_WR(bp, offset, val)		writel((u32)val, REG_ADDR(bp, offset))
 #define REG_WR8(bp, offset, val)	writeb((u8)val, REG_ADDR(bp, offset))
 #define REG_WR16(bp, offset, val)	writew((u16)val, REG_ADDR(bp, offset))
@@ -758,10 +764,8 @@ struct bnx2x_fastpath {
 #if (BNX2X_DB_SHIFT < BNX2X_DB_MIN_SHIFT)
 #error "Min DB doorbell stride is 8"
 #endif
-#define DOORBELL(bp, cid, val) \
-	do { \
-		writel((u32)(val), bp->doorbells + (bp->db_size * (cid))); \
-	} while (0)
+#define DOORBELL_RELAXED(bp, cid, val) \
+	writel_relaxed((u32)(val), (bp)->doorbells + ((bp)->db_size * (cid)))
 
 /* TX CSUM helpers */
 #define SKB_CS_OFF(skb)		(offsetof(struct tcphdr, check) - \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0f86f1850de8..95871576ab92 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4156,7 +4156,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* make sure descriptor update is observed by HW */
 	wmb();
 
-	DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
+	DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);
 
 	mmiowb();
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a5265e1344f1..a8ce5c55bbb0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -522,8 +522,8 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 	wmb();
 
 	for (i = 0; i < sizeof(rx_prods)/4; i++)
-		REG_WR(bp, fp->ustorm_rx_prods_offset + i*4,
-		       ((u32 *)&rx_prods)[i]);
+		REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4,
+			       ((u32 *)&rx_prods)[i]);
 
 	mmiowb(); /* keep prod updates ordered */
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 39af4f85379d..da18aa239acb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2593,7 +2593,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
 	txdata->tx_db.data.prod += 2;
 	/* make sure descriptor update is observed by the HW */
 	wmb();
-	DOORBELL(bp, txdata->cid, txdata->tx_db.raw);
+	DOORBELL_RELAXED(bp, txdata->cid, txdata->tx_db.raw);
 
 	mmiowb();
 	barrier();
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index fd97250d6371..c766ae23bc74 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -3817,8 +3817,8 @@ static void bnx2x_sp_prod_update(struct bnx2x *bp)
 	 */
 	mb();
 
-	REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
-		 bp->spq_prod_idx);
+	REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
+			 bp->spq_prod_idx);
 	mmiowb();
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 76a4668c50fe..8e0a317b31f7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -170,7 +170,9 @@ static int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping)
 	wmb();
 
 	/* Trigger the PF FW */
-	writeb(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+	writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+
+	mmiowb();
 
 	/* Wait for PF to complete */
 	while ((tout >= 0) && (!*done)) {

From e42d8cee343a545ac2d9557a3b28708bbca2bd31 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:19 -0400
Subject: [PATCH 1229/1678] net: qlge: Eliminate duplicate barriers on
 weakly-ordered archs

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlge/qlge.h      | 16 ++++++++++++++++
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |  3 ++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 84ac50f92c9c..3e71b65a9546 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -2184,6 +2184,22 @@ static inline void ql_write_db_reg(u32 val, void __iomem *addr)
 	mmiowb();
 }
 
+/*
+ * Doorbell Registers:
+ * Doorbell registers are virtual registers in the PCI memory space.
+ * The space is allocated by the chip during PCI initialization.  The
+ * device driver finds the doorbell address in BAR 3 in PCI config space.
+ * The registers are used to control outbound and inbound queues. For
+ * example, the producer index for an outbound queue.  Each queue uses
+ * 1 4k chunk of memory.  The lower half of the space is for outbound
+ * queues. The upper half is for inbound queues.
+ * Caller has to guarantee ordering.
+ */
+static inline void ql_write_db_reg_relaxed(u32 val, void __iomem *addr)
+{
+	writel_relaxed(val, addr);
+}
+
 /*
  * Shadow Registers:
  * Outbound queues have a consumer index that is maintained by the chip.
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 50038d946ced..8293c2028002 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2700,7 +2700,8 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev)
 		tx_ring->prod_idx = 0;
 	wmb();
 
-	ql_write_db_reg(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
+	ql_write_db_reg_relaxed(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
+	mmiowb();
 	netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev,
 		     "tx queued, slot %d, len %d\n",
 		     tx_ring->prod_idx, skb->len);

From fd141fa47c03018aa1f77c335b0f444493e145d5 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:20 -0400
Subject: [PATCH 1230/1678] bnxt_en: Eliminate duplicate barriers on
 weakly-ordered archs

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Since code already has an explicit barrier call, changing writel() to
writel_relaxed().

Also add mmiowb() so that write code doesn't move outside of scope.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Acked-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c7e5e6f09647..3ff5f65758a3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1922,7 +1922,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 		/* Sync BD data before updating doorbell */
 		wmb();
 
-		bnxt_db_write(bp, db, DB_KEY_TX | prod);
+		bnxt_db_write_relaxed(bp, db, DB_KEY_TX | prod);
 	}
 
 	cpr->cp_raw_cons = raw_cons;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5e3d62189cab..960162c9386c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1402,6 +1402,15 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
 		((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
 }
 
+/* For TX and RX ring doorbells with no ordering guarantee*/
+static inline void bnxt_db_write_relaxed(struct bnxt *bp, void __iomem *db,
+					 u32 val)
+{
+	writel_relaxed(val, db);
+	if (bp->flags & BNXT_FLAG_DOUBLE_DB)
+		writel_relaxed(val, db);
+}
+
 /* For TX and RX ring doorbells */
 static inline void bnxt_db_write(struct bnxt *bp, void __iomem *db, u32 val)
 {

From 6d2e1a8d5e25e5f4563f5ea24bcb5da1ae261b26 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sun, 25 Mar 2018 10:39:21 -0400
Subject: [PATCH 1231/1678] net: ena: Eliminate duplicate barriers on
 weakly-ordered archs

Code includes barrier() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a barrier().

Since code already has an explicit barrier call, changing writel() to
writel_relaxed() and adding mmiowb() for ordering protection.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c     | 8 ++++++--
 drivers/net/ethernet/amazon/ena/ena_eth_com.h | 8 ++++++--
 drivers/net/ethernet/amazon/ena/ena_netdev.c  | 5 +++--
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index bf2de5298005..1b9d3130af4d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -631,8 +631,10 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
 	 */
 	wmb();
 
-	writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+	writel_relaxed(mmio_read_reg,
+		       ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
 
+	mmiowb();
 	for (i = 0; i < timeout; i++) {
 		if (read_resp->req_id == mmio_read->seq_num)
 			break;
@@ -1826,7 +1828,9 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 
 	/* write the aenq doorbell after all AENQ descriptors were read */
 	mb();
-	writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+	writel_relaxed((u32)aenq->head,
+		       dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+	mmiowb();
 }
 
 int ena_com_dev_reset(struct ena_com_dev *ena_dev,
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 2f7657227cfe..6fdc753d9483 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
 	return io_sq->q_depth - 1 - cnt;
 }
 
-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
+					    bool relaxed)
 {
 	u16 tail;
 
@@ -116,7 +117,10 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
 	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
 		 io_sq->qid, tail);
 
-	writel(tail, io_sq->db_addr);
+	if (relaxed)
+		writel_relaxed(tail, io_sq->db_addr);
+	else
+		writel(tail, io_sq->db_addr);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6975150d144e..a822e70c2af3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -556,7 +556,8 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 		 * issue a doorbell
 		 */
 		wmb();
-		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
+		mmiowb();
 	}
 
 	rx_ring->next_to_use = next_to_use;
@@ -2151,7 +2152,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
 		/* trigger the dma engine */
-		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
 		u64_stats_update_begin(&tx_ring->syncp);
 		tx_ring->tx_stats.doorbells++;
 		u64_stats_update_end(&tx_ring->syncp);

From f31e4b6fe227dfd7ed51c3fc0550878c7d7a8cf2 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:07 -0700
Subject: [PATCH 1232/1678] ice: Start hardware initialization

This patch implements multiple pieces of the initialization flow
as follows:

1) A reset is issued to ensure a clean device state, followed
   by initialization of admin queue interface.

2) Once the admin queue interface is up, clear the PF config
   and transition the device to non-PXE mode.

3) Get the NVM configuration stored in the device's non-volatile
   memory (NVM) using ice_init_nvm.

CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile       |   3 +-
 drivers/net/ethernet/intel/ice/ice.h          |   2 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  79 ++++
 drivers/net/ethernet/intel/ice/ice_common.c   | 405 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |  11 +
 drivers/net/ethernet/intel/ice/ice_controlq.h |   3 +
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  30 ++
 drivers/net/ethernet/intel/ice/ice_main.c     |  31 ++
 drivers/net/ethernet/intel/ice/ice_nvm.c      | 236 ++++++++++
 drivers/net/ethernet/intel/ice/ice_osdep.h    |   1 +
 drivers/net/ethernet/intel/ice/ice_status.h   |   5 +
 drivers/net/ethernet/intel/ice/ice_type.h     |  49 +++
 12 files changed, 854 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_nvm.c

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 562836a8f18a..b9a32ddecb17 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_ICE) += ice.o
 
 ice-y := ice_main.o	\
 	 ice_controlq.o	\
-	 ice_common.o
+	 ice_common.o	\
+	 ice_nvm.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 59b1400f9e81..4c4f161768ed 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -16,8 +16,10 @@
 #include <linux/bitmap.h>
 #include "ice_devids.h"
 #include "ice_type.h"
+#include "ice_common.h"
 
 #define ICE_BAR0		0
+#define ICE_AQ_LEN		64
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index e2fa8e2ea6ae..70fe00c90329 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -36,6 +36,67 @@ struct ice_aqc_q_shutdown {
 	u8 reserved[12];
 };
 
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+struct ice_aqc_req_res {
+	__le16 res_id;
+#define ICE_AQC_RES_ID_NVM		1
+#define ICE_AQC_RES_ID_SDP		2
+#define ICE_AQC_RES_ID_CHNG_LOCK	3
+#define ICE_AQC_RES_ID_GLBL_LOCK	4
+	__le16 access_type;
+#define ICE_AQC_RES_ACCESS_READ		1
+#define ICE_AQC_RES_ACCESS_WRITE	2
+
+	/* Upon successful completion, FW writes this value and driver is
+	 * expected to release resource before timeout. This value is provided
+	 * in milliseconds.
+	 */
+	__le32 timeout;
+#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS	3000
+#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS	180000
+#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS	1000
+#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS	3000
+	/* For SDP: pin id of the SDP */
+	__le32 res_number;
+	/* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
+	__le16 status;
+#define ICE_AQ_RES_GLBL_SUCCESS		0
+#define ICE_AQ_RES_GLBL_IN_PROG		1
+#define ICE_AQ_RES_GLBL_DONE		2
+	u8 reserved[2];
+};
+
+/* Clear PXE Command and response (direct 0x0110) */
+struct ice_aqc_clear_pxe {
+	u8 rx_cnt;
+#define ICE_AQC_CLEAR_PXE_RX_CNT		0x2
+	u8 reserved[15];
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct ice_aqc_nvm {
+	u8	cmd_flags;
+#define ICE_AQC_NVM_LAST_CMD		BIT(0)
+#define ICE_AQC_NVM_PCIR_REQ		BIT(0)	/* Used by NVM Update reply */
+#define ICE_AQC_NVM_PRESERVATION_S	1
+#define ICE_AQC_NVM_PRESERVATION_M	(3 << CSR_AQ_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_NO_PRESERVATION	(0 << CSR_AQ_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_ALL	BIT(1)
+#define ICE_AQC_NVM_PRESERVE_SELECTED	(3 << CSR_AQ_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_FLASH_ONLY		BIT(7)
+	u8	module_typeid;
+	__le16	length;
+#define ICE_AQC_NVM_ERASE_LEN	0xFFFF
+	__le32	offset;
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
 /**
  * struct ice_aq_desc - Admin Queue (AQ) descriptor
  * @flags: ICE_AQ_FLAG_* flags
@@ -65,6 +126,9 @@ struct ice_aq_desc {
 		struct ice_aqc_generic generic;
 		struct ice_aqc_get_ver get_ver;
 		struct ice_aqc_q_shutdown q_shutdown;
+		struct ice_aqc_req_res res_owner;
+		struct ice_aqc_clear_pxe clear_pxe;
+		struct ice_aqc_nvm nvm;
 	} params;
 };
 
@@ -82,6 +146,8 @@ struct ice_aq_desc {
 /* error codes */
 enum ice_aq_err {
 	ICE_AQ_RC_OK		= 0,  /* success */
+	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
+	ICE_AQ_RC_EEXIST	= 13, /* object already exists */
 };
 
 /* Admin Queue command opcodes */
@@ -89,6 +155,19 @@ enum ice_adminq_opc {
 	/* AQ commands */
 	ice_aqc_opc_get_ver				= 0x0001,
 	ice_aqc_opc_q_shutdown				= 0x0003,
+
+	/* resource ownership */
+	ice_aqc_opc_req_res				= 0x0008,
+	ice_aqc_opc_release_res				= 0x0009,
+
+	/* PXE */
+	ice_aqc_opc_clear_pxe_mode			= 0x0110,
+
+	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
+
+	/* NVM commands */
+	ice_aqc_opc_nvm_read				= 0x0701,
+
 };
 
 #endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 78452b92cae6..d3d420c3ba7b 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4,6 +4,224 @@
 #include "ice_common.h"
 #include "ice_adminq_cmd.h"
 
+#define ICE_PF_RESET_WAIT_COUNT	200
+
+/**
+ * ice_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the MAC type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ */
+static enum ice_status ice_set_mac_type(struct ice_hw *hw)
+{
+	if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
+		return ICE_ERR_DEVICE_NOT_SUPPORTED;
+
+	hw->mac_type = ICE_MAC_GENERIC;
+	return 0;
+}
+
+/**
+ * ice_clear_pf_cfg - Clear PF configuration
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_init_hw - main hardware initialization routine
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_init_hw(struct ice_hw *hw)
+{
+	enum ice_status status;
+
+	/* Set MAC type based on DeviceID */
+	status = ice_set_mac_type(hw);
+	if (status)
+		return status;
+
+	hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) &
+			 PF_FUNC_RID_FUNC_NUM_M) >>
+		PF_FUNC_RID_FUNC_NUM_S;
+
+	status = ice_reset(hw, ICE_RESET_PFR);
+	if (status)
+		return status;
+
+	status = ice_init_all_ctrlq(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	status = ice_clear_pf_cfg(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	ice_clear_pxe_mode(hw);
+
+	status = ice_init_nvm(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	return 0;
+
+err_unroll_cqinit:
+	ice_shutdown_all_ctrlq(hw);
+	return status;
+}
+
+/**
+ * ice_deinit_hw - unroll initialization operations done by ice_init_hw
+ * @hw: pointer to the hardware structure
+ */
+void ice_deinit_hw(struct ice_hw *hw)
+{
+	ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_check_reset - Check to see if a global reset is complete
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_check_reset(struct ice_hw *hw)
+{
+	u32 cnt, reg = 0, grst_delay;
+
+	/* Poll for Device Active state in case a recent CORER, GLOBR,
+	 * or EMPR has occurred. The grst delay value is in 100ms units.
+	 * Add 1sec for outstanding AQ commands that can take a long time.
+	 */
+	grst_delay = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >>
+		      GLGEN_RSTCTL_GRSTDEL_S) + 10;
+
+	for (cnt = 0; cnt < grst_delay; cnt++) {
+		mdelay(100);
+		reg = rd32(hw, GLGEN_RSTAT);
+		if (!(reg & GLGEN_RSTAT_DEVSTATE_M))
+			break;
+	}
+
+	if (cnt == grst_delay) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Global reset polling failed to complete.\n");
+		return ICE_ERR_RESET_FAILED;
+	}
+
+#define ICE_RESET_DONE_MASK	(GLNVM_ULD_CORER_DONE_M | \
+				 GLNVM_ULD_GLOBR_DONE_M)
+
+	/* Device is Active; check Global Reset processes are done */
+	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+		reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK;
+		if (reg == ICE_RESET_DONE_MASK) {
+			ice_debug(hw, ICE_DBG_INIT,
+				  "Global reset processes done. %d\n", cnt);
+			break;
+		}
+		mdelay(10);
+	}
+
+	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n",
+			  reg);
+		return ICE_ERR_RESET_FAILED;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * If a global reset has been triggered, this function checks
+ * for its completion and then issues the PF reset
+ */
+static enum ice_status ice_pf_reset(struct ice_hw *hw)
+{
+	u32 cnt, reg;
+
+	/* If at function entry a global reset was already in progress, i.e.
+	 * state is not 'device active' or any of the reset done bits are not
+	 * set in GLNVM_ULD, there is no need for a PF Reset; poll until the
+	 * global reset is done.
+	 */
+	if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) ||
+	    (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) {
+		/* poll on global reset currently in progress until done */
+		if (ice_check_reset(hw))
+			return ICE_ERR_RESET_FAILED;
+
+		return 0;
+	}
+
+	/* Reset the PF */
+	reg = rd32(hw, PFGEN_CTRL);
+
+	wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
+
+	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+		reg = rd32(hw, PFGEN_CTRL);
+		if (!(reg & PFGEN_CTRL_PFSWR_M))
+			break;
+
+		mdelay(1);
+	}
+
+	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "PF reset polling failed to complete.\n");
+		return ICE_ERR_RESET_FAILED;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reset - Perform different types of reset
+ * @hw: pointer to the hardware structure
+ * @req: reset request
+ *
+ * This function triggers a reset as specified by the req parameter.
+ *
+ * Note:
+ * If anything other than a PF reset is triggered, PXE mode is restored.
+ * This has to be cleared using ice_clear_pxe_mode again, once the AQ
+ * interface has been restored in the rebuild flow.
+ */
+enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
+{
+	u32 val = 0;
+
+	switch (req) {
+	case ICE_RESET_PFR:
+		return ice_pf_reset(hw);
+	case ICE_RESET_CORER:
+		ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n");
+		val = GLGEN_RTRIG_CORER_M;
+		break;
+	case ICE_RESET_GLOBR:
+		ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
+		val = GLGEN_RTRIG_GLOBR_M;
+		break;
+	}
+
+	val |= rd32(hw, GLGEN_RTRIG);
+	wr32(hw, GLGEN_RTRIG, val);
+	ice_flush(hw);
+
+	/* wait for the FW to be ready */
+	return ice_check_reset(hw);
+}
+
 /**
  * ice_debug_cq
  * @hw: pointer to the hardware structure
@@ -128,3 +346,190 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
 
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
 }
+
+/**
+ * ice_aq_req_res
+ * @hw: pointer to the hw struct
+ * @res: resource id
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cd: pointer to command details structure or NULL
+ *
+ * requests common resource using the admin queue commands (0x0008)
+ */
+static enum ice_status
+ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+	       enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout,
+	       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd_resp;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd_resp = &desc.params.res_owner;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
+
+	cmd_resp->res_id = cpu_to_le16(res);
+	cmd_resp->access_type = cpu_to_le16(access);
+	cmd_resp->res_number = cpu_to_le32(sdp_number);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	/* The completion specifies the maximum time in ms that the driver
+	 * may hold the resource in the Timeout field.
+	 * If the resource is held by someone else, the command completes with
+	 * busy return value and the timeout field indicates the maximum time
+	 * the current owner of the resource has to free it.
+	 */
+	if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
+		*timeout = le32_to_cpu(cmd_resp->timeout);
+
+	return status;
+}
+
+/**
+ * ice_aq_release_res
+ * @hw: pointer to the hw struct
+ * @res: resource id
+ * @sdp_number: resource number
+ * @cd: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands (0x0009)
+ */
+static enum ice_status
+ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.res_owner;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res);
+
+	cmd->res_id = cpu_to_le16(res);
+	cmd->res_number = cpu_to_le32(sdp_number);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_acquire_res
+ * @hw: pointer to the HW structure
+ * @res: resource id
+ * @access: access type (read or write)
+ *
+ * This function will attempt to acquire the ownership of a resource.
+ */
+enum ice_status
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+		enum ice_aq_res_access_type access)
+{
+#define ICE_RES_POLLING_DELAY_MS	10
+	u32 delay = ICE_RES_POLLING_DELAY_MS;
+	enum ice_status status;
+	u32 time_left = 0;
+	u32 timeout;
+
+	status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+	/* An admin queue return code of ICE_AQ_RC_EEXIST means that another
+	 * driver has previously acquired the resource and performed any
+	 * necessary updates; in this case the caller does not obtain the
+	 * resource and has no further work to do.
+	 */
+	if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
+		status = ICE_ERR_AQ_NO_WORK;
+		goto ice_acquire_res_exit;
+	}
+
+	if (status)
+		ice_debug(hw, ICE_DBG_RES,
+			  "resource %d acquire type %d failed.\n", res, access);
+
+	/* If necessary, poll until the current lock owner timeouts */
+	timeout = time_left;
+	while (status && timeout && time_left) {
+		mdelay(delay);
+		timeout = (timeout > delay) ? timeout - delay : 0;
+		status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+		if (hw->adminq.sq_last_status == ICE_AQ_RC_EEXIST) {
+			/* lock free, but no work to do */
+			status = ICE_ERR_AQ_NO_WORK;
+			break;
+		}
+
+		if (!status)
+			/* lock acquired */
+			break;
+	}
+	if (status && status != ICE_ERR_AQ_NO_WORK)
+		ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n");
+
+ice_acquire_res_exit:
+	if (status == ICE_ERR_AQ_NO_WORK) {
+		if (access == ICE_RES_WRITE)
+			ice_debug(hw, ICE_DBG_RES,
+				  "resource indicates no work to do.\n");
+		else
+			ice_debug(hw, ICE_DBG_RES,
+				  "Warning: ICE_ERR_AQ_NO_WORK not expected\n");
+	}
+	return status;
+}
+
+/**
+ * ice_release_res
+ * @hw: pointer to the HW structure
+ * @res: resource id
+ *
+ * This function will release a resource using the proper Admin Command.
+ */
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
+{
+	enum ice_status status;
+	u32 total_delay = 0;
+
+	status = ice_aq_release_res(hw, res, 0, NULL);
+
+	/* there are some rare cases when trying to release the resource
+	 * results in an admin Q timeout, so handle them correctly
+	 */
+	while ((status == ICE_ERR_AQ_TIMEOUT) &&
+	       (total_delay < hw->adminq.sq_cmd_timeout)) {
+		mdelay(1);
+		status = ice_aq_release_res(hw, res, 0, NULL);
+		total_delay++;
+	}
+}
+
+/**
+ * ice_aq_clear_pxe_mode
+ * @hw: pointer to the hw struct
+ *
+ * Tell the firmware that the driver is taking over from PXE (0x0110).
+ */
+static enum ice_status ice_aq_clear_pxe_mode(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode);
+	desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the hw struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ */
+void ice_clear_pxe_mode(struct ice_hw *hw)
+{
+	if (ice_check_sq_alive(hw, &hw->adminq))
+		ice_aq_clear_pxe_mode(hw);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index ba9347459db1..b1a7c5afe86b 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -9,12 +9,22 @@
 
 void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
 		  u16 buf_len);
+enum ice_status ice_init_hw(struct ice_hw *hw);
+void ice_deinit_hw(struct ice_hw *hw);
+enum ice_status ice_check_reset(struct ice_hw *hw);
+enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
 void ice_shutdown_all_ctrlq(struct ice_hw *hw);
 enum ice_status
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+		enum ice_aq_res_access_type access);
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
+enum ice_status ice_init_nvm(struct ice_hw *hw);
+enum ice_status
 ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		struct ice_aq_desc *desc, void *buf, u16 buf_size,
 		struct ice_sq_cd *cd);
+void ice_clear_pxe_mode(struct ice_hw *hw);
 bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
 enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
 void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
@@ -22,4 +32,5 @@ enum ice_status
 ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
 		void *buf, u16 buf_size, struct ice_sq_cd *cd);
 enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
 #endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 6133d38525e0..b5306e8a5a0d 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -6,6 +6,9 @@
 
 #include "ice_adminq_cmd.h"
 
+/* Maximum buffer lengths for all control queue types */
+#define ICE_AQ_MAX_BUF_LEN 4096
+
 #define ICE_CTL_Q_DESC(R, i) \
 	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
 
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 66a982b55eaf..893d5e967e66 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -28,5 +28,35 @@
 #define PF_FW_ATQLEN_ATQENABLE_S	31
 #define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
 #define PF_FW_ATQT			0x00080400
+#define GLGEN_RSTAT			0x000B8188
+#define GLGEN_RSTAT_DEVSTATE_S		0
+#define GLGEN_RSTAT_DEVSTATE_M		ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S)
+#define GLGEN_RSTCTL			0x000B8180
+#define GLGEN_RSTCTL_GRSTDEL_S		0
+#define GLGEN_RSTCTL_GRSTDEL_M		ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RTRIG			0x000B8190
+#define GLGEN_RTRIG_CORER_S		0
+#define GLGEN_RTRIG_CORER_M		BIT(GLGEN_RTRIG_CORER_S)
+#define GLGEN_RTRIG_GLOBR_S		1
+#define GLGEN_RTRIG_GLOBR_M		BIT(GLGEN_RTRIG_GLOBR_S)
+#define GLGEN_STAT			0x000B612C
+#define PFGEN_CTRL			0x00091000
+#define PFGEN_CTRL_PFSWR_S		0
+#define PFGEN_CTRL_PFSWR_M		BIT(PFGEN_CTRL_PFSWR_S)
+#define GLLAN_RCTL_0			0x002941F8
+#define GLNVM_FLA			0x000B6108
+#define GLNVM_FLA_LOCKED_S		6
+#define GLNVM_FLA_LOCKED_M		BIT(GLNVM_FLA_LOCKED_S)
+#define GLNVM_GENS			0x000B6100
+#define GLNVM_GENS_SR_SIZE_S		5
+#define GLNVM_GENS_SR_SIZE_M		ICE_M(0x7, GLNVM_GENS_SR_SIZE_S)
+#define GLNVM_ULD			0x000B6008
+#define GLNVM_ULD_CORER_DONE_S		3
+#define GLNVM_ULD_CORER_DONE_M		BIT(GLNVM_ULD_CORER_DONE_S)
+#define GLNVM_ULD_GLOBR_DONE_S		4
+#define GLNVM_ULD_GLOBR_DONE_M		BIT(GLNVM_ULD_GLOBR_DONE_S)
+#define PF_FUNC_RID			0x0009E880
+#define PF_FUNC_RID_FUNC_NUM_S		0
+#define PF_FUNC_RID_FUNC_NUM_M		ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S)
 
 #endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8b63c25fda8b..953be26054ca 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -26,6 +26,18 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX
 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 
+/**
+ * ice_set_ctrlq_len - helper function to set controlq length
+ * @hw: pointer to the hw instance
+ */
+static void ice_set_ctrlq_len(struct ice_hw *hw)
+{
+	hw->adminq.num_rq_entries = ICE_AQ_LEN;
+	hw->adminq.num_sq_entries = ICE_AQ_LEN;
+	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
+	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
+}
+
 /**
  * ice_probe - Device initialization routine
  * @pdev: PCI device information struct
@@ -81,6 +93,8 @@ static int ice_probe(struct pci_dev *pdev,
 	hw->subsystem_device_id = pdev->subsystem_device;
 	hw->bus.device = PCI_SLOT(pdev->devfn);
 	hw->bus.func = PCI_FUNC(pdev->devfn);
+	ice_set_ctrlq_len(hw);
+
 	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
 
 #ifndef CONFIG_DYNAMIC_DEBUG
@@ -88,7 +102,22 @@ static int ice_probe(struct pci_dev *pdev,
 		hw->debug_mask = debug;
 #endif
 
+	err = ice_init_hw(hw);
+	if (err) {
+		dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err);
+		err = -EIO;
+		goto err_exit_unroll;
+	}
+
+	dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n",
+		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
+		 hw->api_maj_ver, hw->api_min_ver);
+
 	return 0;
+
+err_exit_unroll:
+	pci_disable_pcie_error_reporting(pdev);
+	return err;
 }
 
 /**
@@ -103,6 +132,8 @@ static void ice_remove(struct pci_dev *pdev)
 		return;
 
 	set_bit(__ICE_DOWN, pf->state);
+
+	ice_deinit_hw(&pf->hw);
 	pci_disable_pcie_error_reporting(pdev);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
new file mode 100644
index 000000000000..fa7a69ac92b0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_aq_read_nvm
+ * @hw: pointer to the hw struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands (0x0701)
+ */
+static enum ice_status
+ice_aq_read_nvm(struct ice_hw *hw, u8 module_typeid, u32 offset, u16 length,
+		void *data, bool last_command, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	/* In offset the highest byte must be zeroed. */
+	if (offset & 0xFF000000)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read);
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+	cmd->module_typeid = module_typeid;
+	cmd->offset = cpu_to_le32(offset);
+	cmd->length = cpu_to_le16(length);
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_check_sr_access_params - verify params for Shadow RAM R/W operations.
+ * @hw: pointer to the HW structure
+ * @offset: offset in words from module start
+ * @words: number of words to access
+ */
+static enum ice_status
+ice_check_sr_access_params(struct ice_hw *hw, u32 offset, u16 words)
+{
+	if ((offset + words) > hw->nvm.sr_words) {
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM error: offset beyond SR lmt.\n");
+		return ICE_ERR_PARAM;
+	}
+
+	if (words > ICE_SR_SECTOR_SIZE_IN_WORDS) {
+		/* We can access only up to 4KB (one sector), in one AQ write */
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM error: tried to access %d words, limit is %d.\n",
+			  words, ICE_SR_SECTOR_SIZE_IN_WORDS);
+		return ICE_ERR_PARAM;
+	}
+
+	if (((offset + (words - 1)) / ICE_SR_SECTOR_SIZE_IN_WORDS) !=
+	    (offset / ICE_SR_SECTOR_SIZE_IN_WORDS)) {
+		/* A single access cannot spread over two sectors */
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM error: cannot spread over two sectors.\n");
+		return ICE_ERR_PARAM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_read_sr_aq - Read Shadow RAM.
+ * @hw: pointer to the HW structure
+ * @offset: offset in words from module start
+ * @words: number of words to read
+ * @data: buffer for words reads from Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+ * Reads 16-bit word buffers from the Shadow RAM using the admin command.
+ */
+static enum ice_status
+ice_read_sr_aq(struct ice_hw *hw, u32 offset, u16 words, u16 *data,
+	       bool last_command)
+{
+	enum ice_status status;
+
+	status = ice_check_sr_access_params(hw, offset, words);
+
+	/* values in "offset" and "words" parameters are sized as words
+	 * (16 bits) but ice_aq_read_nvm expects these values in bytes.
+	 * So do this conversion while calling ice_aq_read_nvm.
+	 */
+	if (!status)
+		status = ice_aq_read_nvm(hw, 0, 2 * offset, 2 * words, data,
+					 last_command, NULL);
+
+	return status;
+}
+
+/**
+ * ice_read_sr_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_aq method.
+ */
+static enum ice_status
+ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
+{
+	enum ice_status status;
+
+	status = ice_read_sr_aq(hw, offset, 1, data, true);
+	if (!status)
+		*data = le16_to_cpu(*(__le16 *)data);
+
+	return status;
+}
+
+/**
+ * ice_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership.
+ */
+static enum
+ice_status ice_acquire_nvm(struct ice_hw *hw,
+			   enum ice_aq_res_access_type access)
+{
+	if (hw->nvm.blank_nvm_mode)
+		return 0;
+
+	return ice_acquire_res(hw, ICE_NVM_RES_ID, access);
+}
+
+/**
+ * ice_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM ownership.
+ */
+static void ice_release_nvm(struct ice_hw *hw)
+{
+	if (hw->nvm.blank_nvm_mode)
+		return;
+
+	ice_release_res(hw, ICE_NVM_RES_ID);
+}
+
+/**
+ * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
+ */
+static enum ice_status
+ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+{
+	enum ice_status status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (!status) {
+		status = ice_read_sr_word_aq(hw, offset, data);
+		ice_release_nvm(hw);
+	}
+
+	return status;
+}
+
+/**
+ * ice_init_nvm - initializes NVM setting
+ * @hw: pointer to the hw struct
+ *
+ * This function reads and populates NVM settings such as Shadow RAM size,
+ * max_timeout, and blank_nvm_mode
+ */
+enum ice_status ice_init_nvm(struct ice_hw *hw)
+{
+	struct ice_nvm_info *nvm = &hw->nvm;
+	u16 eetrack_lo, eetrack_hi;
+	enum ice_status status = 0;
+	u32 fla, gens_stat;
+	u8 sr_size;
+
+	/* The SR size is stored regardless of the nvm programming mode
+	 * as the blank mode may be used in the factory line.
+	 */
+	gens_stat = rd32(hw, GLNVM_GENS);
+	sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S;
+
+	/* Switching to words (sr_size contains power of 2) */
+	nvm->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB;
+
+	/* Check if we are in the normal or blank NVM programming mode */
+	fla = rd32(hw, GLNVM_FLA);
+	if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
+		nvm->blank_nvm_mode = false;
+	} else { /* Blank programming mode */
+		nvm->blank_nvm_mode = true;
+		status = ICE_ERR_NVM_BLANK_MODE;
+		ice_debug(hw, ICE_DBG_NVM,
+			  "NVM init error: unsupported blank mode.\n");
+		return status;
+	}
+
+	status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read DEV starter version.\n");
+		return status;
+	}
+
+	status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK lo.\n");
+		return status;
+	}
+	status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK hi.\n");
+		return status;
+	}
+
+	hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
index 5c992aa1f150..f57c414bc0a9 100644
--- a/drivers/net/ethernet/intel/ice/ice_osdep.h
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -15,6 +15,7 @@
 #define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
 #define rd64(a, reg)		readq((a)->hw_addr + (reg))
 
+#define ice_flush(a)		rd32((a), GLGEN_STAT)
 #define ICE_M(m, s)		((m) << (s))
 
 struct ice_dma_mem {
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 978c6dff3c71..7e8049efac1c 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -9,12 +9,17 @@ enum ice_status {
 	ICE_ERR_PARAM				= -1,
 	ICE_ERR_NOT_READY			= -3,
 	ICE_ERR_INVAL_SIZE			= -6,
+	ICE_ERR_DEVICE_NOT_SUPPORTED		= -8,
+	ICE_ERR_RESET_FAILED			= -9,
 	ICE_ERR_FW_API_VER			= -10,
 	ICE_ERR_NO_MEMORY			= -11,
 	ICE_ERR_CFG				= -12,
+	ICE_ERR_OUT_OF_RANGE			= -13,
+	ICE_ERR_NVM_BLANK_MODE			= -53,
 	ICE_ERR_AQ_ERROR			= -100,
 	ICE_ERR_AQ_TIMEOUT			= -101,
 	ICE_ERR_AQ_FULL				= -102,
+	ICE_ERR_AQ_NO_WORK			= -103,
 	ICE_ERR_AQ_EMPTY			= -104,
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 212ab6cc0a42..31be38369a48 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -10,20 +10,58 @@
 #include "ice_controlq.h"
 
 /* debug masks - set these bits in hw->debug_mask to control output */
+#define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_NVM		BIT_ULL(7)
+#define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
 #define ICE_DBG_AQ_CMD		BIT_ULL(27)
 
+enum ice_aq_res_ids {
+	ICE_NVM_RES_ID = 1,
+	ICE_SPD_RES_ID,
+	ICE_GLOBAL_CFG_LOCK_RES_ID,
+	ICE_CHANGE_LOCK_RES_ID
+};
+
+enum ice_aq_res_access_type {
+	ICE_RES_READ = 1,
+	ICE_RES_WRITE
+};
+
+/* Various MAC types */
+enum ice_mac_type {
+	ICE_MAC_UNKNOWN = 0,
+	ICE_MAC_GENERIC,
+};
+
+/* Various RESET request, These are not tied with HW reset types */
+enum ice_reset_req {
+	ICE_RESET_PFR	= 0,
+	ICE_RESET_CORER	= 1,
+	ICE_RESET_GLOBR	= 2,
+};
+
 /* Bus parameters */
 struct ice_bus_info {
 	u16 device;
 	u8 func;
 };
 
+/* NVM Information */
+struct ice_nvm_info {
+	u32 eetrack;              /* NVM data version */
+	u32 oem_ver;              /* OEM version info */
+	u16 sr_words;             /* Shadow RAM size in words */
+	u16 ver;                  /* NVM package version */
+	bool blank_nvm_mode;      /* is NVM empty (no FW present) */
+};
+
 /* Port hardware description */
 struct ice_hw {
 	u8 __iomem *hw_addr;
 	void *back;
 	u64 debug_mask;		/* bitmap for debug mask */
+	enum ice_mac_type mac_type;
 
 	/* pci info */
 	u16 device_id;
@@ -32,7 +70,11 @@ struct ice_hw {
 	u16 subsystem_vendor_id;
 	u8 revision_id;
 
+	u8 pf_id;		/* device profile info */
+
 	struct ice_bus_info bus;
+	struct ice_nvm_info nvm;
+
 	/* Control Queue info */
 	struct ice_ctl_q_info adminq;
 
@@ -47,4 +89,11 @@ struct ice_hw {
 	u32 fw_build;		/* firmware build number */
 };
 
+/* Checksum and Shadow RAM pointers */
+#define ICE_SR_NVM_DEV_STARTER_VER	0x18
+#define ICE_SR_NVM_EETRACK_LO		0x2D
+#define ICE_SR_NVM_EETRACK_HI		0x2E
+#define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
+#define ICE_SR_WORDS_IN_1KB		512
+
 #endif /* _ICE_TYPE_H_ */

From 07e1671cfca54edf3f0c0915ef8754013b02dfbb Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Mon, 26 Mar 2018 10:16:37 +0200
Subject: [PATCH 1233/1678] nfp: flower: refactor shared ip header in match
 offload

Refactored shared ip header code for IPv4 and IPv6 in match offload.

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/flower/cmsg.h  | 19 +++---
 .../net/ethernet/netronome/nfp/flower/match.c | 61 +++++++++----------
 2 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 28c1cd5b823b..db32f129aded 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -260,6 +260,13 @@ struct nfp_flower_tp_ports {
 	__be16 port_dst;
 };
 
+struct nfp_flower_ip_ext {
+	u8 tos;
+	u8 proto;
+	u8 ttl;
+	u8 flags;
+};
+
 /* L3 IPv4 details (3W/12B)
  *    3                   2                   1
  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
@@ -272,10 +279,7 @@ struct nfp_flower_tp_ports {
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct nfp_flower_ipv4 {
-	u8 tos;
-	u8 proto;
-	u8 ttl;
-	u8 flags;
+	struct nfp_flower_ip_ext ip_ext;
 	__be32 ipv4_src;
 	__be32 ipv4_dst;
 };
@@ -284,7 +288,7 @@ struct nfp_flower_ipv4 {
  *    3                   2                   1
  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |    DSCP   |ECN|   protocol    |          reserved             |
+ * |    DSCP   |ECN|   protocol    |      ttl      |     flags     |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |   ipv6_exthdr   | res |            ipv6_flow_label            |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -306,10 +310,7 @@ struct nfp_flower_ipv4 {
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  */
 struct nfp_flower_ipv6 {
-	u8 tos;
-	u8 proto;
-	u8 ttl;
-	u8 reserved;
+	struct nfp_flower_ip_ext ip_ext;
 	__be32 ipv6_flow_label_exthdr;
 	struct in6_addr ipv6_src;
 	struct in6_addr ipv6_dst;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index b3bc8279d4fb..26110670ba13 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -146,26 +146,15 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
 }
 
 static void
-nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
-			struct tc_cls_flower_offload *flow,
-			bool mask_version)
+nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *frame,
+			  struct tc_cls_flower_offload *flow,
+			  bool mask_version)
 {
 	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv4_addrs *addr;
-	struct flow_dissector_key_basic *basic;
-
-	memset(frame, 0, sizeof(struct nfp_flower_ipv4));
-
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						 target);
-		frame->ipv4_src = addr->src;
-		frame->ipv4_dst = addr->dst;
-	}
 
 	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *basic;
+
 		basic = skb_flow_dissector_target(flow->dissector,
 						  FLOW_DISSECTOR_KEY_BASIC,
 						  target);
@@ -203,6 +192,28 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
 	}
 }
 
+static void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
+			struct tc_cls_flower_offload *flow,
+			bool mask_version)
+{
+	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_dissector_key_ipv4_addrs *addr;
+
+	memset(frame, 0, sizeof(struct nfp_flower_ipv4));
+
+	if (dissector_uses_key(flow->dissector,
+			       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		addr = skb_flow_dissector_target(flow->dissector,
+						 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						 target);
+		frame->ipv4_src = addr->src;
+		frame->ipv4_dst = addr->dst;
+	}
+
+	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
+}
+
 static void
 nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 			struct tc_cls_flower_offload *flow,
@@ -210,7 +221,6 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 {
 	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
 	struct flow_dissector_key_ipv6_addrs *addr;
-	struct flow_dissector_key_basic *basic;
 
 	memset(frame, 0, sizeof(struct nfp_flower_ipv6));
 
@@ -223,22 +233,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
 		frame->ipv6_dst = addr->dst;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		basic = skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  target);
-		frame->proto = basic->ip_proto;
-	}
-
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *flow_ip;
-
-		flow_ip = skb_flow_dissector_target(flow->dissector,
-						    FLOW_DISSECTOR_KEY_IP,
-						    target);
-		frame->tos = flow_ip->tos;
-		frame->ttl = flow_ip->ttl;
-	}
+	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
 }
 
 static void

From 71ea5343a01cad36d7c44bea07a79dd96049e6c0 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Mon, 26 Mar 2018 10:16:38 +0200
Subject: [PATCH 1234/1678] nfp: flower: implement ip fragmentation match
 offload

Implement ip fragmentation match offloading for both IPv4 and IPv6. Allows
offloading frag, nofrag, first and nofirstfrag classification.

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/cmsg.h  |  3 +++
 drivers/net/ethernet/netronome/nfp/flower/match.c | 12 ++++++++++++
 .../net/ethernet/netronome/nfp/flower/offload.c   | 15 +++++++++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index db32f129aded..3f46d836d1b8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -61,6 +61,9 @@
 #define NFP_FLOWER_MASK_MPLS_BOS	BIT(8)
 #define NFP_FLOWER_MASK_MPLS_Q		BIT(0)
 
+#define NFP_FL_IP_FRAG_FIRST		BIT(7)
+#define NFP_FL_IP_FRAGMENTED		BIT(6)
+
 /* Compressed HW representation of TCP Flags */
 #define NFP_FL_TCP_FLAG_URG		BIT(4)
 #define NFP_FL_TCP_FLAG_PSH		BIT(3)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 26110670ba13..91935405f586 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -190,6 +190,18 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *frame,
 		if (tcp_flags & TCPHDR_URG)
 			frame->flags |= NFP_FL_TCP_FLAG_URG;
 	}
+
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key;
+
+		key = skb_flow_dissector_target(flow->dissector,
+						FLOW_DISSECTOR_KEY_CONTROL,
+						target);
+		if (key->flags & FLOW_DIS_IS_FRAGMENT)
+			frame->flags |= NFP_FL_IP_FRAGMENTED;
+		if (key->flags & FLOW_DIS_FIRST_FRAG)
+			frame->flags |= NFP_FL_IP_FRAG_FIRST;
+	}
 }
 
 static void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index f3586c519805..114d2ab02a38 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -48,6 +48,10 @@
 	(TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \
 	 TCPHDR_PSH | TCPHDR_URG)
 
+#define NFP_FLOWER_SUPPORTED_CTLFLAGS \
+	(FLOW_DIS_IS_FRAGMENT | \
+	 FLOW_DIS_FIRST_FRAG)
+
 #define NFP_FLOWER_WHITELIST_DISSECTOR \
 	(BIT(FLOW_DISSECTOR_KEY_CONTROL) | \
 	 BIT(FLOW_DISSECTOR_KEY_BASIC) | \
@@ -322,6 +326,17 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		}
 	}
 
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_dissector_key_control *key_ctl;
+
+		key_ctl = skb_flow_dissector_target(flow->dissector,
+						    FLOW_DISSECTOR_KEY_CONTROL,
+						    flow->key);
+
+		if (key_ctl->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS)
+			return -EOPNOTSUPP;
+	}
+
 	ret_key_ls->key_layer = key_layer;
 	ret_key_ls->key_layer_two = key_layer_two;
 	ret_key_ls->key_size = key_size;

From 855aeba34047f53e3665b0c3bcac80fe87ee2f7b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Mar 2018 12:28:47 +0300
Subject: [PATCH 1235/1678] net: Convert rpcsec_gss_net_ops

These pernet_operations initialize and destroy sunrpc_net_id
refered per-net items. Only used global list is cache_list,
and accesses already serialized.

sunrpc_destroy_cache_detail() check for list_empty() without
cache_list_lock, but when it's called from unregister_pernet_subsys(),
there can't be callers in parallel, so we won't miss list_empty()
in this case.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Anna Schumaker <Anna.Schumaker@netapp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/auth_gss/auth_gss.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 9463af4b32e8..44f939cb6bc8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2063,6 +2063,7 @@ static __net_exit void rpcsec_gss_exit_net(struct net *net)
 static struct pernet_operations rpcsec_gss_net_ops = {
 	.init = rpcsec_gss_init_net,
 	.exit = rpcsec_gss_exit_net,
+	.async = true,
 };
 
 /*

From 5e804a6077dccf154047a1ffe5b5232dce579659 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Mar 2018 12:28:55 +0300
Subject: [PATCH 1236/1678] net: Convert sunrpc_net_ops

These pernet_operations look similar to rpcsec_gss_net_ops,
they just create and destroy another caches. So, they also
can be async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Anna Schumaker <Anna.Schumaker@netapp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/sunrpc_syms.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 56f9eff74150..68287e921847 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -79,6 +79,7 @@ static struct pernet_operations sunrpc_net_ops = {
 	.exit = sunrpc_exit_net,
 	.id = &sunrpc_net_id,
 	.size = sizeof(struct sunrpc_net),
+	.async = true,
 };
 
 static int __init

From 436de500948e1176ef013468c2630f83bb72a901 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Mar 2018 12:29:04 +0300
Subject: [PATCH 1237/1678] net: Convert nfs4_dns_resolver_ops

These pernet_operations look similar to rpcsec_gss_net_ops,
they just create and destroy another cache. Also they create
and destroy directory. So, they also look safe to be async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Anna Schumaker <Anna.Schumaker@netapp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nfs/dns_resolve.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 060c658eab66..e90bd69ab653 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -410,6 +410,7 @@ static void nfs4_dns_net_exit(struct net *net)
 static struct pernet_operations nfs4_dns_resolver_ops = {
 	.init = nfs4_dns_net_init,
 	.exit = nfs4_dns_net_exit,
+	.async = true,
 };
 
 static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,

From dbf7bb4437260605fffcaff9aad3514163209db1 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Mon, 26 Mar 2018 12:29:13 +0300
Subject: [PATCH 1238/1678] net: Convert nfs4blocklayout_net_ops

These pernet_operations create and destroy per-net pipe
and dentry, and they seem safe to be marked as async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Anna Schumaker <Anna.Schumaker@netapp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nfs/blocklayout/rpc_pipefs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index 9fb067a6f7e0..ef9fa111b009 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -261,6 +261,7 @@ static void nfs4blocklayout_net_exit(struct net *net)
 static struct pernet_operations nfs4blocklayout_net_ops = {
 	.init = nfs4blocklayout_net_init,
 	.exit = nfs4blocklayout_net_exit,
+	.async = true,
 };
 
 int __init bl_init_pipefs(void)

From bc67a0daf8f3bc6fa8fcb68090f3c444de7f951c Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:31 +0300
Subject: [PATCH 1239/1678] ipmr: Make vif fib notifiers common

The fib-notifiers are tightly coupled with the vif_device which is
already common. Move the notifier struct definition and helpers to the
common file; Currently they're only used by ipmr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h      |  8 ------
 include/linux/mroute_base.h | 53 +++++++++++++++++++++++++++++++++++++
 net/ipv4/ipmr.c             | 31 ++++------------------
 3 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 7ed82e4f11b3..3f70a04a5879 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -55,14 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule)
 }
 #endif
 
-struct vif_entry_notifier_info {
-	struct fib_notifier_info info;
-	struct net_device *dev;
-	vifi_t vif_index;
-	unsigned short vif_flags;
-	u32 tb_id;
-};
-
 #define VIFF_STATIC 0x8000
 
 struct mfc_cache_cmp_arg {
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index c2560cb50f1d..23326f5402f3 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -6,6 +6,7 @@
 #include <linux/spinlock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <net/fib_notifier.h>
 
 /**
  * struct vif_device - interface representor for multicast routing
@@ -36,6 +37,58 @@ struct vif_device {
 	__be32 local, remote;
 };
 
+struct vif_entry_notifier_info {
+	struct fib_notifier_info info;
+	struct net_device *dev;
+	unsigned short vif_index;
+	unsigned short vif_flags;
+	u32 tb_id;
+};
+
+static inline int mr_call_vif_notifier(struct notifier_block *nb,
+				       struct net *net,
+				       unsigned short family,
+				       enum fib_event_type event_type,
+				       struct vif_device *vif,
+				       unsigned short vif_index, u32 tb_id)
+{
+	struct vif_entry_notifier_info info = {
+		.info = {
+			.family = family,
+			.net = net,
+		},
+		.dev = vif->dev,
+		.vif_index = vif_index,
+		.vif_flags = vif->flags,
+		.tb_id = tb_id,
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static inline int mr_call_vif_notifiers(struct net *net,
+					unsigned short family,
+					enum fib_event_type event_type,
+					struct vif_device *vif,
+					unsigned short vif_index, u32 tb_id,
+					unsigned int *ipmr_seq)
+{
+	struct vif_entry_notifier_info info = {
+		.info = {
+			.family = family,
+			.net = net,
+		},
+		.dev = vif->dev,
+		.vif_index = vif_index,
+		.vif_flags = vif->flags,
+		.tb_id = tb_id,
+	};
+
+	ASSERT_RTNL();
+	(*ipmr_seq)++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
 #ifndef MAXVIFS
 /* This one is nasty; value is defined in uapi using different symbols for
  * mroute and morute6 but both map into same 32.
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f6be5db16da2..bb1a0655f8e4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -650,18 +650,8 @@ static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
 					struct vif_device *vif,
 					vifi_t vif_index, u32 tb_id)
 {
-	struct vif_entry_notifier_info info = {
-		.info = {
-			.family = RTNL_FAMILY_IPMR,
-			.net = net,
-		},
-		.dev = vif->dev,
-		.vif_index = vif_index,
-		.vif_flags = vif->flags,
-		.tb_id = tb_id,
-	};
-
-	return call_fib_notifier(nb, net, event_type, &info.info);
+	return mr_call_vif_notifier(nb, net, RTNL_FAMILY_IPMR, event_type,
+				    vif, vif_index, tb_id);
 }
 
 static int call_ipmr_vif_entry_notifiers(struct net *net,
@@ -669,20 +659,9 @@ static int call_ipmr_vif_entry_notifiers(struct net *net,
 					 struct vif_device *vif,
 					 vifi_t vif_index, u32 tb_id)
 {
-	struct vif_entry_notifier_info info = {
-		.info = {
-			.family = RTNL_FAMILY_IPMR,
-			.net = net,
-		},
-		.dev = vif->dev,
-		.vif_index = vif_index,
-		.vif_flags = vif->flags,
-		.tb_id = tb_id,
-	};
-
-	ASSERT_RTNL();
-	net->ipv4.ipmr_seq++;
-	return call_fib_notifiers(net, event_type, &info.info);
+	return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+				     vif, vif_index, tb_id,
+				     &net->ipv4.ipmr_seq);
 }
 
 static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,

From 54c4cad97b8fd414909b78d4274a6797baa52b3b Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:32 +0300
Subject: [PATCH 1240/1678] ipmr: Make MFC fib notifiers common

Like vif notifications, move the notifier struct for MFC as well as its
helpers into a common file; Currently they're only used by ipmr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 13 +++---
 include/linux/mroute.h                        |  6 ---
 include/linux/mroute_base.h                   | 44 +++++++++++++++++++
 net/ipv4/ipmr.c                               | 26 ++---------
 4 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 921bd1075edf..8d067de924cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5391,7 +5391,9 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
 	if (IS_ERR(vr))
 		return PTR_ERR(vr);
 
-	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
+	return mlxsw_sp_mr_route4_add(vr->mr4_table,
+				      (struct mfc_cache *) men_info->mfc,
+				      replace);
 }
 
 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
@@ -5406,7 +5408,8 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
 	if (WARN_ON(!vr))
 		return;
 
-	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
+	mlxsw_sp_mr_route4_del(vr->mr4_table,
+			       (struct mfc_cache *) men_info->mfc);
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
 }
 
@@ -5682,11 +5685,11 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
 						replace);
 		if (err)
 			mlxsw_sp_router_fib_abort(mlxsw_sp);
-		ipmr_cache_put(fib_work->men_info.mfc);
+		ipmr_cache_put((struct mfc_cache *) fib_work->men_info.mfc);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
-		ipmr_cache_put(fib_work->men_info.mfc);
+		ipmr_cache_put((struct mfc_cache *) fib_work->men_info.mfc);
 		break;
 	case FIB_EVENT_VIF_ADD:
 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
@@ -5766,7 +5769,7 @@ mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
 	case FIB_EVENT_ENTRY_ADD: /* fall through */
 	case FIB_EVENT_ENTRY_DEL:
 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
-		ipmr_cache_hold(fib_work->men_info.mfc);
+		ipmr_cache_hold((struct mfc_cache *) fib_work->men_info.mfc);
 		break;
 	case FIB_EVENT_VIF_ADD: /* fall through */
 	case FIB_EVENT_VIF_DEL:
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 3f70a04a5879..c855d80b51f7 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -80,12 +80,6 @@ struct mfc_cache {
 	};
 };
 
-struct mfc_entry_notifier_info {
-	struct fib_notifier_info info;
-	struct mfc_cache *mfc;
-	u32 tb_id;
-};
-
 struct rtmsg;
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 23326f5402f3..2c594686c05e 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -152,6 +152,50 @@ struct mr_mfc {
 	struct rcu_head	rcu;
 };
 
+struct mfc_entry_notifier_info {
+	struct fib_notifier_info info;
+	struct mr_mfc *mfc;
+	u32 tb_id;
+};
+
+static inline int mr_call_mfc_notifier(struct notifier_block *nb,
+				       struct net *net,
+				       unsigned short family,
+				       enum fib_event_type event_type,
+				       struct mr_mfc *mfc, u32 tb_id)
+{
+	struct mfc_entry_notifier_info info = {
+		.info = {
+			.family = family,
+			.net = net,
+		},
+		.mfc = mfc,
+		.tb_id = tb_id
+	};
+
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static inline int mr_call_mfc_notifiers(struct net *net,
+					unsigned short family,
+					enum fib_event_type event_type,
+					struct mr_mfc *mfc, u32 tb_id,
+					unsigned int *ipmr_seq)
+{
+	struct mfc_entry_notifier_info info = {
+		.info = {
+			.family = family,
+			.net = net,
+		},
+		.mfc = mfc,
+		.tb_id = tb_id
+	};
+
+	ASSERT_RTNL();
+	(*ipmr_seq)++;
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
 struct mr_table;
 
 /**
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index bb1a0655f8e4..470956d2d8ad 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -669,34 +669,16 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
 					enum fib_event_type event_type,
 					struct mfc_cache *mfc, u32 tb_id)
 {
-	struct mfc_entry_notifier_info info = {
-		.info = {
-			.family = RTNL_FAMILY_IPMR,
-			.net = net,
-		},
-		.mfc = mfc,
-		.tb_id = tb_id
-	};
-
-	return call_fib_notifier(nb, net, event_type, &info.info);
+	return mr_call_mfc_notifier(nb, net, RTNL_FAMILY_IPMR,
+				    event_type, &mfc->_c, tb_id);
 }
 
 static int call_ipmr_mfc_entry_notifiers(struct net *net,
 					 enum fib_event_type event_type,
 					 struct mfc_cache *mfc, u32 tb_id)
 {
-	struct mfc_entry_notifier_info info = {
-		.info = {
-			.family = RTNL_FAMILY_IPMR,
-			.net = net,
-		},
-		.mfc = mfc,
-		.tb_id = tb_id
-	};
-
-	ASSERT_RTNL();
-	net->ipv4.ipmr_seq++;
-	return call_fib_notifiers(net, event_type, &info.info);
+	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
+				     &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
 }
 
 /**

From cdc9f9443b5c3a61c7cec807965054ee1fd29acf Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:33 +0300
Subject: [PATCH 1241/1678] ipmr: Make ipmr_dump() common

Since all the primitive elements used for the notification done by ipmr
are now common [mr_table, mr_mfc, vif_device] we can refactor the logic
for dumping them to a common file.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 18 +++++++++++++
 net/ipv4/ipmr.c             | 53 ++-----------------------------------
 net/ipv4/ipmr_base.c        | 42 +++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 51 deletions(-)

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 2c594686c05e..289eb5aa7b5d 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -277,6 +277,13 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 				 u32 portid, u32 seq, struct mr_mfc *c,
 				 int cmd, int flags),
 		     spinlock_t *lock);
+
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+	    int (*rules_dump)(struct net *net,
+			      struct notifier_block *nb),
+	    struct mr_table *(*mr_iter)(struct net *net,
+					struct mr_table *mrt),
+	    rwlock_t *mrt_lock);
 #else
 static inline void vif_device_init(struct vif_device *v,
 				   struct net_device *dev,
@@ -333,6 +340,17 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	return -EINVAL;
 }
+
+static inline int mr_dump(struct net *net, struct notifier_block *nb,
+			  unsigned short family,
+			  int (*rules_dump)(struct net *net,
+					    struct notifier_block *nb),
+			  struct mr_table *(*mr_iter)(struct net *net,
+						      struct mr_table *mrt),
+			  rwlock_t *mrt_lock)
+{
+	return -EINVAL;
+}
 #endif
 
 static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 470956d2d8ad..24c340021aba 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -644,16 +644,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 }
 #endif
 
-static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
-					struct net *net,
-					enum fib_event_type event_type,
-					struct vif_device *vif,
-					vifi_t vif_index, u32 tb_id)
-{
-	return mr_call_vif_notifier(nb, net, RTNL_FAMILY_IPMR, event_type,
-				    vif, vif_index, tb_id);
-}
-
 static int call_ipmr_vif_entry_notifiers(struct net *net,
 					 enum fib_event_type event_type,
 					 struct vif_device *vif,
@@ -664,15 +654,6 @@ static int call_ipmr_vif_entry_notifiers(struct net *net,
 				     &net->ipv4.ipmr_seq);
 }
 
-static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
-					struct net *net,
-					enum fib_event_type event_type,
-					struct mfc_cache *mfc, u32 tb_id)
-{
-	return mr_call_mfc_notifier(nb, net, RTNL_FAMILY_IPMR,
-				    event_type, &mfc->_c, tb_id);
-}
-
 static int call_ipmr_mfc_entry_notifiers(struct net *net,
 					 enum fib_event_type event_type,
 					 struct mfc_cache *mfc, u32 tb_id)
@@ -2950,38 +2931,8 @@ static unsigned int ipmr_seq_read(struct net *net)
 
 static int ipmr_dump(struct net *net, struct notifier_block *nb)
 {
-	struct mr_table *mrt;
-	int err;
-
-	err = ipmr_rules_dump(net, nb);
-	if (err)
-		return err;
-
-	ipmr_for_each_table(mrt, net) {
-		struct vif_device *v = &mrt->vif_table[0];
-		struct mr_mfc *mfc;
-		int vifi;
-
-		/* Notifiy on table VIF entries */
-		read_lock(&mrt_lock);
-		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
-			if (!v->dev)
-				continue;
-
-			call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
-						     v, vifi, mrt->id);
-		}
-		read_unlock(&mrt_lock);
-
-		/* Notify on table MFC entries */
-		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
-			call_ipmr_mfc_entry_notifier(nb, net,
-						     FIB_EVENT_ENTRY_ADD,
-						     (struct mfc_cache *)mfc,
-						     mrt->id);
-	}
-
-	return 0;
+	return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
+		       ipmr_mr_table_iter, &mrt_lock);
 }
 
 static const struct fib_notifier_ops ipmr_notifier_ops_template = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 8ba55bfda817..4fe97723b53f 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -321,3 +321,45 @@ done:
 	return skb->len;
 }
 EXPORT_SYMBOL(mr_rtm_dumproute);
+
+int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
+	    int (*rules_dump)(struct net *net,
+			      struct notifier_block *nb),
+	    struct mr_table *(*mr_iter)(struct net *net,
+					struct mr_table *mrt),
+	    rwlock_t *mrt_lock)
+{
+	struct mr_table *mrt;
+	int err;
+
+	err = rules_dump(net, nb);
+	if (err)
+		return err;
+
+	for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
+		struct vif_device *v = &mrt->vif_table[0];
+		struct mr_mfc *mfc;
+		int vifi;
+
+		/* Notifiy on table VIF entries */
+		read_lock(mrt_lock);
+		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+			if (!v->dev)
+				continue;
+
+			mr_call_vif_notifier(nb, net, family,
+					     FIB_EVENT_VIF_ADD,
+					     v, vifi, mrt->id);
+		}
+		read_unlock(mrt_lock);
+
+		/* Notify on table MFC entries */
+		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+			mr_call_mfc_notifier(nb, net, family,
+					     FIB_EVENT_ENTRY_ADD,
+					     mfc, mrt->id);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mr_dump);

From 088aa3eec2ce340b5d0f0f54430f5706223d5e45 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:34 +0300
Subject: [PATCH 1242/1678] ip6mr: Support fib notifications

In similar fashion to ipmr, support fib notifications for ip6mr mfc and
vif related events. This would later allow drivers to react to said
notifications and offload the IPv6 mroutes.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |   2 +
 net/ipv6/ip6mr.c         | 112 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5b51110435fc..c29f09cfc9d7 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -96,6 +96,8 @@ struct netns_ipv6 {
 	atomic_t		fib6_sernum;
 	struct seg6_pernet_data *seg6_data;
 	struct fib_notifier_ops	*notifier_ops;
+	struct fib_notifier_ops	*ip6mr_notifier_ops;
+	unsigned int ipmr_seq; /* protected by rtnl_mutex */
 	struct {
 		struct hlist_head head;
 		spinlock_t	lock;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7345bd6c4b7d..0be2f333e168 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -258,6 +258,16 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
 	rtnl_unlock();
 }
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
 #else
 #define ip6mr_for_each_table(mrt, net) \
 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
@@ -295,6 +305,16 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 	net->ipv6.mrt6 = NULL;
 	rtnl_unlock();
 }
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+	return 0;
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+	return 0;
+}
 #endif
 
 static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -653,10 +673,25 @@ failure:
 }
 #endif
 
-/*
- *	Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+					  enum fib_event_type event_type,
+					  struct vif_device *vif,
+					  mifi_t vif_index, u32 tb_id)
+{
+	return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+				     vif, vif_index, tb_id,
+				     &net->ipv6.ipmr_seq);
+}
 
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+					  enum fib_event_type event_type,
+					  struct mfc6_cache *mfc, u32 tb_id)
+{
+	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+				     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
+
+/* Delete a VIF entry */
 static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 		       struct list_head *head)
 {
@@ -669,6 +704,11 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 
 	v = &mrt->vif_table[vifi];
 
+	if (VIF_EXISTS(mrt, vifi))
+		call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+					       FIB_EVENT_VIF_DEL, v, vifi,
+					       mrt->id);
+
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
 	v->dev = NULL;
@@ -887,6 +927,8 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
 	if (vifi + 1 > mrt->maxvif)
 		mrt->maxvif = vifi + 1;
 	write_unlock_bh(&mrt_lock);
+	call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+				       v, vifi, mrt->id);
 	return 0;
 }
 
@@ -1175,6 +1217,8 @@ static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
 	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
 	list_del_rcu(&c->_c.list);
 
+	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
 	ip6mr_cache_free(c);
 	return 0;
@@ -1203,21 +1247,63 @@ static int ip6mr_device_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+	ASSERT_RTNL();
+
+	return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+	return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+		       ip6mr_mr_table_iter, &mrt_lock);
+}
+
 static struct notifier_block ip6_mr_notifier = {
 	.notifier_call = ip6mr_device_event
 };
 
-/*
- *	Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+	.family		= RTNL_FAMILY_IP6MR,
+	.fib_seq_read	= ip6mr_seq_read,
+	.fib_dump	= ip6mr_dump,
+	.owner		= THIS_MODULE,
+};
 
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+	struct fib_notifier_ops *ops;
+
+	net->ipv6.ipmr_seq = 0;
+
+	ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	net->ipv6.ip6mr_notifier_ops = ops;
+
+	return 0;
+}
+
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+	fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+	net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
 static int __net_init ip6mr_net_init(struct net *net)
 {
 	int err;
 
+	err = ip6mr_notifier_init(net);
+	if (err)
+		return err;
+
 	err = ip6mr_rules_init(net);
 	if (err < 0)
-		goto fail;
+		goto ip6mr_rules_fail;
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -1235,7 +1321,8 @@ proc_cache_fail:
 proc_vif_fail:
 	ip6mr_rules_exit(net);
 #endif
-fail:
+ip6mr_rules_fail:
+	ip6mr_notifier_exit(net);
 	return err;
 }
 
@@ -1246,6 +1333,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 	remove_proc_entry("ip6_mr_vif", net->proc_net);
 #endif
 	ip6mr_rules_exit(net);
+	ip6mr_notifier_exit(net);
 }
 
 static struct pernet_operations ip6mr_net_ops = {
@@ -1337,6 +1425,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 		if (!mrtsock)
 			c->_c.mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
+		call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+					       c, mrt->id);
 		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 		return 0;
 	}
@@ -1388,6 +1478,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
 		ip6mr_cache_resolve(net, mrt, uc, c);
 		ip6mr_cache_free(uc);
 	}
+	call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+				       c, mrt->id);
 	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
 	return 0;
 }
@@ -1424,6 +1516,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 		spin_lock_bh(&mfc_unres_lock);
 		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
 			list_del(&c->list);
+			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+						       FIB_EVENT_ENTRY_DEL,
+						       (struct mfc6_cache *)c,
+						       mrt->id);
 			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
 					  RTM_DELROUTE);
 			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);

From d3c07e5b9939a055fa017f200e535ae947eb22ab Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:35 +0300
Subject: [PATCH 1243/1678] ip6mr: Add API for default_rule fib

Add the ability to discern whether a given FIB rule notification relates
to the default rule inserted when registering ip6mr or a different one.

Would later be used by drivers wishing to offload ipv6 multicast routes
but unable to offload rules other than the default one.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute6.h | 10 ++++++++++
 net/ipv6/ip6mr.c        |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 1ac38e6819f5..c4a45859f586 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -8,6 +8,7 @@
 #include <net/net_namespace.h>
 #include <uapi/linux/mroute6.h>
 #include <linux/mroute_base.h>
+#include <net/fib_rules.h>
 
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
@@ -63,6 +64,15 @@ static inline void ip6_mr_cleanup(void)
 }
 #endif
 
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+bool ip6mr_rule_default(const struct fib_rule *rule);
+#else
+static inline bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+	return true;
+}
+#endif
+
 #define VIFF_STATIC 0x8000
 
 struct mfc6_cache_cmp_arg {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0be2f333e168..a187c523a95f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -268,6 +268,13 @@ static unsigned int ip6mr_rules_seq_read(struct net *net)
 {
 	return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 }
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+	return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+	       rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
 #else
 #define ip6mr_for_each_table(mrt, net) \
 	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

From 8c13af2a219c6498071b30ea558438c74267ae4d Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:36 +0300
Subject: [PATCH 1244/1678] ip6mr: Add refcounting to mfc

Since ipmr and ip6mr are using the same mr_mfc struct at their core, we
can now refactor the ipmr_cache_{hold,put} logic and apply refcounting
to both ipmr and ip6mr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.c |  6 +++---
 .../ethernet/mellanox/mlxsw/spectrum_router.c |  6 +++---
 include/linux/mroute.h                        | 19 -------------------
 include/linux/mroute_base.h                   | 13 +++++++++++++
 net/ipv4/ipmr.c                               |  8 ++++----
 net/ipv6/ip6mr.c                              |  6 ++++--
 6 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index 978a3c70653a..51b104ae2eec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -360,7 +360,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 
 	/* Find min_mtu and link iVIF and eVIFs */
 	mr_route->min_mtu = ETH_MAX_MTU;
-	ipmr_cache_hold(mfc);
+	mr_cache_hold(&mfc->_c);
 	mr_route->mfc4 = mfc;
 	mr_route->mr_table = mr_table;
 	for (i = 0; i < MAXVIFS; i++) {
@@ -380,7 +380,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 	mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
 	return mr_route;
 err:
-	ipmr_cache_put(mfc);
+	mr_cache_put(&mfc->_c);
 	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
 		mlxsw_sp_mr_route_evif_unlink(rve);
 	kfree(mr_route);
@@ -393,7 +393,7 @@ static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
 	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
 
 	mlxsw_sp_mr_route_ivif_unlink(mr_route);
-	ipmr_cache_put(mr_route->mfc4);
+	mr_cache_put((struct mr_mfc *)mr_route->mfc4);
 	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
 		mlxsw_sp_mr_route_evif_unlink(rve);
 	kfree(mr_route);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 8d067de924cf..be241c708bb9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5685,11 +5685,11 @@ static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
 						replace);
 		if (err)
 			mlxsw_sp_router_fib_abort(mlxsw_sp);
-		ipmr_cache_put((struct mfc_cache *) fib_work->men_info.mfc);
+		mr_cache_put(fib_work->men_info.mfc);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
 		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
-		ipmr_cache_put((struct mfc_cache *) fib_work->men_info.mfc);
+		mr_cache_put(fib_work->men_info.mfc);
 		break;
 	case FIB_EVENT_VIF_ADD:
 		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
@@ -5769,7 +5769,7 @@ mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
 	case FIB_EVENT_ENTRY_ADD: /* fall through */
 	case FIB_EVENT_ENTRY_DEL:
 		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
-		ipmr_cache_hold((struct mfc_cache *) fib_work->men_info.mfc);
+		mr_cache_hold(fib_work->men_info.mfc);
 		break;
 	case FIB_EVENT_VIF_ADD: /* fall through */
 	case FIB_EVENT_VIF_DEL:
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index c855d80b51f7..9a36fad9e068 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -84,23 +84,4 @@ struct rtmsg;
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
 		   struct rtmsg *rtm, u32 portid);
-
-#ifdef CONFIG_IP_MROUTE
-void ipmr_cache_free(struct mfc_cache *mfc_cache);
-#else
-static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
-{
-}
-#endif
-
-static inline void ipmr_cache_put(struct mfc_cache *c)
-{
-	if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount))
-		ipmr_cache_free(c);
-}
-static inline void ipmr_cache_hold(struct mfc_cache *c)
-{
-	refcount_inc(&c->_c.mfc_un.res.refcount);
-}
-
 #endif
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 289eb5aa7b5d..d617fe45543e 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -125,6 +125,7 @@ enum {
  * @refcount: reference count for this entry
  * @list: global entry list
  * @rcu: used for entry destruction
+ * @free: Operation used for freeing an entry under RCU
  */
 struct mr_mfc {
 	struct rhlist_head mnode;
@@ -150,8 +151,20 @@ struct mr_mfc {
 	} mfc_un;
 	struct list_head list;
 	struct rcu_head	rcu;
+	void (*free)(struct rcu_head *head);
 };
 
+static inline void mr_cache_put(struct mr_mfc *c)
+{
+	if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+		call_rcu(&c->rcu, c->free);
+}
+
+static inline void mr_cache_hold(struct mr_mfc *c)
+{
+	refcount_inc(&c->mfc_un.res.refcount);
+}
+
 struct mfc_entry_notifier_info {
 	struct fib_notifier_info info;
 	struct mr_mfc *mfc;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 24c340021aba..e79211a8537c 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -732,11 +732,10 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
 	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
 }
 
-void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free(struct mfc_cache *c)
 {
 	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
 }
-EXPORT_SYMBOL(ipmr_cache_free);
 
 /* Destroy an unresolved cache entry, killing queued skbs
  * and reporting error to netlink readers.
@@ -987,6 +986,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 	if (c) {
 		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 		c->_c.mfc_un.res.minvif = MAXVIFS;
+		c->_c.free = ipmr_cache_free_rcu;
 		refcount_set(&c->_c.mfc_un.res.refcount, 1);
 	}
 	return c;
@@ -1206,7 +1206,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
 	list_del_rcu(&c->_c.list);
 	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
 	mroute_netlink_event(mrt, c, RTM_DELROUTE);
-	ipmr_cache_put(c);
+	mr_cache_put(&c->_c);
 
 	return 0;
 }
@@ -1318,7 +1318,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
 					      mrt->id);
 		mroute_netlink_event(mrt, cache, RTM_DELROUTE);
-		ipmr_cache_put(cache);
+		mr_cache_put(c);
 	}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a187c523a95f..1c8fa29d155a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -989,6 +989,8 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
 		return NULL;
 	c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 	c->_c.mfc_un.res.minvif = MAXMIFS;
+	c->_c.free = ip6mr_cache_free_rcu;
+	refcount_set(&c->_c.mfc_un.res.refcount, 1);
 	return c;
 }
 
@@ -1227,7 +1229,7 @@ static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
 	call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
 				       FIB_EVENT_ENTRY_DEL, c, mrt->id);
 	mr6_netlink_event(mrt, c, RTM_DELROUTE);
-	ip6mr_cache_free(c);
+	mr_cache_put(&c->_c);
 	return 0;
 }
 
@@ -1516,7 +1518,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
 		list_del_rcu(&c->list);
 		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
-		ip6mr_cache_free((struct mfc6_cache *)c);
+		mr_cache_put(c);
 	}
 
 	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {

From 9a3d183ad6079997d6f11c0294c2e0f12cface94 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:37 +0300
Subject: [PATCH 1245/1678] mlxsw: reg: Configure RIF to forward IPv6 multicast
 packets

Similarly to what was done in commit 4af5964e5888 ("mlxsw: reg:
Configure RIF to forward IPv4 multicast packets by default") we now set
two additional bits to allow IPv6 multicast forwarding.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index e002398364c8..8397bed1f372 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4225,6 +4225,12 @@ MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
  */
 MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1);
 
+/* reg_ritr_ipv6_mc
+ * IPv6 multicast routing enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_mc, 0x00, 26, 1);
+
 enum mlxsw_reg_ritr_if_type {
 	/* VLAN interface. */
 	MLXSW_REG_RITR_VLAN_IF,
@@ -4290,6 +4296,14 @@ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
  */
 MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1);
 
+/* reg_ritr_ipv6_mc_fe
+ * IPv6 Multicast Forwarding Enable.
+ * When disabled, forwarding is blocked but local traffic (traps and IP to me)
+ * will be enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv6_mc_fe, 0x04, 26, 1);
+
 /* reg_ritr_lb_en
  * Loop-back filter enable for unicast packets.
  * If the flag is set then loop-back filter for unicast packets is
@@ -4513,12 +4527,14 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
 	mlxsw_reg_ritr_ipv4_set(payload, 1);
 	mlxsw_reg_ritr_ipv6_set(payload, 1);
 	mlxsw_reg_ritr_ipv4_mc_set(payload, 1);
+	mlxsw_reg_ritr_ipv6_mc_set(payload, 1);
 	mlxsw_reg_ritr_type_set(payload, type);
 	mlxsw_reg_ritr_op_set(payload, op);
 	mlxsw_reg_ritr_rif_set(payload, rif);
 	mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
 	mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
 	mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1);
+	mlxsw_reg_ritr_ipv6_mc_fe_set(payload, 1);
 	mlxsw_reg_ritr_lb_en_set(payload, 1);
 	mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
 	mlxsw_reg_ritr_mtu_set(payload, mtu);

From a82b1b8fc07cd0715b034d49379de964be464abc Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:38 +0300
Subject: [PATCH 1246/1678] mlxsw: reg: Add register settings for IPv6
 multicast routing

Add new fields for the rmft register necessary for setting the IPv6
multicast FIB table. Add a matching wrapper function for filling
the register in the IPv6 scenario.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 58 +++++++++++++++++------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8397bed1f372..6218231e379e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -6318,30 +6318,34 @@ MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1);
  */
 MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16);
 
-/* reg_rmft2_dip4
- * Destination IPv4 address
+/* reg_rmft2_dip{4,6}
+ * Destination IPv4/6 address
  * Access: RW
  */
+MLXSW_ITEM_BUF(reg, rmft2, dip6, 0x10, 16);
 MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32);
 
-/* reg_rmft2_dip4_mask
+/* reg_rmft2_dip{4,6}_mask
  * A bit that is set directs the TCAM to compare the corresponding bit in key. A
  * bit that is clear directs the TCAM to ignore the corresponding bit in key.
  * Access: RW
  */
+MLXSW_ITEM_BUF(reg, rmft2, dip6_mask, 0x20, 16);
 MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32);
 
-/* reg_rmft2_sip4
- * Source IPv4 address
+/* reg_rmft2_sip{4,6}
+ * Source IPv4/6 address
  * Access: RW
  */
+MLXSW_ITEM_BUF(reg, rmft2, sip6, 0x30, 16);
 MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32);
 
-/* reg_rmft2_sip4_mask
+/* reg_rmft2_sip{4,6}_mask
  * A bit that is set directs the TCAM to compare the corresponding bit in key. A
  * bit that is clear directs the TCAM to ignore the corresponding bit in key.
  * Access: RW
  */
+MLXSW_ITEM_BUF(reg, rmft2, sip6_mask, 0x40, 16);
 MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32);
 
 /* reg_rmft2_flexible_action_set
@@ -6359,26 +6363,52 @@ MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80,
 	       MLXSW_REG_FLEX_ACTION_SET_LEN);
 
 static inline void
-mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
-			  enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
-			  u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
-			  const char *flexible_action_set)
+mlxsw_reg_rmft2_common_pack(char *payload, bool v, u16 offset,
+			    u16 virtual_router,
+			    enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+			    const char *flex_action_set)
 {
 	MLXSW_REG_ZERO(rmft2, payload);
 	mlxsw_reg_rmft2_v_set(payload, v);
-	mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
 	mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE);
 	mlxsw_reg_rmft2_offset_set(payload, offset);
 	mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router);
 	mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask);
 	mlxsw_reg_rmft2_irif_set(payload, irif);
+	if (flex_action_set)
+		mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
+							      flex_action_set);
+}
+
+static inline void
+mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+			  enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+			  u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
+			  const char *flexible_action_set)
+{
+	mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router,
+				    irif_mask, irif, flexible_action_set);
+	mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
 	mlxsw_reg_rmft2_dip4_set(payload, dip4);
 	mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask);
 	mlxsw_reg_rmft2_sip4_set(payload, sip4);
 	mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask);
-	if (flexible_action_set)
-		mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
-							      flexible_action_set);
+}
+
+static inline void
+mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+			  enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+			  struct in6_addr dip6, struct in6_addr dip6_mask,
+			  struct in6_addr sip6, struct in6_addr sip6_mask,
+			  const char *flexible_action_set)
+{
+	mlxsw_reg_rmft2_common_pack(payload, v, offset, virtual_router,
+				    irif_mask, irif, flexible_action_set);
+	mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV6);
+	mlxsw_reg_rmft2_dip6_memcpy_to(payload, (void *)&dip6);
+	mlxsw_reg_rmft2_dip6_mask_memcpy_to(payload, (void *)&dip6_mask);
+	mlxsw_reg_rmft2_sip6_memcpy_to(payload, (void *)&sip6);
+	mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask);
 }
 
 /* MFCR - Management Fan Control Register

From a3b66866a71434f3804eba390e9dc343c5c79f3f Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:39 +0300
Subject: [PATCH 1247/1678] mlxsw: spectrum_mr: Pass protocol as part of
 catchall route params

Since commit c011ec1bbfd6 ("mlxsw: spectrum: Add the multicast routing
offloading logic") spectrum_mr did not populate the protocol portion of
the catcahall_route_params; mr-tcam logic worked correctly for ipv4
since the enum value for MLXSW_SP_L3_PROTO_IPV4 is '0'.

Explicitly fill the protocol as we'll soon need to differentiate between
ipv4 and ipv6.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index 51b104ae2eec..a8f4927ff150 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -848,6 +848,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
 		.prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
 		.key = {
 			.vrid = vr_id,
+			.proto = proto,
 		},
 		.value = {
 			.route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,

From 9742f866bd82f6a7fc3dafa538bd0fa2a73a1e5f Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:40 +0300
Subject: [PATCH 1248/1678] mlxsw: spectrum_router: Support IPv6 multicast to
 host CPU

A step toward offloading IPv6 routing, this adds an additional
multicast routing table meant for IPv6 [with its underlying TCAM
region] and populates the default rule for IPv6 multicast packets.

Following this, ingress IPv6 multicast packets would be trapped and
delivered to the host CPU.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlxsw/spectrum_mr_tcam.c         | 105 ++++++++++++------
 .../ethernet/mellanox/mlxsw/spectrum_router.c |  69 ++++++++----
 .../ethernet/mellanox/mlxsw/spectrum_router.h |   1 +
 3 files changed, 121 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
index 4c7f32d4288d..4f4c0d311883 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -51,7 +51,7 @@ struct mlxsw_sp_mr_tcam_region {
 };
 
 struct mlxsw_sp_mr_tcam {
-	struct mlxsw_sp_mr_tcam_region ipv4_tcam_region;
+	struct mlxsw_sp_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
 };
 
 /* This struct maps to one RIGR2 register entry */
@@ -316,20 +316,37 @@ static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
 					  mlxsw_afa_block_first_set(afa_block));
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
-	default:
-		WARN_ON_ONCE(1);
+		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
+					  key->vrid,
+					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+					  key->group.addr6,
+					  key->group_mask.addr6,
+					  key->source.addr6,
+					  key->source_mask.addr6,
+					  mlxsw_afa_block_first_set(afa_block));
 	}
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
 }
 
 static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
+					 struct mlxsw_sp_mr_route_key *key,
 					 struct parman_item *parman_item)
 {
+	struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
 	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
 
-	mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid,
-				  0, 0, 0, 0, 0, 0, NULL);
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
+					  vrid, 0, 0, 0, 0, 0, 0, NULL);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
+					  vrid, 0, 0, zero_addr, zero_addr,
+					  zero_addr, zero_addr, NULL);
+		break;
+	}
 
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
 }
@@ -353,27 +370,30 @@ mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static struct mlxsw_sp_mr_tcam_region *
+mlxsw_sp_mr_tcam_protocol_region(struct mlxsw_sp_mr_tcam *mr_tcam,
+				 enum mlxsw_sp_l3proto proto)
+{
+	return &mr_tcam->tcam_regions[proto];
+}
+
 static int
 mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
 				       struct mlxsw_sp_mr_tcam_route *route,
 				       enum mlxsw_sp_mr_route_prio prio)
 {
-	struct parman_prio *parman_prio = NULL;
+	struct mlxsw_sp_mr_tcam_region *tcam_region;
 	int err;
 
-	switch (route->key.proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio];
-		err = parman_item_add(mr_tcam->ipv4_tcam_region.parman,
-				      parman_prio, &route->parman_item);
-		if (err)
-			return err;
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-	default:
-		WARN_ON_ONCE(1);
-	}
-	route->parman_prio = parman_prio;
+	tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
+						       route->key.proto);
+	err = parman_item_add(tcam_region->parman,
+			      &tcam_region->parman_prios[prio],
+			      &route->parman_item);
+	if (err)
+		return err;
+
+	route->parman_prio = &tcam_region->parman_prios[prio];
 	return 0;
 }
 
@@ -381,15 +401,13 @@ static void
 mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
 					  struct mlxsw_sp_mr_tcam_route *route)
 {
-	switch (route->key.proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		parman_item_remove(mr_tcam->ipv4_tcam_region.parman,
-				   route->parman_prio, &route->parman_item);
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-	default:
-		WARN_ON_ONCE(1);
-	}
+	struct mlxsw_sp_mr_tcam_region *tcam_region;
+
+	tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
+						       route->key.proto);
+
+	parman_item_remove(tcam_region->parman,
+			   route->parman_prio, &route->parman_item);
 }
 
 static int
@@ -462,7 +480,7 @@ static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
 
 	mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
-				      &route->parman_item);
+				      &route->key, &route->parman_item);
 	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
 	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
 	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
@@ -806,21 +824,42 @@ mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
 static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+	u32 rtar_key;
+	int err;
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
 	    !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
 		return -EIO;
 
-	return mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-					    &mr_tcam->ipv4_tcam_region,
-					    MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST);
+	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
+	err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
+					   &region[MLXSW_SP_L3_PROTO_IPV4],
+					   rtar_key);
+	if (err)
+		return err;
+
+	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
+	err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
+					   &region[MLXSW_SP_L3_PROTO_IPV6],
+					   rtar_key);
+	if (err)
+		goto err_ipv6_region_init;
+
+	return 0;
+
+err_ipv6_region_init:
+	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+	return err;
 }
 
 static void mlxsw_sp_mr_tcam_fini(void *priv)
 {
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
 
-	mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region);
+	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
+	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
 }
 
 const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index be241c708bb9..caa17e0a39fa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -467,7 +467,7 @@ struct mlxsw_sp_vr {
 	unsigned int rif_count;
 	struct mlxsw_sp_fib *fib4;
 	struct mlxsw_sp_fib *fib6;
-	struct mlxsw_sp_mr_table *mr4_table;
+	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
 };
 
 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -711,7 +711,9 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
 
 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 {
-	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
+	return !!vr->fib4 || !!vr->fib6 ||
+	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
+	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
 }
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -789,7 +791,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 					      u32 tb_id,
 					      struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp_mr_table *mr4_table;
+	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
 	struct mlxsw_sp_fib *fib4;
 	struct mlxsw_sp_fib *fib6;
 	struct mlxsw_sp_vr *vr;
@@ -812,15 +814,25 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 					     MLXSW_SP_L3_PROTO_IPV4);
 	if (IS_ERR(mr4_table)) {
 		err = PTR_ERR(mr4_table);
-		goto err_mr_table_create;
+		goto err_mr4_table_create;
 	}
+	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+					     MLXSW_SP_L3_PROTO_IPV6);
+	if (IS_ERR(mr6_table)) {
+		err = PTR_ERR(mr6_table);
+		goto err_mr6_table_create;
+	}
+
 	vr->fib4 = fib4;
 	vr->fib6 = fib6;
-	vr->mr4_table = mr4_table;
+	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
+	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
 	vr->tb_id = tb_id;
 	return vr;
 
-err_mr_table_create:
+err_mr6_table_create:
+	mlxsw_sp_mr_table_destroy(mr4_table);
+err_mr4_table_create:
 	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
 err_fib6_create:
 	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
@@ -830,8 +842,10 @@ err_fib6_create:
 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_vr *vr)
 {
-	mlxsw_sp_mr_table_destroy(vr->mr4_table);
-	vr->mr4_table = NULL;
+	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
+	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
+	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
+	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
 	vr->fib6 = NULL;
 	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
@@ -854,7 +868,8 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
 {
 	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
 	    list_empty(&vr->fib6->node_list) &&
-	    mlxsw_sp_mr_table_empty(vr->mr4_table))
+	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
+	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
 		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
 }
 
@@ -5391,7 +5406,7 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
 	if (IS_ERR(vr))
 		return PTR_ERR(vr);
 
-	return mlxsw_sp_mr_route4_add(vr->mr4_table,
+	return mlxsw_sp_mr_route4_add(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
 				      (struct mfc_cache *) men_info->mfc,
 				      replace);
 }
@@ -5408,7 +5423,7 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
 	if (WARN_ON(!vr))
 		return;
 
-	mlxsw_sp_mr_route4_del(vr->mr4_table,
+	mlxsw_sp_mr_route4_del(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
 			       (struct mfc_cache *) men_info->mfc);
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
 }
@@ -5428,7 +5443,8 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(vr);
 
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
-	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
+	return mlxsw_sp_mr_vif_add(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
+				   ven_info->dev,
 				   ven_info->vif_index,
 				   ven_info->vif_flags, rif);
 }
@@ -5446,7 +5462,8 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
 	if (WARN_ON(!vr))
 		return;
 
-	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
+	mlxsw_sp_mr_vif_del(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
+			    ven_info->vif_index);
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
 }
 
@@ -5538,7 +5555,7 @@ static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
 
 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 {
-	int i;
+	int i, j;
 
 	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
@@ -5546,7 +5563,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 		if (!mlxsw_sp_vr_is_used(vr))
 			continue;
 
-		mlxsw_sp_mr_table_flush(vr->mr4_table);
+		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
+			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
 		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
 
 		/* If virtual router was only used for IPv4, then it's no
@@ -6041,7 +6059,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_rif *rif;
 	struct mlxsw_sp_vr *vr;
 	u16 rif_index;
-	int err;
+	int i, err;
 
 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
 	ops = mlxsw_sp->router->rif_ops_arr[type];
@@ -6081,9 +6099,11 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_configure;
 
-	err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
-	if (err)
-		goto err_mr_rif_add;
+	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
+		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
+		if (err)
+			goto err_mr_rif_add;
+	}
 
 	mlxsw_sp_rif_counters_alloc(rif);
 	mlxsw_sp->router->rifs[rif_index] = rif;
@@ -6091,6 +6111,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	return rif;
 
 err_mr_rif_add:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
 	ops->deconfigure(rif);
 err_configure:
 	if (fid)
@@ -6110,13 +6132,15 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
 	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 	struct mlxsw_sp_fid *fid = rif->fid;
 	struct mlxsw_sp_vr *vr;
+	int i;
 
 	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
 	vr = &mlxsw_sp->router->vrs[rif->vr_id];
 
 	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
 	mlxsw_sp_rif_counters_free(rif);
-	mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
+	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
+		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
 	ops->deconfigure(rif);
 	if (fid)
 		/* Loopback RIFs are not associated with a FID. */
@@ -6523,13 +6547,16 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
 
 	if (rif->mtu != dev->mtu) {
 		struct mlxsw_sp_vr *vr;
+		int i;
 
 		/* The RIF is relevant only to its mr_table instance, as unlike
 		 * unicast routing, in multicast routing a RIF cannot be shared
 		 * between several multicast routing tables.
 		 */
 		vr = &mlxsw_sp->router->vrs[rif->vr_id];
-		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
+		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
+			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
+						   rif, dev->mtu);
 	}
 
 	ether_addr_copy(rif->addr, dev->dev_addr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 1fb82246ce96..a01edcf56797 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -41,6 +41,7 @@
 enum mlxsw_sp_l3proto {
 	MLXSW_SP_L3_PROTO_IPV4,
 	MLXSW_SP_L3_PROTO_IPV6,
+#define MLXSW_SP_L3_PROTO_MAX	(MLXSW_SP_L3_PROTO_IPV6 + 1)
 };
 
 union mlxsw_sp_l3addr {

From 4caef4638f0bea78aa1625ad7fa20df5076d17bf Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:41 +0300
Subject: [PATCH 1249/1678] mlxsw: spectrum_mr: Convert into using mr_mfc

Current multicast routing logic in driver assumes it's always meant to
deal with IPv4 multicast routes, leaving several placeholders for
later IPv6 support [currently usually WARN()].

This patch changes the driver's internal multicast route struct into
holding a common mr_mfc instead of the IPv4 mfc_cache.
The various placeholders are grouped into 2:
  - Functions that require only the common bits; These remain and the
    restriction for IPv4-only is lifted.
  - Function that require IPv4-specifics - for handling these functions
    we add sets of operations that encapsulate the protocol differences

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.c | 192 ++++++++++--------
 1 file changed, 108 insertions(+), 84 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index a8f4927ff150..ae8c5b5387db 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -47,6 +47,11 @@ struct mlxsw_sp_mr {
 	/* priv has to be always the last item */
 };
 
+struct mlxsw_sp_mr_vif;
+struct mlxsw_sp_mr_vif_ops {
+	bool (*is_regular)(const struct mlxsw_sp_mr_vif *vif);
+};
+
 struct mlxsw_sp_mr_vif {
 	struct net_device *dev;
 	const struct mlxsw_sp_rif *rif;
@@ -61,6 +66,9 @@ struct mlxsw_sp_mr_vif {
 	 * instance is used as an ingress VIF
 	 */
 	struct list_head route_ivif_list;
+
+	/* Protocol specific operations for a VIF */
+	const struct mlxsw_sp_mr_vif_ops *ops;
 };
 
 struct mlxsw_sp_mr_route_vif_entry {
@@ -70,6 +78,17 @@ struct mlxsw_sp_mr_route_vif_entry {
 	struct mlxsw_sp_mr_route *mr_route;
 };
 
+struct mlxsw_sp_mr_table;
+struct mlxsw_sp_mr_table_ops {
+	bool (*is_route_valid)(const struct mlxsw_sp_mr_table *mr_table,
+			       const struct mr_mfc *mfc);
+	void (*key_create)(struct mlxsw_sp_mr_table *mr_table,
+			   struct mlxsw_sp_mr_route_key *key,
+			   struct mr_mfc *mfc);
+	bool (*is_route_starg)(const struct mlxsw_sp_mr_table *mr_table,
+			       const struct mlxsw_sp_mr_route *mr_route);
+};
+
 struct mlxsw_sp_mr_table {
 	struct list_head node;
 	enum mlxsw_sp_l3proto proto;
@@ -78,6 +97,7 @@ struct mlxsw_sp_mr_table {
 	struct mlxsw_sp_mr_vif vifs[MAXVIFS];
 	struct list_head route_list;
 	struct rhashtable route_ht;
+	const struct mlxsw_sp_mr_table_ops *ops;
 	char catchall_route_priv[0];
 	/* catchall_route_priv has to be always the last item */
 };
@@ -88,7 +108,7 @@ struct mlxsw_sp_mr_route {
 	struct mlxsw_sp_mr_route_key key;
 	enum mlxsw_sp_mr_route_action route_action;
 	u16 min_mtu;
-	struct mfc_cache *mfc4;
+	struct mr_mfc *mfc;
 	void *route_priv;
 	const struct mlxsw_sp_mr_table *mr_table;
 	/* A list of route_vif_entry structs that point to the egress VIFs */
@@ -104,14 +124,9 @@ static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
 	.automatic_shrinking = true,
 };
 
-static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif)
-{
-	return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
-}
-
 static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
 {
-	return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif;
+	return vif->ops->is_regular(vif) && vif->dev && vif->rif;
 }
 
 static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
@@ -122,18 +137,9 @@ static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
 static bool
 mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
 {
-	vifi_t ivif;
+	vifi_t ivif = mr_route->mfc->mfc_parent;
 
-	switch (mr_route->mr_table->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		ivif = mr_route->mfc4->_c.mfc_parent;
-		return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		/* fall through */
-	default:
-		WARN_ON_ONCE(1);
-	}
-	return false;
+	return mr_route->mfc->mfc_un.res.ttls[ivif] != 255;
 }
 
 static int
@@ -149,19 +155,6 @@ mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
 	return valid_evifs;
 }
 
-static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route)
-{
-	switch (mr_route->mr_table->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY);
-	case MLXSW_SP_L3_PROTO_IPV6:
-		/* fall through */
-	default:
-		WARN_ON_ONCE(1);
-	}
-	return false;
-}
-
 static enum mlxsw_sp_mr_route_action
 mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
 {
@@ -174,7 +167,8 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
 	/* The kernel does not match a (*,G) route that the ingress interface is
 	 * not one of the egress interfaces, so trap these kind of routes.
 	 */
-	if (mlxsw_sp_mr_route_starg(mr_route) &&
+	if (mr_route->mr_table->ops->is_route_starg(mr_route->mr_table,
+						    mr_route) &&
 	    !mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
 		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
 
@@ -195,25 +189,11 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
 static enum mlxsw_sp_mr_route_prio
 mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
 {
-	return mlxsw_sp_mr_route_starg(mr_route) ?
+	return mr_route->mr_table->ops->is_route_starg(mr_route->mr_table,
+						       mr_route) ?
 		MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
 }
 
-static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
-				   struct mlxsw_sp_mr_route_key *key,
-				   const struct mfc_cache *mfc)
-{
-	bool starg = (mfc->mfc_origin == htonl(INADDR_ANY));
-
-	memset(key, 0, sizeof(*key));
-	key->vrid = mr_table->vr_id;
-	key->proto = mr_table->proto;
-	key->group.addr4 = mfc->mfc_mcastgrp;
-	key->group_mask.addr4 = htonl(0xffffffff);
-	key->source.addr4 = mfc->mfc_origin;
-	key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff);
-}
-
 static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
 				       struct mlxsw_sp_mr_vif *mr_vif)
 {
@@ -356,12 +336,13 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 	if (!mr_route)
 		return ERR_PTR(-ENOMEM);
 	INIT_LIST_HEAD(&mr_route->evif_list);
-	mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc);
 
 	/* Find min_mtu and link iVIF and eVIFs */
 	mr_route->min_mtu = ETH_MAX_MTU;
 	mr_cache_hold(&mfc->_c);
-	mr_route->mfc4 = mfc;
+	mr_route->mfc = &mfc->_c;
+	mr_table->ops->key_create(mr_table, &mr_route->key, mr_route->mfc);
+
 	mr_route->mr_table = mr_table;
 	for (i = 0; i < MAXVIFS; i++) {
 		if (mfc->_c.mfc_un.res.ttls[i] != 255) {
@@ -393,7 +374,7 @@ static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
 	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
 
 	mlxsw_sp_mr_route_ivif_unlink(mr_route);
-	mr_cache_put((struct mr_mfc *)mr_route->mfc4);
+	mr_cache_put((struct mr_mfc *)mr_route->mfc);
 	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
 		mlxsw_sp_mr_route_evif_unlink(rve);
 	kfree(mr_route);
@@ -416,18 +397,10 @@ static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
 static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
 					bool offload)
 {
-	switch (mr_route->mr_table->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		if (offload)
-			mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD;
-		else
-			mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD;
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		/* fall through */
-	default:
-		WARN_ON_ONCE(1);
-	}
+	if (offload)
+		mr_route->mfc->mfc_flags |= MFC_OFFLOAD;
+	else
+		mr_route->mfc->mfc_flags &= ~MFC_OFFLOAD;
 }
 
 static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
@@ -456,15 +429,8 @@ int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
 	struct mlxsw_sp_mr_route *mr_route;
 	int err;
 
-	/* If the route is a (*,*) route, abort, as these kind of routes are
-	 * used for proxy routes.
-	 */
-	if (mfc->mfc_origin == htonl(INADDR_ANY) &&
-	    mfc->mfc_mcastgrp == htonl(INADDR_ANY)) {
-		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
-			 "Offloading proxy routes is not supported.\n");
+	if (!mr_table->ops->is_route_valid(mr_table, &mfc->_c))
 		return -EINVAL;
-	}
 
 	/* Create a new route */
 	mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
@@ -535,7 +501,7 @@ void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
 	struct mlxsw_sp_mr_route *mr_route;
 	struct mlxsw_sp_mr_route_key key;
 
-	mlxsw_sp_mr_route4_key(mr_table, &key, mfc);
+	mr_table->ops->key_create(mr_table, &key, &mfc->_c);
 	mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
 					  mlxsw_sp_mr_route_ht_params);
 	if (mr_route)
@@ -840,6 +806,70 @@ void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
 	}
 }
 
+/* Protocol specific functions */
+static bool
+mlxsw_sp_mr_route4_validate(const struct mlxsw_sp_mr_table *mr_table,
+			    const struct mr_mfc *c)
+{
+	struct mfc_cache *mfc = (struct mfc_cache *) c;
+
+	/* If the route is a (*,*) route, abort, as these kind of routes are
+	 * used for proxy routes.
+	 */
+	if (mfc->mfc_origin == htonl(INADDR_ANY) &&
+	    mfc->mfc_mcastgrp == htonl(INADDR_ANY)) {
+		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+			 "Offloading proxy routes is not supported.\n");
+		return false;
+	}
+	return true;
+}
+
+static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
+				   struct mlxsw_sp_mr_route_key *key,
+				   struct mr_mfc *c)
+{
+	const struct mfc_cache *mfc = (struct mfc_cache *) c;
+	bool starg;
+
+	starg = (mfc->mfc_origin == htonl(INADDR_ANY));
+
+	memset(key, 0, sizeof(*key));
+	key->vrid = mr_table->vr_id;
+	key->proto = MLXSW_SP_L3_PROTO_IPV4;
+	key->group.addr4 = mfc->mfc_mcastgrp;
+	key->group_mask.addr4 = htonl(0xffffffff);
+	key->source.addr4 = mfc->mfc_origin;
+	key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff);
+}
+
+static bool mlxsw_sp_mr_route4_starg(const struct mlxsw_sp_mr_table *mr_table,
+				     const struct mlxsw_sp_mr_route *mr_route)
+{
+	return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY);
+}
+
+static bool mlxsw_sp_mr_vif4_is_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+	return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
+}
+
+static struct
+mlxsw_sp_mr_vif_ops mlxsw_sp_mr_vif_ops_arr[] = {
+	{
+		.is_regular = mlxsw_sp_mr_vif4_is_regular,
+	},
+};
+
+static struct
+mlxsw_sp_mr_table_ops mlxsw_sp_mr_table_ops_arr[] = {
+	{
+		.is_route_valid = mlxsw_sp_mr_route4_validate,
+		.key_create = mlxsw_sp_mr_route4_key,
+		.is_route_starg = mlxsw_sp_mr_route4_starg,
+	},
+};
+
 struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
 						   u32 vr_id,
 						   enum mlxsw_sp_l3proto proto)
@@ -867,6 +897,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
 	mr_table->vr_id = vr_id;
 	mr_table->mlxsw_sp = mlxsw_sp;
 	mr_table->proto = proto;
+	mr_table->ops = &mlxsw_sp_mr_table_ops_arr[proto];
 	INIT_LIST_HEAD(&mr_table->route_list);
 
 	err = rhashtable_init(&mr_table->route_ht,
@@ -877,6 +908,7 @@ struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
 	for (i = 0; i < MAXVIFS; i++) {
 		INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
 		INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
+		mr_table->vifs[i].ops = &mlxsw_sp_mr_vif_ops_arr[proto];
 	}
 
 	err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
@@ -943,18 +975,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
 	mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
 				&bytes);
 
-	switch (mr_route->mr_table->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		if (mr_route->mfc4->_c.mfc_un.res.pkt != packets)
-			mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies;
-		mr_route->mfc4->_c.mfc_un.res.pkt = packets;
-		mr_route->mfc4->_c.mfc_un.res.bytes = bytes;
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		/* fall through */
-	default:
-		WARN_ON_ONCE(1);
-	}
+	if (mr_route->mfc->mfc_un.res.pkt != packets)
+		mr_route->mfc->mfc_un.res.lastuse = jiffies;
+	mr_route->mfc->mfc_un.res.pkt = packets;
+	mr_route->mfc->mfc_un.res.bytes = bytes;
 }
 
 static void mlxsw_sp_mr_stats_update(struct work_struct *work)

From eb35da0ce831547e5a8b09a2b315d5e27b19d41f Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:42 +0300
Subject: [PATCH 1250/1678] mlxsw: spectrum_router: Make IPMR-related APIs
 family agnostic

spectrum_router and spectrum_mr have several APIs that are used to
manipulate configurations originating from ipmr fib notifications.
Following previous patches all the protocol-specifics that are necessary
for the configuration are hidden within spectrum_mr. This allows us to
clean the API and make sure that other than choosing the mr_table based
on the fib notification family, spectrum_router wouldn't care about the
source of the notification when passing it onward to spectrum_mr.

This would later allow us to leverage the same code for fib
notifications originating from ip6mr.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.c | 52 +++++++------------
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.h |  8 +--
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 33 ++++++++----
 3 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index ae8c5b5387db..ba6bcbe6f75b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -323,8 +323,8 @@ static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
 }
 
 static struct mlxsw_sp_mr_route *
-mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
-			  struct mfc_cache *mfc)
+mlxsw_sp_mr_route_create(struct mlxsw_sp_mr_table *mr_table,
+			 struct mr_mfc *mfc)
 {
 	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
 	struct mlxsw_sp_mr_route *mr_route;
@@ -339,13 +339,13 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 
 	/* Find min_mtu and link iVIF and eVIFs */
 	mr_route->min_mtu = ETH_MAX_MTU;
-	mr_cache_hold(&mfc->_c);
-	mr_route->mfc = &mfc->_c;
+	mr_cache_hold(mfc);
+	mr_route->mfc = mfc;
 	mr_table->ops->key_create(mr_table, &mr_route->key, mr_route->mfc);
 
 	mr_route->mr_table = mr_table;
 	for (i = 0; i < MAXVIFS; i++) {
-		if (mfc->_c.mfc_un.res.ttls[i] != 255) {
+		if (mfc->mfc_un.res.ttls[i] != 255) {
 			err = mlxsw_sp_mr_route_evif_link(mr_route,
 							  &mr_table->vifs[i]);
 			if (err)
@@ -356,44 +356,30 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
 		}
 	}
 	mlxsw_sp_mr_route_ivif_link(mr_route,
-				    &mr_table->vifs[mfc->_c.mfc_parent]);
+				    &mr_table->vifs[mfc->mfc_parent]);
 
 	mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
 	return mr_route;
 err:
-	mr_cache_put(&mfc->_c);
+	mr_cache_put(mfc);
 	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
 		mlxsw_sp_mr_route_evif_unlink(rve);
 	kfree(mr_route);
 	return ERR_PTR(err);
 }
 
-static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
-				       struct mlxsw_sp_mr_route *mr_route)
+static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
+				      struct mlxsw_sp_mr_route *mr_route)
 {
 	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
 
 	mlxsw_sp_mr_route_ivif_unlink(mr_route);
-	mr_cache_put((struct mr_mfc *)mr_route->mfc);
+	mr_cache_put(mr_route->mfc);
 	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
 		mlxsw_sp_mr_route_evif_unlink(rve);
 	kfree(mr_route);
 }
 
-static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
-				      struct mlxsw_sp_mr_route *mr_route)
-{
-	switch (mr_table->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		/* fall through */
-	default:
-		WARN_ON_ONCE(1);
-	}
-}
-
 static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
 					bool offload)
 {
@@ -422,18 +408,18 @@ static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
 	mlxsw_sp_mr_route_destroy(mr_table, mr_route);
 }
 
-int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
-			   struct mfc_cache *mfc, bool replace)
+int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
+			  struct mr_mfc *mfc, bool replace)
 {
 	struct mlxsw_sp_mr_route *mr_orig_route = NULL;
 	struct mlxsw_sp_mr_route *mr_route;
 	int err;
 
-	if (!mr_table->ops->is_route_valid(mr_table, &mfc->_c))
+	if (!mr_table->ops->is_route_valid(mr_table, mfc))
 		return -EINVAL;
 
 	/* Create a new route */
-	mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
+	mr_route = mlxsw_sp_mr_route_create(mr_table, mfc);
 	if (IS_ERR(mr_route))
 		return PTR_ERR(mr_route);
 
@@ -478,7 +464,7 @@ int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
 				       &mr_orig_route->ht_node,
 				       mlxsw_sp_mr_route_ht_params);
 		list_del(&mr_orig_route->node);
-		mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route);
+		mlxsw_sp_mr_route_destroy(mr_table, mr_orig_route);
 	}
 
 	mlxsw_sp_mr_mfc_offload_update(mr_route);
@@ -491,17 +477,17 @@ err_rhashtable_insert:
 	list_del(&mr_route->node);
 err_no_orig_route:
 err_duplicate_route:
-	mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
+	mlxsw_sp_mr_route_destroy(mr_table, mr_route);
 	return err;
 }
 
-void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
-			    struct mfc_cache *mfc)
+void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+			   struct mr_mfc *mfc)
 {
 	struct mlxsw_sp_mr_route *mr_route;
 	struct mlxsw_sp_mr_route_key key;
 
-	mr_table->ops->key_create(mr_table, &key, &mfc->_c);
+	mr_table->ops->key_create(mr_table, &key, mfc);
 	mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
 					  mlxsw_sp_mr_route_ht_params);
 	if (mr_route)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
index 5d26a122af49..6f94fc9eea1b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -109,10 +109,10 @@ struct mlxsw_sp_mr_table;
 int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
 		     const struct mlxsw_sp_mr_ops *mr_ops);
 void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
-			   struct mfc_cache *mfc, bool replace);
-void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
-			    struct mfc_cache *mfc);
+int mlxsw_sp_mr_route_add(struct mlxsw_sp_mr_table *mr_table,
+			  struct mr_mfc *mfc, bool replace);
+void mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+			   struct mr_mfc *mfc);
 int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
 			struct net_device *dev, vifi_t vif_index,
 			unsigned long vif_flags,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index caa17e0a39fa..14b887b96cbb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5393,10 +5393,23 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static struct mlxsw_sp_mr_table *
+mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
+{
+	switch (family) {
+	case RTNL_FAMILY_IPMR:
+		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
+	default:
+		WARN_ON(1);
+		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
+	}
+}
+
 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mfc_entry_notifier_info *men_info,
 				     bool replace)
 {
+	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_vr *vr;
 
 	if (mlxsw_sp->router->aborted)
@@ -5406,14 +5419,14 @@ static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
 	if (IS_ERR(vr))
 		return PTR_ERR(vr);
 
-	return mlxsw_sp_mr_route4_add(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
-				      (struct mfc_cache *) men_info->mfc,
-				      replace);
+	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
+	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
 }
 
 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
 				      struct mfc_entry_notifier_info *men_info)
 {
+	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_vr *vr;
 
 	if (mlxsw_sp->router->aborted)
@@ -5423,8 +5436,8 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
 	if (WARN_ON(!vr))
 		return;
 
-	mlxsw_sp_mr_route4_del(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
-			       (struct mfc_cache *) men_info->mfc);
+	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
+	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
 }
 
@@ -5432,6 +5445,7 @@ static int
 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
 			      struct vif_entry_notifier_info *ven_info)
 {
+	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_rif *rif;
 	struct mlxsw_sp_vr *vr;
 
@@ -5442,9 +5456,9 @@ mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
 	if (IS_ERR(vr))
 		return PTR_ERR(vr);
 
+	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
 	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
-	return mlxsw_sp_mr_vif_add(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
-				   ven_info->dev,
+	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
 				   ven_info->vif_index,
 				   ven_info->vif_flags, rif);
 }
@@ -5453,6 +5467,7 @@ static void
 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
 			      struct vif_entry_notifier_info *ven_info)
 {
+	struct mlxsw_sp_mr_table *mrt;
 	struct mlxsw_sp_vr *vr;
 
 	if (mlxsw_sp->router->aborted)
@@ -5462,8 +5477,8 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
 	if (WARN_ON(!vr))
 		return;
 
-	mlxsw_sp_mr_vif_del(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4],
-			    ven_info->vif_index);
+	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
+	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
 }
 

From 6981e104a8c3300f053142142d4606d8bbfceee7 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:43 +0300
Subject: [PATCH 1251/1678] mlxsw: spectrum_mr: Add ipv6 specific operations

Populate the various operation structures meant for IPv6 with logic
unique to that protocol suite.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.c | 56 +++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/spectrum_mr.h |  1 +
 2 files changed, 57 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index ba6bcbe6f75b..a82539609d49 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -33,6 +33,7 @@
  */
 
 #include <linux/rhashtable.h>
+#include <net/ipv6.h>
 
 #include "spectrum_mr.h"
 #include "spectrum_router.h"
@@ -840,11 +841,60 @@ static bool mlxsw_sp_mr_vif4_is_regular(const struct mlxsw_sp_mr_vif *vif)
 	return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
 }
 
+static bool
+mlxsw_sp_mr_route6_validate(const struct mlxsw_sp_mr_table *mr_table,
+			    const struct mr_mfc *c)
+{
+	struct mfc6_cache *mfc = (struct mfc6_cache *) c;
+
+	/* If the route is a (*,*) route, abort, as these kind of routes are
+	 * used for proxy routes.
+	 */
+	if (ipv6_addr_any(&mfc->mf6c_origin) &&
+	    ipv6_addr_any(&mfc->mf6c_mcastgrp)) {
+		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+			 "Offloading proxy routes is not supported.\n");
+		return false;
+	}
+	return true;
+}
+
+static void mlxsw_sp_mr_route6_key(struct mlxsw_sp_mr_table *mr_table,
+				   struct mlxsw_sp_mr_route_key *key,
+				   struct mr_mfc *c)
+{
+	const struct mfc6_cache *mfc = (struct mfc6_cache *) c;
+
+	memset(key, 0, sizeof(*key));
+	key->vrid = mr_table->vr_id;
+	key->proto = MLXSW_SP_L3_PROTO_IPV6;
+	key->group.addr6 = mfc->mf6c_mcastgrp;
+	memset(&key->group_mask.addr6, 0xff, sizeof(key->group_mask.addr6));
+	key->source.addr6 = mfc->mf6c_origin;
+	if (!ipv6_addr_any(&mfc->mf6c_origin))
+		memset(&key->source_mask.addr6, 0xff,
+		       sizeof(key->source_mask.addr6));
+}
+
+static bool mlxsw_sp_mr_route6_starg(const struct mlxsw_sp_mr_table *mr_table,
+				     const struct mlxsw_sp_mr_route *mr_route)
+{
+	return ipv6_addr_any(&mr_route->key.source_mask.addr6);
+}
+
+static bool mlxsw_sp_mr_vif6_is_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+	return !(vif->vif_flags & MIFF_REGISTER);
+}
+
 static struct
 mlxsw_sp_mr_vif_ops mlxsw_sp_mr_vif_ops_arr[] = {
 	{
 		.is_regular = mlxsw_sp_mr_vif4_is_regular,
 	},
+	{
+		.is_regular = mlxsw_sp_mr_vif6_is_regular,
+	},
 };
 
 static struct
@@ -854,6 +904,12 @@ mlxsw_sp_mr_table_ops mlxsw_sp_mr_table_ops_arr[] = {
 		.key_create = mlxsw_sp_mr_route4_key,
 		.is_route_starg = mlxsw_sp_mr_route4_starg,
 	},
+	{
+		.is_route_valid = mlxsw_sp_mr_route6_validate,
+		.key_create = mlxsw_sp_mr_route6_key,
+		.is_route_starg = mlxsw_sp_mr_route6_starg,
+	},
+
 };
 
 struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
index 6f94fc9eea1b..7c864a86811d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -36,6 +36,7 @@
 #define _MLXSW_SPECTRUM_MCROUTER_H
 
 #include <linux/mroute.h>
+#include <linux/mroute6.h>
 #include "spectrum_router.h"
 #include "spectrum.h"
 

From 64ed1b9e8fb7c2c5a668546795a88ada5ce77c7d Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:44 +0300
Subject: [PATCH 1252/1678] mlxsw: spectrum_router: Process IP6MR fib
 notification

Following previous patches driver is ready to handle notifications
arriving from ip6mr - start processing those when they arrive following
the same manner ipmr currently goes through.

This should enable driver to start offloading ipv6 multicast routes.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_router.c   | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 14b887b96cbb..a9ccd974c620 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5396,13 +5396,10 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
 static struct mlxsw_sp_mr_table *
 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
 {
-	switch (family) {
-	case RTNL_FAMILY_IPMR:
+	if (family == RTNL_FAMILY_IPMR)
 		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
-	default:
-		WARN_ON(1);
-		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
-	}
+	else
+		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
 }
 
 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
@@ -5844,6 +5841,10 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event,
 		if (!ipmr_rule_default(rule) && !rule->l3mdev)
 			err = -1;
 		break;
+	case RTNL_FAMILY_IP6MR:
+		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
+			err = -1;
+		break;
 	}
 
 	if (err < 0)
@@ -5863,7 +5864,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 
 	if (!net_eq(info->net, &init_net) ||
 	    (info->family != AF_INET && info->family != AF_INET6 &&
-	     info->family != RTNL_FAMILY_IPMR))
+	     info->family != RTNL_FAMILY_IPMR &&
+	     info->family != RTNL_FAMILY_IP6MR))
 		return NOTIFY_DONE;
 
 	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
@@ -5893,6 +5895,7 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
 		mlxsw_sp_router_fib6_event(fib_work, info);
 		break;
+	case RTNL_FAMILY_IP6MR:
 	case RTNL_FAMILY_IPMR:
 		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
 		mlxsw_sp_router_fibmr_event(fib_work, info);

From 6a170d326f7a0d187b33eacb0344add8e223ca11 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalm@mellanox.com>
Date: Mon, 26 Mar 2018 15:01:45 +0300
Subject: [PATCH 1253/1678] mlxsw: spectrum: Add multicast router trap for
 PIMv6

Add a new trap for PIMv6 packets. As PIM already has a designated trap
group [ & rate limiter], simply use the same for PIMv6 as well.

Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 +
 drivers/net/ethernet/mellanox/mlxsw/trap.h     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 7885fc475f7e..4aa84442e357 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3380,6 +3380,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
 	/* Multicast Router Traps */
 	MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
+	MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
 	MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
 	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
 	MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index ec6cef8267ae..399e9d6993f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -77,6 +77,7 @@ enum {
 	MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
 	MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+	MLXSW_TRAP_ID_IPV6_PIM = 0x79,
 	MLXSW_TRAP_ID_IPV4_BGP = 0x88,
 	MLXSW_TRAP_ID_IPV6_BGP = 0x89,
 	MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,

From 9c20346b6309e20f64ee8e7054914ddc92c60baf Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:08 -0700
Subject: [PATCH 1254/1678] ice: Get switch config, scheduler config and device
 capabilities

This patch adds to the initialization flow by getting switch
configuration, scheduler configuration and device capabilities.

Switch configuration:
On boot, an L2 switch element is created in the firmware per physical
function. Each physical function is also mapped to a port, to which its
switch element is connected. In other words, this switch can be visualized
as an embedded vSwitch that can connect a physical function's virtual
station interfaces (VSIs) to the egress/ingress port. Egress/ingress
filters will be eventually created and applied on this switch element.
As part of the initialization flow, the driver gets configuration data
from this switch element and stores it.

Scheduler configuration:
The Tx scheduler is a subsystem responsible for setting and enforcing QoS.
As part of the initialization flow, the driver queries and stores the
default scheduler configuration for the given physical function.

Device capabilities:
As part of initialization, the driver has to determine what the device is
capable of (ex. max queues, VSIs, etc). This information is obtained from
the firmware and stored by the driver.

CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile       |   4 +-
 drivers/net/ethernet/intel/ice/ice.h          |   2 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   | 209 +++++++++++
 drivers/net/ethernet/intel/ice/ice_common.c   | 231 ++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |   2 +
 drivers/net/ethernet/intel/ice/ice_sched.c    | 340 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_sched.h    |  28 ++
 drivers/net/ethernet/intel/ice/ice_switch.c   | 144 ++++++++
 drivers/net/ethernet/intel/ice/ice_switch.h   |  14 +
 drivers/net/ethernet/intel/ice/ice_type.h     | 109 ++++++
 10 files changed, 1082 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_sched.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_sched.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_switch.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_switch.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index b9a32ddecb17..bd2cbe14e76e 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -10,4 +10,6 @@ obj-$(CONFIG_ICE) += ice.o
 ice-y := ice_main.o	\
 	 ice_controlq.o	\
 	 ice_common.o	\
-	 ice_nvm.o
+	 ice_nvm.o	\
+	 ice_switch.o	\
+	 ice_sched.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 4c4f161768ed..716c555532f4 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -16,7 +16,9 @@
 #include <linux/bitmap.h>
 #include "ice_devids.h"
 #include "ice_type.h"
+#include "ice_switch.h"
 #include "ice_common.h"
+#include "ice_sched.h"
 
 #define ICE_BAR0		0
 #define ICE_AQ_LEN		64
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 70fe00c90329..d1b70f0ed0e2 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -8,6 +8,8 @@
  * descriptor format.  It is shared between Firmware and Software.
  */
 
+#define ICE_AQC_TOPO_MAX_LEVEL_NUM	0x9
+
 struct ice_aqc_generic {
 	__le32 param0;
 	__le32 param1;
@@ -68,6 +70,40 @@ struct ice_aqc_req_res {
 	u8 reserved[2];
 };
 
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct ice_aqc_list_caps {
+	u8 cmd_flags;
+	u8 pf_index;
+	u8 reserved[2];
+	__le32 count;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Device/Function buffer entry, repeated per reported capability */
+struct ice_aqc_list_caps_elem {
+	__le16 cap;
+#define ICE_AQC_CAPS_VSI				0x0017
+#define ICE_AQC_CAPS_RSS				0x0040
+#define ICE_AQC_CAPS_RXQS				0x0041
+#define ICE_AQC_CAPS_TXQS				0x0042
+#define ICE_AQC_CAPS_MSIX				0x0043
+#define ICE_AQC_CAPS_MAX_MTU				0x0047
+
+	u8 major_ver;
+	u8 minor_ver;
+	/* Number of resources described by this capability */
+	__le32 number;
+	/* Only meaningful for some types of resources */
+	__le32 logical_id;
+	/* Only meaningful for some types of resources */
+	__le32 phys_id;
+	__le64 rsvd1;
+	__le64 rsvd2;
+};
+
 /* Clear PXE Command and response (direct 0x0110) */
 struct ice_aqc_clear_pxe {
 	u8 rx_cnt;
@@ -75,6 +111,161 @@ struct ice_aqc_clear_pxe {
 	u8 reserved[15];
 };
 
+/* Get switch configuration (0x0200) */
+struct ice_aqc_get_sw_cfg {
+	/* Reserved for command and copy of request flags for response */
+	__le16 flags;
+	/* First desc in case of command and next_elem in case of response
+	 * In case of response, if it is not zero, means all the configuration
+	 * was not returned and new command shall be sent with this value in
+	 * the 'first desc' field
+	 */
+	__le16 element;
+	/* Reserved for command, only used for response */
+	__le16 num_elems;
+	__le16 rsvd;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Each entry in the response buffer is of the following type: */
+struct ice_aqc_get_sw_cfg_resp_elem {
+	/* VSI/Port Number */
+	__le16 vsi_port_num;
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S	0
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M	\
+			(0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S	14
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M	(0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S)
+#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT	0
+#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT	1
+#define ICE_AQC_GET_SW_CONF_RESP_VSI		2
+
+	/* SWID VSI/Port belongs to */
+	__le16 swid;
+
+	/* Bit 14..0 : PF/VF number VSI belongs to
+	 * Bit 15 : VF indication bit
+	 */
+	__le16 pf_vf_num;
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S	0
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M	\
+				(0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_IS_VF		BIT(15)
+};
+
+/* The response buffer is as follows. Note that the length of the
+ * elements array varies with the length of the command response.
+ */
+struct ice_aqc_get_sw_cfg_resp {
+	struct ice_aqc_get_sw_cfg_resp_elem elements[1];
+};
+
+/* Add TSE (indirect 0x0401)
+ * Delete TSE (indirect 0x040F)
+ * Move TSE (indirect 0x0408)
+ */
+struct ice_aqc_add_move_delete_elem {
+	__le16 num_grps_req;
+	__le16 num_grps_updated;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_elem_info_bw {
+	__le16 bw_profile_idx;
+	__le16 bw_alloc;
+};
+
+struct ice_aqc_txsched_elem {
+	u8 elem_type; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_TYPE_UNDEFINED		0x0
+#define ICE_AQC_ELEM_TYPE_ROOT_PORT		0x1
+#define ICE_AQC_ELEM_TYPE_TC			0x2
+#define ICE_AQC_ELEM_TYPE_SE_GENERIC		0x3
+#define ICE_AQC_ELEM_TYPE_ENTRY_POINT		0x4
+#define ICE_AQC_ELEM_TYPE_LEAF			0x5
+#define ICE_AQC_ELEM_TYPE_SE_PADDED		0x6
+	u8 valid_sections;
+#define ICE_AQC_ELEM_VALID_GENERIC		BIT(0)
+#define ICE_AQC_ELEM_VALID_CIR			BIT(1)
+#define ICE_AQC_ELEM_VALID_EIR			BIT(2)
+#define ICE_AQC_ELEM_VALID_SHARED		BIT(3)
+	u8 generic;
+#define ICE_AQC_ELEM_GENERIC_MODE_M		0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_S		0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_M	(0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S)
+#define ICE_AQC_ELEM_GENERIC_SP_S		0x4
+#define ICE_AQC_ELEM_GENERIC_SP_M	(0x1 << ICE_AQC_ELEM_GENERIC_SP_S)
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S	0x5
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M	\
+	(0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S)
+	u8 flags; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_FLAG_SUSPEND_M		0x1
+	struct ice_aqc_elem_info_bw cir_bw;
+	struct ice_aqc_elem_info_bw eir_bw;
+	__le16 srl_id;
+	__le16 reserved2;
+};
+
+struct ice_aqc_txsched_elem_data {
+	__le32 parent_teid;
+	__le32 node_teid;
+	struct ice_aqc_txsched_elem data;
+};
+
+struct ice_aqc_txsched_topo_grp_info_hdr {
+	__le32 parent_teid;
+	__le16 num_elems;
+	__le16 reserved2;
+};
+
+struct ice_aqc_delete_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	__le32 teid[1];
+};
+
+/* Query Scheduler Resource Allocation (indirect 0x0412)
+ * This indirect command retrieves the scheduler resources allocated by
+ * EMP Firmware to the given PF.
+ */
+struct ice_aqc_query_txsched_res {
+	u8 reserved[8];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_generic_sched_props {
+	__le16 phys_levels;
+	__le16 logical_levels;
+	u8 flattening_bitmap;
+	u8 max_device_cgds;
+	u8 max_pf_cgds;
+	u8 rsvd0;
+	__le16 rdma_qsets;
+	u8 rsvd1[22];
+};
+
+struct ice_aqc_layer_props {
+	u8 logical_layer;
+	u8 chunk_size;
+	__le16 max_device_nodes;
+	__le16 max_pf_nodes;
+	u8 rsvd0[2];
+	__le16 max_shared_rate_lmtr;
+	__le16 max_children;
+	__le16 max_cir_rl_profiles;
+	__le16 max_eir_rl_profiles;
+	__le16 max_srl_profiles;
+	u8 rsvd1[14];
+};
+
+struct ice_aqc_query_txsched_res_resp {
+	struct ice_aqc_generic_sched_props sched_props;
+	struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
@@ -128,6 +319,10 @@ struct ice_aq_desc {
 		struct ice_aqc_q_shutdown q_shutdown;
 		struct ice_aqc_req_res res_owner;
 		struct ice_aqc_clear_pxe clear_pxe;
+		struct ice_aqc_list_caps get_cap;
+		struct ice_aqc_get_sw_cfg get_sw_conf;
+		struct ice_aqc_query_txsched_res query_sched_res;
+		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
 	} params;
 };
@@ -136,16 +331,19 @@ struct ice_aq_desc {
 #define ICE_AQ_LG_BUF	512
 
 #define ICE_AQ_FLAG_LB_S	9
+#define ICE_AQ_FLAG_RD_S	10
 #define ICE_AQ_FLAG_BUF_S	12
 #define ICE_AQ_FLAG_SI_S	13
 
 #define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
+#define ICE_AQ_FLAG_RD		BIT(ICE_AQ_FLAG_RD_S)  /* 0x400  */
 #define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
 #define ICE_AQ_FLAG_SI		BIT(ICE_AQ_FLAG_SI_S)  /* 0x2000 */
 
 /* error codes */
 enum ice_aq_err {
 	ICE_AQ_RC_OK		= 0,  /* success */
+	ICE_AQ_RC_ENOMEM	= 9,  /* Out of memory */
 	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
 	ICE_AQ_RC_EEXIST	= 13, /* object already exists */
 };
@@ -160,11 +358,22 @@ enum ice_adminq_opc {
 	ice_aqc_opc_req_res				= 0x0008,
 	ice_aqc_opc_release_res				= 0x0009,
 
+	/* device/function capabilities */
+	ice_aqc_opc_list_func_caps			= 0x000A,
+	ice_aqc_opc_list_dev_caps			= 0x000B,
+
 	/* PXE */
 	ice_aqc_opc_clear_pxe_mode			= 0x0110,
 
+	/* internal switch commands */
+	ice_aqc_opc_get_sw_cfg				= 0x0200,
+
 	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
 
+	/* transmit scheduler commands */
+	ice_aqc_opc_delete_sched_elems			= 0x040F,
+	ice_aqc_opc_query_sched_res			= 0x0412,
+
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index d3d420c3ba7b..f9567dc1aefd 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice_common.h"
+#include "ice_sched.h"
 #include "ice_adminq_cmd.h"
 
 #define ICE_PF_RESET_WAIT_COUNT	200
@@ -70,8 +71,37 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	if (status)
 		goto err_unroll_cqinit;
 
+	status = ice_get_caps(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	hw->port_info = devm_kzalloc(ice_hw_to_dev(hw),
+				     sizeof(*hw->port_info), GFP_KERNEL);
+	if (!hw->port_info) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_unroll_cqinit;
+	}
+
+	/* set the back pointer to hw */
+	hw->port_info->hw = hw;
+
+	/* Initialize port_info struct with switch configuration data */
+	status = ice_get_initial_sw_cfg(hw);
+	if (status)
+		goto err_unroll_alloc;
+
+	/* Query the allocated resources for tx scheduler */
+	status = ice_sched_query_res_alloc(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_SCHED,
+			  "Failed to get scheduler allocated resources\n");
+		goto err_unroll_alloc;
+	}
+
 	return 0;
 
+err_unroll_alloc:
+	devm_kfree(ice_hw_to_dev(hw), hw->port_info);
 err_unroll_cqinit:
 	ice_shutdown_all_ctrlq(hw);
 	return status;
@@ -83,7 +113,12 @@ err_unroll_cqinit:
  */
 void ice_deinit_hw(struct ice_hw *hw)
 {
+	ice_sched_cleanup_all(hw);
 	ice_shutdown_all_ctrlq(hw);
+	if (hw->port_info) {
+		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+		hw->port_info = NULL;
+	}
 }
 
 /**
@@ -505,6 +540,202 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
 	}
 }
 
+/**
+ * ice_parse_caps - parse function/device capabilities
+ * @hw: pointer to the hw struct
+ * @buf: pointer to a buffer containing function/device capability records
+ * @cap_count: number of capability records in the list
+ * @opc: type of capabilities list to parse
+ *
+ * Helper function to parse function(0x000a)/device(0x000b) capabilities list.
+ */
+static void
+ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
+	       enum ice_adminq_opc opc)
+{
+	struct ice_aqc_list_caps_elem *cap_resp;
+	struct ice_hw_func_caps *func_p = NULL;
+	struct ice_hw_dev_caps *dev_p = NULL;
+	struct ice_hw_common_caps *caps;
+	u32 i;
+
+	if (!buf)
+		return;
+
+	cap_resp = (struct ice_aqc_list_caps_elem *)buf;
+
+	if (opc == ice_aqc_opc_list_dev_caps) {
+		dev_p = &hw->dev_caps;
+		caps = &dev_p->common_cap;
+	} else if (opc == ice_aqc_opc_list_func_caps) {
+		func_p = &hw->func_caps;
+		caps = &func_p->common_cap;
+	} else {
+		ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
+		return;
+	}
+
+	for (i = 0; caps && i < cap_count; i++, cap_resp++) {
+		u32 logical_id = le32_to_cpu(cap_resp->logical_id);
+		u32 phys_id = le32_to_cpu(cap_resp->phys_id);
+		u32 number = le32_to_cpu(cap_resp->number);
+		u16 cap = le16_to_cpu(cap_resp->cap);
+
+		switch (cap) {
+		case ICE_AQC_CAPS_VSI:
+			if (dev_p) {
+				dev_p->num_vsi_allocd_to_host = number;
+				ice_debug(hw, ICE_DBG_INIT,
+					  "HW caps: Dev.VSI cnt = %d\n",
+					  dev_p->num_vsi_allocd_to_host);
+			} else if (func_p) {
+				func_p->guaranteed_num_vsi = number;
+				ice_debug(hw, ICE_DBG_INIT,
+					  "HW caps: Func.VSI cnt = %d\n",
+					  func_p->guaranteed_num_vsi);
+			}
+			break;
+		case ICE_AQC_CAPS_RSS:
+			caps->rss_table_size = number;
+			caps->rss_table_entry_width = logical_id;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: RSS table size = %d\n",
+				  caps->rss_table_size);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: RSS table width = %d\n",
+				  caps->rss_table_entry_width);
+			break;
+		case ICE_AQC_CAPS_RXQS:
+			caps->num_rxq = number;
+			caps->rxq_first_id = phys_id;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: Num Rx Qs = %d\n", caps->num_rxq);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: Rx first queue ID = %d\n",
+				  caps->rxq_first_id);
+			break;
+		case ICE_AQC_CAPS_TXQS:
+			caps->num_txq = number;
+			caps->txq_first_id = phys_id;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: Num Tx Qs = %d\n", caps->num_txq);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: Tx first queue ID = %d\n",
+				  caps->txq_first_id);
+			break;
+		case ICE_AQC_CAPS_MSIX:
+			caps->num_msix_vectors = number;
+			caps->msix_vector_first_id = phys_id;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: MSIX vector count = %d\n",
+				  caps->num_msix_vectors);
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: MSIX first vector index = %d\n",
+				  caps->msix_vector_first_id);
+			break;
+		case ICE_AQC_CAPS_MAX_MTU:
+			caps->max_mtu = number;
+			if (dev_p)
+				ice_debug(hw, ICE_DBG_INIT,
+					  "HW caps: Dev.MaxMTU = %d\n",
+					  caps->max_mtu);
+			else if (func_p)
+				ice_debug(hw, ICE_DBG_INIT,
+					  "HW caps: func.MaxMTU = %d\n",
+					  caps->max_mtu);
+			break;
+		default:
+			ice_debug(hw, ICE_DBG_INIT,
+				  "HW caps: Unknown capability[%d]: 0x%x\n", i,
+				  cap);
+			break;
+		}
+	}
+}
+
+/**
+ * ice_aq_discover_caps - query function/device capabilities
+ * @hw: pointer to the hw struct
+ * @buf: a virtual buffer to hold the capabilities
+ * @buf_size: Size of the virtual buffer
+ * @data_size: Size of the returned data, or buf size needed if AQ err==ENOMEM
+ * @opc: capabilities type to discover - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the function(0x000a)/device(0x000b) capabilities description from
+ * the firmware.
+ */
+static enum ice_status
+ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u16 *data_size,
+		     enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_list_caps *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.get_cap;
+
+	if (opc != ice_aqc_opc_list_func_caps &&
+	    opc != ice_aqc_opc_list_dev_caps)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status)
+		ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc);
+	*data_size = le16_to_cpu(desc.datalen);
+
+	return status;
+}
+
+/**
+ * ice_get_caps - get info about the HW
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_get_caps(struct ice_hw *hw)
+{
+	enum ice_status status;
+	u16 data_size = 0;
+	u16 cbuf_len;
+	u8 retries;
+
+	/* The driver doesn't know how many capabilities the device will return
+	 * so the buffer size required isn't known ahead of time. The driver
+	 * starts with cbuf_len and if this turns out to be insufficient, the
+	 * device returns ICE_AQ_RC_ENOMEM and also the buffer size it needs.
+	 * The driver then allocates the buffer of this size and retries the
+	 * operation. So it follows that the retry count is 2.
+	 */
+#define ICE_GET_CAP_BUF_COUNT	40
+#define ICE_GET_CAP_RETRY_COUNT	2
+
+	cbuf_len = ICE_GET_CAP_BUF_COUNT *
+		sizeof(struct ice_aqc_list_caps_elem);
+
+	retries = ICE_GET_CAP_RETRY_COUNT;
+
+	do {
+		void *cbuf;
+
+		cbuf = devm_kzalloc(ice_hw_to_dev(hw), cbuf_len, GFP_KERNEL);
+		if (!cbuf)
+			return ICE_ERR_NO_MEMORY;
+
+		status = ice_aq_discover_caps(hw, cbuf, cbuf_len, &data_size,
+					      ice_aqc_opc_list_func_caps, NULL);
+		devm_kfree(ice_hw_to_dev(hw), cbuf);
+
+		if (!status || hw->adminq.sq_last_status != ICE_AQ_RC_ENOMEM)
+			break;
+
+		/* If ENOMEM is returned, try again with bigger buffer */
+		cbuf_len = data_size;
+	} while (--retries);
+
+	return status;
+}
+
 /**
  * ice_aq_clear_pxe_mode
  * @hw: pointer to the hw struct
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index b1a7c5afe86b..87d873493bdd 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -6,6 +6,7 @@
 
 #include "ice.h"
 #include "ice_type.h"
+#include "ice_switch.h"
 
 void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf,
 		  u16 buf_len);
@@ -25,6 +26,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		struct ice_aq_desc *desc, void *buf, u16 buf_size,
 		struct ice_sq_cd *cd);
 void ice_clear_pxe_mode(struct ice_hw *hw);
+enum ice_status ice_get_caps(struct ice_hw *hw);
 bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
 enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
 void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
new file mode 100644
index 000000000000..ce4edf61ec8e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_sched.h"
+
+/**
+ * ice_aq_delete_sched_elems - delete scheduler elements
+ * @hw: pointer to the hw struct
+ * @grps_req: number of groups to delete
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_del: returns total number of elements deleted
+ * @cd: pointer to command details structure or NULL
+ *
+ * Delete scheduling elements (0x040F)
+ */
+static enum ice_status
+ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
+			  struct ice_aqc_delete_elem *buf, u16 buf_size,
+			  u16 *grps_del, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_move_delete_elem *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.add_move_delete_elem;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_delete_sched_elems);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->num_grps_req = cpu_to_le16(grps_req);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && grps_del)
+		*grps_del = le16_to_cpu(cmd->num_grps_updated);
+
+	return status;
+}
+
+/**
+ * ice_sched_remove_elems - remove nodes from hw
+ * @hw: pointer to the hw struct
+ * @parent: pointer to the parent node
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be deleted
+ *
+ * This function remove nodes from hw
+ */
+static enum ice_status
+ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
+		       u16 num_nodes, u32 *node_teids)
+{
+	struct ice_aqc_delete_elem *buf;
+	u16 i, num_groups_removed = 0;
+	enum ice_status status;
+	u16 buf_size;
+
+	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+	buf->hdr.parent_teid = parent->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(num_nodes);
+	for (i = 0; i < num_nodes; i++)
+		buf->teid[i] = cpu_to_le32(node_teids[i]);
+	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
+					   &num_groups_removed, NULL);
+	if (status || num_groups_removed != 1)
+		ice_debug(hw, ICE_DBG_SCHED, "remove elements failed\n");
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_get_first_node - get the first node of the given layer
+ * @hw: pointer to the hw struct
+ * @parent: pointer the base node of the subtree
+ * @layer: layer number
+ *
+ * This function retrieves the first node of the given layer from the subtree
+ */
+static struct ice_sched_node *
+ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
+			 u8 layer)
+{
+	u8 i;
+
+	if (layer < hw->sw_entry_point_layer)
+		return NULL;
+	for (i = 0; i < parent->num_children; i++) {
+		struct ice_sched_node *node = parent->children[i];
+
+		if (node) {
+			if (node->tx_sched_layer == layer)
+				return node;
+			/* this recursion is intentional, and wouldn't
+			 * go more than 9 calls
+			 */
+			return ice_sched_get_first_node(hw, node, layer);
+		}
+	}
+	return NULL;
+}
+
+/**
+ * ice_sched_get_tc_node - get pointer to TC node
+ * @pi: port information structure
+ * @tc: TC number
+ *
+ * This function returns the TC node pointer
+ */
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
+{
+	u8 i;
+
+	if (!pi)
+		return NULL;
+	for (i = 0; i < pi->root->num_children; i++)
+		if (pi->root->children[i]->tc_num == tc)
+			return pi->root->children[i];
+	return NULL;
+}
+
+/**
+ * ice_free_sched_node - Free a Tx scheduler node from SW DB
+ * @pi: port information structure
+ * @node: pointer to the ice_sched_node struct
+ *
+ * This function frees up a node from SW DB as well as from HW
+ *
+ * This function needs to be called with the port_info->sched_lock held
+ */
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+	struct ice_sched_node *parent;
+	struct ice_hw *hw = pi->hw;
+	u8 i, j;
+
+	/* Free the children before freeing up the parent node
+	 * The parent array is updated below and that shifts the nodes
+	 * in the array. So always pick the first child if num children > 0
+	 */
+	while (node->num_children)
+		ice_free_sched_node(pi, node->children[0]);
+
+	/* Leaf, TC and root nodes can't be deleted by SW */
+	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
+		u32 teid = le32_to_cpu(node->info.node_teid);
+		enum ice_status status;
+
+		status = ice_sched_remove_elems(hw, node->parent, 1, &teid);
+		if (status)
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "remove element failed %d\n", status);
+	}
+	parent = node->parent;
+	/* root has no parent */
+	if (parent) {
+		struct ice_sched_node *p, *tc_node;
+
+		/* update the parent */
+		for (i = 0; i < parent->num_children; i++)
+			if (parent->children[i] == node) {
+				for (j = i + 1; j < parent->num_children; j++)
+					parent->children[j - 1] =
+						parent->children[j];
+				parent->num_children--;
+				break;
+			}
+
+		/* search for previous sibling that points to this node and
+		 * remove the reference
+		 */
+		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
+		if (!tc_node) {
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "Invalid TC number %d\n", node->tc_num);
+			goto err_exit;
+		}
+		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
+		while (p) {
+			if (p->sibling == node) {
+				p->sibling = node->sibling;
+				break;
+			}
+			p = p->sibling;
+		}
+	}
+err_exit:
+	/* leaf nodes have no children */
+	if (node->children)
+		devm_kfree(ice_hw_to_dev(hw), node->children);
+	devm_kfree(ice_hw_to_dev(hw), node);
+}
+
+/**
+ * ice_aq_query_sched_res - query scheduler resource
+ * @hw: pointer to the hw struct
+ * @buf_size: buffer size in bytes
+ * @buf: pointer to buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduler resource allocation (0x0412)
+ */
+static enum ice_status
+ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
+		       struct ice_aqc_query_txsched_res_resp *buf,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sched_clear_tx_topo - clears the schduler tree nodes
+ * @pi: port information structure
+ *
+ * This function removes all the nodes from HW as well as from SW DB.
+ */
+static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_vsi_info *vsi_elem;
+	struct ice_sched_agg_info *atmp;
+	struct ice_sched_vsi_info *tmp;
+	struct ice_hw *hw;
+
+	if (!pi)
+		return;
+
+	hw = pi->hw;
+
+	list_for_each_entry_safe(agg_info, atmp, &pi->agg_list, list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(agg_vsi_info, vtmp,
+					 &agg_info->agg_vsi_list, list_entry) {
+			list_del(&agg_vsi_info->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
+		}
+	}
+
+	/* remove the vsi list */
+	list_for_each_entry_safe(vsi_elem, tmp, &pi->vsi_info_list,
+				 list_entry) {
+		list_del(&vsi_elem->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), vsi_elem);
+	}
+
+	if (pi->root) {
+		ice_free_sched_node(pi, pi->root);
+		pi->root = NULL;
+	}
+}
+
+/**
+ * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
+ * @pi: port information structure
+ *
+ * Cleanup scheduling elements from SW DB
+ */
+static void ice_sched_clear_port(struct ice_port_info *pi)
+{
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return;
+
+	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
+	mutex_lock(&pi->sched_lock);
+	ice_sched_clear_tx_topo(pi);
+	mutex_unlock(&pi->sched_lock);
+	mutex_destroy(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
+ * @hw: pointer to the hw struct
+ *
+ * Cleanup scheduling elements from SW DB for all the ports
+ */
+void ice_sched_cleanup_all(struct ice_hw *hw)
+{
+	if (!hw || !hw->port_info)
+		return;
+
+	if (hw->layer_info)
+		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+
+	ice_sched_clear_port(hw->port_info);
+
+	hw->num_tx_sched_layers = 0;
+	hw->num_tx_sched_phys_layers = 0;
+	hw->flattened_layers = 0;
+	hw->max_cgds = 0;
+}
+
+/**
+ * ice_sched_query_res_alloc - query the FW for num of logical sched layers
+ * @hw: pointer to the HW struct
+ *
+ * query FW for allocated scheduler resources and store in HW struct
+ */
+enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
+{
+	struct ice_aqc_query_txsched_res_resp *buf;
+	enum ice_status status = 0;
+
+	if (hw->layer_info)
+		return status;
+
+	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
+	if (status)
+		goto sched_query_out;
+
+	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
+	hw->num_tx_sched_phys_layers =
+		le16_to_cpu(buf->sched_props.phys_levels);
+	hw->flattened_layers = buf->sched_props.flattening_bitmap;
+	hw->max_cgds = buf->sched_props.max_pf_cgds;
+
+	 hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
+				       (hw->num_tx_sched_layers *
+					sizeof(*hw->layer_info)),
+				       GFP_KERNEL);
+	if (!hw->layer_info) {
+		status = ICE_ERR_NO_MEMORY;
+		goto sched_query_out;
+	}
+
+sched_query_out:
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
new file mode 100644
index 000000000000..e329f6ec6a0e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SCHED_H_
+#define _ICE_SCHED_H_
+
+#include "ice_common.h"
+
+struct ice_sched_agg_vsi_info {
+	struct list_head list_entry;
+	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	u16 vsi_id;
+};
+
+struct ice_sched_agg_info {
+	struct list_head agg_vsi_list;
+	struct list_head list_entry;
+	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	u32 agg_id;
+	enum ice_agg_type agg_type;
+};
+
+/* FW AQ command calls */
+enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_cleanup_all(struct ice_hw *hw);
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
+#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
new file mode 100644
index 000000000000..8fc0579b0bbb
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_switch.h"
+
+/**
+ * ice_aq_get_sw_cfg - get switch configuration
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of the buffer available for response
+ * @req_desc: pointer to requested descriptor
+ * @num_elems: pointer to number of elements
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get switch configuration (0x0200) to be placed in 'buff'.
+ * This admin command returns information such as initial VSI/port number
+ * and switch ID it belongs to.
+ *
+ * NOTE: *req_desc is both an input/output parameter.
+ * The caller of this function first calls this function with *request_desc set
+ * to 0.  If the response from f/w has *req_desc set to 0, all the switch
+ * configuration information has been returned; if non-zero (meaning not all
+ * the information was returned), the caller should call this function again
+ * with *req_desc set to the previous value returned by f/w to get the
+ * next block of switch configuration information.
+ *
+ * *num_elems is output only parameter. This reflects the number of elements
+ * in response buffer. The caller of this function to use *num_elems while
+ * parsing the response buffer.
+ */
+static enum ice_status
+ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp *buf,
+		  u16 buf_size, u16 *req_desc, u16 *num_elems,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_sw_cfg *cmd;
+	enum ice_status status;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg);
+	cmd = &desc.params.get_sw_conf;
+	cmd->element = cpu_to_le16(*req_desc);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status) {
+		*req_desc = le16_to_cpu(cmd->element);
+		*num_elems = le16_to_cpu(cmd->num_elems);
+	}
+
+	return status;
+}
+
+/* ice_init_port_info - Initialize port_info with switch configuration data
+ * @pi: pointer to port_info
+ * @vsi_port_num: VSI number or port number
+ * @type: Type of switch element (port or VSI)
+ * @swid: switch ID of the switch the element is attached to
+ * @pf_vf_num: PF or VF number
+ * @is_vf: true if the element is a VF, false otherwise
+ */
+static void
+ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type,
+		   u16 swid, u16 pf_vf_num, bool is_vf)
+{
+	switch (type) {
+	case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT:
+		pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK);
+		pi->sw_id = swid;
+		pi->pf_vf_num = pf_vf_num;
+		pi->is_vf = is_vf;
+		pi->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL;
+		pi->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL;
+		break;
+	default:
+		ice_debug(pi->hw, ICE_DBG_SW,
+			  "incorrect VSI/port type received\n");
+		break;
+	}
+}
+
+/* ice_get_initial_sw_cfg - Get initial port and default VSI data
+ * @hw: pointer to the hardware structure
+ */
+enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
+{
+	struct ice_aqc_get_sw_cfg_resp *rbuf;
+	enum ice_status status;
+	u16 req_desc = 0;
+	u16 num_elems;
+	u16 i;
+
+	rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN,
+			    GFP_KERNEL);
+
+	if (!rbuf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Multiple calls to ice_aq_get_sw_cfg may be required
+	 * to get all the switch configuration information. The need
+	 * for additional calls is indicated by ice_aq_get_sw_cfg
+	 * writing a non-zero value in req_desc
+	 */
+	do {
+		status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN,
+					   &req_desc, &num_elems, NULL);
+
+		if (status)
+			break;
+
+		for (i = 0; i < num_elems; i++) {
+			struct ice_aqc_get_sw_cfg_resp_elem *ele;
+			u16 pf_vf_num, swid, vsi_port_num;
+			bool is_vf = false;
+			u8 type;
+
+			ele = rbuf[i].elements;
+			vsi_port_num = le16_to_cpu(ele->vsi_port_num) &
+				ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M;
+
+			pf_vf_num = le16_to_cpu(ele->pf_vf_num) &
+				ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M;
+
+			swid = le16_to_cpu(ele->swid);
+
+			if (le16_to_cpu(ele->pf_vf_num) &
+			    ICE_AQC_GET_SW_CONF_RESP_IS_VF)
+				is_vf = true;
+
+			type = le16_to_cpu(ele->vsi_port_num) >>
+				ICE_AQC_GET_SW_CONF_RESP_TYPE_S;
+
+			if (type == ICE_AQC_GET_SW_CONF_RESP_VSI) {
+				/* FW VSI is not needed. Just continue. */
+				continue;
+			}
+
+			ice_init_port_info(hw->port_info, vsi_port_num,
+					   type, swid, pf_vf_num, is_vf);
+		}
+	} while (req_desc && !status);
+
+	devm_kfree(ice_hw_to_dev(hw), (void *)rbuf);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
new file mode 100644
index 000000000000..b98cb978a129
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SWITCH_H_
+#define _ICE_SWITCH_H_
+
+#include "ice_common.h"
+
+#define ICE_SW_CFG_MAX_BUF_LEN 2048
+#define ICE_DFLT_VSI_INVAL 0xff
+
+enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
+
+#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 31be38369a48..0b71634668bb 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -12,6 +12,8 @@
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
 #define ICE_DBG_NVM		BIT_ULL(7)
+#define ICE_DBG_SW		BIT_ULL(13)
+#define ICE_DBG_SCHED		BIT_ULL(14)
 #define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
 #define ICE_DBG_AQ_CMD		BIT_ULL(27)
@@ -34,6 +36,38 @@ enum ice_mac_type {
 	ICE_MAC_GENERIC,
 };
 
+/* Common HW capabilities for SW use */
+struct ice_hw_common_caps {
+	/* TX/RX queues */
+	u16 num_rxq;		/* Number/Total RX queues */
+	u16 rxq_first_id;	/* First queue ID for RX queues */
+	u16 num_txq;		/* Number/Total TX queues */
+	u16 txq_first_id;	/* First queue ID for TX queues */
+
+	/* MSI-X vectors */
+	u16 num_msix_vectors;
+	u16 msix_vector_first_id;
+
+	/* Max MTU for function or device */
+	u16 max_mtu;
+
+	/* RSS related capabilities */
+	u16 rss_table_size;		/* 512 for PFs and 64 for VFs */
+	u8 rss_table_entry_width;	/* RSS Entry width in bits */
+};
+
+/* Function specific capabilities */
+struct ice_hw_func_caps {
+	struct ice_hw_common_caps common_cap;
+	u32 guaranteed_num_vsi;
+};
+
+/* Device wide capabilities */
+struct ice_hw_dev_caps {
+	struct ice_hw_common_caps common_cap;
+	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
+};
+
 /* Various RESET request, These are not tied with HW reset types */
 enum ice_reset_req {
 	ICE_RESET_PFR	= 0,
@@ -56,10 +90,76 @@ struct ice_nvm_info {
 	bool blank_nvm_mode;      /* is NVM empty (no FW present) */
 };
 
+/* Max number of port to queue branches w.r.t topology */
+#define ICE_MAX_TRAFFIC_CLASS 8
+
+struct ice_sched_node {
+	struct ice_sched_node *parent;
+	struct ice_sched_node *sibling; /* next sibling in the same layer */
+	struct ice_sched_node **children;
+	struct ice_aqc_txsched_elem_data info;
+	u32 agg_id;			/* aggregator group id */
+	u16 vsi_id;
+	bool in_use;			/* suspended or in use */
+	u8 tx_sched_layer;		/* Logical Layer (1-9) */
+	u8 num_children;
+	u8 tc_num;
+	u8 owner;
+#define ICE_SCHED_NODE_OWNER_LAN	0
+};
+
+/* The aggregator type determines if identifier is for a VSI group,
+ * aggregator group, aggregator of queues, or queue group.
+ */
+enum ice_agg_type {
+	ICE_AGG_TYPE_UNKNOWN = 0,
+	ICE_AGG_TYPE_VSI,
+	ICE_AGG_TYPE_AGG, /* aggregator */
+	ICE_AGG_TYPE_Q,
+	ICE_AGG_TYPE_QG
+};
+
+/* vsi type list entry to locate corresponding vsi/ag nodes */
+struct ice_sched_vsi_info {
+	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
+	struct list_head list_entry;
+	u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+	u16 vsi_id;
+};
+
+/* driver defines the policy */
+struct ice_sched_tx_policy {
+	u16 max_num_vsis;
+	u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS];
+	bool rdma_ena;
+};
+
+struct ice_port_info {
+	struct ice_sched_node *root;	/* Root Node per Port */
+	struct ice_hw *hw;		/* back pointer to hw instance */
+	u16 sw_id;			/* Initial switch ID belongs to port */
+	u16 pf_vf_num;
+	u8 port_state;
+#define ICE_SCHED_PORT_STATE_INIT	0x0
+#define ICE_SCHED_PORT_STATE_READY	0x1
+	u16 dflt_tx_vsi_num;
+	u16 dflt_rx_vsi_num;
+	struct mutex sched_lock;	/* protect access to TXSched tree */
+	struct ice_sched_tx_policy sched_policy;
+	struct list_head vsi_info_list;
+	struct list_head agg_list;	/* lists all aggregator */
+	u8 lport;
+#define ICE_LPORT_MASK		0xff
+	bool is_vf;
+};
+
 /* Port hardware description */
 struct ice_hw {
 	u8 __iomem *hw_addr;
 	void *back;
+	struct ice_aqc_layer_props *layer_info;
+	struct ice_port_info *port_info;
 	u64 debug_mask;		/* bitmap for debug mask */
 	enum ice_mac_type mac_type;
 
@@ -72,8 +172,17 @@ struct ice_hw {
 
 	u8 pf_id;		/* device profile info */
 
+	/* TX Scheduler values */
+	u16 num_tx_sched_layers;
+	u16 num_tx_sched_phys_layers;
+	u8 flattened_layers;
+	u8 max_cgds;
+	u8 sw_entry_point_layer;
+
 	struct ice_bus_info bus;
 	struct ice_nvm_info nvm;
+	struct ice_hw_dev_caps dev_caps;	/* device capabilities */
+	struct ice_hw_func_caps func_caps;	/* function capabilities */
 
 	/* Control Queue info */
 	struct ice_ctl_q_info adminq;

From dc49c77236769c571e77d49450b2dfc001d60e33 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:09 -0700
Subject: [PATCH 1255/1678] ice: Get MAC/PHY/link info and scheduler topology

This patch adds code to continue the initialization flow as follows:

1) Get PHY/link information and store it
2) Get default scheduler tree topology and store it
3) Get the MAC address associated with the port and store it

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |   1 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   | 261 ++++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.c   | 264 ++++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |   3 +
 drivers/net/ethernet/intel/ice/ice_sched.c    | 328 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_sched.h    |   6 +
 drivers/net/ethernet/intel/ice/ice_status.h   |   1 +
 drivers/net/ethernet/intel/ice/ice_type.h     |  65 ++++
 8 files changed, 929 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 716c555532f4..1a1776a2a5a1 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/compiler.h>
+#include <linux/etherdevice.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/delay.h>
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index d1b70f0ed0e2..5061240996b1 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -104,6 +104,35 @@ struct ice_aqc_list_caps_elem {
 	__le64 rsvd2;
 };
 
+/* Manage MAC address, read command - indirect (0x0107)
+ * This struct is also used for the response
+ */
+struct ice_aqc_manage_mac_read {
+	__le16 flags; /* Zeroed by device driver */
+#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID		BIT(4)
+#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID		BIT(5)
+#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID		BIT(6)
+#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID		BIT(7)
+#define ICE_AQC_MAN_MAC_READ_S			4
+#define ICE_AQC_MAN_MAC_READ_M			(0xF << ICE_AQC_MAN_MAC_READ_S)
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID	BIT(0)
+	u8 num_addr; /* Used in response */
+	u8 reserved[3];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Response buffer format for manage MAC read command */
+struct ice_aqc_manage_mac_read_resp {
+	u8 lport_num;
+	u8 addr_type;
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN		0
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL		1
+	u8 mac_addr[ETH_ALEN];
+};
+
 /* Clear PXE Command and response (direct 0x0110) */
 struct ice_aqc_clear_pxe {
 	u8 rx_cnt;
@@ -161,6 +190,16 @@ struct ice_aqc_get_sw_cfg_resp {
 	struct ice_aqc_get_sw_cfg_resp_elem elements[1];
 };
 
+/* Get Default Topology (indirect 0x0400) */
+struct ice_aqc_get_topo {
+	u8 port_num;
+	u8 num_branches;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* Add TSE (indirect 0x0401)
  * Delete TSE (indirect 0x040F)
  * Move TSE (indirect 0x0408)
@@ -221,6 +260,12 @@ struct ice_aqc_txsched_topo_grp_info_hdr {
 	__le16 reserved2;
 };
 
+struct ice_aqc_get_topo_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	struct ice_aqc_txsched_elem_data
+		generic[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
 struct ice_aqc_delete_elem {
 	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
 	__le32 teid[1];
@@ -266,6 +311,210 @@ struct ice_aqc_query_txsched_res_resp {
 	struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
 };
 
+/* Get PHY capabilities (indirect 0x0600) */
+struct ice_aqc_get_phy_caps {
+	u8 lport_num;
+	u8 reserved;
+	__le16 param0;
+	/* 18.0 - Report qualified modules */
+#define ICE_AQC_GET_PHY_RQM		BIT(0)
+	/* 18.1 - 18.2 : Report mode
+	 * 00b - Report NVM capabilities
+	 * 01b - Report topology capabilities
+	 * 10b - Report SW configured
+	 */
+#define ICE_AQC_REPORT_MODE_S		1
+#define ICE_AQC_REPORT_MODE_M		(3 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_NVM_CAP		0
+#define ICE_AQC_REPORT_TOPO_CAP		BIT(1)
+#define ICE_AQC_REPORT_SW_CFG		BIT(2)
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is #define of PHY type (Extended):
+ * The first set of defines is for phy_type_low.
+ */
+#define ICE_PHY_TYPE_LOW_100BASE_TX		BIT_ULL(0)
+#define ICE_PHY_TYPE_LOW_100M_SGMII		BIT_ULL(1)
+#define ICE_PHY_TYPE_LOW_1000BASE_T		BIT_ULL(2)
+#define ICE_PHY_TYPE_LOW_1000BASE_SX		BIT_ULL(3)
+#define ICE_PHY_TYPE_LOW_1000BASE_LX		BIT_ULL(4)
+#define ICE_PHY_TYPE_LOW_1000BASE_KX		BIT_ULL(5)
+#define ICE_PHY_TYPE_LOW_1G_SGMII		BIT_ULL(6)
+#define ICE_PHY_TYPE_LOW_2500BASE_T		BIT_ULL(7)
+#define ICE_PHY_TYPE_LOW_2500BASE_X		BIT_ULL(8)
+#define ICE_PHY_TYPE_LOW_2500BASE_KX		BIT_ULL(9)
+#define ICE_PHY_TYPE_LOW_5GBASE_T		BIT_ULL(10)
+#define ICE_PHY_TYPE_LOW_5GBASE_KR		BIT_ULL(11)
+#define ICE_PHY_TYPE_LOW_10GBASE_T		BIT_ULL(12)
+#define ICE_PHY_TYPE_LOW_10G_SFI_DA		BIT_ULL(13)
+#define ICE_PHY_TYPE_LOW_10GBASE_SR		BIT_ULL(14)
+#define ICE_PHY_TYPE_LOW_10GBASE_LR		BIT_ULL(15)
+#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1		BIT_ULL(16)
+#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC	BIT_ULL(17)
+#define ICE_PHY_TYPE_LOW_10G_SFI_C2C		BIT_ULL(18)
+#define ICE_PHY_TYPE_LOW_25GBASE_T		BIT_ULL(19)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR		BIT_ULL(20)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR_S		BIT_ULL(21)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR1		BIT_ULL(22)
+#define ICE_PHY_TYPE_LOW_25GBASE_SR		BIT_ULL(23)
+#define ICE_PHY_TYPE_LOW_25GBASE_LR		BIT_ULL(24)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR		BIT_ULL(25)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR_S		BIT_ULL(26)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR1		BIT_ULL(27)
+#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC	BIT_ULL(28)
+#define ICE_PHY_TYPE_LOW_25G_AUI_C2C		BIT_ULL(29)
+#define ICE_PHY_TYPE_LOW_40GBASE_CR4		BIT_ULL(30)
+#define ICE_PHY_TYPE_LOW_40GBASE_SR4		BIT_ULL(31)
+#define ICE_PHY_TYPE_LOW_40GBASE_LR4		BIT_ULL(32)
+#define ICE_PHY_TYPE_LOW_40GBASE_KR4		BIT_ULL(33)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC	BIT_ULL(34)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI		BIT_ULL(35)
+#define ICE_PHY_TYPE_LOW_MAX_INDEX		63
+
+struct ice_aqc_get_phy_caps_data {
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 reserved;
+	u8 caps;
+#define ICE_AQC_PHY_EN_TX_LINK_PAUSE			BIT(0)
+#define ICE_AQC_PHY_EN_RX_LINK_PAUSE			BIT(1)
+#define ICE_AQC_PHY_LOW_POWER_MODE			BIT(2)
+#define ICE_AQC_PHY_EN_LINK				BIT(3)
+#define ICE_AQC_PHY_AN_MODE				BIT(4)
+#define ICE_AQC_GET_PHY_EN_MOD_QUAL			BIT(5)
+	u8 low_power_ctrl;
+#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG		BIT(0)
+	__le16 eee_cap;
+#define ICE_AQC_PHY_EEE_EN_100BASE_TX			BIT(0)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_T			BIT(1)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_T			BIT(2)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_KX			BIT(3)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_KR			BIT(4)
+#define ICE_AQC_PHY_EEE_EN_25GBASE_KR			BIT(5)
+#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4			BIT(6)
+	__le16 eeer_value;
+	u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+	u8 link_fec_options;
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN		BIT(0)
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ		BIT(1)
+#define ICE_AQC_PHY_FEC_25G_RS_528_REQ			BIT(2)
+#define ICE_AQC_PHY_FEC_25G_KR_REQ			BIT(3)
+#define ICE_AQC_PHY_FEC_25G_RS_544_REQ			BIT(4)
+#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN		BIT(6)
+#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN		BIT(7)
+	u8 extended_compliance_code;
+#define ICE_MODULE_TYPE_TOTAL_BYTE			3
+	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS			0xA0
+#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS		0x80
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE	BIT(0)
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE	BIT(1)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR		BIT(4)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR		BIT(5)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM		BIT(6)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER		BIT(7)
+#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS			0xA0
+#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS		0x86
+	u8 qualified_module_count;
+#define ICE_AQC_QUAL_MOD_COUNT_MAX			16
+	struct {
+		u8 v_oui[3];
+		u8 rsvd1;
+		u8 v_part[16];
+		__le32 v_rev;
+		__le64 rsvd8;
+	} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
+};
+
+/* Get link status (indirect 0x0607), also used for Link Status Event */
+struct ice_aqc_get_link_status {
+	u8 lport_num;
+	u8 reserved;
+	__le16 cmd_flags;
+#define ICE_AQ_LSE_M			0x3
+#define ICE_AQ_LSE_NOP			0x0
+#define ICE_AQ_LSE_DIS			0x2
+#define ICE_AQ_LSE_ENA			0x3
+	/* only response uses this flag */
+#define ICE_AQ_LSE_IS_ENABLED		0x1
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get link status response data structure, also used for Link Status Event */
+struct ice_aqc_get_link_status_data {
+	u8 topo_media_conflict;
+#define ICE_AQ_LINK_TOPO_CONFLICT	BIT(0)
+#define ICE_AQ_LINK_MEDIA_CONFLICT	BIT(1)
+#define ICE_AQ_LINK_TOPO_CORRUPT	BIT(2)
+	u8 reserved1;
+	u8 link_info;
+#define ICE_AQ_LINK_UP			BIT(0)	/* Link Status */
+#define ICE_AQ_LINK_FAULT		BIT(1)
+#define ICE_AQ_LINK_FAULT_TX		BIT(2)
+#define ICE_AQ_LINK_FAULT_RX		BIT(3)
+#define ICE_AQ_LINK_FAULT_REMOTE	BIT(4)
+#define ICE_AQ_LINK_UP_PORT		BIT(5)	/* External Port Link Status */
+#define ICE_AQ_MEDIA_AVAILABLE		BIT(6)
+#define ICE_AQ_SIGNAL_DETECT		BIT(7)
+	u8 an_info;
+#define ICE_AQ_AN_COMPLETED		BIT(0)
+#define ICE_AQ_LP_AN_ABILITY		BIT(1)
+#define ICE_AQ_PD_FAULT			BIT(2)	/* Parallel Detection Fault */
+#define ICE_AQ_FEC_EN			BIT(3)
+#define ICE_AQ_PHY_LOW_POWER		BIT(4)	/* Low Power State */
+#define ICE_AQ_LINK_PAUSE_TX		BIT(5)
+#define ICE_AQ_LINK_PAUSE_RX		BIT(6)
+#define ICE_AQ_QUALIFIED_MODULE		BIT(7)
+	u8 ext_info;
+#define ICE_AQ_LINK_PHY_TEMP_ALARM	BIT(0)
+#define ICE_AQ_LINK_EXCESSIVE_ERRORS	BIT(1)	/* Excessive Link Errors */
+	/* Port TX Suspended */
+#define ICE_AQ_LINK_TX_S		2
+#define ICE_AQ_LINK_TX_M		(0x03 << ICE_AQ_LINK_TX_S)
+#define ICE_AQ_LINK_TX_ACTIVE		0
+#define ICE_AQ_LINK_TX_DRAINED		1
+#define ICE_AQ_LINK_TX_FLUSHED		3
+	u8 reserved2;
+	__le16 max_frame_size;
+	u8 cfg;
+#define ICE_AQ_LINK_25G_KR_FEC_EN	BIT(0)
+#define ICE_AQ_LINK_25G_RS_528_FEC_EN	BIT(1)
+#define ICE_AQ_LINK_25G_RS_544_FEC_EN	BIT(2)
+	/* Pacing Config */
+#define ICE_AQ_CFG_PACING_S		3
+#define ICE_AQ_CFG_PACING_M		(0xF << ICE_AQ_CFG_PACING_S)
+#define ICE_AQ_CFG_PACING_TYPE_M	BIT(7)
+#define ICE_AQ_CFG_PACING_TYPE_AVG	0
+#define ICE_AQ_CFG_PACING_TYPE_FIXED	ICE_AQ_CFG_PACING_TYPE_M
+	/* External Device Power Ability */
+	u8 power_desc;
+#define ICE_AQ_PWR_CLASS_M		0x3
+#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH	0
+#define ICE_AQ_LINK_PWR_BASET_HIGH	1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_1	0
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_2	1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_3	2
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_4	3
+	__le16 link_speed;
+#define ICE_AQ_LINK_SPEED_10MB		BIT(0)
+#define ICE_AQ_LINK_SPEED_100MB		BIT(1)
+#define ICE_AQ_LINK_SPEED_1000MB	BIT(2)
+#define ICE_AQ_LINK_SPEED_2500MB	BIT(3)
+#define ICE_AQ_LINK_SPEED_5GB		BIT(4)
+#define ICE_AQ_LINK_SPEED_10GB		BIT(5)
+#define ICE_AQ_LINK_SPEED_20GB		BIT(6)
+#define ICE_AQ_LINK_SPEED_25GB		BIT(7)
+#define ICE_AQ_LINK_SPEED_40GB		BIT(8)
+#define ICE_AQ_LINK_SPEED_UNKNOWN	BIT(15)
+	__le32 reserved3; /* Aligns next field to 8-byte boundary */
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 reserved4;
+};
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
@@ -318,12 +567,16 @@ struct ice_aq_desc {
 		struct ice_aqc_get_ver get_ver;
 		struct ice_aqc_q_shutdown q_shutdown;
 		struct ice_aqc_req_res res_owner;
+		struct ice_aqc_manage_mac_read mac_read;
 		struct ice_aqc_clear_pxe clear_pxe;
 		struct ice_aqc_list_caps get_cap;
+		struct ice_aqc_get_phy_caps get_phy;
 		struct ice_aqc_get_sw_cfg get_sw_conf;
+		struct ice_aqc_get_topo get_topo;
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
+		struct ice_aqc_get_link_status get_link_status;
 	} params;
 };
 
@@ -362,6 +615,9 @@ enum ice_adminq_opc {
 	ice_aqc_opc_list_func_caps			= 0x000A,
 	ice_aqc_opc_list_dev_caps			= 0x000B,
 
+	/* manage MAC address */
+	ice_aqc_opc_manage_mac_read			= 0x0107,
+
 	/* PXE */
 	ice_aqc_opc_clear_pxe_mode			= 0x0110,
 
@@ -371,9 +627,14 @@ enum ice_adminq_opc {
 	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
 
 	/* transmit scheduler commands */
+	ice_aqc_opc_get_dflt_topo			= 0x0400,
 	ice_aqc_opc_delete_sched_elems			= 0x040F,
 	ice_aqc_opc_query_sched_res			= 0x0412,
 
+	/* PHY commands */
+	ice_aqc_opc_get_phy_caps			= 0x0600,
+	ice_aqc_opc_get_link_status			= 0x0607,
+
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index f9567dc1aefd..ef5bab25c2f5 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -36,13 +36,238 @@ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw)
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
 }
 
+/**
+ * ice_aq_manage_mac_read - manage MAC address read command
+ * @hw: pointer to the hw struct
+ * @buf: a virtual buffer to hold the manage MAC read response
+ * @buf_size: Size of the virtual buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to return per PF station MAC address (0x0107).
+ * NOTE: Upon successful completion of this command, MAC address information
+ * is returned in user specified buffer. Please interpret user specified
+ * buffer as "manage_mac_read" response.
+ * Response such as various MAC addresses are stored in HW struct (port.mac)
+ * ice_aq_discover_caps is expected to be called before this function is called.
+ */
+static enum ice_status
+ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_manage_mac_read_resp *resp;
+	struct ice_aqc_manage_mac_read *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 flags;
+
+	cmd = &desc.params.mac_read;
+
+	if (buf_size < sizeof(*resp))
+		return ICE_ERR_BUF_TOO_SHORT;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (status)
+		return status;
+
+	resp = (struct ice_aqc_manage_mac_read_resp *)buf;
+	flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
+
+	if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
+		ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n");
+		return ICE_ERR_CFG;
+	}
+
+	ether_addr_copy(hw->port_info->mac.lan_addr, resp->mac_addr);
+	ether_addr_copy(hw->port_info->mac.perm_addr, resp->mac_addr);
+	return 0;
+}
+
+/**
+ * ice_aq_get_phy_caps - returns PHY capabilities
+ * @pi: port information structure
+ * @qual_mods: report qualified modules
+ * @report_mode: report mode capabilities
+ * @pcaps: structure for PHY capabilities to be filled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the various PHY capabilities supported on the Port (0x0600)
+ */
+static enum ice_status
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+		    struct ice_aqc_get_phy_caps_data *pcaps,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_phy_caps *cmd;
+	u16 pcaps_size = sizeof(*pcaps);
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.get_phy;
+
+	if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
+
+	if (qual_mods)
+		cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM);
+
+	cmd->param0 |= cpu_to_le16(report_mode);
+	status = ice_aq_send_cmd(pi->hw, &desc, pcaps, pcaps_size, cd);
+
+	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP)
+		pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
+
+	return status;
+}
+
+/**
+ * ice_get_media_type - Gets media type
+ * @pi: port information structure
+ */
+static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
+{
+	struct ice_link_status *hw_link_info;
+
+	if (!pi)
+		return ICE_MEDIA_UNKNOWN;
+
+	hw_link_info = &pi->phy.link_info;
+
+	if (hw_link_info->phy_type_low) {
+		switch (hw_link_info->phy_type_low) {
+		case ICE_PHY_TYPE_LOW_1000BASE_SX:
+		case ICE_PHY_TYPE_LOW_1000BASE_LX:
+		case ICE_PHY_TYPE_LOW_10GBASE_SR:
+		case ICE_PHY_TYPE_LOW_10GBASE_LR:
+		case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		case ICE_PHY_TYPE_LOW_25GBASE_SR:
+		case ICE_PHY_TYPE_LOW_25GBASE_LR:
+		case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+		case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+			return ICE_MEDIA_FIBER;
+		case ICE_PHY_TYPE_LOW_100BASE_TX:
+		case ICE_PHY_TYPE_LOW_1000BASE_T:
+		case ICE_PHY_TYPE_LOW_2500BASE_T:
+		case ICE_PHY_TYPE_LOW_5GBASE_T:
+		case ICE_PHY_TYPE_LOW_10GBASE_T:
+		case ICE_PHY_TYPE_LOW_25GBASE_T:
+			return ICE_MEDIA_BASET;
+		case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+		case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+			return ICE_MEDIA_DA;
+		case ICE_PHY_TYPE_LOW_1000BASE_KX:
+		case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		case ICE_PHY_TYPE_LOW_2500BASE_X:
+		case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+		case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+			return ICE_MEDIA_BACKPLANE;
+		}
+	}
+
+	return ICE_MEDIA_UNKNOWN;
+}
+
+/**
+ * ice_aq_get_link_info
+ * @pi: port information structure
+ * @ena_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Link Status (0x607). Returns the link status of the adapter.
+ */
+enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd)
+{
+	struct ice_link_status *hw_link_info_old, *hw_link_info;
+	struct ice_aqc_get_link_status_data link_data = { 0 };
+	struct ice_aqc_get_link_status *resp;
+	enum ice_media_type *hw_media_type;
+	struct ice_fc_info *hw_fc_info;
+	bool tx_pause, rx_pause;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 cmd_flags;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw_link_info_old = &pi->phy.link_info_old;
+	hw_media_type = &pi->phy.media_type;
+	hw_link_info = &pi->phy.link_info;
+	hw_fc_info = &pi->fc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
+	cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS;
+	resp = &desc.params.get_link_status;
+	resp->cmd_flags = cpu_to_le16(cmd_flags);
+	resp->lport_num = pi->lport;
+
+	status = ice_aq_send_cmd(pi->hw, &desc, &link_data, sizeof(link_data),
+				 cd);
+
+	if (status)
+		return status;
+
+	/* save off old link status information */
+	*hw_link_info_old = *hw_link_info;
+
+	/* update current link status information */
+	hw_link_info->link_speed = le16_to_cpu(link_data.link_speed);
+	hw_link_info->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+	*hw_media_type = ice_get_media_type(pi);
+	hw_link_info->link_info = link_data.link_info;
+	hw_link_info->an_info = link_data.an_info;
+	hw_link_info->ext_info = link_data.ext_info;
+	hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+	hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M;
+
+	/* update fc info */
+	tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
+	rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX);
+	if (tx_pause && rx_pause)
+		hw_fc_info->current_mode = ICE_FC_FULL;
+	else if (tx_pause)
+		hw_fc_info->current_mode = ICE_FC_TX_PAUSE;
+	else if (rx_pause)
+		hw_fc_info->current_mode = ICE_FC_RX_PAUSE;
+	else
+		hw_fc_info->current_mode = ICE_FC_NONE;
+
+	hw_link_info->lse_ena =
+		!!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+	/* save link status information */
+	if (link)
+		*link = *hw_link_info;
+
+	/* flag cleared so calling functions don't call AQ again */
+	pi->phy.get_link_info = false;
+
+	return status;
+}
+
 /**
  * ice_init_hw - main hardware initialization routine
  * @hw: pointer to the hardware structure
  */
 enum ice_status ice_init_hw(struct ice_hw *hw)
 {
+	struct ice_aqc_get_phy_caps_data *pcaps;
 	enum ice_status status;
+	u16 mac_buf_len;
+	void *mac_buf;
 
 	/* Set MAC type based on DeviceID */
 	status = ice_set_mac_type(hw);
@@ -98,8 +323,46 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 		goto err_unroll_alloc;
 	}
 
+	/* Initialize port_info struct with scheduler data */
+	status = ice_sched_init_port(hw->port_info);
+	if (status)
+		goto err_unroll_sched;
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps) {
+		status = ICE_ERR_NO_MEMORY;
+		goto err_unroll_sched;
+	}
+
+	/* Initialize port_info struct with PHY capabilities */
+	status = ice_aq_get_phy_caps(hw->port_info, false,
+				     ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL);
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	if (status)
+		goto err_unroll_sched;
+
+	/* Initialize port_info struct with link information */
+	status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL);
+	if (status)
+		goto err_unroll_sched;
+
+	/* Get port MAC information */
+	mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp);
+	mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL);
+
+	if (!mac_buf)
+		goto err_unroll_sched;
+
+	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
+	devm_kfree(ice_hw_to_dev(hw), mac_buf);
+
+	if (status)
+		goto err_unroll_sched;
+
 	return 0;
 
+err_unroll_sched:
+	ice_sched_cleanup_all(hw);
 err_unroll_alloc:
 	devm_kfree(ice_hw_to_dev(hw), hw->port_info);
 err_unroll_cqinit:
@@ -115,6 +378,7 @@ void ice_deinit_hw(struct ice_hw *hw)
 {
 	ice_sched_cleanup_all(hw);
 	ice_shutdown_all_ctrlq(hw);
+
 	if (hw->port_info) {
 		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
 		hw->port_info = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 87d873493bdd..e0a7aa1dd943 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -35,4 +35,7 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
 		void *buf, u16 buf_size, struct ice_sq_cd *cd);
 enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
 enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
+enum ice_status
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd);
 #endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index ce4edf61ec8e..22039f9eb591 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -3,6 +3,141 @@
 
 #include "ice_sched.h"
 
+/**
+ * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
+ * @pi: port information structure
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts the root node of the scheduling tree topology
+ * to the SW DB.
+ */
+static enum ice_status
+ice_sched_add_root_node(struct ice_port_info *pi,
+			struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_sched_node *root;
+	struct ice_hw *hw;
+	u16 max_children;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return ICE_ERR_NO_MEMORY;
+
+	max_children = le16_to_cpu(hw->layer_info[0].max_children);
+	root->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
+				      sizeof(*root), GFP_KERNEL);
+	if (!root->children) {
+		devm_kfree(ice_hw_to_dev(hw), root);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	memcpy(&root->info, info, sizeof(*info));
+	pi->root = root;
+	return 0;
+}
+
+/**
+ * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
+ * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
+ * @teid: node teid to search
+ *
+ * This function searches for a node matching the teid in the scheduling tree
+ * from the SW DB. The search is recursive and is restricted by the number of
+ * layers it has searched through; stopping at the max supported layer.
+ *
+ * This function needs to be called when holding the port_info->sched_lock
+ */
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
+{
+	u16 i;
+
+	/* The TEID is same as that of the start_node */
+	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
+		return start_node;
+
+	/* The node has no children or is at the max layer */
+	if (!start_node->num_children ||
+	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
+	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
+		return NULL;
+
+	/* Check if teid matches to any of the children nodes */
+	for (i = 0; i < start_node->num_children; i++)
+		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
+			return start_node->children[i];
+
+	/* Search within each child's sub-tree */
+	for (i = 0; i < start_node->num_children; i++) {
+		struct ice_sched_node *tmp;
+
+		tmp = ice_sched_find_node_by_teid(start_node->children[i],
+						  teid);
+		if (tmp)
+			return tmp;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_sched_add_node - Insert the Tx scheduler node in SW DB
+ * @pi: port information structure
+ * @layer: Scheduler layer of the node
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts a scheduler node to the SW DB.
+ */
+enum ice_status
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+		   struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_sched_node *parent;
+	struct ice_sched_node *node;
+	struct ice_hw *hw;
+	u16 max_children;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	/* A valid parent node should be there */
+	parent = ice_sched_find_node_by_teid(pi->root,
+					     le32_to_cpu(info->parent_teid));
+	if (!parent) {
+		ice_debug(hw, ICE_DBG_SCHED,
+			  "Parent Node not found for parent_teid=0x%x\n",
+			  le32_to_cpu(info->parent_teid));
+		return ICE_ERR_PARAM;
+	}
+
+	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return ICE_ERR_NO_MEMORY;
+	max_children = le16_to_cpu(hw->layer_info[layer].max_children);
+	if (max_children) {
+		node->children = devm_kcalloc(ice_hw_to_dev(hw), max_children,
+					      sizeof(*node), GFP_KERNEL);
+		if (!node->children) {
+			devm_kfree(ice_hw_to_dev(hw), node);
+			return ICE_ERR_NO_MEMORY;
+		}
+	}
+
+	node->in_use = true;
+	node->parent = parent;
+	node->tx_sched_layer = layer;
+	parent->children[parent->num_children++] = node;
+	memcpy(&node->info, info, sizeof(*info));
+	return 0;
+}
+
 /**
  * ice_aq_delete_sched_elems - delete scheduler elements
  * @hw: pointer to the hw struct
@@ -194,6 +329,36 @@ err_exit:
 	devm_kfree(ice_hw_to_dev(hw), node);
 }
 
+/**
+ * ice_aq_get_dflt_topo - gets default scheduler topology
+ * @hw: pointer to the hw struct
+ * @lport: logical port number
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_branches: returns total number of queue to port branches
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get default scheduler topology (0x400)
+ */
+static enum ice_status
+ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
+		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
+		     u8 *num_branches, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_topo *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.get_topo;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
+	cmd->port_num = lport;
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_branches)
+		*num_branches = cmd->num_branches;
+
+	return status;
+}
+
 /**
  * ice_aq_query_sched_res - query scheduler resource
  * @hw: pointer to the hw struct
@@ -297,6 +462,169 @@ void ice_sched_cleanup_all(struct ice_hw *hw)
 	hw->max_cgds = 0;
 }
 
+/**
+ * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
+ * @pi: port information structure
+ *
+ * This function removes the leaf node that was created by the FW
+ * during initialization
+ */
+static void
+ice_rm_dflt_leaf_node(struct ice_port_info *pi)
+{
+	struct ice_sched_node *node;
+
+	node = pi->root;
+	while (node) {
+		if (!node->num_children)
+			break;
+		node = node->children[0];
+	}
+	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
+		u32 teid = le32_to_cpu(node->info.node_teid);
+		enum ice_status status;
+
+		/* remove the default leaf node */
+		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
+		if (!status)
+			ice_free_sched_node(pi, node);
+	}
+}
+
+/**
+ * ice_sched_rm_dflt_nodes - free the default nodes in the tree
+ * @pi: port information structure
+ *
+ * This function frees all the nodes except root and TC that were created by
+ * the FW during initialization
+ */
+static void
+ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
+{
+	struct ice_sched_node *node;
+
+	ice_rm_dflt_leaf_node(pi);
+	/* remove the default nodes except TC and root nodes */
+	node = pi->root;
+	while (node) {
+		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
+		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
+			ice_free_sched_node(pi, node);
+			break;
+		}
+		if (!node->num_children)
+			break;
+		node = node->children[0];
+	}
+}
+
+/**
+ * ice_sched_init_port - Initialize scheduler by querying information from FW
+ * @pi: port info structure for the tree to cleanup
+ *
+ * This function is the initial call to find the total number of Tx scheduler
+ * resources, default topology created by firmware and storing the information
+ * in SW DB.
+ */
+enum ice_status ice_sched_init_port(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_topo_elem *buf;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 num_branches;
+	u16 num_elems;
+	u8 i, j;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+
+	/* Query the Default Topology from FW */
+	buf = devm_kcalloc(ice_hw_to_dev(hw), ICE_TXSCHED_MAX_BRANCHES,
+			   sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Query default scheduling tree topology */
+	status = ice_aq_get_dflt_topo(hw, pi->lport, buf,
+				      sizeof(*buf) * ICE_TXSCHED_MAX_BRANCHES,
+				      &num_branches, NULL);
+	if (status)
+		goto err_init_port;
+
+	/* num_branches should be between 1-8 */
+	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
+		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
+			  num_branches);
+		status = ICE_ERR_PARAM;
+		goto err_init_port;
+	}
+
+	/* get the number of elements on the default/first branch */
+	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
+
+	/* num_elems should always be between 1-9 */
+	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
+		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
+			  num_elems);
+		status = ICE_ERR_PARAM;
+		goto err_init_port;
+	}
+
+	/* If the last node is a leaf node then the index of the Q group
+	 * layer is two less than the number of elements.
+	 */
+	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
+	    ICE_AQC_ELEM_TYPE_LEAF)
+		pi->last_node_teid =
+			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
+	else
+		pi->last_node_teid =
+			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
+
+	/* Insert the Tx Sched root node */
+	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
+	if (status)
+		goto err_init_port;
+
+	/* Parse the default tree and cache the information */
+	for (i = 0; i < num_branches; i++) {
+		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
+
+		/* Skip root element as already inserted */
+		for (j = 1; j < num_elems; j++) {
+			/* update the sw entry point */
+			if (buf[0].generic[j].data.elem_type ==
+			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
+				hw->sw_entry_point_layer = j;
+
+			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
+			if (status)
+				goto err_init_port;
+		}
+	}
+
+	/* Remove the default nodes. */
+	if (pi->root)
+		ice_sched_rm_dflt_nodes(pi);
+
+	/* initialize the port for handling the scheduler tree */
+	pi->port_state = ICE_SCHED_PORT_STATE_READY;
+	mutex_init(&pi->sched_lock);
+	INIT_LIST_HEAD(&pi->agg_list);
+	INIT_LIST_HEAD(&pi->vsi_info_list);
+
+err_init_port:
+	if (status && pi->root) {
+		ice_free_sched_node(pi, pi->root);
+		pi->root = NULL;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
 /**
  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
  * @hw: pointer to the HW struct
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index e329f6ec6a0e..2926ee9c373e 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -21,8 +21,14 @@ struct ice_sched_agg_info {
 };
 
 /* FW AQ command calls */
+enum ice_status ice_sched_init_port(struct ice_port_info *pi);
 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw);
 void ice_sched_cleanup_all(struct ice_hw *hw);
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
+enum ice_status
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+		   struct ice_aqc_txsched_elem_data *info);
 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
 #endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 7e8049efac1c..144712db1ad2 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -15,6 +15,7 @@ enum ice_status {
 	ICE_ERR_NO_MEMORY			= -11,
 	ICE_ERR_CFG				= -12,
 	ICE_ERR_OUT_OF_RANGE			= -13,
+	ICE_ERR_BUF_TOO_SHORT			= -52,
 	ICE_ERR_NVM_BLANK_MODE			= -53,
 	ICE_ERR_AQ_ERROR			= -100,
 	ICE_ERR_AQ_TIMEOUT			= -101,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 0b71634668bb..6024c80bfa6b 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -12,6 +12,7 @@
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
 #define ICE_DBG_NVM		BIT_ULL(7)
+#define ICE_DBG_LAN		BIT_ULL(8)
 #define ICE_DBG_SW		BIT_ULL(13)
 #define ICE_DBG_SCHED		BIT_ULL(14)
 #define ICE_DBG_RES		BIT_ULL(17)
@@ -30,12 +31,56 @@ enum ice_aq_res_access_type {
 	ICE_RES_WRITE
 };
 
+enum ice_fc_mode {
+	ICE_FC_NONE = 0,
+	ICE_FC_RX_PAUSE,
+	ICE_FC_TX_PAUSE,
+	ICE_FC_FULL,
+	ICE_FC_PFC,
+	ICE_FC_DFLT
+};
+
 /* Various MAC types */
 enum ice_mac_type {
 	ICE_MAC_UNKNOWN = 0,
 	ICE_MAC_GENERIC,
 };
 
+/* Media Types */
+enum ice_media_type {
+	ICE_MEDIA_UNKNOWN = 0,
+	ICE_MEDIA_FIBER,
+	ICE_MEDIA_BASET,
+	ICE_MEDIA_BACKPLANE,
+	ICE_MEDIA_DA,
+};
+
+struct ice_link_status {
+	/* Refer to ice_aq_phy_type for bits definition */
+	u64 phy_type_low;
+	u16 max_frame_size;
+	u16 link_speed;
+	bool lse_ena;	/* Link Status Event notification */
+	u8 link_info;
+	u8 an_info;
+	u8 ext_info;
+	u8 pacing;
+	u8 req_speeds;
+	/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
+	 * ice_aqc_get_phy_caps structure
+	 */
+	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+};
+
+/* PHY info such as phy_type, etc... */
+struct ice_phy_info {
+	struct ice_link_status link_info;
+	struct ice_link_status link_info_old;
+	u64 phy_type_low;
+	enum ice_media_type media_type;
+	bool get_link_info;
+};
+
 /* Common HW capabilities for SW use */
 struct ice_hw_common_caps {
 	/* TX/RX queues */
@@ -68,6 +113,12 @@ struct ice_hw_dev_caps {
 	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
 };
 
+/* MAC info */
+struct ice_mac_info {
+	u8 lan_addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+};
+
 /* Various RESET request, These are not tied with HW reset types */
 enum ice_reset_req {
 	ICE_RESET_PFR	= 0,
@@ -81,6 +132,12 @@ struct ice_bus_info {
 	u8 func;
 };
 
+/* Flow control (FC) parameters */
+struct ice_fc_info {
+	enum ice_fc_mode current_mode;	/* FC mode in effect */
+	enum ice_fc_mode req_mode;	/* FC mode requested by caller */
+};
+
 /* NVM Information */
 struct ice_nvm_info {
 	u32 eetrack;              /* NVM data version */
@@ -92,6 +149,7 @@ struct ice_nvm_info {
 
 /* Max number of port to queue branches w.r.t topology */
 #define ICE_MAX_TRAFFIC_CLASS 8
+#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
 
 struct ice_sched_node {
 	struct ice_sched_node *parent;
@@ -108,6 +166,9 @@ struct ice_sched_node {
 #define ICE_SCHED_NODE_OWNER_LAN	0
 };
 
+/* Access Macros for Tx Sched Elements data */
+#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid)
+
 /* The aggregator type determines if identifier is for a VSI group,
  * aggregator group, aggregator of queues, or queue group.
  */
@@ -138,6 +199,7 @@ struct ice_sched_tx_policy {
 struct ice_port_info {
 	struct ice_sched_node *root;	/* Root Node per Port */
 	struct ice_hw *hw;		/* back pointer to hw instance */
+	u32 last_node_teid;		/* scheduler last node info */
 	u16 sw_id;			/* Initial switch ID belongs to port */
 	u16 pf_vf_num;
 	u8 port_state;
@@ -145,6 +207,9 @@ struct ice_port_info {
 #define ICE_SCHED_PORT_STATE_READY	0x1
 	u16 dflt_tx_vsi_num;
 	u16 dflt_rx_vsi_num;
+	struct ice_fc_info fc;
+	struct ice_mac_info mac;
+	struct ice_phy_info phy;
 	struct mutex sched_lock;	/* protect access to TXSched tree */
 	struct ice_sched_tx_policy sched_policy;
 	struct list_head vsi_info_list;

From 940b61af02f497fcd911b9e2d75c6b8cf76b92fd Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:10 -0700
Subject: [PATCH 1256/1678] ice: Initialize PF and setup miscellaneous
 interrupt

This patch continues the initialization flow as follows:

1) Allocate and initialize necessary fields (like vsi, num_alloc_vsi,
   irq_tracker, etc) in the ice_pf instance.

2) Setup the miscellaneous interrupt handler. This also known as the
   "other interrupt causes" (OIC) handler and is used to handle non
   hotpath interrupts (like control queue events, link events,
   exceptions, etc.

3) Implement a background task to process admin queue receive (ARQ)
   events received by the driver.

CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  84 ++
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |   2 +
 drivers/net/ethernet/intel/ice/ice_common.c   |   6 +
 drivers/net/ethernet/intel/ice/ice_common.h   |   3 +
 drivers/net/ethernet/intel/ice/ice_controlq.c | 101 +++
 drivers/net/ethernet/intel/ice/ice_controlq.h |   8 +
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  63 ++
 drivers/net/ethernet/intel/ice/ice_main.c     | 719 +++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_txrx.h     |  29 +
 drivers/net/ethernet/intel/ice/ice_type.h     |  11 +
 10 files changed, 1025 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx.h

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 1a1776a2a5a1..8103e61e67f9 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -12,29 +12,113 @@
 #include <linux/compiler.h>
 #include <linux/etherdevice.h>
 #include <linux/pci.h>
+#include <linux/workqueue.h>
 #include <linux/aer.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/bitmap.h>
+#include <linux/if_bridge.h>
 #include "ice_devids.h"
 #include "ice_type.h"
+#include "ice_txrx.h"
 #include "ice_switch.h"
 #include "ice_common.h"
 #include "ice_sched.h"
 
 #define ICE_BAR0		0
+#define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
 #define ICE_AQ_LEN		64
+#define ICE_MIN_MSIX		2
+#define ICE_MAX_VSI_ALLOC	130
+#define ICE_MAX_TXQS		2048
+#define ICE_MAX_RXQS		2048
+#define ICE_RES_VALID_BIT	0x8000
+#define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
+struct ice_res_tracker {
+	u16 num_entries;
+	u16 search_hint;
+	u16 list[1];
+};
+
+struct ice_sw {
+	struct ice_pf *pf;
+	u16 sw_id;		/* switch ID for this switch */
+	u16 bridge_mode;	/* VEB/VEPA/Port Virtualizer */
+};
+
 enum ice_state {
 	__ICE_DOWN,
+	__ICE_PFR_REQ,			/* set by driver and peers */
+	__ICE_ADMINQ_EVENT_PENDING,
+	__ICE_SERVICE_SCHED,
 	__ICE_STATE_NBITS		/* must be last */
 };
 
+/* struct that defines a VSI, associated with a dev */
+struct ice_vsi {
+	struct net_device *netdev;
+	struct ice_port_info *port_info; /* back pointer to port_info */
+	u16 vsi_num;			 /* HW (absolute) index of this VSI */
+} ____cacheline_internodealigned_in_smp;
+
+enum ice_pf_flags {
+	ICE_FLAG_MSIX_ENA,
+	ICE_FLAG_FLTR_SYNC,
+	ICE_FLAG_RSS_ENA,
+	ICE_PF_FLAGS_NBITS		/* must be last */
+};
+
 struct ice_pf {
 	struct pci_dev *pdev;
+	struct msix_entry *msix_entries;
+	struct ice_res_tracker *irq_tracker;
+	struct ice_vsi **vsi;		/* VSIs created by the driver */
+	struct ice_sw *first_sw;	/* first switch created by firmware */
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	DECLARE_BITMAP(avail_txqs, ICE_MAX_TXQS);
+	DECLARE_BITMAP(avail_rxqs, ICE_MAX_RXQS);
+	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+	unsigned long serv_tmr_period;
+	unsigned long serv_tmr_prev;
+	struct timer_list serv_tmr;
+	struct work_struct serv_task;
+	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
+	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
 	u32 msg_enable;
+	u32 oicr_idx;		/* Other interrupt cause vector index */
+	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
+	u32 num_avail_msix;	/* remaining MSIX vectors left unclaimed */
+	u16 num_lan_tx;		/* num lan tx queues setup */
+	u16 num_lan_rx;		/* num lan rx queues setup */
+	u16 q_left_tx;		/* remaining num tx queues left unclaimed */
+	u16 q_left_rx;		/* remaining num rx queues left unclaimed */
+	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
+	u16 num_alloc_vsi;
+
 	struct ice_hw hw;
+	char int_name[ICE_INT_NAME_STR_LEN];
 };
+
+/**
+ * ice_irq_dynamic_ena - Enable default interrupt generation settings
+ * @hw: pointer to hw struct
+ */
+static inline void ice_irq_dynamic_ena(struct ice_hw *hw)
+{
+	u32 vector = ((struct ice_pf *)hw->back)->oicr_idx;
+	int itr = ICE_ITR_NONE;
+	u32 val;
+
+	/* clear the PBA here, as this function is meant to clean out all
+	 * previous interrupts and enable the interrupt
+	 */
+	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+	      (itr << GLINT_DYN_CTL_ITR_INDX_S);
+
+	wr32(hw, GLINT_DYN_CTL(vector), val);
+}
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 5061240996b1..f0837e277b2f 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -583,11 +583,13 @@ struct ice_aq_desc {
 /* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
 #define ICE_AQ_LG_BUF	512
 
+#define ICE_AQ_FLAG_ERR_S	2
 #define ICE_AQ_FLAG_LB_S	9
 #define ICE_AQ_FLAG_RD_S	10
 #define ICE_AQ_FLAG_BUF_S	12
 #define ICE_AQ_FLAG_SI_S	13
 
+#define ICE_AQ_FLAG_ERR		BIT(ICE_AQ_FLAG_ERR_S) /* 0x4    */
 #define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
 #define ICE_AQ_FLAG_RD		BIT(ICE_AQ_FLAG_RD_S)  /* 0x400  */
 #define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index ef5bab25c2f5..a4ce8a87fb0d 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -282,6 +282,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	if (status)
 		return status;
 
+	/* set these values to minimum allowed */
+	hw->itr_gran_200 = ICE_ITR_GRAN_MIN_200;
+	hw->itr_gran_100 = ICE_ITR_GRAN_MIN_100;
+	hw->itr_gran_50 = ICE_ITR_GRAN_MIN_50;
+	hw->itr_gran_25 = ICE_ITR_GRAN_MIN_25;
+
 	status = ice_init_all_ctrlq(hw);
 	if (status)
 		goto err_unroll_cqinit;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index e0a7aa1dd943..dd4473e84ebb 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -17,6 +17,9 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw);
 void ice_shutdown_all_ctrlq(struct ice_hw *hw);
 enum ice_status
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		  struct ice_rq_event_info *e, u16 *pending);
+enum ice_status
 ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
 		enum ice_aq_res_access_type access);
 void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index b32c0fc04bc9..5909a4407e38 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -963,3 +963,104 @@ void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
 	desc->opcode = cpu_to_le16(opcode);
 	desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
 }
+
+/**
+ * ice_clean_rq_elem
+ * @hw: pointer to the hw struct
+ * @cq: pointer to the specific Control queue
+ * @e: event info from the receive descriptor, includes any buffers
+ * @pending: number of events that could be left to process
+ *
+ * This function cleans one Admin Receive Queue element and returns
+ * the contents through e.  It can also return how many events are
+ * left to process through 'pending'.
+ */
+enum ice_status
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		  struct ice_rq_event_info *e, u16 *pending)
+{
+	u16 ntc = cq->rq.next_to_clean;
+	enum ice_status ret_code = 0;
+	struct ice_aq_desc *desc;
+	struct ice_dma_mem *bi;
+	u16 desc_idx;
+	u16 datalen;
+	u16 flags;
+	u16 ntu;
+
+	/* pre-clean the event info */
+	memset(&e->desc, 0, sizeof(e->desc));
+
+	/* take the lock before we start messing with the ring */
+	mutex_lock(&cq->rq_lock);
+
+	if (!cq->rq.count) {
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Receive queue not initialized.\n");
+		ret_code = ICE_ERR_AQ_EMPTY;
+		goto clean_rq_elem_err;
+	}
+
+	/* set next_to_use to head */
+	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+
+	if (ntu == ntc) {
+		/* nothing to do - shouldn't need to update ring's values */
+		ret_code = ICE_ERR_AQ_NO_WORK;
+		goto clean_rq_elem_out;
+	}
+
+	/* now clean the next descriptor */
+	desc = ICE_CTL_Q_DESC(cq->rq, ntc);
+	desc_idx = ntc;
+
+	flags = le16_to_cpu(desc->flags);
+	if (flags & ICE_AQ_FLAG_ERR) {
+		ret_code = ICE_ERR_AQ_ERROR;
+		cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Control Receive Queue Event received with error 0x%x\n",
+			  cq->rq_last_status);
+	}
+	memcpy(&e->desc, desc, sizeof(e->desc));
+	datalen = le16_to_cpu(desc->datalen);
+	e->msg_len = min(datalen, e->buf_len);
+	if (e->msg_buf && e->msg_len)
+		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
+
+	ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n");
+
+	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf,
+		     cq->rq_buf_size);
+
+	/* Restore the original datalen and buffer address in the desc,
+	 * FW updates datalen to indicate the event message size
+	 */
+	bi = &cq->rq.r.rq_bi[ntc];
+	memset(desc, 0, sizeof(*desc));
+
+	desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+	if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+	desc->datalen = cpu_to_le16(bi->size);
+	desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+	desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+	/* set tail = the last cleaned desc index. */
+	wr32(hw, cq->rq.tail, ntc);
+	/* ntc is updated to tail + 1 */
+	ntc++;
+	if (ntc == cq->num_rq_entries)
+		ntc = 0;
+	cq->rq.next_to_clean = ntc;
+	cq->rq.next_to_use = ntu;
+
+clean_rq_elem_out:
+	/* Set pending if needed, unlock and return */
+	if (pending)
+		*pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
+clean_rq_elem_err:
+	mutex_unlock(&cq->rq_lock);
+
+	return ret_code;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index b5306e8a5a0d..ea02b89243e2 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -67,6 +67,14 @@ struct ice_sq_cd {
 
 #define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
 
+/* rq event information */
+struct ice_rq_event_info {
+	struct ice_aq_desc desc;
+	u16 msg_len;
+	u16 buf_len;
+	u8 *msg_buf;
+};
+
 /* Control Queue information */
 struct ice_ctl_q_info {
 	enum ice_ctl_q qtype;
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 893d5e967e66..446a8bbef488 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -14,6 +14,12 @@
 #define PF_FW_ARQLEN			0x00080280
 #define PF_FW_ARQLEN_ARQLEN_S		0
 #define PF_FW_ARQLEN_ARQLEN_M		ICE_M(0x3FF, PF_FW_ARQLEN_ARQLEN_S)
+#define PF_FW_ARQLEN_ARQVFE_S		28
+#define PF_FW_ARQLEN_ARQVFE_M		BIT(PF_FW_ARQLEN_ARQVFE_S)
+#define PF_FW_ARQLEN_ARQOVFL_S		29
+#define PF_FW_ARQLEN_ARQOVFL_M		BIT(PF_FW_ARQLEN_ARQOVFL_S)
+#define PF_FW_ARQLEN_ARQCRIT_S		30
+#define PF_FW_ARQLEN_ARQCRIT_M		BIT(PF_FW_ARQLEN_ARQCRIT_S)
 #define PF_FW_ARQLEN_ARQENABLE_S	31
 #define PF_FW_ARQLEN_ARQENABLE_M	BIT(PF_FW_ARQLEN_ARQENABLE_S)
 #define PF_FW_ARQT			0x00080480
@@ -25,6 +31,12 @@
 #define PF_FW_ATQLEN			0x00080200
 #define PF_FW_ATQLEN_ATQLEN_S		0
 #define PF_FW_ATQLEN_ATQLEN_M		ICE_M(0x3FF, PF_FW_ATQLEN_ATQLEN_S)
+#define PF_FW_ATQLEN_ATQVFE_S		28
+#define PF_FW_ATQLEN_ATQVFE_M		BIT(PF_FW_ATQLEN_ATQVFE_S)
+#define PF_FW_ATQLEN_ATQOVFL_S		29
+#define PF_FW_ATQLEN_ATQOVFL_M		BIT(PF_FW_ATQLEN_ATQOVFL_S)
+#define PF_FW_ATQLEN_ATQCRIT_S		30
+#define PF_FW_ATQLEN_ATQCRIT_M		BIT(PF_FW_ATQLEN_ATQCRIT_S)
 #define PF_FW_ATQLEN_ATQENABLE_S	31
 #define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
 #define PF_FW_ATQT			0x00080400
@@ -43,6 +55,57 @@
 #define PFGEN_CTRL			0x00091000
 #define PFGEN_CTRL_PFSWR_S		0
 #define PFGEN_CTRL_PFSWR_M		BIT(PFGEN_CTRL_PFSWR_S)
+#define PFHMC_ERRORDATA			0x00520500
+#define PFHMC_ERRORINFO			0x00520400
+#define GLINT_DYN_CTL(_INT)		(0x00160000 + ((_INT) * 4))
+#define GLINT_DYN_CTL_INTENA_S		0
+#define GLINT_DYN_CTL_INTENA_M		BIT(GLINT_DYN_CTL_INTENA_S)
+#define GLINT_DYN_CTL_CLEARPBA_S	1
+#define GLINT_DYN_CTL_CLEARPBA_M	BIT(GLINT_DYN_CTL_CLEARPBA_S)
+#define GLINT_DYN_CTL_ITR_INDX_S	3
+#define GLINT_DYN_CTL_SW_ITR_INDX_S	25
+#define GLINT_DYN_CTL_SW_ITR_INDX_M	ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S)
+#define GLINT_DYN_CTL_INTENA_MSK_S	31
+#define GLINT_DYN_CTL_INTENA_MSK_M	BIT(GLINT_DYN_CTL_INTENA_MSK_S)
+#define GLINT_ITR(_i, _INT)		(0x00154000 + ((_i) * 8192 + (_INT) * 4))
+#define PFINT_FW_CTL			0x0016C800
+#define PFINT_FW_CTL_MSIX_INDX_S	0
+#define PFINT_FW_CTL_MSIX_INDX_M	ICE_M(0x7FF, PFINT_FW_CTL_MSIX_INDX_S)
+#define PFINT_FW_CTL_ITR_INDX_S		11
+#define PFINT_FW_CTL_ITR_INDX_M		ICE_M(0x3, PFINT_FW_CTL_ITR_INDX_S)
+#define PFINT_FW_CTL_CAUSE_ENA_S	30
+#define PFINT_FW_CTL_CAUSE_ENA_M	BIT(PFINT_FW_CTL_CAUSE_ENA_S)
+#define PFINT_OICR			0x0016CA00
+#define PFINT_OICR_INTEVENT_S		0
+#define PFINT_OICR_INTEVENT_M		BIT(PFINT_OICR_INTEVENT_S)
+#define PFINT_OICR_HLP_RDY_S		14
+#define PFINT_OICR_HLP_RDY_M		BIT(PFINT_OICR_HLP_RDY_S)
+#define PFINT_OICR_CPM_RDY_S		15
+#define PFINT_OICR_CPM_RDY_M		BIT(PFINT_OICR_CPM_RDY_S)
+#define PFINT_OICR_ECC_ERR_S		16
+#define PFINT_OICR_ECC_ERR_M		BIT(PFINT_OICR_ECC_ERR_S)
+#define PFINT_OICR_MAL_DETECT_S		19
+#define PFINT_OICR_MAL_DETECT_M		BIT(PFINT_OICR_MAL_DETECT_S)
+#define PFINT_OICR_GRST_S		20
+#define PFINT_OICR_GRST_M		BIT(PFINT_OICR_GRST_S)
+#define PFINT_OICR_PCI_EXCEPTION_S	21
+#define PFINT_OICR_PCI_EXCEPTION_M	BIT(PFINT_OICR_PCI_EXCEPTION_S)
+#define PFINT_OICR_GPIO_S		22
+#define PFINT_OICR_GPIO_M		BIT(PFINT_OICR_GPIO_S)
+#define PFINT_OICR_STORM_DETECT_S	24
+#define PFINT_OICR_STORM_DETECT_M	BIT(PFINT_OICR_STORM_DETECT_S)
+#define PFINT_OICR_HMC_ERR_S		26
+#define PFINT_OICR_HMC_ERR_M		BIT(PFINT_OICR_HMC_ERR_S)
+#define PFINT_OICR_PE_CRITERR_S		28
+#define PFINT_OICR_PE_CRITERR_M		BIT(PFINT_OICR_PE_CRITERR_S)
+#define PFINT_OICR_CTL			0x0016CA80
+#define PFINT_OICR_CTL_MSIX_INDX_S	0
+#define PFINT_OICR_CTL_MSIX_INDX_M	ICE_M(0x7FF, PFINT_OICR_CTL_MSIX_INDX_S)
+#define PFINT_OICR_CTL_ITR_INDX_S	11
+#define PFINT_OICR_CTL_ITR_INDX_M	ICE_M(0x3, PFINT_OICR_CTL_ITR_INDX_S)
+#define PFINT_OICR_CTL_CAUSE_ENA_S	30
+#define PFINT_OICR_CTL_CAUSE_ENA_M	BIT(PFINT_OICR_CTL_CAUSE_ENA_S)
+#define PFINT_OICR_ENA			0x0016C900
 #define GLLAN_RCTL_0			0x002941F8
 #define GLNVM_FLA			0x000B6108
 #define GLNVM_FLA_LOCKED_S		6
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 953be26054ca..d93eaae5dc60 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -26,6 +26,294 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXX
 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 
+static struct workqueue_struct *ice_wq;
+
+/**
+ * ice_search_res - Search the tracker for a block of resources
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ * Returns the base item index of the block, or -ENOMEM for error
+ */
+static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
+{
+	int start = res->search_hint;
+	int end = start;
+
+	id |= ICE_RES_VALID_BIT;
+
+	do {
+		/* skip already allocated entries */
+		if (res->list[end++] & ICE_RES_VALID_BIT) {
+			start = end;
+			if ((start + needed) > res->num_entries)
+				break;
+		}
+
+		if (end == (start + needed)) {
+			int i = start;
+
+			/* there was enough, so assign it to the requestor */
+			while (i != end)
+				res->list[i++] = id;
+
+			if (end == res->num_entries)
+				end = 0;
+
+			res->search_hint = end;
+			return start;
+		}
+	} while (1);
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_get_res - get a block of resources
+ * @pf: board private structure
+ * @res: pointer to the resource
+ * @needed: size of the block needed
+ * @id: identifier to track owner
+ *
+ * Returns the base item index of the block, or -ENOMEM for error
+ * The search_hint trick and lack of advanced fit-finding only works
+ * because we're highly likely to have all the same sized requests.
+ * Linear search time and any fragmentation should be minimal.
+ */
+static int
+ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
+{
+	int ret;
+
+	if (!res || !pf)
+		return -EINVAL;
+
+	if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
+		dev_err(&pf->pdev->dev,
+			"param err: needed=%d, num_entries = %d id=0x%04x\n",
+			needed, res->num_entries, id);
+		return -EINVAL;
+	}
+
+	/* search based on search_hint */
+	ret = ice_search_res(res, needed, id);
+
+	if (ret < 0) {
+		/* previous search failed. Reset search hint and try again */
+		res->search_hint = 0;
+		ret = ice_search_res(res, needed, id);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_free_res - free a block of resources
+ * @res: pointer to the resource
+ * @index: starting index previously returned by ice_get_res
+ * @id: identifier to track owner
+ * Returns number of resources freed
+ */
+static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
+{
+	int count = 0;
+	int i;
+
+	if (!res || index >= res->num_entries)
+		return -EINVAL;
+
+	id |= ICE_RES_VALID_BIT;
+	for (i = index; i < res->num_entries && res->list[i] == id; i++) {
+		res->list[i] = 0;
+		count++;
+	}
+
+	return count;
+}
+
+/**
+ * __ice_clean_ctrlq - helper function to clean controlq rings
+ * @pf: ptr to struct ice_pf
+ * @q_type: specific Control queue type
+ */
+static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
+{
+	struct ice_rq_event_info event;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_ctl_q_info *cq;
+	u16 pending, i = 0;
+	const char *qtype;
+	u32 oldval, val;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		qtype = "Admin";
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
+			 q_type);
+		return 0;
+	}
+
+	/* check for error indications - PF_xx_AxQLEN register layout for
+	 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
+	 */
+	val = rd32(hw, cq->rq.len);
+	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+		   PF_FW_ARQLEN_ARQCRIT_M)) {
+		oldval = val;
+		if (val & PF_FW_ARQLEN_ARQVFE_M)
+			dev_dbg(&pf->pdev->dev,
+				"%s Receive Queue VF Error detected\n", qtype);
+		if (val & PF_FW_ARQLEN_ARQOVFL_M) {
+			dev_dbg(&pf->pdev->dev,
+				"%s Receive Queue Overflow Error detected\n",
+				qtype);
+		}
+		if (val & PF_FW_ARQLEN_ARQCRIT_M)
+			dev_dbg(&pf->pdev->dev,
+				"%s Receive Queue Critical Error detected\n",
+				qtype);
+		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+			 PF_FW_ARQLEN_ARQCRIT_M);
+		if (oldval != val)
+			wr32(hw, cq->rq.len, val);
+	}
+
+	val = rd32(hw, cq->sq.len);
+	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+		   PF_FW_ATQLEN_ATQCRIT_M)) {
+		oldval = val;
+		if (val & PF_FW_ATQLEN_ATQVFE_M)
+			dev_dbg(&pf->pdev->dev,
+				"%s Send Queue VF Error detected\n", qtype);
+		if (val & PF_FW_ATQLEN_ATQOVFL_M) {
+			dev_dbg(&pf->pdev->dev,
+				"%s Send Queue Overflow Error detected\n",
+				qtype);
+		}
+		if (val & PF_FW_ATQLEN_ATQCRIT_M)
+			dev_dbg(&pf->pdev->dev,
+				"%s Send Queue Critical Error detected\n",
+				qtype);
+		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+			 PF_FW_ATQLEN_ATQCRIT_M);
+		if (oldval != val)
+			wr32(hw, cq->sq.len, val);
+	}
+
+	event.buf_len = cq->rq_buf_size;
+	event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len,
+				     GFP_KERNEL);
+	if (!event.msg_buf)
+		return 0;
+
+	do {
+		enum ice_status ret;
+
+		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
+		if (ret == ICE_ERR_AQ_NO_WORK)
+			break;
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"%s Receive Queue event error %d\n", qtype,
+				ret);
+			break;
+		}
+	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
+
+	devm_kfree(&pf->pdev->dev, event.msg_buf);
+
+	return pending && (i == ICE_DFLT_IRQ_WORK);
+}
+
+/**
+ * ice_clean_adminq_subtask - clean the AdminQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_adminq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
+		return;
+
+	clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+	/* re-enable Admin queue interrupt causes */
+	val = rd32(hw, PFINT_FW_CTL);
+	wr32(hw, PFINT_FW_CTL, (val | PFINT_FW_CTL_CAUSE_ENA_M));
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_service_task_schedule - schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue.
+ */
+static void ice_service_task_schedule(struct ice_pf *pf)
+{
+	if (!test_bit(__ICE_DOWN, pf->state) &&
+	    !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state))
+		queue_work(ice_wq, &pf->serv_task);
+}
+
+/**
+ * ice_service_task_complete - finish up the service task
+ * @pf: board private structure
+ */
+static void ice_service_task_complete(struct ice_pf *pf)
+{
+	WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state));
+
+	/* force memory (pf->state) to sync before next service task */
+	smp_mb__before_atomic();
+	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_service_timer - timer callback to schedule service task
+ * @t: pointer to timer_list
+ */
+static void ice_service_timer(struct timer_list *t)
+{
+	struct ice_pf *pf = from_timer(pf, t, serv_tmr);
+
+	mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_service_task - manage and run subtasks
+ * @work: pointer to work_struct contained by the PF struct
+ */
+static void ice_service_task(struct work_struct *work)
+{
+	struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
+	unsigned long start_time = jiffies;
+
+	/* subtasks */
+	ice_clean_adminq_subtask(pf);
+
+	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
+	ice_service_task_complete(pf);
+
+	/* If the tasks have taken longer than one service timer period
+	 * or there is more work to be done, reset the service timer to
+	 * schedule the service task now.
+	 */
+	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
+	    test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+		mod_timer(&pf->serv_tmr, jiffies);
+}
+
 /**
  * ice_set_ctrlq_len - helper function to set controlq length
  * @hw: pointer to the hw instance
@@ -38,6 +326,361 @@ static void ice_set_ctrlq_len(struct ice_hw *hw)
 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
 }
 
+/**
+ * ice_ena_misc_vector - enable the non-queue interrupts
+ * @pf: board private structure
+ */
+static void ice_ena_misc_vector(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	/* clear things first */
+	wr32(hw, PFINT_OICR_ENA, 0);	/* disable all */
+	rd32(hw, PFINT_OICR);		/* read to clear */
+
+	val = (PFINT_OICR_HLP_RDY_M |
+	       PFINT_OICR_CPM_RDY_M |
+	       PFINT_OICR_ECC_ERR_M |
+	       PFINT_OICR_MAL_DETECT_M |
+	       PFINT_OICR_GRST_M |
+	       PFINT_OICR_PCI_EXCEPTION_M |
+	       PFINT_OICR_GPIO_M |
+	       PFINT_OICR_STORM_DETECT_M |
+	       PFINT_OICR_HMC_ERR_M);
+
+	wr32(hw, PFINT_OICR_ENA, val);
+
+	/* SW_ITR_IDX = 0, but don't change INTENA */
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
+}
+
+/**
+ * ice_misc_intr - misc interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
+{
+	struct ice_pf *pf = (struct ice_pf *)data;
+	struct ice_hw *hw = &pf->hw;
+	irqreturn_t ret = IRQ_NONE;
+	u32 oicr, ena_mask;
+
+	set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+	oicr = rd32(hw, PFINT_OICR);
+	ena_mask = rd32(hw, PFINT_OICR_ENA);
+
+	if (!(oicr & PFINT_OICR_INTEVENT_M))
+		goto ena_intr;
+
+	if (oicr & PFINT_OICR_HMC_ERR_M) {
+		ena_mask &= ~PFINT_OICR_HMC_ERR_M;
+		dev_dbg(&pf->pdev->dev,
+			"HMC Error interrupt - info 0x%x, data 0x%x\n",
+			rd32(hw, PFHMC_ERRORINFO),
+			rd32(hw, PFHMC_ERRORDATA));
+	}
+
+	/* Report and mask off any remaining unexpected interrupts */
+	oicr &= ena_mask;
+	if (oicr) {
+		dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n",
+			oicr);
+		/* If a critical error is pending there is no choice but to
+		 * reset the device.
+		 */
+		if (oicr & (PFINT_OICR_PE_CRITERR_M |
+			    PFINT_OICR_PCI_EXCEPTION_M |
+			    PFINT_OICR_ECC_ERR_M))
+			set_bit(__ICE_PFR_REQ, pf->state);
+
+		ena_mask &= ~oicr;
+	}
+	ret = IRQ_HANDLED;
+
+ena_intr:
+	/* re-enable interrupt causes that are not handled during this pass */
+	wr32(hw, PFINT_OICR_ENA, ena_mask);
+	if (!test_bit(__ICE_DOWN, pf->state)) {
+		ice_service_task_schedule(pf);
+		ice_irq_dynamic_ena(hw);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_free_irq_msix_misc - Unroll misc vector setup
+ * @pf: board private structure
+ */
+static void ice_free_irq_msix_misc(struct ice_pf *pf)
+{
+	/* disable OICR interrupt */
+	wr32(&pf->hw, PFINT_OICR_ENA, 0);
+	ice_flush(&pf->hw);
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
+		synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
+		devm_free_irq(&pf->pdev->dev,
+			      pf->msix_entries[pf->oicr_idx].vector, pf);
+	}
+
+	ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
+}
+
+/**
+ * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors.  This is not used
+ * when in MSI or Legacy interrupt mode.
+ */
+static int ice_req_irq_msix_misc(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	int oicr_idx, err = 0;
+	u8 itr_gran;
+	u32 val;
+
+	if (!pf->int_name[0])
+		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
+			 dev_driver_string(&pf->pdev->dev),
+			 dev_name(&pf->pdev->dev));
+
+	/* reserve one vector in irq_tracker for misc interrupts */
+	oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+	if (oicr_idx < 0)
+		return oicr_idx;
+
+	pf->oicr_idx = oicr_idx;
+
+	err = devm_request_irq(&pf->pdev->dev,
+			       pf->msix_entries[pf->oicr_idx].vector,
+			       ice_misc_intr, 0, pf->int_name, pf);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"devm_request_irq for %s failed: %d\n",
+			pf->int_name, err);
+		ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+		return err;
+	}
+
+	ice_ena_misc_vector(pf);
+
+	val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+	      (ICE_RX_ITR & PFINT_OICR_CTL_ITR_INDX_M) |
+	      PFINT_OICR_CTL_CAUSE_ENA_M;
+	wr32(hw, PFINT_OICR_CTL, val);
+
+	/* This enables Admin queue Interrupt causes */
+	val = (pf->oicr_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+	      (ICE_RX_ITR & PFINT_FW_CTL_ITR_INDX_M) |
+	      PFINT_FW_CTL_CAUSE_ENA_M;
+	wr32(hw, PFINT_FW_CTL, val);
+
+	itr_gran = hw->itr_gran_200;
+
+	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
+	     ITR_TO_REG(ICE_ITR_8K, itr_gran));
+
+	ice_flush(hw);
+	ice_irq_dynamic_ena(hw);
+
+	return 0;
+}
+
+/**
+ * ice_determine_q_usage - Calculate queue distribution
+ * @pf: board private structure
+ *
+ * Return -ENOMEM if we don't get enough queues for all ports
+ */
+static void ice_determine_q_usage(struct ice_pf *pf)
+{
+	u16 q_left_tx, q_left_rx;
+
+	q_left_tx = pf->hw.func_caps.common_cap.num_txq;
+	q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
+
+	/* initial support for only 1 tx and 1 rx queue */
+	pf->num_lan_tx = 1;
+	pf->num_lan_rx = 1;
+
+	pf->q_left_tx = q_left_tx - pf->num_lan_tx;
+	pf->q_left_rx = q_left_rx - pf->num_lan_rx;
+}
+
+/**
+ * ice_deinit_pf - Unrolls initialziations done by ice_init_pf
+ * @pf: board private structure to initialize
+ */
+static void ice_deinit_pf(struct ice_pf *pf)
+{
+	if (pf->serv_tmr.function)
+		del_timer_sync(&pf->serv_tmr);
+	if (pf->serv_task.func)
+		cancel_work_sync(&pf->serv_task);
+	mutex_destroy(&pf->sw_mutex);
+	mutex_destroy(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_init_pf - Initialize general software structures (struct ice_pf)
+ * @pf: board private structure to initialize
+ */
+static void ice_init_pf(struct ice_pf *pf)
+{
+	bitmap_zero(pf->flags, ICE_PF_FLAGS_NBITS);
+	set_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+
+	mutex_init(&pf->sw_mutex);
+	mutex_init(&pf->avail_q_mutex);
+
+	/* Clear avail_[t|r]x_qs bitmaps (set all to avail) */
+	mutex_lock(&pf->avail_q_mutex);
+	bitmap_zero(pf->avail_txqs, ICE_MAX_TXQS);
+	bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS);
+	mutex_unlock(&pf->avail_q_mutex);
+
+	/* setup service timer and periodic service task */
+	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
+	pf->serv_tmr_period = HZ;
+	INIT_WORK(&pf->serv_task, ice_service_task);
+	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_ena_msix_range - Request a range of MSIX vectors from the OS
+ * @pf: board private structure
+ *
+ * compute the number of MSIX vectors required (v_budget) and request from
+ * the OS. Return the number of vectors reserved or negative on failure
+ */
+static int ice_ena_msix_range(struct ice_pf *pf)
+{
+	int v_left, v_actual, v_budget = 0;
+	int needed, err, i;
+
+	v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
+
+	/* reserve one vector for miscellaneous handler */
+	needed = 1;
+	v_budget += needed;
+	v_left -= needed;
+
+	/* reserve vectors for LAN traffic */
+	pf->num_lan_msix = min_t(int, num_online_cpus(), v_left);
+	v_budget += pf->num_lan_msix;
+
+	pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
+					sizeof(struct msix_entry), GFP_KERNEL);
+
+	if (!pf->msix_entries) {
+		err = -ENOMEM;
+		goto exit_err;
+	}
+
+	for (i = 0; i < v_budget; i++)
+		pf->msix_entries[i].entry = i;
+
+	/* actually reserve the vectors */
+	v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
+					 ICE_MIN_MSIX, v_budget);
+
+	if (v_actual < 0) {
+		dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n");
+		err = v_actual;
+		goto msix_err;
+	}
+
+	if (v_actual < v_budget) {
+		dev_warn(&pf->pdev->dev,
+			 "not enough vectors. requested = %d, obtained = %d\n",
+			 v_budget, v_actual);
+		if (v_actual >= (pf->num_lan_msix + 1)) {
+			pf->num_avail_msix = v_actual - (pf->num_lan_msix + 1);
+		} else if (v_actual >= 2) {
+			pf->num_lan_msix = 1;
+			pf->num_avail_msix = v_actual - 2;
+		} else {
+			pci_disable_msix(pf->pdev);
+			err = -ERANGE;
+			goto msix_err;
+		}
+	}
+
+	return v_actual;
+
+msix_err:
+	devm_kfree(&pf->pdev->dev, pf->msix_entries);
+	goto exit_err;
+
+exit_err:
+	pf->num_lan_msix = 0;
+	clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+	return err;
+}
+
+/**
+ * ice_dis_msix - Disable MSI-X interrupt setup in OS
+ * @pf: board private structure
+ */
+static void ice_dis_msix(struct ice_pf *pf)
+{
+	pci_disable_msix(pf->pdev);
+	devm_kfree(&pf->pdev->dev, pf->msix_entries);
+	pf->msix_entries = NULL;
+	clear_bit(ICE_FLAG_MSIX_ENA, pf->flags);
+}
+
+/**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+static int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+	int vectors = 0;
+	ssize_t size;
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		vectors = ice_ena_msix_range(pf);
+	else
+		return -ENODEV;
+
+	if (vectors < 0)
+		return vectors;
+
+	/* set up vector assignment tracking */
+	size = sizeof(struct ice_res_tracker) + (sizeof(u16) * vectors);
+
+	pf->irq_tracker = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+	if (!pf->irq_tracker) {
+		ice_dis_msix(pf);
+		return -ENOMEM;
+	}
+
+	pf->irq_tracker->num_entries = vectors;
+
+	return 0;
+}
+
+/**
+ * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
+ * @pf: board private structure
+ */
+static void ice_clear_interrupt_scheme(struct ice_pf *pf)
+{
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		ice_dis_msix(pf);
+
+	devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+	pf->irq_tracker = NULL;
+}
+
 /**
  * ice_probe - Device initialization routine
  * @pdev: PCI device information struct
@@ -113,8 +756,70 @@ static int ice_probe(struct pci_dev *pdev,
 		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build,
 		 hw->api_maj_ver, hw->api_min_ver);
 
+	ice_init_pf(pf);
+
+	ice_determine_q_usage(pf);
+
+	pf->num_alloc_vsi = min_t(u16, ICE_MAX_VSI_ALLOC,
+				  hw->func_caps.guaranteed_num_vsi);
+	if (!pf->num_alloc_vsi) {
+		err = -EIO;
+		goto err_init_pf_unroll;
+	}
+
+	pf->vsi = devm_kcalloc(&pdev->dev, pf->num_alloc_vsi,
+			       sizeof(struct ice_vsi *), GFP_KERNEL);
+	if (!pf->vsi) {
+		err = -ENOMEM;
+		goto err_init_pf_unroll;
+	}
+
+	err = ice_init_interrupt_scheme(pf);
+	if (err) {
+		dev_err(&pdev->dev,
+			"ice_init_interrupt_scheme failed: %d\n", err);
+		err = -EIO;
+		goto err_init_interrupt_unroll;
+	}
+
+	/* In case of MSIX we are going to setup the misc vector right here
+	 * to handle admin queue events etc. In case of legacy and MSI
+	 * the misc functionality and queue processing is combined in
+	 * the same vector and that gets setup at open.
+	 */
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+		err = ice_req_irq_msix_misc(pf);
+		if (err) {
+			dev_err(&pdev->dev,
+				"setup of misc vector failed: %d\n", err);
+			goto err_init_interrupt_unroll;
+		}
+	}
+
+	/* create switch struct for the switch element created by FW on boot */
+	pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(struct ice_sw),
+				    GFP_KERNEL);
+	if (!pf->first_sw) {
+		err = -ENOMEM;
+		goto err_msix_misc_unroll;
+	}
+
+	pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+	pf->first_sw->pf = pf;
+
+	/* record the sw_id available for later use */
+	pf->first_sw->sw_id = hw->port_info->sw_id;
+
 	return 0;
 
+err_msix_misc_unroll:
+	ice_free_irq_msix_misc(pf);
+err_init_interrupt_unroll:
+	ice_clear_interrupt_scheme(pf);
+	devm_kfree(&pdev->dev, pf->vsi);
+err_init_pf_unroll:
+	ice_deinit_pf(pf);
+	ice_deinit_hw(hw);
 err_exit_unroll:
 	pci_disable_pcie_error_reporting(pdev);
 	return err;
@@ -133,6 +838,9 @@ static void ice_remove(struct pci_dev *pdev)
 
 	set_bit(__ICE_DOWN, pf->state);
 
+	ice_free_irq_msix_misc(pf);
+	ice_clear_interrupt_scheme(pf);
+	ice_deinit_pf(pf);
 	ice_deinit_hw(&pf->hw);
 	pci_disable_pcie_error_reporting(pdev);
 }
@@ -176,9 +884,17 @@ static int __init ice_module_init(void)
 	pr_info("%s - version %s\n", ice_driver_string, ice_drv_ver);
 	pr_info("%s\n", ice_copyright);
 
+	ice_wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, KBUILD_MODNAME);
+	if (!ice_wq) {
+		pr_err("Failed to create workqueue\n");
+		return -ENOMEM;
+	}
+
 	status = pci_register_driver(&ice_driver);
-	if (status)
+	if (status) {
 		pr_err("failed to register pci driver, err %d\n", status);
+		destroy_workqueue(ice_wq);
+	}
 
 	return status;
 }
@@ -193,6 +909,7 @@ module_init(ice_module_init);
 static void __exit ice_module_exit(void)
 {
 	pci_unregister_driver(&ice_driver);
+	destroy_workqueue(ice_wq);
 	pr_info("module unloaded\n");
 }
 module_exit(ice_module_exit);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
new file mode 100644
index 000000000000..7ceb69ea745e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TXRX_H_
+#define _ICE_TXRX_H_
+
+#define ICE_DFLT_IRQ_WORK	256
+
+/* this enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers or more generally anywhere in the manual
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
+ */
+enum ice_dyn_idx_t {
+	ICE_IDX_ITR0 = 0,
+	ICE_IDX_ITR1 = 1,
+	ICE_IDX_ITR2 = 2,
+	ICE_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
+};
+
+/* indices into GLINT_ITR registers */
+#define ICE_RX_ITR	ICE_IDX_ITR0
+#define ICE_ITR_DYNAMIC	0x8000  /* use top bit as a flag */
+#define ICE_ITR_8K	0x003E
+
+/* apply ITR HW granularity translation to program the HW registers */
+#define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran))
+
+#endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 6024c80bfa6b..2325be552a84 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -261,6 +261,17 @@ struct ice_hw {
 	u8 fw_min_ver;		/* firmware minor version */
 	u8 fw_patch;		/* firmware patch version */
 	u32 fw_build;		/* firmware build number */
+
+	/* minimum allowed value for different speeds */
+#define ICE_ITR_GRAN_MIN_200	1
+#define ICE_ITR_GRAN_MIN_100	1
+#define ICE_ITR_GRAN_MIN_50	2
+#define ICE_ITR_GRAN_MIN_25	4
+	/* ITR granularity in 1 us */
+	u8 itr_gran_200;
+	u8 itr_gran_100;
+	u8 itr_gran_50;
+	u8 itr_gran_25;
 };
 
 /* Checksum and Shadow RAM pointers */

From 3a858ba392c3b19986c40a4c170ddc37b144115f Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:11 -0700
Subject: [PATCH 1257/1678] ice: Add support for VSI allocation and
 deallocation

This patch introduces data structures and functions to alloc/free
VSIs. The driver represents a VSI using the ice_vsi structure.

Some noteworthy points about VSI allocation:

1) A VSI is allocated in the firmware using the "add VSI" admin queue
   command (implemented as ice_aq_add_vsi). The firmware returns an
   identifier for the allocated VSI. The VSI context is used to program
   certain aspects (loopback, queue map, etc.) of the VSI's configuration.

2) A VSI is deleted using the "free VSI" admin queue command (implemented
   as ice_aq_free_vsi).

3) The driver represents a VSI using struct ice_vsi. This is allocated
   and initialized as part of the ice_vsi_alloc flow, and deallocated
   as part of the ice_vsi_delete flow.

4) Once the VSI is created, a netdev is allocated and associated with it.
   The VSI's ring and vector related data structures are also allocated
   and initialized.

5) A VSI's queues can either be contiguous or scattered. To do this, the
   driver maintains a bitmap (vsi->avail_txqs) which is kept in sync with
   the firmware's VSI queue allocation imap. If the VSI can't get a
   contiguous queue allocation, it will fallback to scatter. This is
   implemented in ice_vsi_get_qs which is called as part of the VSI setup
   flow. In the release flow, the VSI's queues are released and the bitmap
   is updated to reflect this by ice_vsi_put_qs.

CC: Shannon Nelson <shannon.nelson@oracle.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |   72 ++
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  199 +++
 drivers/net/ethernet/intel/ice/ice_main.c     | 1111 +++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_switch.c   |  115 ++
 drivers/net/ethernet/intel/ice/ice_switch.h   |   21 +
 drivers/net/ethernet/intel/ice/ice_txrx.h     |   26 +
 drivers/net/ethernet/intel/ice/ice_type.h     |    4 +
 7 files changed, 1548 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 8103e61e67f9..21d0c237ee3f 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -11,6 +11,8 @@
 #include <linux/netdevice.h>
 #include <linux/compiler.h>
 #include <linux/etherdevice.h>
+#include <linux/cpumask.h>
+#include <linux/if_vlan.h>
 #include <linux/pci.h>
 #include <linux/workqueue.h>
 #include <linux/aer.h>
@@ -18,6 +20,7 @@
 #include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/bitmap.h>
+#include <linux/log2.h>
 #include <linux/if_bridge.h>
 #include "ice_devids.h"
 #include "ice_type.h"
@@ -27,17 +30,43 @@
 #include "ice_sched.h"
 
 #define ICE_BAR0		0
+#define ICE_DFLT_NUM_DESC	128
+#define ICE_REQ_DESC_MULTIPLE	32
 #define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
 #define ICE_AQ_LEN		64
 #define ICE_MIN_MSIX		2
+#define ICE_NO_VSI		0xffff
 #define ICE_MAX_VSI_ALLOC	130
 #define ICE_MAX_TXQS		2048
 #define ICE_MAX_RXQS		2048
+#define ICE_VSI_MAP_CONTIG	0
+#define ICE_VSI_MAP_SCATTER	1
+#define ICE_MAX_SCATTER_TXQS	16
+#define ICE_MAX_SCATTER_RXQS	16
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
+#define ICE_INVAL_Q_INDEX	0xffff
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
+#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
+			 ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN)
+
+#define ICE_UP_TABLE_TRANSLATE(val, i) \
+		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
+		  ICE_AQ_VSI_UP_TABLE_UP##i##_M)
+
+struct ice_tc_info {
+	u16 qoffset;
+	u16 qcount;
+};
+
+struct ice_tc_cfg {
+	u8 numtc; /* Total number of enabled TCs */
+	u8 ena_tc; /* TX map */
+	struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
 struct ice_res_tracker {
 	u16 num_entries;
 	u16 search_hint;
@@ -61,8 +90,47 @@ enum ice_state {
 /* struct that defines a VSI, associated with a dev */
 struct ice_vsi {
 	struct net_device *netdev;
+	struct ice_sw *vsw;		 /* switch this VSI is on */
+	struct ice_pf *back;		 /* back pointer to PF */
 	struct ice_port_info *port_info; /* back pointer to port_info */
+	struct ice_ring **rx_rings;	 /* rx ring array */
+	struct ice_ring **tx_rings;	 /* tx ring array */
+	struct ice_q_vector **q_vectors; /* q_vector array */
+	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	int num_q_vectors;
+	int base_vector;
+	enum ice_vsi_type type;
 	u16 vsi_num;			 /* HW (absolute) index of this VSI */
+	u16 idx;			 /* software index in pf->vsi[] */
+
+	/* Interrupt thresholds */
+	u16 work_lmt;
+
+	struct ice_aqc_vsi_props info;	 /* VSI properties */
+
+	/* queue information */
+	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	u8 rx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	u16 txq_map[ICE_MAX_TXQS];	 /* index in pf->avail_txqs */
+	u16 rxq_map[ICE_MAX_RXQS];	 /* index in pf->avail_rxqs */
+	u16 alloc_txq;			 /* Allocated Tx queues */
+	u16 num_txq;			 /* Used Tx queues */
+	u16 alloc_rxq;			 /* Allocated Rx queues */
+	u16 num_rxq;			 /* Used Rx queues */
+	u16 num_desc;
+	struct ice_tc_cfg tc_cfg;
+} ____cacheline_internodealigned_in_smp;
+
+/* struct that defines an interrupt vector */
+struct ice_q_vector {
+	struct ice_vsi *vsi;
+	cpumask_t affinity_mask;
+	struct napi_struct napi;
+	struct ice_ring_container rx;
+	struct ice_ring_container tx;
+	u16 v_idx;			/* index in the vsi->q_vector array. */
+	u8 num_ring_tx;			/* total number of tx rings in vector */
+	u8 num_ring_rx;			/* total number of rx rings in vector */
 } ____cacheline_internodealigned_in_smp;
 
 enum ice_pf_flags {
@@ -103,6 +171,10 @@ struct ice_pf {
 	char int_name[ICE_INT_NAME_STR_LEN];
 };
 
+struct ice_netdev_priv {
+	struct ice_vsi *vsi;
+};
+
 /**
  * ice_irq_dynamic_ena - Enable default interrupt generation settings
  * @hw: pointer to hw struct
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index f0837e277b2f..682e8dac72cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -9,6 +9,7 @@
  */
 
 #define ICE_AQC_TOPO_MAX_LEVEL_NUM	0x9
+#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX	9728
 
 struct ice_aqc_generic {
 	__le32 param0;
@@ -190,6 +191,199 @@ struct ice_aqc_get_sw_cfg_resp {
 	struct ice_aqc_get_sw_cfg_resp_elem elements[1];
 };
 
+/* Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Get VSI (indirect 0x0212)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_get_update_free_vsi {
+	__le16 vsi_num;
+#define ICE_AQ_VSI_NUM_S	0
+#define ICE_AQ_VSI_NUM_M	(0x03FF << ICE_AQ_VSI_NUM_S)
+#define ICE_AQ_VSI_IS_VALID	BIT(15)
+	__le16 cmd_flags;
+#define ICE_AQ_VSI_KEEP_ALLOC	0x1
+	u8 vf_id;
+	u8 reserved;
+	__le16 vsi_flags;
+#define ICE_AQ_VSI_TYPE_S	0
+#define ICE_AQ_VSI_TYPE_M	(0x3 << ICE_AQ_VSI_TYPE_S)
+#define ICE_AQ_VSI_TYPE_VF	0x0
+#define ICE_AQ_VSI_TYPE_VMDQ2	0x1
+#define ICE_AQ_VSI_TYPE_PF	0x2
+#define ICE_AQ_VSI_TYPE_EMP_MNG	0x3
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Response descriptor for:
+ * Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_update_free_vsi_resp {
+	__le16 vsi_num;
+	__le16 ext_status;
+	__le16 vsi_used;
+	__le16 vsi_free;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_vsi_props {
+	__le16 valid_sections;
+#define ICE_AQ_VSI_PROP_SW_VALID		BIT(0)
+#define ICE_AQ_VSI_PROP_SECURITY_VALID		BIT(1)
+#define ICE_AQ_VSI_PROP_VLAN_VALID		BIT(2)
+#define ICE_AQ_VSI_PROP_OUTER_TAG_VALID		BIT(3)
+#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID	BIT(4)
+#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID		BIT(5)
+#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID		BIT(6)
+#define ICE_AQ_VSI_PROP_Q_OPT_VALID		BIT(7)
+#define ICE_AQ_VSI_PROP_OUTER_UP_VALID		BIT(8)
+#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID		BIT(11)
+#define ICE_AQ_VSI_PROP_PASID_VALID		BIT(12)
+	/* switch section */
+	u8 sw_id;
+	u8 sw_flags;
+#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB		BIT(5)
+#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB		BIT(6)
+#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE		BIT(7)
+	u8 sw_flags2;
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S	0
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M	\
+				(0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
+#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA	BIT(0)
+#define ICE_AQ_VSI_SW_FLAG_LAN_ENA		BIT(4)
+	u8 veb_stat_id;
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_S		0
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_M	(0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID		BIT(5)
+	/* security section */
+	u8 sec_flags;
+#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD	BIT(0)
+#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF	BIT(2)
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S	4
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M	(0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
+#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA	BIT(0)
+	u8 sec_reserved;
+	/* VLAN section */
+	__le16 pvid; /* VLANS include priority bits */
+	u8 pvlan_reserved[2];
+	u8 port_vlan_flags;
+#define ICE_AQ_VSI_PVLAN_MODE_S	0
+#define ICE_AQ_VSI_PVLAN_MODE_M	(0x3 << ICE_AQ_VSI_PVLAN_MODE_S)
+#define ICE_AQ_VSI_PVLAN_MODE_UNTAGGED	0x1
+#define ICE_AQ_VSI_PVLAN_MODE_TAGGED	0x2
+#define ICE_AQ_VSI_PVLAN_MODE_ALL	0x3
+#define ICE_AQ_VSI_PVLAN_INSERT_PVID	BIT(2)
+#define ICE_AQ_VSI_PVLAN_EMOD_S	3
+#define ICE_AQ_VSI_PVLAN_EMOD_M	(0x3 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH	(0x0 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_STR_UP	(0x1 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_STR	(0x2 << ICE_AQ_VSI_PVLAN_EMOD_S)
+#define ICE_AQ_VSI_PVLAN_EMOD_NOTHING	(0x3 << ICE_AQ_VSI_PVLAN_EMOD_S)
+	u8 pvlan_reserved2[3];
+	/* ingress egress up sections */
+	__le32 ingress_table; /* bitmap, 3 bits per up */
+#define ICE_AQ_VSI_UP_TABLE_UP0_S	0
+#define ICE_AQ_VSI_UP_TABLE_UP0_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
+#define ICE_AQ_VSI_UP_TABLE_UP1_S	3
+#define ICE_AQ_VSI_UP_TABLE_UP1_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
+#define ICE_AQ_VSI_UP_TABLE_UP2_S	6
+#define ICE_AQ_VSI_UP_TABLE_UP2_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
+#define ICE_AQ_VSI_UP_TABLE_UP3_S	9
+#define ICE_AQ_VSI_UP_TABLE_UP3_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
+#define ICE_AQ_VSI_UP_TABLE_UP4_S	12
+#define ICE_AQ_VSI_UP_TABLE_UP4_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
+#define ICE_AQ_VSI_UP_TABLE_UP5_S	15
+#define ICE_AQ_VSI_UP_TABLE_UP5_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
+#define ICE_AQ_VSI_UP_TABLE_UP6_S	18
+#define ICE_AQ_VSI_UP_TABLE_UP6_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
+#define ICE_AQ_VSI_UP_TABLE_UP7_S	21
+#define ICE_AQ_VSI_UP_TABLE_UP7_M	(0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
+	__le32 egress_table;   /* same defines as for ingress table */
+	/* outer tags section */
+	__le16 outer_tag;
+	u8 outer_tag_flags;
+#define ICE_AQ_VSI_OUTER_TAG_MODE_S	0
+#define ICE_AQ_VSI_OUTER_TAG_MODE_M	(0x3 << ICE_AQ_VSI_OUTER_TAG_MODE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NOTHING	0x0
+#define ICE_AQ_VSI_OUTER_TAG_REMOVE	0x1
+#define ICE_AQ_VSI_OUTER_TAG_COPY	0x2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_S	2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_M	(0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NONE	0x0
+#define ICE_AQ_VSI_OUTER_TAG_STAG	0x1
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100	0x2
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100	0x3
+#define ICE_AQ_VSI_OUTER_TAG_INSERT	BIT(4)
+#define ICE_AQ_VSI_OUTER_TAG_ACCEPT_HOST BIT(6)
+	u8 outer_tag_reserved;
+	/* queue mapping section */
+	__le16 mapping_flags;
+#define ICE_AQ_VSI_Q_MAP_CONTIG	0x0
+#define ICE_AQ_VSI_Q_MAP_NONCONTIG	BIT(0)
+	__le16 q_mapping[16];
+#define ICE_AQ_VSI_Q_S		0
+#define ICE_AQ_VSI_Q_M		(0x7FF << ICE_AQ_VSI_Q_S)
+	__le16 tc_mapping[8];
+#define ICE_AQ_VSI_TC_Q_OFFSET_S	0
+#define ICE_AQ_VSI_TC_Q_OFFSET_M	(0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
+#define ICE_AQ_VSI_TC_Q_NUM_S		11
+#define ICE_AQ_VSI_TC_Q_NUM_M		(0xF << ICE_AQ_VSI_TC_Q_NUM_S)
+	/* queueing option section */
+	u8 q_opt_rss;
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S	0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M	(0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI	0x0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF	0x2
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL	0x3
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S	2
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M	(0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S	6
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M	(0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ	(0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ	(0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_XOR	(0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_JHASH	(0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+	u8 q_opt_tc;
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_S	0
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_M	(0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
+#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR	BIT(7)
+	u8 q_opt_flags;
+#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN	BIT(0)
+	u8 q_opt_reserved[3];
+	/* outer up section */
+	__le32 outer_up_table; /* same structure and defines as ingress tbl */
+	/* section 10 */
+	__le16 sect_10_reserved;
+	/* flow director section */
+	__le16 fd_options;
+#define ICE_AQ_VSI_FD_ENABLE		BIT(0)
+#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE	BIT(1)
+#define ICE_AQ_VSI_FD_PROG_ENABLE	BIT(3)
+	__le16 max_fd_fltr_dedicated;
+	__le16 max_fd_fltr_shared;
+	__le16 fd_def_q;
+#define ICE_AQ_VSI_FD_DEF_Q_S		0
+#define ICE_AQ_VSI_FD_DEF_Q_M		(0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
+#define ICE_AQ_VSI_FD_DEF_GRP_S	12
+#define ICE_AQ_VSI_FD_DEF_GRP_M	(0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
+	__le16 fd_report_opt;
+#define ICE_AQ_VSI_FD_REPORT_Q_S	0
+#define ICE_AQ_VSI_FD_REPORT_Q_M	(0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_S	12
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_M	(0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
+#define ICE_AQ_VSI_FD_DEF_DROP		BIT(15)
+	/* PASID section */
+	__le32 pasid_id;
+#define ICE_AQ_VSI_PASID_ID_S		0
+#define ICE_AQ_VSI_PASID_ID_M		(0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
+#define ICE_AQ_VSI_PASID_ID_VALID	BIT(31)
+	u8 reserved[24];
+};
+
 /* Get Default Topology (indirect 0x0400) */
 struct ice_aqc_get_topo {
 	u8 port_num;
@@ -576,6 +770,7 @@ struct ice_aq_desc {
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
+		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
 		struct ice_aqc_get_link_status get_link_status;
 	} params;
 };
@@ -626,6 +821,10 @@ enum ice_adminq_opc {
 	/* internal switch commands */
 	ice_aqc_opc_get_sw_cfg				= 0x0200,
 
+	/* VSI commands */
+	ice_aqc_opc_add_vsi				= 0x0210,
+	ice_aqc_opc_update_vsi				= 0x0211,
+	ice_aqc_opc_free_vsi				= 0x0213,
 	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
 
 	/* transmit scheduler commands */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d93eaae5dc60..04e004ba2067 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -28,6 +28,37 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 
 static struct workqueue_struct *ice_wq;
 
+static int ice_vsi_release(struct ice_vsi *vsi);
+
+/**
+ * ice_get_free_slot - get the next non-NULL location index in array
+ * @array: array to search
+ * @size: size of the array
+ * @curr: last known occupied index to be used as a search hint
+ *
+ * void * is being used to keep the functionality generic. This lets us use this
+ * function on any array of pointers.
+ */
+static int ice_get_free_slot(void *array, int size, int curr)
+{
+	int **tmp_array = (int **)array;
+	int next;
+
+	if (curr < (size - 1) && !tmp_array[curr + 1]) {
+		next = curr + 1;
+	} else {
+		int i = 0;
+
+		while ((i < size) && (tmp_array[i]))
+			i++;
+		if (i == size)
+			next = ICE_NO_VSI;
+		else
+			next = i;
+	}
+	return next;
+}
+
 /**
  * ice_search_res - Search the tracker for a block of resources
  * @res: pointer to the resource
@@ -326,6 +357,270 @@ static void ice_set_ctrlq_len(struct ice_hw *hw)
 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
 }
 
+/**
+ * ice_vsi_delete - delete a VSI from the switch
+ * @vsi: pointer to VSI being removed
+ */
+static void ice_vsi_delete(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_vsi_ctx ctxt;
+	enum ice_status status;
+
+	ctxt.vsi_num = vsi->vsi_num;
+
+	memcpy(&ctxt.info, &vsi->info, sizeof(struct ice_aqc_vsi_props));
+
+	status = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL);
+	if (status)
+		dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n",
+			vsi->vsi_num);
+}
+
+/**
+ * ice_vsi_setup_q_map - Setup a VSI queue map
+ * @vsi: the VSI being configured
+ * @ctxt: VSI context structure
+ */
+static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+	u16 offset = 0, qmap = 0, pow = 0, qcount;
+	u16 qcount_tx = vsi->alloc_txq;
+	u16 qcount_rx = vsi->alloc_rxq;
+	bool ena_tc0 = false;
+	int i;
+
+	/* at least TC0 should be enabled by default */
+	if (vsi->tc_cfg.numtc) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(0)))
+			ena_tc0 =  true;
+	} else {
+		ena_tc0 =  true;
+	}
+
+	if (ena_tc0) {
+		vsi->tc_cfg.numtc++;
+		vsi->tc_cfg.ena_tc |= 1;
+	}
+
+	qcount = qcount_rx / vsi->tc_cfg.numtc;
+
+	/* find higher power-of-2 of qcount */
+	pow = ilog2(qcount);
+
+	if (!is_power_of_2(qcount))
+		pow++;
+
+	/* TC mapping is a function of the number of Rx queues assigned to the
+	 * VSI for each traffic class and the offset of these queues.
+	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
+	 * queues allocated to TC0. No:of queues is a power-of-2.
+	 *
+	 * If TC is not enabled, the queue offset is set to 0, and allocate one
+	 * queue, this way, traffic for the given TC will be sent to the default
+	 * queue.
+	 *
+	 * Setup number and offset of Rx queues for all TCs for the VSI
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+			/* TC is not enabled */
+			vsi->tc_cfg.tc_info[i].qoffset = 0;
+			vsi->tc_cfg.tc_info[i].qcount = 1;
+			ctxt->info.tc_mapping[i] = 0;
+			continue;
+		}
+
+		/* TC is enabled */
+		vsi->tc_cfg.tc_info[i].qoffset = offset;
+		vsi->tc_cfg.tc_info[i].qcount = qcount;
+
+		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+			ICE_AQ_VSI_TC_Q_OFFSET_M) |
+			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+			 ICE_AQ_VSI_TC_Q_NUM_M);
+		offset += qcount;
+		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+	}
+
+	vsi->num_txq = qcount_tx;
+	vsi->num_rxq = offset;
+
+	/* Rx queue mapping */
+	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+	/* q_mapping buffer holds the info for the first queue allocated for
+	 * this VSI in the PF space and also the number of queues associated
+	 * with this VSI.
+	 */
+	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+}
+
+/**
+ * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ *
+ * This initializes a default VSI context for all sections except the Queues.
+ */
+static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
+{
+	u32 table = 0;
+
+	memset(&ctxt->info, 0, sizeof(ctxt->info));
+	/* VSI's should be allocated from shared pool */
+	ctxt->alloc_from_pool = true;
+	/* Src pruning enabled by default */
+	ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
+	/* Traffic from VSI can be sent to LAN */
+	ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+	/* Allow all packets untagged/tagged */
+	ctxt->info.port_vlan_flags = ((ICE_AQ_VSI_PVLAN_MODE_ALL &
+				       ICE_AQ_VSI_PVLAN_MODE_M) >>
+				      ICE_AQ_VSI_PVLAN_MODE_S);
+	/* Show VLAN/UP from packets in Rx descriptors */
+	ctxt->info.port_vlan_flags |= ((ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH &
+					ICE_AQ_VSI_PVLAN_EMOD_M) >>
+				       ICE_AQ_VSI_PVLAN_EMOD_S);
+	/* Have 1:1 UP mapping for both ingress/egress tables */
+	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
+	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
+	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
+	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
+	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
+	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
+	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
+	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
+	ctxt->info.ingress_table = cpu_to_le32(table);
+	ctxt->info.egress_table = cpu_to_le32(table);
+	/* Have 1:1 UP mapping for outer to inner UP table */
+	ctxt->info.outer_up_table = cpu_to_le32(table);
+	/* No Outer tag support outer_tag_flags remains to zero */
+}
+
+/**
+ * ice_vsi_add - Create a new VSI or fetch preallocated VSI
+ * @vsi: the VSI being configured
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command to create a new VSI.
+ */
+static int ice_vsi_add(struct ice_vsi *vsi)
+{
+	struct ice_vsi_ctx ctxt = { 0 };
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int ret = 0;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		ctxt.flags = ICE_AQ_VSI_TYPE_PF;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	ice_set_dflt_vsi_ctx(&ctxt);
+	/* if the switch is in VEB mode, allow VSI loopback */
+	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
+		ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+
+	ctxt.info.sw_id = vsi->port_info->sw_id;
+	ice_vsi_setup_q_map(vsi, &ctxt);
+
+	ret = ice_aq_add_vsi(hw, &ctxt, NULL);
+	if (ret) {
+		dev_err(&vsi->back->pdev->dev,
+			"Add VSI AQ call failed, err %d\n", ret);
+		return -EIO;
+	}
+	vsi->info = ctxt.info;
+	vsi->vsi_num = ctxt.vsi_num;
+
+	return ret;
+}
+
+/**
+ * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
+ * @vsi: the VSI having rings deallocated
+ */
+static void ice_vsi_clear_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (vsi->tx_rings) {
+		for (i = 0; i < vsi->alloc_txq; i++) {
+			if (vsi->tx_rings[i]) {
+				kfree_rcu(vsi->tx_rings[i], rcu);
+				vsi->tx_rings[i] = NULL;
+			}
+		}
+	}
+	if (vsi->rx_rings) {
+		for (i = 0; i < vsi->alloc_rxq; i++) {
+			if (vsi->rx_rings[i]) {
+				kfree_rcu(vsi->rx_rings[i], rcu);
+				vsi->rx_rings[i] = NULL;
+			}
+		}
+	}
+}
+
+/**
+ * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
+ * @vsi: VSI which is having rings allocated
+ */
+static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	/* Allocate tx_rings */
+	for (i = 0; i < vsi->alloc_txq; i++) {
+		struct ice_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->txq_map[i];
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_desc;
+
+		vsi->tx_rings[i] = ring;
+	}
+
+	/* Allocate rx_rings */
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		struct ice_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->rxq_map[i];
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_desc;
+		vsi->rx_rings[i] = ring;
+	}
+
+	return 0;
+
+err_out:
+	ice_vsi_clear_rings(vsi);
+	return -ENOMEM;
+}
+
 /**
  * ice_ena_misc_vector - enable the non-queue interrupts
  * @pf: board private structure
@@ -412,6 +707,189 @@ ena_intr:
 	return ret;
 }
 
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+	int q_vectors = vsi->num_q_vectors;
+	int tx_rings_rem, rx_rings_rem;
+	int v_id;
+
+	/* initially assigning remaining rings count to VSIs num queue value */
+	tx_rings_rem = vsi->num_txq;
+	rx_rings_rem = vsi->num_rxq;
+
+	for (v_id = 0; v_id < q_vectors; v_id++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+		/* Tx rings mapping to vector */
+		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_tx = tx_rings_per_v;
+		q_vector->tx.ring = NULL;
+		q_base = vsi->num_txq - tx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+			tx_ring->q_vector = q_vector;
+			tx_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = tx_ring;
+		}
+		tx_rings_rem -= tx_rings_per_v;
+
+		/* Rx rings mapping to vector */
+		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_rx = rx_rings_per_v;
+		q_vector->rx.ring = NULL;
+		q_base = vsi->num_rxq - rx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+			rx_ring->q_vector = q_vector;
+			rx_ring->next = q_vector->rx.ring;
+			q_vector->rx.ring = rx_ring;
+		}
+		rx_rings_rem -= rx_rings_per_v;
+	}
+}
+
+/**
+ * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ */
+static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		vsi->alloc_txq = pf->num_lan_tx;
+		vsi->alloc_rxq = pf->num_lan_rx;
+		vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE);
+		vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx);
+		break;
+	default:
+		dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
+			 vsi->type);
+		break;
+	}
+}
+
+/**
+ * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
+ * @vsi: VSI pointer
+ * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ */
+static int ice_vsi_alloc_arrays(struct ice_vsi *vsi, bool alloc_qvectors)
+{
+	struct ice_pf *pf = vsi->back;
+
+	/* allocate memory for both Tx and Rx ring pointers */
+	vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+				     sizeof(struct ice_ring *), GFP_KERNEL);
+	if (!vsi->tx_rings)
+		goto err_txrings;
+
+	vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+				     sizeof(struct ice_ring *), GFP_KERNEL);
+	if (!vsi->rx_rings)
+		goto err_rxrings;
+
+	if (alloc_qvectors) {
+		/* allocate memory for q_vector pointers */
+		vsi->q_vectors = devm_kcalloc(&pf->pdev->dev,
+					      vsi->num_q_vectors,
+					      sizeof(struct ice_q_vector *),
+					      GFP_KERNEL);
+		if (!vsi->q_vectors)
+			goto err_vectors;
+	}
+
+	return 0;
+
+err_vectors:
+	devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+err_rxrings:
+	devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+err_txrings:
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_alloc - Allocates the next available struct vsi in the PF
+ * @pf: board private structure
+ * @type: type of VSI
+ *
+ * returns a pointer to a VSI on success, NULL on failure.
+ */
+static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type)
+{
+	struct ice_vsi *vsi = NULL;
+
+	/* Need to protect the allocation of the VSIs at the PF level */
+	mutex_lock(&pf->sw_mutex);
+
+	/* If we have already allocated our maximum number of VSIs,
+	 * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
+	 * is available to be populated
+	 */
+	if (pf->next_vsi == ICE_NO_VSI) {
+		dev_dbg(&pf->pdev->dev, "out of VSI slots!\n");
+		goto unlock_pf;
+	}
+
+	vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL);
+	if (!vsi)
+		goto unlock_pf;
+
+	vsi->type = type;
+	vsi->back = pf;
+	set_bit(__ICE_DOWN, vsi->state);
+	vsi->idx = pf->next_vsi;
+	vsi->work_lmt = ICE_DFLT_IRQ_WORK;
+
+	ice_vsi_set_num_qs(vsi);
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		if (ice_vsi_alloc_arrays(vsi, true))
+			goto err_rings;
+
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		goto unlock_pf;
+	}
+
+	/* fill VSI slot in the PF struct */
+	pf->vsi[pf->next_vsi] = vsi;
+
+	/* prepare pf->next_vsi for next use */
+	pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+					 pf->next_vsi);
+	goto unlock_pf;
+
+err_rings:
+	devm_kfree(&pf->pdev->dev, vsi);
+	vsi = NULL;
+unlock_pf:
+	mutex_unlock(&pf->sw_mutex);
+	return vsi;
+}
+
 /**
  * ice_free_irq_msix_misc - Unroll misc vector setup
  * @pf: board private structure
@@ -493,6 +971,581 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 	return 0;
 }
 
+/**
+ * ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @vsi: the VSI getting queues
+ *
+ * Return 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs_contig(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int offset, ret = 0;
+
+	mutex_lock(&pf->avail_q_mutex);
+	/* look for contiguous block of queues for tx */
+	offset = bitmap_find_next_zero_area(pf->avail_txqs, ICE_MAX_TXQS,
+					    0, vsi->alloc_txq, 0);
+	if (offset < ICE_MAX_TXQS) {
+		int i;
+
+		bitmap_set(pf->avail_txqs, offset, vsi->alloc_txq);
+		for (i = 0; i < vsi->alloc_txq; i++)
+			vsi->txq_map[i] = i + offset;
+	} else {
+		ret = -ENOMEM;
+		vsi->tx_mapping_mode = ICE_VSI_MAP_SCATTER;
+	}
+
+	/* look for contiguous block of queues for rx */
+	offset = bitmap_find_next_zero_area(pf->avail_rxqs, ICE_MAX_RXQS,
+					    0, vsi->alloc_rxq, 0);
+	if (offset < ICE_MAX_RXQS) {
+		int i;
+
+		bitmap_set(pf->avail_rxqs, offset, vsi->alloc_rxq);
+		for (i = 0; i < vsi->alloc_rxq; i++)
+			vsi->rxq_map[i] = i + offset;
+	} else {
+		ret = -ENOMEM;
+		vsi->rx_mapping_mode = ICE_VSI_MAP_SCATTER;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	return ret;
+}
+
+/**
+ * ice_vsi_get_qs_scatter - Assign a scattered queues to VSI
+ * @vsi: the VSI getting queues
+ *
+ * Return 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs_scatter(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i, index = 0;
+
+	mutex_lock(&pf->avail_q_mutex);
+
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER) {
+		for (i = 0; i < vsi->alloc_txq; i++) {
+			index = find_next_zero_bit(pf->avail_txqs,
+						   ICE_MAX_TXQS, index);
+			if (index < ICE_MAX_TXQS) {
+				set_bit(index, pf->avail_txqs);
+				vsi->txq_map[i] = index;
+			} else {
+				goto err_scatter_tx;
+			}
+		}
+	}
+
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER) {
+		for (i = 0; i < vsi->alloc_rxq; i++) {
+			index = find_next_zero_bit(pf->avail_rxqs,
+						   ICE_MAX_RXQS, index);
+			if (index < ICE_MAX_RXQS) {
+				set_bit(index, pf->avail_rxqs);
+				vsi->rxq_map[i] = index;
+			} else {
+				goto err_scatter_rx;
+			}
+		}
+	}
+
+	mutex_unlock(&pf->avail_q_mutex);
+	return 0;
+
+err_scatter_rx:
+	/* unflag any queues we have grabbed (i is failed position) */
+	for (index = 0; index < i; index++) {
+		clear_bit(vsi->rxq_map[index], pf->avail_rxqs);
+		vsi->rxq_map[index] = 0;
+	}
+	i = vsi->alloc_txq;
+err_scatter_tx:
+	/* i is either position of failed attempt or vsi->alloc_txq */
+	for (index = 0; index < i; index++) {
+		clear_bit(vsi->txq_map[index], pf->avail_txqs);
+		vsi->txq_map[index] = 0;
+	}
+
+	mutex_unlock(&pf->avail_q_mutex);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_get_qs - Assign queues from PF to VSI
+ * @vsi: the VSI to assign queues to
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs(struct ice_vsi *vsi)
+{
+	int ret = 0;
+
+	vsi->tx_mapping_mode = ICE_VSI_MAP_CONTIG;
+	vsi->rx_mapping_mode = ICE_VSI_MAP_CONTIG;
+
+	/* NOTE: ice_vsi_get_qs_contig() will set the rx/tx mapping
+	 * modes individually to scatter if assigning contiguous queues
+	 * to rx or tx fails
+	 */
+	ret = ice_vsi_get_qs_contig(vsi);
+	if (ret < 0) {
+		if (vsi->tx_mapping_mode == ICE_VSI_MAP_SCATTER)
+			vsi->alloc_txq = max_t(u16, vsi->alloc_txq,
+					       ICE_MAX_SCATTER_TXQS);
+		if (vsi->rx_mapping_mode == ICE_VSI_MAP_SCATTER)
+			vsi->alloc_rxq = max_t(u16, vsi->alloc_rxq,
+					       ICE_MAX_SCATTER_RXQS);
+		ret = ice_vsi_get_qs_scatter(vsi);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_put_qs - Release queues from VSI to PF
+ * @vsi: the VSI thats going to release queues
+ */
+static void ice_vsi_put_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	mutex_lock(&pf->avail_q_mutex);
+
+	for (i = 0; i < vsi->alloc_txq; i++) {
+		clear_bit(vsi->txq_map[i], pf->avail_txqs);
+		vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
+		vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	mutex_unlock(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_ring *ring;
+
+	if (!vsi->q_vectors[v_idx]) {
+		dev_dbg(&vsi->back->pdev->dev, "Queue vector at index %d not found\n",
+			v_idx);
+		return;
+	}
+	q_vector = vsi->q_vectors[v_idx];
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+	ice_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
+
+	/* only VSI with an associated netdev is set up with NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	devm_kfree(&vsi->back->pdev->dev, q_vector);
+	vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+static void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
+		ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_cfg_netdev - Setup the netdev flags
+ * @vsi: the VSI being configured
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_cfg_netdev(struct ice_vsi *vsi)
+{
+	struct ice_netdev_priv *np;
+	struct net_device *netdev;
+	u8 mac_addr[ETH_ALEN];
+
+	netdev = alloc_etherdev_mqs(sizeof(struct ice_netdev_priv),
+				    vsi->alloc_txq, vsi->alloc_rxq);
+	if (!netdev)
+		return -ENOMEM;
+
+	vsi->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vsi = vsi;
+
+	/* set features that user can change */
+	netdev->hw_features = NETIF_F_SG	|
+			      NETIF_F_HIGHDMA	|
+			      NETIF_F_RXHASH;
+
+	/* enable features */
+	netdev->features |= netdev->hw_features;
+
+	if (vsi->type == ICE_VSI_PF) {
+		SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev);
+		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+
+		ether_addr_copy(netdev->dev_addr, mac_addr);
+		ether_addr_copy(netdev->perm_addr, mac_addr);
+	}
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* setup watchdog timeout value to be 5 second */
+	netdev->watchdog_timeo = 5 * HZ;
+
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = ICE_MAX_MTU;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_free_arrays - clean up vsi resources
+ * @vsi: pointer to VSI being cleared
+ * @free_qvectors: bool to specify if q_vectors should be deallocated
+ */
+static void ice_vsi_free_arrays(struct ice_vsi *vsi, bool free_qvectors)
+{
+	struct ice_pf *pf = vsi->back;
+
+	/* free the ring and vector containers */
+	if (free_qvectors && vsi->q_vectors) {
+		devm_kfree(&pf->pdev->dev, vsi->q_vectors);
+		vsi->q_vectors = NULL;
+	}
+	if (vsi->tx_rings) {
+		devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+		vsi->tx_rings = NULL;
+	}
+	if (vsi->rx_rings) {
+		devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+		vsi->rx_rings = NULL;
+	}
+}
+
+/**
+ * ice_vsi_clear - clean up and deallocate the provided vsi
+ * @vsi: pointer to VSI being cleared
+ *
+ * This deallocates the vsi's queue resources, removes it from the PF's
+ * VSI array if necessary, and deallocates the VSI
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_clear(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = NULL;
+
+	if (!vsi)
+		return 0;
+
+	if (!vsi->back)
+		return -EINVAL;
+
+	pf = vsi->back;
+
+	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
+		dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n",
+			vsi->idx);
+		return -EINVAL;
+	}
+
+	mutex_lock(&pf->sw_mutex);
+	/* updates the PF for this cleared vsi */
+
+	pf->vsi[vsi->idx] = NULL;
+	if (vsi->idx < pf->next_vsi)
+		pf->next_vsi = vsi->idx;
+
+	ice_vsi_free_arrays(vsi, true);
+	mutex_unlock(&pf->sw_mutex);
+	devm_kfree(&pf->pdev->dev, vsi);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the vsi struct
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	/* only set affinity_mask if the CPU is online */
+	if (cpu_online(v_idx))
+		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+	/* tie q_vector and vsi together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int v_idx = 0, num_q_vectors;
+	int err;
+
+	if (vsi->q_vectors[0]) {
+		dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+			vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+		num_q_vectors = vsi->num_q_vectors;
+	} else {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+		err = ice_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		ice_free_q_vector(vsi, v_idx);
+
+	dev_err(&pf->pdev->dev,
+		"Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+		vsi->num_q_vectors, vsi->vsi_num, err);
+	vsi->num_q_vectors = 0;
+	return err;
+}
+
+/**
+ * ice_vsi_setup_vector_base - Set up the base vector for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after ice_vsi_alloc() which allocates the
+ * corresponding SW VSI structure and initializes num_queue_pairs for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success or negative on failure
+ */
+static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int num_q_vectors = 0;
+
+	if (vsi->base_vector) {
+		dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+			vsi->vsi_num, vsi->base_vector);
+		return -EEXIST;
+	}
+
+	if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		return -ENOENT;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		num_q_vectors = vsi->num_q_vectors;
+		break;
+	default:
+		dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
+			 vsi->type);
+		break;
+	}
+
+	if (num_q_vectors)
+		vsi->base_vector = ice_get_res(pf, pf->irq_tracker,
+					       num_q_vectors, vsi->idx);
+
+	if (vsi->base_vector < 0) {
+		dev_err(&pf->pdev->dev,
+			"Failed to get tracking for %d vectors for VSI %d, err=%d\n",
+			num_q_vectors, vsi->vsi_num, vsi->base_vector);
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @type: VSI type
+ * @pi: pointer to the port_info instance
+ *
+ * This allocates the sw VSI structure and its queue resources.
+ *
+ * Returns pointer to the successfully allocated and configure VSI sw struct on
+ * success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
+	      struct ice_port_info *pi)
+{
+	struct device *dev = &pf->pdev->dev;
+	struct ice_vsi_ctx ctxt = { 0 };
+	struct ice_vsi *vsi;
+	int ret;
+
+	vsi = ice_vsi_alloc(pf, type);
+	if (!vsi) {
+		dev_err(dev, "could not allocate VSI\n");
+		return NULL;
+	}
+
+	vsi->port_info = pi;
+	vsi->vsw = pf->first_sw;
+
+	if (ice_vsi_get_qs(vsi)) {
+		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+			vsi->idx);
+		goto err_get_qs;
+	}
+
+	/* create the VSI */
+	ret = ice_vsi_add(vsi);
+	if (ret)
+		goto err_vsi;
+
+	ctxt.vsi_num = vsi->vsi_num;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		ret = ice_cfg_netdev(vsi);
+		if (ret)
+			goto err_cfg_netdev;
+
+		ret = register_netdev(vsi->netdev);
+		if (ret)
+			goto err_register_netdev;
+
+		netif_carrier_off(vsi->netdev);
+
+		/* make sure transmit queues start off as stopped */
+		netif_tx_stop_all_queues(vsi->netdev);
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto err_msix;
+
+		ret = ice_vsi_setup_vector_base(vsi);
+		if (ret)
+			goto err_rings;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto err_rings;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+
+		break;
+	default:
+		/* if vsi type is not recognized, clean up the resources and
+		 * exit
+		 */
+		goto err_rings;
+	}
+	return vsi;
+
+err_rings:
+	ice_vsi_free_q_vectors(vsi);
+err_msix:
+	if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
+		unregister_netdev(vsi->netdev);
+err_register_netdev:
+	if (vsi->netdev) {
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+err_cfg_netdev:
+	ret = ice_aq_free_vsi(&pf->hw, &ctxt, false, NULL);
+	if (ret)
+		dev_err(&vsi->back->pdev->dev,
+			"Free VSI AQ call failed, err %d\n", ret);
+err_vsi:
+	ice_vsi_put_qs(vsi);
+err_get_qs:
+	pf->q_left_tx += vsi->alloc_txq;
+	pf->q_left_rx += vsi->alloc_rxq;
+	ice_vsi_clear(vsi);
+
+	return NULL;
+}
+
+/**
+ * ice_setup_pf_sw - Setup the HW switch on startup or after reset
+ * @pf: board private structure
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_setup_pf_sw(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi;
+	int status = 0;
+
+	vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info);
+	if (!vsi) {
+		status = -ENOMEM;
+		goto error_exit;
+	}
+
+error_exit:
+	if (vsi) {
+		ice_vsi_free_q_vectors(vsi);
+		if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
+			unregister_netdev(vsi->netdev);
+		if (vsi->netdev) {
+			free_netdev(vsi->netdev);
+			vsi->netdev = NULL;
+		}
+		ice_vsi_delete(vsi);
+		ice_vsi_put_qs(vsi);
+		pf->q_left_tx += vsi->alloc_txq;
+		pf->q_left_rx += vsi->alloc_rxq;
+		ice_vsi_clear(vsi);
+	}
+	return status;
+}
+
 /**
  * ice_determine_q_usage - Calculate queue distribution
  * @pf: board private structure
@@ -810,8 +1863,17 @@ static int ice_probe(struct pci_dev *pdev,
 	/* record the sw_id available for later use */
 	pf->first_sw->sw_id = hw->port_info->sw_id;
 
+	err = ice_setup_pf_sw(pf);
+	if (err) {
+		dev_err(&pdev->dev,
+			"probe failed due to setup pf switch:%d\n", err);
+		goto err_alloc_sw_unroll;
+	}
 	return 0;
 
+err_alloc_sw_unroll:
+	set_bit(__ICE_DOWN, pf->state);
+	devm_kfree(&pf->pdev->dev, pf->first_sw);
 err_msix_misc_unroll:
 	ice_free_irq_msix_misc(pf);
 err_init_interrupt_unroll:
@@ -832,12 +1894,24 @@ err_exit_unroll:
 static void ice_remove(struct pci_dev *pdev)
 {
 	struct ice_pf *pf = pci_get_drvdata(pdev);
+	int i = 0;
+	int err;
 
 	if (!pf)
 		return;
 
 	set_bit(__ICE_DOWN, pf->state);
 
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (!pf->vsi[i])
+			continue;
+
+		err = ice_vsi_release(pf->vsi[i]);
+		if (err)
+			dev_dbg(&pf->pdev->dev, "Failed to release VSI index %d (err %d)\n",
+				i, err);
+	}
+
 	ice_free_irq_msix_misc(pf);
 	ice_clear_interrupt_scheme(pf);
 	ice_deinit_pf(pf);
@@ -913,3 +1987,40 @@ static void __exit ice_module_exit(void)
 	pr_info("module unloaded\n");
 }
 module_exit(ice_module_exit);
+
+/**
+ * ice_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ */
+static int ice_vsi_release(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf;
+
+	if (!vsi->back)
+		return -ENODEV;
+	pf = vsi->back;
+
+	if (vsi->netdev) {
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+
+	/* reclaim interrupt vectors back to PF */
+	ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
+	pf->num_avail_msix += vsi->num_q_vectors;
+
+	ice_vsi_delete(vsi);
+	ice_vsi_free_q_vectors(vsi);
+	ice_vsi_clear_rings(vsi);
+
+	ice_vsi_put_qs(vsi);
+	pf->q_left_tx += vsi->alloc_txq;
+	pf->q_left_rx += vsi->alloc_rxq;
+
+	ice_vsi_clear(vsi);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 8fc0579b0bbb..b438cee9521e 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -50,6 +50,121 @@ ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp *buf,
 	return status;
 }
 
+/**
+ * ice_aq_add_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware (0x0210)
+ */
+enum ice_status
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *res;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	enum ice_status status;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.vsi_cmd;
+	res = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi);
+
+	if (!vsi_ctx->alloc_from_pool)
+		cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num |
+					   ICE_AQ_VSI_IS_VALID);
+
+	cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+				 sizeof(vsi_ctx->info), cd);
+
+	if (!status) {
+		vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M;
+		vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_update_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware (0x0211)
+ */
+enum ice_status
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+				 sizeof(vsi_ctx->info), cd);
+
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_free_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get VSI context info from hardware (0x0213)
+ */
+enum ice_status
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = (struct ice_aqc_add_update_free_vsi_resp *)&desc.params.raw;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+	if (keep_vsi_alloc)
+		cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
 /* ice_init_port_info - Initialize port_info with switch configuration data
  * @pi: pointer to port_info
  * @vsi_port_num: VSI number or port number
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index b98cb978a129..bb8612d8e07b 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -9,6 +9,27 @@
 #define ICE_SW_CFG_MAX_BUF_LEN 2048
 #define ICE_DFLT_VSI_INVAL 0xff
 
+/* VSI context structure for add/get/update/free operations */
+struct ice_vsi_ctx {
+	u16 vsi_num;
+	u16 vsis_allocd;
+	u16 vsis_unallocated;
+	u16 flags;
+	struct ice_aqc_vsi_props info;
+	bool alloc_from_pool;
+};
+
+/* VSI related commands */
+enum ice_status
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		  struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		bool keep_vsi_alloc, struct ice_sq_cd *cd);
+
 enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
 
 #endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 7ceb69ea745e..2dd2232127a7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -26,4 +26,30 @@ enum ice_dyn_idx_t {
 /* apply ITR HW granularity translation to program the HW registers */
 #define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran))
 
+/* descriptor ring, associated with a VSI */
+struct ice_ring {
+	struct ice_ring *next;		/* pointer to next ring in q_vector */
+	struct device *dev;		/* Used for DMA mapping */
+	struct net_device *netdev;	/* netdev ring maps to */
+	struct ice_vsi *vsi;		/* Backreference to associated VSI */
+	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
+	u16 q_index;			/* Queue number of ring */
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+	bool ring_active;		/* is ring online or not */
+	struct rcu_head rcu;		/* to avoid race on free */
+} ____cacheline_internodealigned_in_smp;
+
+struct ice_ring_container {
+	/* array of pointers to rings */
+	struct ice_ring *ring;
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_pkts;	/* total packets processed this int */
+	u16 itr;
+};
+
+/* iterator for handling rings in ring container */
+#define ice_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos; pos = pos->next)
+
 #endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 2325be552a84..5f58fd84d558 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -55,6 +55,10 @@ enum ice_media_type {
 	ICE_MEDIA_DA,
 };
 
+enum ice_vsi_type {
+	ICE_VSI_PF = 0,
+};
+
 struct ice_link_status {
 	/* Refer to ice_aq_phy_type for bits definition */
 	u64 phy_type_low;

From 9daf8208dd4dee4e13079bd0520a5fb8d20e8b06 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:12 -0700
Subject: [PATCH 1258/1678] ice: Add support for switch filter programming

A VSI needs traffic directed towards it. This is done by programming
filter rules on the switch (embedded vSwitch) element in the hardware,
which connects the VSI to the ingress/egress port.

This patch introduces data structures and functions necessary to add
remove or update switch rules on the switch element. This is a pretty low
level function that is generic enough to add a whole range of filters.

This patch also introduces two top level functions ice_add_mac and
ice_remove mac which through a series of intermediate helper functions
eventually call ice_aq_sw_rules to add/delete simple MAC based filters.
It's worth noting that one invocation of ice_add_mac/ice_remove_mac
is capable of adding/deleting multiple MAC filters.

Also worth noting is the fact that the driver maintains a list of currently
active filters, so every filter addition/removal causes an update to this
list. This is done for a couple of reasons:

1) If two VSIs try to add the same filters, we need to detect it and do
   things a little differently (i.e. use VSI lists, described below) as
   the same filter can't be added more than once.

2) In the event of a hardware reset we can simply walk through this list
   and restore the filters.

VSI Lists:
In a multi-VSI situation, it's possible that multiple VSIs want to add the
same filter rule. For example, two VSIs that want to receive broadcast
traffic would both add a filter for destination MAC ff:ff:ff:ff:ff:ff.
This can become cumbersome to maintain and so this is handled using a
VSI list.

A VSI list is resource that can be allocated in the hardware using the
ice_aq_alloc_free_res admin queue command. Simply put, a VSI list can
be thought of as a subscription list containing a set of VSIs to which
the packet should be forwarded, should the filter match.

For example, if VSI-0 has already added a broadcast filter, and VSI-1
wants to do the same thing, the filter creation flow will detect this,
allocate a VSI list and update the switch rule so that broadcast traffic
will now be forwarded to the VSI list which contains VSI-0 and VSI-1.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  249 +++
 drivers/net/ethernet/intel/ice/ice_common.c   |   74 +-
 drivers/net/ethernet/intel/ice/ice_main.c     |   92 ++
 drivers/net/ethernet/intel/ice/ice_status.h   |    3 +
 drivers/net/ethernet/intel/ice/ice_switch.c   | 1378 +++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_switch.h   |  120 ++
 drivers/net/ethernet/intel/ice/ice_type.h     |   21 +
 7 files changed, 1935 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 682e8dac72cc..9237841439da 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -8,6 +8,7 @@
  * descriptor format.  It is shared between Firmware and Software.
  */
 
+#define ICE_MAX_VSI			768
 #define ICE_AQC_TOPO_MAX_LEVEL_NUM	0x9
 #define ICE_AQ_SET_MAC_FRAME_SIZE_MAX	9728
 
@@ -191,6 +192,46 @@ struct ice_aqc_get_sw_cfg_resp {
 	struct ice_aqc_get_sw_cfg_resp_elem elements[1];
 };
 
+/* These resource type defines are used for all switch resource
+ * commands where a resource type is required, such as:
+ * Get Resource Allocation command (indirect 0x0204)
+ * Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ * Get Allocated Resource Descriptors Command (indirect 0x020A)
+ */
+#define ICE_AQC_RES_TYPE_VSI_LIST_REP			0x03
+#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE			0x04
+
+/* Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ */
+struct ice_aqc_alloc_free_res_cmd {
+	__le16 num_entries; /* Number of Resource entries */
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Resource descriptor */
+struct ice_aqc_res_elem {
+	union {
+		__le16 sw_resp;
+		__le16 flu_resp;
+	} e;
+};
+
+/* Buffer for Allocate/Free Resources commands */
+struct ice_aqc_alloc_free_res_elem {
+	__le16 res_type; /* Types defined above cmd 0x0204 */
+#define ICE_AQC_RES_TYPE_SHARED_S	7
+#define ICE_AQC_RES_TYPE_SHARED_M	(0x1 << ICE_AQC_RES_TYPE_SHARED_S)
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S	8
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M	\
+				(0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S)
+	__le16 num_elems;
+	struct ice_aqc_res_elem elem[1];
+};
+
 /* Add VSI (indirect 0x0210)
  * Update VSI (indirect 0x0211)
  * Get VSI (indirect 0x0212)
@@ -384,6 +425,202 @@ struct ice_aqc_vsi_props {
 	u8 reserved[24];
 };
 
+/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
+ */
+struct ice_aqc_sw_rules {
+	/* ops: add switch rules, referring the number of rules.
+	 * ops: update switch rules, referring the number of filters
+	 * ops: remove switch rules, referring the entry index.
+	 * ops: get switch rules, referring to the number of filters.
+	 */
+	__le16 num_rules_fltr_entry_index;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Add/Update/Get/Remove lookup Rx/Tx command/response entry
+ * This structures describes the lookup rules and associated actions.  "index"
+ * is returned as part of a response to a successful Add command, and can be
+ * used to identify the rule for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lkup_rx_tx {
+	__le16 recipe_id;
+#define ICE_SW_RECIPE_LOGICAL_PORT_FWD		10
+	/* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */
+	__le16 src;
+	__le32 act;
+
+	/* Bit 0:1 - Action type */
+#define ICE_SINGLE_ACT_TYPE_S	0x00
+#define ICE_SINGLE_ACT_TYPE_M	(0x3 << ICE_SINGLE_ACT_TYPE_S)
+
+	/* Bit 2 - Loop back enable
+	 * Bit 3 - LAN enable
+	 */
+#define ICE_SINGLE_ACT_LB_ENABLE	BIT(2)
+#define ICE_SINGLE_ACT_LAN_ENABLE	BIT(3)
+
+	/* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_SINGLE_ACT_VSI_FORWARDING	0x0
+
+#define ICE_SINGLE_ACT_VSI_ID_S		4
+#define ICE_SINGLE_ACT_VSI_ID_M		(0x3FF << ICE_SINGLE_ACT_VSI_ID_S)
+#define ICE_SINGLE_ACT_VSI_LIST_ID_S	4
+#define ICE_SINGLE_ACT_VSI_LIST_ID_M	(0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S)
+	/* This bit needs to be set if action is forward to VSI list */
+#define ICE_SINGLE_ACT_VSI_LIST		BIT(14)
+#define ICE_SINGLE_ACT_VALID_BIT	BIT(17)
+#define ICE_SINGLE_ACT_DROP		BIT(18)
+
+	/* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_SINGLE_ACT_TO_Q		0x1
+#define ICE_SINGLE_ACT_Q_INDEX_S	4
+#define ICE_SINGLE_ACT_Q_INDEX_M	(0x7FF << ICE_SINGLE_ACT_Q_INDEX_S)
+#define ICE_SINGLE_ACT_Q_REGION_S	15
+#define ICE_SINGLE_ACT_Q_REGION_M	(0x7 << ICE_SINGLE_ACT_Q_REGION_S)
+#define ICE_SINGLE_ACT_Q_PRIORITY	BIT(18)
+
+	/* Action type = 2 - Prune */
+#define ICE_SINGLE_ACT_PRUNE		0x2
+#define ICE_SINGLE_ACT_EGRESS		BIT(15)
+#define ICE_SINGLE_ACT_INGRESS		BIT(16)
+#define ICE_SINGLE_ACT_PRUNET		BIT(17)
+	/* Bit 18 should be set to 0 for this action */
+
+	/* Action type = 2 - Pointer */
+#define ICE_SINGLE_ACT_PTR		0x2
+#define ICE_SINGLE_ACT_PTR_VAL_S	4
+#define ICE_SINGLE_ACT_PTR_VAL_M	(0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S)
+	/* Bit 18 should be set to 1 */
+#define ICE_SINGLE_ACT_PTR_BIT		BIT(18)
+
+	/* Action type = 3 - Other actions. Last two bits
+	 * are other action identifier
+	 */
+#define ICE_SINGLE_ACT_OTHER_ACTS		0x3
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S	17
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M	\
+				(0x3 << \ ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
+
+	/* Bit 17:18 - Defines other actions */
+	/* Other action = 0 - Mirror VSI */
+#define ICE_SINGLE_OTHER_ACT_MIRROR		0
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S	4
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M	\
+				(0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S)
+
+	/* Other action = 3 - Set Stat count */
+#define ICE_SINGLE_OTHER_ACT_STAT_COUNT		3
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S	4
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M	\
+				(0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S)
+
+	__le16 index; /* The index of the rule in the lookup table */
+	/* Length and values of the header to be matched per recipe or
+	 * lookup-type
+	 */
+	__le16 hdr_len;
+	u8 hdr[1];
+} __packed;
+
+/* Add/Update/Remove large action command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the action for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lg_act {
+	__le16 index; /* Index in large action table */
+	__le16 size;
+	__le32 act[1]; /* array of size for actions */
+	/* Max number of large actions */
+#define ICE_MAX_LG_ACT	4
+	/* Bit 0:1 - Action type */
+#define ICE_LG_ACT_TYPE_S	0
+#define ICE_LG_ACT_TYPE_M	(0x7 << ICE_LG_ACT_TYPE_S)
+
+	/* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_LG_ACT_VSI_FORWARDING	0
+#define ICE_LG_ACT_VSI_ID_S		3
+#define ICE_LG_ACT_VSI_ID_M		(0x3FF << ICE_LG_ACT_VSI_ID_S)
+#define ICE_LG_ACT_VSI_LIST_ID_S	3
+#define ICE_LG_ACT_VSI_LIST_ID_M	(0x3FF << ICE_LG_ACT_VSI_LIST_ID_S)
+	/* This bit needs to be set if action is forward to VSI list */
+#define ICE_LG_ACT_VSI_LIST		BIT(13)
+
+#define ICE_LG_ACT_VALID_BIT		BIT(16)
+
+	/* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_LG_ACT_TO_Q			0x1
+#define ICE_LG_ACT_Q_INDEX_S		3
+#define ICE_LG_ACT_Q_INDEX_M		(0x7FF << ICE_LG_ACT_Q_INDEX_S)
+#define ICE_LG_ACT_Q_REGION_S		14
+#define ICE_LG_ACT_Q_REGION_M		(0x7 << ICE_LG_ACT_Q_REGION_S)
+#define ICE_LG_ACT_Q_PRIORITY_SET	BIT(17)
+
+	/* Action type = 2 - Prune */
+#define ICE_LG_ACT_PRUNE		0x2
+#define ICE_LG_ACT_EGRESS		BIT(14)
+#define ICE_LG_ACT_INGRESS		BIT(15)
+#define ICE_LG_ACT_PRUNET		BIT(16)
+
+	/* Action type = 3 - Mirror VSI */
+#define ICE_LG_OTHER_ACT_MIRROR		0x3
+#define ICE_LG_ACT_MIRROR_VSI_ID_S	3
+#define ICE_LG_ACT_MIRROR_VSI_ID_M	(0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S)
+
+	/* Action type = 5 - Large Action */
+#define ICE_LG_ACT_GENERIC		0x5
+#define ICE_LG_ACT_GENERIC_VALUE_S	3
+#define ICE_LG_ACT_GENERIC_VALUE_M	(0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S)
+#define ICE_LG_ACT_GENERIC_OFFSET_S	19
+#define ICE_LG_ACT_GENERIC_OFFSET_M	(0x7 << ICE_LG_ACT_GENERIC_OFFSET_S)
+#define ICE_LG_ACT_GENERIC_PRIORITY_S	22
+#define ICE_LG_ACT_GENERIC_PRIORITY_M	(0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S)
+
+	/* Action = 7 - Set Stat count */
+#define ICE_LG_ACT_STAT_COUNT		0x7
+#define ICE_LG_ACT_STAT_COUNT_S		3
+#define ICE_LG_ACT_STAT_COUNT_M		(0x7F << ICE_LG_ACT_STAT_COUNT_S)
+};
+
+/* Add/Update/Remove VSI list command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the VSI list for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_vsi_list {
+	__le16 index; /* Index of VSI/Prune list */
+	__le16 number_vsi;
+	__le16 vsi[1]; /* Array of number_vsi VSI numbers */
+};
+
+/* Query VSI list command/response entry */
+struct ice_sw_rule_vsi_list_query {
+	__le16 index;
+	DECLARE_BITMAP(vsi_list, ICE_MAX_VSI);
+} __packed;
+
+/* Add switch rule response:
+ * Content of return buffer is same as the input buffer. The status field and
+ * LUT index are updated as part of the response
+ */
+struct ice_aqc_sw_rules_elem {
+	__le16 type; /* Switch rule type, one of T_... */
+#define ICE_AQC_SW_RULES_T_LKUP_RX		0x0
+#define ICE_AQC_SW_RULES_T_LKUP_TX		0x1
+#define ICE_AQC_SW_RULES_T_LG_ACT		0x2
+#define ICE_AQC_SW_RULES_T_VSI_LIST_SET		0x3
+#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR	0x4
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET	0x5
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR	0x6
+	__le16 status;
+	union {
+		struct ice_sw_rule_lkup_rx_tx lkup_tx_rx;
+		struct ice_sw_rule_lg_act lg_act;
+		struct ice_sw_rule_vsi_list vsi_list;
+		struct ice_sw_rule_vsi_list_query vsi_list_query;
+	} __packed pdata;
+};
+
 /* Get Default Topology (indirect 0x0400) */
 struct ice_aqc_get_topo {
 	u8 port_num;
@@ -766,11 +1003,13 @@ struct ice_aq_desc {
 		struct ice_aqc_list_caps get_cap;
 		struct ice_aqc_get_phy_caps get_phy;
 		struct ice_aqc_get_sw_cfg get_sw_conf;
+		struct ice_aqc_sw_rules sw_rules;
 		struct ice_aqc_get_topo get_topo;
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
 		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
+		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
 		struct ice_aqc_get_link_status get_link_status;
 	} params;
 };
@@ -821,10 +1060,20 @@ enum ice_adminq_opc {
 	/* internal switch commands */
 	ice_aqc_opc_get_sw_cfg				= 0x0200,
 
+	/* Alloc/Free/Get Resources */
+	ice_aqc_opc_alloc_res				= 0x0208,
+	ice_aqc_opc_free_res				= 0x0209,
+
 	/* VSI commands */
 	ice_aqc_opc_add_vsi				= 0x0210,
 	ice_aqc_opc_update_vsi				= 0x0211,
 	ice_aqc_opc_free_vsi				= 0x0213,
+
+	/* switch rules population commands */
+	ice_aqc_opc_add_sw_rules			= 0x02A0,
+	ice_aqc_opc_update_sw_rules			= 0x02A1,
+	ice_aqc_opc_remove_sw_rules			= 0x02A2,
+
 	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
 
 	/* transmit scheduler commands */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index a4ce8a87fb0d..67301fe75482 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -258,6 +258,66 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	return status;
 }
 
+/**
+ * ice_init_fltr_mgmt_struct - initializes filter management list and locks
+ * @hw: pointer to the hw struct
+ */
+static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw;
+
+	hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw),
+				       sizeof(*hw->switch_info), GFP_KERNEL);
+	sw = hw->switch_info;
+
+	if (!sw)
+		return ICE_ERR_NO_MEMORY;
+
+	INIT_LIST_HEAD(&sw->vsi_list_map_head);
+
+	mutex_init(&sw->mac_list_lock);
+	INIT_LIST_HEAD(&sw->mac_list_head);
+
+	mutex_init(&sw->vlan_list_lock);
+	INIT_LIST_HEAD(&sw->vlan_list_head);
+
+	mutex_init(&sw->eth_m_list_lock);
+	INIT_LIST_HEAD(&sw->eth_m_list_head);
+
+	mutex_init(&sw->promisc_list_lock);
+	INIT_LIST_HEAD(&sw->promisc_list_head);
+
+	mutex_init(&sw->mac_vlan_list_lock);
+	INIT_LIST_HEAD(&sw->mac_vlan_list_head);
+
+	return 0;
+}
+
+/**
+ * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
+ * @hw: pointer to the hw struct
+ */
+static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_vsi_list_map_info *v_pos_map;
+	struct ice_vsi_list_map_info *v_tmp_map;
+
+	list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head,
+				 list_entry) {
+		list_del(&v_pos_map->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), v_pos_map);
+	}
+
+	mutex_destroy(&sw->mac_list_lock);
+	mutex_destroy(&sw->vlan_list_lock);
+	mutex_destroy(&sw->eth_m_list_lock);
+	mutex_destroy(&sw->promisc_list_lock);
+	mutex_destroy(&sw->mac_vlan_list_lock);
+
+	devm_kfree(ice_hw_to_dev(hw), sw);
+}
+
 /**
  * ice_init_hw - main hardware initialization routine
  * @hw: pointer to the hardware structure
@@ -321,6 +381,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	if (status)
 		goto err_unroll_alloc;
 
+	hw->evb_veb = true;
+
 	/* Query the allocated resources for tx scheduler */
 	status = ice_sched_query_res_alloc(hw);
 	if (status) {
@@ -352,21 +414,27 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	if (status)
 		goto err_unroll_sched;
 
+	status = ice_init_fltr_mgmt_struct(hw);
+	if (status)
+		goto err_unroll_sched;
+
 	/* Get port MAC information */
 	mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp);
 	mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL);
 
 	if (!mac_buf)
-		goto err_unroll_sched;
+		goto err_unroll_fltr_mgmt_struct;
 
 	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
 	devm_kfree(ice_hw_to_dev(hw), mac_buf);
 
 	if (status)
-		goto err_unroll_sched;
+		goto err_unroll_fltr_mgmt_struct;
 
 	return 0;
 
+err_unroll_fltr_mgmt_struct:
+	ice_cleanup_fltr_mgmt_struct(hw);
 err_unroll_sched:
 	ice_sched_cleanup_all(hw);
 err_unroll_alloc:
@@ -389,6 +457,8 @@ void ice_deinit_hw(struct ice_hw *hw)
 		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
 		hw->port_info = NULL;
 	}
+
+	ice_cleanup_fltr_mgmt_struct(hw);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 04e004ba2067..3b4a2691ddac 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -162,6 +162,57 @@ static int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
 	return count;
 }
 
+/**
+ * ice_add_mac_to_list - Add a mac address filter entry to the list
+ * @vsi: the VSI to be forwarded to
+ * @add_list: pointer to the list which contains MAC filter entries
+ * @macaddr: the MAC address to be added.
+ *
+ * Adds mac address filter entry to the temp list
+ *
+ * Returns 0 on success or ENOMEM on failure.
+ */
+static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
+			       const u8 *macaddr)
+{
+	struct ice_fltr_list_entry *tmp;
+	struct ice_pf *pf = vsi->back;
+
+	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->fltr_info.flag = ICE_FLTR_TX;
+	tmp->fltr_info.src = vsi->vsi_num;
+	tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
+	ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr);
+
+	INIT_LIST_HEAD(&tmp->list_entry);
+	list_add(&tmp->list_entry, add_list);
+
+	return 0;
+}
+
+/**
+ * ice_free_fltr_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_add_mac_to_list
+ */
+static void ice_free_fltr_list(struct device *dev, struct list_head *h)
+{
+	struct ice_fltr_list_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, h, list_entry) {
+		list_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
 /**
  * __ice_clean_ctrlq - helper function to clean controlq rings
  * @pf: ptr to struct ice_pf
@@ -1519,6 +1570,8 @@ err_get_qs:
  */
 static int ice_setup_pf_sw(struct ice_pf *pf)
 {
+	LIST_HEAD(tmp_add_list);
+	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
 	int status = 0;
 
@@ -1528,7 +1581,37 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 		goto error_exit;
 	}
 
+	/* tmp_add_list contains a list of MAC addresses for which MAC
+	 * filters need to be programmed. Add the VSI's unicast MAC to
+	 * this list
+	 */
+	status = ice_add_mac_to_list(vsi, &tmp_add_list,
+				     vsi->port_info->mac.perm_addr);
+	if (status)
+		goto error_exit;
+
+	/* VSI needs to receive broadcast traffic, so add the broadcast
+	 * MAC address to the list.
+	 */
+	eth_broadcast_addr(broadcast);
+	status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
+	if (status)
+		goto error_exit;
+
+	/* program MAC filters for entries in tmp_add_list */
+	status = ice_add_mac(&pf->hw, &tmp_add_list);
+	if (status) {
+		dev_err(&pf->pdev->dev, "Could not add MAC filters\n");
+		status = -ENOMEM;
+		goto error_exit;
+	}
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	return status;
+
 error_exit:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+
 	if (vsi) {
 		ice_vsi_free_q_vectors(vsi);
 		if (vsi->netdev && vsi->netdev->reg_state == NETREG_REGISTERED)
@@ -1537,6 +1620,7 @@ error_exit:
 			free_netdev(vsi->netdev);
 			vsi->netdev = NULL;
 		}
+
 		ice_vsi_delete(vsi);
 		ice_vsi_put_qs(vsi);
 		pf->q_left_tx += vsi->alloc_txq;
@@ -1869,6 +1953,13 @@ static int ice_probe(struct pci_dev *pdev,
 			"probe failed due to setup pf switch:%d\n", err);
 		goto err_alloc_sw_unroll;
 	}
+
+	/* Driver is mostly up */
+	clear_bit(__ICE_DOWN, pf->state);
+
+	/* since everything is good, start the service timer */
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
 	return 0;
 
 err_alloc_sw_unroll:
@@ -2012,6 +2103,7 @@ static int ice_vsi_release(struct ice_vsi *vsi)
 	ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
 	pf->num_avail_msix += vsi->num_q_vectors;
 
+	ice_remove_vsi_fltr(&pf->hw, vsi->vsi_num);
 	ice_vsi_delete(vsi);
 	ice_vsi_free_q_vectors(vsi);
 	ice_vsi_clear_rings(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 144712db1ad2..365dfb86dcb9 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -7,6 +7,7 @@
 /* Error Codes */
 enum ice_status {
 	ICE_ERR_PARAM				= -1,
+	ICE_ERR_NOT_IMPL			= -2,
 	ICE_ERR_NOT_READY			= -3,
 	ICE_ERR_INVAL_SIZE			= -6,
 	ICE_ERR_DEVICE_NOT_SUPPORTED		= -8,
@@ -15,6 +16,8 @@ enum ice_status {
 	ICE_ERR_NO_MEMORY			= -11,
 	ICE_ERR_CFG				= -12,
 	ICE_ERR_OUT_OF_RANGE			= -13,
+	ICE_ERR_ALREADY_EXISTS			= -14,
+	ICE_ERR_DOES_NOT_EXIST			= -15,
 	ICE_ERR_BUF_TOO_SHORT			= -52,
 	ICE_ERR_NVM_BLANK_MODE			= -53,
 	ICE_ERR_AQ_ERROR			= -100,
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index b438cee9521e..3944dac6cfc6 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -3,6 +3,88 @@
 
 #include "ice_switch.h"
 
+#define ICE_ETH_DA_OFFSET		0
+#define ICE_ETH_ETHTYPE_OFFSET		12
+#define ICE_ETH_VLAN_TCI_OFFSET		14
+#define ICE_MAX_VLAN_ID			0xFFF
+
+/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem
+ * struct to configure any switch filter rules.
+ * {DA (6 bytes), SA(6 bytes),
+ * Ether type (2 bytes for header without VLAN tag) OR
+ * VLAN tag (4 bytes for header with VLAN tag) }
+ *
+ * Word on Hardcoded values
+ * byte 0 = 0x2: to identify it as locally administered DA MAC
+ * byte 6 = 0x2: to identify it as locally administered SA MAC
+ * byte 12 = 0x81 & byte 13 = 0x00:
+ *	In case of VLAN filter first two bytes defines ether type (0x8100)
+ *	and remaining two bytes are placeholder for programming a given VLAN id
+ *	In case of Ether type filter it is treated as header without VLAN tag
+ *	and byte 12 and 13 is used to program a given Ether type instead
+ */
+#define DUMMY_ETH_HDR_LEN		16
+static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0,
+							0x2, 0, 0, 0, 0, 0,
+							0x81, 0, 0, 0};
+
+#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \
+	(sizeof(struct ice_aqc_sw_rules_elem) - \
+	 sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+	 sizeof(struct ice_sw_rule_lkup_rx_tx) + DUMMY_ETH_HDR_LEN - 1)
+#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \
+	(sizeof(struct ice_aqc_sw_rules_elem) - \
+	 sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+	 sizeof(struct ice_sw_rule_lkup_rx_tx) - 1)
+#define ICE_SW_RULE_LG_ACT_SIZE(n) \
+	(sizeof(struct ice_aqc_sw_rules_elem) - \
+	 sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+	 sizeof(struct ice_sw_rule_lg_act) - \
+	 sizeof(((struct ice_sw_rule_lg_act *)0)->act) + \
+	 ((n) * sizeof(((struct ice_sw_rule_lg_act *)0)->act)))
+#define ICE_SW_RULE_VSI_LIST_SIZE(n) \
+	(sizeof(struct ice_aqc_sw_rules_elem) - \
+	 sizeof(((struct ice_aqc_sw_rules_elem *)0)->pdata) + \
+	 sizeof(struct ice_sw_rule_vsi_list) - \
+	 sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi) + \
+	 ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi)))
+
+/**
+ * ice_aq_alloc_free_res - command to allocate/free resources
+ * @hw: pointer to the hw struct
+ * @num_entries: number of resource entries in buffer
+ * @buf: Indirect buffer to hold data parameters and response
+ * @buf_size: size of buffer for indirect commands
+ * @opc: pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Helper function to allocate/free resources using the admin queue commands
+ */
+static enum ice_status
+ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries,
+		      struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+		      enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_alloc_free_res_cmd *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.sw_res_ctrl;
+
+	if (!buf)
+		return ICE_ERR_PARAM;
+
+	if (buf_size < (num_entries * sizeof(buf->elem[0])))
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_entries = cpu_to_le16(num_entries);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
 /**
  * ice_aq_get_sw_cfg - get switch configuration
  * @hw: pointer to the hardware structure
@@ -165,6 +247,93 @@ ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
 	return status;
 }
 
+/**
+ * ice_aq_alloc_free_vsi_list
+ * @hw: pointer to the hw struct
+ * @vsi_list_id: VSI list id returned or used for lookup
+ * @lkup_type: switch rule filter lookup type
+ * @opc: switch rules population command type - pass in the command opcode
+ *
+ * allocates or free a VSI list resource
+ */
+static enum ice_status
+ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id,
+			   enum ice_sw_lkup_type lkup_type,
+			   enum ice_adminq_opc opc)
+{
+	struct ice_aqc_alloc_free_res_elem *sw_buf;
+	struct ice_aqc_res_elem *vsi_ele;
+	enum ice_status status;
+	u16 buf_len;
+
+	buf_len = sizeof(*sw_buf);
+	sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL);
+	if (!sw_buf)
+		return ICE_ERR_NO_MEMORY;
+	sw_buf->num_elems = cpu_to_le16(1);
+
+	if (lkup_type == ICE_SW_LKUP_MAC ||
+	    lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC_VLAN) {
+		sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP);
+	} else if (lkup_type == ICE_SW_LKUP_VLAN) {
+		sw_buf->res_type =
+			cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE);
+	} else {
+		status = ICE_ERR_PARAM;
+		goto ice_aq_alloc_free_vsi_list_exit;
+	}
+
+	if (opc == ice_aqc_opc_free_res)
+		sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id);
+
+	status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, opc, NULL);
+	if (status)
+		goto ice_aq_alloc_free_vsi_list_exit;
+
+	if (opc == ice_aqc_opc_alloc_res) {
+		vsi_ele = &sw_buf->elem[0];
+		*vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp);
+	}
+
+ice_aq_alloc_free_vsi_list_exit:
+	devm_kfree(ice_hw_to_dev(hw), sw_buf);
+	return status;
+}
+
+/**
+ * ice_aq_sw_rules - add/update/remove switch rules
+ * @hw: pointer to the hw struct
+ * @rule_list: pointer to switch rule population list
+ * @rule_list_sz: total size of the rule list in bytes
+ * @num_rules: number of switch rules in the rule_list
+ * @opc: switch rules population command type - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware
+ */
+static enum ice_status
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+		u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	if (opc != ice_aqc_opc_add_sw_rules &&
+	    opc != ice_aqc_opc_update_sw_rules &&
+	    opc != ice_aqc_opc_remove_sw_rules)
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	desc.params.sw_rules.num_rules_fltr_entry_index =
+		cpu_to_le16(num_rules);
+	return ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd);
+}
+
 /* ice_init_port_info - Initialize port_info with switch configuration data
  * @pi: pointer to port_info
  * @vsi_port_num: VSI number or port number
@@ -257,3 +426,1212 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
 	devm_kfree(ice_hw_to_dev(hw), (void *)rbuf);
 	return status;
 }
+
+/**
+ * ice_fill_sw_info - Helper function to populate lb_en and lan_en
+ * @hw: pointer to the hardware structure
+ * @f_info: filter info structure to fill/update
+ *
+ * This helper function populates the lb_en and lan_en elements of the provided
+ * ice_fltr_info struct using the switch's type and characteristics of the
+ * switch rule being configured.
+ */
+static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *f_info)
+{
+	f_info->lb_en = false;
+	f_info->lan_en = false;
+	if ((f_info->flag & ICE_FLTR_TX) &&
+	    (f_info->fltr_act == ICE_FWD_TO_VSI ||
+	     f_info->fltr_act == ICE_FWD_TO_VSI_LIST ||
+	     f_info->fltr_act == ICE_FWD_TO_Q ||
+	     f_info->fltr_act == ICE_FWD_TO_QGRP)) {
+		f_info->lb_en = true;
+		if (!(hw->evb_veb && f_info->lkup_type == ICE_SW_LKUP_MAC &&
+		      is_unicast_ether_addr(f_info->l_data.mac.mac_addr)))
+			f_info->lan_en = true;
+	}
+}
+
+/**
+ * ice_fill_sw_rule - Helper function to fill switch rule structure
+ * @hw: pointer to the hardware structure
+ * @f_info: entry containing packet forwarding information
+ * @s_rule: switch rule structure to be filled in based on mac_entry
+ * @opc: switch rules population command type - pass in the command opcode
+ */
+static void
+ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
+		 struct ice_aqc_sw_rules_elem *s_rule, enum ice_adminq_opc opc)
+{
+	u16 vlan_id = ICE_MAX_VLAN_ID + 1;
+	u8 eth_hdr[DUMMY_ETH_HDR_LEN];
+	void *daddr = NULL;
+	u32 act = 0;
+	__be16 *off;
+
+	if (opc == ice_aqc_opc_remove_sw_rules) {
+		s_rule->pdata.lkup_tx_rx.act = 0;
+		s_rule->pdata.lkup_tx_rx.index =
+			cpu_to_le16(f_info->fltr_rule_id);
+		s_rule->pdata.lkup_tx_rx.hdr_len = 0;
+		return;
+	}
+
+	/* initialize the ether header with a dummy header */
+	memcpy(eth_hdr, dummy_eth_header, sizeof(dummy_eth_header));
+	ice_fill_sw_info(hw, f_info);
+
+	switch (f_info->fltr_act) {
+	case ICE_FWD_TO_VSI:
+		act |= (f_info->fwd_id.vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
+			ICE_SINGLE_ACT_VSI_ID_M;
+		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+				ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_VSI_LIST:
+		act |= ICE_SINGLE_ACT_VSI_LIST;
+		act |= (f_info->fwd_id.vsi_list_id <<
+			ICE_SINGLE_ACT_VSI_LIST_ID_S) &
+			ICE_SINGLE_ACT_VSI_LIST_ID_M;
+		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+				ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_Q:
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+			ICE_SINGLE_ACT_Q_INDEX_M;
+		break;
+	case ICE_FWD_TO_QGRP:
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->qgrp_size << ICE_SINGLE_ACT_Q_REGION_S) &
+			ICE_SINGLE_ACT_Q_REGION_M;
+		break;
+	case ICE_DROP_PACKET:
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP;
+		break;
+	default:
+		return;
+	}
+
+	if (f_info->lb_en)
+		act |= ICE_SINGLE_ACT_LB_ENABLE;
+	if (f_info->lan_en)
+		act |= ICE_SINGLE_ACT_LAN_ENABLE;
+
+	switch (f_info->lkup_type) {
+	case ICE_SW_LKUP_MAC:
+		daddr = f_info->l_data.mac.mac_addr;
+		break;
+	case ICE_SW_LKUP_VLAN:
+		vlan_id = f_info->l_data.vlan.vlan_id;
+		if (f_info->fltr_act == ICE_FWD_TO_VSI ||
+		    f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+			act |= ICE_SINGLE_ACT_PRUNE;
+			act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS;
+		}
+		break;
+	case ICE_SW_LKUP_ETHERTYPE_MAC:
+		daddr = f_info->l_data.ethertype_mac.mac_addr;
+		/* fall-through */
+	case ICE_SW_LKUP_ETHERTYPE:
+		off = (__be16 *)&eth_hdr[ICE_ETH_ETHTYPE_OFFSET];
+		*off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
+		break;
+	case ICE_SW_LKUP_MAC_VLAN:
+		daddr = f_info->l_data.mac_vlan.mac_addr;
+		vlan_id = f_info->l_data.mac_vlan.vlan_id;
+		break;
+	case ICE_SW_LKUP_PROMISC_VLAN:
+		vlan_id = f_info->l_data.mac_vlan.vlan_id;
+		/* fall-through */
+	case ICE_SW_LKUP_PROMISC:
+		daddr = f_info->l_data.mac_vlan.mac_addr;
+		break;
+	default:
+		break;
+	}
+
+	s_rule->type = (f_info->flag & ICE_FLTR_RX) ?
+		cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) :
+		cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+
+	/* Recipe set depending on lookup type */
+	s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(f_info->lkup_type);
+	s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(f_info->src);
+	s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act);
+
+	if (daddr)
+		ether_addr_copy(&eth_hdr[ICE_ETH_DA_OFFSET], daddr);
+
+	if (!(vlan_id > ICE_MAX_VLAN_ID)) {
+		off = (__be16 *)&eth_hdr[ICE_ETH_VLAN_TCI_OFFSET];
+		*off = cpu_to_be16(vlan_id);
+	}
+
+	/* Create the switch rule with the final dummy Ethernet header */
+	if (opc != ice_aqc_opc_update_sw_rules)
+		s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(sizeof(eth_hdr));
+
+	memcpy(s_rule->pdata.lkup_tx_rx.hdr, eth_hdr, sizeof(eth_hdr));
+}
+
+/**
+ * ice_add_marker_act
+ * @hw: pointer to the hardware structure
+ * @m_ent: the management entry for which sw marker needs to be added
+ * @sw_marker: sw marker to tag the Rx descriptor with
+ * @l_id: large action resource id
+ *
+ * Create a large action to hold software marker and update the switch rule
+ * entry pointed by m_ent with newly created large action
+ */
+static enum ice_status
+ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
+		   u16 sw_marker, u16 l_id)
+{
+	struct ice_aqc_sw_rules_elem *lg_act, *rx_tx;
+	/* For software marker we need 3 large actions
+	 * 1. FWD action: FWD TO VSI or VSI LIST
+	 * 2. GENERIC VALUE action to hold the profile id
+	 * 3. GENERIC VALUE action to hold the software marker id
+	 */
+	const u16 num_lg_acts = 3;
+	enum ice_status status;
+	u16 lg_act_size;
+	u16 rules_size;
+	u16 vsi_info;
+	u32 act;
+
+	if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+		return ICE_ERR_PARAM;
+
+	/* Create two back-to-back switch rules and submit them to the HW using
+	 * one memory buffer:
+	 *    1. Large Action
+	 *    2. Look up tx rx
+	 */
+	lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(num_lg_acts);
+	rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
+	lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL);
+	if (!lg_act)
+		return ICE_ERR_NO_MEMORY;
+
+	rx_tx = (struct ice_aqc_sw_rules_elem *)((u8 *)lg_act + lg_act_size);
+
+	/* Fill in the first switch rule i.e. large action */
+	lg_act->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT);
+	lg_act->pdata.lg_act.index = cpu_to_le16(l_id);
+	lg_act->pdata.lg_act.size = cpu_to_le16(num_lg_acts);
+
+	/* First action VSI forwarding or VSI list forwarding depending on how
+	 * many VSIs
+	 */
+	vsi_info = (m_ent->vsi_count > 1) ?
+		m_ent->fltr_info.fwd_id.vsi_list_id :
+		m_ent->fltr_info.fwd_id.vsi_id;
+
+	act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT;
+	act |= (vsi_info << ICE_LG_ACT_VSI_LIST_ID_S) &
+		ICE_LG_ACT_VSI_LIST_ID_M;
+	if (m_ent->vsi_count > 1)
+		act |= ICE_LG_ACT_VSI_LIST;
+	lg_act->pdata.lg_act.act[0] = cpu_to_le32(act);
+
+	/* Second action descriptor type */
+	act = ICE_LG_ACT_GENERIC;
+
+	act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+	lg_act->pdata.lg_act.act[1] = cpu_to_le32(act);
+
+	act = (7 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+
+	/* Third action Marker value */
+	act |= ICE_LG_ACT_GENERIC;
+	act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) &
+		ICE_LG_ACT_GENERIC_VALUE_M;
+
+	act |= (0 << ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+	lg_act->pdata.lg_act.act[2] = cpu_to_le32(act);
+
+	/* call the fill switch rule to fill the lookup tx rx structure */
+	ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
+			 ice_aqc_opc_update_sw_rules);
+
+	/* Update the action to point to the large action id */
+	rx_tx->pdata.lkup_tx_rx.act =
+		cpu_to_le32(ICE_SINGLE_ACT_PTR |
+			    ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
+			     ICE_SINGLE_ACT_PTR_VAL_M));
+
+	/* Use the filter rule id of the previously created rule with single
+	 * act. Once the update happens, hardware will treat this as large
+	 * action
+	 */
+	rx_tx->pdata.lkup_tx_rx.index =
+		cpu_to_le16(m_ent->fltr_info.fltr_rule_id);
+
+	status = ice_aq_sw_rules(hw, lg_act, rules_size, 2,
+				 ice_aqc_opc_update_sw_rules, NULL);
+	if (!status) {
+		m_ent->lg_act_idx = l_id;
+		m_ent->sw_marker_id = sw_marker;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), lg_act);
+	return status;
+}
+
+/**
+ * ice_create_vsi_list_map
+ * @hw: pointer to the hardware structure
+ * @vsi_array: array of VSIs to form a VSI list
+ * @num_vsi: num VSI in the array
+ * @vsi_list_id: VSI list id generated as part of allocate resource
+ *
+ * Helper function to create a new entry of VSI list id to VSI mapping
+ * using the given VSI list id
+ */
+static struct ice_vsi_list_map_info *
+ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+			u16 vsi_list_id)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_vsi_list_map_info *v_map;
+	int i;
+
+	v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL);
+	if (!v_map)
+		return NULL;
+
+	v_map->vsi_list_id = vsi_list_id;
+
+	for (i = 0; i < num_vsi; i++)
+		set_bit(vsi_array[i], v_map->vsi_map);
+
+	list_add(&v_map->list_entry, &sw->vsi_list_map_head);
+	return v_map;
+}
+
+/**
+ * ice_update_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_array: array of VSIs to form a VSI list
+ * @num_vsi: num VSI in the array
+ * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @remove: Boolean value to indicate if this is a remove action
+ * @opc: switch rules population command type - pass in the command opcode
+ * @lkup_type: lookup type of the filter
+ *
+ * Call AQ command to add a new switch rule or update existing switch rule
+ * using the given VSI list id
+ */
+static enum ice_status
+ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+			 u16 vsi_list_id, bool remove, enum ice_adminq_opc opc,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_status status;
+	u16 s_rule_size;
+	u16 type;
+	int i;
+
+	if (!num_vsi)
+		return ICE_ERR_PARAM;
+
+	if (lkup_type == ICE_SW_LKUP_MAC ||
+	    lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC_VLAN)
+		type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR :
+				ICE_AQC_SW_RULES_T_VSI_LIST_SET;
+	else if (lkup_type == ICE_SW_LKUP_VLAN)
+		type = remove ? ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR :
+				ICE_AQC_SW_RULES_T_PRUNE_LIST_SET;
+	else
+		return ICE_ERR_PARAM;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(num_vsi);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	for (i = 0; i < num_vsi; i++)
+		s_rule->pdata.vsi_list.vsi[i] = cpu_to_le16(vsi_array[i]);
+
+	s_rule->type = cpu_to_le16(type);
+	s_rule->pdata.vsi_list.number_vsi = cpu_to_le16(num_vsi);
+	s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+
+	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_create_vsi_list_rule - Creates and populates a VSI list rule
+ * @hw: pointer to the hw struct
+ * @vsi_array: array of VSIs to form a VSI list
+ * @num_vsi: number of VSIs in the array
+ * @vsi_list_id: stores the ID of the VSI list to be created
+ * @lkup_type: switch rule filter's lookup type
+ */
+static enum ice_status
+ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_array, u16 num_vsi,
+			 u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type)
+{
+	enum ice_status status;
+	int i;
+
+	for (i = 0; i < num_vsi; i++)
+		if (vsi_array[i] >= ICE_MAX_VSI)
+			return ICE_ERR_OUT_OF_RANGE;
+
+	status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type,
+					    ice_aqc_opc_alloc_res);
+	if (status)
+		return status;
+
+	/* Update the newly created VSI list to include the specified VSIs */
+	return ice_update_vsi_list_rule(hw, vsi_array, num_vsi, *vsi_list_id,
+					false, ice_aqc_opc_add_sw_rules,
+					lkup_type);
+}
+
+/**
+ * ice_create_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: entry containing packet forwarding information
+ *
+ * Create switch rule with given filter information and add an entry
+ * to the corresponding filter management list to track this switch rule
+ * and VSI mapping
+ */
+static enum ice_status
+ice_create_pkt_fwd_rule(struct ice_hw *hw,
+			struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_sw_lkup_type l_type;
+	enum ice_status status;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+	fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry),
+				GFP_KERNEL);
+	if (!fm_entry) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_create_pkt_fwd_rule_exit;
+	}
+
+	fm_entry->fltr_info = f_entry->fltr_info;
+
+	/* Initialize all the fields for the management entry */
+	fm_entry->vsi_count = 1;
+	fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX;
+	fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID;
+	fm_entry->counter_index = ICE_INVAL_COUNTER_ID;
+
+	ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule,
+			 ice_aqc_opc_add_sw_rules);
+
+	status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+				 ice_aqc_opc_add_sw_rules, NULL);
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+		goto ice_create_pkt_fwd_rule_exit;
+	}
+
+	f_entry->fltr_info.fltr_rule_id =
+		le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+	fm_entry->fltr_info.fltr_rule_id =
+		le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+
+	/* The book keeping entries will get removed when base driver
+	 * calls remove filter AQ command
+	 */
+	l_type = fm_entry->fltr_info.lkup_type;
+	if (l_type == ICE_SW_LKUP_MAC) {
+		mutex_lock(&sw->mac_list_lock);
+		list_add(&fm_entry->list_entry, &sw->mac_list_head);
+		mutex_unlock(&sw->mac_list_lock);
+	} else if (l_type == ICE_SW_LKUP_VLAN) {
+		mutex_lock(&sw->vlan_list_lock);
+		list_add(&fm_entry->list_entry, &sw->vlan_list_head);
+		mutex_unlock(&sw->vlan_list_lock);
+	} else if (l_type == ICE_SW_LKUP_ETHERTYPE ||
+		   l_type == ICE_SW_LKUP_ETHERTYPE_MAC) {
+		mutex_lock(&sw->eth_m_list_lock);
+		list_add(&fm_entry->list_entry, &sw->eth_m_list_head);
+		mutex_unlock(&sw->eth_m_list_lock);
+	} else if (l_type == ICE_SW_LKUP_PROMISC ||
+		   l_type == ICE_SW_LKUP_PROMISC_VLAN) {
+		mutex_lock(&sw->promisc_list_lock);
+		list_add(&fm_entry->list_entry, &sw->promisc_list_head);
+		mutex_unlock(&sw->promisc_list_lock);
+	} else if (fm_entry->fltr_info.lkup_type == ICE_SW_LKUP_MAC_VLAN) {
+		mutex_lock(&sw->mac_vlan_list_lock);
+		list_add(&fm_entry->list_entry, &sw->mac_vlan_list_head);
+		mutex_unlock(&sw->mac_vlan_list_lock);
+	} else {
+		status = ICE_ERR_NOT_IMPL;
+	}
+ice_create_pkt_fwd_rule_exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_update_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @rule_id: rule of previously created switch rule to update
+ * @vsi_list_id: VSI list id to be updated with
+ * @f_info: ice_fltr_info to pull other information for switch rule
+ *
+ * Call AQ command to update a previously created switch rule with a
+ * VSI list id
+ */
+static enum ice_status
+ice_update_pkt_fwd_rule(struct ice_hw *hw, u16 rule_id, u16 vsi_list_id,
+			struct ice_fltr_info f_info)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	struct ice_fltr_info tmp_fltr;
+	enum ice_status status;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	tmp_fltr = f_info;
+	tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+	tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+
+	ice_fill_sw_rule(hw, &tmp_fltr, s_rule,
+			 ice_aqc_opc_update_sw_rules);
+
+	s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id);
+
+	/* Update switch rule with new rule set to forward VSI list */
+	status = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+				 ice_aqc_opc_update_sw_rules, NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_handle_vsi_list_mgmt
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do book keeping associated with adding filter information
+ * The algorithm to do the booking keeping is described below :
+ * When a VSI needs to subscribe to a given filter( MAC/VLAN/Ethtype etc.)
+ *	if only one VSI has been added till now
+ *		Allocate a new VSI list and add two VSIs
+ *		to this list using switch rule command
+ *		Update the previously created switch rule with the
+ *		newly created VSI list id
+ *	if a VSI list was previously created
+ *		Add the new VSI to the previously created VSI list set
+ *		using the update switch rule command
+ */
+static enum ice_status
+ice_handle_vsi_list_mgmt(struct ice_hw *hw,
+			 struct ice_fltr_mgmt_list_entry *m_entry,
+			 struct ice_fltr_info *cur_fltr,
+			 struct ice_fltr_info *new_fltr)
+{
+	enum ice_status status = 0;
+	u16 vsi_list_id = 0;
+
+	if ((cur_fltr->fltr_act == ICE_FWD_TO_Q ||
+	     cur_fltr->fltr_act == ICE_FWD_TO_QGRP))
+		return ICE_ERR_NOT_IMPL;
+
+	if ((new_fltr->fltr_act == ICE_FWD_TO_Q ||
+	     new_fltr->fltr_act == ICE_FWD_TO_QGRP) &&
+	    (cur_fltr->fltr_act == ICE_FWD_TO_VSI ||
+	     cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST))
+		return ICE_ERR_NOT_IMPL;
+
+	if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+		/* Only one entry existed in the mapping and it was not already
+		 * a part of a VSI list. So, create a VSI list with the old and
+		 * new VSIs.
+		 */
+		u16 vsi_id_arr[2];
+		u16 fltr_rule;
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_fltr->fwd_id.vsi_id == new_fltr->fwd_id.vsi_id)
+			return ICE_ERR_ALREADY_EXISTS;
+
+		vsi_id_arr[0] = cur_fltr->fwd_id.vsi_id;
+		vsi_id_arr[1] = new_fltr->fwd_id.vsi_id;
+		status = ice_create_vsi_list_rule(hw, &vsi_id_arr[0], 2,
+						  &vsi_list_id,
+						  new_fltr->lkup_type);
+		if (status)
+			return status;
+
+		fltr_rule = cur_fltr->fltr_rule_id;
+		/* Update the previous switch rule of "MAC forward to VSI" to
+		 * "MAC fwd to VSI list"
+		 */
+		status = ice_update_pkt_fwd_rule(hw, fltr_rule, vsi_list_id,
+						 *new_fltr);
+		if (status)
+			return status;
+
+		cur_fltr->fwd_id.vsi_list_id = vsi_list_id;
+		cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+		m_entry->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_id_arr[0], 2,
+						vsi_list_id);
+
+		/* If this entry was large action then the large action needs
+		 * to be updated to point to FWD to VSI list
+		 */
+		if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID)
+			status =
+			    ice_add_marker_act(hw, m_entry,
+					       m_entry->sw_marker_id,
+					       m_entry->lg_act_idx);
+	} else {
+		u16 vsi_id = new_fltr->fwd_id.vsi_id;
+		enum ice_adminq_opc opcode;
+
+		/* A rule already exists with the new VSI being added */
+		if (test_bit(vsi_id, m_entry->vsi_list_info->vsi_map))
+			return 0;
+
+		/* Update the previously created VSI list set with
+		 * the new VSI id passed in
+		 */
+		vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
+		opcode = ice_aqc_opc_update_sw_rules;
+
+		status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id,
+						  false, opcode,
+						  new_fltr->lkup_type);
+		/* update VSI list mapping info with new VSI id */
+		if (!status)
+			set_bit(vsi_id, m_entry->vsi_list_info->vsi_map);
+	}
+	if (!status)
+		m_entry->vsi_count++;
+	return status;
+}
+
+/**
+ * ice_find_mac_entry
+ * @hw: pointer to the hardware structure
+ * @mac_addr: MAC address to search for
+ *
+ * Helper function to search for a MAC entry using a given MAC address
+ * Returns pointer to the entry if found.
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_mac_entry(struct ice_hw *hw, u8 *mac_addr)
+{
+	struct ice_fltr_mgmt_list_entry *m_list_itr, *mac_ret = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+
+	mutex_lock(&sw->mac_list_lock);
+	list_for_each_entry(m_list_itr, &sw->mac_list_head, list_entry) {
+		u8 *buf = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+
+		if (ether_addr_equal(buf, mac_addr)) {
+			mac_ret = m_list_itr;
+			break;
+		}
+	}
+	mutex_unlock(&sw->mac_list_lock);
+	return mac_ret;
+}
+
+/**
+ * ice_add_shared_mac - Add one MAC shared filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: structure containing MAC forwarding information
+ *
+ * Adds or updates the book keeping list for the MAC addresses
+ */
+static enum ice_status
+ice_add_shared_mac(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	struct ice_fltr_mgmt_list_entry *m_entry;
+
+	new_fltr = &f_entry->fltr_info;
+
+	m_entry = ice_find_mac_entry(hw, &new_fltr->l_data.mac.mac_addr[0]);
+	if (!m_entry)
+		return ice_create_pkt_fwd_rule(hw, f_entry);
+
+	cur_fltr = &m_entry->fltr_info;
+
+	return ice_handle_vsi_list_mgmt(hw, m_entry, cur_fltr, new_fltr);
+}
+
+/**
+ * ice_add_mac - Add a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * IMPORTANT: When the ucast_shared flag is set to false and m_list has
+ * multiple unicast addresses, the function assumes that all the
+ * addresses are unique in a given add_mac call. It doesn't
+ * check for duplicates in this case, removing duplicates from a given
+ * list should be taken care of in the caller of this function.
+ */
+enum ice_status
+ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
+	struct ice_fltr_list_entry *m_list_itr;
+	u16 elem_sent, total_elem_left;
+	enum ice_status status = 0;
+	u16 num_unicast = 0;
+	u16 s_rule_size;
+
+	if (!m_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+
+		if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+			return ICE_ERR_PARAM;
+		if (is_zero_ether_addr(add))
+			return ICE_ERR_PARAM;
+		if (is_unicast_ether_addr(add) && !hw->ucast_shared) {
+			/* Don't overwrite the unicast address */
+			if (ice_find_mac_entry(hw, add))
+				return ICE_ERR_ALREADY_EXISTS;
+			num_unicast++;
+		} else if (is_multicast_ether_addr(add) ||
+			   (is_unicast_ether_addr(add) && hw->ucast_shared)) {
+			status = ice_add_shared_mac(hw, m_list_itr);
+			if (status) {
+				m_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
+				return status;
+			}
+			m_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+		}
+	}
+
+	/* Exit if no suitable entries were found for adding bulk switch rule */
+	if (!num_unicast)
+		return 0;
+
+	/* Allocate switch rule buffer for the bulk update for unicast */
+	s_rule_size = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE;
+	s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
+			      GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	r_iter = s_rule;
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
+		u8 *addr = &f_info->l_data.mac.mac_addr[0];
+
+		if (is_unicast_ether_addr(addr)) {
+			ice_fill_sw_rule(hw, &m_list_itr->fltr_info,
+					 r_iter, ice_aqc_opc_add_sw_rules);
+			r_iter = (struct ice_aqc_sw_rules_elem *)
+				((u8 *)r_iter + s_rule_size);
+		}
+	}
+
+	/* Call AQ bulk switch rule update for all unicast addresses */
+	r_iter = s_rule;
+	/* Call AQ switch rule in AQ_MAX chunk */
+	for (total_elem_left = num_unicast; total_elem_left > 0;
+	     total_elem_left -= elem_sent) {
+		struct ice_aqc_sw_rules_elem *entry = r_iter;
+
+		elem_sent = min(total_elem_left,
+				(u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size));
+		status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
+					 elem_sent, ice_aqc_opc_add_sw_rules,
+					 NULL);
+		if (status)
+			goto ice_add_mac_exit;
+		r_iter = (struct ice_aqc_sw_rules_elem *)
+			((u8 *)r_iter + (elem_sent * s_rule_size));
+	}
+
+	/* Fill up rule id based on the value returned from FW */
+	r_iter = s_rule;
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		struct ice_fltr_info *f_info = &m_list_itr->fltr_info;
+		u8 *addr = &f_info->l_data.mac.mac_addr[0];
+		struct ice_switch_info *sw = hw->switch_info;
+		struct ice_fltr_mgmt_list_entry *fm_entry;
+
+		if (is_unicast_ether_addr(addr)) {
+			f_info->fltr_rule_id =
+				le16_to_cpu(r_iter->pdata.lkup_tx_rx.index);
+			f_info->fltr_act = ICE_FWD_TO_VSI;
+			/* Create an entry to track this MAC address */
+			fm_entry = devm_kzalloc(ice_hw_to_dev(hw),
+						sizeof(*fm_entry), GFP_KERNEL);
+			if (!fm_entry) {
+				status = ICE_ERR_NO_MEMORY;
+				goto ice_add_mac_exit;
+			}
+			fm_entry->fltr_info = *f_info;
+			fm_entry->vsi_count = 1;
+			/* The book keeping entries will get removed when
+			 * base driver calls remove filter AQ command
+			 */
+			mutex_lock(&sw->mac_list_lock);
+			list_add(&fm_entry->list_entry, &sw->mac_list_head);
+			mutex_unlock(&sw->mac_list_lock);
+
+			r_iter = (struct ice_aqc_sw_rules_elem *)
+				((u8 *)r_iter + s_rule_size);
+		}
+	}
+
+ice_add_mac_exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_remove_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_list_id: VSI list id generated as part of allocate resource
+ * @lkup_type: switch rule filter lookup type
+ */
+static enum ice_status
+ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	enum ice_status status;
+	u16 s_rule_size;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(0);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
+	s_rule->pdata.vsi_list.index = cpu_to_le16(vsi_list_id);
+	/* FW expects number of VSIs in vsi_list resource to be 0 for clear
+	 * command. Since memory is zero'ed out during initialization, it's not
+	 * necessary to explicitly initialize the variable to 0.
+	 */
+
+	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1,
+				 ice_aqc_opc_remove_sw_rules, NULL);
+	if (!status)
+		/* Free the vsi_list resource that we allocated */
+		status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
+						    ice_aqc_opc_free_res);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_handle_rem_vsi_list_mgmt
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of the VSI to remove
+ * @fm_list_itr: filter management entry for which the VSI list management
+ * needs to be done
+ */
+static enum ice_status
+ice_handle_rem_vsi_list_mgmt(struct ice_hw *hw, u16 vsi_id,
+			     struct ice_fltr_mgmt_list_entry *fm_list_itr)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	enum ice_status status = 0;
+	enum ice_sw_lkup_type lkup_type;
+	bool is_last_elem = true;
+	bool conv_list = false;
+	bool del_list = false;
+	u16 vsi_list_id;
+
+	lkup_type = fm_list_itr->fltr_info.lkup_type;
+	vsi_list_id = fm_list_itr->fltr_info.fwd_id.vsi_list_id;
+
+	if (fm_list_itr->vsi_count > 1) {
+		status = ice_update_vsi_list_rule(hw, &vsi_id, 1, vsi_list_id,
+						  true,
+						  ice_aqc_opc_update_sw_rules,
+						  lkup_type);
+		if (status)
+			return status;
+		fm_list_itr->vsi_count--;
+		is_last_elem = false;
+		clear_bit(vsi_id, fm_list_itr->vsi_list_info->vsi_map);
+	}
+
+	/* For non-VLAN rules that forward packets to a VSI list, convert them
+	 * to forwarding packets to a VSI if there is only one VSI left in the
+	 * list.  Unused lists are then removed.
+	 * VLAN rules need to use VSI lists even with only one VSI.
+	 */
+	if (fm_list_itr->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST) {
+		if (lkup_type == ICE_SW_LKUP_VLAN) {
+			del_list = is_last_elem;
+		} else if (fm_list_itr->vsi_count == 1) {
+			conv_list = true;
+			del_list = true;
+		}
+	}
+
+	if (del_list) {
+		/* Remove the VSI list since it is no longer used */
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list_itr->vsi_list_info;
+
+		status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+		if (status)
+			return status;
+
+		if (conv_list) {
+			u16 rem_vsi_id;
+
+			rem_vsi_id = find_first_bit(vsi_list_info->vsi_map,
+						    ICE_MAX_VSI);
+
+			/* Error out when the expected last element is not in
+			 * the VSI list map
+			 */
+			if (rem_vsi_id == ICE_MAX_VSI)
+				return ICE_ERR_OUT_OF_RANGE;
+
+			/* Change the list entry action from VSI_LIST to VSI */
+			fm_list_itr->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+			fm_list_itr->fltr_info.fwd_id.vsi_id = rem_vsi_id;
+		}
+
+		list_del(&vsi_list_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+		fm_list_itr->vsi_list_info = NULL;
+	}
+
+	if (conv_list) {
+		/* Convert the rule's forward action to forwarding packets to
+		 * a VSI
+		 */
+		struct ice_aqc_sw_rules_elem *s_rule;
+
+		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+				      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE,
+				      GFP_KERNEL);
+		if (!s_rule)
+			return ICE_ERR_NO_MEMORY;
+
+		ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule,
+				 ice_aqc_opc_update_sw_rules);
+
+		s_rule->pdata.lkup_tx_rx.index =
+			cpu_to_le16(fm_list_itr->fltr_info.fltr_rule_id);
+
+		status = ice_aq_sw_rules(hw, s_rule,
+					 ICE_SW_RULE_RX_TX_ETH_HDR_SIZE, 1,
+					 ice_aqc_opc_update_sw_rules, NULL);
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
+		if (status)
+			return status;
+	}
+
+	if (is_last_elem) {
+		/* Remove the lookup rule */
+		struct ice_aqc_sw_rules_elem *s_rule;
+
+		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+				      ICE_SW_RULE_RX_TX_NO_HDR_SIZE,
+				      GFP_KERNEL);
+		if (!s_rule)
+			return ICE_ERR_NO_MEMORY;
+
+		ice_fill_sw_rule(hw, &fm_list_itr->fltr_info, s_rule,
+				 ice_aqc_opc_remove_sw_rules);
+
+		status = ice_aq_sw_rules(hw, s_rule,
+					 ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1,
+					 ice_aqc_opc_remove_sw_rules, NULL);
+		if (status)
+			return status;
+
+		/* Remove a book keeping entry from the MAC address list */
+		mutex_lock(&sw->mac_list_lock);
+		list_del(&fm_list_itr->list_entry);
+		mutex_unlock(&sw->mac_list_lock);
+		devm_kfree(ice_hw_to_dev(hw), fm_list_itr);
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
+	}
+	return status;
+}
+
+/**
+ * ice_remove_mac_entry
+ * @hw: pointer to the hardware structure
+ * @f_entry: structure containing MAC forwarding information
+ */
+static enum ice_status
+ice_remove_mac_entry(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_fltr_mgmt_list_entry *m_entry;
+	u16 vsi_id;
+	u8 *add;
+
+	add = &f_entry->fltr_info.l_data.mac.mac_addr[0];
+
+	m_entry = ice_find_mac_entry(hw, add);
+	if (!m_entry)
+		return ICE_ERR_PARAM;
+
+	vsi_id = f_entry->fltr_info.fwd_id.vsi_id;
+	return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, m_entry);
+}
+
+/**
+ * ice_remove_mac - remove a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * This function removes either a MAC filter rule or a specific VSI from a
+ * VSI list for a multicast MAC address.
+ *
+ * Returns ICE_ERR_DOES_NOT_EXIST if a given entry was not added by
+ * ice_add_mac. Caller should be aware that this call will only work if all
+ * the entries passed into m_list were added previously. It will not attempt to
+ * do a partial remove of entries that were found.
+ */
+enum ice_status
+ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
+	u8 s_rule_size = ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *m_entry;
+	struct ice_fltr_list_entry *m_list_itr;
+	u16 elem_sent, total_elem_left;
+	enum ice_status status = 0;
+	u16 num_unicast = 0;
+
+	if (!m_list)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+
+		if (is_unicast_ether_addr(addr) && !hw->ucast_shared)
+			num_unicast++;
+		else if (is_multicast_ether_addr(addr) ||
+			 (is_unicast_ether_addr(addr) && hw->ucast_shared))
+			ice_remove_mac_entry(hw, m_list_itr);
+	}
+
+	/* Exit if no unicast addresses found. Multicast switch rules
+	 * were added individually
+	 */
+	if (!num_unicast)
+		return 0;
+
+	/* Allocate switch rule buffer for the bulk update for unicast */
+	s_rule = devm_kcalloc(ice_hw_to_dev(hw), num_unicast, s_rule_size,
+			      GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	r_iter = s_rule;
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+
+		if (is_unicast_ether_addr(addr)) {
+			m_entry = ice_find_mac_entry(hw, addr);
+			if (!m_entry) {
+				status = ICE_ERR_DOES_NOT_EXIST;
+				goto ice_remove_mac_exit;
+			}
+
+			ice_fill_sw_rule(hw, &m_entry->fltr_info,
+					 r_iter, ice_aqc_opc_remove_sw_rules);
+			r_iter = (struct ice_aqc_sw_rules_elem *)
+				((u8 *)r_iter + s_rule_size);
+		}
+	}
+
+	/* Call AQ bulk switch rule update for all unicast addresses */
+	r_iter = s_rule;
+	/* Call AQ switch rule in AQ_MAX chunk */
+	for (total_elem_left = num_unicast; total_elem_left > 0;
+	     total_elem_left -= elem_sent) {
+		struct ice_aqc_sw_rules_elem *entry = r_iter;
+
+		elem_sent = min(total_elem_left,
+				(u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size));
+		status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
+					 elem_sent, ice_aqc_opc_remove_sw_rules,
+					 NULL);
+		if (status)
+			break;
+		r_iter = (struct ice_aqc_sw_rules_elem *)
+			((u8 *)r_iter + s_rule_size);
+	}
+
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		u8 *addr = m_list_itr->fltr_info.l_data.mac.mac_addr;
+
+		if (is_unicast_ether_addr(addr)) {
+			m_entry = ice_find_mac_entry(hw, addr);
+			if (!m_entry)
+				return ICE_ERR_OUT_OF_RANGE;
+			mutex_lock(&sw->mac_list_lock);
+			list_del(&m_entry->list_entry);
+			mutex_unlock(&sw->mac_list_lock);
+			devm_kfree(ice_hw_to_dev(hw), m_entry);
+		}
+	}
+
+ice_remove_mac_exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_add_to_vsi_fltr_list - Add VSI filters to the list
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of VSI to remove filters from
+ * @lkup_list_head: pointer to the list that has certain lookup type filters
+ * @vsi_list_head: pointer to the list pertaining to VSI with vsi_id
+ */
+static enum ice_status
+ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_id,
+			 struct list_head *lkup_list_head,
+			 struct list_head *vsi_list_head)
+{
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+
+	/* check to make sure VSI id is valid and within boundary */
+	if (vsi_id >=
+	    (sizeof(fm_entry->vsi_list_info->vsi_map) * BITS_PER_BYTE - 1))
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
+		struct ice_fltr_info *fi;
+
+		fi = &fm_entry->fltr_info;
+		if ((fi->fltr_act == ICE_FWD_TO_VSI &&
+		     fi->fwd_id.vsi_id == vsi_id) ||
+		    (fi->fltr_act == ICE_FWD_TO_VSI_LIST &&
+		     (test_bit(vsi_id, fm_entry->vsi_list_info->vsi_map)))) {
+			struct ice_fltr_list_entry *tmp;
+
+			/* this memory is freed up in the caller function
+			 * ice_remove_vsi_lkup_fltr() once filters for
+			 * this VSI are removed
+			 */
+			tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp),
+					   GFP_KERNEL);
+			if (!tmp)
+				return ICE_ERR_NO_MEMORY;
+
+			memcpy(&tmp->fltr_info, fi, sizeof(*fi));
+
+			/* Expected below fields to be set to ICE_FWD_TO_VSI and
+			 * the particular VSI id since we are only removing this
+			 * one VSI
+			 */
+			if (fi->fltr_act == ICE_FWD_TO_VSI_LIST) {
+				tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+				tmp->fltr_info.fwd_id.vsi_id = vsi_id;
+			}
+
+			list_add(&tmp->list_entry, vsi_list_head);
+		}
+	}
+	return 0;
+}
+
+/**
+ * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of VSI to remove filters from
+ * @lkup: switch rule filter lookup type
+ */
+static void
+ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id,
+			 enum ice_sw_lkup_type lkup)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *fm_entry;
+	struct list_head remove_list_head;
+	struct ice_fltr_list_entry *tmp;
+	enum ice_status status;
+
+	INIT_LIST_HEAD(&remove_list_head);
+	switch (lkup) {
+	case ICE_SW_LKUP_MAC:
+		mutex_lock(&sw->mac_list_lock);
+		status = ice_add_to_vsi_fltr_list(hw, vsi_id,
+						  &sw->mac_list_head,
+						  &remove_list_head);
+		mutex_unlock(&sw->mac_list_lock);
+		if (!status) {
+			ice_remove_mac(hw, &remove_list_head);
+			goto free_fltr_list;
+		}
+		break;
+	case ICE_SW_LKUP_VLAN:
+	case ICE_SW_LKUP_MAC_VLAN:
+	case ICE_SW_LKUP_ETHERTYPE:
+	case ICE_SW_LKUP_ETHERTYPE_MAC:
+	case ICE_SW_LKUP_PROMISC:
+	case ICE_SW_LKUP_PROMISC_VLAN:
+	case ICE_SW_LKUP_DFLT:
+		ice_debug(hw, ICE_DBG_SW,
+			  "Remove filters for this lookup type hasn't been implemented yet\n");
+		break;
+	}
+
+	return;
+free_fltr_list:
+	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+		list_del(&fm_entry->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+	}
+}
+
+/**
+ * ice_remove_vsi_fltr - Remove all filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_id: ID of VSI to remove filters from
+ */
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id)
+{
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_MAC_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_DFLT);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_ETHERTYPE_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_id, ICE_SW_LKUP_PROMISC_VLAN);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index bb8612d8e07b..1b312e7ab2b5 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -19,6 +19,122 @@ struct ice_vsi_ctx {
 	bool alloc_from_pool;
 };
 
+enum ice_sw_fwd_act_type {
+	ICE_FWD_TO_VSI = 0,
+	ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
+	ICE_FWD_TO_Q,
+	ICE_FWD_TO_QGRP,
+	ICE_DROP_PACKET,
+	ICE_INVAL_ACT
+};
+
+/* Switch recipe ID enum values are specific to hardware */
+enum ice_sw_lkup_type {
+	ICE_SW_LKUP_ETHERTYPE = 0,
+	ICE_SW_LKUP_MAC = 1,
+	ICE_SW_LKUP_MAC_VLAN = 2,
+	ICE_SW_LKUP_PROMISC = 3,
+	ICE_SW_LKUP_VLAN = 4,
+	ICE_SW_LKUP_DFLT = 5,
+	ICE_SW_LKUP_ETHERTYPE_MAC = 8,
+	ICE_SW_LKUP_PROMISC_VLAN = 9,
+};
+
+struct ice_fltr_info {
+	/* Look up information: how to look up packet */
+	enum ice_sw_lkup_type lkup_type;
+	/* Forward action: filter action to do after lookup */
+	enum ice_sw_fwd_act_type fltr_act;
+	/* rule ID returned by firmware once filter rule is created */
+	u16 fltr_rule_id;
+	u16 flag;
+#define ICE_FLTR_RX		BIT(0)
+#define ICE_FLTR_TX		BIT(1)
+#define ICE_FLTR_TX_RX		(ICE_FLTR_RX | ICE_FLTR_TX)
+
+	/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+	u16 src;
+
+	union {
+		struct {
+			u8 mac_addr[ETH_ALEN];
+		} mac;
+		struct {
+			u8 mac_addr[ETH_ALEN];
+			u16 vlan_id;
+		} mac_vlan;
+		struct {
+			u16 vlan_id;
+		} vlan;
+		/* Set lkup_type as ICE_SW_LKUP_ETHERTYPE
+		 * if just using ethertype as filter. Set lkup_type as
+		 * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be
+		 * passed in as filter.
+		 */
+		struct {
+			u16 ethertype;
+			u8 mac_addr[ETH_ALEN]; /* optional */
+		} ethertype_mac;
+	} l_data;
+
+	/* Depending on filter action */
+	union {
+		/* queue id in case of ICE_FWD_TO_Q and starting
+		 * queue id in case of ICE_FWD_TO_QGRP.
+		 */
+		u16 q_id:11;
+		u16 vsi_id:10;
+		u16 vsi_list_id:10;
+	} fwd_id;
+
+	/* Set to num_queues if action is ICE_FWD_TO_QGRP. This field
+	 * determines the range of queues the packet needs to be forwarded to
+	 */
+	u8 qgrp_size;
+
+	/* Rule creations populate these indicators basing on the switch type */
+	bool lb_en;	/* Indicate if packet can be looped back */
+	bool lan_en;	/* Indicate if packet can be forwarded to the uplink */
+};
+
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list id */
+struct ice_vsi_list_map_info {
+	struct list_head list_entry;
+	DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
+	u16 vsi_list_id;
+};
+
+enum ice_sw_fltr_status {
+	ICE_FLTR_STATUS_NEW = 0,
+	ICE_FLTR_STATUS_FW_SUCCESS,
+	ICE_FLTR_STATUS_FW_FAIL,
+};
+
+struct ice_fltr_list_entry {
+	struct list_head list_entry;
+	enum ice_sw_fltr_status status;
+	struct ice_fltr_info fltr_info;
+};
+
+/* This defines an entry in the list that maintains MAC or VLAN membership
+ * to HW list mapping, since multiple VSIs can subscribe to the same MAC or
+ * VLAN. As an optimization the VSI list should be created only when a
+ * second VSI becomes a subscriber to the VLAN address.
+ */
+struct ice_fltr_mgmt_list_entry {
+	/* back pointer to VSI list id to VSI list mapping */
+	struct ice_vsi_list_map_info *vsi_list_info;
+	u16 vsi_count;
+#define ICE_INVAL_LG_ACT_INDEX 0xffff
+	u16 lg_act_idx;
+#define ICE_INVAL_SW_MARKER_ID 0xffff
+	u16 sw_marker_id;
+	struct list_head list_entry;
+	struct ice_fltr_info fltr_info;
+#define ICE_INVAL_COUNTER_ID 0xff
+	u8 counter_index;
+};
+
 /* VSI related commands */
 enum ice_status
 ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
@@ -32,4 +148,8 @@ ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
 
 enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
 
+/* Switch/bridge related commands */
+enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
+enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id);
 #endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 5f58fd84d558..8926715b76ee 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -223,6 +223,22 @@ struct ice_port_info {
 	bool is_vf;
 };
 
+struct ice_switch_info {
+	/* Switch VSI lists to MAC/VLAN translation */
+	struct mutex mac_list_lock;		/* protect MAC list */
+	struct list_head mac_list_head;
+	struct mutex vlan_list_lock;		/* protect VLAN list */
+	struct list_head vlan_list_head;
+	struct mutex eth_m_list_lock;	/* protect ethtype list */
+	struct list_head eth_m_list_head;
+	struct mutex promisc_list_lock;	/* protect promisc mode list */
+	struct list_head promisc_list_head;
+	struct mutex mac_vlan_list_lock;	/* protect MAC-VLAN list */
+	struct list_head mac_vlan_list_head;
+
+	struct list_head vsi_list_map_head;
+};
+
 /* Port hardware description */
 struct ice_hw {
 	u8 __iomem *hw_addr;
@@ -248,11 +264,14 @@ struct ice_hw {
 	u8 max_cgds;
 	u8 sw_entry_point_layer;
 
+	bool evb_veb;		/* true for VEB, false for VEPA */
 	struct ice_bus_info bus;
 	struct ice_nvm_info nvm;
 	struct ice_hw_dev_caps dev_caps;	/* device capabilities */
 	struct ice_hw_func_caps func_caps;	/* function capabilities */
 
+	struct ice_switch_info *switch_info;	/* switch filter lists */
+
 	/* Control Queue info */
 	struct ice_ctl_q_info adminq;
 
@@ -276,6 +295,8 @@ struct ice_hw {
 	u8 itr_gran_100;
 	u8 itr_gran_50;
 	u8 itr_gran_25;
+	bool ucast_shared;	/* true if VSIs can share unicast addr */
+
 };
 
 /* Checksum and Shadow RAM pointers */

From cdedef59deb020e78721d820a5692100128c8c73 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:13 -0700
Subject: [PATCH 1259/1678] ice: Configure VSIs for Tx/Rx

This patch configures the VSIs to be able to send and receive
packets by doing the following:

1) Initialize flexible parser to extract and include certain
   fields in the Rx descriptor.

2) Add Tx queues by programming the Tx queue context (implemented in
   ice_vsi_cfg_txqs). Note that adding the queues also enables (starts)
   the queues.

3) Add Rx queues by programming Rx queue context (implemented in
   ice_vsi_cfg_rxqs). Note that this only adds queues but doesn't start
   them. The rings will be started by calling ice_vsi_start_rx_rings on
   interface up.

4) Configure interrupts for VSI queues.

5) Implement ice_open and ice_stop.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile       |    3 +-
 drivers/net/ethernet/intel/ice/ice.h          |   36 +-
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |   86 ++
 drivers/net/ethernet/intel/ice/ice_common.c   |  602 +++++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |   13 +
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |   59 +
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h    |  246 ++++
 drivers/net/ethernet/intel/ice/ice_main.c     | 1140 ++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_sched.c    |  105 ++
 drivers/net/ethernet/intel/ice/ice_sched.h    |    5 +
 drivers/net/ethernet/intel/ice/ice_status.h   |    2 +
 drivers/net/ethernet/intel/ice/ice_txrx.c     |  361 ++++++
 drivers/net/ethernet/intel/ice/ice_txrx.h     |   75 ++
 drivers/net/ethernet/intel/ice/ice_type.h     |    2 +
 14 files changed, 2729 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
 create mode 100644 drivers/net/ethernet/intel/ice/ice_txrx.c

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index bd2cbe14e76e..de82fc875775 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -12,4 +12,5 @@ ice-y := ice_main.o	\
 	 ice_common.o	\
 	 ice_nvm.o	\
 	 ice_switch.o	\
-	 ice_sched.o
+	 ice_sched.o	\
+	 ice_txrx.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 21d0c237ee3f..46cf82212ed8 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -11,8 +11,10 @@
 #include <linux/netdevice.h>
 #include <linux/compiler.h>
 #include <linux/etherdevice.h>
+#include <linux/skbuff.h>
 #include <linux/cpumask.h>
 #include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
 #include <linux/pci.h>
 #include <linux/workqueue.h>
 #include <linux/aer.h>
@@ -43,6 +45,8 @@
 #define ICE_VSI_MAP_SCATTER	1
 #define ICE_MAX_SCATTER_TXQS	16
 #define ICE_MAX_SCATTER_RXQS	16
+#define ICE_Q_WAIT_RETRY_LIMIT	10
+#define ICE_Q_WAIT_MAX_RETRY	(5 * ICE_Q_WAIT_RETRY_LIMIT)
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
@@ -56,6 +60,14 @@
 		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
 		  ICE_AQ_VSI_UP_TABLE_UP##i##_M)
 
+#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
+
+#define ice_for_each_txq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
+
+#define ice_for_each_rxq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
+
 struct ice_tc_info {
 	u16 qoffset;
 	u16 qcount;
@@ -96,6 +108,9 @@ struct ice_vsi {
 	struct ice_ring **rx_rings;	 /* rx ring array */
 	struct ice_ring **tx_rings;	 /* tx ring array */
 	struct ice_q_vector **q_vectors; /* q_vector array */
+
+	irqreturn_t (*irq_handler)(int irq, void *data);
+
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
 	int num_q_vectors;
 	int base_vector;
@@ -106,8 +121,14 @@ struct ice_vsi {
 	/* Interrupt thresholds */
 	u16 work_lmt;
 
+	u16 max_frame;
+	u16 rx_buf_len;
+
 	struct ice_aqc_vsi_props info;	 /* VSI properties */
 
+	bool irqs_ready;
+	bool current_isup;		 /* Sync 'link up' logging */
+
 	/* queue information */
 	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
 	u8 rx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -128,9 +149,11 @@ struct ice_q_vector {
 	struct napi_struct napi;
 	struct ice_ring_container rx;
 	struct ice_ring_container tx;
+	struct irq_affinity_notify affinity_notify;
 	u16 v_idx;			/* index in the vsi->q_vector array. */
 	u8 num_ring_tx;			/* total number of tx rings in vector */
 	u8 num_ring_rx;			/* total number of rx rings in vector */
+	char name[ICE_INT_NAME_STR_LEN];
 } ____cacheline_internodealigned_in_smp;
 
 enum ice_pf_flags {
@@ -178,10 +201,14 @@ struct ice_netdev_priv {
 /**
  * ice_irq_dynamic_ena - Enable default interrupt generation settings
  * @hw: pointer to hw struct
+ * @vsi: pointer to vsi struct, can be NULL
+ * @q_vector: pointer to q_vector, can be NULL
  */
-static inline void ice_irq_dynamic_ena(struct ice_hw *hw)
+static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
+				       struct ice_q_vector *q_vector)
 {
-	u32 vector = ((struct ice_pf *)hw->back)->oicr_idx;
+	u32 vector = (vsi && q_vector) ? vsi->base_vector + q_vector->v_idx :
+					((struct ice_pf *)hw->back)->oicr_idx;
 	int itr = ICE_ITR_NONE;
 	u32 val;
 
@@ -190,7 +217,10 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw)
 	 */
 	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
 	      (itr << GLINT_DYN_CTL_ITR_INDX_S);
-
+	if (vsi)
+		if (test_bit(__ICE_DOWN, vsi->state))
+			return;
 	wr32(hw, GLINT_DYN_CTL(vector), val);
 }
+
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 9237841439da..c7abcc1dbbcc 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -968,6 +968,87 @@ struct ice_aqc_nvm {
 	__le32	addr_low;
 };
 
+/* Add TX LAN Queues (indirect 0x0C30) */
+struct ice_aqc_add_txqs {
+	u8 num_qgrps;
+	u8 reserved[3];
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is the descriptor of each queue entry for the Add TX LAN Queues
+ * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
+ */
+struct ice_aqc_add_txqs_perq {
+	__le16 txq_id;
+	u8 rsvd[2];
+	__le32 q_teid;
+	u8 txq_ctx[22];
+	u8 rsvd2[2];
+	struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add TX LAN Queues (0x0C30)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_tx_qgrp is variable due
+ * to the variable number of queues in each group!
+ */
+struct ice_aqc_add_tx_qgrp {
+	__le32 parent_teid;
+	u8 num_txqs;
+	u8 rsvd[3];
+	struct ice_aqc_add_txqs_perq txqs[1];
+};
+
+/* Disable TX LAN Queues (indirect 0x0C31) */
+struct ice_aqc_dis_txqs {
+	u8 cmd_type;
+#define ICE_AQC_Q_DIS_CMD_S		0
+#define ICE_AQC_Q_DIS_CMD_M		(0x3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET	(0 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VM_RESET	BIT(ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VF_RESET	(2 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_PF_RESET	(3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL	BIT(2)
+#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE	BIT(3)
+	u8 num_entries;
+	__le16 vmvf_and_timeout;
+#define ICE_AQC_Q_DIS_VMVF_NUM_S	0
+#define ICE_AQC_Q_DIS_VMVF_NUM_M	(0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S)
+#define ICE_AQC_Q_DIS_TIMEOUT_S		10
+#define ICE_AQC_Q_DIS_TIMEOUT_M		(0x3F << ICE_AQC_Q_DIS_TIMEOUT_S)
+	__le32 blocked_cgds;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* The buffer for Disable TX LAN Queues (indirect 0x0C31)
+ * contains the following structures, arrayed one after the
+ * other.
+ * Note: Since the q_id is 16 bits wide, if the
+ * number of queues is even, then 2 bytes of alignment MUST be
+ * added before the start of the next group, to allow correct
+ * alignment of the parent_teid field.
+ */
+struct ice_aqc_dis_txq_item {
+	__le32 parent_teid;
+	u8 num_qs;
+	u8 rsvd;
+	/* The length of the q_id array varies according to num_qs */
+	__le16 q_id[1];
+	/* This only applies from F8 onward */
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S		15
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q	\
+			(0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET	\
+			(1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+};
+
+struct ice_aqc_dis_txq {
+	struct ice_aqc_dis_txq_item qgrps[1];
+};
+
 /**
  * struct ice_aq_desc - Admin Queue (AQ) descriptor
  * @flags: ICE_AQ_FLAG_* flags
@@ -1008,6 +1089,8 @@ struct ice_aq_desc {
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
+		struct ice_aqc_add_txqs add_txqs;
+		struct ice_aqc_dis_txqs dis_txqs;
 		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
 		struct ice_aqc_get_link_status get_link_status;
@@ -1088,6 +1171,9 @@ enum ice_adminq_opc {
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
 
+	/* TX queue handling commands/events */
+	ice_aqc_opc_add_txqs				= 0x0C30,
+	ice_aqc_opc_dis_txqs				= 0x0C31,
 };
 
 #endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 67301fe75482..3d5686636656 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -7,6 +7,25 @@
 
 #define ICE_PF_RESET_WAIT_COUNT	200
 
+#define ICE_NIC_FLX_ENTRY(hw, mdid, idx) \
+	wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(ICE_RXDID_FLEX_NIC), \
+	     ((ICE_RX_OPC_MDID << \
+	       GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \
+	      GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \
+	     (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \
+	      GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M))
+
+#define ICE_NIC_FLX_FLG_ENTRY(hw, flg_0, flg_1, flg_2, flg_3, idx) \
+	wr32((hw), GLFLXP_RXDID_FLAGS(ICE_RXDID_FLEX_NIC, idx), \
+	     (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \
+	      GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \
+	     (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \
+	      GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \
+	     (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \
+	      GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \
+	     (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \
+	      GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M))
+
 /**
  * ice_set_mac_type - Sets MAC type
  * @hw: pointer to the HW structure
@@ -258,6 +277,33 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	return status;
 }
 
+/**
+ * ice_init_flex_parser - initialize rx flex parser
+ * @hw: pointer to the hardware structure
+ *
+ * Function to initialize flex descriptors
+ */
+static void ice_init_flex_parser(struct ice_hw *hw)
+{
+	u8 idx = 0;
+
+	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_LOW, 0);
+	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_HASH_HIGH, 1);
+	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_LOWER, 2);
+	ICE_NIC_FLX_ENTRY(hw, ICE_RX_MDID_FLOW_ID_HIGH, 3);
+	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_FRG, ICE_RXFLG_UDP_GRE,
+			      ICE_RXFLG_PKT_DSI, ICE_RXFLG_FIN, idx++);
+	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_SYN, ICE_RXFLG_RST,
+			      ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++);
+	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI,
+			      ICE_RXFLG_EVLAN_x8100, ICE_RXFLG_EVLAN_x9100,
+			      idx++);
+	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_VLAN_x8100, ICE_RXFLG_TNL_VLAN,
+			      ICE_RXFLG_TNL_MAC, ICE_RXFLG_TNL0, idx++);
+	ICE_NIC_FLX_FLG_ENTRY(hw, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2,
+			      ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx);
+}
+
 /**
  * ice_init_fltr_mgmt_struct - initializes filter management list and locks
  * @hw: pointer to the hw struct
@@ -431,6 +477,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	if (status)
 		goto err_unroll_fltr_mgmt_struct;
 
+	ice_init_flex_parser(hw);
+
 	return 0;
 
 err_unroll_fltr_mgmt_struct:
@@ -597,6 +645,114 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
 	return ice_check_reset(hw);
 }
 
+/**
+ * ice_copy_rxq_ctx_to_hw
+ * @hw: pointer to the hardware structure
+ * @ice_rxq_ctx: pointer to the rxq context
+ * @rxq_index: the index of the rx queue
+ *
+ * Copies rxq context from dense structure to hw register space
+ */
+static enum ice_status
+ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
+{
+	u8 i;
+
+	if (!ice_rxq_ctx)
+		return ICE_ERR_BAD_PTR;
+
+	if (rxq_index > QRX_CTRL_MAX_INDEX)
+		return ICE_ERR_PARAM;
+
+	/* Copy each dword separately to hw */
+	for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
+		wr32(hw, QRX_CONTEXT(i, rxq_index),
+		     *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+
+		ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i,
+			  *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+	}
+
+	return 0;
+}
+
+/* LAN Rx Queue Context */
+static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
+	/* Field		Width	LSB */
+	ICE_CTX_STORE(ice_rlan_ctx, head,		13,	0),
+	ICE_CTX_STORE(ice_rlan_ctx, cpuid,		8,	13),
+	ICE_CTX_STORE(ice_rlan_ctx, base,		57,	32),
+	ICE_CTX_STORE(ice_rlan_ctx, qlen,		13,	89),
+	ICE_CTX_STORE(ice_rlan_ctx, dbuf,		7,	102),
+	ICE_CTX_STORE(ice_rlan_ctx, hbuf,		5,	109),
+	ICE_CTX_STORE(ice_rlan_ctx, dtype,		2,	114),
+	ICE_CTX_STORE(ice_rlan_ctx, dsize,		1,	116),
+	ICE_CTX_STORE(ice_rlan_ctx, crcstrip,		1,	117),
+	ICE_CTX_STORE(ice_rlan_ctx, l2tsel,		1,	119),
+	ICE_CTX_STORE(ice_rlan_ctx, hsplit_0,		4,	120),
+	ICE_CTX_STORE(ice_rlan_ctx, hsplit_1,		2,	124),
+	ICE_CTX_STORE(ice_rlan_ctx, showiv,		1,	127),
+	ICE_CTX_STORE(ice_rlan_ctx, rxmax,		14,	174),
+	ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena,	1,	193),
+	ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena,	1,	194),
+	ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena,	1,	195),
+	ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena,	1,	196),
+	ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh,		3,	198),
+	{ 0 }
+};
+
+/**
+ * ice_write_rxq_ctx
+ * @hw: pointer to the hardware structure
+ * @rlan_ctx: pointer to the rxq context
+ * @rxq_index: the index of the rx queue
+ *
+ * Converts rxq context from sparse to dense structure and then writes
+ * it to hw register space
+ */
+enum ice_status
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+		  u32 rxq_index)
+{
+	u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
+
+	ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
+	return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
+}
+
+/* LAN Tx Queue Context */
+const struct ice_ctx_ele ice_tlan_ctx_info[] = {
+				    /* Field			Width	LSB */
+	ICE_CTX_STORE(ice_tlan_ctx, base,			57,	0),
+	ICE_CTX_STORE(ice_tlan_ctx, port_num,			3,	57),
+	ICE_CTX_STORE(ice_tlan_ctx, cgd_num,			5,	60),
+	ICE_CTX_STORE(ice_tlan_ctx, pf_num,			3,	65),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_num,			10,	68),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_type,			2,	78),
+	ICE_CTX_STORE(ice_tlan_ctx, src_vsi,			10,	80),
+	ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena,			1,	90),
+	ICE_CTX_STORE(ice_tlan_ctx, alt_vlan,			1,	92),
+	ICE_CTX_STORE(ice_tlan_ctx, cpuid,			8,	93),
+	ICE_CTX_STORE(ice_tlan_ctx, wb_mode,			1,	101),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc,			1,	102),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd,			1,	103),
+	ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc,			1,	104),
+	ICE_CTX_STORE(ice_tlan_ctx, cmpq_id,			9,	105),
+	ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func,		14,	114),
+	ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode,	1,	128),
+	ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id,		6,	129),
+	ICE_CTX_STORE(ice_tlan_ctx, qlen,			13,	135),
+	ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx,		4,	148),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_ena,			1,	152),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_qnum,			11,	153),
+	ICE_CTX_STORE(ice_tlan_ctx, legacy_int,			1,	164),
+	ICE_CTX_STORE(ice_tlan_ctx, drop_ena,			1,	165),
+	ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx,		2,	166),
+	ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx,	3,	168),
+	ICE_CTX_STORE(ice_tlan_ctx, int_q_state,		110,	171),
+	{ 0 }
+};
+
 /**
  * ice_debug_cq
  * @hw: pointer to the hardware structure
@@ -1104,3 +1260,449 @@ void ice_clear_pxe_mode(struct ice_hw *hw)
 	if (ice_check_sq_alive(hw, &hw->adminq))
 		ice_aq_clear_pxe_mode(hw);
 }
+
+/**
+ * ice_aq_add_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: Number of added queue groups
+ * @qg_list: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx LAN queue (0x0C30)
+ *
+ * NOTE:
+ * Prior to calling add Tx LAN queue:
+ * Initialize the following as part of the Tx queue context:
+ * Completion queue ID if the queue uses Completion queue, Quanta profile,
+ * Cache profile and Packet shaper profile.
+ *
+ * After add Tx LAN queue AQ command is completed:
+ * Interrupts should be associated with specific queues,
+ * Association of Tx queue to Doorbell queue is not part of Add LAN Tx queue
+ * flow.
+ */
+static enum ice_status
+ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+		   struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	u16 i, sum_header_size, sum_q_size = 0;
+	struct ice_aqc_add_tx_qgrp *list;
+	struct ice_aqc_add_txqs *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.add_txqs;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs);
+
+	if (!qg_list)
+		return ICE_ERR_PARAM;
+
+	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+		return ICE_ERR_PARAM;
+
+	sum_header_size = num_qgrps *
+		(sizeof(*qg_list) - sizeof(*qg_list->txqs));
+
+	list = qg_list;
+	for (i = 0; i < num_qgrps; i++) {
+		struct ice_aqc_add_txqs_perq *q = list->txqs;
+
+		sum_q_size += list->num_txqs * sizeof(*q);
+		list = (struct ice_aqc_add_tx_qgrp *)(q + list->num_txqs);
+	}
+
+	if (buf_size != (sum_header_size + sum_q_size))
+		return ICE_ERR_PARAM;
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_qgrps = num_qgrps;
+
+	return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
+
+/**
+ * ice_aq_dis_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: number of groups in the list
+ * @qg_list: the list of groups to disable
+ * @buf_size: the total size of the qg_list buffer in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * Disable LAN Tx queue (0x0C31)
+ */
+static enum ice_status
+ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+		   struct ice_aqc_dis_txq_item *qg_list, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_dis_txqs *cmd;
+	struct ice_aq_desc desc;
+	u16 i, sz = 0;
+
+	cmd = &desc.params.dis_txqs;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs);
+
+	if (!qg_list)
+		return ICE_ERR_PARAM;
+
+	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+		return ICE_ERR_PARAM;
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->num_entries = num_qgrps;
+
+	for (i = 0; i < num_qgrps; ++i) {
+		/* Calculate the size taken up by the queue IDs in this group */
+		sz += qg_list[i].num_qs * sizeof(qg_list[i].q_id);
+
+		/* Add the size of the group header */
+		sz += sizeof(qg_list[i]) - sizeof(qg_list[i].q_id);
+
+		/* If the num of queues is even, add 2 bytes of padding */
+		if ((qg_list[i].num_qs % 2) == 0)
+			sz += 2;
+	}
+
+	if (buf_size != sz)
+		return ICE_ERR_PARAM;
+
+	return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
+
+/* End of FW Admin Queue command wrappers */
+
+/**
+ * ice_write_byte - write a byte to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx,
+			   const struct ice_ctx_ele *ce_info)
+{
+	u8 src_byte, dest_byte, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = (u8)(BIT(ce_info->width) - 1);
+
+	src_byte = *from;
+	src_byte &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_byte <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_byte, dest, sizeof(dest_byte));
+
+	dest_byte &= ~mask;	/* get the bits not changing */
+	dest_byte |= src_byte;	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_byte, sizeof(dest_byte));
+}
+
+/**
+ * ice_write_word - write a word to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void ice_write_word(u8 *src_ctx, u8 *dest_ctx,
+			   const struct ice_ctx_ele *ce_info)
+{
+	u16 src_word, mask;
+	__le16 dest_word;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = BIT(ce_info->width) - 1;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_word = *(u16 *)from;
+	src_word &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_word <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_word, dest, sizeof(dest_word));
+
+	dest_word &= ~(cpu_to_le16(mask));	/* get the bits not changing */
+	dest_word |= cpu_to_le16(src_word);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_word, sizeof(dest_word));
+}
+
+/**
+ * ice_write_dword - write a dword to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx,
+			    const struct ice_ctx_ele *ce_info)
+{
+	u32 src_dword, mask;
+	__le32 dest_dword;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 32 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 5 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 32)
+		mask = BIT(ce_info->width) - 1;
+	else
+		mask = (u32)~0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_dword = *(u32 *)from;
+	src_dword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_dword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_dword, dest, sizeof(dest_dword));
+
+	dest_dword &= ~(cpu_to_le32(mask));	/* get the bits not changing */
+	dest_dword |= cpu_to_le32(src_dword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_dword, sizeof(dest_dword));
+}
+
+/**
+ * ice_write_qword - write a qword to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void ice_write_qword(u8 *src_ctx, u8 *dest_ctx,
+			    const struct ice_ctx_ele *ce_info)
+{
+	u64 src_qword, mask;
+	__le64 dest_qword;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 64 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 6 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 64)
+		mask = BIT_ULL(ce_info->width) - 1;
+	else
+		mask = (u64)~0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_qword = *(u64 *)from;
+	src_qword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_qword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_qword, dest, sizeof(dest_qword));
+
+	dest_qword &= ~(cpu_to_le64(mask));	/* get the bits not changing */
+	dest_qword |= cpu_to_le64(src_qword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_qword, sizeof(dest_qword));
+}
+
+/**
+ * ice_set_ctx - set context bits in packed structure
+ * @src_ctx:  pointer to a generic non-packed context structure
+ * @dest_ctx: pointer to memory for the packed structure
+ * @ce_info:  a description of the structure to be transformed
+ */
+enum ice_status
+ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	int f;
+
+	for (f = 0; ce_info[f].width; f++) {
+		/* We have to deal with each element of the FW response
+		 * using the correct size so that we are correct regardless
+		 * of the endianness of the machine.
+		 */
+		switch (ce_info[f].size_of) {
+		case sizeof(u8):
+			ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u16):
+			ice_write_word(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u32):
+			ice_write_dword(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u64):
+			ice_write_qword(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		default:
+			return ICE_ERR_INVAL_SIZE;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ena_vsi_txq
+ * @pi: port information structure
+ * @vsi_id: VSI id
+ * @tc: tc number
+ * @num_qgrps: Number of added queue groups
+ * @buf: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function adds one lan q
+ */
+enum ice_status
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
+		struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aqc_txsched_elem_data node = { 0 };
+	struct ice_sched_node *parent;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return ICE_ERR_CFG;
+
+	if (num_qgrps > 1 || buf->num_txqs > 1)
+		return ICE_ERR_MAX_LIMIT;
+
+	hw = pi->hw;
+
+	mutex_lock(&pi->sched_lock);
+
+	/* find a parent node */
+	parent = ice_sched_get_free_qparent(pi, vsi_id, tc,
+					    ICE_SCHED_NODE_OWNER_LAN);
+	if (!parent) {
+		status = ICE_ERR_PARAM;
+		goto ena_txq_exit;
+	}
+	buf->parent_teid = parent->info.node_teid;
+	node.parent_teid = parent->info.node_teid;
+	/* Mark that the values in the "generic" section as valid. The default
+	 * value in the "generic" section is zero. This means that :
+	 * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0.
+	 * - 0 priority among siblings, indicated by Bit 1-3.
+	 * - WFQ, indicated by Bit 4.
+	 * - 0 Adjustment value is used in PSM credit update flow, indicated by
+	 * Bit 5-6.
+	 * - Bit 7 is reserved.
+	 * Without setting the generic section as valid in valid_sections, the
+	 * Admin Q command will fail with error code ICE_AQ_RC_EINVAL.
+	 */
+	buf->txqs[0].info.valid_sections = ICE_AQC_ELEM_VALID_GENERIC;
+
+	/* add the lan q */
+	status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
+	if (status)
+		goto ena_txq_exit;
+
+	node.node_teid = buf->txqs[0].q_teid;
+	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+
+	/* add a leaf node into schduler tree q layer */
+	status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+
+ena_txq_exit:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_dis_vsi_txq
+ * @pi: port information structure
+ * @num_queues: number of queues
+ * @q_ids: pointer to the q_id array
+ * @q_teids: pointer to queue node teids
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function removes queues and their corresponding nodes in SW DB
+ */
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
+		u32 *q_teids, struct ice_sq_cd *cd)
+{
+	enum ice_status status = ICE_ERR_DOES_NOT_EXIST;
+	struct ice_aqc_dis_txq_item qg_list;
+	u16 i;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return ICE_ERR_CFG;
+
+	mutex_lock(&pi->sched_lock);
+
+	for (i = 0; i < num_queues; i++) {
+		struct ice_sched_node *node;
+
+		node = ice_sched_find_node_by_teid(pi->root, q_teids[i]);
+		if (!node)
+			continue;
+		qg_list.parent_teid = node->info.parent_teid;
+		qg_list.num_qs = 1;
+		qg_list.q_id[0] = cpu_to_le16(q_ids[i]);
+		status = ice_aq_dis_lan_txq(pi->hw, 1, &qg_list,
+					    sizeof(qg_list), cd);
+
+		if (status)
+			break;
+		ice_free_sched_node(pi, node);
+	}
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index dd4473e84ebb..8ed1135bb189 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -30,9 +30,15 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		struct ice_sq_cd *cd);
 void ice_clear_pxe_mode(struct ice_hw *hw);
 enum ice_status ice_get_caps(struct ice_hw *hw);
+enum ice_status
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+		  u32 rxq_index);
 bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
 enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
 void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
+extern const struct ice_ctx_ele ice_tlan_ctx_info[];
+enum ice_status
+ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info);
 enum ice_status
 ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
 		void *buf, u16 buf_size, struct ice_sq_cd *cd);
@@ -41,4 +47,11 @@ enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
 enum ice_status
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd);
+enum ice_status
+ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
+		u32 *q_teids, struct ice_sq_cd *cmd_details);
+enum ice_status
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
+		struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+		struct ice_sq_cd *cd);
 #endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 446a8bbef488..fc9b0b179e99 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -6,6 +6,7 @@
 #ifndef _ICE_HW_AUTOGEN_H_
 #define _ICE_HW_AUTOGEN_H_
 
+#define QTX_COMM_DBELL(_DBQM)		(0x002C0000 + ((_DBQM) * 4))
 #define PF_FW_ARQBAH			0x00080180
 #define PF_FW_ARQBAL			0x00080080
 #define PF_FW_ARQH			0x00080380
@@ -40,6 +41,44 @@
 #define PF_FW_ATQLEN_ATQENABLE_S	31
 #define PF_FW_ATQLEN_ATQENABLE_M	BIT(PF_FW_ATQLEN_ATQENABLE_S)
 #define PF_FW_ATQT			0x00080400
+
+#define GLFLXP_RXDID_FLAGS(_i, _j)		(0x0045D000 + ((_i) * 4 + (_j) * 256))
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S	0
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S	8
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S	16
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S)
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S	24
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M	ICE_M(0x3F, GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S)
+#define GLFLXP_RXDID_FLX_WRD_0(_i)		(0x0045c800 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_1(_i)		(0x0045c900 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_2(_i)		(0x0045ca00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S)
+#define GLFLXP_RXDID_FLX_WRD_3(_i)		(0x0045cb00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M	ICE_M(0xFF, GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S)
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M	ICE_M(0x3, GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S)
+
+#define QRXFLXP_CNTXT(_QRX)		(0x00480000 + ((_QRX) * 4))
+#define QRXFLXP_CNTXT_RXDID_IDX_S	0
+#define QRXFLXP_CNTXT_RXDID_IDX_M	ICE_M(0x3F, QRXFLXP_CNTXT_RXDID_IDX_S)
+#define QRXFLXP_CNTXT_RXDID_PRIO_S	8
+#define QRXFLXP_CNTXT_RXDID_PRIO_M	ICE_M(0x7, QRXFLXP_CNTXT_RXDID_PRIO_S)
+#define QRXFLXP_CNTXT_TS_S		11
+#define QRXFLXP_CNTXT_TS_M		BIT(QRXFLXP_CNTXT_TS_S)
 #define GLGEN_RSTAT			0x000B8188
 #define GLGEN_RSTAT_DEVSTATE_S		0
 #define GLGEN_RSTAT_DEVSTATE_M		ICE_M(0x3, GLGEN_RSTAT_DEVSTATE_S)
@@ -62,6 +101,8 @@
 #define GLINT_DYN_CTL_INTENA_M		BIT(GLINT_DYN_CTL_INTENA_S)
 #define GLINT_DYN_CTL_CLEARPBA_S	1
 #define GLINT_DYN_CTL_CLEARPBA_M	BIT(GLINT_DYN_CTL_CLEARPBA_S)
+#define GLINT_DYN_CTL_SWINT_TRIG_S	2
+#define GLINT_DYN_CTL_SWINT_TRIG_M	BIT(GLINT_DYN_CTL_SWINT_TRIG_S)
 #define GLINT_DYN_CTL_ITR_INDX_S	3
 #define GLINT_DYN_CTL_SW_ITR_INDX_S	25
 #define GLINT_DYN_CTL_SW_ITR_INDX_M	ICE_M(0x3, GLINT_DYN_CTL_SW_ITR_INDX_S)
@@ -106,7 +147,25 @@
 #define PFINT_OICR_CTL_CAUSE_ENA_S	30
 #define PFINT_OICR_CTL_CAUSE_ENA_M	BIT(PFINT_OICR_CTL_CAUSE_ENA_S)
 #define PFINT_OICR_ENA			0x0016C900
+#define QINT_RQCTL(_QRX)		(0x00150000 + ((_QRX) * 4))
+#define QINT_RQCTL_MSIX_INDX_S		0
+#define QINT_RQCTL_ITR_INDX_S		11
+#define QINT_RQCTL_CAUSE_ENA_S		30
+#define QINT_RQCTL_CAUSE_ENA_M		BIT(QINT_RQCTL_CAUSE_ENA_S)
+#define QINT_TQCTL(_DBQM)		(0x00140000 + ((_DBQM) * 4))
+#define QINT_TQCTL_MSIX_INDX_S		0
+#define QINT_TQCTL_ITR_INDX_S		11
+#define QINT_TQCTL_CAUSE_ENA_S		30
+#define QINT_TQCTL_CAUSE_ENA_M		BIT(QINT_TQCTL_CAUSE_ENA_S)
 #define GLLAN_RCTL_0			0x002941F8
+#define QRX_CONTEXT(_i, _QRX)		(0x00280000 + ((_i) * 8192 + (_QRX) * 4))
+#define QRX_CTRL(_QRX)			(0x00120000 + ((_QRX) * 4))
+#define QRX_CTRL_MAX_INDEX		2047
+#define QRX_CTRL_QENA_REQ_S		0
+#define QRX_CTRL_QENA_REQ_M		BIT(QRX_CTRL_QENA_REQ_S)
+#define QRX_CTRL_QENA_STAT_S		2
+#define QRX_CTRL_QENA_STAT_M		BIT(QRX_CTRL_QENA_STAT_S)
+#define QRX_TAIL(_QRX)			(0x00290000 + ((_QRX) * 4))
 #define GLNVM_FLA			0x000B6108
 #define GLNVM_FLA_LOCKED_S		6
 #define GLNVM_FLA_LOCKED_M		BIT(GLNVM_FLA_LOCKED_S)
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
new file mode 100644
index 000000000000..3c1aeb74d950
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LAN_TX_RX_H_
+#define _ICE_LAN_TX_RX_H_
+
+union ice_32byte_rx_desc {
+	struct {
+		__le64  pkt_addr; /* Packet buffer address */
+		__le64  hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_addr is DD bit */
+		__le64  rsvd1;
+		__le64  rsvd2;
+	} read;
+	struct {
+		struct {
+			struct {
+				__le16 mirroring_status;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fd_id; /* Flow Director filter id */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/PTYPE/length */
+			__le64 status_error_len;
+		} qword1;
+		struct {
+			__le16 ext_status; /* extended status */
+			__le16 rsvd;
+			__le16 l2tag2_1;
+			__le16 l2tag2_2;
+		} qword2;
+		struct {
+			__le32 reserved;
+			__le32 fd_id;
+		} qword3;
+	} wb; /* writeback */
+};
+
+/* RX Flex Descriptor
+ * This descriptor is used instead of the legacy version descriptor when
+ * ice_rlan_ctx.adv_desc is set
+ */
+union ice_32b_rx_flex_desc {
+	struct {
+		__le64  pkt_addr; /* Packet buffer address */
+		__le64  hdr_addr; /* Header buffer address */
+				  /* bit 0 of hdr_addr is DD bit */
+		__le64  rsvd1;
+		__le64  rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile id */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum ice_rxdid {
+	ICE_RXDID_START			= 0,
+	ICE_RXDID_LEGACY_0		= ICE_RXDID_START,
+	ICE_RXDID_LEGACY_1,
+	ICE_RXDID_FLX_START,
+	ICE_RXDID_FLEX_NIC		= ICE_RXDID_FLX_START,
+	ICE_RXDID_FLX_LAST		= 63,
+	ICE_RXDID_LAST			= ICE_RXDID_FLX_LAST
+};
+
+/* Receive Flex Descriptor Rx opcode values */
+#define ICE_RX_OPC_MDID		0x01
+
+/* Receive Descriptor MDID values */
+#define ICE_RX_MDID_FLOW_ID_LOWER	5
+#define ICE_RX_MDID_FLOW_ID_HIGH	6
+#define ICE_RX_MDID_HASH_LOW		56
+#define ICE_RX_MDID_HASH_HIGH		57
+
+/* Rx Flag64 packet flag bits */
+enum ice_rx_flg64_bits {
+	ICE_RXFLG_PKT_DSI	= 0,
+	ICE_RXFLG_EVLAN_x8100	= 15,
+	ICE_RXFLG_EVLAN_x9100,
+	ICE_RXFLG_VLAN_x8100,
+	ICE_RXFLG_TNL_MAC	= 22,
+	ICE_RXFLG_TNL_VLAN,
+	ICE_RXFLG_PKT_FRG,
+	ICE_RXFLG_FIN		= 32,
+	ICE_RXFLG_SYN,
+	ICE_RXFLG_RST,
+	ICE_RXFLG_TNL0		= 38,
+	ICE_RXFLG_TNL1,
+	ICE_RXFLG_TNL2,
+	ICE_RXFLG_UDP_GRE,
+	ICE_RXFLG_RSVD		= 63
+};
+
+#define ICE_RXQ_CTX_SIZE_DWORDS		8
+#define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+
+/* RLAN Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_rlan_ctx {
+	u16 head;
+	u16 cpuid; /* bigger than needed, see above for reason */
+	u64 base;
+	u16 qlen;
+#define ICE_RLAN_CTX_DBUF_S 7
+	u16 dbuf; /* bigger than needed, see above for reason */
+#define ICE_RLAN_CTX_HBUF_S 6
+	u16 hbuf; /* bigger than needed, see above for reason */
+	u8  dtype;
+	u8  dsize;
+	u8  crcstrip;
+	u8  l2tsel;
+	u8  hsplit_0;
+	u8  hsplit_1;
+	u8  showiv;
+	u32 rxmax; /* bigger than needed, see above for reason */
+	u8  tphrdesc_ena;
+	u8  tphwdesc_ena;
+	u8  tphdata_ena;
+	u8  tphhead_ena;
+	u16 lrxqthresh; /* bigger than needed, see above for reason */
+};
+
+struct ice_ctx_ele {
+	u16 offset;
+	u16 size_of;
+	u16 width;
+	u16 lsb;
+};
+
+#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) {	\
+	.offset = offsetof(struct _struct, _ele),	\
+	.size_of = FIELD_SIZEOF(struct _struct, _ele),	\
+	.width = _width,				\
+	.lsb = _lsb,					\
+}
+
+/* for hsplit_0 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_0 {
+	ICE_RLAN_RX_HSPLIT_0_NO_SPLIT		= 0,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_L2		= 1,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_IP		= 2,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP	= 4,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP		= 8,
+};
+
+/* for hsplit_1 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_1 {
+	ICE_RLAN_RX_HSPLIT_1_NO_SPLIT		= 0,
+	ICE_RLAN_RX_HSPLIT_1_SPLIT_L2		= 1,
+	ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS	= 2,
+};
+
+/* TX Descriptor */
+struct ice_tx_desc {
+	__le64 buf_addr; /* Address of descriptor's data buf */
+	__le64 cmd_type_offset_bsz;
+};
+
+#define ICE_LAN_TXQ_MAX_QGRPS	127
+#define ICE_LAN_TXQ_MAX_QDIS	1023
+
+/* Tx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_tlan_ctx {
+#define ICE_TLAN_CTX_BASE_S	7
+	u64 base;		/* base is defined in 128-byte units */
+	u8  port_num;
+	u16 cgd_num;		/* bigger than needed, see above for reason */
+	u8  pf_num;
+	u16 vmvf_num;
+	u8  vmvf_type;
+#define ICE_TLAN_CTX_VMVF_TYPE_VMQ	1
+#define ICE_TLAN_CTX_VMVF_TYPE_PF	2
+	u16 src_vsi;
+	u8  tsyn_ena;
+	u8  alt_vlan;
+	u16 cpuid;		/* bigger than needed, see above for reason */
+	u8  wb_mode;
+	u8  tphrd_desc;
+	u8  tphrd;
+	u8  tphwr_desc;
+	u16 cmpq_id;
+	u16 qnum_in_func;
+	u8  itr_notification_mode;
+	u8  adjust_prof_id;
+	u32 qlen;		/* bigger than needed, see above for reason */
+	u8  quanta_prof_idx;
+	u8  tso_ena;
+	u16 tso_qnum;
+	u8  legacy_int;
+	u8  drop_ena;
+	u8  cache_prof_idx;
+	u8  pkt_shaper_prof_idx;
+	u8  int_q_state;	/* width not needed - internal do not write */
+};
+#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 3b4a2691ddac..58e9eb40f64a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -27,6 +27,7 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 #endif /* !CONFIG_DYNAMIC_DEBUG */
 
 static struct workqueue_struct *ice_wq;
+static const struct net_device_ops ice_netdev_ops;
 
 static int ice_vsi_release(struct ice_vsi *vsi);
 
@@ -213,6 +214,75 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h)
 	}
 }
 
+/**
+ * ice_print_link_msg - print link up or down message
+ * @vsi: the VSI whose link status is being queried
+ * @isup: boolean for if the link is now up or down
+ */
+static void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
+{
+	const char *speed;
+	const char *fc;
+
+	if (vsi->current_isup == isup)
+		return;
+
+	vsi->current_isup = isup;
+
+	if (!isup) {
+		netdev_info(vsi->netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	switch (vsi->port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_40GB:
+		speed = "40 G";
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		speed = "25 G";
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		speed = "20 G";
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		speed = "10 G";
+		break;
+	case ICE_AQ_LINK_SPEED_5GB:
+		speed = "5 G";
+		break;
+	case ICE_AQ_LINK_SPEED_2500MB:
+		speed = "2.5 G";
+		break;
+	case ICE_AQ_LINK_SPEED_1000MB:
+		speed = "1 G";
+		break;
+	case ICE_AQ_LINK_SPEED_100MB:
+		speed = "100 M";
+		break;
+	default:
+		speed = "Unknown";
+		break;
+	}
+
+	switch (vsi->port_info->fc.current_mode) {
+	case ICE_FC_FULL:
+		fc = "RX/TX";
+		break;
+	case ICE_FC_TX_PAUSE:
+		fc = "TX";
+		break;
+	case ICE_FC_RX_PAUSE:
+		fc = "RX";
+		break;
+	default:
+		fc = "Unknown";
+		break;
+	}
+
+	netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n",
+		    speed, fc);
+}
+
 /**
  * __ice_clean_ctrlq - helper function to clean controlq rings
  * @pf: ptr to struct ice_pf
@@ -408,6 +478,104 @@ static void ice_set_ctrlq_len(struct ice_hw *hw)
 	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
 }
 
+/**
+ * ice_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ */
+static void ice_irq_affinity_notify(struct irq_affinity_notify *notify,
+				    const cpumask_t *mask)
+{
+	struct ice_q_vector *q_vector =
+		container_of(notify, struct ice_q_vector, affinity_notify);
+
+	cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * ice_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ */
+static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
+
+/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+static void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int base = vsi->base_vector;
+	u32 val;
+	int i;
+
+	/* disable interrupt causation from each queue */
+	if (vsi->tx_rings) {
+		ice_for_each_txq(vsi, i) {
+			if (vsi->tx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->tx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_TQCTL(reg));
+				val &= ~QINT_TQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_TQCTL(reg), val);
+			}
+		}
+	}
+
+	if (vsi->rx_rings) {
+		ice_for_each_rxq(vsi, i) {
+			if (vsi->rx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->rx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_RQCTL(reg));
+				val &= ~QINT_RQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_RQCTL(reg), val);
+			}
+		}
+	}
+
+	/* disable each interrupt */
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+		for (i = vsi->base_vector;
+		     i < (vsi->num_q_vectors + vsi->base_vector); i++)
+			wr32(hw, GLINT_DYN_CTL(i), 0);
+
+		ice_flush(hw);
+		for (i = 0; i < vsi->num_q_vectors; i++)
+			synchronize_irq(pf->msix_entries[i + base].vector);
+	}
+}
+
+/**
+ * ice_vsi_ena_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_ena_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+		int i;
+
+		for (i = 0; i < vsi->num_q_vectors; i++)
+			ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
+	}
+
+	ice_flush(hw);
+	return 0;
+}
+
 /**
  * ice_vsi_delete - delete a VSI from the switch
  * @vsi: pointer to VSI being removed
@@ -428,6 +596,73 @@ static void ice_vsi_delete(struct ice_vsi *vsi)
 			vsi->vsi_num);
 }
 
+/**
+ * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ */
+static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
+{
+	int q_vectors = vsi->num_q_vectors;
+	struct ice_pf *pf = vsi->back;
+	int base = vsi->base_vector;
+	int rx_int_idx = 0;
+	int tx_int_idx = 0;
+	int vector, err;
+	int irq_num;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[vector];
+
+		irq_num = pf->msix_entries[base + vector].vector;
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+			tx_int_idx++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "rx", rx_int_idx++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "tx", tx_int_idx++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = devm_request_irq(&pf->pdev->dev,
+				       pf->msix_entries[base + vector].vector,
+				       vsi->irq_handler, 0, q_vector->name,
+				       q_vector);
+		if (err) {
+			netdev_err(vsi->netdev,
+				   "MSIX request_irq failed, error: %d\n", err);
+			goto free_q_irqs;
+		}
+
+		/* register for affinity change notifications */
+		q_vector->affinity_notify.notify = ice_irq_affinity_notify;
+		q_vector->affinity_notify.release = ice_irq_affinity_release;
+		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
+
+		/* assign the mask for this irq */
+		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+	}
+
+	vsi->irqs_ready = true;
+	return 0;
+
+free_q_irqs:
+	while (vector) {
+		vector--;
+		irq_num = pf->msix_entries[base + vector].vector,
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]);
+	}
+	return err;
+}
+
 /**
  * ice_vsi_setup_q_map - Setup a VSI queue map
  * @vsi: the VSI being configured
@@ -590,6 +825,38 @@ static int ice_vsi_add(struct ice_vsi *vsi)
 	return ret;
 }
 
+/**
+ * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
+ * @vsi: the VSI being cleaned up
+ */
+static void ice_vsi_release_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	u16 vector = vsi->base_vector;
+	struct ice_hw *hw = &pf->hw;
+	u32 txq = 0;
+	u32 rxq = 0;
+	int i, q;
+
+	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), 0);
+		wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), 0);
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
+			rxq++;
+		}
+	}
+
+	ice_flush(hw);
+}
+
 /**
  * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
  * @vsi: the VSI having rings deallocated
@@ -672,6 +939,118 @@ err_out:
 	return -ENOMEM;
 }
 
+/**
+ * ice_vsi_free_irq - Free the irq association with the OS
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_free_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int base = vsi->base_vector;
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+		int i;
+
+		if (!vsi->q_vectors || !vsi->irqs_ready)
+			return;
+
+		vsi->irqs_ready = false;
+		for (i = 0; i < vsi->num_q_vectors; i++) {
+			u16 vector = i + base;
+			int irq_num;
+
+			irq_num = pf->msix_entries[vector].vector;
+
+			/* free only the irqs that were actually requested */
+			if (!vsi->q_vectors[i] ||
+			    !(vsi->q_vectors[i]->num_ring_tx ||
+			      vsi->q_vectors[i]->num_ring_rx))
+				continue;
+
+			/* clear the affinity notifier in the IRQ descriptor */
+			irq_set_affinity_notifier(irq_num, NULL);
+
+			/* clear the affinity_mask in the IRQ descriptor */
+			irq_set_affinity_hint(irq_num, NULL);
+			synchronize_irq(irq_num);
+			devm_free_irq(&pf->pdev->dev, irq_num,
+				      vsi->q_vectors[i]);
+		}
+		ice_vsi_release_msix(vsi);
+	}
+}
+
+/**
+ * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_cfg_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	u16 vector = vsi->base_vector;
+	struct ice_hw *hw = &pf->hw;
+	u32 txq = 0, rxq = 0;
+	int i, q, itr;
+	u8 itr_gran;
+
+	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		itr_gran = hw->itr_gran_200;
+
+		if (q_vector->num_ring_rx) {
+			q_vector->rx.itr =
+				ITR_TO_REG(vsi->rx_rings[rxq]->rx_itr_setting,
+					   itr_gran);
+			q_vector->rx.latency_range = ICE_LOW_LATENCY;
+		}
+
+		if (q_vector->num_ring_tx) {
+			q_vector->tx.itr =
+				ITR_TO_REG(vsi->tx_rings[txq]->tx_itr_setting,
+					   itr_gran);
+			q_vector->tx.latency_range = ICE_LOW_LATENCY;
+		}
+		wr32(hw, GLINT_ITR(ICE_RX_ITR, vector), q_vector->rx.itr);
+		wr32(hw, GLINT_ITR(ICE_TX_ITR, vector), q_vector->tx.itr);
+
+		/* Both Transmit Queue Interrupt Cause Control register
+		 * and Receive Queue Interrupt Cause control register
+		 * expects MSIX_INDX field to be the vector index
+		 * within the function space and not the absolute
+		 * vector index across PF or across device.
+		 * For SR-IOV VF VSIs queue vector index always starts
+		 * with 1 since first vector index(0) is used for OICR
+		 * in VF space. Since VMDq and other PF VSIs are withtin
+		 * the PF function space, use the vector index thats
+		 * tracked for this PF.
+		 */
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			u32 val;
+
+			itr = ICE_TX_ITR;
+			val = QINT_TQCTL_CAUSE_ENA_M |
+			      (itr << QINT_TQCTL_ITR_INDX_S)  |
+			      (vector << QINT_TQCTL_MSIX_INDX_S);
+			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			u32 val;
+
+			itr = ICE_RX_ITR;
+			val = QINT_RQCTL_CAUSE_ENA_M |
+			      (itr << QINT_RQCTL_ITR_INDX_S)  |
+			      (vector << QINT_RQCTL_MSIX_INDX_S);
+			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+			rxq++;
+		}
+	}
+
+	ice_flush(hw);
+}
+
 /**
  * ice_ena_misc_vector - enable the non-queue interrupts
  * @pf: board private structure
@@ -752,7 +1131,7 @@ ena_intr:
 	wr32(hw, PFINT_OICR_ENA, ena_mask);
 	if (!test_bit(__ICE_DOWN, pf->state)) {
 		ice_service_task_schedule(pf);
-		ice_irq_dynamic_ena(hw);
+		ice_irq_dynamic_ena(hw, NULL, NULL);
 	}
 
 	return ret;
@@ -1017,7 +1396,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 	     ITR_TO_REG(ICE_ITR_8K, itr_gran));
 
 	ice_flush(hw);
-	ice_irq_dynamic_ena(hw);
+	ice_irq_dynamic_ena(hw, NULL, NULL);
 
 	return 0;
 }
@@ -1262,6 +1641,9 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* assign netdev_ops */
+	netdev->netdev_ops = &ice_netdev_ops;
+
 	/* setup watchdog timeout value to be 5 second */
 	netdev->watchdog_timeo = 5 * HZ;
 
@@ -2079,6 +2461,704 @@ static void __exit ice_module_exit(void)
 }
 module_exit(ice_module_exit);
 
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+
+	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+	tlan_ctx->port_num = vsi->port_info->lport;
+
+	/* Transmit Queue Length */
+	tlan_ctx->qlen = ring->count;
+
+	/* PF number */
+	tlan_ctx->pf_num = hw->pf_id;
+
+	/* queue belongs to a specific VSI type
+	 * VF / VM index should be programmed per vmvf_type setting:
+	 * for vmvf_type = VF, it is VF number between 0-256
+	 * for vmvf_type = VM, it is VM number between 0-767
+	 * for PF or EMP this field should be set to zero
+	 */
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+		break;
+	default:
+		return;
+	}
+
+	/* make sure the context is associated with the right VSI */
+	tlan_ctx->src_vsi = vsi->vsi_num;
+
+	tlan_ctx->tso_ena = ICE_TX_LEGACY;
+	tlan_ctx->tso_qnum = pf_q;
+
+	/* Legacy or Advanced Host Interface:
+	 * 0: Advanced Host Interface
+	 * 1: Legacy Host Interface
+	 */
+	tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+static int ice_vsi_cfg_txqs(struct ice_vsi *vsi)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	u16 buf_len, i, pf_q;
+	int err = 0, tc = 0;
+	u8 num_q_grps;
+
+	buf_len = sizeof(struct ice_aqc_add_tx_qgrp);
+	qg_buf = devm_kzalloc(&pf->pdev->dev, buf_len, GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	if (vsi->num_txq > ICE_MAX_TXQ_PER_TXQG) {
+		err = -EINVAL;
+		goto err_cfg_txqs;
+	}
+	qg_buf->num_txqs = 1;
+	num_q_grps = 1;
+
+	/* set up and configure the tx queues */
+	ice_for_each_txq(vsi, i) {
+		struct ice_tlan_ctx tlan_ctx = { 0 };
+
+		pf_q = vsi->txq_map[i];
+		ice_setup_tx_ctx(vsi->tx_rings[i], &tlan_ctx, pf_q);
+		/* copy context contents into the qg_buf */
+		qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+		ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+			    ice_tlan_ctx_info);
+
+		/* init queue specific tail reg. It is referred as transmit
+		 * comm scheduler queue doorbell.
+		 */
+		vsi->tx_rings[i]->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+		status = ice_ena_vsi_txq(vsi->port_info, vsi->vsi_num, tc,
+					 num_q_grps, qg_buf, buf_len, NULL);
+		if (status) {
+			dev_err(&vsi->back->pdev->dev,
+				"Failed to set LAN Tx queue context, error: %d\n",
+				status);
+			err = -ENODEV;
+			goto err_cfg_txqs;
+		}
+
+		/* Add Tx Queue TEID into the VSI tx ring from the response
+		 * This will complete configuring and enabling the queue.
+		 */
+		txq = &qg_buf->txqs[0];
+		if (pf_q == le16_to_cpu(txq->txq_id))
+			vsi->tx_rings[i]->txq_teid =
+				le32_to_cpu(txq->q_teid);
+	}
+err_cfg_txqs:
+	devm_kfree(&pf->pdev->dev, qg_buf);
+	return err;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+static int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 rxdid = ICE_RXDID_FLEX_NIC;
+	struct ice_rlan_ctx rlan_ctx;
+	u32 regval;
+	u16 pf_q;
+	int err;
+
+	/* what is RX queue number in global space of 2K rx queues */
+	pf_q = vsi->rxq_map[ring->q_index];
+
+	/* clear the context structure first */
+	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+	rlan_ctx.base = ring->dma >> 7;
+
+	rlan_ctx.qlen = ring->count;
+
+	/* Receive Packet Data Buffer Size.
+	 * The Packet Data Buffer Size is defined in 128 byte units.
+	 */
+	rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+	/* use 32 byte descriptors */
+	rlan_ctx.dsize = 1;
+
+	/* Strip the Ethernet CRC bytes before the packet is posted to host
+	 * memory.
+	 */
+	rlan_ctx.crcstrip = 1;
+
+	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+	rlan_ctx.l2tsel = 1;
+
+	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+	/* This controls whether VLAN is stripped from inner headers
+	 * The VLAN in the inner L2 header is stripped to the receive
+	 * descriptor if enabled by this flag.
+	 */
+	rlan_ctx.showiv = 0;
+
+	/* Max packet size for this queue - must not be set to a larger value
+	 * than 5 x DBUF
+	 */
+	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+			       ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
+
+	/* Rx queue threshold in units of 64 */
+	rlan_ctx.lrxqthresh = 1;
+
+	 /* Enable Flexible Descriptors in the queue context which
+	  * allows this driver to select a specific receive descriptor format
+	  */
+	regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+	regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+		QRXFLXP_CNTXT_RXDID_IDX_M;
+
+	/* increasing context priority to pick up profile id;
+	 * default is 0x01; setting to 0x03 to ensure profile
+	 * is programming if prev context is of same priority
+	 */
+	regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+		QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+
+	/* Absolute queue number out of 2K needs to be passed */
+	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+	if (err) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+			pf_q, err);
+		return -EIO;
+	}
+
+	/* init queue specific tail register */
+	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+	ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Rx VSI for operation.
+ */
+static int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+	int err = 0;
+	u16 i;
+
+	if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
+		vsi->max_frame = vsi->netdev->mtu +
+			ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	else
+		vsi->max_frame = ICE_RXBUF_2048;
+
+	vsi->rx_buf_len = ICE_RXBUF_2048;
+	/* set up individual rings */
+	for (i = 0; i < vsi->num_rxq && !err; i++)
+		err = ice_setup_rx_ctx(vsi->rx_rings[i]);
+
+	if (err) {
+		dev_err(&vsi->back->pdev->dev, "ice_setup_rx_ctx failed\n");
+		return -EIO;
+	}
+	return err;
+}
+
+/**
+ * ice_vsi_cfg - Setup the VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_vsi_cfg(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_vsi_cfg_txqs(vsi);
+	if (!err)
+		err = ice_vsi_cfg_rxqs(vsi);
+
+	return err;
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u32 *q_teids, val;
+	u16 *q_ids, i;
+	int err = 0;
+
+	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+		return -EINVAL;
+
+	q_teids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_teids),
+			       GFP_KERNEL);
+	if (!q_teids)
+		return -ENOMEM;
+
+	q_ids = devm_kcalloc(&pf->pdev->dev, vsi->num_txq, sizeof(*q_ids),
+			     GFP_KERNEL);
+	if (!q_ids) {
+		err = -ENOMEM;
+		goto err_alloc_q_ids;
+	}
+
+	/* set up the tx queue list to be disabled */
+	ice_for_each_txq(vsi, i) {
+		u16 v_idx;
+
+		if (!vsi->tx_rings || !vsi->tx_rings[i]) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		q_ids[i] = vsi->txq_map[i];
+		q_teids[i] = vsi->tx_rings[i]->txq_teid;
+
+		/* clear cause_ena bit for disabled queues */
+		val = rd32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx));
+		val &= ~QINT_TQCTL_CAUSE_ENA_M;
+		wr32(hw, QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val);
+
+		/* software is expected to wait for 100 ns */
+		ndelay(100);
+
+		/* trigger a software interrupt for the vector associated to
+		 * the queue to schedule napi handler
+		 */
+		v_idx = vsi->tx_rings[i]->q_vector->v_idx;
+		wr32(hw, GLINT_DYN_CTL(vsi->base_vector + v_idx),
+		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
+	}
+	status = ice_dis_vsi_txq(vsi->port_info, vsi->num_txq, q_ids, q_teids,
+				 NULL);
+	if (status) {
+		dev_err(&pf->pdev->dev,
+			"Failed to disable LAN Tx queues, error: %d\n",
+			status);
+		err = -ENODEV;
+	}
+
+err_out:
+	devm_kfree(&pf->pdev->dev, q_ids);
+
+err_alloc_q_ids:
+	devm_kfree(&pf->pdev->dev, q_teids);
+
+	return err;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+	int i;
+
+	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
+		u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q));
+
+		if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+			break;
+
+		usleep_range(10, 20);
+	}
+	if (i >= ICE_Q_WAIT_RETRY_LIMIT)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_ctrl_rx_rings - Start or stop a VSI's rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the rx rings
+ */
+static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int i, j, ret = 0;
+
+	for (i = 0; i < vsi->num_rxq; i++) {
+		int pf_q = vsi->rxq_map[i];
+		u32 rx_reg;
+
+		for (j = 0; j < ICE_Q_WAIT_MAX_RETRY; j++) {
+			rx_reg = rd32(hw, QRX_CTRL(pf_q));
+			if (((rx_reg >> QRX_CTRL_QENA_REQ_S) & 1) ==
+			    ((rx_reg >> QRX_CTRL_QENA_STAT_S) & 1))
+				break;
+			usleep_range(1000, 2000);
+		}
+
+		/* Skip if the queue is already in the requested state */
+		if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+			continue;
+
+		/* turn on/off the queue */
+		if (ena)
+			rx_reg |= QRX_CTRL_QENA_REQ_M;
+		else
+			rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+		wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+		/* wait for the change to finish */
+		ret = ice_pf_rxq_wait(pf, pf_q, ena);
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"VSI idx %d Rx ring %d %sable timeout\n",
+				vsi->idx, pf_q, (ena ? "en" : "dis"));
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_start_rx_rings - start VSI's rx rings
+ * @vsi: the VSI whose rings are to be started
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_start_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_rx_rings(vsi, true);
+}
+
+/**
+ * ice_vsi_stop_rx_rings - stop VSI's rx rings
+ * @vsi: the VSI
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_rx_rings(vsi, false);
+}
+
+/**
+ * ice_vsi_stop_tx_rx_rings - stop VSI's tx and rx rings
+ * @vsi: the VSI
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi)
+{
+	int err_tx, err_rx;
+
+	err_tx = ice_vsi_stop_tx_rings(vsi);
+	if (err_tx)
+		dev_dbg(&vsi->back->pdev->dev, "Failed to disable Tx rings\n");
+
+	err_rx = ice_vsi_stop_rx_rings(vsi);
+	if (err_rx)
+		dev_dbg(&vsi->back->pdev->dev, "Failed to disable Rx rings\n");
+
+	if (err_tx || err_rx)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: The VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_up_complete(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		ice_vsi_cfg_msix(vsi);
+	else
+		return -ENOTSUPP;
+
+	/* Enable only Rx rings, Tx rings were enabled by the FW when the
+	 * Tx queue group list was configured and the context bits were
+	 * programmed using ice_vsi_cfg_txqs
+	 */
+	err = ice_vsi_start_rx_rings(vsi);
+	if (err)
+		return err;
+
+	clear_bit(__ICE_DOWN, vsi->state);
+	ice_vsi_ena_irq(vsi);
+
+	if (vsi->port_info &&
+	    (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
+	    vsi->netdev) {
+		ice_print_link_msg(vsi, true);
+		netif_tx_start_all_queues(vsi->netdev);
+		netif_carrier_on(vsi->netdev);
+	}
+
+	ice_service_task_schedule(pf);
+
+	return err;
+}
+
+/**
+ * ice_down - Shutdown the connection
+ * @vsi: The VSI being stopped
+ */
+static int ice_down(struct ice_vsi *vsi)
+{
+	int i, err;
+
+	/* Caller of this function is expected to set the
+	 * vsi->state __ICE_DOWN bit
+	 */
+	if (vsi->netdev) {
+		netif_carrier_off(vsi->netdev);
+		netif_tx_disable(vsi->netdev);
+	}
+
+	ice_vsi_dis_irq(vsi);
+	err = ice_vsi_stop_tx_rx_rings(vsi);
+
+	ice_for_each_txq(vsi, i)
+		ice_clean_tx_ring(vsi->tx_rings[i]);
+
+	ice_for_each_rxq(vsi, i)
+		ice_clean_rx_ring(vsi->rx_rings[i]);
+
+	if (err)
+		netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
+			   vsi->vsi_num, vsi->vsw->sw_id);
+	return err;
+}
+
+/**
+ * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+{
+	int i, err;
+
+	if (!vsi->num_txq) {
+		dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	ice_for_each_txq(vsi, i) {
+		err = ice_setup_tx_ring(vsi->tx_rings[i]);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+{
+	int i, err;
+
+	if (!vsi->num_rxq) {
+		dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	ice_for_each_rxq(vsi, i) {
+		err = ice_setup_rx_ring(vsi->rx_rings[i]);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_req_irq - Request IRQ from the OS
+ * @vsi: The VSI IRQ is being requested for
+ * @basename: name for the vector
+ *
+ * Return 0 on success and a negative value on error
+ */
+static int ice_vsi_req_irq(struct ice_vsi *vsi, char *basename)
+{
+	struct ice_pf *pf = vsi->back;
+	int err = -EINVAL;
+
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		err = ice_vsi_req_irq_msix(vsi, basename);
+
+	return err;
+}
+
+/**
+ * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+static void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->tx_rings)
+		return;
+
+	ice_for_each_txq(vsi, i)
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+			ice_free_tx_ring(vsi->tx_rings[i]);
+}
+
+/**
+ * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+static void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->rx_rings)
+		return;
+
+	ice_for_each_rxq(vsi, i)
+		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+			ice_free_rx_ring(vsi->rx_rings[i]);
+}
+
+/**
+ * ice_vsi_open - Called when a network interface is made active
+ * @vsi: the VSI to open
+ *
+ * Initialization of the VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+static int ice_vsi_open(struct ice_vsi *vsi)
+{
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	/* allocate descriptors */
+	err = ice_vsi_setup_tx_rings(vsi);
+	if (err)
+		goto err_setup_tx;
+
+	err = ice_vsi_setup_rx_rings(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = ice_vsi_cfg(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+		 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
+	err = ice_vsi_req_irq(vsi, int_name);
+	if (err)
+		goto err_setup_rx;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
+	if (err)
+		goto err_set_qs;
+
+	err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
+	if (err)
+		goto err_set_qs;
+
+	err = ice_up_complete(vsi);
+	if (err)
+		goto err_up_complete;
+
+	return 0;
+
+err_up_complete:
+	ice_down(vsi);
+err_set_qs:
+	ice_vsi_free_irq(vsi);
+err_setup_rx:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+	ice_vsi_free_tx_rings(vsi);
+
+	return err;
+}
+
+/**
+ * ice_vsi_close - Shut down a VSI
+ * @vsi: the VSI being shut down
+ */
+static void ice_vsi_close(struct ice_vsi *vsi)
+{
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state))
+		ice_down(vsi);
+
+	ice_vsi_free_irq(vsi);
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+}
+
 /**
  * ice_vsi_release - Delete a VSI and free its resources
  * @vsi: the VSI being removed
@@ -2099,6 +3179,9 @@ static int ice_vsi_release(struct ice_vsi *vsi)
 		vsi->netdev = NULL;
 	}
 
+	ice_vsi_dis_irq(vsi);
+	ice_vsi_close(vsi);
+
 	/* reclaim interrupt vectors back to PF */
 	ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
 	pf->num_avail_msix += vsi->num_q_vectors;
@@ -2116,3 +3199,56 @@ static int ice_vsi_release(struct ice_vsi *vsi)
 
 	return 0;
 }
+
+/**
+ * ice_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_open(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int err;
+
+	netif_carrier_off(netdev);
+
+	err = ice_vsi_open(vsi);
+
+	if (err)
+		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
+			   vsi->vsi_num, vsi->vsw->sw_id);
+	return err;
+}
+
+/**
+ * ice_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state.  The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+static int ice_stop(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	ice_vsi_close(vsi);
+
+	return 0;
+}
+
+static const struct net_device_ops ice_netdev_ops = {
+	.ndo_open = ice_open,
+	.ndo_stop = ice_stop,
+};
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 22039f9eb591..e50eebbf78e3 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -462,6 +462,18 @@ void ice_sched_cleanup_all(struct ice_hw *hw)
 	hw->max_cgds = 0;
 }
 
+/**
+ * ice_sched_get_qgrp_layer - get the current queue group layer number
+ * @hw: pointer to the hw struct
+ *
+ * This function returns the current queue group layer number
+ */
+static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
+{
+	/* It's always total layers - 1, the array is 0 relative so -2 */
+	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
+}
+
 /**
  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
  * @pi: port information structure
@@ -666,3 +678,96 @@ sched_query_out:
 	devm_kfree(ice_hw_to_dev(hw), buf);
 	return status;
 }
+
+/**
+ * ice_sched_get_vsi_info_entry - Get the vsi entry list for given vsi_id
+ * @pi: port information structure
+ * @vsi_id: vsi id
+ *
+ * This function retrieves the vsi list for the given vsi id
+ */
+static struct ice_sched_vsi_info *
+ice_sched_get_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
+{
+	struct ice_sched_vsi_info *list_elem;
+
+	if (!pi)
+		return NULL;
+
+	list_for_each_entry(list_elem, &pi->vsi_info_list, list_entry)
+		if (list_elem->vsi_id == vsi_id)
+			return list_elem;
+	return NULL;
+}
+
+/**
+ * ice_sched_find_node_in_subtree - Find node in part of base node subtree
+ * @hw: pointer to the hw struct
+ * @base: pointer to the base node
+ * @node: pointer to the node to search
+ *
+ * This function checks whether a given node is part of the base node
+ * subtree or not
+ */
+static bool
+ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
+			       struct ice_sched_node *node)
+{
+	u8 i;
+
+	for (i = 0; i < base->num_children; i++) {
+		struct ice_sched_node *child = base->children[i];
+
+		if (node == child)
+			return true;
+		if (child->tx_sched_layer > node->tx_sched_layer)
+			return false;
+		/* this recursion is intentional, and wouldn't
+		 * go more than 8 calls
+		 */
+		if (ice_sched_find_node_in_subtree(hw, child, node))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * ice_sched_get_free_qparent - Get a free lan or rdma q group node
+ * @pi: port information structure
+ * @vsi_id: vsi id
+ * @tc: branch number
+ * @owner: lan or rdma
+ *
+ * This function retrieves a free lan or rdma q group node
+ */
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+			   u8 owner)
+{
+	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
+	struct ice_sched_vsi_info *list_elem;
+	u16 max_children;
+	u8 qgrp_layer;
+
+	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
+	max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children);
+	list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id);
+	if (!list_elem)
+		goto lan_q_exit;
+	vsi_node = list_elem->vsi_node[tc];
+	/* validate invalid VSI id */
+	if (!vsi_node)
+		goto lan_q_exit;
+	/* get the first q group node from VSI sub-tree */
+	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
+	while (qgrp_node) {
+		/* make sure the qgroup node is part of the VSI subtree */
+		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+			if (qgrp_node->num_children < max_children &&
+			    qgrp_node->owner == owner)
+				break;
+		qgrp_node = qgrp_node->sibling;
+	}
+lan_q_exit:
+	return qgrp_node;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 2926ee9c373e..639859a3d0f0 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -6,6 +6,8 @@
 
 #include "ice_common.h"
 
+#define ICE_QGRP_LAYER_OFFSET	2
+
 struct ice_sched_agg_vsi_info {
 	struct list_head list_entry;
 	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
@@ -31,4 +33,7 @@ ice_sched_add_node(struct ice_port_info *pi, u8 layer,
 		   struct ice_aqc_txsched_elem_data *info);
 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+			   u8 owner);
 #endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 365dfb86dcb9..9a95c4ffd7d7 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -9,6 +9,7 @@ enum ice_status {
 	ICE_ERR_PARAM				= -1,
 	ICE_ERR_NOT_IMPL			= -2,
 	ICE_ERR_NOT_READY			= -3,
+	ICE_ERR_BAD_PTR				= -5,
 	ICE_ERR_INVAL_SIZE			= -6,
 	ICE_ERR_DEVICE_NOT_SUPPORTED		= -8,
 	ICE_ERR_RESET_FAILED			= -9,
@@ -18,6 +19,7 @@ enum ice_status {
 	ICE_ERR_OUT_OF_RANGE			= -13,
 	ICE_ERR_ALREADY_EXISTS			= -14,
 	ICE_ERR_DOES_NOT_EXIST			= -15,
+	ICE_ERR_MAX_LIMIT			= -17,
 	ICE_ERR_BUF_TOO_SHORT			= -52,
 	ICE_ERR_NVM_BLANK_MODE			= -53,
 	ICE_ERR_AQ_ERROR			= -100,
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
new file mode 100644
index 000000000000..6190ea30ee01
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* The driver transmit and receive code */
+
+#include <linux/prefetch.h>
+#include <linux/mm.h>
+#include "ice.h"
+
+/**
+ * ice_unmap_and_free_tx_buf - Release a Tx buffer
+ * @ring: the ring that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void
+ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
+{
+	if (tx_buf->skb) {
+		dev_kfree_skb_any(tx_buf->skb);
+		if (dma_unmap_len(tx_buf, len))
+			dma_unmap_single(ring->dev,
+					 dma_unmap_addr(tx_buf, dma),
+					 dma_unmap_len(tx_buf, len),
+					 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buf, len)) {
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buf, dma),
+			       dma_unmap_len(tx_buf, len),
+			       DMA_TO_DEVICE);
+	}
+
+	tx_buf->next_to_watch = NULL;
+	tx_buf->skb = NULL;
+	dma_unmap_len_set(tx_buf, len, 0);
+	/* tx_buf must be completely set up in the transmit path */
+}
+
+static struct netdev_queue *txring_txq(const struct ice_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->q_index);
+}
+
+/**
+ * ice_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ */
+void ice_clean_tx_ring(struct ice_ring *tx_ring)
+{
+	unsigned long size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!tx_ring->tx_buf)
+		return;
+
+	/* Free all the Tx ring sk_bufss */
+	for (i = 0; i < tx_ring->count; i++)
+		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+
+	size = sizeof(struct ice_tx_buf) * tx_ring->count;
+	memset(tx_ring->tx_buf, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	if (!tx_ring->netdev)
+		return;
+
+	/* cleanup Tx queue statistics */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * ice_free_tx_ring - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void ice_free_tx_ring(struct ice_ring *tx_ring)
+{
+	ice_clean_tx_ring(tx_ring);
+	devm_kfree(tx_ring->dev, tx_ring->tx_buf);
+	tx_ring->tx_buf = NULL;
+
+	if (tx_ring->desc) {
+		dmam_free_coherent(tx_ring->dev, tx_ring->size,
+				   tx_ring->desc, tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+}
+
+/**
+ * ice_setup_tx_ring - Allocate the Tx descriptors
+ * @tx_ring: the tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_tx_ring(struct ice_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int bi_size;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(tx_ring->tx_buf);
+	bi_size = sizeof(struct ice_tx_buf) * tx_ring->count;
+	tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
+	if (!tx_ring->tx_buf)
+		return -ENOMEM;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+	tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
+					    GFP_KERNEL);
+	if (!tx_ring->desc) {
+		dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+			tx_ring->size);
+		goto err;
+	}
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	return 0;
+
+err:
+	devm_kfree(dev, tx_ring->tx_buf);
+	tx_ring->tx_buf = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ */
+void ice_clean_rx_ring(struct ice_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	unsigned long size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!rx_ring->rx_buf)
+		return;
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+		if (rx_buf->skb) {
+			dev_kfree_skb(rx_buf->skb);
+			rx_buf->skb = NULL;
+		}
+		if (!rx_buf->page)
+			continue;
+
+		dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+		__free_pages(rx_buf->page, 0);
+
+		rx_buf->page = NULL;
+		rx_buf->page_offset = 0;
+	}
+
+	size = sizeof(struct ice_rx_buf) * rx_ring->count;
+	memset(rx_ring->rx_buf, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * ice_free_rx_ring - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void ice_free_rx_ring(struct ice_ring *rx_ring)
+{
+	ice_clean_rx_ring(rx_ring);
+	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
+	rx_ring->rx_buf = NULL;
+
+	if (rx_ring->desc) {
+		dmam_free_coherent(rx_ring->dev, rx_ring->size,
+				   rx_ring->desc, rx_ring->dma);
+		rx_ring->desc = NULL;
+	}
+}
+
+/**
+ * ice_setup_rx_ring - Allocate the Rx descriptors
+ * @rx_ring: the rx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_rx_ring(struct ice_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int bi_size;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(rx_ring->rx_buf);
+	bi_size = sizeof(struct ice_rx_buf) * rx_ring->count;
+	rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
+	if (!rx_ring->rx_buf)
+		return -ENOMEM;
+
+	/* round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+	rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
+					    GFP_KERNEL);
+	if (!rx_ring->desc) {
+		dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+			rx_ring->size);
+		goto err;
+	}
+
+	rx_ring->next_to_use = 0;
+	rx_ring->next_to_clean = 0;
+	return 0;
+
+err:
+	devm_kfree(dev, rx_ring->rx_buf);
+	rx_ring->rx_buf = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+	writel(val, rx_ring->tail);
+}
+
+/**
+ * ice_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buf struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ */
+static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
+				  struct ice_rx_buf *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!page))
+		return false;
+
+	/* map page for use */
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, 0);
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = 0;
+
+	return true;
+}
+
+/**
+ * ice_alloc_rx_bufs - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail
+ */
+bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
+{
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	struct ice_rx_buf *bi;
+
+	/* do nothing if no valid netdev defined */
+	if (!rx_ring->netdev || !cleaned_count)
+		return false;
+
+	/* get the RX descriptor and buffer based on next_to_use */
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	bi = &rx_ring->rx_buf[ntu];
+
+	do {
+		if (!ice_alloc_mapped_page(rx_ring, bi))
+			goto no_bufs;
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		ntu++;
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = ICE_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buf;
+			ntu = 0;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.status_error0 = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	return false;
+
+no_bufs:
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	/* make sure to come back via polling to try again after
+	 * allocation failure
+	 */
+	return true;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 2dd2232127a7..b7015cfad2d7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -5,6 +5,30 @@
 #define _ICE_TXRX_H_
 
 #define ICE_DFLT_IRQ_WORK	256
+#define ICE_RXBUF_2048		2048
+#define ICE_MAX_CHAINED_RX_BUFS	5
+#define ICE_MAX_TXQ_PER_TXQG	128
+
+#define ICE_DESC_UNUSED(R)	\
+	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	(R)->next_to_clean - (R)->next_to_use - 1)
+
+struct ice_tx_buf {
+	struct ice_tx_desc *next_to_watch;
+	struct sk_buff *skb;
+	unsigned int bytecount;
+	unsigned short gso_segs;
+	u32 tx_flags;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+};
+
+struct ice_rx_buf {
+	struct sk_buff *skb;
+	dma_addr_t dma;
+	struct page *page;
+	unsigned int page_offset;
+};
 
 /* this enum matches hardware bits and is meant to be used by DYN_CTLN
  * registers and QINT registers or more generally anywhere in the manual
@@ -18,33 +42,77 @@ enum ice_dyn_idx_t {
 	ICE_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
 };
 
+/* Header split modes defined by DTYPE field of Rx RLAN context */
+enum ice_rx_dtype {
+	ICE_RX_DTYPE_NO_SPLIT		= 0,
+	ICE_RX_DTYPE_HEADER_SPLIT	= 1,
+	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
+};
+
 /* indices into GLINT_ITR registers */
 #define ICE_RX_ITR	ICE_IDX_ITR0
+#define ICE_TX_ITR	ICE_IDX_ITR1
 #define ICE_ITR_DYNAMIC	0x8000  /* use top bit as a flag */
 #define ICE_ITR_8K	0x003E
 
 /* apply ITR HW granularity translation to program the HW registers */
 #define ITR_TO_REG(val, itr_gran) (((val) & ~ICE_ITR_DYNAMIC) >> (itr_gran))
 
+/* Legacy or Advanced Mode Queue */
+#define ICE_TX_ADVANCED	0
+#define ICE_TX_LEGACY	1
+
 /* descriptor ring, associated with a VSI */
 struct ice_ring {
 	struct ice_ring *next;		/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
 	struct net_device *netdev;	/* netdev ring maps to */
 	struct ice_vsi *vsi;		/* Backreference to associated VSI */
 	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
+	u8 __iomem *tail;
+	union {
+		struct ice_tx_buf *tx_buf;
+		struct ice_rx_buf *rx_buf;
+	};
 	u16 q_index;			/* Queue number of ring */
+	u32 txq_teid;			/* Added Tx queue TEID */
+
+	/* high bit set means dynamic, use accessor routines to read/write.
+	 * hardware supports 2us/1us resolution for the ITR registers.
+	 * these values always store the USER setting, and must be converted
+	 * before programming to a register.
+	 */
+	u16 rx_itr_setting;
+	u16 tx_itr_setting;
+
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
+
+	/* used in interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
 	bool ring_active;		/* is ring online or not */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
+	u16 next_to_alloc;
 } ____cacheline_internodealigned_in_smp;
 
+enum ice_latency_range {
+	ICE_LOWEST_LATENCY = 0,
+	ICE_LOW_LATENCY = 1,
+	ICE_BULK_LATENCY = 2,
+	ICE_ULTRA_LATENCY = 3,
+};
+
 struct ice_ring_container {
 	/* array of pointers to rings */
 	struct ice_ring *ring;
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_pkts;	/* total packets processed this int */
+	enum ice_latency_range latency_range;
 	u16 itr;
 };
 
@@ -52,4 +120,11 @@ struct ice_ring_container {
 #define ice_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos; pos = pos->next)
 
+bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
+void ice_clean_tx_ring(struct ice_ring *tx_ring);
+void ice_clean_rx_ring(struct ice_ring *rx_ring);
+int ice_setup_tx_ring(struct ice_ring *tx_ring);
+int ice_setup_rx_ring(struct ice_ring *rx_ring);
+void ice_free_tx_ring(struct ice_ring *tx_ring);
+void ice_free_rx_ring(struct ice_ring *rx_ring);
 #endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 8926715b76ee..991ac56ca7b3 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -8,9 +8,11 @@
 #include "ice_hw_autogen.h"
 #include "ice_osdep.h"
 #include "ice_controlq.h"
+#include "ice_lan_tx_rx.h"
 
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_QCTX		BIT_ULL(6)
 #define ICE_DBG_NVM		BIT_ULL(7)
 #define ICE_DBG_LAN		BIT_ULL(8)
 #define ICE_DBG_SW		BIT_ULL(13)

From 2b245cb29421abbad508e93cdfedf81adc12edf1 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:14 -0700
Subject: [PATCH 1260/1678] ice: Implement transmit and NAPI support

This patch implements ice_start_xmit (the handler for ndo_start_xmit) and
related functions. ice_start_xmit ultimately calls ice_tx_map, where the
Tx descriptor is built and posted to the hardware by bumping the ring tail.

This patch also implements ice_napi_poll, which is invoked when there's an
interrupt on the VSI's queues. The interrupt can be due to either a
completed Tx or an Rx event. In case of a completed Tx/Rx event, resources
are reclaimed. Additionally, in case of an Rx event, the skb is fetched
and passed up to the network stack.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |    1 +
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h    |   46 +
 drivers/net/ethernet/intel/ice/ice_main.c     |   55 +
 drivers/net/ethernet/intel/ice/ice_txrx.c     | 1026 ++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_txrx.h     |   45 +
 5 files changed, 1171 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 46cf82212ed8..6f01395a7f34 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -60,6 +60,7 @@
 		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
 		  ICE_AQ_VSI_UP_TABLE_UP##i##_M)
 
+#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
 #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
 
 #define ice_for_each_txq(vsi, i) \
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 3c1aeb74d950..e1221b9839d5 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -131,6 +131,33 @@ enum ice_rx_flg64_bits {
 	ICE_RXFLG_RSVD		= 63
 };
 
+/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */
+#define ICE_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for ice_32byte_rx_flex_desc.pkt_length member */
+#define ICE_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
+enum ice_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	ICE_RX_FLEX_DESC_STATUS0_EOF_S,
+	ICE_RX_FLEX_DESC_STATUS0_HBO_S,
+	ICE_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_LPBK_S,
+	ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	ICE_RX_FLEX_DESC_STATUS0_RXE_S,
+	ICE_RX_FLEX_DESC_STATUS0_CRCP_S,
+	ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
 #define ICE_RXQ_CTX_SIZE_DWORDS		8
 #define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
 
@@ -201,6 +228,25 @@ struct ice_tx_desc {
 	__le64 cmd_type_offset_bsz;
 };
 
+enum ice_tx_desc_dtype_value {
+	ICE_TX_DESC_DTYPE_DATA		= 0x0,
+	ICE_TX_DESC_DTYPE_CTX		= 0x1,
+	/* DESC_DONE - HW has completed write-back of descriptor */
+	ICE_TX_DESC_DTYPE_DESC_DONE	= 0xF,
+};
+
+#define ICE_TXD_QW1_CMD_S	4
+#define ICE_TXD_QW1_CMD_M	(0xFFFUL << ICE_TXD_QW1_CMD_S)
+
+enum ice_tx_desc_cmd_bits {
+	ICE_TX_DESC_CMD_EOP			= 0x0001,
+	ICE_TX_DESC_CMD_RS			= 0x0002,
+};
+
+#define ICE_TXD_QW1_OFFSET_S	16
+#define ICE_TXD_QW1_TX_BUF_SZ_S	34
+#define ICE_TXD_QW1_L2TAG1_S	48
+
 #define ICE_LAN_TXQ_MAX_QGRPS	127
 #define ICE_LAN_TXQ_MAX_QDIS	1023
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 58e9eb40f64a..dd07bf862afe 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1258,6 +1258,23 @@ err_txrings:
 	return -ENOMEM;
 }
 
+/**
+ * ice_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
+		return IRQ_HANDLED;
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * ice_vsi_alloc - Allocates the next available struct vsi in the PF
  * @pf: board private structure
@@ -1298,6 +1315,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type)
 		if (ice_vsi_alloc_arrays(vsi, true))
 			goto err_rings;
 
+		/* Setup default MSIX irq handler for VSI */
+		vsi->irq_handler = ice_msix_clean_rings;
 		break;
 	default:
 		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
@@ -1741,6 +1760,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
 	if (cpu_online(v_idx))
 		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
 
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+			       NAPI_POLL_WEIGHT);
 	/* tie q_vector and vsi together */
 	vsi->q_vectors[v_idx] = q_vector;
 
@@ -2914,6 +2936,21 @@ static int ice_vsi_stop_tx_rx_rings(struct ice_vsi *vsi)
 	return 0;
 }
 
+/**
+ * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_napi_enable_all(struct ice_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
+		napi_enable(&vsi->q_vectors[q_idx]->napi);
+}
+
 /**
  * ice_up_complete - Finish the last steps of bringing up a connection
  * @vsi: The VSI being configured
@@ -2939,6 +2976,7 @@ static int ice_up_complete(struct ice_vsi *vsi)
 		return err;
 
 	clear_bit(__ICE_DOWN, vsi->state);
+	ice_napi_enable_all(vsi);
 	ice_vsi_ena_irq(vsi);
 
 	if (vsi->port_info &&
@@ -2954,6 +2992,21 @@ static int ice_up_complete(struct ice_vsi *vsi)
 	return err;
 }
 
+/**
+ * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: VSI having NAPI disabled
+ */
+static void ice_napi_disable_all(struct ice_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
+		napi_disable(&vsi->q_vectors[q_idx]->napi);
+}
+
 /**
  * ice_down - Shutdown the connection
  * @vsi: The VSI being stopped
@@ -2972,6 +3025,7 @@ static int ice_down(struct ice_vsi *vsi)
 
 	ice_vsi_dis_irq(vsi);
 	err = ice_vsi_stop_tx_rx_rings(vsi);
+	ice_napi_disable_all(vsi);
 
 	ice_for_each_txq(vsi, i)
 		ice_clean_tx_ring(vsi->tx_rings[i]);
@@ -3251,4 +3305,5 @@ static int ice_stop(struct net_device *netdev)
 static const struct net_device_ops ice_netdev_ops = {
 	.ndo_open = ice_open,
 	.ndo_stop = ice_stop,
+	.ndo_start_xmit = ice_start_xmit,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 6190ea30ee01..1ccf8e69b85a 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -7,6 +7,8 @@
 #include <linux/mm.h>
 #include "ice.h"
 
+#define ICE_RX_HDR_SIZE		256
+
 /**
  * ice_unmap_and_free_tx_buf - Release a Tx buffer
  * @ring: the ring that owns the buffer
@@ -92,6 +94,129 @@ void ice_free_tx_ring(struct ice_ring *tx_ring)
 	}
 }
 
+/**
+ * ice_clean_tx_irq - Reclaim resources after transmit completes
+ * @vsi: the VSI we care about
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring,
+			     int napi_budget)
+{
+	unsigned int total_bytes = 0, total_pkts = 0;
+	unsigned int budget = vsi->work_lmt;
+	s16 i = tx_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+
+	tx_buf = &tx_ring->tx_buf[i];
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		smp_rmb();	/* prevent any other reads prior to eop_desc */
+
+		/* if the descriptor isn't done, no work yet to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_pkts += tx_buf->gso_segs;
+
+		/* free the skb */
+		napi_consume_skb(tx_buf->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buf data */
+		tx_buf->skb = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buf++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buf = tx_ring->tx_buf;
+				tx_desc = ICE_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buf, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buf, dma),
+					       dma_unmap_len(tx_buf, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buf, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.pkts += total_pkts;
+	u64_stats_update_end(&tx_ring->syncp);
+	tx_ring->q_vector->tx.total_bytes += total_bytes;
+	tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
+				  total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+	if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
+		     (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->q_index) &&
+		   !test_bit(__ICE_DOWN, vsi->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->q_index);
+			++tx_ring->tx_stats.restart_q;
+		}
+	}
+
+	return !!budget;
+}
+
 /**
  * ice_setup_tx_ring - Allocate the Tx descriptors
  * @tx_ring: the tx ring to set up
@@ -274,13 +399,17 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
 	dma_addr_t dma;
 
 	/* since we are recycling buffers we should seldom need to alloc */
-	if (likely(page))
+	if (likely(page)) {
+		rx_ring->rx_stats.page_reuse_count++;
 		return true;
+	}
 
 	/* alloc new page for storage */
 	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
-	if (unlikely(!page))
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
+	}
 
 	/* map page for use */
 	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
@@ -290,6 +419,7 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
 	 */
 	if (dma_mapping_error(rx_ring->dev, dma)) {
 		__free_pages(page, 0);
+		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
 	}
 
@@ -359,3 +489,895 @@ no_bufs:
 	 */
 	return true;
 }
+
+/**
+ * ice_page_is_reserved - check if reuse is possible
+ * @page: page struct to check
+ */
+static bool ice_page_is_reserved(struct page *page)
+{
+	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * ice_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_buf: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buf to place the data into
+ *
+ * This function will add the data contained in rx_buf->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ */
+static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf,
+			    union ice_32b_rx_flex_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ICE_RXBUF_2048;
+#else
+	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
+	unsigned int truesize;
+#endif /* PAGE_SIZE < 8192) */
+
+	struct page *page;
+	unsigned int size;
+
+	size = le16_to_cpu(rx_desc->wb.pkt_len) &
+		ICE_RX_FLX_DESC_PKT_LEN_M;
+
+	page = rx_buf->page;
+
+#if (PAGE_SIZE >= 8192)
+	truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif /* PAGE_SIZE >= 8192) */
+
+	/* will the data fit in the skb we allocated? if so, just
+	 * copy it as it is pretty small anyway
+	 */
+	if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
+		unsigned char *va = page_address(page) + rx_buf->page_offset;
+
+		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+		/* page is not reserved, we can reuse buffer as-is */
+		if (likely(!ice_page_is_reserved(page)))
+			return true;
+
+		/* this page cannot be reused so discard it */
+		__free_pages(page, 0);
+		return false;
+	}
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+			rx_buf->page_offset, size, truesize);
+
+	/* avoid re-using remote pages */
+	if (unlikely(ice_page_is_reserved(page)))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buf->page_offset ^= truesize;
+#else
+	/* move offset up to the next cache line */
+	rx_buf->page_offset += truesize;
+
+	if (rx_buf->page_offset > last_offset)
+		return false;
+#endif /* PAGE_SIZE < 8192) */
+
+	/* Even if we own the page, we are not allowed to use atomic_set()
+	 * This would break get_page_unless_zero() users.
+	 */
+	get_page(rx_buf->page);
+
+	return true;
+}
+
+/**
+ * ice_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buf: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void ice_reuse_rx_page(struct ice_ring *rx_ring,
+			      struct ice_rx_buf *old_buf)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct ice_rx_buf *new_buf;
+
+	new_buf = &rx_ring->rx_buf[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	*new_buf = *old_buf;
+}
+
+/**
+ * ice_fetch_rx_buf - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_desc: descriptor containing info written by hardware
+ *
+ * This function allocates an skb on the fly, and populates it with the page
+ * data from the current receive descriptor, taking care to set up the skb
+ * correctly, as well as handling calling the page recycle function if
+ * necessary.
+ */
+static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring,
+					union ice_32b_rx_flex_desc *rx_desc)
+{
+	struct ice_rx_buf *rx_buf;
+	struct sk_buff *skb;
+	struct page *page;
+
+	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+	page = rx_buf->page;
+	prefetchw(page);
+
+	skb = rx_buf->skb;
+
+	if (likely(!skb)) {
+		u8 *page_addr = page_address(page) + rx_buf->page_offset;
+
+		/* prefetch first cache line of first page */
+		prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+		prefetch((void *)(page_addr + L1_CACHE_BYTES));
+#endif /* L1_CACHE_BYTES */
+
+		/* allocate a skb to store the frags */
+		skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+				       ICE_RX_HDR_SIZE,
+				       GFP_ATOMIC | __GFP_NOWARN);
+		if (unlikely(!skb)) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			return NULL;
+		}
+
+		/* we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+
+		skb_record_rx_queue(skb, rx_ring->q_index);
+	} else {
+		/* we are reusing so sync this buffer for CPU use */
+		dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+					      rx_buf->page_offset,
+					      ICE_RXBUF_2048,
+					      DMA_FROM_DEVICE);
+
+		rx_buf->skb = NULL;
+	}
+
+	/* pull page into skb */
+	if (ice_add_rx_frag(rx_buf, rx_desc, skb)) {
+		/* hand second half of page back to the ring */
+		ice_reuse_rx_page(rx_ring, rx_buf);
+		rx_ring->rx_stats.page_reuse_count++;
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
+			       DMA_FROM_DEVICE);
+	}
+
+	/* clear contents of buffer_info */
+	rx_buf->page = NULL;
+
+	return skb;
+}
+
+/**
+ * ice_pull_tail - ice specific version of skb_pull_tail
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ice specific version of __pskb_pull_tail.  The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ice_pull_tail(struct sk_buff *skb)
+{
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int pull_len;
+	unsigned char *va;
+
+	/* it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+	/* we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * ice_cleanup_headers - Correct empty headers
+ * @skb: pointer to current skb being fixed
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool ice_cleanup_headers(struct sk_buff *skb)
+{
+	/* place header in linear portion of buffer */
+	if (skb_is_nonlinear(skb))
+		ice_pull_tail(skb);
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc,
+			     const u16 stat_err_bits)
+{
+	return !!(rx_desc->wb.status_error0 &
+		  cpu_to_le16(stat_err_bits));
+}
+
+/**
+ * ice_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ */
+static bool ice_is_non_eop(struct ice_ring *rx_ring,
+			   union ice_32b_rx_flex_desc *rx_desc,
+			   struct sk_buff *skb)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(ICE_RX_DESC(rx_ring, ntc));
+
+	/* if we are the last buffer then there is nothing else to do */
+#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
+	if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
+		return false;
+
+	/* place skb in next buffer to be received */
+	rx_ring->rx_buf[ntc].skb = skb;
+	rx_ring->rx_stats.non_eop_descs++;
+
+	return true;
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: vlan tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without vlan tag)
+ */
+static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb,
+			    u16 vlan_tag)
+{
+	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (vlan_tag & VLAN_VID_MASK)) {
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+	}
+	napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing.  The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	bool failure = false;
+
+	/* start the loop to process RX packets bounded by 'budget' */
+	while (likely(total_rx_pkts < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		struct sk_buff *skb;
+		u16 stat_err_bits;
+		u16 vlan_tag = 0;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= ICE_RX_BUF_WRITE) {
+			failure = failure ||
+				  ice_alloc_rx_bufs(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		/* get the RX desc from RX ring based on 'next_to_clean' */
+		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		/* status_error_len will always be zero for unused descriptors
+		 * because it's cleared in cleanup, and overlaps with hdr_addr
+		 * which is always zero because packet split isn't used, if the
+		 * hardware wrote DD then it will be non-zero
+		 */
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * DD bit is set.
+		 */
+		dma_rmb();
+
+		/* allocate (if needed) and populate skb */
+		skb = ice_fetch_rx_buf(rx_ring, rx_desc);
+		if (!skb)
+			break;
+
+		cleaned_count++;
+
+		/* skip if it is NOP desc */
+		if (ice_is_non_eop(rx_ring, rx_desc, skb))
+			continue;
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+		if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+		if (ice_test_staterr(rx_desc, stat_err_bits))
+			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+		/* correct empty headers and pad skb if needed (to make valid
+		 * ethernet frame
+		 */
+		if (ice_cleanup_headers(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* send completed skb up the stack */
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+
+		/* update budget accounting */
+		total_rx_pkts++;
+	}
+
+	/* update queue and vector specific stats */
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.pkts += total_rx_pkts;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
+	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_pkts;
+}
+
+/**
+ * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ */
+int ice_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct ice_q_vector *q_vector =
+				container_of(napi, struct ice_q_vector, napi);
+	struct ice_vsi *vsi = q_vector->vsi;
+	struct ice_pf *pf = vsi->back;
+	bool clean_complete = true;
+	int budget_per_ring = 0;
+	struct ice_ring *ring;
+	int work_done = 0;
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	ice_for_each_ring(ring, q_vector->tx)
+		if (!ice_clean_tx_irq(vsi, ring, budget))
+			clean_complete = false;
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		return budget;
+
+	/* We attempt to distribute budget to each Rx queue fairly, but don't
+	 * allow the budget to go below 1 because that would exit polling early.
+	 */
+	if (q_vector->num_ring_rx)
+		budget_per_ring = max(budget / q_vector->num_ring_rx, 1);
+
+	ice_for_each_ring(ring, q_vector->rx) {
+		int cleaned;
+
+		cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+		work_done += cleaned;
+		/* if we clean as many as budgeted, we must not be done */
+		if (cleaned >= budget_per_ring)
+			clean_complete = false;
+	}
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete)
+		return budget;
+
+	/* Work is done so exit the polling mode and re-enable the interrupt */
+	napi_complete_done(napi, work_done);
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
+	return 0;
+}
+
+/* helper function for building cmd/type/offset */
+static __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
+			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
+			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * __ice_maybe_stop_tx - 2nd level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the size buffer we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ */
+static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	/* Check again in a case another CPU has just made room available. */
+	if (likely(ICE_DESC_UNUSED(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_subqueue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->q_index);
+	++tx_ring->tx_stats.restart_q;
+	return 0;
+}
+
+/**
+ * ice_maybe_stop_tx - 1st level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ */
+static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
+{
+	if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+	return __ice_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * ice_tx_map - Build the Tx descriptor
+ * @tx_ring: ring to send buffer on
+ * @first: first buffer info buffer to use
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit descriptor.
+ */
+static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first)
+{
+	u64 td_offset = 0, td_tag = 0, td_cmd = 0;
+	u16 i = tx_ring->next_to_use;
+	struct skb_frag_struct *frag;
+	unsigned int data_len, size;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	struct sk_buff *skb;
+	dma_addr_t dma;
+
+	skb = first->skb;
+
+	data_len = skb->data_len;
+	size = skb_headlen(skb);
+
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buf = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buf, len, size);
+		dma_unmap_addr_set(tx_buf, dma, dma);
+
+		/* align size to end of page */
+		max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+
+		/* account for data chunks larger than the hardware
+		 * can handle
+		 */
+		while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
+			tx_desc->cmd_type_offset_bsz =
+				build_ctob(td_cmd, td_offset, max_data, td_tag);
+
+			tx_desc++;
+			i++;
+
+			if (i == tx_ring->count) {
+				tx_desc = ICE_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += max_data;
+			size -= max_data;
+
+			max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+			tx_desc->buf_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
+							  size, td_tag);
+
+		tx_desc++;
+		i++;
+
+		if (i == tx_ring->count) {
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buf = &tx_ring->tx_buf[i];
+	}
+
+	/* record bytecount for BQL */
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(first->skb);
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	/* write last descriptor with RS and EOP bits */
+	td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_ring->next_to_use = i;
+
+	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
+		writel(i, tx_ring->tail);
+
+		/* we need this if more than one processor can write to our tail
+		 * at a time, it synchronizes IO on IA64/Altix systems
+		 */
+		mmiowb();
+	}
+
+	return;
+
+dma_error:
+	/* clear dma mappings for failed tx_buf map */
+	for (;;) {
+		tx_buf = &tx_ring->tx_buf[i];
+		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
+		if (tx_buf == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+/**
+ * ice_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment.  For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + 1;
+ */
+static unsigned int ice_txd_use_count(unsigned int size)
+{
+	return ((size * 85) >> 20) + 1;
+}
+
+/**
+ * ice_xmit_desc_count - calculate number of tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
+{
+	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	unsigned int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += ice_txd_use_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * __ice_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb: send buffer
+ *
+ * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ */
+static bool __ice_chk_linearize(struct sk_buff *skb)
+{
+	const struct skb_frag_struct *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than 7 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (ICE_MAX_BUF_TXD - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of 6 fragments totals at least gso_size.
+	 */
+	nr_frags -= ICE_MAX_BUF_TXD - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1.  We
+	 * use this as the worst case scenerio in which the frag ahead
+	 * of us only provides one byte which is why we are limited to 6
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - skb_shinfo(skb)->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	stale = &skb_shinfo(skb)->frags[0];
+	for (;;) {
+		sum += skb_frag_size(frag++);
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= skb_frag_size(stale++);
+	}
+
+	return false;
+}
+
+/**
+ * ice_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb:      send buffer
+ * @count:    number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ */
+static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
+{
+	/* Both TSO and single send will work if count is less than 8 */
+	if (likely(count < ICE_MAX_BUF_TXD))
+		return false;
+
+	if (skb_is_gso(skb))
+		return __ice_chk_linearize(skb);
+
+	/* we can support up to 8 data buffers for a single send */
+	return count != ICE_MAX_BUF_TXD;
+}
+
+/**
+ * ice_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb: send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t
+ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
+{
+	struct ice_tx_buf *first;
+	unsigned int count;
+
+	count = ice_xmit_desc_count(skb);
+	if (ice_chk_linearize(skb, count)) {
+		if (__skb_linearize(skb))
+			goto out_drop;
+		count = ice_txd_use_count(skb->len);
+		tx_ring->tx_stats.tx_linearize++;
+	}
+
+	/* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
+	 *       + 4 desc gap to avoid the cache line where head is,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buf[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	first->gso_segs = 1;
+
+	ice_tx_map(tx_ring, first);
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/**
+ * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *tx_ring;
+
+	tx_ring = vsi->tx_rings[skb->queue_mapping];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (skb_put_padto(skb, ICE_MIN_TX_LEN))
+		return NETDEV_TX_OK;
+
+	return ice_xmit_frame_ring(skb, tx_ring);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b7015cfad2d7..8fa4450514b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -7,8 +7,23 @@
 #define ICE_DFLT_IRQ_WORK	256
 #define ICE_RXBUF_2048		2048
 #define ICE_MAX_CHAINED_RX_BUFS	5
+#define ICE_MAX_BUF_TXD		8
+#define ICE_MIN_TX_LEN		17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define ICE_MAX_READ_REQ_SIZE	4096
+#define ICE_MAX_DATA_PER_TXD	(16 * 1024 - 1)
+#define ICE_MAX_DATA_PER_TXD_ALIGNED \
+	(~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)
+
+#define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
 #define ICE_MAX_TXQ_PER_TXQG	128
 
+/* Tx Descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
 #define ICE_DESC_UNUSED(R)	\
 	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 	(R)->next_to_clean - (R)->next_to_use - 1)
@@ -30,6 +45,24 @@ struct ice_rx_buf {
 	unsigned int page_offset;
 };
 
+struct ice_q_stats {
+	u64 pkts;
+	u64 bytes;
+};
+
+struct ice_txq_stats {
+	u64 restart_q;
+	u64 tx_busy;
+	u64 tx_linearize;
+};
+
+struct ice_rxq_stats {
+	u64 non_eop_descs;
+	u64 alloc_page_failed;
+	u64 alloc_buf_failed;
+	u64 page_reuse_count;
+};
+
 /* this enum matches hardware bits and is meant to be used by DYN_CTLN
  * registers and QINT registers or more generally anywhere in the manual
  * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
@@ -94,6 +127,15 @@ struct ice_ring {
 	u16 next_to_clean;
 
 	bool ring_active;		/* is ring online or not */
+
+	/* stats structs */
+	struct ice_q_stats	stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct ice_txq_stats tx_stats;
+		struct ice_rxq_stats rx_stats;
+	};
+
 	unsigned int size;		/* length of descriptor ring in bytes */
 	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
@@ -121,10 +163,13 @@ struct ice_ring_container {
 	for (pos = (head).ring; pos; pos = pos->next)
 
 bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 void ice_clean_tx_ring(struct ice_ring *tx_ring);
 void ice_clean_rx_ring(struct ice_ring *rx_ring);
 int ice_setup_tx_ring(struct ice_ring *tx_ring);
 int ice_setup_rx_ring(struct ice_ring *rx_ring);
 void ice_free_tx_ring(struct ice_ring *tx_ring);
 void ice_free_rx_ring(struct ice_ring *rx_ring);
+int ice_napi_poll(struct napi_struct *napi, int budget);
+
 #endif /* _ICE_TXRX_H_ */

From d76a60ba7afb89523c88cf2ed3a044ce4180289e Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:15 -0700
Subject: [PATCH 1261/1678] ice: Add support for VLANs and offloads

This patch adds support for VLANs. When a VLAN is created a switch filter
is added to direct the VLAN traffic to the corresponding VSI. When a VLAN
is deleted, the filter is deleted as well.

This patch also adds support for the following hardware offloads.
    1) VLAN tag insertion/stripping
    2) Receive Side Scaling (RSS)
    3) Tx checksum and TCP segmentation
    4) Rx checksum

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  19 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  62 ++
 drivers/net/ethernet/intel/ice/ice_common.c   | 188 ++++++
 drivers/net/ethernet/intel/ice/ice_common.h   |  13 +
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h    | 169 +++++
 drivers/net/ethernet/intel/ice/ice_main.c     | 601 +++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_switch.c   | 169 +++++
 drivers/net/ethernet/intel/ice/ice_switch.h   |   4 +
 drivers/net/ethernet/intel/ice/ice_txrx.c     | 405 +++++++++++-
 drivers/net/ethernet/intel/ice/ice_txrx.h     |  17 +
 10 files changed, 1631 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 6f01395a7f34..51cc955c7220 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -23,7 +23,10 @@
 #include <linux/delay.h>
 #include <linux/bitmap.h>
 #include <linux/log2.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/if_bridge.h>
+#include <net/ipv6.h>
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
@@ -47,6 +50,8 @@
 #define ICE_MAX_SCATTER_RXQS	16
 #define ICE_Q_WAIT_RETRY_LIMIT	10
 #define ICE_Q_WAIT_MAX_RETRY	(5 * ICE_Q_WAIT_RETRY_LIMIT)
+#define ICE_MAX_LG_RSS_QS	256
+#define ICE_MAX_SMALL_RSS_QS	8
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
@@ -62,6 +67,7 @@
 
 #define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
 #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
+#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
 
 #define ice_for_each_txq(vsi, i) \
 	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
@@ -113,6 +119,7 @@ struct ice_vsi {
 	irqreturn_t (*irq_handler)(int irq, void *data);
 
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	int num_q_vectors;
 	int base_vector;
 	enum ice_vsi_type type;
@@ -122,6 +129,13 @@ struct ice_vsi {
 	/* Interrupt thresholds */
 	u16 work_lmt;
 
+	/* RSS config */
+	u16 rss_table_size;	/* HW RSS table size */
+	u16 rss_size;		/* Allocated RSS queues */
+	u8 *rss_hkey_user;	/* User configured hash keys */
+	u8 *rss_lut_user;	/* User configured lookup table entries */
+	u8 rss_lut_type;	/* used to configure Get/Set RSS LUT AQ call */
+
 	u16 max_frame;
 	u16 rx_buf_len;
 
@@ -181,6 +195,7 @@ struct ice_pf {
 	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
 	u32 msg_enable;
+	u32 hw_csum_rx_error;
 	u32 oicr_idx;		/* Other interrupt cause vector index */
 	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
 	u32 num_avail_msix;	/* remaining MSIX vectors left unclaimed */
@@ -224,4 +239,8 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
 	wr32(hw, GLINT_DYN_CTL(vector), val);
 }
 
+int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index c7abcc1dbbcc..051427dcfb3c 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -968,6 +968,60 @@ struct ice_aqc_nvm {
 	__le32	addr_low;
 };
 
+/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
+struct ice_aqc_get_set_rss_key {
+#define ICE_AQC_GSET_RSS_KEY_VSI_VALID	BIT(15)
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_S	0
+#define ICE_AQC_GSET_RSS_KEY_VSI_ID_M	(0x3FF << ICE_AQC_GSET_RSS_KEY_VSI_ID_S)
+	__le16 vsi_id;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE	0x28
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE	0xC
+
+struct ice_aqc_get_set_rss_keys {
+	u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+	u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE];
+};
+
+/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */
+struct  ice_aqc_get_set_rss_lut {
+#define ICE_AQC_GSET_RSS_LUT_VSI_VALID	BIT(15)
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_S	0
+#define ICE_AQC_GSET_RSS_LUT_VSI_ID_M	(0x1FF << ICE_AQC_GSET_RSS_LUT_VSI_ID_S)
+	__le16 vsi_id;
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S	0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M	\
+				(0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI	 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF	 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL	 2
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S	 2
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M	 \
+				(0x3 << ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S)
+
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128	 128
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG 0
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512	 512
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG 1
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K	 2048
+#define ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG	 2
+
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S	 4
+#define ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M	 \
+				(0xF << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S)
+
+	__le16 flags;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* Add TX LAN Queues (indirect 0x0C30) */
 struct ice_aqc_add_txqs {
 	u8 num_qgrps;
@@ -1089,6 +1143,8 @@ struct ice_aq_desc {
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
+		struct ice_aqc_get_set_rss_lut get_set_rss_lut;
+		struct ice_aqc_get_set_rss_key get_set_rss_key;
 		struct ice_aqc_add_txqs add_txqs;
 		struct ice_aqc_dis_txqs dis_txqs;
 		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
@@ -1171,6 +1227,12 @@ enum ice_adminq_opc {
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
 
+	/* RSS commands */
+	ice_aqc_opc_set_rss_key				= 0x0B02,
+	ice_aqc_opc_set_rss_lut				= 0x0B03,
+	ice_aqc_opc_get_rss_key				= 0x0B04,
+	ice_aqc_opc_get_rss_lut				= 0x0B05,
+
 	/* TX queue handling commands/events */
 	ice_aqc_opc_add_txqs				= 0x0C30,
 	ice_aqc_opc_dis_txqs				= 0x0C31,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3d5686636656..b4e834359a95 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1261,6 +1261,194 @@ void ice_clear_pxe_mode(struct ice_hw *hw)
 		ice_aq_clear_pxe_mode(hw);
 }
 
+/**
+ * __ice_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ * @glob_lut_idx: global LUT index
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
+ */
+static enum ice_status
+__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+			 u16 lut_size, u8 glob_lut_idx, bool set)
+{
+	struct ice_aqc_get_set_rss_lut *cmd_resp;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+	u16 flags = 0;
+
+	cmd_resp = &desc.params.get_set_rss_lut;
+
+	if (set) {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_lut);
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	} else {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_lut);
+	}
+
+	cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+					 ICE_AQC_GSET_RSS_LUT_VSI_ID_S) &
+					ICE_AQC_GSET_RSS_LUT_VSI_ID_M) |
+				       ICE_AQC_GSET_RSS_LUT_VSI_VALID);
+
+	switch (lut_type) {
+	case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI:
+	case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF:
+	case ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL:
+		flags |= ((lut_type << ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_S) &
+			  ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_M);
+		break;
+	default:
+		status = ICE_ERR_PARAM;
+		goto ice_aq_get_set_rss_lut_exit;
+	}
+
+	if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_GLOBAL) {
+		flags |= ((glob_lut_idx << ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_S) &
+			  ICE_AQC_GSET_RSS_LUT_GLOBAL_IDX_M);
+
+		if (!set)
+			goto ice_aq_get_set_rss_lut_send;
+	} else if (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF) {
+		if (!set)
+			goto ice_aq_get_set_rss_lut_send;
+	} else {
+		goto ice_aq_get_set_rss_lut_send;
+	}
+
+	/* LUT size is only valid for Global and PF table types */
+	if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128) {
+		flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_128_FLAG <<
+			  ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+			 ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+	} else if (lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512) {
+		flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_512_FLAG <<
+			  ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+			 ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+	} else if ((lut_size == ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K) &&
+		   (lut_type == ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF)) {
+		flags |= (ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_2K_FLAG <<
+			  ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_S) &
+			 ICE_AQC_GSET_RSS_LUT_TABLE_SIZE_M;
+	} else {
+		status = ICE_ERR_PARAM;
+		goto ice_aq_get_set_rss_lut_exit;
+	}
+
+ice_aq_get_set_rss_lut_send:
+	cmd_resp->flags = cpu_to_le16(flags);
+	status = ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL);
+
+ice_aq_get_set_rss_lut_exit:
+	return status;
+}
+
+/**
+ * ice_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ *
+ * get the RSS lookup table, PF or VSI type
+ */
+enum ice_status
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+		   u16 lut_size)
+{
+	return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0,
+					false);
+}
+
+/**
+ * ice_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: VSI FW index
+ * @lut_type: LUT table type
+ * @lut: pointer to the LUT buffer provided by the caller
+ * @lut_size: size of the LUT buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ */
+enum ice_status
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+		   u16 lut_size)
+{
+	return __ice_aq_get_set_rss_lut(hw, vsi_id, lut_type, lut, lut_size, 0,
+					true);
+}
+
+/**
+ * __ice_aq_get_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get (0x0B04) or set (0x0B02) the RSS key per VSI
+ */
+static enum
+ice_status __ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+				    struct ice_aqc_get_set_rss_keys *key,
+				    bool set)
+{
+	struct ice_aqc_get_set_rss_key *cmd_resp;
+	u16 key_size = sizeof(*key);
+	struct ice_aq_desc desc;
+
+	cmd_resp = &desc.params.get_set_rss_key;
+
+	if (set) {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key);
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	} else {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key);
+	}
+
+	cmd_resp->vsi_id = cpu_to_le16(((vsi_id <<
+					 ICE_AQC_GSET_RSS_KEY_VSI_ID_S) &
+					ICE_AQC_GSET_RSS_KEY_VSI_ID_M) |
+				       ICE_AQC_GSET_RSS_KEY_VSI_VALID);
+
+	return ice_aq_send_cmd(hw, &desc, key, key_size, NULL);
+}
+
+/**
+ * ice_aq_get_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ *
+ * get the RSS key per VSI
+ */
+enum ice_status
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id,
+		   struct ice_aqc_get_set_rss_keys *key)
+{
+	return __ice_aq_get_set_rss_key(hw, vsi_id, key, false);
+}
+
+/**
+ * ice_aq_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: VSI FW index
+ * @keys: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ */
+enum ice_status
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+		   struct ice_aqc_get_set_rss_keys *keys)
+{
+	return __ice_aq_get_set_rss_key(hw, vsi_id, keys, true);
+}
+
 /**
  * ice_aq_add_lan_txq
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 8ed1135bb189..38719ba01d9b 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -33,6 +33,19 @@ enum ice_status ice_get_caps(struct ice_hw *hw);
 enum ice_status
 ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 		  u32 rxq_index);
+
+enum ice_status
+ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+		   u16 lut_size);
+enum ice_status
+ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
+		   u16 lut_size);
+enum ice_status
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_id,
+		   struct ice_aqc_get_set_rss_keys *keys);
+enum ice_status
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+		   struct ice_aqc_get_set_rss_keys *keys);
 bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
 enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
 void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index e1221b9839d5..57acbcff740b 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -40,6 +40,65 @@ union ice_32byte_rx_desc {
 	} wb; /* writeback */
 };
 
+struct ice_rx_ptype_decoded {
+	u32 ptype:10;
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:2;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+enum ice_rx_ptype_outer_ip {
+	ICE_RX_PTYPE_OUTER_L2	= 0,
+	ICE_RX_PTYPE_OUTER_IP	= 1,
+};
+
+enum ice_rx_ptype_outer_ip_ver {
+	ICE_RX_PTYPE_OUTER_NONE	= 0,
+	ICE_RX_PTYPE_OUTER_IPV4	= 1,
+	ICE_RX_PTYPE_OUTER_IPV6	= 2,
+};
+
+enum ice_rx_ptype_outer_fragmented {
+	ICE_RX_PTYPE_NOT_FRAG	= 0,
+	ICE_RX_PTYPE_FRAG	= 1,
+};
+
+enum ice_rx_ptype_tunnel_type {
+	ICE_RX_PTYPE_TUNNEL_NONE		= 0,
+	ICE_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum ice_rx_ptype_tunnel_end_prot {
+	ICE_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	ICE_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	ICE_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum ice_rx_ptype_inner_prot {
+	ICE_RX_PTYPE_INNER_PROT_NONE		= 0,
+	ICE_RX_PTYPE_INNER_PROT_UDP		= 1,
+	ICE_RX_PTYPE_INNER_PROT_TCP		= 2,
+	ICE_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	ICE_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	ICE_RX_PTYPE_INNER_PROT_TIMESYNC	= 5,
+};
+
+enum ice_rx_ptype_payload_layer {
+	ICE_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
 /* RX Flex Descriptor
  * This descriptor is used instead of the legacy version descriptor when
  * ice_rlan_ctx.adv_desc is set
@@ -88,6 +147,41 @@ union ice_32b_rx_flex_desc {
 	} wb; /* writeback */
 };
 
+/* Rx Flex Descriptor NIC Profile
+ * This descriptor corresponds to RxDID 2 which contains
+ * metadata fields for RSS, flow id and timestamp info
+ */
+struct ice_32b_rx_flex_desc_nic {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 vlan_id;
+			__le16 flow_id_ipv6;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
 /* Receive Flex Descriptor profile IDs: There are a total
  * of 64 profiles where profile IDs 0/1 are for legacy; and
  * profiles 2-63 are flex profiles that can be programmed
@@ -241,12 +335,56 @@ enum ice_tx_desc_dtype_value {
 enum ice_tx_desc_cmd_bits {
 	ICE_TX_DESC_CMD_EOP			= 0x0001,
 	ICE_TX_DESC_CMD_RS			= 0x0002,
+	ICE_TX_DESC_CMD_IL2TAG1			= 0x0008,
+	ICE_TX_DESC_CMD_IIPT_IPV6		= 0x0020, /* 2 BITS */
+	ICE_TX_DESC_CMD_IIPT_IPV4		= 0x0040, /* 2 BITS */
+	ICE_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060, /* 2 BITS */
+	ICE_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100, /* 2 BITS */
+	ICE_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300, /* 2 BITS */
 };
 
 #define ICE_TXD_QW1_OFFSET_S	16
+#define ICE_TXD_QW1_OFFSET_M	(0x3FFFFULL << ICE_TXD_QW1_OFFSET_S)
+
+enum ice_tx_desc_len_fields {
+	/* Note: These are predefined bit offsets */
+	ICE_TX_DESC_LEN_MACLEN_S	= 0, /* 7 BITS */
+	ICE_TX_DESC_LEN_IPLEN_S	= 7, /* 7 BITS */
+	ICE_TX_DESC_LEN_L4_LEN_S	= 14 /* 4 BITS */
+};
+
 #define ICE_TXD_QW1_TX_BUF_SZ_S	34
 #define ICE_TXD_QW1_L2TAG1_S	48
 
+/* Context descriptors */
+struct ice_tx_ctx_desc {
+	__le32 tunneling_params;
+	__le16 l2tag2;
+	__le16 rsvd;
+	__le64 qw1;
+};
+
+#define ICE_TXD_CTX_QW1_CMD_S	4
+#define ICE_TXD_CTX_QW1_CMD_M	(0x7FUL << ICE_TXD_CTX_QW1_CMD_S)
+
+#define ICE_TXD_CTX_QW1_TSO_LEN_S	30
+#define ICE_TXD_CTX_QW1_TSO_LEN_M	\
+			(0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
+
+#define ICE_TXD_CTX_QW1_MSS_S	50
+
+enum ice_tx_ctx_desc_cmd_bits {
+	ICE_TX_CTX_DESC_TSO		= 0x01,
+	ICE_TX_CTX_DESC_TSYN		= 0x02,
+	ICE_TX_CTX_DESC_IL2TAG2		= 0x04,
+	ICE_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
+	ICE_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	ICE_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	ICE_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	ICE_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	ICE_TX_CTX_DESC_RESERVED	= 0x40
+};
+
 #define ICE_LAN_TXQ_MAX_QGRPS	127
 #define ICE_LAN_TXQ_MAX_QDIS	1023
 
@@ -289,4 +427,35 @@ struct ice_tlan_ctx {
 	u8  pkt_shaper_prof_idx;
 	u8  int_q_state;	/* width not needed - internal do not write */
 };
+
+/* macro to make the table lines short */
+#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	{	PTYPE, \
+		1, \
+		ICE_RX_PTYPE_OUTER_##OUTER_IP, \
+		ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		ICE_RX_PTYPE_##OUTER_FRAG, \
+		ICE_RX_PTYPE_TUNNEL_##T, \
+		ICE_RX_PTYPE_TUNNEL_END_##TE, \
+		ICE_RX_PTYPE_##TEF, \
+		ICE_RX_PTYPE_INNER_PROT_##I, \
+		ICE_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define ICE_RX_PTYPE_NOF		ICE_RX_PTYPE_NOT_FRAG
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+static const struct ice_rx_ptype_decoded ice_ptype_lkup[] = {
+	/* L2 Packet types */
+	ICE_PTT_UNUSED_ENTRY(0),
+	ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	ICE_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+};
+
+static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
+{
+	return ice_ptype_lkup[ptype];
+}
 #endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index dd07bf862afe..2124e7ad640c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -663,6 +663,35 @@ free_q_irqs:
 	return err;
 }
 
+/**
+ * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
+{
+	struct ice_hw_common_caps *cap;
+	struct ice_pf *pf = vsi->back;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		vsi->rss_size = 1;
+		return;
+	}
+
+	cap = &pf->hw.func_caps.common_cap;
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		vsi->rss_table_size = cap->rss_table_size;
+		vsi->rss_size = min_t(int, num_online_cpus(),
+				      BIT(cap->rss_table_entry_width));
+		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		break;
+	}
+}
+
 /**
  * ice_vsi_setup_q_map - Setup a VSI queue map
  * @vsi: the VSI being configured
@@ -670,7 +699,8 @@ free_q_irqs:
  */
 static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 {
-	u16 offset = 0, qmap = 0, pow = 0, qcount;
+	u16 offset = 0, qmap = 0, numq_tc;
+	u16 pow = 0, max_rss = 0, qcount;
 	u16 qcount_tx = vsi->alloc_txq;
 	u16 qcount_rx = vsi->alloc_rxq;
 	bool ena_tc0 = false;
@@ -689,13 +719,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 		vsi->tc_cfg.ena_tc |= 1;
 	}
 
-	qcount = qcount_rx / vsi->tc_cfg.numtc;
-
-	/* find higher power-of-2 of qcount */
-	pow = ilog2(qcount);
-
-	if (!is_power_of_2(qcount))
-		pow++;
+	numq_tc = qcount_rx / vsi->tc_cfg.numtc;
 
 	/* TC mapping is a function of the number of Rx queues assigned to the
 	 * VSI for each traffic class and the offset of these queues.
@@ -708,6 +732,26 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 	 *
 	 * Setup number and offset of Rx queues for all TCs for the VSI
 	 */
+
+	/* qcount will change if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
+		if (vsi->type == ICE_VSI_PF)
+			max_rss = ICE_MAX_LG_RSS_QS;
+		else
+			max_rss = ICE_MAX_SMALL_RSS_QS;
+
+		qcount = min_t(int, numq_tc, max_rss);
+		qcount = min_t(int, qcount, vsi->rss_size);
+	} else {
+		qcount = numq_tc;
+	}
+
+	/* find higher power-of-2 of qcount */
+	pow = ilog2(qcount);
+
+	if (!is_power_of_2(qcount))
+		pow++;
+
 	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
 		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
 			/* TC is not enabled */
@@ -783,6 +827,33 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
 	/* No Outer tag support outer_tag_flags remains to zero */
 }
 
+/**
+ * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 lut_type, hash_type;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	default:
+		dev_warn(&vsi->back->pdev->dev, "Unknown VSI type %d\n",
+			 vsi->type);
+		return;
+	}
+
+	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+				ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
+				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+}
+
 /**
  * ice_vsi_add - Create a new VSI or fetch preallocated VSI
  * @vsi: the VSI being configured
@@ -810,6 +881,10 @@ static int ice_vsi_add(struct ice_vsi *vsi)
 	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
 		ctxt.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
 
+	/* Set LUT type and HASH type if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		ice_set_rss_vsi_ctx(&ctxt, vsi);
+
 	ctxt.info.sw_id = vsi->port_info->sw_id;
 	ice_vsi_setup_q_map(vsi, &ctxt);
 
@@ -1629,6 +1704,10 @@ static void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
  */
 static int ice_cfg_netdev(struct ice_vsi *vsi)
 {
+	netdev_features_t csumo_features;
+	netdev_features_t vlano_features;
+	netdev_features_t dflt_features;
+	netdev_features_t tso_features;
 	struct ice_netdev_priv *np;
 	struct net_device *netdev;
 	u8 mac_addr[ETH_ALEN];
@@ -1642,13 +1721,31 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	np = netdev_priv(netdev);
 	np->vsi = vsi;
 
+	dflt_features = NETIF_F_SG	|
+			NETIF_F_HIGHDMA	|
+			NETIF_F_RXHASH;
+
+	csumo_features = NETIF_F_RXCSUM	  |
+			 NETIF_F_IP_CSUM  |
+			 NETIF_F_IPV6_CSUM;
+
+	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
+			 NETIF_F_HW_VLAN_CTAG_TX     |
+			 NETIF_F_HW_VLAN_CTAG_RX;
+
+	tso_features = NETIF_F_TSO;
+
 	/* set features that user can change */
-	netdev->hw_features = NETIF_F_SG	|
-			      NETIF_F_HIGHDMA	|
-			      NETIF_F_RXHASH;
+	netdev->hw_features = dflt_features | csumo_features |
+			      vlano_features | tso_features;
 
 	/* enable features */
 	netdev->features |= netdev->hw_features;
+	/* encap and VLAN devices inherit default, csumo and tso features */
+	netdev->hw_enc_features |= dflt_features | csumo_features |
+				   tso_features;
+	netdev->vlan_features |= dflt_features | csumo_features |
+				 tso_features;
 
 	if (vsi->type == ICE_VSI_PF) {
 		SET_NETDEV_DEV(netdev, &vsi->back->pdev->dev);
@@ -1862,6 +1959,83 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 	return 0;
 }
 
+/**
+ * ice_fill_rss_lut - Fill the RSS lookup table with default values
+ * @lut: Lookup table
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ */
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
+{
+	u16 i;
+
+	for (i = 0; i < rss_table_size; i++)
+		lut[i] = i % rss_size;
+}
+
+/**
+ * ice_vsi_cfg_rss - Configure RSS params for a VSI
+ * @vsi: VSI to be configured
+ */
+static int ice_vsi_cfg_rss(struct ice_vsi *vsi)
+{
+	u8 seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+	struct ice_aqc_get_set_rss_keys *key;
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	int err = 0;
+	u8 *lut;
+
+	vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
+
+	lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+
+	status = ice_aq_set_rss_lut(&pf->hw, vsi->vsi_num, vsi->rss_lut_type,
+				    lut, vsi->rss_table_size);
+
+	if (status) {
+		dev_err(&vsi->back->pdev->dev,
+			"set_rss_lut failed, error %d\n", status);
+		err = -EIO;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	key = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*key), GFP_KERNEL);
+	if (!key) {
+		err = -ENOMEM;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user,
+		       ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed,
+				    ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
+	memcpy(&key->standard_rss_key, seed,
+	       ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
+
+	status = ice_aq_set_rss_key(&pf->hw, vsi->vsi_num, key);
+
+	if (status) {
+		dev_err(&vsi->back->pdev->dev, "set_rss_key failed, error %d\n",
+			status);
+		err = -EIO;
+	}
+
+	devm_kfree(&pf->pdev->dev, key);
+ice_vsi_cfg_rss_exit:
+	devm_kfree(&pf->pdev->dev, lut);
+	return err;
+}
+
 /**
  * ice_vsi_setup - Set up a VSI by a given type
  * @pf: board private structure
@@ -1897,6 +2071,9 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
 		goto err_get_qs;
 	}
 
+	/* set RSS capabilities */
+	ice_vsi_set_rss_params(vsi);
+
 	/* create the VSI */
 	ret = ice_vsi_add(vsi);
 	if (ret)
@@ -1932,6 +2109,12 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
 
 		ice_vsi_map_rings_to_vectors(vsi);
 
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+			ice_vsi_cfg_rss(vsi);
 		break;
 	default:
 		/* if vsi type is not recognized, clean up the resources and
@@ -1966,6 +2149,140 @@ err_get_qs:
 	return NULL;
 }
 
+/**
+ * ice_vsi_add_vlan - Add vsi membership for given vlan
+ * @vsi: the vsi being configured
+ * @vid: vlan id to be added
+ */
+static int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
+{
+	struct ice_fltr_list_entry *tmp;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+	enum ice_status status;
+	int err = 0;
+
+	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.flag = ICE_FLTR_TX;
+	tmp->fltr_info.src = vsi->vsi_num;
+	tmp->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
+	tmp->fltr_info.l_data.vlan.vlan_id = vid;
+
+	INIT_LIST_HEAD(&tmp->list_entry);
+	list_add(&tmp->list_entry, &tmp_add_list);
+
+	status = ice_add_vlan(&pf->hw, &tmp_add_list);
+	if (status) {
+		err = -ENODEV;
+		dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n",
+			vid, vsi->vsi_num);
+	}
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	return err;
+}
+
+/**
+ * ice_vlan_rx_add_vid - Add a vlan id filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol
+ * @vid: vlan id to be added
+ *
+ * net_device_ops implementation for adding vlan ids
+ */
+static int ice_vlan_rx_add_vid(struct net_device *netdev,
+			       __always_unused __be16 proto, u16 vid)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret = 0;
+
+	if (vid >= VLAN_N_VID) {
+		netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
+			   vid, VLAN_N_VID);
+		return -EINVAL;
+	}
+
+	if (vsi->info.pvid)
+		return -EINVAL;
+
+	/* Add all VLAN ids including 0 to the switch filter. VLAN id 0 is
+	 * needed to continue allowing all untagged packets since VLAN prune
+	 * list is applied to all packets by the switch
+	 */
+	ret = ice_vsi_add_vlan(vsi, vid);
+
+	if (!ret)
+		set_bit(vid, vsi->active_vlans);
+
+	return ret;
+}
+
+/**
+ * ice_vsi_kill_vlan - Remove VSI membership for a given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN id to be removed
+ */
+static void ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
+{
+	struct ice_fltr_list_entry *list;
+	struct ice_pf *pf = vsi->back;
+	LIST_HEAD(tmp_add_list);
+
+	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	if (!list)
+		return;
+
+	list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
+	list->fltr_info.fwd_id.vsi_id = vsi->vsi_num;
+	list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	list->fltr_info.l_data.vlan.vlan_id = vid;
+	list->fltr_info.flag = ICE_FLTR_TX;
+	list->fltr_info.src = vsi->vsi_num;
+
+	INIT_LIST_HEAD(&list->list_entry);
+	list_add(&list->list_entry, &tmp_add_list);
+
+	if (ice_remove_vlan(&pf->hw, &tmp_add_list))
+		dev_err(&pf->pdev->dev, "Error removing VLAN %d on vsi %i\n",
+			vid, vsi->vsi_num);
+
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+}
+
+/**
+ * ice_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol
+ * @vid: vlan id to be removed
+ *
+ * net_device_ops implementation for removing vlan ids
+ */
+static int ice_vlan_rx_kill_vid(struct net_device *netdev,
+				__always_unused __be16 proto, u16 vid)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (vsi->info.pvid)
+		return -EINVAL;
+
+	/* return code is ignored as there is nothing a user
+	 * can do about failure to remove and a log message was
+	 * already printed from the other function
+	 */
+	ice_vsi_kill_vlan(vsi, vid);
+
+	clear_bit(vid, vsi->active_vlans);
+
+	return 0;
+}
+
 /**
  * ice_setup_pf_sw - Setup the HW switch on startup or after reset
  * @pf: board private structure
@@ -2047,9 +2364,14 @@ static void ice_determine_q_usage(struct ice_pf *pf)
 	q_left_tx = pf->hw.func_caps.common_cap.num_txq;
 	q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
 
-	/* initial support for only 1 tx and 1 rx queue */
+	/* initial support for only 1 tx queue */
 	pf->num_lan_tx = 1;
-	pf->num_lan_rx = 1;
+
+	/* only 1 rx queue unless RSS is enabled */
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		pf->num_lan_rx = 1;
+	else
+		pf->num_lan_rx = min_t(int, q_left_rx, num_online_cpus());
 
 	pf->q_left_tx = q_left_tx - pf->num_lan_tx;
 	pf->q_left_rx = q_left_rx - pf->num_lan_rx;
@@ -2087,6 +2409,9 @@ static void ice_init_pf(struct ice_pf *pf)
 	bitmap_zero(pf->avail_rxqs, ICE_MAX_RXQS);
 	mutex_unlock(&pf->avail_q_mutex);
 
+	if (pf->hw.func_caps.common_cap.rss_table_size)
+		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
+
 	/* setup service timer and periodic service task */
 	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
 	pf->serv_tmr_period = HZ;
@@ -2483,6 +2808,144 @@ static void __exit ice_module_exit(void)
 }
 module_exit(ice_module_exit);
 
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the vsi being changed
+ */
+static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx ctxt = { 0 };
+	enum ice_status status;
+
+	/* Here we are configuring the VSI to let the driver add VLAN tags by
+	 * setting port_vlan_flags to ICE_AQ_VSI_PVLAN_MODE_ALL. The actual VLAN
+	 * tag insertion happens in the Tx hot path, in ice_tx_map.
+	 */
+	ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_MODE_ALL;
+
+	ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+	ctxt.vsi_num = vsi->vsi_num;
+
+	status = ice_aq_update_vsi(hw, &ctxt, NULL);
+	if (status) {
+		dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		return -EIO;
+	}
+
+	vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags;
+	return 0;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the vsi being changed
+ * @ena: boolean value indicating if this is a enable or disable request
+ */
+static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx ctxt = { 0 };
+	enum ice_status status;
+
+	/* Here we are configuring what the VSI should do with the VLAN tag in
+	 * the Rx packet. We can either leave the tag in the packet or put it in
+	 * the Rx descriptor.
+	 */
+	if (ena) {
+		/* Strip VLAN tag from Rx packet and put it in the desc */
+		ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_STR_BOTH;
+	} else {
+		/* Disable stripping. Leave tag in packet */
+		ctxt.info.port_vlan_flags = ICE_AQ_VSI_PVLAN_EMOD_NOTHING;
+	}
+
+	ctxt.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+	ctxt.vsi_num = vsi->vsi_num;
+
+	status = ice_aq_update_vsi(hw, &ctxt, NULL);
+	if (status) {
+		dev_err(dev, "update VSI for VALN strip failed, ena = %d err %d aq_err %d\n",
+			ena, status, hw->adminq.sq_last_status);
+		return -EIO;
+	}
+
+	vsi->info.port_vlan_flags = ctxt.info.port_vlan_flags;
+	return 0;
+}
+
+/**
+ * ice_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int ice_set_features(struct net_device *netdev,
+			    netdev_features_t features)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret = 0;
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
+		ret = ice_vsi_manage_vlan_stripping(vsi, true);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_RX) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
+		ret = ice_vsi_manage_vlan_stripping(vsi, false);
+	else if ((features & NETIF_F_HW_VLAN_CTAG_TX) &&
+		 !(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+		ret = ice_vsi_manage_vlan_insertion(vsi);
+	else if (!(features & NETIF_F_HW_VLAN_CTAG_TX) &&
+		 (netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
+		ret = ice_vsi_manage_vlan_insertion(vsi);
+
+	return ret;
+}
+
+/**
+ * ice_vsi_vlan_setup - Setup vlan offload properties on a VSI
+ * @vsi: VSI to setup vlan properties for
+ */
+static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
+{
+	int ret = 0;
+
+	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		ret = ice_vsi_manage_vlan_stripping(vsi, true);
+	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)
+		ret = ice_vsi_manage_vlan_insertion(vsi);
+
+	return ret;
+}
+
+/**
+ * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up
+ * @vsi: the VSI being brought back up
+ */
+static int ice_restore_vlan(struct ice_vsi *vsi)
+{
+	int err;
+	u16 vid;
+
+	if (!vsi->netdev)
+		return -EINVAL;
+
+	err = ice_vsi_vlan_setup(vsi);
+	if (err)
+		return err;
+
+	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) {
+		err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
 /**
  * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
  * @ring: The Tx ring to configure
@@ -2735,6 +3198,10 @@ static int ice_vsi_cfg(struct ice_vsi *vsi)
 {
 	int err;
 
+	err = ice_restore_vlan(vsi);
+	if (err)
+		return err;
+
 	err = ice_vsi_cfg_txqs(vsi);
 	if (!err)
 		err = ice_vsi_cfg_rxqs(vsi);
@@ -3213,6 +3680,22 @@ static void ice_vsi_close(struct ice_vsi *vsi)
 	ice_vsi_free_rx_rings(vsi);
 }
 
+/**
+ * ice_rss_clean - Delete RSS related VSI structures that hold user inputs
+ * @vsi: the VSI being removed
+ */
+static void ice_rss_clean(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf;
+
+	pf = vsi->back;
+
+	if (vsi->rss_hkey_user)
+		devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user);
+	if (vsi->rss_lut_user)
+		devm_kfree(&pf->pdev->dev, vsi->rss_lut_user);
+}
+
 /**
  * ice_vsi_release - Delete a VSI and free its resources
  * @vsi: the VSI being removed
@@ -3233,6 +3716,10 @@ static int ice_vsi_release(struct ice_vsi *vsi)
 		vsi->netdev = NULL;
 	}
 
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		ice_rss_clean(vsi);
+
+	/* Disable VSI and free resources */
 	ice_vsi_dis_irq(vsi);
 	ice_vsi_close(vsi);
 
@@ -3254,6 +3741,91 @@ static int ice_vsi_release(struct ice_vsi *vsi)
 	return 0;
 }
 
+/**
+ * ice_set_rss - Set RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+
+	if (seed) {
+		struct ice_aqc_get_set_rss_keys *buf =
+				  (struct ice_aqc_get_set_rss_keys *)seed;
+
+		status = ice_aq_set_rss_key(hw, vsi->vsi_num, buf);
+
+		if (status) {
+			dev_err(&pf->pdev->dev,
+				"Cannot set RSS key, err %d aq_err %d\n",
+				status, hw->adminq.rq_last_status);
+			return -EIO;
+		}
+	}
+
+	if (lut) {
+		status = ice_aq_set_rss_lut(hw, vsi->vsi_num,
+					    vsi->rss_lut_type, lut, lut_size);
+		if (status) {
+			dev_err(&pf->pdev->dev,
+				"Cannot set RSS lut, err %d aq_err %d\n",
+				status, hw->adminq.rq_last_status);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_rss - Get RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+
+	if (seed) {
+		struct ice_aqc_get_set_rss_keys *buf =
+				  (struct ice_aqc_get_set_rss_keys *)seed;
+
+		status = ice_aq_get_rss_key(hw, vsi->vsi_num, buf);
+		if (status) {
+			dev_err(&pf->pdev->dev,
+				"Cannot get RSS key, err %d aq_err %d\n",
+				status, hw->adminq.rq_last_status);
+			return -EIO;
+		}
+	}
+
+	if (lut) {
+		status = ice_aq_get_rss_lut(hw, vsi->vsi_num,
+					    vsi->rss_lut_type, lut, lut_size);
+		if (status) {
+			dev_err(&pf->pdev->dev,
+				"Cannot get RSS lut, err %d aq_err %d\n",
+				status, hw->adminq.rq_last_status);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * ice_open - Called when a network interface becomes active
  * @netdev: network interface device structure
@@ -3306,4 +3878,7 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_open = ice_open,
 	.ndo_stop = ice_stop,
 	.ndo_start_xmit = ice_start_xmit,
+	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
+	.ndo_set_features = ice_set_features,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 3944dac6cfc6..5624df9eb6fc 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1217,6 +1217,117 @@ ice_add_mac_exit:
 	return status;
 }
 
+/**
+ * ice_find_vlan_entry
+ * @hw: pointer to the hardware structure
+ * @vlan_id: VLAN id to search for
+ *
+ * Helper function to search for a VLAN entry using a given VLAN id
+ * Returns pointer to the entry if found.
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_vlan_entry(struct ice_hw *hw, u16 vlan_id)
+{
+	struct ice_fltr_mgmt_list_entry *vlan_list_itr, *vlan_ret = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+
+	mutex_lock(&sw->vlan_list_lock);
+	list_for_each_entry(vlan_list_itr, &sw->vlan_list_head, list_entry)
+		if (vlan_list_itr->fltr_info.l_data.vlan.vlan_id == vlan_id) {
+			vlan_ret = vlan_list_itr;
+			break;
+		}
+
+	mutex_unlock(&sw->vlan_list_lock);
+	return vlan_ret;
+}
+
+/**
+ * ice_add_vlan_internal - Add one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static enum ice_status
+ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	struct ice_fltr_mgmt_list_entry *v_list_itr;
+	u16 vlan_id;
+
+	new_fltr = &f_entry->fltr_info;
+	/* VLAN id should only be 12 bits */
+	if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
+		return ICE_ERR_PARAM;
+
+	vlan_id = new_fltr->l_data.vlan.vlan_id;
+	v_list_itr = ice_find_vlan_entry(hw, vlan_id);
+	if (!v_list_itr) {
+		u16 vsi_id = ICE_VSI_INVAL_ID;
+		enum ice_status status;
+		u16 vsi_list_id = 0;
+
+		if (new_fltr->fltr_act == ICE_FWD_TO_VSI) {
+			enum ice_sw_lkup_type lkup_type = new_fltr->lkup_type;
+
+			/* All VLAN pruning rules use a VSI list.
+			 * Convert the action to forwarding to a VSI list.
+			 */
+			vsi_id = new_fltr->fwd_id.vsi_id;
+			status = ice_create_vsi_list_rule(hw, &vsi_id, 1,
+							  &vsi_list_id,
+							  lkup_type);
+			if (status)
+				return status;
+			new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+			new_fltr->fwd_id.vsi_list_id = vsi_list_id;
+		}
+
+		status = ice_create_pkt_fwd_rule(hw, f_entry);
+		if (!status && vsi_id != ICE_VSI_INVAL_ID) {
+			v_list_itr = ice_find_vlan_entry(hw, vlan_id);
+			if (!v_list_itr)
+				return ICE_ERR_DOES_NOT_EXIST;
+			v_list_itr->vsi_list_info =
+				ice_create_vsi_list_map(hw, &vsi_id, 1,
+							vsi_list_id);
+		}
+
+		return status;
+	}
+
+	cur_fltr = &v_list_itr->fltr_info;
+	return ice_handle_vsi_list_mgmt(hw, v_list_itr, cur_fltr, new_fltr);
+}
+
+/**
+ * ice_add_vlan - Add VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+enum ice_status
+ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr;
+
+	if (!v_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(v_list_itr, v_list, list_entry) {
+		enum ice_status status;
+
+		if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN)
+			return ICE_ERR_PARAM;
+
+		status = ice_add_vlan_internal(hw, v_list_itr);
+		if (status) {
+			v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
+			return status;
+		}
+		v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+	}
+	return 0;
+}
+
 /**
  * ice_remove_vsi_list_rule
  * @hw: pointer to the hardware structure
@@ -1515,6 +1626,54 @@ ice_remove_mac_exit:
 	return status;
 }
 
+/**
+ * ice_remove_vlan_internal - Remove one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static enum ice_status
+ice_remove_vlan_internal(struct ice_hw *hw,
+			 struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_fltr_info *new_fltr;
+	struct ice_fltr_mgmt_list_entry *v_list_elem;
+	u16 vsi_id;
+
+	new_fltr = &f_entry->fltr_info;
+
+	v_list_elem = ice_find_vlan_entry(hw, new_fltr->l_data.vlan.vlan_id);
+	if (!v_list_elem)
+		return ICE_ERR_PARAM;
+
+	vsi_id = f_entry->fltr_info.fwd_id.vsi_id;
+	return ice_handle_rem_vsi_list_mgmt(hw, vsi_id, v_list_elem);
+}
+
+/**
+ * ice_remove_vlan - Remove VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+enum ice_status
+ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr;
+	enum ice_status status = 0;
+
+	if (!v_list || !hw)
+		return ICE_ERR_PARAM;
+
+	list_for_each_entry(v_list_itr, v_list, list_entry) {
+		status = ice_remove_vlan_internal(hw, v_list_itr);
+		if (status) {
+			v_list_itr->status = ICE_FLTR_STATUS_FW_FAIL;
+			return status;
+		}
+		v_list_itr->status = ICE_FLTR_STATUS_FW_SUCCESS;
+	}
+	return status;
+}
+
 /**
  * ice_add_to_vsi_fltr_list - Add VSI filters to the list
  * @hw: pointer to the hardware structure
@@ -1600,6 +1759,16 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_id,
 		}
 		break;
 	case ICE_SW_LKUP_VLAN:
+		mutex_lock(&sw->vlan_list_lock);
+		status = ice_add_to_vsi_fltr_list(hw, vsi_id,
+						  &sw->vlan_list_head,
+						  &remove_list_head);
+		mutex_unlock(&sw->vlan_list_lock);
+		if (!status) {
+			ice_remove_vlan(hw, &remove_list_head);
+			goto free_fltr_list;
+		}
+		break;
 	case ICE_SW_LKUP_MAC_VLAN:
 	case ICE_SW_LKUP_ETHERTYPE:
 	case ICE_SW_LKUP_ETHERTYPE_MAC:
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 1b312e7ab2b5..80b4e1951b32 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -8,6 +8,7 @@
 
 #define ICE_SW_CFG_MAX_BUF_LEN 2048
 #define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_VSI_INVAL_ID 0xffff
 
 /* VSI context structure for add/get/update/free operations */
 struct ice_vsi_ctx {
@@ -152,4 +153,7 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
 enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
 enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
 void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id);
+enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
+enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+
 #endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 1ccf8e69b85a..6481e3d86374 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -796,6 +796,134 @@ static bool ice_is_non_eop(struct ice_ring *rx_ring,
 	return true;
 }
 
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+	return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	    struct sk_buff *skb, u8 rx_ptype)
+{
+	struct ice_32b_rx_flex_desc_nic *nic_mdid;
+	u32 hash;
+
+	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+		return;
+
+	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+	hash = le32_to_cpu(nic_mdid->rss_hash);
+	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @vsi: the VSI we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
+			union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+	struct ice_rx_ptype_decoded decoded;
+	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
+
+	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+	rx_error = rx_status;
+
+	decoded = ice_decode_rx_desc_ptype(ptype);
+
+	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
+	skb->ip_summed = CHECKSUM_NONE;
+	skb_checksum_none_assert(skb);
+
+	/* check if Rx checksum is enabled */
+	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+		return;
+
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+		goto checksum_fail;
+	else if (ipv6 && (rx_status &
+		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+		goto checksum_fail;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed due to arrival speed
+	 */
+	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+		goto checksum_fail;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case ICE_RX_PTYPE_INNER_PROT_TCP:
+	case ICE_RX_PTYPE_INNER_PROT_UDP:
+	case ICE_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	default:
+		break;
+	}
+	return;
+
+checksum_fail:
+	vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+static void ice_process_skb_fields(struct ice_ring *rx_ring,
+				   union ice_32b_rx_flex_desc *rx_desc,
+				   struct sk_buff *skb, u8 ptype)
+{
+	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype);
+}
+
 /**
  * ice_receive_skb - Send a completed packet up the stack
  * @rx_ring: rx ring in play
@@ -839,6 +967,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		struct sk_buff *skb;
 		u16 stat_err_bits;
 		u16 vlan_tag = 0;
+		u8 rx_ptype;
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= ICE_RX_BUF_WRITE) {
@@ -882,6 +1011,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 			continue;
 		}
 
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+			ICE_RX_FLEX_DESC_PTYPE_M;
+
 		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
 		if (ice_test_staterr(rx_desc, stat_err_bits))
 			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
@@ -897,6 +1029,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 
+		/* populate checksum, VLAN, and protocol */
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
 		/* send completed skb up the stack */
 		ice_receive_skb(rx_ring, skb, vlan_tag);
 
@@ -1026,14 +1161,17 @@ static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
  * ice_tx_map - Build the Tx descriptor
  * @tx_ring: ring to send buffer on
  * @first: first buffer info buffer to use
+ * @off: pointer to struct that holds offload parameters
  *
  * This function loops over the skb data pointed to by *first
  * and gets a physical address for each memory location and programs
  * it and the length into the transmit descriptor.
  */
-static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first)
+static void
+ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
+	   struct ice_tx_offload_params *off)
 {
-	u64 td_offset = 0, td_tag = 0, td_cmd = 0;
+	u64 td_offset, td_tag, td_cmd;
 	u16 i = tx_ring->next_to_use;
 	struct skb_frag_struct *frag;
 	unsigned int data_len, size;
@@ -1042,6 +1180,9 @@ static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first)
 	struct sk_buff *skb;
 	dma_addr_t dma;
 
+	td_tag = off->td_l2tag1;
+	td_cmd = off->td_cmd;
+	td_offset = off->td_offset;
 	skb = first->skb;
 
 	data_len = skb->data_len;
@@ -1049,6 +1190,12 @@ static void ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first)
 
 	tx_desc = ICE_TX_DESC(tx_ring, i);
 
+	if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
+		td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
+		td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
+			  ICE_TX_FLAGS_VLAN_S;
+	}
+
 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
 
 	tx_buf = first;
@@ -1169,6 +1316,223 @@ dma_error:
 	tx_ring->next_to_use = i;
 }
 
+/**
+ * ice_tx_csum - Enable Tx checksum offloads
+ * @first: pointer to the first descriptor
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
+ */
+static
+int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	u32 l4_len = 0, l3_len = 0, l2_len = 0;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	__be16 frag_off, protocol;
+	unsigned char *exthdr;
+	u32 offset, cmd = 0;
+	u8 l4_proto = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* compute outer L2 header size */
+	l2_len = ip.hdr - skb->data;
+	offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
+
+	if (skb->encapsulation)
+		return -1;
+
+	/* Enable IP checksum offloads */
+	protocol = vlan_get_protocol(skb);
+	if (protocol == htons(ETH_P_IP)) {
+		l4_proto = ip.v4->protocol;
+		/* the stack computes the IP header already, the only time we
+		 * need the hardware to recompute it is in the case of TSO.
+		 */
+		if (first->tx_flags & ICE_TX_FLAGS_TSO)
+			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+		else
+			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+
+	} else if (protocol == htons(ETH_P_IPV6)) {
+		cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+		exthdr = ip.hdr + sizeof(*ip.v6);
+		l4_proto = ip.v6->nexthdr;
+		if (l4.hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
+					 &frag_off);
+	} else {
+		return -1;
+	}
+
+	/* compute inner L3 header size */
+	l3_len = l4.hdr - ip.hdr;
+	offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+		l4_len = l4.tcp->doff;
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+		l4_len = (sizeof(struct udphdr) >> 2);
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case IPPROTO_SCTP:
+	default:
+		if (first->tx_flags & ICE_TX_FLAGS_TSO)
+			return -1;
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	off->td_cmd |= cmd;
+	off->td_offset |= offset;
+	return 1;
+}
+
+/**
+ * ice_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ *
+ * Returns error code indicate the frame should be dropped upon error and the
+ * otherwise returns 0 to indicate the flags has been set properly.
+ */
+static int
+ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+	__be16 protocol = skb->protocol;
+
+	if (protocol == htons(ETH_P_8021Q) &&
+	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
+		/* when HW VLAN acceleration is turned off by the user the
+		 * stack sets the protocol to 8021q so that the driver
+		 * can take any steps required to support the SW only
+		 * VLAN handling. In our case the driver doesn't need
+		 * to take any further steps so just set the protocol
+		 * to the encapsulated ethertype.
+		 */
+		skb->protocol = vlan_get_protocol(skb);
+		goto out;
+	}
+
+	/* if we have a HW VLAN tag being added, default to the HW one */
+	if (skb_vlan_tag_present(skb)) {
+		first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
+		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+	} else if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vhdr, _vhdr;
+
+		/* for SW VLAN, check the next protocol and store the tag */
+		vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN,
+							     sizeof(_vhdr),
+							     &_vhdr);
+		if (!vhdr)
+			return -EINVAL;
+
+		first->tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
+				   ICE_TX_FLAGS_VLAN_S;
+		first->tx_flags |= ICE_TX_FLAGS_SW_VLAN;
+	}
+
+out:
+	return 0;
+}
+
+/**
+ * ice_tso - computes mss and TSO length to prepare for TSO
+ * @first: pointer to struct ice_tx_buf
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
+ */
+static
+int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	u64 cd_mss, cd_tso_len;
+	u32 paylen, l4_start;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		ip.v4->tot_len = 0;
+		ip.v4->check = 0;
+	} else {
+		ip.v6->payload_len = 0;
+	}
+
+	/* determine offset of transport header */
+	l4_start = l4.hdr - skb->data;
+
+	/* remove payload length from checksum */
+	paylen = skb->len - l4_start;
+	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+
+	/* compute length of segmentation header */
+	off->header_len = (l4.tcp->doff * 4) + l4_start;
+
+	/* update gso_segs and bytecount */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount = (first->gso_segs - 1) * off->header_len;
+
+	cd_tso_len = skb->len - off->header_len;
+	cd_mss = skb_shinfo(skb)->gso_size;
+
+	/* record cdesc_qw1 with TSO parameters */
+	off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX |
+			 (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+			 (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+			 (cd_mss << ICE_TXD_CTX_QW1_MSS_S);
+	first->tx_flags |= ICE_TX_FLAGS_TSO;
+	return 1;
+}
+
 /**
  * ice_txd_use_count  - estimate the number of descriptors needed for Tx
  * @size: transmit request size in bytes
@@ -1322,8 +1686,10 @@ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
 static netdev_tx_t
 ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 {
+	struct ice_tx_offload_params offload = { 0 };
 	struct ice_tx_buf *first;
 	unsigned int count;
+	int tso, csum;
 
 	count = ice_xmit_desc_count(skb);
 	if (ice_chk_linearize(skb, count)) {
@@ -1344,13 +1710,46 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 		return NETDEV_TX_BUSY;
 	}
 
+	offload.tx_ring = tx_ring;
+
 	/* record the location of the first descriptor for this packet */
 	first = &tx_ring->tx_buf[tx_ring->next_to_use];
 	first->skb = skb;
 	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
 	first->gso_segs = 1;
+	first->tx_flags = 0;
 
-	ice_tx_map(tx_ring, first);
+	/* prepare the VLAN tagging flags for Tx */
+	if (ice_tx_prepare_vlan_flags(tx_ring, first))
+		goto out_drop;
+
+	/* set up TSO offload */
+	tso = ice_tso(first, &offload);
+	if (tso < 0)
+		goto out_drop;
+
+	/* always set up Tx checksum offload */
+	csum = ice_tx_csum(first, &offload);
+	if (csum < 0)
+		goto out_drop;
+
+	if (tso || offload.cd_tunnel_params) {
+		struct ice_tx_ctx_desc *cdesc;
+		int i = tx_ring->next_to_use;
+
+		/* grab the next descriptor */
+		cdesc = ICE_TX_CTX_DESC(tx_ring, i);
+		i++;
+		tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+		/* setup context descriptor */
+		cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
+		cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
+		cdesc->rsvd = cpu_to_le16(0);
+		cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
+	}
+
+	ice_tx_map(tx_ring, first, &offload);
 	return NETDEV_TX_OK;
 
 out_drop:
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 8fa4450514b4..567067b650c4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -28,6 +28,12 @@
 	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 	(R)->next_to_clean - (R)->next_to_use - 1)
 
+#define ICE_TX_FLAGS_TSO	BIT(0)
+#define ICE_TX_FLAGS_HW_VLAN	BIT(1)
+#define ICE_TX_FLAGS_SW_VLAN	BIT(2)
+#define ICE_TX_FLAGS_VLAN_M	0xffff0000
+#define ICE_TX_FLAGS_VLAN_S	16
+
 struct ice_tx_buf {
 	struct ice_tx_desc *next_to_watch;
 	struct sk_buff *skb;
@@ -38,6 +44,17 @@ struct ice_tx_buf {
 	DEFINE_DMA_UNMAP_LEN(len);
 };
 
+struct ice_tx_offload_params {
+	u8 header_len;
+	u32 td_cmd;
+	u32 td_offset;
+	u32 td_l2tag1;
+	u16 cd_l2tag2;
+	u32 cd_tunnel_params;
+	u64 cd_qw1;
+	struct ice_ring *tx_ring;
+};
+
 struct ice_rx_buf {
 	struct sk_buff *skb;
 	dma_addr_t dma;

From fcea6f3da546b93050f3534aadea7bd96c1d7349 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:16 -0700
Subject: [PATCH 1262/1678] ice: Add stats and ethtool support

This patch implements a watchdog task to get packet statistics from
the device.

This patch also adds support for the following ethtool operations:

ethtool devname
ethtool -s devname [msglvl N] [msglevel type on|off]
ethtool -g|--show-ring devname
ethtool -G|--set-ring devname [rx N] [tx N]
ethtool -i|--driver devname
ethtool -d|--register-dump devname [raw on|off] [hex on|off] [file name]
ethtool -k|--show-features|--show-offload devname
ethtool -K|--features|--offload devname feature on|off
ethtool -P|--show-permaddr devname
ethtool -S|--statistics devname
ethtool -a|--show-pause devname
ethtool -A|--pause devname [autoneg on|off] [rx on|off] [tx on|off]
ethtool -r|--negotiate devname

CC: Andrew Lunn <andrew@lunn.ch>
CC: Jakub Kicinski <kubakici@wp.pl>
CC: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile       |   3 +-
 drivers/net/ethernet/intel/ice/ice.h          |  28 +-
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  43 +
 drivers/net/ethernet/intel/ice/ice_common.c   | 195 ++++
 drivers/net/ethernet/intel/ice/ice_common.h   |   5 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c  | 940 ++++++++++++++++++
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |  80 ++
 drivers/net/ethernet/intel/ice/ice_main.c     | 469 ++++++++-
 drivers/net/ethernet/intel/ice/ice_type.h     |  70 ++
 9 files changed, 1828 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_ethtool.c

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index de82fc875775..4058673fd853 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -13,4 +13,5 @@ ice-y := ice_main.o	\
 	 ice_nvm.o	\
 	 ice_switch.o	\
 	 ice_sched.o	\
-	 ice_txrx.o
+	 ice_txrx.o	\
+	 ice_ethtool.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 51cc955c7220..66d0976afa46 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -13,12 +13,14 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/cpumask.h>
+#include <linux/rtnetlink.h>
 #include <linux/if_vlan.h>
 #include <linux/dma-mapping.h>
 #include <linux/pci.h>
 #include <linux/workqueue.h>
 #include <linux/aer.h>
 #include <linux/interrupt.h>
+#include <linux/ethtool.h>
 #include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/bitmap.h>
@@ -34,10 +36,14 @@
 #include "ice_common.h"
 #include "ice_sched.h"
 
+extern const char ice_drv_ver[];
 #define ICE_BAR0		0
 #define ICE_DFLT_NUM_DESC	128
+#define ICE_MIN_NUM_DESC	8
+#define ICE_MAX_NUM_DESC	8160
 #define ICE_REQ_DESC_MULTIPLE	32
 #define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
+#define ICE_ETHTOOL_FWVER_LEN	32
 #define ICE_AQ_LEN		64
 #define ICE_MIN_MSIX		2
 #define ICE_NO_VSI		0xffff
@@ -56,6 +62,8 @@
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
 
+#define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
+
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
 #define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
@@ -102,6 +110,7 @@ enum ice_state {
 	__ICE_DOWN,
 	__ICE_PFR_REQ,			/* set by driver and peers */
 	__ICE_ADMINQ_EVENT_PENDING,
+	__ICE_CFG_BUSY,
 	__ICE_SERVICE_SCHED,
 	__ICE_STATE_NBITS		/* must be last */
 };
@@ -118,8 +127,13 @@ struct ice_vsi {
 
 	irqreturn_t (*irq_handler)(int irq, void *data);
 
+	u64 tx_linearize;
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	u32 tx_restart;
+	u32 tx_busy;
+	u32 rx_buf_failed;
+	u32 rx_page_failed;
 	int num_q_vectors;
 	int base_vector;
 	enum ice_vsi_type type;
@@ -141,8 +155,14 @@ struct ice_vsi {
 
 	struct ice_aqc_vsi_props info;	 /* VSI properties */
 
+	/* VSI stats */
+	struct rtnl_link_stats64 net_stats;
+	struct ice_eth_stats eth_stats;
+	struct ice_eth_stats eth_stats_prev;
+
 	bool irqs_ready;
 	bool current_isup;		 /* Sync 'link up' logging */
+	bool stat_offsets_loaded;
 
 	/* queue information */
 	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -205,8 +225,10 @@ struct ice_pf {
 	u16 q_left_rx;		/* remaining num rx queues left unclaimed */
 	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
 	u16 num_alloc_vsi;
-
+	struct ice_hw_port_stats stats;
+	struct ice_hw_port_stats stats_prev;
 	struct ice_hw hw;
+	bool stat_prev_loaded;	/* has previous stats been loaded */
 	char int_name[ICE_INT_NAME_STR_LEN];
 };
 
@@ -239,8 +261,12 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
 	wr32(hw, GLINT_DYN_CTL(vector), val);
 }
 
+void ice_set_ethtool_ops(struct net_device *netdev);
+int ice_up(struct ice_vsi *vsi);
+int ice_down(struct ice_vsi *vsi);
 int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
 
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 051427dcfb3c..ae7376f2135b 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -859,6 +859,45 @@ struct ice_aqc_get_phy_caps_data {
 	} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
 };
 
+/* Set PHY capabilities (direct 0x0601)
+ * NOTE: This command must be followed by setup link and restart auto-neg
+ */
+struct ice_aqc_set_phy_cfg {
+	u8 lport_num;
+	u8 reserved[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Set PHY config command data structure */
+struct ice_aqc_set_phy_cfg_data {
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 rsvd0;
+	u8 caps;
+#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY		BIT(0)
+#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY		BIT(1)
+#define ICE_AQ_PHY_ENA_LOW_POWER		BIT(2)
+#define ICE_AQ_PHY_ENA_LINK			BIT(3)
+#define ICE_AQ_PHY_ENA_ATOMIC_LINK		BIT(5)
+	u8 low_power_ctrl;
+	__le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
+	__le16 eeer_value;
+	u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */
+	u8 rsvd1;
+};
+
+/* Restart AN command data structure (direct 0x0605)
+ * Also used for response, with only the lport_num field present.
+ */
+struct ice_aqc_restart_an {
+	u8 lport_num;
+	u8 reserved;
+	u8 cmd_flags;
+#define ICE_AQC_RESTART_AN_LINK_RESTART	BIT(1)
+#define ICE_AQC_RESTART_AN_LINK_ENABLE	BIT(2)
+	u8 reserved2[13];
+};
+
 /* Get link status (indirect 0x0607), also used for Link Status Event */
 struct ice_aqc_get_link_status {
 	u8 lport_num;
@@ -1137,6 +1176,8 @@ struct ice_aq_desc {
 		struct ice_aqc_clear_pxe clear_pxe;
 		struct ice_aqc_list_caps get_cap;
 		struct ice_aqc_get_phy_caps get_phy;
+		struct ice_aqc_set_phy_cfg set_phy;
+		struct ice_aqc_restart_an restart_an;
 		struct ice_aqc_get_sw_cfg get_sw_conf;
 		struct ice_aqc_sw_rules sw_rules;
 		struct ice_aqc_get_topo get_topo;
@@ -1222,6 +1263,8 @@ enum ice_adminq_opc {
 
 	/* PHY commands */
 	ice_aqc_opc_get_phy_caps			= 0x0600,
+	ice_aqc_opc_set_phy_cfg				= 0x0601,
+	ice_aqc_opc_restart_an				= 0x0605,
 	ice_aqc_opc_get_link_status			= 0x0607,
 
 	/* NVM commands */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index b4e834359a95..02d589275db0 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1261,6 +1261,201 @@ void ice_clear_pxe_mode(struct ice_hw *hw)
 		ice_aq_clear_pxe_mode(hw);
 }
 
+/**
+ * ice_aq_set_phy_cfg
+ * @hw: pointer to the hw struct
+ * @lport: logical port number
+ * @cfg: structure with PHY configuration data to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters supported on the Port.
+ * One or more of the Set PHY config parameters may be ignored in an MFP
+ * mode as the PF may not have the privilege to set some of the PHY Config
+ * parameters. This status will be indicated by the command response (0x0601).
+ */
+static enum ice_status
+ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
+		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_phy_cfg *cmd;
+	struct ice_aq_desc desc;
+
+	if (!cfg)
+		return ICE_ERR_PARAM;
+
+	cmd = &desc.params.set_phy;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
+	cmd->lport_num = lport;
+
+	return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+}
+
+/**
+ * ice_update_link_info - update status of the HW network link
+ * @pi: port info structure of the interested logical port
+ */
+static enum ice_status
+ice_update_link_info(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_phy_info *phy_info;
+	enum ice_status status;
+	struct ice_hw *hw;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	hw = pi->hw;
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return ICE_ERR_NO_MEMORY;
+
+	phy_info = &pi->phy;
+	status = ice_aq_get_link_info(pi, true, NULL, NULL);
+	if (status)
+		goto out;
+
+	if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+					     pcaps, NULL);
+		if (status)
+			goto out;
+
+		memcpy(phy_info->link_info.module_type, &pcaps->module_type,
+		       sizeof(phy_info->link_info.module_type));
+	}
+out:
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	return status;
+}
+
+/**
+ * ice_set_fc
+ * @pi: port information structure
+ * @aq_failures: pointer to status code, specific to ice_set_fc routine
+ * @atomic_restart: enable automatic link update
+ *
+ * Set the requested flow control mode.
+ */
+enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart)
+{
+	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	enum ice_status status;
+	u8 pause_mask = 0x0;
+	struct ice_hw *hw;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+	*aq_failures = ICE_SET_FC_AQ_FAIL_NONE;
+
+	switch (pi->fc.req_mode) {
+	case ICE_FC_FULL:
+		pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+		pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+		break;
+	case ICE_FC_RX_PAUSE:
+		pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+		break;
+	case ICE_FC_TX_PAUSE:
+		pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+		break;
+	default:
+		break;
+	}
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Get the current phy config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+				     NULL);
+	if (status) {
+		*aq_failures = ICE_SET_FC_AQ_FAIL_GET;
+		goto out;
+	}
+
+	/* clear the old pause settings */
+	cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+				   ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+	/* set the new capabilities */
+	cfg.caps |= pause_mask;
+	/* If the capabilities have changed, then set the new config */
+	if (cfg.caps != pcaps->caps) {
+		int retry_count, retry_max = 10;
+
+		/* Auto restart link so settings take effect */
+		if (atomic_restart)
+			cfg.caps |= ICE_AQ_PHY_ENA_ATOMIC_LINK;
+		/* Copy over all the old settings */
+		cfg.phy_type_low = pcaps->phy_type_low;
+		cfg.low_power_ctrl = pcaps->low_power_ctrl;
+		cfg.eee_cap = pcaps->eee_cap;
+		cfg.eeer_value = pcaps->eeer_value;
+		cfg.link_fec_opt = pcaps->link_fec_options;
+
+		status = ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL);
+		if (status) {
+			*aq_failures = ICE_SET_FC_AQ_FAIL_SET;
+			goto out;
+		}
+
+		/* Update the link info
+		 * It sometimes takes a really long time for link to
+		 * come back from the atomic reset. Thus, we wait a
+		 * little bit.
+		 */
+		for (retry_count = 0; retry_count < retry_max; retry_count++) {
+			status = ice_update_link_info(pi);
+
+			if (!status)
+				break;
+
+			mdelay(100);
+		}
+
+		if (status)
+			*aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE;
+	}
+
+out:
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	return status;
+}
+
+/**
+ * ice_aq_set_link_restart_an
+ * @pi: pointer to the port information structure
+ * @ena_link: if true: enable link, if false: disable link
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ */
+enum ice_status
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_restart_an *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.restart_an;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an);
+
+	cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART;
+	cmd->lport_num = pi->lport;
+	if (ena_link)
+		cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE;
+	else
+		cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE;
+
+	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
 /**
  * __ice_aq_get_set_rss_lut
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 38719ba01d9b..c4213fbf9579 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -58,6 +58,11 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
 enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
 enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
 enum ice_status
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart);
+enum ice_status
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+			   struct ice_sq_cd *cd);
+enum ice_status
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
new file mode 100644
index 000000000000..186764a5c263
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -0,0 +1,940 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* ethtool support for ice */
+
+#include "ice.h"
+
+struct ice_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define ICE_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+#define ICE_VSI_STAT(_name, _stat) \
+		ICE_STAT(struct ice_vsi, _name, _stat)
+#define ICE_PF_STAT(_name, _stat) \
+		ICE_STAT(struct ice_pf, _name, _stat)
+
+static int ice_q_stats_len(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return ((np->vsi->num_txq + np->vsi->num_rxq) *
+		(sizeof(struct ice_q_stats) / sizeof(u64)));
+}
+
+#define ICE_PF_STATS_LEN	ARRAY_SIZE(ice_gstrings_pf_stats)
+#define ICE_VSI_STATS_LEN	ARRAY_SIZE(ice_gstrings_vsi_stats)
+
+#define ICE_ALL_STATS_LEN(n)	(ICE_PF_STATS_LEN + ICE_VSI_STATS_LEN + \
+				 ice_q_stats_len(n))
+
+static const struct ice_stats ice_gstrings_vsi_stats[] = {
+	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+	ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
+	ICE_VSI_STAT("rx_discards", eth_stats.rx_discards),
+	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+	ICE_VSI_STAT("tx_linearize", tx_linearize),
+	ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+	ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
+	ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+};
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they aren't. This device is capable of supporting multiple
+ * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
+ * netdevs whereas the PF_STATs are for the physical function that's
+ * hosting these netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev.
+ */
+static struct ice_stats ice_gstrings_pf_stats[] = {
+	ICE_PF_STAT("tx_bytes", stats.eth.tx_bytes),
+	ICE_PF_STAT("rx_bytes", stats.eth.rx_bytes),
+	ICE_PF_STAT("tx_unicast", stats.eth.tx_unicast),
+	ICE_PF_STAT("rx_unicast", stats.eth.rx_unicast),
+	ICE_PF_STAT("tx_multicast", stats.eth.tx_multicast),
+	ICE_PF_STAT("rx_multicast", stats.eth.rx_multicast),
+	ICE_PF_STAT("tx_broadcast", stats.eth.tx_broadcast),
+	ICE_PF_STAT("rx_broadcast", stats.eth.rx_broadcast),
+	ICE_PF_STAT("tx_errors", stats.eth.tx_errors),
+	ICE_PF_STAT("tx_size_64", stats.tx_size_64),
+	ICE_PF_STAT("rx_size_64", stats.rx_size_64),
+	ICE_PF_STAT("tx_size_127", stats.tx_size_127),
+	ICE_PF_STAT("rx_size_127", stats.rx_size_127),
+	ICE_PF_STAT("tx_size_255", stats.tx_size_255),
+	ICE_PF_STAT("rx_size_255", stats.rx_size_255),
+	ICE_PF_STAT("tx_size_511", stats.tx_size_511),
+	ICE_PF_STAT("rx_size_511", stats.rx_size_511),
+	ICE_PF_STAT("tx_size_1023", stats.tx_size_1023),
+	ICE_PF_STAT("rx_size_1023", stats.rx_size_1023),
+	ICE_PF_STAT("tx_size_1522", stats.tx_size_1522),
+	ICE_PF_STAT("rx_size_1522", stats.rx_size_1522),
+	ICE_PF_STAT("tx_size_big", stats.tx_size_big),
+	ICE_PF_STAT("rx_size_big", stats.rx_size_big),
+	ICE_PF_STAT("link_xon_tx", stats.link_xon_tx),
+	ICE_PF_STAT("link_xon_rx", stats.link_xon_rx),
+	ICE_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
+	ICE_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
+	ICE_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down),
+	ICE_PF_STAT("rx_undersize", stats.rx_undersize),
+	ICE_PF_STAT("rx_fragments", stats.rx_fragments),
+	ICE_PF_STAT("rx_oversize", stats.rx_oversize),
+	ICE_PF_STAT("rx_jabber", stats.rx_jabber),
+	ICE_PF_STAT("rx_csum_bad", hw_csum_rx_error),
+	ICE_PF_STAT("rx_length_errors", stats.rx_len_errors),
+	ICE_PF_STAT("rx_dropped", stats.eth.rx_discards),
+	ICE_PF_STAT("rx_crc_errors", stats.crc_errors),
+	ICE_PF_STAT("illegal_bytes", stats.illegal_bytes),
+	ICE_PF_STAT("mac_local_faults", stats.mac_local_faults),
+	ICE_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
+};
+
+static u32 ice_regs_dump_list[] = {
+	PFGEN_STATE,
+	PRTGEN_STATUS,
+	QRX_CTRL(0),
+	QINT_TQCTL(0),
+	QINT_RQCTL(0),
+	PFINT_OICR_ENA,
+	QRX_ITR(0),
+};
+
+/**
+ * ice_nvm_version_str - format the NVM version strings
+ * @hw: ptr to the hardware info
+ */
+static char *ice_nvm_version_str(struct ice_hw *hw)
+{
+	static char buf[ICE_ETHTOOL_FWVER_LEN];
+	u8 ver, patch;
+	u32 full_ver;
+	u16 build;
+
+	full_ver = hw->nvm.oem_ver;
+	ver = (u8)((full_ver & ICE_OEM_VER_MASK) >> ICE_OEM_VER_SHIFT);
+	build = (u16)((full_ver & ICE_OEM_VER_BUILD_MASK) >>
+		      ICE_OEM_VER_BUILD_SHIFT);
+	patch = (u8)(full_ver & ICE_OEM_VER_PATCH_MASK);
+
+	snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d",
+		 (hw->nvm.ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT,
+		 (hw->nvm.ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT,
+		 hw->nvm.eetrack, ver, build, patch);
+
+	return buf;
+}
+
+static void
+ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version));
+	strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw),
+		sizeof(drvinfo->fw_version));
+	strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
+		sizeof(drvinfo->bus_info));
+}
+
+static int ice_get_regs_len(struct net_device __always_unused *netdev)
+{
+	return ARRAY_SIZE(ice_regs_dump_list);
+}
+
+static void
+ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 *regs_buf = (u32 *)p;
+	int i;
+
+	regs->version = 1;
+
+	for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list) / sizeof(u32); ++i)
+		regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
+}
+
+static u32 ice_get_msglevel(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (pf->hw.debug_mask)
+		netdev_info(netdev, "hw debug_mask: 0x%llX\n",
+			    pf->hw.debug_mask);
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+	return pf->msg_enable;
+}
+
+static void ice_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (ICE_DBG_USER & data)
+		pf->hw.debug_mask = data;
+	else
+		pf->msg_enable = data;
+#else
+	pf->msg_enable = data;
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+}
+
+static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	char *p = (char *)data;
+	unsigned int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ICE_VSI_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "%s",
+				 ice_gstrings_vsi_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
+
+		ice_for_each_txq(vsi, i) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "tx-queue-%u.tx_packets", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+
+		ice_for_each_rxq(vsi, i) {
+			snprintf(p, ETH_GSTRING_LEN,
+				 "rx-queue-%u.rx_packets", i);
+			p += ETH_GSTRING_LEN;
+			snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+
+		if (vsi->type != ICE_VSI_PF)
+			return;
+
+		for (i = 0; i < ICE_PF_STATS_LEN; i++) {
+			snprintf(p, ETH_GSTRING_LEN, "port.%s",
+				 ice_gstrings_pf_stats[i].stat_string);
+			p += ETH_GSTRING_LEN;
+		}
+
+		break;
+	default:
+		break;
+	}
+}
+
+static int ice_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ICE_ALL_STATS_LEN(netdev);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void
+ice_get_ethtool_stats(struct net_device *netdev,
+		      struct ethtool_stats __always_unused *stats, u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_ring *ring;
+	unsigned int j = 0;
+	int i = 0;
+	char *p;
+
+	for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+		p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+		data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+			    sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	/* populate per queue stats */
+	rcu_read_lock();
+
+	ice_for_each_txq(vsi, j) {
+		ring = READ_ONCE(vsi->tx_rings[j]);
+		if (!ring)
+			continue;
+		data[i++] = ring->stats.pkts;
+		data[i++] = ring->stats.bytes;
+	}
+
+	ice_for_each_rxq(vsi, j) {
+		ring = READ_ONCE(vsi->rx_rings[j]);
+		data[i++] = ring->stats.pkts;
+		data[i++] = ring->stats.bytes;
+	}
+
+	rcu_read_unlock();
+
+	if (vsi->type != ICE_VSI_PF)
+		return;
+
+	for (j = 0; j < ICE_PF_STATS_LEN; j++) {
+		p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset;
+		data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+}
+
+static int
+ice_get_link_ksettings(struct net_device *netdev,
+		       struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_link_status *hw_link_info;
+	struct ice_vsi *vsi = np->vsi;
+	bool link_up;
+
+	hw_link_info = &vsi->port_info->phy.link_info;
+	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+	ethtool_link_ksettings_add_link_mode(ks, supported,
+					     10000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(ks, advertising,
+					     10000baseT_Full);
+
+	/* set speed and duplex */
+	if (link_up) {
+		switch (hw_link_info->link_speed) {
+		case ICE_AQ_LINK_SPEED_100MB:
+			ks->base.speed = SPEED_100;
+			break;
+		case ICE_AQ_LINK_SPEED_2500MB:
+			ks->base.speed = SPEED_2500;
+			break;
+		case ICE_AQ_LINK_SPEED_5GB:
+			ks->base.speed = SPEED_5000;
+			break;
+		case ICE_AQ_LINK_SPEED_10GB:
+			ks->base.speed = SPEED_10000;
+			break;
+		case ICE_AQ_LINK_SPEED_25GB:
+			ks->base.speed = SPEED_25000;
+			break;
+		case ICE_AQ_LINK_SPEED_40GB:
+			ks->base.speed = SPEED_40000;
+			break;
+		default:
+			ks->base.speed = SPEED_UNKNOWN;
+			break;
+		}
+
+		ks->base.duplex = DUPLEX_FULL;
+	} else {
+		ks->base.speed = SPEED_UNKNOWN;
+		ks->base.duplex = DUPLEX_UNKNOWN;
+	}
+
+	/* set autoneg settings */
+	ks->base.autoneg = ((hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
+			    AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	/* set media type settings */
+	switch (vsi->port_info->phy.media_type) {
+	case ICE_MEDIA_FIBER:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ks->base.port = PORT_FIBRE;
+		break;
+	case ICE_MEDIA_BASET:
+		ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+		ks->base.port = PORT_TP;
+		break;
+	case ICE_MEDIA_BACKPLANE:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Backplane);
+		ks->base.port = PORT_NONE;
+		break;
+	case ICE_MEDIA_DA:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+		ks->base.port = PORT_DA;
+		break;
+	default:
+		ks->base.port = PORT_OTHER;
+		break;
+	}
+
+	/* flow control is symmetric and always supported */
+	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
+	switch (vsi->port_info->fc.req_mode) {
+	case ICE_FC_FULL:
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		break;
+	case ICE_FC_TX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_RX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_PFC:
+	default:
+		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, advertising,
+						     Asym_Pause);
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: buffer to rturn Rx flow classification rules
+ *
+ * Returns Success if the command is supported.
+ */
+static int ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			 u32 __always_unused *rule_locs)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = vsi->rss_size;
+		ret = 0;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void
+ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	ring->rx_max_pending = ICE_MAX_NUM_DESC;
+	ring->tx_max_pending = ICE_MAX_NUM_DESC;
+	ring->rx_pending = vsi->rx_rings[0]->count;
+	ring->tx_pending = vsi->tx_rings[0]->count;
+	ring->rx_mini_pending = ICE_MIN_NUM_DESC;
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static int
+ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
+{
+	struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int i, timeout = 50, err = 0;
+	u32 new_rx_cnt, new_tx_cnt;
+
+	if (ring->tx_pending > ICE_MAX_NUM_DESC ||
+	    ring->tx_pending < ICE_MIN_NUM_DESC ||
+	    ring->rx_pending > ICE_MAX_NUM_DESC ||
+	    ring->rx_pending < ICE_MIN_NUM_DESC) {
+		netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n",
+			   ring->tx_pending, ring->rx_pending,
+			   ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC);
+		return -EINVAL;
+	}
+
+	new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
+	new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
+
+	/* if nothing to do return success */
+	if (new_tx_cnt == vsi->tx_rings[0]->count &&
+	    new_rx_cnt == vsi->rx_rings[0]->count) {
+		netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+		return 0;
+	}
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	/* set for the next time the netdev is started */
+	if (!netif_running(vsi->netdev)) {
+		for (i = 0; i < vsi->alloc_txq; i++)
+			vsi->tx_rings[i]->count = new_tx_cnt;
+		for (i = 0; i < vsi->alloc_rxq; i++)
+			vsi->rx_rings[i]->count = new_rx_cnt;
+		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
+		goto done;
+	}
+
+	if (new_tx_cnt == vsi->tx_rings[0]->count)
+		goto process_rx;
+
+	/* alloc updated Tx resources */
+	netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
+		    vsi->tx_rings[0]->count, new_tx_cnt);
+
+	tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+				sizeof(struct ice_ring), GFP_KERNEL);
+	if (!tx_rings) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < vsi->num_txq; i++) {
+		/* clone ring and setup updated count */
+		tx_rings[i] = *vsi->tx_rings[i];
+		tx_rings[i].count = new_tx_cnt;
+		tx_rings[i].desc = NULL;
+		tx_rings[i].tx_buf = NULL;
+		err = ice_setup_tx_ring(&tx_rings[i]);
+		if (err) {
+			while (i) {
+				i--;
+				ice_clean_tx_ring(&tx_rings[i]);
+			}
+			devm_kfree(&pf->pdev->dev, tx_rings);
+			goto done;
+		}
+	}
+
+process_rx:
+	if (new_rx_cnt == vsi->rx_rings[0]->count)
+		goto process_link;
+
+	/* alloc updated Rx resources */
+	netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
+		    vsi->rx_rings[0]->count, new_rx_cnt);
+
+	rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+				sizeof(struct ice_ring), GFP_KERNEL);
+	if (!rx_rings) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < vsi->num_rxq; i++) {
+		/* clone ring and setup updated count */
+		rx_rings[i] = *vsi->rx_rings[i];
+		rx_rings[i].count = new_rx_cnt;
+		rx_rings[i].desc = NULL;
+		rx_rings[i].rx_buf = NULL;
+		/* this is to allow wr32 to have something to write to
+		 * during early allocation of Rx buffers
+		 */
+		rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS;
+
+		err = ice_setup_rx_ring(&rx_rings[i]);
+		if (err)
+			goto rx_unwind;
+
+		/* allocate Rx buffers */
+		err = ice_alloc_rx_bufs(&rx_rings[i],
+					ICE_DESC_UNUSED(&rx_rings[i]));
+rx_unwind:
+		if (err) {
+			while (i) {
+				i--;
+				ice_free_rx_ring(&rx_rings[i]);
+			}
+			devm_kfree(&pf->pdev->dev, rx_rings);
+			err = -ENOMEM;
+			goto free_tx;
+		}
+	}
+
+process_link:
+	/* Bring interface down, copy in the new ring info, then restore the
+	 * interface. if VSI is up, bring it down and then back up
+	 */
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		ice_down(vsi);
+
+		if (tx_rings) {
+			for (i = 0; i < vsi->alloc_txq; i++) {
+				ice_free_tx_ring(vsi->tx_rings[i]);
+				*vsi->tx_rings[i] = tx_rings[i];
+			}
+			devm_kfree(&pf->pdev->dev, tx_rings);
+		}
+
+		if (rx_rings) {
+			for (i = 0; i < vsi->alloc_rxq; i++) {
+				ice_free_rx_ring(vsi->rx_rings[i]);
+				/* copy the real tail offset */
+				rx_rings[i].tail = vsi->rx_rings[i]->tail;
+				/* this is to fake out the allocation routine
+				 * into thinking it has to realloc everything
+				 * but the recycling logic will let us re-use
+				 * the buffers allocated above
+				 */
+				rx_rings[i].next_to_use = 0;
+				rx_rings[i].next_to_clean = 0;
+				rx_rings[i].next_to_alloc = 0;
+				*vsi->rx_rings[i] = rx_rings[i];
+			}
+			devm_kfree(&pf->pdev->dev, rx_rings);
+		}
+
+		ice_up(vsi);
+	}
+	goto done;
+
+free_tx:
+	/* error cleanup if the Rx allocations failed after getting Tx */
+	if (tx_rings) {
+		for (i = 0; i < vsi->alloc_txq; i++)
+			ice_free_tx_ring(&tx_rings[i]);
+		devm_kfree(&pf->pdev->dev, tx_rings);
+	}
+
+done:
+	clear_bit(__ICE_CFG_BUSY, pf->state);
+	return err;
+}
+
+static int ice_nway_reset(struct net_device *netdev)
+{
+	/* restart autonegotiation */
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_link_status *hw_link_info;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	bool link_up;
+
+	pi = vsi->port_info;
+	hw_link_info = &pi->phy.link_info;
+	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+	status = ice_aq_set_link_restart_an(pi, link_up, NULL);
+	if (status) {
+		netdev_info(netdev, "link restart failed, err %d aq_err %d\n",
+			    status, pi->hw->adminq.sq_last_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_pauseparam - Get Flow Control status
+ * @netdev: network interface device structure
+ * @pause: ethernet pause (flow control) parameters
+ */
+static void
+ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_port_info *pi;
+
+	pi = np->vsi->port_info;
+	pause->autoneg =
+		((pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) ?
+		 AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (pi->fc.current_mode == ICE_FC_RX_PAUSE) {
+		pause->rx_pause = 1;
+	} else if (pi->fc.current_mode == ICE_FC_TX_PAUSE) {
+		pause->tx_pause = 1;
+	} else if (pi->fc.current_mode == ICE_FC_FULL) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+/**
+ * ice_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: return tx/rx flow control status
+ */
+static int
+ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_link_status *hw_link_info;
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	u8 aq_failures;
+	bool link_up;
+	int err = 0;
+
+	pi = vsi->port_info;
+	hw_link_info = &pi->phy.link_info;
+	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+	/* Changing the port's flow control is not supported if this isn't the
+	 * PF VSI
+	 */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (pause->autoneg != (hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* If we have link and don't have autoneg */
+	if (!test_bit(__ICE_DOWN, pf->state) &&
+	    !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+		/* Send message that it might not necessarily work*/
+		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+	}
+
+	if (pause->rx_pause && pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_FULL;
+	else if (pause->rx_pause && !pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_RX_PAUSE;
+	else if (!pause->rx_pause && pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_TX_PAUSE;
+	else if (!pause->rx_pause && !pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_NONE;
+	else
+		return -EINVAL;
+
+	/* Tell the OS link is going down, the link will go back up when fw
+	 * says it is ready asynchronously
+	 */
+	ice_print_link_msg(vsi, false);
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	/* Set the FC mode and only restart AN if link is up */
+	status = ice_set_fc(pi, &aq_failures, link_up);
+
+	if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
+		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n",
+			    status, hw->adminq.sq_last_status);
+		err = -EAGAIN;
+	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
+		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n",
+			    status, hw->adminq.sq_last_status);
+		err = -EAGAIN;
+	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
+		netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n",
+			    status, hw->adminq.sq_last_status);
+		err = -EAGAIN;
+	}
+
+	if (!test_bit(__ICE_DOWN, pf->state)) {
+		/* Give it a little more time to try to come back */
+		msleep(75);
+		if (!test_bit(__ICE_DOWN, pf->state))
+			return ice_nway_reset(netdev);
+	}
+
+	return err;
+}
+
+/**
+ * ice_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
+{
+	return ICE_VSIQF_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * ice_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vsi->rss_table_size;
+}
+
+/**
+ * ice_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware.
+ */
+static int
+ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int ret = 0, i;
+	u8 *lut;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (!indir)
+		return 0;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		/* RSS not supported return error here */
+		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+		return -EIO;
+	}
+
+	lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	for (i = 0; i < vsi->rss_table_size; i++)
+		indir[i] = (u32)(lut[i]);
+
+out:
+	devm_kfree(&pf->pdev->dev, lut);
+	return ret;
+}
+
+/**
+ * ice_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ */
+static int ice_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u8 *seed = NULL;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		/* RSS not supported return error here */
+		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+		return -EIO;
+	}
+
+	if (key) {
+		if (!vsi->rss_hkey_user) {
+			vsi->rss_hkey_user =
+				devm_kzalloc(&pf->pdev->dev,
+					     ICE_VSIQF_HKEY_ARRAY_SIZE,
+					     GFP_KERNEL);
+			if (!vsi->rss_hkey_user)
+				return -ENOMEM;
+		}
+		memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
+		seed = vsi->rss_hkey_user;
+	}
+
+	if (!vsi->rss_lut_user) {
+		vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev,
+						 vsi->rss_table_size,
+						 GFP_KERNEL);
+		if (!vsi->rss_lut_user)
+			return -ENOMEM;
+	}
+
+	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+	if (indir) {
+		int i;
+
+		for (i = 0; i < vsi->rss_table_size; i++)
+			vsi->rss_lut_user[i] = (u8)(indir[i]);
+	} else {
+		ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size,
+				 vsi->rss_size);
+	}
+
+	if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size))
+		return -EIO;
+
+	return 0;
+}
+
+static const struct ethtool_ops ice_ethtool_ops = {
+	.get_link_ksettings	= ice_get_link_ksettings,
+	.get_drvinfo            = ice_get_drvinfo,
+	.get_regs_len           = ice_get_regs_len,
+	.get_regs               = ice_get_regs,
+	.get_msglevel           = ice_get_msglevel,
+	.set_msglevel           = ice_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_strings		= ice_get_strings,
+	.get_ethtool_stats      = ice_get_ethtool_stats,
+	.get_sset_count		= ice_get_sset_count,
+	.get_rxnfc		= ice_get_rxnfc,
+	.get_ringparam		= ice_get_ringparam,
+	.set_ringparam		= ice_set_ringparam,
+	.nway_reset		= ice_nway_reset,
+	.get_pauseparam		= ice_get_pauseparam,
+	.set_pauseparam		= ice_set_pauseparam,
+	.get_rxfh_key_size	= ice_get_rxfh_key_size,
+	.get_rxfh_indir_size	= ice_get_rxfh_indir_size,
+	.get_rxfh		= ice_get_rxfh,
+	.set_rxfh		= ice_set_rxfh,
+};
+
+/**
+ * ice_set_ethtool_ops - setup netdev ethtool ops
+ * @netdev: network interface device structure
+ *
+ * setup netdev ethtool ops with ice specific ops
+ */
+void ice_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index fc9b0b179e99..777a1c653edb 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -94,6 +94,8 @@
 #define PFGEN_CTRL			0x00091000
 #define PFGEN_CTRL_PFSWR_S		0
 #define PFGEN_CTRL_PFSWR_M		BIT(PFGEN_CTRL_PFSWR_S)
+#define PFGEN_STATE			0x00088000
+#define PRTGEN_STATUS			0x000B8100
 #define PFHMC_ERRORDATA			0x00520500
 #define PFHMC_ERRORINFO			0x00520400
 #define GLINT_DYN_CTL(_INT)		(0x00160000 + ((_INT) * 4))
@@ -165,6 +167,7 @@
 #define QRX_CTRL_QENA_REQ_M		BIT(QRX_CTRL_QENA_REQ_S)
 #define QRX_CTRL_QENA_STAT_S		2
 #define QRX_CTRL_QENA_STAT_M		BIT(QRX_CTRL_QENA_STAT_S)
+#define QRX_ITR(_QRX)			(0x00292000 + ((_QRX) * 4))
 #define QRX_TAIL(_QRX)			(0x00290000 + ((_QRX) * 4))
 #define GLNVM_FLA			0x000B6108
 #define GLNVM_FLA_LOCKED_S		6
@@ -180,5 +183,82 @@
 #define PF_FUNC_RID			0x0009E880
 #define PF_FUNC_RID_FUNC_NUM_S		0
 #define PF_FUNC_RID_FUNC_NUM_M		ICE_M(0x7, PF_FUNC_RID_FUNC_NUM_S)
+#define GLPRT_BPRCH(_i)			(0x00381384 + ((_i) * 8))
+#define GLPRT_BPRCL(_i)			(0x00381380 + ((_i) * 8))
+#define GLPRT_BPTCH(_i)			(0x00381244 + ((_i) * 8))
+#define GLPRT_BPTCL(_i)			(0x00381240 + ((_i) * 8))
+#define GLPRT_CRCERRS(_i)		(0x00380100 + ((_i) * 8))
+#define GLPRT_GORCH(_i)			(0x00380004 + ((_i) * 8))
+#define GLPRT_GORCL(_i)			(0x00380000 + ((_i) * 8))
+#define GLPRT_GOTCH(_i)			(0x00380B44 + ((_i) * 8))
+#define GLPRT_GOTCL(_i)			(0x00380B40 + ((_i) * 8))
+#define GLPRT_ILLERRC(_i)		(0x003801C0 + ((_i) * 8))
+#define GLPRT_LXOFFRXC(_i)		(0x003802C0 + ((_i) * 8))
+#define GLPRT_LXOFFTXC(_i)		(0x00381180 + ((_i) * 8))
+#define GLPRT_LXONRXC(_i)		(0x00380280 + ((_i) * 8))
+#define GLPRT_LXONTXC(_i)		(0x00381140 + ((_i) * 8))
+#define GLPRT_MLFC(_i)			(0x00380040 + ((_i) * 8))
+#define GLPRT_MPRCH(_i)			(0x00381344 + ((_i) * 8))
+#define GLPRT_MPRCL(_i)			(0x00381340 + ((_i) * 8))
+#define GLPRT_MPTCH(_i)			(0x00381204 + ((_i) * 8))
+#define GLPRT_MPTCL(_i)			(0x00381200 + ((_i) * 8))
+#define GLPRT_MRFC(_i)			(0x00380080 + ((_i) * 8))
+#define GLPRT_PRC1023H(_i)		(0x00380A04 + ((_i) * 8))
+#define GLPRT_PRC1023L(_i)		(0x00380A00 + ((_i) * 8))
+#define GLPRT_PRC127H(_i)		(0x00380944 + ((_i) * 8))
+#define GLPRT_PRC127L(_i)		(0x00380940 + ((_i) * 8))
+#define GLPRT_PRC1522H(_i)		(0x00380A44 + ((_i) * 8))
+#define GLPRT_PRC1522L(_i)		(0x00380A40 + ((_i) * 8))
+#define GLPRT_PRC255H(_i)		(0x00380984 + ((_i) * 8))
+#define GLPRT_PRC255L(_i)		(0x00380980 + ((_i) * 8))
+#define GLPRT_PRC511H(_i)		(0x003809C4 + ((_i) * 8))
+#define GLPRT_PRC511L(_i)		(0x003809C0 + ((_i) * 8))
+#define GLPRT_PRC64H(_i)		(0x00380904 + ((_i) * 8))
+#define GLPRT_PRC64L(_i)		(0x00380900 + ((_i) * 8))
+#define GLPRT_PRC9522H(_i)		(0x00380A84 + ((_i) * 8))
+#define GLPRT_PRC9522L(_i)		(0x00380A80 + ((_i) * 8))
+#define GLPRT_PTC1023H(_i)		(0x00380C84 + ((_i) * 8))
+#define GLPRT_PTC1023L(_i)		(0x00380C80 + ((_i) * 8))
+#define GLPRT_PTC127H(_i)		(0x00380BC4 + ((_i) * 8))
+#define GLPRT_PTC127L(_i)		(0x00380BC0 + ((_i) * 8))
+#define GLPRT_PTC1522H(_i)		(0x00380CC4 + ((_i) * 8))
+#define GLPRT_PTC1522L(_i)		(0x00380CC0 + ((_i) * 8))
+#define GLPRT_PTC255H(_i)		(0x00380C04 + ((_i) * 8))
+#define GLPRT_PTC255L(_i)		(0x00380C00 + ((_i) * 8))
+#define GLPRT_PTC511H(_i)		(0x00380C44 + ((_i) * 8))
+#define GLPRT_PTC511L(_i)		(0x00380C40 + ((_i) * 8))
+#define GLPRT_PTC64H(_i)		(0x00380B84 + ((_i) * 8))
+#define GLPRT_PTC64L(_i)		(0x00380B80 + ((_i) * 8))
+#define GLPRT_PTC9522H(_i)		(0x00380D04 + ((_i) * 8))
+#define GLPRT_PTC9522L(_i)		(0x00380D00 + ((_i) * 8))
+#define GLPRT_RFC(_i)			(0x00380AC0 + ((_i) * 8))
+#define GLPRT_RJC(_i)			(0x00380B00 + ((_i) * 8))
+#define GLPRT_RLEC(_i)			(0x00380140 + ((_i) * 8))
+#define GLPRT_ROC(_i)			(0x00380240 + ((_i) * 8))
+#define GLPRT_RUC(_i)			(0x00380200 + ((_i) * 8))
+#define GLPRT_TDOLD(_i)			(0x00381280 + ((_i) * 8))
+#define GLPRT_UPRCH(_i)			(0x00381304 + ((_i) * 8))
+#define GLPRT_UPRCL(_i)			(0x00381300 + ((_i) * 8))
+#define GLPRT_UPTCH(_i)			(0x003811C4 + ((_i) * 8))
+#define GLPRT_UPTCL(_i)			(0x003811C0 + ((_i) * 8))
+#define GLV_BPRCH(_i)			(0x003B6004 + ((_i) * 8))
+#define GLV_BPRCL(_i)			(0x003B6000 + ((_i) * 8))
+#define GLV_BPTCH(_i)			(0x0030E004 + ((_i) * 8))
+#define GLV_BPTCL(_i)			(0x0030E000 + ((_i) * 8))
+#define GLV_GORCH(_i)			(0x003B0004 + ((_i) * 8))
+#define GLV_GORCL(_i)			(0x003B0000 + ((_i) * 8))
+#define GLV_GOTCH(_i)			(0x00300004 + ((_i) * 8))
+#define GLV_GOTCL(_i)			(0x00300000 + ((_i) * 8))
+#define GLV_MPRCH(_i)			(0x003B4004 + ((_i) * 8))
+#define GLV_MPRCL(_i)			(0x003B4000 + ((_i) * 8))
+#define GLV_MPTCH(_i)			(0x0030C004 + ((_i) * 8))
+#define GLV_MPTCL(_i)			(0x0030C000 + ((_i) * 8))
+#define GLV_RDPC(_i)			(0x00294C04 + ((_i) * 4))
+#define GLV_TEPC(_VSI)			(0x00312000 + ((_VSI) * 4))
+#define GLV_UPRCH(_i)			(0x003B2004 + ((_i) * 8))
+#define GLV_UPRCL(_i)			(0x003B2000 + ((_i) * 8))
+#define GLV_UPTCH(_i)			(0x0030A004 + ((_i) * 8))
+#define GLV_UPTCL(_i)			(0x0030A000 + ((_i) * 8))
+#define VSIQF_HKEY_MAX_INDEX		12
 
 #endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2124e7ad640c..9c1841700314 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -9,7 +9,7 @@
 
 #define DRV_VERSION	"ice-0.0.1-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
-static const char ice_drv_ver[] = DRV_VERSION;
+const char ice_drv_ver[] = DRV_VERSION;
 static const char ice_driver_string[] = DRV_SUMMARY;
 static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
 
@@ -30,6 +30,8 @@ static struct workqueue_struct *ice_wq;
 static const struct net_device_ops ice_netdev_ops;
 
 static int ice_vsi_release(struct ice_vsi *vsi);
+static void ice_update_vsi_stats(struct ice_vsi *vsi);
+static void ice_update_pf_stats(struct ice_pf *pf);
 
 /**
  * ice_get_free_slot - get the next non-NULL location index in array
@@ -214,12 +216,41 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h)
 	}
 }
 
+/**
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
+ */
+static void ice_watchdog_subtask(struct ice_pf *pf)
+{
+	int i;
+
+	/* if interface is down do nothing */
+	if (test_bit(__ICE_DOWN, pf->state) ||
+	    test_bit(__ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* make sure we don't do these things too often */
+	if (time_before(jiffies,
+			pf->serv_tmr_prev + pf->serv_tmr_period))
+		return;
+
+	pf->serv_tmr_prev = jiffies;
+
+	/* Update the stats for active netdevs so the network stack
+	 * can look at updated numbers whenever it cares to
+	 */
+	ice_update_pf_stats(pf);
+	for (i = 0; i < pf->num_alloc_vsi; i++)
+		if (pf->vsi[i] && pf->vsi[i]->netdev)
+			ice_update_vsi_stats(pf->vsi[i]);
+}
+
 /**
  * ice_print_link_msg - print link up or down message
  * @vsi: the VSI whose link status is being queried
  * @isup: boolean for if the link is now up or down
  */
-static void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 {
 	const char *speed;
 	const char *fc;
@@ -452,6 +483,7 @@ static void ice_service_task(struct work_struct *work)
 	unsigned long start_time = jiffies;
 
 	/* subtasks */
+	ice_watchdog_subtask(pf);
 	ice_clean_adminq_subtask(pf);
 
 	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
@@ -1763,6 +1795,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	/* setup watchdog timeout value to be 5 second */
 	netdev->watchdog_timeo = 5 * HZ;
 
+	ice_set_ethtool_ops(netdev);
+
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = ICE_MAX_MTU;
 
@@ -3459,6 +3493,434 @@ static int ice_up_complete(struct ice_vsi *vsi)
 	return err;
 }
 
+/**
+ * ice_up - Bring the connection back up after being down
+ * @vsi: VSI being configured
+ */
+int ice_up(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_vsi_cfg(vsi);
+	if (!err)
+		err = ice_up_complete(vsi);
+
+	return err;
+}
+
+/**
+ * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
+ * @ring: Tx or Rx ring to read stats from
+ * @pkts: packets stats counter
+ * @bytes: bytes stats counter
+ *
+ * This function fetches stats from the ring considering the atomic operations
+ * that needs to be performed to read u64 values in 32 bit machine.
+ */
+static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts,
+					 u64 *bytes)
+{
+	unsigned int start;
+	*pkts = 0;
+	*bytes = 0;
+
+	if (!ring)
+		return;
+	do {
+		start = u64_stats_fetch_begin_irq(&ring->syncp);
+		*pkts = ring->stats.pkts;
+		*bytes = ring->stats.bytes;
+	} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+}
+
+/**
+ * ice_stat_update40 - read 40 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @hireg: high 32 bit HW register to read from
+ * @loreg: low 32 bit HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+static void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg,
+			      bool prev_stat_loaded, u64 *prev_stat,
+			      u64 *cur_stat)
+{
+	u64 new_data;
+
+	new_data = rd32(hw, loreg);
+	new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
+
+	/* device stats are not reset at PFR, they likely will not be zeroed
+	 * when the driver starts. So save the first values read and use them as
+	 * offsets to be subtracted from the raw values in order to report stats
+	 * that count from zero.
+	 */
+	if (!prev_stat_loaded)
+		*prev_stat = new_data;
+	if (likely(new_data >= *prev_stat))
+		*cur_stat = new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat = (new_data + BIT_ULL(40)) - *prev_stat;
+	*cur_stat &= 0xFFFFFFFFFFULL;
+}
+
+/**
+ * ice_stat_update32 - read 32 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+static void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+			      u64 *prev_stat, u64 *cur_stat)
+{
+	u32 new_data;
+
+	new_data = rd32(hw, reg);
+
+	/* device stats are not reset at PFR, they likely will not be zeroed
+	 * when the driver starts. So save the first values read and use them as
+	 * offsets to be subtracted from the raw values in order to report stats
+	 * that count from zero.
+	 */
+	if (!prev_stat_loaded)
+		*prev_stat = new_data;
+	if (likely(new_data >= *prev_stat))
+		*cur_stat = new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat = (new_data + BIT_ULL(32)) - *prev_stat;
+}
+
+/**
+ * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_eth_stats(struct ice_vsi *vsi)
+{
+	struct ice_eth_stats *prev_es, *cur_es;
+	struct ice_hw *hw = &vsi->back->hw;
+	u16 vsi_num = vsi->vsi_num;    /* HW absolute index of a VSI */
+
+	prev_es = &vsi->eth_stats_prev;
+	cur_es = &vsi->eth_stats;
+
+	ice_stat_update40(hw, GLV_GORCH(vsi_num), GLV_GORCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->rx_bytes,
+			  &cur_es->rx_bytes);
+
+	ice_stat_update40(hw, GLV_UPRCH(vsi_num), GLV_UPRCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->rx_unicast,
+			  &cur_es->rx_unicast);
+
+	ice_stat_update40(hw, GLV_MPRCH(vsi_num), GLV_MPRCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->rx_multicast,
+			  &cur_es->rx_multicast);
+
+	ice_stat_update40(hw, GLV_BPRCH(vsi_num), GLV_BPRCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->rx_broadcast,
+			  &cur_es->rx_broadcast);
+
+	ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_discards, &cur_es->rx_discards);
+
+	ice_stat_update40(hw, GLV_GOTCH(vsi_num), GLV_GOTCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->tx_bytes,
+			  &cur_es->tx_bytes);
+
+	ice_stat_update40(hw, GLV_UPTCH(vsi_num), GLV_UPTCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->tx_unicast,
+			  &cur_es->tx_unicast);
+
+	ice_stat_update40(hw, GLV_MPTCH(vsi_num), GLV_MPTCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->tx_multicast,
+			  &cur_es->tx_multicast);
+
+	ice_stat_update40(hw, GLV_BPTCH(vsi_num), GLV_BPTCL(vsi_num),
+			  vsi->stat_offsets_loaded, &prev_es->tx_broadcast,
+			  &cur_es->tx_broadcast);
+
+	ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_errors, &cur_es->tx_errors);
+
+	vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * ice_update_vsi_ring_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
+{
+	struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
+	struct ice_ring *ring;
+	u64 pkts, bytes;
+	int i;
+
+	/* reset netdev stats */
+	vsi_stats->tx_packets = 0;
+	vsi_stats->tx_bytes = 0;
+	vsi_stats->rx_packets = 0;
+	vsi_stats->rx_bytes = 0;
+
+	/* reset non-netdev (extended) stats */
+	vsi->tx_restart = 0;
+	vsi->tx_busy = 0;
+	vsi->tx_linearize = 0;
+	vsi->rx_buf_failed = 0;
+	vsi->rx_page_failed = 0;
+
+	rcu_read_lock();
+
+	/* update Tx rings counters */
+	ice_for_each_txq(vsi, i) {
+		ring = READ_ONCE(vsi->tx_rings[i]);
+		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+		vsi_stats->tx_packets += pkts;
+		vsi_stats->tx_bytes += bytes;
+		vsi->tx_restart += ring->tx_stats.restart_q;
+		vsi->tx_busy += ring->tx_stats.tx_busy;
+		vsi->tx_linearize += ring->tx_stats.tx_linearize;
+	}
+
+	/* update Rx rings counters */
+	ice_for_each_rxq(vsi, i) {
+		ring = READ_ONCE(vsi->rx_rings[i]);
+		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+		vsi_stats->rx_packets += pkts;
+		vsi_stats->rx_bytes += bytes;
+		vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
+		vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
+	}
+
+	rcu_read_unlock();
+}
+
+/**
+ * ice_update_vsi_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_stats(struct ice_vsi *vsi)
+{
+	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
+	struct ice_eth_stats *cur_es = &vsi->eth_stats;
+	struct ice_pf *pf = vsi->back;
+
+	if (test_bit(__ICE_DOWN, vsi->state) ||
+	    test_bit(__ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* get stats as recorded by Tx/Rx rings */
+	ice_update_vsi_ring_stats(vsi);
+
+	/* get VSI stats as recorded by the hardware */
+	ice_update_eth_stats(vsi);
+
+	cur_ns->tx_errors = cur_es->tx_errors;
+	cur_ns->rx_dropped = cur_es->rx_discards;
+	cur_ns->tx_dropped = cur_es->tx_discards;
+	cur_ns->multicast = cur_es->rx_multicast;
+
+	/* update some more netdev stats if this is main VSI */
+	if (vsi->type == ICE_VSI_PF) {
+		cur_ns->rx_crc_errors = pf->stats.crc_errors;
+		cur_ns->rx_errors = pf->stats.crc_errors +
+				    pf->stats.illegal_bytes;
+		cur_ns->rx_length_errors = pf->stats.rx_len_errors;
+	}
+}
+
+/**
+ * ice_update_pf_stats - Update PF port stats counters
+ * @pf: PF whose stats needs to be updated
+ */
+static void ice_update_pf_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *prev_ps, *cur_ps;
+	struct ice_hw *hw = &pf->hw;
+	u8 pf_id;
+
+	prev_ps = &pf->stats_prev;
+	cur_ps = &pf->stats;
+	pf_id = hw->pf_id;
+
+	ice_stat_update40(hw, GLPRT_GORCH(pf_id), GLPRT_GORCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.rx_bytes,
+			  &cur_ps->eth.rx_bytes);
+
+	ice_stat_update40(hw, GLPRT_UPRCH(pf_id), GLPRT_UPRCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.rx_unicast,
+			  &cur_ps->eth.rx_unicast);
+
+	ice_stat_update40(hw, GLPRT_MPRCH(pf_id), GLPRT_MPRCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.rx_multicast,
+			  &cur_ps->eth.rx_multicast);
+
+	ice_stat_update40(hw, GLPRT_BPRCH(pf_id), GLPRT_BPRCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.rx_broadcast,
+			  &cur_ps->eth.rx_broadcast);
+
+	ice_stat_update40(hw, GLPRT_GOTCH(pf_id), GLPRT_GOTCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.tx_bytes,
+			  &cur_ps->eth.tx_bytes);
+
+	ice_stat_update40(hw, GLPRT_UPTCH(pf_id), GLPRT_UPTCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.tx_unicast,
+			  &cur_ps->eth.tx_unicast);
+
+	ice_stat_update40(hw, GLPRT_MPTCH(pf_id), GLPRT_MPTCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.tx_multicast,
+			  &cur_ps->eth.tx_multicast);
+
+	ice_stat_update40(hw, GLPRT_BPTCH(pf_id), GLPRT_BPTCL(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->eth.tx_broadcast,
+			  &cur_ps->eth.tx_broadcast);
+
+	ice_stat_update32(hw, GLPRT_TDOLD(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->tx_dropped_link_down,
+			  &cur_ps->tx_dropped_link_down);
+
+	ice_stat_update40(hw, GLPRT_PRC64H(pf_id), GLPRT_PRC64L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->rx_size_64,
+			  &cur_ps->rx_size_64);
+
+	ice_stat_update40(hw, GLPRT_PRC127H(pf_id), GLPRT_PRC127L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->rx_size_127,
+			  &cur_ps->rx_size_127);
+
+	ice_stat_update40(hw, GLPRT_PRC255H(pf_id), GLPRT_PRC255L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->rx_size_255,
+			  &cur_ps->rx_size_255);
+
+	ice_stat_update40(hw, GLPRT_PRC511H(pf_id), GLPRT_PRC511L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->rx_size_511,
+			  &cur_ps->rx_size_511);
+
+	ice_stat_update40(hw, GLPRT_PRC1023H(pf_id),
+			  GLPRT_PRC1023L(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
+
+	ice_stat_update40(hw, GLPRT_PRC1522H(pf_id),
+			  GLPRT_PRC1522L(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
+
+	ice_stat_update40(hw, GLPRT_PRC9522H(pf_id),
+			  GLPRT_PRC9522L(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);
+
+	ice_stat_update40(hw, GLPRT_PTC64H(pf_id), GLPRT_PTC64L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->tx_size_64,
+			  &cur_ps->tx_size_64);
+
+	ice_stat_update40(hw, GLPRT_PTC127H(pf_id), GLPRT_PTC127L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->tx_size_127,
+			  &cur_ps->tx_size_127);
+
+	ice_stat_update40(hw, GLPRT_PTC255H(pf_id), GLPRT_PTC255L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->tx_size_255,
+			  &cur_ps->tx_size_255);
+
+	ice_stat_update40(hw, GLPRT_PTC511H(pf_id), GLPRT_PTC511L(pf_id),
+			  pf->stat_prev_loaded, &prev_ps->tx_size_511,
+			  &cur_ps->tx_size_511);
+
+	ice_stat_update40(hw, GLPRT_PTC1023H(pf_id),
+			  GLPRT_PTC1023L(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
+
+	ice_stat_update40(hw, GLPRT_PTC1522H(pf_id),
+			  GLPRT_PTC1522L(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
+
+	ice_stat_update40(hw, GLPRT_PTC9522H(pf_id),
+			  GLPRT_PTC9522L(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
+
+	ice_stat_update32(hw, GLPRT_LXONRXC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
+
+	ice_stat_update32(hw, GLPRT_LXOFFRXC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
+
+	ice_stat_update32(hw, GLPRT_LXONTXC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
+
+	ice_stat_update32(hw, GLPRT_LXOFFTXC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
+
+	ice_stat_update32(hw, GLPRT_CRCERRS(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->crc_errors, &cur_ps->crc_errors);
+
+	ice_stat_update32(hw, GLPRT_ILLERRC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
+
+	ice_stat_update32(hw, GLPRT_MLFC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->mac_local_faults,
+			  &cur_ps->mac_local_faults);
+
+	ice_stat_update32(hw, GLPRT_MRFC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->mac_remote_faults,
+			  &cur_ps->mac_remote_faults);
+
+	ice_stat_update32(hw, GLPRT_RLEC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
+
+	ice_stat_update32(hw, GLPRT_RUC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);
+
+	ice_stat_update32(hw, GLPRT_RFC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);
+
+	ice_stat_update32(hw, GLPRT_ROC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);
+
+	ice_stat_update32(hw, GLPRT_RJC(pf_id), pf->stat_prev_loaded,
+			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
+
+	pf->stat_prev_loaded = true;
+}
+
+/**
+ * ice_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static
+void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct rtnl_link_stats64 *vsi_stats;
+	struct ice_vsi *vsi = np->vsi;
+
+	vsi_stats = &vsi->net_stats;
+
+	if (test_bit(__ICE_DOWN, vsi->state) || !vsi->num_txq || !vsi->num_rxq)
+		return;
+	/* netdev packet/byte stats come from ring counter. These are obtained
+	 * by summing up ring counters (done by ice_update_vsi_ring_stats).
+	 */
+	ice_update_vsi_ring_stats(vsi);
+	stats->tx_packets = vsi_stats->tx_packets;
+	stats->tx_bytes = vsi_stats->tx_bytes;
+	stats->rx_packets = vsi_stats->rx_packets;
+	stats->rx_bytes = vsi_stats->rx_bytes;
+
+	/* The rest of the stats can be read from the hardware but instead we
+	 * just return values that the watchdog task has already obtained from
+	 * the hardware.
+	 */
+	stats->multicast = vsi_stats->multicast;
+	stats->tx_errors = vsi_stats->tx_errors;
+	stats->tx_dropped = vsi_stats->tx_dropped;
+	stats->rx_errors = vsi_stats->rx_errors;
+	stats->rx_dropped = vsi_stats->rx_dropped;
+	stats->rx_crc_errors = vsi_stats->rx_crc_errors;
+	stats->rx_length_errors = vsi_stats->rx_length_errors;
+}
+
 /**
  * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
  * @vsi: VSI having NAPI disabled
@@ -3478,7 +3940,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi)
  * ice_down - Shutdown the connection
  * @vsi: The VSI being stopped
  */
-static int ice_down(struct ice_vsi *vsi)
+int ice_down(struct ice_vsi *vsi)
 {
 	int i, err;
 
@@ -3878,6 +4340,7 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_open = ice_open,
 	.ndo_stop = ice_stop,
 	.ndo_start_xmit = ice_start_xmit,
+	.ndo_get_stats64 = ice_get_stats64,
 	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
 	.ndo_set_features = ice_set_features,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 991ac56ca7b3..c7e0de3b8662 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -20,6 +20,7 @@
 #define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
 #define ICE_DBG_AQ_CMD		BIT_ULL(27)
+#define ICE_DBG_USER		BIT_ULL(31)
 
 enum ice_aq_res_ids {
 	ICE_NVM_RES_ID = 1,
@@ -42,6 +43,13 @@ enum ice_fc_mode {
 	ICE_FC_DFLT
 };
 
+enum ice_set_fc_aq_failures {
+	ICE_SET_FC_AQ_FAIL_NONE = 0,
+	ICE_SET_FC_AQ_FAIL_GET,
+	ICE_SET_FC_AQ_FAIL_SET,
+	ICE_SET_FC_AQ_FAIL_UPDATE
+};
+
 /* Various MAC types */
 enum ice_mac_type {
 	ICE_MAC_UNKNOWN = 0,
@@ -301,10 +309,72 @@ struct ice_hw {
 
 };
 
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct ice_eth_stats {
+	u64 rx_bytes;			/* gorc */
+	u64 rx_unicast;			/* uprc */
+	u64 rx_multicast;		/* mprc */
+	u64 rx_broadcast;		/* bprc */
+	u64 rx_discards;		/* rdpc */
+	u64 rx_unknown_protocol;	/* rupp */
+	u64 tx_bytes;			/* gotc */
+	u64 tx_unicast;			/* uptc */
+	u64 tx_multicast;		/* mptc */
+	u64 tx_broadcast;		/* bptc */
+	u64 tx_discards;		/* tdpc */
+	u64 tx_errors;			/* tepc */
+};
+
+/* Statistics collected by the MAC */
+struct ice_hw_port_stats {
+	/* eth stats collected by the port */
+	struct ice_eth_stats eth;
+	/* additional port specific stats */
+	u64 tx_dropped_link_down;	/* tdold */
+	u64 crc_errors;			/* crcerrs */
+	u64 illegal_bytes;		/* illerrc */
+	u64 error_bytes;		/* errbc */
+	u64 mac_local_faults;		/* mlfc */
+	u64 mac_remote_faults;		/* mrfc */
+	u64 rx_len_errors;		/* rlec */
+	u64 link_xon_rx;		/* lxonrxc */
+	u64 link_xoff_rx;		/* lxoffrxc */
+	u64 link_xon_tx;		/* lxontxc */
+	u64 link_xoff_tx;		/* lxofftxc */
+	u64 rx_size_64;			/* prc64 */
+	u64 rx_size_127;		/* prc127 */
+	u64 rx_size_255;		/* prc255 */
+	u64 rx_size_511;		/* prc511 */
+	u64 rx_size_1023;		/* prc1023 */
+	u64 rx_size_1522;		/* prc1522 */
+	u64 rx_size_big;		/* prc9522 */
+	u64 rx_undersize;		/* ruc */
+	u64 rx_fragments;		/* rfc */
+	u64 rx_oversize;		/* roc */
+	u64 rx_jabber;			/* rjc */
+	u64 tx_size_64;			/* ptc64 */
+	u64 tx_size_127;		/* ptc127 */
+	u64 tx_size_255;		/* ptc255 */
+	u64 tx_size_511;		/* ptc511 */
+	u64 tx_size_1023;		/* ptc1023 */
+	u64 tx_size_1522;		/* ptc1522 */
+	u64 tx_size_big;		/* ptc9522 */
+};
+
 /* Checksum and Shadow RAM pointers */
 #define ICE_SR_NVM_DEV_STARTER_VER	0x18
 #define ICE_SR_NVM_EETRACK_LO		0x2D
 #define ICE_SR_NVM_EETRACK_HI		0x2E
+#define ICE_NVM_VER_LO_SHIFT		0
+#define ICE_NVM_VER_LO_MASK		(0xff << ICE_NVM_VER_LO_SHIFT)
+#define ICE_NVM_VER_HI_SHIFT		12
+#define ICE_NVM_VER_HI_MASK		(0xf << ICE_NVM_VER_HI_SHIFT)
+#define ICE_OEM_VER_PATCH_SHIFT		0
+#define ICE_OEM_VER_PATCH_MASK		(0xff << ICE_OEM_VER_PATCH_SHIFT)
+#define ICE_OEM_VER_BUILD_SHIFT		8
+#define ICE_OEM_VER_BUILD_MASK		(0xffff << ICE_OEM_VER_BUILD_SHIFT)
+#define ICE_OEM_VER_SHIFT		24
+#define ICE_OEM_VER_MASK		(0xff << ICE_OEM_VER_SHIFT)
 #define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
 #define ICE_SR_WORDS_IN_1KB		512
 

From 5513b920a4f7869acffc8e6d800aebee608409e0 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:17 -0700
Subject: [PATCH 1263/1678] ice: Update Tx scheduler tree for VSI multi-Tx
 queue support

This patch adds the ability for a VSI to use multiple Tx queues. More
specifically, the patch
    1) Provides the ability to update the Tx scheduler tree in the
       firmware. The driver can configure the Tx scheduler tree by
       adding/removing multiple Tx queues per TC per VSI.

    2) Allows a VSI to reconfigure its Tx queues during runtime.

    3) Synchronizes the Tx scheduler update operations using locks.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |   7 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  28 +
 drivers/net/ethernet/intel/ice/ice_common.c   |  54 ++
 drivers/net/ethernet/intel/ice/ice_common.h   |   3 +
 drivers/net/ethernet/intel/ice/ice_main.c     |  20 +-
 drivers/net/ethernet/intel/ice/ice_sched.c    | 886 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_sched.h    |   4 +
 drivers/net/ethernet/intel/ice/ice_type.h     |   7 +
 8 files changed, 1006 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 66d0976afa46..8898255e5a65 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -42,6 +42,7 @@ extern const char ice_drv_ver[];
 #define ICE_MIN_NUM_DESC	8
 #define ICE_MAX_NUM_DESC	8160
 #define ICE_REQ_DESC_MULTIPLE	32
+#define ICE_DFLT_TRAFFIC_CLASS	BIT(0)
 #define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
 #define ICE_ETHTOOL_FWVER_LEN	32
 #define ICE_AQ_LEN		64
@@ -261,6 +262,12 @@ static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
 	wr32(hw, GLINT_DYN_CTL(vector), val);
 }
 
+static inline void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+	vsi->tc_cfg.ena_tc =  ICE_DFLT_TRAFFIC_CLASS;
+	vsi->tc_cfg.numtc = 1;
+}
+
 void ice_set_ethtool_ops(struct net_device *netdev);
 int ice_up(struct ice_vsi *vsi);
 int ice_down(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index ae7376f2135b..348b9cc3c596 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -631,6 +631,25 @@ struct ice_aqc_get_topo {
 	__le32 addr_low;
 };
 
+/* Update TSE (indirect 0x0403)
+ * Get TSE (indirect 0x0404)
+ */
+struct ice_aqc_get_cfg_elem {
+	__le16 num_elem_req;	/* Used by commands */
+	__le16 num_elem_resp;	/* Used by responses */
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is the buffer for:
+ * Suspend Nodes (indirect 0x0409)
+ * Resume Nodes (indirect 0x040A)
+ */
+struct ice_aqc_suspend_resume_elem {
+	__le32 teid[1];
+};
+
 /* Add TSE (indirect 0x0401)
  * Delete TSE (indirect 0x040F)
  * Move TSE (indirect 0x0408)
@@ -691,6 +710,11 @@ struct ice_aqc_txsched_topo_grp_info_hdr {
 	__le16 reserved2;
 };
 
+struct ice_aqc_add_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	struct ice_aqc_txsched_elem_data generic[1];
+};
+
 struct ice_aqc_get_topo_elem {
 	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
 	struct ice_aqc_txsched_elem_data
@@ -1181,6 +1205,7 @@ struct ice_aq_desc {
 		struct ice_aqc_get_sw_cfg get_sw_conf;
 		struct ice_aqc_sw_rules sw_rules;
 		struct ice_aqc_get_topo get_topo;
+		struct ice_aqc_get_cfg_elem get_update_elem;
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_add_move_delete_elem add_move_delete_elem;
 		struct ice_aqc_nvm nvm;
@@ -1258,6 +1283,9 @@ enum ice_adminq_opc {
 
 	/* transmit scheduler commands */
 	ice_aqc_opc_get_dflt_topo			= 0x0400,
+	ice_aqc_opc_add_sched_elems			= 0x0401,
+	ice_aqc_opc_suspend_sched_elems			= 0x0409,
+	ice_aqc_opc_resume_sched_elems			= 0x040A,
 	ice_aqc_opc_delete_sched_elems			= 0x040F,
 	ice_aqc_opc_query_sched_res			= 0x0412,
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 02d589275db0..0ab9bf66c84e 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2089,3 +2089,57 @@ ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
 	mutex_unlock(&pi->sched_lock);
 	return status;
 }
+
+/**
+ * ice_cfg_vsi_qs - configure the new/exisiting VSI queues
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc_bitmap: TC bitmap
+ * @maxqs: max queues array per TC
+ * @owner: lan or rdma
+ *
+ * This function adds/updates the VSI queues per TC.
+ */
+static enum ice_status
+ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+	       u16 *maxqs, u8 owner)
+{
+	enum ice_status status = 0;
+	u8 i;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return ICE_ERR_CFG;
+
+	mutex_lock(&pi->sched_lock);
+
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		/* configuration is possible only if TC node is present */
+		if (!ice_sched_get_tc_node(pi, i))
+			continue;
+
+		status = ice_sched_cfg_vsi(pi, vsi_id, i, maxqs[i], owner,
+					   ice_is_tc_ena(tc_bitmap, i));
+		if (status)
+			break;
+	}
+
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_lan - configure VSI lan queues
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc_bitmap: TC bitmap
+ * @max_lanqs: max lan queues array per TC
+ *
+ * This function adds/updates the VSI lan queues per TC.
+ */
+enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+		u16 *max_lanqs)
+{
+	return ice_cfg_vsi_qs(pi, vsi_id, tc_bitmap, max_lanqs,
+			      ICE_SCHED_NODE_OWNER_LAN);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index c4213fbf9579..971277ebcc86 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -69,6 +69,9 @@ enum ice_status
 ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
 		u32 *q_teids, struct ice_sq_cd *cmd_details);
 enum ice_status
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_id, u8 tc_bitmap,
+		u16 *max_lanqs);
+enum ice_status
 ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_id, u8 tc, u8 num_qgrps,
 		struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
 		struct ice_sq_cd *cd);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9c1841700314..0f8637b97a34 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2085,10 +2085,11 @@ static struct ice_vsi *
 ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
 	      struct ice_port_info *pi)
 {
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
 	struct device *dev = &pf->pdev->dev;
 	struct ice_vsi_ctx ctxt = { 0 };
 	struct ice_vsi *vsi;
-	int ret;
+	int ret, i;
 
 	vsi = ice_vsi_alloc(pf, type);
 	if (!vsi) {
@@ -2156,6 +2157,20 @@ ice_vsi_setup(struct ice_pf *pf, enum ice_vsi_type type,
 		 */
 		goto err_rings;
 	}
+
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* configure VSI nodes based on number of queues and TC's */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq;
+
+	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num,
+			      vsi->tc_cfg.ena_tc, max_txqs);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "Failed VSI lan queue config\n");
+		goto err_rings;
+	}
+
 	return vsi;
 
 err_rings:
@@ -2398,8 +2413,7 @@ static void ice_determine_q_usage(struct ice_pf *pf)
 	q_left_tx = pf->hw.func_caps.common_cap.num_txq;
 	q_left_rx = pf->hw.func_caps.common_cap.num_rxq;
 
-	/* initial support for only 1 tx queue */
-	pf->num_lan_tx = 1;
+	pf->num_lan_tx = min_t(int, q_left_tx, num_online_cpus());
 
 	/* only 1 rx queue unless RSS is enabled */
 	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index e50eebbf78e3..f16ff3e4a840 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -359,6 +359,110 @@ ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
 	return status;
 }
 
+/**
+ * ice_aq_add_sched_elems - adds scheduling element
+ * @hw: pointer to the hw struct
+ * @grps_req: the number of groups that are requested to be added
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_added: returns total number of groups added
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add scheduling elements (0x0401)
+ */
+static enum ice_status
+ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
+		       struct ice_aqc_add_elem *buf, u16 buf_size,
+		       u16 *grps_added, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_move_delete_elem *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.add_move_delete_elem;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_sched_elems);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_grps_req = cpu_to_le16(grps_req);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && grps_added)
+		*grps_added = le16_to_cpu(cmd->num_grps_updated);
+
+	return status;
+}
+
+/**
+ * ice_suspend_resume_elems - suspend/resume scheduler elements
+ * @hw: pointer to the hw struct
+ * @elems_req: number of elements to suspend
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended
+ * @cd: pointer to command details structure or NULL
+ * @cmd_code: command code for suspend or resume
+ *
+ * suspend/resume scheduler elements
+ */
+static enum ice_status
+ice_suspend_resume_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_suspend_resume_elem *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd,
+			 enum ice_adminq_opc cmd_code)
+{
+	struct ice_aqc_get_cfg_elem *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.get_update_elem;
+	ice_fill_dflt_direct_cmd_desc(&desc, cmd_code);
+	cmd->num_elem_req = cpu_to_le16(elems_req);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && elems_ret)
+		*elems_ret = le16_to_cpu(cmd->num_elem_resp);
+	return status;
+}
+
+/**
+ * ice_aq_suspend_sched_elems - suspend scheduler elements
+ * @hw: pointer to the hw struct
+ * @elems_req: number of elements to suspend
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended
+ * @cd: pointer to command details structure or NULL
+ *
+ * Suspend scheduling elements (0x0409)
+ */
+static enum ice_status
+ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
+			   struct ice_aqc_suspend_resume_elem *buf,
+			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
+					cd, ice_aqc_opc_suspend_sched_elems);
+}
+
+/**
+ * ice_aq_resume_sched_elems - resume scheduler elements
+ * @hw: pointer to the hw struct
+ * @elems_req: number of elements to resume
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements resumed
+ * @cd: pointer to command details structure or NULL
+ *
+ * resume scheduling elements (0x040A)
+ */
+static enum ice_status
+ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
+			  struct ice_aqc_suspend_resume_elem *buf,
+			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_suspend_resume_elems(hw, elems_req, buf, buf_size, elems_ret,
+					cd, ice_aqc_opc_resume_sched_elems);
+}
+
 /**
  * ice_aq_query_sched_res - query scheduler resource
  * @hw: pointer to the hw struct
@@ -379,6 +483,46 @@ ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
 }
 
+/**
+ * ice_sched_suspend_resume_elems - suspend or resume hw nodes
+ * @hw: pointer to the hw struct
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be suspended or resumed
+ * @suspend: true means suspend / false means resume
+ *
+ * This function suspends or resumes hw nodes
+ */
+static enum ice_status
+ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
+			       bool suspend)
+{
+	struct ice_aqc_suspend_resume_elem *buf;
+	u16 i, buf_size, num_elem_ret = 0;
+	enum ice_status status;
+
+	buf_size = sizeof(*buf) * num_nodes;
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	for (i = 0; i < num_nodes; i++)
+		buf->teid[i] = cpu_to_le32(node_teids[i]);
+
+	if (suspend)
+		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
+						    buf_size, &num_elem_ret,
+						    NULL);
+	else
+		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
+						   buf_size, &num_elem_ret,
+						   NULL);
+	if (status || num_elem_ret != num_nodes)
+		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
 /**
  * ice_sched_clear_tx_topo - clears the schduler tree nodes
  * @pi: port information structure
@@ -462,6 +606,215 @@ void ice_sched_cleanup_all(struct ice_hw *hw)
 	hw->max_cgds = 0;
 }
 
+/**
+ * ice_sched_create_vsi_info_entry - create an empty new VSI entry
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ *
+ * This function creates a new VSI entry and adds it to list
+ */
+static struct ice_sched_vsi_info *
+ice_sched_create_vsi_info_entry(struct ice_port_info *pi, u16 vsi_id)
+{
+	struct ice_sched_vsi_info *vsi_elem;
+
+	if (!pi)
+		return NULL;
+
+	vsi_elem = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*vsi_elem),
+				GFP_KERNEL);
+	if (!vsi_elem)
+		return NULL;
+
+	list_add(&vsi_elem->list_entry, &pi->vsi_info_list);
+	vsi_elem->vsi_id = vsi_id;
+	return vsi_elem;
+}
+
+/**
+ * ice_sched_add_elems - add nodes to hw and SW DB
+ * @pi: port information structure
+ * @tc_node: pointer to the branch node
+ * @parent: pointer to the parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes
+ * @num_nodes_added: pointer to num nodes added
+ * @first_node_teid: if new nodes are added then return the teid of first node
+ *
+ * This function add nodes to hw as well as to SW DB for a given layer
+ */
+static enum ice_status
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+		    u16 *num_nodes_added, u32 *first_node_teid)
+{
+	struct ice_sched_node *prev, *new_node;
+	struct ice_aqc_add_elem *buf;
+	u16 i, num_groups_added = 0;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+	u16 buf_size;
+	u32 teid;
+
+	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->hdr.parent_teid = parent->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(num_nodes);
+	for (i = 0; i < num_nodes; i++) {
+		buf->generic[i].parent_teid = parent->info.node_teid;
+		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
+		buf->generic[i].data.valid_sections =
+			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+			ICE_AQC_ELEM_VALID_EIR;
+		buf->generic[i].data.generic = 0;
+		buf->generic[i].data.cir_bw.bw_profile_idx =
+			ICE_SCHED_DFLT_RL_PROF_ID;
+		buf->generic[i].data.eir_bw.bw_profile_idx =
+			ICE_SCHED_DFLT_RL_PROF_ID;
+	}
+
+	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
+					&num_groups_added, NULL);
+	if (status || num_groups_added != 1) {
+		ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n");
+		devm_kfree(ice_hw_to_dev(hw), buf);
+		return ICE_ERR_CFG;
+	}
+
+	*num_nodes_added = num_nodes;
+	/* add nodes to the SW DB */
+	for (i = 0; i < num_nodes; i++) {
+		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "add nodes in SW DB failed status =%d\n",
+				  status);
+			break;
+		}
+
+		teid = le32_to_cpu(buf->generic[i].node_teid);
+		new_node = ice_sched_find_node_by_teid(parent, teid);
+
+		if (!new_node) {
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "Node is missing for teid =%d\n", teid);
+			break;
+		}
+
+		new_node->sibling = NULL;
+		new_node->tc_num = tc_node->tc_num;
+
+		/* add it to previous node sibling pointer */
+		/* Note: siblings are not linked across branches */
+		prev = ice_sched_get_first_node(hw, tc_node, layer);
+
+		if (prev && prev != new_node) {
+			while (prev->sibling)
+				prev = prev->sibling;
+			prev->sibling = new_node;
+		}
+
+		if (i == 0)
+			*first_node_teid = teid;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node teid
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * This function add nodes to a given layer.
+ */
+static enum ice_status
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+			     struct ice_sched_node *tc_node,
+			     struct ice_sched_node *parent, u8 layer,
+			     u16 num_nodes, u32 *first_node_teid,
+			     u16 *num_nodes_added)
+{
+	u32 *first_teid_ptr = first_node_teid;
+	u16 new_num_nodes, max_child_nodes;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+	u16 num_added = 0;
+	u32 temp;
+
+	if (!num_nodes)
+		return status;
+
+	if (!parent || layer < hw->sw_entry_point_layer)
+		return ICE_ERR_PARAM;
+
+	*num_nodes_added = 0;
+
+	/* max children per node per layer */
+	max_child_nodes =
+	    le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children);
+
+	/* current number of children + required nodes exceed max children ? */
+	if ((parent->num_children + num_nodes) > max_child_nodes) {
+		/* Fail if the parent is a TC node */
+		if (parent == tc_node)
+			return ICE_ERR_CFG;
+
+		/* utilize all the spaces if the parent is not full */
+		if (parent->num_children < max_child_nodes) {
+			new_num_nodes = max_child_nodes - parent->num_children;
+			/* this recursion is intentional, and wouldn't
+			 * go more than 2 calls
+			 */
+			status = ice_sched_add_nodes_to_layer(pi, tc_node,
+							      parent, layer,
+							      new_num_nodes,
+							      first_node_teid,
+							      &num_added);
+			if (status)
+				return status;
+
+			*num_nodes_added += num_added;
+		}
+		/* Don't modify the first node teid memory if the first node was
+		 * added already in the above call. Instead send some temp
+		 * memory for all other recursive calls.
+		 */
+		if (num_added)
+			first_teid_ptr = &temp;
+
+		new_num_nodes = num_nodes - num_added;
+
+		/* This parent is full, try the next sibling */
+		parent = parent->sibling;
+
+		/* this recursion is intentional, for 1024 queues
+		 * per VSI, it goes max of 16 iterations.
+		 * 1024 / 8 = 128 layer 8 nodes
+		 * 128 /8 = 16 (add 8 nodes per iteration)
+		 */
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+						      layer, new_num_nodes,
+						      first_teid_ptr,
+						      &num_added);
+		*num_nodes_added += num_added;
+		return status;
+	}
+
+	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
+				     num_nodes_added, first_node_teid);
+	return status;
+}
+
 /**
  * ice_sched_get_qgrp_layer - get the current queue group layer number
  * @hw: pointer to the hw struct
@@ -474,6 +827,101 @@ static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
 }
 
+/**
+ * ice_sched_get_vsi_layer - get the current VSI layer number
+ * @hw: pointer to the hw struct
+ *
+ * This function returns the current VSI layer number
+ */
+static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
+{
+	/* Num Layers       VSI layer
+	 *     9               6
+	 *     7               4
+	 *     5 or less       sw_entry_point_layer
+	 */
+	/* calculate the vsi layer based on number of layers. */
+	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
+		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
+
+		if (layer > hw->sw_entry_point_layer)
+			return layer;
+	}
+	return hw->sw_entry_point_layer;
+}
+
+/**
+ * ice_sched_get_num_nodes_per_layer - Get the total number of nodes per layer
+ * @pi: pointer to the port info struct
+ * @layer: layer number
+ *
+ * This function calculates the number of nodes present in the scheduler tree
+ * including all the branches for a given layer
+ */
+static u16
+ice_sched_get_num_nodes_per_layer(struct ice_port_info *pi, u8 layer)
+{
+	struct ice_hw *hw;
+	u16 num_nodes = 0;
+	u8 i;
+
+	if (!pi)
+		return num_nodes;
+
+	hw = pi->hw;
+
+	/* Calculate the number of nodes for all TCs */
+	for (i = 0; i < pi->root->num_children; i++) {
+		struct ice_sched_node *tc_node, *node;
+
+		tc_node = pi->root->children[i];
+
+		/* Get the first node */
+		node = ice_sched_get_first_node(hw, tc_node, layer);
+		if (!node)
+			continue;
+
+		/* count the siblings */
+		while (node) {
+			num_nodes++;
+			node = node->sibling;
+		}
+	}
+
+	return num_nodes;
+}
+
+/**
+ * ice_sched_val_max_nodes - check max number of nodes reached or not
+ * @pi: port information structure
+ * @new_num_nodes_per_layer: pointer to the new number of nodes array
+ *
+ * This function checks whether the scheduler tree layers have enough space to
+ * add new nodes
+ */
+static enum ice_status
+ice_sched_validate_for_max_nodes(struct ice_port_info *pi,
+				 u16 *new_num_nodes_per_layer)
+{
+	struct ice_hw *hw = pi->hw;
+	u8 i, qg_layer;
+	u16 num_nodes;
+
+	qg_layer = ice_sched_get_qgrp_layer(hw);
+
+	/* walk through all the layers from SW entry point to qgroup layer */
+	for (i = hw->sw_entry_point_layer; i <= qg_layer; i++) {
+		num_nodes = ice_sched_get_num_nodes_per_layer(pi, i);
+		if (num_nodes + new_num_nodes_per_layer[i] >
+		    le16_to_cpu(hw->layer_info[i].max_pf_nodes)) {
+			ice_debug(hw, ICE_DBG_SCHED,
+				  "max nodes reached for layer = %d\n", i);
+			return ICE_ERR_CFG;
+		}
+	}
+	return 0;
+}
+
 /**
  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
  * @pi: port information structure
@@ -516,6 +964,7 @@ ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
 	struct ice_sched_node *node;
 
 	ice_rm_dflt_leaf_node(pi);
+
 	/* remove the default nodes except TC and root nodes */
 	node = pi->root;
 	while (node) {
@@ -525,6 +974,7 @@ ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
 			ice_free_sched_node(pi, node);
 			break;
 		}
+
 		if (!node->num_children)
 			break;
 		node = node->children[0];
@@ -720,8 +1170,10 @@ ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
 
 		if (node == child)
 			return true;
+
 		if (child->tx_sched_layer > node->tx_sched_layer)
 			return false;
+
 		/* this recursion is intentional, and wouldn't
 		 * go more than 8 calls
 		 */
@@ -751,13 +1203,17 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
 
 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
 	max_children = le16_to_cpu(pi->hw->layer_info[qgrp_layer].max_children);
+
 	list_elem = ice_sched_get_vsi_info_entry(pi, vsi_id);
 	if (!list_elem)
 		goto lan_q_exit;
+
 	vsi_node = list_elem->vsi_node[tc];
+
 	/* validate invalid VSI id */
 	if (!vsi_node)
 		goto lan_q_exit;
+
 	/* get the first q group node from VSI sub-tree */
 	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
 	while (qgrp_node) {
@@ -768,6 +1224,436 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
 				break;
 		qgrp_node = qgrp_node->sibling;
 	}
+
 lan_q_exit:
 	return qgrp_node;
 }
+
+/**
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI id
+ * @hw: pointer to the hw struct
+ * @tc_node: pointer to the TC node
+ * @vsi_id: VSI id
+ *
+ * This function retrieves a VSI node for a given VSI id from a given
+ * TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
+		       u16 vsi_id)
+{
+	struct ice_sched_node *node;
+	u8 vsi_layer;
+
+	vsi_layer = ice_sched_get_vsi_layer(hw);
+	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
+
+	/* Check whether it already exists */
+	while (node) {
+		if (node->vsi_id == vsi_id)
+			return node;
+		node = node->sibling;
+	}
+
+	return node;
+}
+
+/**
+ * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
+ * @hw: pointer to the hw struct
+ * @num_qs: number of queues
+ * @num_nodes: num nodes array
+ *
+ * This function calculates the number of VSI child nodes based on the
+ * number of queues.
+ */
+static void
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+{
+	u16 num = num_qs;
+	u8 i, qgl, vsil;
+
+	qgl = ice_sched_get_qgrp_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+
+	/* calculate num nodes from q group to VSI layer */
+	for (i = qgl; i > vsil; i--) {
+		u16 max_children = le16_to_cpu(hw->layer_info[i].max_children);
+
+		/* round to the next integer if there is a remainder */
+		num = DIV_ROUND_UP(num, max_children);
+
+		/* need at least one node */
+		num_nodes[i] = num ? num : 1;
+	}
+}
+
+/**
+ * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
+ * @pi: port information structure
+ * @vsi_id: VSI id
+ * @tc_node: pointer to the TC node
+ * @num_nodes: pointer to the num nodes that needs to be added per layer
+ * @owner: node owner (lan or rdma)
+ *
+ * This function adds the VSI child nodes to tree. It gets called for
+ * lan and rdma separately.
+ */
+static enum ice_status
+ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id,
+			      struct ice_sched_node *tc_node, u16 *num_nodes,
+			      u8 owner)
+{
+	struct ice_sched_node *parent, *node;
+	struct ice_hw *hw = pi->hw;
+	enum ice_status status;
+	u32 first_node_teid;
+	u16 num_added = 0;
+	u8 i, qgl, vsil;
+
+	status = ice_sched_validate_for_max_nodes(pi, num_nodes);
+	if (status)
+		return status;
+
+	qgl = ice_sched_get_qgrp_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+	for (i = vsil + 1; i <= qgl; i++) {
+		if (!parent)
+			return ICE_ERR_CFG;
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+						      num_nodes[i],
+						      &first_node_teid,
+						      &num_added);
+		if (status || num_nodes[i] != num_added)
+			return ICE_ERR_CFG;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_added) {
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+			node = parent;
+			while (node) {
+				node->owner = owner;
+				node = node->sibling;
+			}
+		} else {
+			parent = parent->children[0];
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree
+ * @pi: port information structure
+ * @vsi_node: pointer to the VSI node
+ * @num_nodes: pointer to the num nodes that needs to be removed per layer
+ * @owner: node owner (lan or rdma)
+ *
+ * This function removes the VSI child nodes from the tree. It gets called for
+ * lan and rdma separately.
+ */
+static void
+ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi,
+			     struct ice_sched_node *vsi_node, u16 *num_nodes,
+			     u8 owner)
+{
+	struct ice_sched_node *node, *next;
+	u8 i, qgl, vsil;
+	u16 num;
+
+	qgl = ice_sched_get_qgrp_layer(pi->hw);
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+
+	for (i = qgl; i > vsil; i--) {
+		num = num_nodes[i];
+		node = ice_sched_get_first_node(pi->hw, vsi_node, i);
+		while (node && num) {
+			next = node->sibling;
+			if (node->owner == owner && !node->num_children) {
+				ice_free_sched_node(pi, node);
+				num--;
+			}
+			node = next;
+		}
+	}
+}
+
+/**
+ * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
+ * @hw: pointer to the hw struct
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function calculates the number of supported nodes needed to add this
+ * VSI into tx tree including the VSI, parent and intermediate nodes in below
+ * layers
+ */
+static void
+ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
+				 struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+	struct ice_sched_node *node;
+	u16 max_child;
+	u8 i, vsil;
+
+	vsil = ice_sched_get_vsi_layer(hw);
+	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
+		/* Add intermediate nodes if TC has no children and
+		 * need at least one node for VSI
+		 */
+		if (!tc_node->num_children || i == vsil) {
+			num_nodes[i]++;
+		} else {
+			/* If intermediate nodes are reached max children
+			 * then add a new one.
+			 */
+			node = ice_sched_get_first_node(hw, tc_node, i);
+			max_child = le16_to_cpu(hw->layer_info[i].max_children);
+
+			/* scan all the siblings */
+			while (node) {
+				if (node->num_children < max_child)
+					break;
+				node = node->sibling;
+			}
+
+			/* all the nodes are full, allocate a new one */
+			if (!node)
+				num_nodes[i]++;
+		}
+}
+
+/**
+ * ice_sched_add_vsi_support_nodes - add VSI supported nodes into tx tree
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function adds the VSI supported nodes into tx tree including the
+ * VSI, its parent and intermediate nodes in below layers
+ */
+static enum ice_status
+ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_id,
+				struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+	struct ice_sched_node *parent = tc_node;
+	enum ice_status status;
+	u32 first_node_teid;
+	u16 num_added = 0;
+	u8 i, vsil;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	status = ice_sched_validate_for_max_nodes(pi, num_nodes);
+	if (status)
+		return status;
+
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+						      i, num_nodes[i],
+						      &first_node_teid,
+						      &num_added);
+		if (status || num_nodes[i] != num_added)
+			return ICE_ERR_CFG;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_added)
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+		else
+			parent = parent->children[0];
+
+		if (!parent)
+			return ICE_ERR_CFG;
+
+		if (i == vsil)
+			parent->vsi_id = vsi_id;
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_add_vsi_to_topo - add a new VSI into tree
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc: TC number
+ *
+ * This function adds a new VSI into scheduler tree
+ */
+static enum ice_status
+ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_id, u8 tc)
+{
+	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_sched_node *tc_node;
+	struct ice_hw *hw = pi->hw;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return ICE_ERR_PARAM;
+
+	/* calculate number of supported nodes needed for this VSI */
+	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
+
+	/* add vsi supported nodes to tc subtree */
+	return ice_sched_add_vsi_support_nodes(pi, vsi_id, tc_node, num_nodes);
+}
+
+/**
+ * ice_sched_update_vsi_child_nodes - update VSI child nodes
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc: TC number
+ * @new_numqs: new number of max queues
+ * @owner: owner of this subtree
+ *
+ * This function updates the VSI child nodes based on the number of queues
+ */
+static enum ice_status
+ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_id, u8 tc,
+				 u16 new_numqs, u8 owner)
+{
+	u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_sched_node *vsi_node;
+	struct ice_sched_node *tc_node;
+	struct ice_sched_vsi_info *vsi;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+	u16 prev_numqs;
+	u8 i;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return ICE_ERR_CFG;
+
+	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+	if (!vsi_node)
+		return ICE_ERR_CFG;
+
+	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
+	if (!vsi)
+		return ICE_ERR_CFG;
+
+	if (owner == ICE_SCHED_NODE_OWNER_LAN)
+		prev_numqs = vsi->max_lanq[tc];
+	else
+		return ICE_ERR_PARAM;
+
+	/* num queues are not changed */
+	if (prev_numqs == new_numqs)
+		return status;
+
+	/* calculate number of nodes based on prev/new number of qs */
+	if (prev_numqs)
+		ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes);
+
+	if (new_numqs)
+		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
+
+	if (prev_numqs > new_numqs) {
+		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+			new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i];
+
+		ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes,
+					     owner);
+	} else {
+		for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+			new_num_nodes[i] -= prev_num_nodes[i];
+
+		status = ice_sched_add_vsi_child_nodes(pi, vsi_id, tc_node,
+						       new_num_nodes, owner);
+		if (status)
+			return status;
+	}
+
+	if (owner == ICE_SCHED_NODE_OWNER_LAN)
+		vsi->max_lanq[tc] = new_numqs;
+
+	return status;
+}
+
+/**
+ * ice_sched_cfg_vsi - configure the new/exisiting VSI
+ * @pi: port information structure
+ * @vsi_id: VSI Id
+ * @tc: TC number
+ * @maxqs: max number of queues
+ * @owner: lan or rdma
+ * @enable: TC enabled or disabled
+ *
+ * This function adds/updates VSI nodes based on the number of queues. If TC is
+ * enabled and VSI is in suspended state then resume the VSI back. If TC is
+ * disabled then suspend the VSI if it is not already.
+ */
+enum ice_status
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
+		  u8 owner, bool enable)
+{
+	struct ice_sched_node *vsi_node, *tc_node;
+	struct ice_sched_vsi_info *vsi;
+	enum ice_status status = 0;
+	struct ice_hw *hw = pi->hw;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return ICE_ERR_PARAM;
+
+	vsi = ice_sched_get_vsi_info_entry(pi, vsi_id);
+	if (!vsi)
+		vsi = ice_sched_create_vsi_info_entry(pi, vsi_id);
+	if (!vsi)
+		return ICE_ERR_NO_MEMORY;
+
+	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+
+	/* suspend the VSI if tc is not enabled */
+	if (!enable) {
+		if (vsi_node && vsi_node->in_use) {
+			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
+								true);
+			if (!status)
+				vsi_node->in_use = false;
+		}
+		return status;
+	}
+
+	/* TC is enabled, if it is a new VSI then add it to the tree */
+	if (!vsi_node) {
+		status = ice_sched_add_vsi_to_topo(pi, vsi_id, tc);
+		if (status)
+			return status;
+		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_id);
+		if (!vsi_node)
+			return ICE_ERR_CFG;
+		vsi->vsi_node[tc] = vsi_node;
+		vsi_node->in_use = true;
+	}
+
+	/* update the VSI child nodes */
+	status = ice_sched_update_vsi_child_nodes(pi, vsi_id, tc, maxqs, owner);
+	if (status)
+		return status;
+
+	/* TC is enabled, resume the VSI if it is in the suspend state */
+	if (!vsi_node->in_use) {
+		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
+		if (!status)
+			vsi_node->in_use = true;
+	}
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 639859a3d0f0..badadcc120d3 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -7,6 +7,7 @@
 #include "ice_common.h"
 
 #define ICE_QGRP_LAYER_OFFSET	2
+#define ICE_VSI_LAYER_OFFSET	4
 
 struct ice_sched_agg_vsi_info {
 	struct list_head list_entry;
@@ -36,4 +37,7 @@ struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
 struct ice_sched_node *
 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_id, u8 tc,
 			   u8 owner);
+enum ice_status
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_id, u8 tc, u16 maxqs,
+		  u8 owner, bool enable);
 #endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index c7e0de3b8662..137a59a74b71 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -10,6 +10,11 @@
 #include "ice_controlq.h"
 #include "ice_lan_tx_rx.h"
 
+static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
+{
+	return test_bit(tc, (unsigned long *)&bitmap);
+}
+
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
 #define ICE_DBG_QCTX		BIT_ULL(6)
@@ -194,6 +199,8 @@ enum ice_agg_type {
 	ICE_AGG_TYPE_QG
 };
 
+#define ICE_SCHED_DFLT_RL_PROF_ID	0
+
 /* vsi type list entry to locate corresponding vsi/ag nodes */
 struct ice_sched_vsi_info {
 	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];

From 0b28b702e72a6ff90d417689159f72e8891fed78 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:18 -0700
Subject: [PATCH 1264/1678] ice: Support link events, reset and rebuild

Link events are posted to a PF's admin receive queue (ARQ). This patch
adds the ability to detect and process link events.

This patch also adds the ability to process resets.

The driver can process the following resets:
    1) EMP Reset (EMPR)
    2) Global Reset (GLOBR)
    3) Core Reset (CORER)
    4) Physical Function Reset (PFR)

EMPR is the largest level of reset that the driver can handle. An EMPR
resets the manageability block and also the data path, including PHY and
link for all the PFs. The affected PFs are notified of this event through
a miscellaneous interrupt.

GLOBR is a subset of EMPR. It does everything EMPR does except that it
doesn't reset the manageability block.

CORER is a subset of GLOBR. It does everything GLOBR does but doesn't
reset PHY and link.

PFR is a subset of CORER and affects only the given physical function.
In other words, PFR can be thought of as a CORER for a single PF. Since
only the issuing PF is affected, a PFR doesn't result in the miscellaneous
interrupt being triggered.

All the resets have the following in common:
1) Tx/Rx is halted and all queues are stopped.
2) All the VSIs and filters programmed for the PF are lost and have to be
   reprogrammed.
3) Control queue interfaces are reset and have to be reprogrammed.

In the rebuild flow, control queues are reinitialized, VSIs are reallocated
and filters are restored.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  19 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  19 +
 drivers/net/ethernet/intel/ice/ice_common.c   |  60 ++
 drivers/net/ethernet/intel/ice/ice_common.h   |   5 +
 .../net/ethernet/intel/ice/ice_hw_autogen.h   |   2 +
 drivers/net/ethernet/intel/ice/ice_main.c     | 581 +++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_type.h     |   1 +
 7 files changed, 681 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 8898255e5a65..24ce4b71c9ce 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -78,6 +78,11 @@ extern const char ice_drv_ver[];
 #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
 #define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
 
+/* Macro for each VSI in a PF */
+#define ice_for_each_vsi(pf, i) \
+	for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
+
+/* Macros for each tx/rx ring in a VSI */
 #define ice_for_each_txq(vsi, i) \
 	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
 
@@ -109,7 +114,16 @@ struct ice_sw {
 
 enum ice_state {
 	__ICE_DOWN,
+	__ICE_NEEDS_RESTART,
+	__ICE_RESET_RECOVERY_PENDING,	/* set by driver when reset starts */
 	__ICE_PFR_REQ,			/* set by driver and peers */
+	__ICE_CORER_REQ,		/* set by driver and peers */
+	__ICE_GLOBR_REQ,		/* set by driver and peers */
+	__ICE_CORER_RECV,		/* set by OICR handler */
+	__ICE_GLOBR_RECV,		/* set by OICR handler */
+	__ICE_EMPR_RECV,		/* set by OICR handler */
+	__ICE_SUSPENDED,		/* set on module remove path */
+	__ICE_RESET_FAILED,		/* set by reset/rebuild */
 	__ICE_ADMINQ_EVENT_PENDING,
 	__ICE_CFG_BUSY,
 	__ICE_SERVICE_SCHED,
@@ -226,6 +240,11 @@ struct ice_pf {
 	u16 q_left_rx;		/* remaining num rx queues left unclaimed */
 	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
 	u16 num_alloc_vsi;
+	u16 corer_count;	/* Core reset count */
+	u16 globr_count;	/* Global reset count */
+	u16 empr_count;		/* EMP reset count */
+	u16 pfr_count;		/* PF reset count */
+
 	struct ice_hw_port_stats stats;
 	struct ice_hw_port_stats stats_prev;
 	struct ice_hw hw;
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 348b9cc3c596..ea822a9edce8 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1009,6 +1009,23 @@ struct ice_aqc_get_link_status_data {
 	__le64 reserved4;
 };
 
+/* Set event mask command (direct 0x0613) */
+struct ice_aqc_set_event_mask {
+	u8	lport_num;
+	u8	reserved[7];
+	__le16	event_mask;
+#define ICE_AQ_LINK_EVENT_UPDOWN		BIT(1)
+#define ICE_AQ_LINK_EVENT_MEDIA_NA		BIT(2)
+#define ICE_AQ_LINK_EVENT_LINK_FAULT		BIT(3)
+#define ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM	BIT(4)
+#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS	BIT(5)
+#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT		BIT(6)
+#define ICE_AQ_LINK_EVENT_AN_COMPLETED		BIT(7)
+#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL	BIT(8)
+#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED	BIT(9)
+	u8	reserved1[6];
+};
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
@@ -1215,6 +1232,7 @@ struct ice_aq_desc {
 		struct ice_aqc_dis_txqs dis_txqs;
 		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
+		struct ice_aqc_set_event_mask set_event_mask;
 		struct ice_aqc_get_link_status get_link_status;
 	} params;
 };
@@ -1294,6 +1312,7 @@ enum ice_adminq_opc {
 	ice_aqc_opc_set_phy_cfg				= 0x0601,
 	ice_aqc_opc_restart_an				= 0x0605,
 	ice_aqc_opc_get_link_status			= 0x0607,
+	ice_aqc_opc_set_event_mask			= 0x0613,
 
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 0ab9bf66c84e..791f1eba5953 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1427,6 +1427,39 @@ out:
 	return status;
 }
 
+/**
+ * ice_get_link_status - get status of the HW network link
+ * @pi: port information structure
+ * @link_up: pointer to bool (true/false = linkup/linkdown)
+ *
+ * Variable link_up is true if link is up, false if link is down.
+ * The variable link_up is invalid if status is non zero. As a
+ * result of this call, link status reporting becomes enabled
+ */
+enum ice_status ice_get_link_status(struct ice_port_info *pi, bool *link_up)
+{
+	struct ice_phy_info *phy_info;
+	enum ice_status status = 0;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+
+	phy_info = &pi->phy;
+
+	if (phy_info->get_link_info) {
+		status = ice_update_link_info(pi);
+
+		if (status)
+			ice_debug(pi->hw, ICE_DBG_LINK,
+				  "get link status error, status = %d\n",
+				  status);
+	}
+
+	*link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP;
+
+	return status;
+}
+
 /**
  * ice_aq_set_link_restart_an
  * @pi: pointer to the port information structure
@@ -1456,6 +1489,33 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
 	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
 }
 
+/**
+ * ice_aq_set_event_mask
+ * @hw: pointer to the hw struct
+ * @port_num: port number of the physical function
+ * @mask: event mask to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set event mask (0x0613)
+ */
+enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_event_mask *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_event_mask;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
+
+	cmd->lport_num = port_num;
+
+	cmd->event_mask = cpu_to_le16(mask);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
 /**
  * __ice_aq_get_set_rss_lut
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 971277ebcc86..ad350a4921a1 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -20,6 +20,8 @@ enum ice_status
 ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		  struct ice_rq_event_info *e, u16 *pending);
 enum ice_status
+ice_get_link_status(struct ice_port_info *pi, bool *link_up);
+enum ice_status
 ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
 		enum ice_aq_res_access_type access);
 void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
@@ -66,6 +68,9 @@ enum ice_status
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd);
 enum ice_status
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd);
+enum ice_status
 ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
 		u32 *q_teids, struct ice_sq_cd *cmd_details);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 777a1c653edb..1b9e2ef48a9d 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -85,6 +85,8 @@
 #define GLGEN_RSTCTL			0x000B8180
 #define GLGEN_RSTCTL_GRSTDEL_S		0
 #define GLGEN_RSTCTL_GRSTDEL_M		ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RSTAT_RESET_TYPE_S	2
+#define GLGEN_RSTAT_RESET_TYPE_M	ICE_M(0x3, GLGEN_RSTAT_RESET_TYPE_S)
 #define GLGEN_RTRIG			0x000B8190
 #define GLGEN_RTRIG_CORER_S		0
 #define GLGEN_RTRIG_CORER_M		BIT(GLGEN_RTRIG_CORER_S)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 0f8637b97a34..2478dbe9bf4d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -29,6 +29,8 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 static struct workqueue_struct *ice_wq;
 static const struct net_device_ops ice_netdev_ops;
 
+static void ice_pf_dis_all_vsi(struct ice_pf *pf);
+static void ice_rebuild(struct ice_pf *pf);
 static int ice_vsi_release(struct ice_vsi *vsi);
 static void ice_update_vsi_stats(struct ice_vsi *vsi);
 static void ice_update_pf_stats(struct ice_pf *pf);
@@ -216,6 +218,132 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h)
 	}
 }
 
+/**
+ * ice_is_reset_recovery_pending - schedule a reset
+ * @state: pf state field
+ */
+static bool ice_is_reset_recovery_pending(unsigned long int *state)
+{
+	return test_bit(__ICE_RESET_RECOVERY_PENDING, state);
+}
+
+/**
+ * ice_prepare_for_reset - prep for the core to reset
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for reset.
+ */
+static void
+ice_prepare_for_reset(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			ice_remove_vsi_fltr(hw, pf->vsi[v]->vsi_num);
+
+	dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
+
+	/* disable the VSIs and their queues that are not already DOWN */
+	/* pf_dis_all_vsi modifies netdev structures -rtnl_lock needed */
+	ice_pf_dis_all_vsi(pf);
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			pf->vsi[v]->vsi_num = 0;
+
+	ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_do_reset - Initiate one of many types of resets
+ * @pf: board private structure
+ * @reset_type: reset type requested
+ * before this function was called.
+ */
+static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+	struct device *dev = &pf->pdev->dev;
+	struct ice_hw *hw = &pf->hw;
+
+	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+	WARN_ON(in_interrupt());
+
+	/* PFR is a bit of a special case because it doesn't result in an OICR
+	 * interrupt. So for PFR, we prepare for reset, issue the reset and
+	 * rebuild sequentially.
+	 */
+	if (reset_type == ICE_RESET_PFR) {
+		set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+		ice_prepare_for_reset(pf);
+	}
+
+	/* trigger the reset */
+	if (ice_reset(hw, reset_type)) {
+		dev_err(dev, "reset %d failed\n", reset_type);
+		set_bit(__ICE_RESET_FAILED, pf->state);
+		clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+		return;
+	}
+
+	if (reset_type == ICE_RESET_PFR) {
+		pf->pfr_count++;
+		ice_rebuild(pf);
+		clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+	}
+}
+
+/**
+ * ice_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ */
+static void ice_reset_subtask(struct ice_pf *pf)
+{
+	enum ice_reset_req reset_type;
+
+	rtnl_lock();
+
+	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
+	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what
+	 * type of reset happened and sets __ICE_RESET_RECOVERY_PENDING bit in
+	 * pf->state. So if reset/recovery is pending (as indicated by this bit)
+	 * we do a rebuild and return.
+	 */
+	if (ice_is_reset_recovery_pending(pf->state)) {
+		clear_bit(__ICE_GLOBR_RECV, pf->state);
+		clear_bit(__ICE_CORER_RECV, pf->state);
+		ice_prepare_for_reset(pf);
+
+		/* make sure we are ready to rebuild */
+		if (ice_check_reset(&pf->hw))
+			set_bit(__ICE_RESET_FAILED, pf->state);
+		else
+			ice_rebuild(pf);
+		clear_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+		goto unlock;
+	}
+
+	/* No pending resets to finish processing. Check for new resets */
+	if (test_and_clear_bit(__ICE_GLOBR_REQ, pf->state))
+		reset_type = ICE_RESET_GLOBR;
+	else if (test_and_clear_bit(__ICE_CORER_REQ, pf->state))
+		reset_type = ICE_RESET_CORER;
+	else if (test_and_clear_bit(__ICE_PFR_REQ, pf->state))
+		reset_type = ICE_RESET_PFR;
+	else
+		goto unlock;
+
+	/* reset if not already down or resetting */
+	if (!test_bit(__ICE_DOWN, pf->state) &&
+	    !test_bit(__ICE_CFG_BUSY, pf->state)) {
+		ice_do_reset(pf, reset_type);
+	}
+
+unlock:
+	rtnl_unlock();
+}
+
 /**
  * ice_watchdog_subtask - periodic tasks not using event driven scheduling
  * @pf: board private structure
@@ -314,6 +442,144 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 		    speed, fc);
 }
 
+/**
+ * ice_init_link_events - enable/initialize link events
+ * @pi: pointer to the port_info instance
+ *
+ * Returns -EIO on failure, 0 on success
+ */
+static int ice_init_link_events(struct ice_port_info *pi)
+{
+	u16 mask;
+
+	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
+		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
+
+	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw),
+			"Failed to set link event mask for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw),
+			"Failed to enable link events for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_link_event - update the vsi's netdev
+ * @vsi: the vsi on which the link event occurred
+ * @link_up: whether or not the vsi needs to be set up or down
+ */
+static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
+{
+	if (!vsi || test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	if (vsi->type == ICE_VSI_PF) {
+		if (!vsi->netdev) {
+			dev_dbg(&vsi->back->pdev->dev,
+				"vsi->netdev is not initialized!\n");
+			return;
+		}
+		if (link_up) {
+			netif_carrier_on(vsi->netdev);
+			netif_tx_wake_all_queues(vsi->netdev);
+		} else {
+			netif_carrier_off(vsi->netdev);
+			netif_tx_stop_all_queues(vsi->netdev);
+		}
+	}
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: pf that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ *
+ * Returns -EIO if ice_get_link_status() fails
+ * Returns 0 on success
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	u8 new_link_speed, old_link_speed;
+	struct ice_phy_info *phy_info;
+	bool new_link_same_as_old;
+	bool new_link, old_link;
+	u8 lport;
+	u16 v;
+
+	phy_info = &pi->phy;
+	phy_info->link_info_old = phy_info->link_info;
+	/* Force ice_get_link_status() to update link info */
+	phy_info->get_link_info = true;
+
+	old_link = (phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+	old_link_speed = phy_info->link_info_old.link_speed;
+
+	lport = pi->lport;
+	if (ice_get_link_status(pi, &new_link)) {
+		dev_dbg(&pf->pdev->dev,
+			"Could not get link status for port %d\n", lport);
+		return -EIO;
+	}
+
+	new_link_speed = phy_info->link_info.link_speed;
+
+	new_link_same_as_old = (new_link == old_link &&
+				new_link_speed == old_link_speed);
+
+	ice_for_each_vsi(pf, v) {
+		struct ice_vsi *vsi = pf->vsi[v];
+
+		if (!vsi || !vsi->port_info)
+			continue;
+
+		if (new_link_same_as_old &&
+		    (test_bit(__ICE_DOWN, vsi->state) ||
+		    new_link == netif_carrier_ok(vsi->netdev)))
+			continue;
+
+		if (vsi->port_info->lport == lport) {
+			ice_print_link_msg(vsi, new_link);
+			ice_vsi_link_event(vsi, new_link);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: pf that the link event is associated with
+ *
+ * Return -EINVAL if port_info is null
+ * Return status on succes
+ */
+static int ice_handle_link_event(struct ice_pf *pf)
+{
+	struct ice_port_info *port_info;
+	int status;
+
+	port_info = pf->hw.port_info;
+	if (!port_info)
+		return -EINVAL;
+
+	status = ice_link_event(pf, port_info);
+	if (status)
+		dev_dbg(&pf->pdev->dev,
+			"Could not process link event, error %d\n", status);
+
+	return status;
+}
+
 /**
  * __ice_clean_ctrlq - helper function to clean controlq rings
  * @pf: ptr to struct ice_pf
@@ -328,6 +594,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 	const char *qtype;
 	u32 oldval, val;
 
+	/* Do not clean control queue if/when PF reset fails */
+	if (test_bit(__ICE_RESET_FAILED, pf->state))
+		return 0;
+
 	switch (q_type) {
 	case ICE_CTL_Q_ADMIN:
 		cq = &hw->adminq;
@@ -394,6 +664,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 
 	do {
 		enum ice_status ret;
+		u16 opcode;
 
 		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
 		if (ret == ICE_ERR_AQ_NO_WORK)
@@ -404,6 +675,21 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 				ret);
 			break;
 		}
+
+		opcode = le16_to_cpu(event.desc.opcode);
+
+		switch (opcode) {
+		case ice_aqc_opc_get_link_status:
+			if (ice_handle_link_event(pf))
+				dev_err(&pf->pdev->dev,
+					"Could not handle link event");
+			break;
+		default:
+			dev_dbg(&pf->pdev->dev,
+				"%s Receive Queue unknown event 0x%04x ignored\n",
+				qtype, opcode);
+			break;
+		}
 	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
 
 	devm_kfree(&pf->pdev->dev, event.msg_buf);
@@ -483,6 +769,17 @@ static void ice_service_task(struct work_struct *work)
 	unsigned long start_time = jiffies;
 
 	/* subtasks */
+
+	/* process reset requests first */
+	ice_reset_subtask(pf);
+
+	/* bail if a reset/recovery cycle is pending */
+	if (ice_is_reset_recovery_pending(pf->state) ||
+	    test_bit(__ICE_SUSPENDED, pf->state)) {
+		ice_service_task_complete(pf);
+		return;
+	}
+
 	ice_watchdog_subtask(pf);
 	ice_clean_adminq_subtask(pf);
 
@@ -1208,6 +1505,37 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	if (!(oicr & PFINT_OICR_INTEVENT_M))
 		goto ena_intr;
 
+	if (oicr & PFINT_OICR_GRST_M) {
+		u32 reset;
+		/* we have a reset warning */
+		ena_mask &= ~PFINT_OICR_GRST_M;
+		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
+			GLGEN_RSTAT_RESET_TYPE_S;
+
+		if (reset == ICE_RESET_CORER)
+			pf->corer_count++;
+		else if (reset == ICE_RESET_GLOBR)
+			pf->globr_count++;
+		else
+			pf->empr_count++;
+
+		/* If a reset cycle isn't already in progress, we set a bit in
+		 * pf->state so that the service task can start a reset/rebuild.
+		 * We also make note of which reset happened so that peer
+		 * devices/drivers can be informed.
+		 */
+		if (!test_bit(__ICE_RESET_RECOVERY_PENDING, pf->state)) {
+			if (reset == ICE_RESET_CORER)
+				set_bit(__ICE_CORER_RECV, pf->state);
+			else if (reset == ICE_RESET_GLOBR)
+				set_bit(__ICE_GLOBR_RECV, pf->state);
+			else
+				set_bit(__ICE_EMPR_RECV, pf->state);
+
+			set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+		}
+	}
+
 	if (oicr & PFINT_OICR_HMC_ERR_M) {
 		ena_mask &= ~PFINT_OICR_HMC_ERR_M;
 		dev_dbg(&pf->pdev->dev,
@@ -1226,9 +1554,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		 */
 		if (oicr & (PFINT_OICR_PE_CRITERR_M |
 			    PFINT_OICR_PCI_EXCEPTION_M |
-			    PFINT_OICR_ECC_ERR_M))
+			    PFINT_OICR_ECC_ERR_M)) {
 			set_bit(__ICE_PFR_REQ, pf->state);
-
+			ice_service_task_schedule(pf);
+		}
 		ena_mask &= ~oicr;
 	}
 	ret = IRQ_HANDLED;
@@ -1485,6 +1814,13 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 			 dev_driver_string(&pf->pdev->dev),
 			 dev_name(&pf->pdev->dev));
 
+	/* Do not request IRQ but do enable OICR interrupt since settings are
+	 * lost during reset. Note that this function is called only during
+	 * rebuild path and not while reset is in progress.
+	 */
+	if (ice_is_reset_recovery_pending(pf->state))
+		goto skip_req_irq;
+
 	/* reserve one vector in irq_tracker for misc interrupts */
 	oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
 	if (oicr_idx < 0)
@@ -1503,6 +1839,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 		return err;
 	}
 
+skip_req_irq:
 	ice_ena_misc_vector(pf);
 
 	val = (pf->oicr_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
@@ -2070,6 +2407,100 @@ ice_vsi_cfg_rss_exit:
 	return err;
 }
 
+/**
+ * ice_vsi_reinit_setup - return resource and reallocate resource for a VSI
+ * @vsi: pointer to the ice_vsi
+ *
+ * This reallocates the VSIs queue resources
+ *
+ * Returns 0 on success and negative value on failure
+ */
+static int ice_vsi_reinit_setup(struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	int ret, i;
+
+	if (!vsi)
+		return -EINVAL;
+
+	ice_vsi_free_q_vectors(vsi);
+	ice_free_res(vsi->back->irq_tracker, vsi->base_vector, vsi->idx);
+	vsi->base_vector = 0;
+	ice_vsi_clear_rings(vsi);
+	ice_vsi_free_arrays(vsi, false);
+	ice_vsi_set_num_qs(vsi);
+
+	/* Initialize VSI struct elements and create VSI in FW */
+	ret = ice_vsi_add(vsi);
+	if (ret < 0)
+		goto err_vsi;
+
+	ret = ice_vsi_alloc_arrays(vsi, false);
+	if (ret < 0)
+		goto err_vsi;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		if (!vsi->netdev) {
+			ret = ice_cfg_netdev(vsi);
+			if (ret)
+				goto err_rings;
+
+			ret = register_netdev(vsi->netdev);
+			if (ret)
+				goto err_rings;
+
+			netif_carrier_off(vsi->netdev);
+			netif_tx_stop_all_queues(vsi->netdev);
+		}
+
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto err_rings;
+
+		ret = ice_vsi_setup_vector_base(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto err_vectors;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		break;
+	default:
+		break;
+	}
+
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* configure VSI nodes based on number of queues and TC's */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq;
+
+	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->vsi_num,
+			      vsi->tc_cfg.ena_tc, max_txqs);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed VSI lan queue config\n");
+		goto err_vectors;
+	}
+	return 0;
+
+err_vectors:
+	ice_vsi_free_q_vectors(vsi);
+err_rings:
+	if (vsi->netdev) {
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+err_vsi:
+	ice_vsi_clear(vsi);
+	set_bit(__ICE_RESET_FAILED, vsi->back->state);
+	return ret;
+}
+
 /**
  * ice_vsi_setup - Set up a VSI by a given type
  * @pf: board private structure
@@ -2345,10 +2776,17 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 	struct ice_vsi *vsi;
 	int status = 0;
 
-	vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info);
-	if (!vsi) {
-		status = -ENOMEM;
-		goto error_exit;
+	if (!ice_is_reset_recovery_pending(pf->state)) {
+		vsi = ice_vsi_setup(pf, ICE_VSI_PF, pf->hw.port_info);
+		if (!vsi) {
+			status = -ENOMEM;
+			goto error_exit;
+		}
+	} else {
+		vsi = pf->vsi[0];
+		status = ice_vsi_reinit_setup(vsi);
+		if (status < 0)
+			return -EIO;
 	}
 
 	/* tmp_add_list contains a list of MAC addresses for which MAC
@@ -2737,6 +3175,12 @@ static int ice_probe(struct pci_dev *pdev,
 	/* since everything is good, start the service timer */
 	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
 
+	err = ice_init_link_events(pf->hw.port_info);
+	if (err) {
+		dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err);
+		goto err_alloc_sw_unroll;
+	}
+
 	return 0;
 
 err_alloc_sw_unroll:
@@ -4217,6 +4661,131 @@ static int ice_vsi_release(struct ice_vsi *vsi)
 	return 0;
 }
 
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ */
+static void ice_dis_vsi(struct ice_vsi *vsi)
+{
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->netdev && netif_running(vsi->netdev) &&
+	    vsi->type == ICE_VSI_PF)
+		vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
+
+	ice_vsi_close(vsi);
+}
+
+/**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resume
+ */
+static void ice_ena_vsi(struct ice_vsi *vsi)
+{
+	if (!test_and_clear_bit(__ICE_NEEDS_RESTART, vsi->state))
+		return;
+
+	if (vsi->netdev && netif_running(vsi->netdev))
+		vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
+	else if (ice_vsi_open(vsi))
+		/* this clears the DOWN bit */
+		dev_dbg(&vsi->back->pdev->dev, "Failed open VSI 0x%04X on switch 0x%04X\n",
+			vsi->vsi_num, vsi->vsw->sw_id);
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ */
+static void ice_pf_dis_all_vsi(struct ice_pf *pf)
+{
+	int v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			ice_dis_vsi(pf->vsi[v]);
+}
+
+/**
+ * ice_pf_ena_all_vsi - Resume all VSIs on a PF
+ * @pf: the PF
+ */
+static void ice_pf_ena_all_vsi(struct ice_pf *pf)
+{
+	int v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			ice_ena_vsi(pf->vsi[v]);
+}
+
+/**
+ * ice_rebuild - rebuild after reset
+ * @pf: pf to rebuild
+ */
+static void ice_rebuild(struct ice_pf *pf)
+{
+	struct device *dev = &pf->pdev->dev;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status ret;
+	int err;
+
+	if (test_bit(__ICE_DOWN, pf->state))
+		goto clear_recovery;
+
+	dev_dbg(dev, "rebuilding pf\n");
+
+	ret = ice_init_all_ctrlq(hw);
+	if (ret) {
+		dev_err(dev, "control queues init failed %d\n", ret);
+		goto fail_reset;
+	}
+
+	ret = ice_clear_pf_cfg(hw);
+	if (ret) {
+		dev_err(dev, "clear PF configuration failed %d\n", ret);
+		goto fail_reset;
+	}
+
+	ice_clear_pxe_mode(hw);
+
+	ret = ice_get_caps(hw);
+	if (ret) {
+		dev_err(dev, "ice_get_caps failed %d\n", ret);
+		goto fail_reset;
+	}
+
+	/* basic nic switch setup */
+	err = ice_setup_pf_sw(pf);
+	if (err) {
+		dev_err(dev, "ice_setup_pf_sw failed\n");
+		goto fail_reset;
+	}
+
+	/* start misc vector */
+	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
+		err = ice_req_irq_msix_misc(pf);
+		if (err) {
+			dev_err(dev, "misc vector setup failed: %d\n", err);
+			goto fail_reset;
+		}
+	}
+
+	/* restart the VSIs that were rebuilt and running before the reset */
+	ice_pf_ena_all_vsi(pf);
+
+	return;
+
+fail_reset:
+	ice_shutdown_all_ctrlq(hw);
+	set_bit(__ICE_RESET_FAILED, pf->state);
+clear_recovery:
+	set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
+}
+
 /**
  * ice_set_rss - Set RSS keys and lut
  * @vsi: Pointer to VSI structure
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 137a59a74b71..c45cdee4e03c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -17,6 +17,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
 
 /* debug masks - set these bits in hw->debug_mask to control output */
 #define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_LINK		BIT_ULL(4)
 #define ICE_DBG_QCTX		BIT_ULL(6)
 #define ICE_DBG_NVM		BIT_ULL(7)
 #define ICE_DBG_LAN		BIT_ULL(8)

From e94d4478669357cd742170c77fc28d6db2040ce4 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 20 Mar 2018 07:58:19 -0700
Subject: [PATCH 1265/1678] ice: Implement filter sync, NDO operations and bump
 version

This patch implements multiple pieces of functionality:

1. Added ice_vsi_sync_filters, which is called through the service task
   to push filter updates to the hardware.

2. Add support to enable/disable promiscuous mode on an interface.
   Enabling/disabling promiscuous mode on an interface results in
   addition/removal of a promisc filter rule through ice_vsi_sync_filters.

3. Implement handlers for ndo_set_mac_address, ndo_change_mtu,
   ndo_poll_controller and ndo_set_rx_mode.

This patch also marks the end of the driver addition by bumping up the
driver version.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  14 +
 .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  21 +
 drivers/net/ethernet/intel/ice/ice_common.c   |  28 +
 drivers/net/ethernet/intel/ice/ice_common.h   |   3 +
 .../net/ethernet/intel/ice/ice_lan_tx_rx.h    |  12 +
 drivers/net/ethernet/intel/ice/ice_main.c     | 567 +++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_switch.c   |  77 +++
 drivers/net/ethernet/intel/ice/ice_switch.h   |   2 +
 drivers/net/ethernet/intel/ice/ice_type.h     |   5 +
 9 files changed, 728 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 24ce4b71c9ce..d8b5fff581e7 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -125,11 +125,20 @@ enum ice_state {
 	__ICE_SUSPENDED,		/* set on module remove path */
 	__ICE_RESET_FAILED,		/* set by reset/rebuild */
 	__ICE_ADMINQ_EVENT_PENDING,
+	__ICE_FLTR_OVERFLOW_PROMISC,
 	__ICE_CFG_BUSY,
 	__ICE_SERVICE_SCHED,
 	__ICE_STATE_NBITS		/* must be last */
 };
 
+enum ice_vsi_flags {
+	ICE_VSI_FLAG_UMAC_FLTR_CHANGED,
+	ICE_VSI_FLAG_MMAC_FLTR_CHANGED,
+	ICE_VSI_FLAG_VLAN_FLTR_CHANGED,
+	ICE_VSI_FLAG_PROMISC_CHANGED,
+	ICE_VSI_FLAG_NBITS		/* must be last */
+};
+
 /* struct that defines a VSI, associated with a dev */
 struct ice_vsi {
 	struct net_device *netdev;
@@ -144,7 +153,9 @@ struct ice_vsi {
 
 	u64 tx_linearize;
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	unsigned int current_netdev_flags;
 	u32 tx_restart;
 	u32 tx_busy;
 	u32 rx_buf_failed;
@@ -175,6 +186,9 @@ struct ice_vsi {
 	struct ice_eth_stats eth_stats;
 	struct ice_eth_stats eth_stats_prev;
 
+	struct list_head tmp_sync_list;		/* MAC filters to be synced */
+	struct list_head tmp_unsync_list;	/* MAC filters to be unsynced */
+
 	bool irqs_ready;
 	bool current_isup;		 /* Sync 'link up' logging */
 	bool stat_offsets_loaded;
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index ea822a9edce8..5b13ca1bd85f 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -135,6 +135,24 @@ struct ice_aqc_manage_mac_read_resp {
 	u8 mac_addr[ETH_ALEN];
 };
 
+/* Manage MAC address, write command - direct (0x0108) */
+struct ice_aqc_manage_mac_write {
+	u8 port_num;
+	u8 flags;
+#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN		BIT(0)
+#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP	BIT(1)
+#define ICE_AQC_MAN_MAC_WR_S		6
+#define ICE_AQC_MAN_MAC_WR_M		(3 << ICE_AQC_MAN_MAC_WR_S)
+#define ICE_AQC_MAN_MAC_UPDATE_LAA	0
+#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL	(BIT(0) << ICE_AQC_MAN_MAC_WR_S)
+	/* High 16 bits of MAC address in big endian order */
+	__be16 sah;
+	/* Low 32 bits of MAC address in big endian order */
+	__be32 sal;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* Clear PXE Command and response (direct 0x0110) */
 struct ice_aqc_clear_pxe {
 	u8 rx_cnt;
@@ -1214,6 +1232,7 @@ struct ice_aq_desc {
 		struct ice_aqc_q_shutdown q_shutdown;
 		struct ice_aqc_req_res res_owner;
 		struct ice_aqc_manage_mac_read mac_read;
+		struct ice_aqc_manage_mac_write mac_write;
 		struct ice_aqc_clear_pxe clear_pxe;
 		struct ice_aqc_list_caps get_cap;
 		struct ice_aqc_get_phy_caps get_phy;
@@ -1258,6 +1277,7 @@ enum ice_aq_err {
 	ICE_AQ_RC_ENOMEM	= 9,  /* Out of memory */
 	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
 	ICE_AQ_RC_EEXIST	= 13, /* object already exists */
+	ICE_AQ_RC_ENOSPC	= 16, /* No space left or allocation failure */
 };
 
 /* Admin Queue command opcodes */
@@ -1276,6 +1296,7 @@ enum ice_adminq_opc {
 
 	/* manage MAC address */
 	ice_aqc_opc_manage_mac_read			= 0x0107,
+	ice_aqc_opc_manage_mac_write			= 0x0108,
 
 	/* PXE */
 	ice_aqc_opc_clear_pxe_mode			= 0x0110,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 791f1eba5953..385f5d425d19 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1232,6 +1232,34 @@ enum ice_status ice_get_caps(struct ice_hw *hw)
 	return status;
 }
 
+/**
+ * ice_aq_manage_mac_write - manage MAC address write command
+ * @hw: pointer to the hw struct
+ * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
+ * @flags: flags to control write behavior
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to write MAC address to the NVM (0x0108).
+ */
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
+			struct ice_sq_cd *cd)
+{
+	struct ice_aqc_manage_mac_write *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.mac_write;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write);
+
+	cmd->flags = flags;
+
+	/* Prep values for flags, sah, sal */
+	cmd->sah = htons(*((u16 *)mac_addr));
+	cmd->sal = htonl(*((u32 *)(mac_addr + 2)));
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
 /**
  * ice_aq_clear_pxe_mode
  * @hw: pointer to the hw struct
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index ad350a4921a1..9a5519130af1 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -58,6 +58,9 @@ enum ice_status
 ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
 		void *buf, u16 buf_size, struct ice_sq_cd *cd);
 enum ice_status ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_manage_mac_write(struct ice_hw *hw, u8 *mac_addr, u8 flags,
+			struct ice_sq_cd *cd);
 enum ice_status ice_clear_pf_cfg(struct ice_hw *hw);
 enum ice_status
 ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool atomic_restart);
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 57acbcff740b..d23a91665b46 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -353,6 +353,18 @@ enum ice_tx_desc_len_fields {
 	ICE_TX_DESC_LEN_L4_LEN_S	= 14 /* 4 BITS */
 };
 
+#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S)
+#define ICE_TXD_QW1_IPLEN_M  (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S)
+#define ICE_TXD_QW1_L4LEN_M  (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S)
+
+/* Tx descriptor field limits in bytes */
+#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \
+			     ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD)
+#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \
+			    ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD)
+#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \
+			    ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD)
+
 #define ICE_TXD_QW1_TX_BUF_SZ_S	34
 #define ICE_TXD_QW1_L2TAG1_S	48
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2478dbe9bf4d..210b7910f1cd 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -7,7 +7,7 @@
 
 #include "ice.h"
 
-#define DRV_VERSION	"ice-0.0.1-k"
+#define DRV_VERSION	"ice-0.7.0-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
 const char ice_drv_ver[] = DRV_VERSION;
 static const char ice_driver_string[] = DRV_SUMMARY;
@@ -200,6 +200,48 @@ static int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
 	return 0;
 }
 
+/**
+ * ice_add_mac_to_sync_list - creates list of mac addresses to be synced
+ * @netdev: the net device on which the sync is happening
+ * @addr: mac address to sync
+ *
+ * This is a callback function which is called by the in kernel device sync
+ * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
+ * populates the tmp_sync_list, which is later used by ice_add_mac to add the
+ * mac filters from the hardware.
+ */
+static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_add_mac_to_unsync_list - creates list of mac addresses to be unsynced
+ * @netdev: the net device on which the unsync is happening
+ * @addr: mac address to unsync
+ *
+ * This is a callback function which is called by the in kernel device unsync
+ * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
+ * populates the tmp_unsync_list, which is later used by ice_remove_mac to
+ * delete the mac filters from the hardware.
+ */
+static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr))
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * ice_free_fltr_list - free filter lists helper
  * @dev: pointer to the device struct
@@ -218,6 +260,183 @@ static void ice_free_fltr_list(struct device *dev, struct list_head *h)
 	}
 }
 
+/**
+ * ice_vsi_fltr_changed - check if filter state changed
+ * @vsi: VSI to be checked
+ *
+ * returns true if filter state has changed, false otherwise.
+ */
+static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
+{
+	return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) ||
+	       test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) ||
+	       test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+}
+
+/**
+ * ice_vsi_sync_fltr - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ.
+ */
+static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	struct net_device *netdev = vsi->netdev;
+	bool promisc_forced_on = false;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status = 0;
+	u32 changed_flags = 0;
+	int err = 0;
+
+	if (!vsi->netdev)
+		return -EINVAL;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state))
+		usleep_range(1000, 2000);
+
+	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+	vsi->current_netdev_flags = vsi->netdev->flags;
+
+	INIT_LIST_HEAD(&vsi->tmp_sync_list);
+	INIT_LIST_HEAD(&vsi->tmp_unsync_list);
+
+	if (ice_vsi_fltr_changed(vsi)) {
+		clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+		clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+		clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+
+		/* grab the netdev's addr_list_lock */
+		netif_addr_lock_bh(netdev);
+		__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
+			      ice_add_mac_to_unsync_list);
+		__dev_mc_sync(netdev, ice_add_mac_to_sync_list,
+			      ice_add_mac_to_unsync_list);
+		/* our temp lists are populated. release lock */
+		netif_addr_unlock_bh(netdev);
+	}
+
+	/* Remove mac addresses in the unsync list */
+	status = ice_remove_mac(hw, &vsi->tmp_unsync_list);
+	ice_free_fltr_list(dev, &vsi->tmp_unsync_list);
+	if (status) {
+		netdev_err(netdev, "Failed to delete MAC filters\n");
+		/* if we failed because of alloc failures, just bail */
+		if (status == ICE_ERR_NO_MEMORY) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Add mac addresses in the sync list */
+	status = ice_add_mac(hw, &vsi->tmp_sync_list);
+	ice_free_fltr_list(dev, &vsi->tmp_sync_list);
+	if (status) {
+		netdev_err(netdev, "Failed to add MAC filters\n");
+		/* If there is no more space for new umac filters, vsi
+		 * should go into promiscuous mode. There should be some
+		 * space reserved for promiscuous filters.
+		 */
+		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
+		    !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
+				      vsi->state)) {
+			promisc_forced_on = true;
+			netdev_warn(netdev,
+				    "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+				    vsi->vsi_num);
+		} else {
+			err = -EIO;
+			goto out;
+		}
+	}
+	/* check for changes in promiscuous modes */
+	if (changed_flags & IFF_ALLMULTI)
+		netdev_warn(netdev, "Unsupported configuration\n");
+
+	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
+	    test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
+		clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+		if (vsi->current_netdev_flags & IFF_PROMISC) {
+			/* Apply TX filter rule to get traffic from VMs */
+			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+						  ICE_FLTR_TX);
+			if (status) {
+				netdev_err(netdev, "Error setting default VSI %i tx rule\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags &= ~IFF_PROMISC;
+				err = -EIO;
+				goto out_promisc;
+			}
+			/* Apply RX filter rule to get traffic from wire */
+			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, true,
+						  ICE_FLTR_RX);
+			if (status) {
+				netdev_err(netdev, "Error setting default VSI %i rx rule\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags &= ~IFF_PROMISC;
+				err = -EIO;
+				goto out_promisc;
+			}
+		} else {
+			/* Clear TX filter rule to stop traffic from VMs */
+			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+						  ICE_FLTR_TX);
+			if (status) {
+				netdev_err(netdev, "Error clearing default VSI %i tx rule\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags |= IFF_PROMISC;
+				err = -EIO;
+				goto out_promisc;
+			}
+			/* Clear filter RX to remove traffic from wire */
+			status = ice_cfg_dflt_vsi(hw, vsi->vsi_num, false,
+						  ICE_FLTR_RX);
+			if (status) {
+				netdev_err(netdev, "Error clearing default VSI %i rx rule\n",
+					   vsi->vsi_num);
+				vsi->current_netdev_flags |= IFF_PROMISC;
+				err = -EIO;
+				goto out_promisc;
+			}
+		}
+	}
+	goto exit;
+
+out_promisc:
+	set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+	goto exit;
+out:
+	/* if something went wrong then set the changed flag so we try again */
+	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+exit:
+	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	return err;
+}
+
+/**
+ * ice_sync_fltr_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ */
+static void ice_sync_fltr_subtask(struct ice_pf *pf)
+{
+	int v;
+
+	if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
+		return;
+
+	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+
+	for (v = 0; v < pf->num_alloc_vsi; v++)
+		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
+		    ice_vsi_sync_fltr(pf->vsi[v])) {
+			/* come back and try again later */
+			set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+			break;
+		}
+}
+
 /**
  * ice_is_reset_recovery_pending - schedule a reset
  * @state: pf state field
@@ -780,6 +999,7 @@ static void ice_service_task(struct work_struct *work)
 		return;
 	}
 
+	ice_sync_fltr_subtask(pf);
 	ice_watchdog_subtask(pf);
 	ice_clean_adminq_subtask(pf);
 
@@ -2491,6 +2711,7 @@ err_vectors:
 	ice_vsi_free_q_vectors(vsi);
 err_rings:
 	if (vsi->netdev) {
+		vsi->current_netdev_flags = 0;
 		unregister_netdev(vsi->netdev);
 		free_netdev(vsi->netdev);
 		vsi->netdev = NULL;
@@ -3300,6 +3521,197 @@ static void __exit ice_module_exit(void)
 }
 module_exit(ice_module_exit);
 
+/**
+ * ice_set_mac_address - NDO callback to set mac address
+ * @netdev: network interface device structure
+ * @pi: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_set_mac_address(struct net_device *netdev, void *pi)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct sockaddr *addr = pi;
+	enum ice_status status;
+	LIST_HEAD(a_mac_list);
+	LIST_HEAD(r_mac_list);
+	u8 flags = 0;
+	int err;
+	u8 *mac;
+
+	mac = (u8 *)addr->sa_data;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(netdev->dev_addr, mac)) {
+		netdev_warn(netdev, "already using mac %pM\n", mac);
+		return 0;
+	}
+
+	if (test_bit(__ICE_DOWN, pf->state) ||
+	    ice_is_reset_recovery_pending(pf->state)) {
+		netdev_err(netdev, "can't set mac %pM. device not ready\n",
+			   mac);
+		return -EBUSY;
+	}
+
+	/* When we change the mac address we also have to change the mac address
+	 * based filter rules that were created previously for the old mac
+	 * address. So first, we remove the old filter rule using ice_remove_mac
+	 * and then create a new filter rule using ice_add_mac. Note that for
+	 * both these operations, we first need to form a "list" of mac
+	 * addresses (even though in this case, we have only 1 mac address to be
+	 * added/removed) and this done using ice_add_mac_to_list. Depending on
+	 * the ensuing operation this "list" of mac addresses is either to be
+	 * added or removed from the filter.
+	 */
+	err = ice_add_mac_to_list(vsi, &r_mac_list, netdev->dev_addr);
+	if (err) {
+		err = -EADDRNOTAVAIL;
+		goto free_lists;
+	}
+
+	status = ice_remove_mac(hw, &r_mac_list);
+	if (status) {
+		err = -EADDRNOTAVAIL;
+		goto free_lists;
+	}
+
+	err = ice_add_mac_to_list(vsi, &a_mac_list, mac);
+	if (err) {
+		err = -EADDRNOTAVAIL;
+		goto free_lists;
+	}
+
+	status = ice_add_mac(hw, &a_mac_list);
+	if (status) {
+		err = -EADDRNOTAVAIL;
+		goto free_lists;
+	}
+
+free_lists:
+	/* free list entries */
+	ice_free_fltr_list(&pf->pdev->dev, &r_mac_list);
+	ice_free_fltr_list(&pf->pdev->dev, &a_mac_list);
+
+	if (err) {
+		netdev_err(netdev, "can't set mac %pM. filter update failed\n",
+			   mac);
+		return err;
+	}
+
+	/* change the netdev's mac address */
+	memcpy(netdev->dev_addr, mac, netdev->addr_len);
+	netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
+		   netdev->dev_addr);
+
+	/* write new mac address to the firmware */
+	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
+	status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
+	if (status) {
+		netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n",
+			   mac);
+	}
+	return 0;
+}
+
+/**
+ * ice_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ */
+static void ice_set_rx_mode(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (!vsi)
+		return;
+
+	/* Set the flags to synchronize filters
+	 * ndo_set_rx_mode may be triggered even without a change in netdev
+	 * flags
+	 */
+	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	ice_service_task_schedule(vsi->back);
+}
+
+/**
+ * ice_fdb_add - add an entry to the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN id
+ * @flags: instructions from stack about fdb operation
+ */
+static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
+		       struct net_device *dev, const unsigned char *addr,
+		       u16 vid, u16 flags)
+{
+	int err;
+
+	if (vid) {
+		netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
+		return -EINVAL;
+	}
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		netdev_err(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		err = dev_uc_add_excl(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_add_excl(dev, addr);
+	else
+		err = -EINVAL;
+
+	/* Only return duplicate errors if NLM_F_EXCL is set */
+	if (err == -EEXIST && !(flags & NLM_F_EXCL))
+		err = 0;
+
+	return err;
+}
+
+/**
+ * ice_fdb_del - delete an entry from the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN id
+ */
+static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
+		       struct net_device *dev, const unsigned char *addr,
+		       __always_unused u16 vid)
+{
+	int err;
+
+	if (ndm->ndm_state & NUD_PERMANENT) {
+		netdev_err(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr))
+		err = dev_uc_del(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_del(dev, addr);
+	else
+		err = -EINVAL;
+
+	return err;
+}
+
 /**
  * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
  * @vsi: the vsi being changed
@@ -3690,6 +4102,8 @@ static int ice_vsi_cfg(struct ice_vsi *vsi)
 {
 	int err;
 
+	ice_set_rx_mode(vsi->netdev);
+
 	err = ice_restore_vlan(vsi);
 	if (err)
 		return err;
@@ -4379,6 +4793,30 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
 	stats->rx_length_errors = vsi_stats->rx_length_errors;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/**
+ * ice_netpoll - polling "interrupt" handler
+ * @netdev: network interface device structure
+ *
+ * Used by netconsole to send skbs without having to re-enable interrupts.
+ * This is not called in the normal interrupt path.
+ */
+static void ice_netpoll(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	if (test_bit(__ICE_DOWN, vsi->state) ||
+	    !test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
+		return;
+
+	for (i = 0; i < vsi->num_q_vectors; i++)
+		ice_msix_clean_rings(0, vsi->q_vectors[i]);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
 /**
  * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
  * @vsi: VSI having NAPI disabled
@@ -4786,6 +5224,73 @@ clear_recovery:
 	set_bit(__ICE_RESET_RECOVERY_PENDING, pf->state);
 }
 
+/**
+ * ice_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u8 count = 0;
+
+	if (new_mtu == netdev->mtu) {
+		netdev_warn(netdev, "mtu is already %d\n", netdev->mtu);
+		return 0;
+	}
+
+	if (new_mtu < netdev->min_mtu) {
+		netdev_err(netdev, "new mtu invalid. min_mtu is %d\n",
+			   netdev->min_mtu);
+		return -EINVAL;
+	} else if (new_mtu > netdev->max_mtu) {
+		netdev_err(netdev, "new mtu invalid. max_mtu is %d\n",
+			   netdev->min_mtu);
+		return -EINVAL;
+	}
+	/* if a reset is in progress, wait for some time for it to complete */
+	do {
+		if (ice_is_reset_recovery_pending(pf->state)) {
+			count++;
+			usleep_range(1000, 2000);
+		} else {
+			break;
+		}
+
+	} while (count < 100);
+
+	if (count == 100) {
+		netdev_err(netdev, "can't change mtu. Device is busy\n");
+		return -EBUSY;
+	}
+
+	netdev->mtu = new_mtu;
+
+	/* if VSI is up, bring it down and then back up */
+	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		int err;
+
+		err = ice_down(vsi);
+		if (err) {
+			netdev_err(netdev, "change mtu if_up err %d\n", err);
+			return err;
+		}
+
+		err = ice_up(vsi);
+		if (err) {
+			netdev_err(netdev, "change mtu if_up err %d\n", err);
+			return err;
+		}
+	}
+
+	netdev_dbg(netdev, "changed mtu to %d\n", new_mtu);
+	return 0;
+}
+
 /**
  * ice_set_rss - Set RSS keys and lut
  * @vsi: Pointer to VSI structure
@@ -4919,12 +5424,72 @@ static int ice_stop(struct net_device *netdev)
 	return 0;
 }
 
+/**
+ * ice_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t
+ice_features_check(struct sk_buff *skb,
+		   struct net_device __always_unused *netdev,
+		   netdev_features_t features)
+{
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(ICE_TXD_MACLEN_MAX))
+		goto out_rm_features;
+
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(ICE_TXD_IPLEN_MAX))
+		goto out_rm_features;
+
+	if (skb->encapsulation) {
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(ICE_TXD_L4LEN_MAX))
+			goto out_rm_features;
+
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(ICE_TXD_IPLEN_MAX))
+			goto out_rm_features;
+	}
+
+	return features;
+out_rm_features:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
 static const struct net_device_ops ice_netdev_ops = {
 	.ndo_open = ice_open,
 	.ndo_stop = ice_stop,
 	.ndo_start_xmit = ice_start_xmit,
+	.ndo_features_check = ice_features_check,
+	.ndo_set_rx_mode = ice_set_rx_mode,
+	.ndo_set_mac_address = ice_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ice_change_mtu,
 	.ndo_get_stats64 = ice_get_stats64,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = ice_netpoll,
+#endif /* CONFIG_NET_POLL_CONTROLLER */
 	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
 	.ndo_set_features = ice_set_features,
+	.ndo_fdb_add = ice_fdb_add,
+	.ndo_fdb_del = ice_fdb_del,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 5624df9eb6fc..723d15f1e90b 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1626,6 +1626,83 @@ ice_remove_mac_exit:
 	return status;
 }
 
+/**
+ * ice_cfg_dflt_vsi - add filter rule to set/unset given VSI as default
+ * VSI for the switch (represented by swid)
+ * @hw: pointer to the hardware structure
+ * @vsi_id: number of VSI to set as default
+ * @set: true to add the above mentioned switch rule, false to remove it
+ * @direction: ICE_FLTR_RX or ICE_FLTR_TX
+ */
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction)
+{
+	struct ice_aqc_sw_rules_elem *s_rule;
+	struct ice_fltr_info f_info;
+	enum ice_adminq_opc opcode;
+	enum ice_status status;
+	u16 s_rule_size;
+
+	s_rule_size = set ? ICE_SW_RULE_RX_TX_ETH_HDR_SIZE :
+			    ICE_SW_RULE_RX_TX_NO_HDR_SIZE;
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return ICE_ERR_NO_MEMORY;
+
+	memset(&f_info, 0, sizeof(f_info));
+
+	f_info.lkup_type = ICE_SW_LKUP_DFLT;
+	f_info.flag = direction;
+	f_info.fltr_act = ICE_FWD_TO_VSI;
+	f_info.fwd_id.vsi_id = vsi_id;
+
+	if (f_info.flag & ICE_FLTR_RX) {
+		f_info.src = hw->port_info->lport;
+		if (!set)
+			f_info.fltr_rule_id =
+				hw->port_info->dflt_rx_vsi_rule_id;
+	} else if (f_info.flag & ICE_FLTR_TX) {
+		f_info.src = vsi_id;
+		if (!set)
+			f_info.fltr_rule_id =
+				hw->port_info->dflt_tx_vsi_rule_id;
+	}
+
+	if (set)
+		opcode = ice_aqc_opc_add_sw_rules;
+	else
+		opcode = ice_aqc_opc_remove_sw_rules;
+
+	ice_fill_sw_rule(hw, &f_info, s_rule, opcode);
+
+	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opcode, NULL);
+	if (status || !(f_info.flag & ICE_FLTR_TX_RX))
+		goto out;
+	if (set) {
+		u16 index = le16_to_cpu(s_rule->pdata.lkup_tx_rx.index);
+
+		if (f_info.flag & ICE_FLTR_TX) {
+			hw->port_info->dflt_tx_vsi_num = vsi_id;
+			hw->port_info->dflt_tx_vsi_rule_id = index;
+		} else if (f_info.flag & ICE_FLTR_RX) {
+			hw->port_info->dflt_rx_vsi_num = vsi_id;
+			hw->port_info->dflt_rx_vsi_rule_id = index;
+		}
+	} else {
+		if (f_info.flag & ICE_FLTR_TX) {
+			hw->port_info->dflt_tx_vsi_num = ICE_DFLT_VSI_INVAL;
+			hw->port_info->dflt_tx_vsi_rule_id = ICE_INVAL_ACT;
+		} else if (f_info.flag & ICE_FLTR_RX) {
+			hw->port_info->dflt_rx_vsi_num = ICE_DFLT_VSI_INVAL;
+			hw->port_info->dflt_rx_vsi_rule_id = ICE_INVAL_ACT;
+		}
+	}
+
+out:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
 /**
  * ice_remove_vlan_internal - Remove one VLAN based filter rule
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 80b4e1951b32..6f4a0d159dbf 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -155,5 +155,7 @@ enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
 void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_id);
 enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
 enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+enum ice_status
+ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_id, bool set, u8 direction);
 
 #endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index c45cdee4e03c..99c8a9a71b5e 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -10,6 +10,9 @@
 #include "ice_controlq.h"
 #include "ice_lan_tx_rx.h"
 
+#define ICE_BYTES_PER_WORD	2
+#define ICE_BYTES_PER_DWORD	4
+
 static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
 {
 	return test_bit(tc, (unsigned long *)&bitmap);
@@ -227,7 +230,9 @@ struct ice_port_info {
 	u8 port_state;
 #define ICE_SCHED_PORT_STATE_INIT	0x0
 #define ICE_SCHED_PORT_STATE_READY	0x1
+	u16 dflt_tx_vsi_rule_id;
 	u16 dflt_tx_vsi_num;
+	u16 dflt_rx_vsi_rule_id;
 	u16 dflt_rx_vsi_num;
 	struct ice_fc_info fc;
 	struct ice_mac_info mac;

From bfe040c3851ad09e0944de7b0973a71323610e23 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:30 -0700
Subject: [PATCH 1266/1678] i40e: move I40E_FLAG_FILTER_SYNC to a state bit

The I40E_FLAG_FILTER_SYNC flag is modified during run time possibly when
the RTNL lock is not held. Thus, it should not be part of pf->flags, and
instead should be using atomic bit operations in the pf->state field.

Create a __I40E_MACVLAN_SYNC_PENDING state bit, and use it instead of
the old I40E_FLAG_FILTER_SYNC flag.

This is part of a larger effort to remove the need for cmpxchg64 in
i40e_set_priv_flags().

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      |  3 ++-
 drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 1d33a8b3ef54..52f99142244b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -162,6 +162,7 @@ enum i40e_state_t {
 	__I40E_RESET_FAILED,
 	__I40E_PORT_SUSPENDED,
 	__I40E_VF_DISABLE,
+	__I40E_MACVLAN_SYNC_PENDING,
 	/* This must be last as it determines the size of the BITMAP */
 	__I40E_STATE_SIZE__,
 };
@@ -516,7 +517,7 @@ struct i40e_pf {
 #define I40E_FLAG_MSIX_ENABLED			BIT_ULL(2)
 #define I40E_FLAG_RSS_ENABLED			BIT_ULL(3)
 #define I40E_FLAG_VMDQ_ENABLED			BIT_ULL(4)
-#define I40E_FLAG_FILTER_SYNC			BIT_ULL(5)
+/* Gap for BIT_ULL(5) */
 #define I40E_FLAG_SRIOV_ENABLED			BIT_ULL(6)
 #define I40E_FLAG_DCB_CAPABLE			BIT_ULL(7)
 #define I40E_FLAG_DCB_ENABLED			BIT_ULL(8)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 536ed8e8a96f..7fc2c6d89637 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1382,7 +1382,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 		hash_add(vsi->mac_filter_hash, &f->hlist, key);
 
 		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-		vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
 	}
 
 	/* If we're asked to add a filter that has been marked for removal, it
@@ -1432,7 +1432,7 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
 	}
 
 	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-	vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+	set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->state);
 }
 
 /**
@@ -1955,7 +1955,7 @@ static void i40e_set_rx_mode(struct net_device *netdev)
 	/* check for other flag changes */
 	if (vsi->current_netdev_flags != vsi->netdev->flags) {
 		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-		vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
 	}
 }
 
@@ -2577,9 +2577,10 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 {
 	int v;
 
-	if (!pf || !(pf->flags & I40E_FLAG_FILTER_SYNC))
+	if (!pf)
+		return;
+	if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
 		return;
-	pf->flags &= ~I40E_FLAG_FILTER_SYNC;
 
 	for (v = 0; v < pf->num_alloc_vsi; v++) {
 		if (pf->vsi[v] &&
@@ -2588,7 +2589,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 
 			if (ret) {
 				/* come back and try again later */
-				pf->flags |= I40E_FLAG_FILTER_SYNC;
+				set_bit(__I40E_MACVLAN_SYNC_PENDING,
+					pf->state);
 				break;
 			}
 		}
@@ -12240,7 +12242,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 
 	if (f_count) {
 		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-		pf->flags |= I40E_FLAG_FILTER_SYNC;
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
 	}
 
 	/* Update VSI BW information */

From 2fcb12df7d2fa5a004fc3e7f589e58a08f7ed8c9 Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Thu, 17 Aug 2017 16:39:47 +0300
Subject: [PATCH 1267/1678] net/mlx5e: Expose PFC stall prevention counters

Add the needed capability bit and counters to device spec description.
Expose the following two counters in ethtool:

tx_pause_storm_warning_events: when the device is stalled for a period
longer than a pre-configured watermark, the counter increase, allowing
the debug utility an insight into current device status.

tx_pause_storm_error_events: when the device is stalled for a period
longer than a pre-configured timeout, the pause transmission is disabled,
and the counter increase.

Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 19 ++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c  |  3 ++
 include/linux/mlx5/device.h                   |  4 +++
 include/linux/mlx5/mlx5_ifc.h                 | 28 +++++++++++++++++--
 4 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 5f0f3493d747..2553c58dcf1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -754,7 +754,15 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
 	{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
+static const struct counter_desc pport_pfc_stall_stats_desc[] = {
+	{ "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) },
+	{ "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) },
+};
+
 #define NUM_PPORT_PER_PRIO_PFC_COUNTERS		ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
+#define NUM_PPORT_PFC_STALL_COUNTERS(priv)	(ARRAY_SIZE(pport_pfc_stall_stats_desc) * \
+						 MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) * \
+						 MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
 
 static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
 {
@@ -790,7 +798,8 @@ static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv)
 {
 	return (mlx5e_query_global_pause_combined(priv) +
 		hweight8(mlx5e_query_pfc_combined(priv))) *
-		NUM_PPORT_PER_PRIO_PFC_COUNTERS;
+		NUM_PPORT_PER_PRIO_PFC_COUNTERS +
+		NUM_PPORT_PFC_STALL_COUNTERS(priv);
 }
 
 static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
@@ -818,6 +827,10 @@ static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv,
 		}
 	}
 
+	for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       pport_pfc_stall_stats_desc[i].format);
+
 	return idx;
 }
 
@@ -845,6 +858,10 @@ static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv,
 		}
 	}
 
+	for (i = 0; i < NUM_PPORT_PFC_STALL_COUNTERS(priv); i++)
+		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
+						  pport_pfc_stall_stats_desc, i);
+
 	return idx;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 9d11e92fb541..d7bb10ab2173 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -183,6 +183,9 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 			return err;
 	}
 
+	if (MLX5_CAP_GEN(dev, debug))
+		mlx5_core_get_caps(dev, MLX5_CAP_DEBUG);
+
 	if (MLX5_CAP_GEN(dev, pcam_reg))
 		mlx5_get_pcam_reg(dev);
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index e5258ee4e38b..4b5939c78cdd 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1013,6 +1013,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_RESERVED,
 	MLX5_CAP_VECTOR_CALC,
 	MLX5_CAP_QOS,
+	MLX5_CAP_DEBUG,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1140,6 +1141,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_QOS(mdev, cap)\
 	MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap)
 
+#define MLX5_CAP_DEBUG(mdev, cap)\
+	MLX5_GET(debug_cap, mdev->caps.hca_cur[MLX5_CAP_DEBUG], cap)
+
 #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
 	MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 14ad84afe8ba..c7d50eccff9e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -593,6 +593,16 @@ struct mlx5_ifc_qos_cap_bits {
 	u8         reserved_at_100[0x700];
 };
 
+struct mlx5_ifc_debug_cap_bits {
+	u8         reserved_at_0[0x20];
+
+	u8         reserved_at_20[0x2];
+	u8         stall_detect[0x1];
+	u8         reserved_at_23[0x1d];
+
+	u8         reserved_at_40[0x7c0];
+};
+
 struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         csum_cap[0x1];
 	u8         vlan_cap[0x1];
@@ -855,7 +865,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         out_of_seq_cnt[0x1];
 	u8         vport_counters[0x1];
 	u8         retransmission_q_counters[0x1];
-	u8         reserved_at_183[0x1];
+	u8         debug[0x1];
 	u8         modify_rq_counter_set_id[0x1];
 	u8         rq_delay_drop[0x1];
 	u8         max_qp_cnt[0xa];
@@ -1572,7 +1582,17 @@ struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
 
 	u8         rx_pause_transition_low[0x20];
 
-	u8         reserved_at_3c0[0x400];
+	u8         reserved_at_3c0[0x40];
+
+	u8         device_stall_minor_watermark_cnt_high[0x20];
+
+	u8         device_stall_minor_watermark_cnt_low[0x20];
+
+	u8         device_stall_critical_watermark_cnt_high[0x20];
+
+	u8         device_stall_critical_watermark_cnt_low[0x20];
+
+	u8         reserved_at_480[0x340];
 };
 
 struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
@@ -7874,8 +7894,10 @@ struct mlx5_ifc_peir_reg_bits {
 };
 
 struct mlx5_ifc_pcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7b];
+	u8         reserved_at_0[0x76];
 
+	u8         pfcc_mask[0x1];
+	u8         reserved_at_77[0x4];
 	u8         rx_buffer_fullness_counters[0x1];
 	u8         ptys_connector_type[0x1];
 	u8         reserved_at_7d[0x1];

From 41898c66ef02628326827e503c4fd78e71bc13f7 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:31 -0700
Subject: [PATCH 1268/1678] i40e: move I40E_FLAG_UDP_FILTER_SYNC to the state
 field

This flag is modified during run time, possibly even when the RTNL lock
is not held. Additionally it has a few places which should be using
test_and_set or test_and_clear atomic bit operations.

Create a new state bit __I40E_UDP_SYNC_PENDING and use it instead of the
ole I40E_FLAG_UDP_FILTER_SYNC flag.

This is part of a larger effort to remove the need for using cmpxchg64
in i40e_set_priv_flags.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      |  3 ++-
 drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 52f99142244b..f557c3c36080 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -163,6 +163,7 @@ enum i40e_state_t {
 	__I40E_PORT_SUSPENDED,
 	__I40E_VF_DISABLE,
 	__I40E_MACVLAN_SYNC_PENDING,
+	__I40E_UDP_FILTER_SYNC_PENDING,
 	/* This must be last as it determines the size of the BITMAP */
 	__I40E_STATE_SIZE__,
 };
@@ -526,7 +527,7 @@ struct i40e_pf {
 #define I40E_FLAG_FD_SB_AUTO_DISABLED		BIT_ULL(11)
 #define I40E_FLAG_FD_ATR_AUTO_DISABLED		BIT_ULL(12)
 #define I40E_FLAG_MFP_ENABLED			BIT_ULL(13)
-#define I40E_FLAG_UDP_FILTER_SYNC		BIT_ULL(14)
+/* Gap for BIT_ULL(14) */
 #define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT_ULL(15)
 #define I40E_FLAG_VEB_MODE_ENABLED		BIT_ULL(16)
 #define I40E_FLAG_VEB_STATS_ENABLED		BIT_ULL(17)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7fc2c6d89637..3177e82059e6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9721,7 +9721,7 @@ static void i40e_sync_udp_filters(struct i40e_pf *pf)
 			pf->pending_udp_bitmap |= BIT_ULL(i);
 	}
 
-	pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+	set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
 }
 
 /**
@@ -9735,11 +9735,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
 	u16 port;
 	int i;
 
-	if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC))
+	if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state))
 		return;
 
-	pf->flags &= ~I40E_FLAG_UDP_FILTER_SYNC;
-
 	for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
 		if (pf->pending_udp_bitmap & BIT_ULL(i)) {
 			pf->pending_udp_bitmap &= ~BIT_ULL(i);
@@ -11439,7 +11437,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
 	/* New port: add it and mark its index in the bitmap */
 	pf->udp_ports[next_idx].port = port;
 	pf->pending_udp_bitmap |= BIT_ULL(next_idx);
-	pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+	set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
 }
 
 /**
@@ -11480,7 +11478,7 @@ static void i40e_udp_tunnel_del(struct net_device *netdev,
 	 */
 	pf->udp_ports[idx].port = 0;
 	pf->pending_udp_bitmap |= BIT_ULL(idx);
-	pf->flags |= I40E_FLAG_UDP_FILTER_SYNC;
+	set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
 
 	return;
 not_found:

From 134201aeadf3109ac9982ea81a79ec68442a07d1 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:32 -0700
Subject: [PATCH 1269/1678] i40e: move AUTO_DISABLED flags into the state field

The two Flow Directory auto disable flags are used at run time to mark
when the flow director features needed to be disabled. Thus the flags
could change even when the RTNL lock is not held.

They also have some code constructions which really should be
test_and_set or test_and_clear using atomic bit operations.

Create new state fields to mark this, and stop including them in
pf->flags.

This is part of a larger effort to remove the need for cmpxchg64 in
i40e_set_priv_flags().

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h        |  5 ++--
 .../net/ethernet/intel/i40e/i40e_ethtool.c    |  4 ++--
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 23 ++++++++-----------
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 21 ++++++++++-------
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index f557c3c36080..38633563dacd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -159,6 +159,8 @@ enum i40e_state_t {
 	__I40E_BAD_EEPROM,
 	__I40E_DOWN_REQUESTED,
 	__I40E_FD_FLUSH_REQUESTED,
+	__I40E_FD_ATR_AUTO_DISABLED,
+	__I40E_FD_SB_AUTO_DISABLED,
 	__I40E_RESET_FAILED,
 	__I40E_PORT_SUSPENDED,
 	__I40E_VF_DISABLE,
@@ -524,8 +526,7 @@ struct i40e_pf {
 #define I40E_FLAG_DCB_ENABLED			BIT_ULL(8)
 #define I40E_FLAG_FD_SB_ENABLED			BIT_ULL(9)
 #define I40E_FLAG_FD_ATR_ENABLED		BIT_ULL(10)
-#define I40E_FLAG_FD_SB_AUTO_DISABLED		BIT_ULL(11)
-#define I40E_FLAG_FD_ATR_AUTO_DISABLED		BIT_ULL(12)
+/* Gap for BIT_ULL(11) and BIT_ULL(12) */
 #define I40E_FLAG_MFP_ENABLED			BIT_ULL(13)
 /* Gap for BIT_ULL(14) */
 #define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT_ULL(15)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 846a9d597e01..2a9c93091e3d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -3951,7 +3951,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
 	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
 		return -EOPNOTSUPP;
 
-	if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)
+	if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
 		return -ENOSPC;
 
 	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
@@ -4460,7 +4460,7 @@ flags_complete:
 	/* Flush current ATR settings if ATR was disabled */
 	if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) &&
 	    !(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) {
-		pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+		set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
 		set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
 	}
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3177e82059e6..a478153818bc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1083,13 +1083,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
 			   &osd->rx_lpi_count, &nsd->rx_lpi_count);
 
 	if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
-	    !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED))
+	    !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
 		nsd->fd_sb_status = true;
 	else
 		nsd->fd_sb_status = false;
 
 	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
-	    !(pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
+	    !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
 		nsd->fd_atr_status = true;
 	else
 		nsd->fd_atr_status = false;
@@ -8144,12 +8144,10 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
  **/
 static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
 {
-	if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
-		pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED;
+	if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
 		if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
 			dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
-	}
 }
 
 /**
@@ -8158,7 +8156,7 @@ static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
  **/
 static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
 {
-	if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
+	if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) {
 		/* ATR uses the same filtering logic as SB rules. It only
 		 * functions properly if the input set mask is at the default
 		 * settings. It is safe to restore the default input set
@@ -8168,7 +8166,6 @@ static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
 					I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
 					I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
 
-		pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
 		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
 			dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
@@ -8291,7 +8288,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 	}
 
 	pf->fd_flush_timestamp = jiffies;
-	pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+	set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
 	/* flush all filters */
 	wr32(&pf->hw, I40E_PFQF_CTL_1,
 	     I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
@@ -8311,7 +8308,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 		/* replay sideband filters */
 		i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
 		if (!disable_atr && !pf->fd_tcp4_filter_cnt)
-			pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+			clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
 		clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
@@ -11291,20 +11288,18 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
 			need_reset = true;
 			i40e_fdir_filter_exit(pf);
 		}
-		pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED |
-			       I40E_FLAG_FD_SB_AUTO_DISABLED);
+		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+		clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
 		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
 
 		/* reset fd counters */
 		pf->fd_add_err = 0;
 		pf->fd_atr_cnt = 0;
 		/* if ATR was auto disabled it can be re-enabled. */
-		if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) {
-			pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED;
+		if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
 			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
 			    (I40E_DEBUG_FD & pf->hw.debug_mask))
 				dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
-		}
 	}
 	return need_reset;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 7ccd05bf4b06..797bcdd3504e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -336,7 +336,7 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
 		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
 		    I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
-		pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+		set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
 	} else {
 		pf->fd_tcp4_filter_cnt--;
 	}
@@ -594,8 +594,14 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 
 		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
-		    pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) {
-			pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED;
+		    test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
+			/* These set_bit() calls aren't atomic with the
+			 * test_bit() here, but worse case we potentially
+			 * disable ATR and queue a flush right after SB
+			 * support is re-enabled. That shouldn't cause an
+			 * issue in practice
+			 */
+			set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
 			set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
 		}
 
@@ -608,11 +614,10 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 		 */
 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
-			    !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)) {
-				pf->flags |= I40E_FLAG_FD_SB_AUTO_DISABLED;
+			    !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED,
+					      pf->state))
 				if (I40E_DEBUG_FD & pf->hw.debug_mask)
 					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
-			}
 		}
 	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
@@ -2647,7 +2652,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
 		return;
 
-	if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED)
+	if (test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
 		return;
 
 	/* if sampling is disabled do nothing */
@@ -2687,7 +2692,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	th = (struct tcphdr *)(hdr.network + hlen);
 
 	/* Due to lack of space, no more new filters can be programmed */
-	if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED))
+	if (th->syn && test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
 		return;
 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
 		/* HW ATR eviction will take care of removing filters on FIN

From e1577c1c881b09e9f15a743a4a1907815b74d0f7 Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Mon, 20 Nov 2017 16:14:30 +0200
Subject: [PATCH 1270/1678] ethtool: Add support for configuring PFC stall
 prevention in ethtool

In the event where the device unexpectedly becomes unresponsive
for a long period of time, flow control mechanism may propagate
pause frames which will cause congestion spreading to the entire
network.
To prevent this scenario, when the device is stalled for a period
longer than a pre-configured timeout, flow control mechanisms are
automatically disabled.

This patch adds support for the ETHTOOL_PFC_STALL_PREVENTION
as a tunable.
This API provides support for configuring flow control storm prevention
timeout (msec).

Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Cc: Michal Kubecek <mkubecek@suse.cz>
Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/uapi/linux/ethtool.h | 4 ++++
 net/core/ethtool.c           | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 20da156aaf64..4ca65b56084f 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -217,10 +217,14 @@ struct ethtool_value {
 	__u32	data;
 };
 
+#define PFC_STORM_PREVENTION_AUTO	0xffff
+#define PFC_STORM_PREVENTION_DISABLE	0
+
 enum tunable_id {
 	ETHTOOL_ID_UNSPEC,
 	ETHTOOL_RX_COPYBREAK,
 	ETHTOOL_TX_COPYBREAK,
+	ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
 	/*
 	 * Add your fresh new tubale attribute above and remember to update
 	 * tunable_strings[] in net/core/ethtool.c
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 157cd9efa4be..bb6e498c6e3d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -121,6 +121,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_ID_UNSPEC]     = "Unspec",
 	[ETHTOOL_RX_COPYBREAK]	= "rx-copybreak",
 	[ETHTOOL_TX_COPYBREAK]	= "tx-copybreak",
+	[ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
 };
 
 static const char
@@ -2311,6 +2312,11 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
 		    tuna->type_id != ETHTOOL_TUNABLE_U32)
 			return -EINVAL;
 		break;
+	case ETHTOOL_PFC_PREVENTION_TOUT:
+		if (tuna->len != sizeof(u16) ||
+		    tuna->type_id != ETHTOOL_TUNABLE_U16)
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}

From 2afa609f5c970185a8cae73f6a4caadf97fbea54 Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Mon, 20 Nov 2017 18:06:20 +0200
Subject: [PATCH 1271/1678] net/mlx5e: PFC stall prevention support

Implement set/get functions to configure PFC stall prevention
timeout by tunables api through ethtool.
By default the stall prevention timeout is configured to 8 sec.
Timeout range is: 80-8000 msec.

Enabling stall prevention with the auto timeout will set
the timeout to 100 msec.

Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 57 +++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/port.c    | 64 ++++++++++++++++---
 include/linux/mlx5/mlx5_ifc.h                 | 17 +++--
 include/linux/mlx5/port.h                     |  6 ++
 4 files changed, 132 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc8048f68f11..62061fd23143 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1066,6 +1066,57 @@ static int mlx5e_get_rxnfc(struct net_device *netdev,
 	return err;
 }
 
+#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC		100
+#define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC		8000
+#define MLX5E_PFC_PREVEN_MINOR_PRECENT		85
+#define MLX5E_PFC_PREVEN_TOUT_MIN_MSEC		80
+#define MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout) \
+	max_t(u16, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC, \
+	      (critical_tout * MLX5E_PFC_PREVEN_MINOR_PRECENT) / 100)
+
+static int mlx5e_get_pfc_prevention_tout(struct net_device *netdev,
+					 u16 *pfc_prevention_tout)
+{
+	struct mlx5e_priv *priv    = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+	    !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+		return -EOPNOTSUPP;
+
+	return mlx5_query_port_stall_watermark(mdev, pfc_prevention_tout, NULL);
+}
+
+static int mlx5e_set_pfc_prevention_tout(struct net_device *netdev,
+					 u16 pfc_preven)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u16 critical_tout;
+	u16 minor;
+
+	if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, pfcc_mask) ||
+	    !MLX5_CAP_DEBUG((priv)->mdev, stall_detect))
+		return -EOPNOTSUPP;
+
+	critical_tout = (pfc_preven == PFC_STORM_PREVENTION_AUTO) ?
+			MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC :
+			pfc_preven;
+
+	if (critical_tout != PFC_STORM_PREVENTION_DISABLE &&
+	    (critical_tout > MLX5E_PFC_PREVEN_TOUT_MAX_MSEC ||
+	     critical_tout < MLX5E_PFC_PREVEN_TOUT_MIN_MSEC)) {
+		netdev_info(netdev, "%s: pfc prevention tout not in range (%d-%d)\n",
+			    __func__, MLX5E_PFC_PREVEN_TOUT_MIN_MSEC,
+			    MLX5E_PFC_PREVEN_TOUT_MAX_MSEC);
+		return -EINVAL;
+	}
+
+	minor = MLX5E_DEVICE_STALL_MINOR_WATERMARK(critical_tout);
+	return mlx5_set_port_stall_watermark(mdev, critical_tout,
+					     minor);
+}
+
 static int mlx5e_get_tunable(struct net_device *dev,
 			     const struct ethtool_tunable *tuna,
 			     void *data)
@@ -1077,6 +1128,9 @@ static int mlx5e_get_tunable(struct net_device *dev,
 	case ETHTOOL_TX_COPYBREAK:
 		*(u32 *)data = priv->channels.params.tx_max_inline;
 		break;
+	case ETHTOOL_PFC_PREVENTION_TOUT:
+		err = mlx5e_get_pfc_prevention_tout(dev, data);
+		break;
 	default:
 		err = -EINVAL;
 		break;
@@ -1118,6 +1172,9 @@ static int mlx5e_set_tunable(struct net_device *dev,
 			break;
 		mlx5e_switch_priv_channels(priv, &new_channels, NULL);
 
+		break;
+	case ETHTOOL_PFC_PREVENTION_TOUT:
+		err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
 		break;
 	default:
 		err = -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index c37d00cd472a..fa9d0760dd36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -483,6 +483,17 @@ int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
 
+static int mlx5_query_pfcc_reg(struct mlx5_core_dev *dev, u32 *out,
+			       u32 out_size)
+{
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+
+	MLX5_SET(pfcc_reg, in, local_port, 1);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out,
+				    out_size, MLX5_REG_PFCC, 0, 0);
+}
+
 int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
 {
 	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
@@ -500,13 +511,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
 int mlx5_query_port_pause(struct mlx5_core_dev *dev,
 			  u32 *rx_pause, u32 *tx_pause)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 	int err;
 
-	MLX5_SET(pfcc_reg, in, local_port, 1);
-	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
-				   sizeof(out), MLX5_REG_PFCC, 0, 0);
+	err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
 	if (err)
 		return err;
 
@@ -520,6 +528,49 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
 
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+				  u16 stall_critical_watermark,
+				  u16 stall_minor_watermark)
+{
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
+	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+
+	MLX5_SET(pfcc_reg, in, local_port, 1);
+	MLX5_SET(pfcc_reg, in, pptx_mask_n, 1);
+	MLX5_SET(pfcc_reg, in, pprx_mask_n, 1);
+	MLX5_SET(pfcc_reg, in, ppan_mask_n, 1);
+	MLX5_SET(pfcc_reg, in, critical_stall_mask, 1);
+	MLX5_SET(pfcc_reg, in, minor_stall_mask, 1);
+	MLX5_SET(pfcc_reg, in, device_stall_critical_watermark,
+		 stall_critical_watermark);
+	MLX5_SET(pfcc_reg, in, device_stall_minor_watermark, stall_minor_watermark);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out,
+				    sizeof(out), MLX5_REG_PFCC, 0, 1);
+}
+
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+				    u16 *stall_critical_watermark,
+				    u16 *stall_minor_watermark)
+{
+	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
+	int err;
+
+	err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
+	if (err)
+		return err;
+
+	if (stall_critical_watermark)
+		*stall_critical_watermark = MLX5_GET(pfcc_reg, out,
+						     device_stall_critical_watermark);
+
+	if (stall_minor_watermark)
+		*stall_minor_watermark = MLX5_GET(pfcc_reg, out,
+						  device_stall_minor_watermark);
+
+	return 0;
+}
+
 int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
 {
 	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
@@ -538,13 +589,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
 
 int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 	int err;
 
-	MLX5_SET(pfcc_reg, in, local_port, 1);
-	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
-				   sizeof(out), MLX5_REG_PFCC, 0, 0);
+	err = mlx5_query_pfcc_reg(dev, out, sizeof(out));
 	if (err)
 		return err;
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c7d50eccff9e..f3200a9696d6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7833,7 +7833,11 @@ struct mlx5_ifc_pifr_reg_bits {
 struct mlx5_ifc_pfcc_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_at_10[0x10];
+	u8         reserved_at_10[0xb];
+	u8         ppan_mask_n[0x1];
+	u8         minor_stall_mask[0x1];
+	u8         critical_stall_mask[0x1];
+	u8         reserved_at_1e[0x2];
 
 	u8         ppan[0x4];
 	u8         reserved_at_24[0x4];
@@ -7843,17 +7847,22 @@ struct mlx5_ifc_pfcc_reg_bits {
 
 	u8         pptx[0x1];
 	u8         aptx[0x1];
-	u8         reserved_at_42[0x6];
+	u8         pptx_mask_n[0x1];
+	u8         reserved_at_43[0x5];
 	u8         pfctx[0x8];
 	u8         reserved_at_50[0x10];
 
 	u8         pprx[0x1];
 	u8         aprx[0x1];
-	u8         reserved_at_62[0x6];
+	u8         pprx_mask_n[0x1];
+	u8         reserved_at_63[0x5];
 	u8         pfcrx[0x8];
 	u8         reserved_at_70[0x10];
 
-	u8         reserved_at_80[0x80];
+	u8         device_stall_minor_watermark[0x10];
+	u8         device_stall_critical_watermark[0x10];
+
+	u8         reserved_at_a0[0x60];
 };
 
 struct mlx5_ifc_pelc_reg_bits {
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 035f0d4dc9fe..34aed6032f86 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -151,6 +151,12 @@ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx);
 int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx,
 			u8 *pfc_en_rx);
 
+int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev,
+				  u16 stall_critical_watermark,
+				  u16 stall_minor_watermark);
+int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev,
+				    u16 *stall_critical_watermark, u16 *stall_minor_watermark);
+
 int mlx5_max_tc(struct mlx5_core_dev *mdev);
 
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);

From 61c5b5c9178288a4caa3e39095aafb391c5100f6 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Sun, 7 Jan 2018 16:45:27 +0200
Subject: [PATCH 1272/1678] net/mlx5: Add support for QUERY_VNIC_ENV command

Add support for new FW command QUERY_VNIC_ENV.
The command is used by the driver to query vnic diagnostic statistics
from FW.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |  2 +
 include/linux/mlx5/mlx5_ifc.h                 | 50 ++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index e9a1fbcc4adf..fe5428667ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -359,6 +359,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
 	case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
 	case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
+	case MLX5_CMD_OP_QUERY_VNIC_ENV:
 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
@@ -501,6 +502,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT);
 	MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID);
 	MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY);
+	MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV);
 	MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER);
 	MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3200a9696d6..52e373dd2679 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -143,6 +143,7 @@ enum {
 	MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT      = 0x763,
 	MLX5_CMD_OP_QUERY_HCA_VPORT_GID           = 0x764,
 	MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY          = 0x765,
+	MLX5_CMD_OP_QUERY_VNIC_ENV                = 0x76f,
 	MLX5_CMD_OP_QUERY_VPORT_COUNTER           = 0x770,
 	MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
 	MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
@@ -875,7 +876,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         vhca_group_manager[0x1];
 	u8         ib_virt[0x1];
 	u8         eth_virt[0x1];
-	u8         reserved_at_1a4[0x1];
+	u8         vnic_env_queue_counters[0x1];
 	u8         ets[0x1];
 	u8         nic_flow_table[0x1];
 	u8         eswitch_flow_table[0x1];
@@ -2386,6 +2387,24 @@ struct mlx5_ifc_xrc_srqc_bits {
 	u8         reserved_at_180[0x80];
 };
 
+struct mlx5_ifc_vnic_diagnostic_statistics_bits {
+	u8         counter_error_queues[0x20];
+
+	u8         total_error_queues[0x20];
+
+	u8         send_queue_priority_update_flow[0x20];
+
+	u8         reserved_at_60[0x20];
+
+	u8         nic_receive_steering_discard[0x40];
+
+	u8         receive_discard_vport_down[0x40];
+
+	u8         transmit_discard_vport_down[0x40];
+
+	u8         reserved_at_140[0xec0];
+};
+
 struct mlx5_ifc_traffic_counter_bits {
 	u8         packets[0x40];
 
@@ -3661,6 +3680,35 @@ struct mlx5_ifc_query_vport_state_in_bits {
 	u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_query_vnic_env_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_vnic_diagnostic_statistics_bits vport_env;
+};
+
+enum {
+	MLX5_QUERY_VNIC_ENV_IN_OP_MOD_VPORT_DIAG_STATISTICS  = 0x0,
+};
+
+struct mlx5_ifc_query_vnic_env_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_at_41[0xf];
+	u8         vport_number[0x10];
+
+	u8         reserved_at_60[0x20];
+};
+
 struct mlx5_ifc_query_vport_counter_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];

From 5c298143be17f5100656b9c140af672c644116d9 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Tue, 26 Dec 2017 16:46:29 +0200
Subject: [PATCH 1273/1678] net/mlx5e: Add vnic steering drop statistics

Added the following packets drop counter:
Rx steering missed dropped packets - counts packets which were dropped
due to miss on NIC rx steering rules.
This counter will be shown on ethtool as a new counter called
rx_steer_missed_packets.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 65 +++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  5 ++
 include/linux/mlx5/mlx5_ifc.h                 |  3 +-
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 2553c58dcf1c..552510c03ef2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -211,6 +211,65 @@ static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv)
 	qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer);
 }
 
+#define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
+static const struct counter_desc vnic_env_stats_desc[] = {
+	{ "rx_steer_missed_packets",
+		VNIC_ENV_OFF(vport_env.nic_receive_steering_discard) },
+};
+
+#define NUM_VNIC_ENV_COUNTERS		ARRAY_SIZE(vnic_env_stats_desc)
+
+static int mlx5e_grp_vnic_env_get_num_stats(struct mlx5e_priv *priv)
+{
+	return MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard) ?
+		NUM_VNIC_ENV_COUNTERS : 0;
+}
+
+static int mlx5e_grp_vnic_env_fill_strings(struct mlx5e_priv *priv, u8 *data,
+					   int idx)
+{
+	int i;
+
+	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+		return idx;
+
+	for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       vnic_env_stats_desc[i].format);
+	return idx;
+}
+
+static int mlx5e_grp_vnic_env_fill_stats(struct mlx5e_priv *priv, u64 *data,
+					 int idx)
+{
+	int i;
+
+	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+		return idx;
+
+	for (i = 0; i < NUM_VNIC_ENV_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vnic.query_vnic_env_out,
+						  vnic_env_stats_desc, i);
+	return idx;
+}
+
+static void mlx5e_grp_vnic_env_update_stats(struct mlx5e_priv *priv)
+{
+	u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
+	int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out);
+	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+		return;
+
+	MLX5_SET(query_vnic_env_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_VNIC_ENV);
+	MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+	MLX5_SET(query_vnic_env_in, in, other_vport, 0);
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
 #define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
 static const struct counter_desc vport_stats_desc[] = {
 	{ "rx_vport_unicast_packets",
@@ -1111,6 +1170,12 @@ const struct mlx5e_stats_grp mlx5e_stats_grps[] = {
 		.update_stats_mask = MLX5E_NDO_UPDATE_STATS,
 		.update_stats = mlx5e_grp_q_update_stats,
 	},
+	{
+		.get_num_stats = mlx5e_grp_vnic_env_get_num_stats,
+		.fill_strings = mlx5e_grp_vnic_env_fill_strings,
+		.fill_stats = mlx5e_grp_vnic_env_fill_stats,
+		.update_stats = mlx5e_grp_vnic_env_update_stats,
+	},
 	{
 		.get_num_stats = mlx5e_grp_vport_get_num_stats,
 		.fill_strings = mlx5e_grp_vport_fill_strings,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 0b3320a2b072..847388ff8ca8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -99,6 +99,10 @@ struct mlx5e_qcounter_stats {
 	u32 rx_out_of_buffer;
 };
 
+struct mlx5e_vnic_env_stats {
+	__be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
+};
+
 #define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
 						vstats->query_vport_out, c)
 
@@ -201,6 +205,7 @@ struct mlx5e_ch_stats {
 struct mlx5e_stats {
 	struct mlx5e_sw_stats sw;
 	struct mlx5e_qcounter_stats qcnt;
+	struct mlx5e_vnic_env_stats vnic;
 	struct mlx5e_vport_stats vport;
 	struct mlx5e_pport_stats pport;
 	struct rtnl_link_stats64 vf_vport;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 52e373dd2679..9202113f552c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1008,7 +1008,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_330[0xb];
 	u8         log_max_xrcd[0x5];
 
-	u8         reserved_at_340[0x8];
+	u8         nic_receive_steering_discard[0x1];
+	u8         reserved_at_341[0x7];
 	u8         log_max_flow_counter_bulk[0x8];
 	u8         max_flow_counter_15_0[0x10];
 

From aaabd0783b1cf46569f5fc1c60d79709815497fc Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Sun, 14 Jan 2018 00:56:25 +0200
Subject: [PATCH 1274/1678] net/mlx5: Add packet dropped while vport down
 statistics

Added the following packets dropped while vport down statistics:

Rx dropped while vport down - counts packets which were steered by
e-switch to a vport, but dropped since the vport was down. This counter
will be shown on ip link tool as part of the vport rx_dropped counter.

Tx dropped while vport down - counts packets which were transmitted by
a vport, but dropped due to vport logical link down. This counter
will be shown on ip link tool as part of the vport tx_dropped counter.

The counters are read from FW by command QUERY_VNIC_ENV.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch.c | 31 ++++++++++++++++---
 .../net/ethernet/mellanox/mlx5/core/vport.c   | 26 ++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                 |  4 ++-
 include/linux/mlx5/vport.h                    |  3 ++
 4 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 77b7272eaaa8..332bc56306bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2096,17 +2096,19 @@ unlock:
 	return err;
 }
 
-static void mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
-						int vport_idx,
-						struct mlx5_vport_drop_stats *stats)
+static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
+					       int vport_idx,
+					       struct mlx5_vport_drop_stats *stats)
 {
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	struct mlx5_vport *vport = &esw->vports[vport_idx];
+	u64 rx_discard_vport_down, tx_discard_vport_down;
 	u64 bytes = 0;
 	u16 idx = 0;
+	int err = 0;
 
 	if (!vport->enabled || esw->mode != SRIOV_LEGACY)
-		return;
+		return 0;
 
 	if (vport->egress.drop_counter) {
 		idx = vport->egress.drop_counter->id;
@@ -2117,6 +2119,23 @@ static void mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
 		idx = vport->ingress.drop_counter->id;
 		mlx5_fc_query(dev, idx, &stats->tx_dropped, &bytes);
 	}
+
+	if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+	    !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+		return 0;
+
+	err = mlx5_query_vport_down_stats(dev, vport_idx,
+					  &rx_discard_vport_down,
+					  &tx_discard_vport_down);
+	if (err)
+		return err;
+
+	if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+		stats->rx_dropped += rx_discard_vport_down;
+	if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+		stats->tx_dropped += tx_discard_vport_down;
+
+	return 0;
 }
 
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
@@ -2180,7 +2199,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 	vf_stats->broadcast =
 		MLX5_GET_CTR(out, received_eth_broadcast.packets);
 
-	mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+	err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+	if (err)
+		goto free_out;
 	vf_stats->rx_dropped = stats.rx_dropped;
 	vf_stats->tx_dropped = stats.tx_dropped;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index dfe36cf6fbea..177e076b8d17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1070,6 +1070,32 @@ free:
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
 
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+				u64 *rx_discard_vport_down,
+				u64 *tx_discard_vport_down)
+{
+	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+	int err;
+
+	MLX5_SET(query_vnic_env_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_VNIC_ENV);
+	MLX5_SET(query_vnic_env_in, in, op_mod, 0);
+	MLX5_SET(query_vnic_env_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*rx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+					    vport_env.receive_discard_vport_down);
+	*tx_discard_vport_down = MLX5_GET64(query_vnic_env_out, out,
+					    vport_env.transmit_discard_vport_down);
+	return 0;
+}
+
 int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 				       u8 other_vport, u8 port_num,
 				       int vf,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 9202113f552c..1f3483d40055 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1009,7 +1009,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_xrcd[0x5];
 
 	u8         nic_receive_steering_discard[0x1];
-	u8         reserved_at_341[0x7];
+	u8         receive_discard_vport_down[0x1];
+	u8         transmit_discard_vport_down[0x1];
+	u8         reserved_at_343[0x5];
 	u8         log_max_flow_counter_bulk[0x8];
 	u8         max_flow_counter_15_0[0x10];
 
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 64e193e87394..9208cb8809ac 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -107,6 +107,9 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
+				u64 *rx_discard_vport_down,
+				u64 *tx_discard_vport_down);
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
 				  int vf, u8 port_num, void *out,
 				  size_t out_sz);

From 7cbaf9a3ea09ea36c7101e7b244f734f9c09c72b Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 8 Feb 2018 15:09:57 +0200
Subject: [PATCH 1275/1678] net/mlx5e: Add interface down dropped packets
 statistics

Added the following packets drop counter:
Rx interface down dropped packets - counts packets which were received
while the ETH interface was down.
This counter will be shown on ethtool as a new counter called
rx_if_down_packets.

The implementation allocates a q_counter for drop rq which gets all the
received traffic while the interface is down.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  3 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 46 +++++++++++-------
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 48 ++++++++++++++-----
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  1 +
 4 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4c9360b25532..48e0b2a747d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -781,7 +781,8 @@ struct mlx5e_priv {
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
 	struct hwtstamp_config     tstamp;
-	u16 q_counter;
+	u16                        q_counter;
+	u16                        drop_rq_q_counter;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	struct mlx5e_dcbx          dcbx;
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index da94c8cba5ee..f8bc3bcdf046 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -615,8 +615,7 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
 static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state,
 				 int next_state)
 {
-	struct mlx5e_channel *c = rq->channel;
-	struct mlx5_core_dev *mdev = c->mdev;
+	struct mlx5_core_dev *mdev = rq->mdev;
 
 	void *in;
 	void *rqc;
@@ -1768,14 +1767,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	param->wq.linear = 1;
 }
 
-static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
+static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
 				      struct mlx5e_rq_param *param)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
 	MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
+	MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
 
 	param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
 }
@@ -2643,15 +2644,16 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
 	return mlx5e_alloc_cq_common(mdev, param, cq);
 }
 
-static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev,
+static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
 			      struct mlx5e_rq *drop_rq)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_cq_param cq_param = {};
 	struct mlx5e_rq_param rq_param = {};
 	struct mlx5e_cq *cq = &drop_rq->cq;
 	int err;
 
-	mlx5e_build_drop_rq_param(mdev, &rq_param);
+	mlx5e_build_drop_rq_param(priv, &rq_param);
 
 	err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param);
 	if (err)
@@ -2669,6 +2671,10 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev,
 	if (err)
 		goto err_free_rq;
 
+	err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+	if (err)
+		mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
+
 	return 0;
 
 err_free_rq:
@@ -4183,7 +4189,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	mlx5e_ipsec_build_netdev(priv);
 }
 
-static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
+static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
@@ -4193,14 +4199,21 @@ static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
 		mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
 		priv->q_counter = 0;
 	}
+
+	err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter);
+	if (err) {
+		mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err);
+		priv->drop_rq_q_counter = 0;
+	}
 }
 
-static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
+static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
 {
-	if (!priv->q_counter)
-		return;
+	if (priv->q_counter)
+		mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
 
-	mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+	if (priv->drop_rq_q_counter)
+		mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
 }
 
 static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
@@ -4439,18 +4452,18 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 	if (err)
 		goto out;
 
-	err = mlx5e_open_drop_rq(mdev, &priv->drop_rq);
+	mlx5e_create_q_counters(priv);
+
+	err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
 	if (err) {
 		mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
-		goto err_cleanup_tx;
+		goto err_destroy_q_counters;
 	}
 
 	err = profile->init_rx(priv);
 	if (err)
 		goto err_close_drop_rq;
 
-	mlx5e_create_q_counter(priv);
-
 	if (profile->enable)
 		profile->enable(priv);
 
@@ -4459,7 +4472,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 err_close_drop_rq:
 	mlx5e_close_drop_rq(&priv->drop_rq);
 
-err_cleanup_tx:
+err_destroy_q_counters:
+	mlx5e_destroy_q_counters(priv);
 	profile->cleanup_tx(priv);
 
 out:
@@ -4476,9 +4490,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
 		profile->disable(priv);
 	flush_workqueue(priv->wq);
 
-	mlx5e_destroy_q_counter(priv);
 	profile->cleanup_rx(priv);
 	mlx5e_close_drop_rq(&priv->drop_rq);
+	mlx5e_destroy_q_counters(priv);
 	profile->cleanup_tx(priv);
 	cancel_delayed_work_sync(&priv->update_stats_work);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 552510c03ef2..c0dab9a8969e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -170,11 +170,24 @@ static const struct counter_desc q_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
 };
 
+static const struct counter_desc drop_rq_stats_desc[] = {
+	{ MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_if_down_packets) },
+};
+
 #define NUM_Q_COUNTERS			ARRAY_SIZE(q_stats_desc)
+#define NUM_DROP_RQ_COUNTERS		ARRAY_SIZE(drop_rq_stats_desc)
 
 static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv)
 {
-	return priv->q_counter ? NUM_Q_COUNTERS : 0;
+	int num_stats = 0;
+
+	if (priv->q_counter)
+		num_stats += NUM_Q_COUNTERS;
+
+	if (priv->drop_rq_q_counter)
+		num_stats += NUM_DROP_RQ_COUNTERS;
+
+	return num_stats;
 }
 
 static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
@@ -182,7 +195,13 @@ static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx)
 	int i;
 
 	for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       q_stats_desc[i].format);
+
+	for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       drop_rq_stats_desc[i].format);
+
 	return idx;
 }
 
@@ -191,7 +210,11 @@ static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
 	int i;
 
 	for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++)
-		data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i);
+		data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+						   q_stats_desc, i);
+	for (i = 0; i < NUM_DROP_RQ_COUNTERS && priv->drop_rq_q_counter; i++)
+		data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+						   drop_rq_stats_desc, i);
 	return idx;
 }
 
@@ -199,16 +222,17 @@ static void mlx5e_grp_q_update_stats(struct mlx5e_priv *priv)
 {
 	struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
-	int err;
 
-	if (!priv->q_counter)
-		return;
-
-	err = mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out, sizeof(out));
-	if (err)
-		return;
-
-	qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out, out, out_of_buffer);
+	if (priv->q_counter &&
+	    !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out,
+				       sizeof(out)))
+		qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+						  out, out_of_buffer);
+	if (priv->drop_rq_q_counter &&
+	    !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0,
+				       out, sizeof(out)))
+		qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out,
+						    out_of_buffer);
 }
 
 #define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 847388ff8ca8..43a72efa28c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -97,6 +97,7 @@ struct mlx5e_sw_stats {
 
 struct mlx5e_qcounter_stats {
 	u32 rx_out_of_buffer;
+	u32 rx_if_down_packets;
 };
 
 struct mlx5e_vnic_env_stats {

From aa24670ef66cb38e667d7bb039f5ce29d926f2e0 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 30 Jan 2018 14:13:28 +0000
Subject: [PATCH 1276/1678] net/mlx5: E-Switch, Use same source for offloaded
 actions check

Align the checks for modify header and encap actions with the
rest of the code.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 0a8303c1b52f..21ebe3e80e6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -88,10 +88,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
 		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		flow_act.modify_id = attr->mod_hdr_id;
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
 		flow_act.encap_id = attr->encap_id;
 
 	rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,

From 0c06897a9ac7e2db9ad2df15bc6511e8ab88378f Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 28 Jan 2018 20:14:20 +0200
Subject: [PATCH 1277/1678] net/mlx5: Add core support for vlan push/pop
 steering action

Newer NICs (ConnectX-5 and onward) can apply vlan pop or push as an
action taking place during flow steering. Add the core bits for that.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/diag/fs_tracepoint.h      |  2 ++
 .../net/ethernet/mellanox/mlx5/core/eswitch.h    |  3 ---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 10 +++++++++-
 .../net/ethernet/mellanox/mlx5/core/fs_core.c    |  4 +++-
 include/linux/mlx5/fs.h                          |  7 +++++++
 include/linux/mlx5/mlx5_ifc.h                    | 16 ++++++++++++++--
 6 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index a6ba57fbb414..09f178a3fcab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -136,6 +136,8 @@ TRACE_EVENT(mlx5_fs_del_fg,
 	{MLX5_FLOW_CONTEXT_ACTION_ENCAP,	 "ENCAP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_DECAP,	 "DECAP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,	 "MOD_HDR"},\
+	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH,	 "VLAN_PUSH"},\
+	{MLX5_FLOW_CONTEXT_ACTION_VLAN_POP,	 "VLAN_POP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
 
 TRACE_EVENT(mlx5_fs_set_fte,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 98d2177d0806..a435eb7971c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -227,9 +227,6 @@ enum {
 	SET_VLAN_INSERT	= BIT(1)
 };
 
-#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x4000
-#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000
-
 struct mlx5_esw_flow_attr {
 	struct mlx5_eswitch_rep *in_rep;
 	struct mlx5_eswitch_rep *out_rep;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 645f83cac34d..ef5afd7c9325 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -317,7 +317,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 		fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
 	u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
 	struct mlx5_flow_rule *dst;
-	void *in_flow_context;
+	void *in_flow_context, *vlan;
 	void *in_match_value;
 	void *in_dests;
 	u32 *in;
@@ -340,11 +340,19 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
 	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+
 	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
 	MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
 	MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
 	MLX5_SET(flow_context, in_flow_context, modify_header_id,
 		 fte->action.modify_id);
+
+	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan.ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->action.vlan.vid);
+	MLX5_SET(vlan, vlan, prio, fte->action.vlan.prio);
+
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
 	memcpy(in_match_value, &fte->val, sizeof(fte->val));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 3ba07c7096ef..de51e7c39bc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1439,7 +1439,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 	if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
 			     MLX5_FLOW_CONTEXT_ACTION_ENCAP |
 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
-			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR))
+			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
 		return true;
 
 	return false;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index b957e52434f8..47aecc4fa8c2 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -142,6 +142,12 @@ struct mlx5_flow_group *
 mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in);
 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
 
+struct mlx5_fs_vlan {
+        u16 ethtype;
+        u16 vid;
+        u8  prio;
+};
+
 struct mlx5_flow_act {
 	u32 action;
 	bool has_flow_tag;
@@ -149,6 +155,7 @@ struct mlx5_flow_act {
 	u32 encap_id;
 	u32 modify_id;
 	uintptr_t esp_id;
+	struct mlx5_fs_vlan vlan;
 };
 
 #define MLX5_DECLARE_FLOW_ACT(name) \
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1f3483d40055..c19e611d2782 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -314,7 +314,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         flow_table_modify[0x1];
 	u8         encap[0x1];
 	u8         decap[0x1];
-	u8         reserved_at_9[0x17];
+	u8         reserved_at_9[0x1];
+	u8         pop_vlan[0x1];
+	u8         push_vlan[0x1];
+	u8         reserved_at_c[0x14];
 
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
@@ -2311,10 +2314,19 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
 	MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
 	MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
+	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
+	MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
+};
+
+struct mlx5_ifc_vlan_bits {
+	u8         ethtype[0x10];
+	u8         prio[0x3];
+	u8         cfi[0x1];
+	u8         vid[0xc];
 };
 
 struct mlx5_ifc_flow_context_bits {
-	u8         reserved_at_0[0x20];
+	struct mlx5_ifc_vlan_bits push_vlan;
 
 	u8         group_id[0x20];
 

From 6acfbf38a98a4c455f5b8c827854e1dc201be745 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 31 Jan 2018 18:36:03 +0200
Subject: [PATCH 1278/1678] net/mlx5e: Offload tc vlan push/pop using HW action

Currently, we are emulating the offload of vlan push/pop actions using
global setup as done by commit f5f82476090f ("net/mlx5: E-Switch, Support
VLAN actions in the offloads mode"). With newer NICs, we can apply a flow
action for that matter, do that while keeping the emulated path for the
older HW brands.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 15 +++++++----
 .../net/ethernet/mellanox/mlx5/core/eswitch.h | 10 ++++++-
 .../mellanox/mlx5/core/eswitch_offloads.c     | 26 +++++++++++++++----
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 7c33df2034f0..3e4a7e81b67f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2530,12 +2530,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
 				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
 			} else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
-				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
-				    tcf_vlan_push_prio(a))
-					return -EOPNOTSUPP;
-
 				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
-				attr->vlan = tcf_vlan_push_vid(a);
+				attr->vlan_vid = tcf_vlan_push_vid(a);
+				if (mlx5_eswitch_vlan_actions_supported(priv->mdev)) {
+					attr->vlan_prio = tcf_vlan_push_prio(a);
+					attr->vlan_proto = tcf_vlan_push_proto(a);
+					if (!attr->vlan_proto)
+						attr->vlan_proto = htons(ETH_P_8021Q);
+				} else if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) ||
+					   tcf_vlan_push_prio(a)) {
+					return -EOPNOTSUPP;
+				}
 			} else { /* action is TCA_VLAN_ACT_MODIFY */
 				return -EOPNOTSUPP;
 			}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a435eb7971c6..4cd773fa55e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -232,7 +232,9 @@ struct mlx5_esw_flow_attr {
 	struct mlx5_eswitch_rep *out_rep;
 
 	int	action;
-	u16	vlan;
+	__be16	vlan_proto;
+	u16	vlan_vid;
+	u8	vlan_prio;
 	bool	vlan_handled;
 	u32	encap_id;
 	u32	mod_hdr_id;
@@ -255,6 +257,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				  int vport, u16 vlan, u8 qos, u8 set_flags);
 
+static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev)
+{
+	return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, pop_vlan) &&
+	       MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan);
+}
+
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
 #define esw_info(dev, format, ...)				\
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 21ebe3e80e6e..35e256eb2f6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -58,8 +58,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	if (esw->mode != SRIOV_OFFLOADS)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	/* per flow vlan pop/push is emulated, don't set that into the firmware */
-	flow_act.action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+	flow_act.action = attr->action;
+	/* if per flow vlan pop/push is emulated, don't set that into the firmware */
+	if (!mlx5_eswitch_vlan_actions_supported(esw->dev))
+		flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+				     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+	else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+		flow_act.vlan.ethtype = ntohs(attr->vlan_proto);
+		flow_act.vlan.vid = attr->vlan_vid;
+		flow_act.vlan.prio = attr->vlan_prio;
+	}
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
@@ -185,7 +193,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
 	/* protects against (1) setting rules with different vlans to push and
 	 * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
 	 */
-	if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan))
+	if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid))
 		goto out_notsupp;
 
 	return 0;
@@ -202,6 +210,10 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 	bool push, pop, fwd;
 	int err = 0;
 
+	/* nop if we're on the vlan push/pop non emulation mode */
+	if (mlx5_eswitch_vlan_actions_supported(esw->dev))
+		return 0;
+
 	push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
 	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
@@ -239,11 +251,11 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 		if (vport->vlan_refcount)
 			goto skip_set_push;
 
-		err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0,
+		err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan_vid, 0,
 						    SET_VLAN_INSERT | SET_VLAN_STRIP);
 		if (err)
 			goto out;
-		vport->vlan = attr->vlan;
+		vport->vlan = attr->vlan_vid;
 skip_set_push:
 		vport->vlan_refcount++;
 	}
@@ -261,6 +273,10 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 	bool push, pop, fwd;
 	int err = 0;
 
+	/* nop if we're on the vlan push/pop non emulation mode */
+	if (mlx5_eswitch_vlan_actions_supported(esw->dev))
+		return 0;
+
 	if (!attr->vlan_handled)
 		return 0;
 

From 957f6ba8adc7be401a74ccff427e4cfd88d3bfcb Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 2 Jan 2018 16:49:56 +0200
Subject: [PATCH 1279/1678] net/mlx5: Protect from command bit overflow

The system with CONFIG_UBSAN enabled on produces the following error
during driver initialization. The reason to it that max_reg_cmds can be
larger enough to cause to "1 << max_reg_cmds" overflow the unsigned long.

================================================================================
UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlx5/core/cmd.c:1805:42
signed integer overflow:
-2147483648 - 1 cannot be represented in type 'int'
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2-00032-g06cda2358d9b-dirty #724
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
Call Trace:
 dump_stack+0xe9/0x18f
 ? dma_virt_alloc+0x81/0x81
 ubsan_epilogue+0xe/0x4e
 handle_overflow+0x187/0x20c
 mlx5_cmd_init+0x73a/0x12b0
 mlx5_load_one+0x1c3d/0x1d30
 init_one+0xd02/0xf10
 pci_device_probe+0x26c/0x3b0
 driver_probe_device+0x622/0xb40
 __driver_attach+0x175/0x1b0
 bus_for_each_dev+0xef/0x190
 bus_add_driver+0x2db/0x490
 driver_register+0x16b/0x1e0
 __pci_register_driver+0x177/0x1b0
 init+0x6d/0x92
 do_one_initcall+0x15b/0x270
 kernel_init_freeable+0x2d8/0x3d0
 kernel_init+0x14/0x190
 ret_from_fork+0x24/0x30
================================================================================

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index fe5428667ad1..21cd1703a862 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1804,7 +1804,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 
 	cmd->checksum_disabled = 1;
 	cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
-	cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
+	cmd->bitmask = (1UL << cmd->max_reg_cmds) - 1;
 
 	cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
 	if (cmd->cmdif_rev > CMD_IF_REV) {

From acac5ec0c2e77b79c7861db43d50640999d5d37b Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 11 Jan 2018 16:24:06 +0200
Subject: [PATCH 1280/1678] net/mlx5e: Remove redundant check in get ethtool
 stats

ethtool core code makes sure data isn't NULL before calling
get_ethtool_stats, testing it again in the driver is redundant.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 62061fd23143..d415e67b557b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -203,9 +203,6 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
 {
 	int i, idx = 0;
 
-	if (!data)
-		return;
-
 	mutex_lock(&priv->state_lock);
 	mlx5e_update_stats(priv);
 	mutex_unlock(&priv->state_lock);

From 707129dceaf4b8c4721425d75e855aed4fd7b832 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 31 Jan 2018 14:45:40 +0200
Subject: [PATCH 1281/1678] net/mlx5e: Make choose LRO timeout function static

The function is used in en_main.c only, we can make it static and remove
its declaration from en.h

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 48e0b2a747d9..294bc9f175a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1062,7 +1062,6 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
 int mlx5e_close(struct net_device *netdev);
 int mlx5e_open(struct net_device *netdev);
 void mlx5e_update_stats_work(struct work_struct *work);
-u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
 
 int mlx5e_bits_invert(unsigned long a, int size);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f8bc3bcdf046..eb38a35564c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3967,7 +3967,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 }
 
-u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
 {
 	int i;
 

From be0f780bc50a00216ca9c7dc63485e2fc31bf8c8 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Thu, 11 Jan 2018 18:46:20 +0200
Subject: [PATCH 1282/1678] net/mlx5e: Add a helper macro in set features ndo

Add a new macro to prevent copy-pasting the same code for each new
feature.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 26 ++++++++-----------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index eb38a35564c7..fb936a978f20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3242,24 +3242,20 @@ static int mlx5e_set_features(struct net_device *netdev,
 			      netdev_features_t features)
 {
 	netdev_features_t oper_features = netdev->features;
-	int err;
+	int err = 0;
 
-	err  = mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_LRO, set_feature_lro);
-	err |= mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_HW_VLAN_CTAG_FILTER,
+#define MLX5E_HANDLE_FEATURE(feature, handler) \
+	mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
+
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
 				    set_feature_cvlan_filter);
-	err |= mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_HW_TC, set_feature_tc_num_filters);
-	err |= mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_RXALL, set_feature_rx_all);
-	err |= mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_RXFCS, set_feature_rx_fcs);
-	err |= mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
 #ifdef CONFIG_RFS_ACCEL
-	err |= mlx5e_handle_feature(netdev, &oper_features, features,
-				    NETIF_F_NTUPLE, set_feature_arfs);
+	err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
 #endif
 
 	if (err) {

From 71186172b71fe7266eaa768f028c20725202dfd5 Mon Sep 17 00:00:00 2001
From: Aviv Heller <avivh@mellanox.com>
Date: Thu, 17 Aug 2017 16:44:16 +0300
Subject: [PATCH 1283/1678] net/mlx5e: Add VLAN offload features to
 hw_enc_features

We support outer VLAN offload in driver and HW regardless of whether
an encapsulation is present in the next headers.

Exposing this in hw_enc_features will allow us to offload outer VLANs
in cases where encapsulation protocols like VXLAN and IPsec are used.

Signed-off-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fb936a978f20..1d36d7569f44 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4106,6 +4106,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->vlan_features    |= NETIF_F_RXCSUM;
 	netdev->vlan_features    |= NETIF_F_RXHASH;
 
+	netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
+	netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
+
 	if (!!MLX5_CAP_ETH(mdev, lro_cap))
 		netdev->vlan_features    |= NETIF_F_LRO;
 

From 0605c45ce5f33a51e0b23e1d36f2e56db3c95f58 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:33 -0700
Subject: [PATCH 1284/1678] i40e: move I40E_FLAG_TEMP_LINK_POLLING to state
 field

This flag is modified outside of the RTNL lock and thus should not be
part of the pf->flags variable.

Use a state bit instead, so that we can use atomic bit operations.

This is part of a larger effort to remove cmpxchg64 in
i40e_set_priv_flags()

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      | 3 ++-
 drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 38633563dacd..1ffe802d489f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -166,6 +166,7 @@ enum i40e_state_t {
 	__I40E_VF_DISABLE,
 	__I40E_MACVLAN_SYNC_PENDING,
 	__I40E_UDP_FILTER_SYNC_PENDING,
+	__I40E_TEMP_LINK_POLLING,
 	/* This must be last as it determines the size of the BITMAP */
 	__I40E_STATE_SIZE__,
 };
@@ -534,7 +535,7 @@ struct i40e_pf {
 #define I40E_FLAG_VEB_STATS_ENABLED		BIT_ULL(17)
 #define I40E_FLAG_LINK_POLLING_ENABLED		BIT_ULL(18)
 #define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT_ULL(19)
-#define I40E_FLAG_TEMP_LINK_POLLING		BIT_ULL(20)
+/* Gap for BIT_ULL(20) */
 #define I40E_FLAG_LEGACY_RX			BIT_ULL(21)
 #define I40E_FLAG_PTP				BIT_ULL(22)
 #define I40E_FLAG_IWARP_ENABLED			BIT_ULL(23)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index a478153818bc..307f5bf65708 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8432,13 +8432,12 @@ static void i40e_link_event(struct i40e_pf *pf)
 
 	/* On success, disable temp link polling */
 	if (status == I40E_SUCCESS) {
-		if (pf->flags & I40E_FLAG_TEMP_LINK_POLLING)
-			pf->flags &= ~I40E_FLAG_TEMP_LINK_POLLING;
+		clear_bit(__I40E_TEMP_LINK_POLLING, pf->state);
 	} else {
 		/* Enable link polling temporarily until i40e_get_link_status
 		 * returns I40E_SUCCESS
 		 */
-		pf->flags |= I40E_FLAG_TEMP_LINK_POLLING;
+		set_bit(__I40E_TEMP_LINK_POLLING, pf->state);
 		dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
 			status);
 		return;
@@ -8490,7 +8489,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf)
 	pf->service_timer_previous = jiffies;
 
 	if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) ||
-	    (pf->flags & I40E_FLAG_TEMP_LINK_POLLING))
+	    test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
 		i40e_link_event(pf);
 
 	/* Update the stats for active netdevs so the network stack

From 5f76a704b8df9b4da898a230ac40d143eaca9cd8 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:34 -0700
Subject: [PATCH 1285/1678] i40e: move client flags into state bits

The iWarp client flags are all potentially changed when the RTNL lock is
not held, so they should not be part of the pf->flags variable. Instead,
move them into the state field so that we can use atomic bit operations.

This is part of a larger effort to remove cmpxchg64 in
i40e_set_priv_flags()

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h        |  7 +++---
 drivers/net/ethernet/intel/i40e/i40e_client.c |  7 +++---
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 22 +++++++++----------
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 1ffe802d489f..67518013ca4d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -167,6 +167,9 @@ enum i40e_state_t {
 	__I40E_MACVLAN_SYNC_PENDING,
 	__I40E_UDP_FILTER_SYNC_PENDING,
 	__I40E_TEMP_LINK_POLLING,
+	__I40E_CLIENT_SERVICE_REQUESTED,
+	__I40E_CLIENT_L2_CHANGE,
+	__I40E_CLIENT_RESET,
 	/* This must be last as it determines the size of the BITMAP */
 	__I40E_STATE_SIZE__,
 };
@@ -539,9 +542,7 @@ struct i40e_pf {
 #define I40E_FLAG_LEGACY_RX			BIT_ULL(21)
 #define I40E_FLAG_PTP				BIT_ULL(22)
 #define I40E_FLAG_IWARP_ENABLED			BIT_ULL(23)
-#define I40E_FLAG_SERVICE_CLIENT_REQUESTED	BIT_ULL(24)
-#define I40E_FLAG_CLIENT_L2_CHANGE		BIT_ULL(25)
-#define I40E_FLAG_CLIENT_RESET			BIT_ULL(26)
+/* Gap for BIT_ULL(24) through BIT_ULL(26) */
 #define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED	BIT_ULL(27)
 #define I40E_FLAG_SOURCE_PRUNING_DISABLED	BIT_ULL(28)
 #define I40E_FLAG_TC_MQPRIO			BIT_ULL(29)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 999dea5a7c9e..d8ce4999864f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -376,9 +376,8 @@ void i40e_client_subtask(struct i40e_pf *pf)
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	int ret = 0;
 
-	if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
+	if (!test_and_clear_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state))
 		return;
-	pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED;
 	cdev = pf->cinst;
 
 	/* If we're down or resetting, just bail */
@@ -459,7 +458,7 @@ int i40e_lan_add_device(struct i40e_pf *pf)
 	 * added, we can schedule a subtask to go initiate the clients if
 	 * they can be launched at probe time.
 	 */
-	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
 	i40e_service_event_schedule(pf);
 
 out:
@@ -554,7 +553,7 @@ static void i40e_client_prepare(struct i40e_client *client)
 		pf = ldev->pf;
 		i40e_client_add_instance(pf);
 		/* Start the client subtask */
-		pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+		set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
 		i40e_service_event_schedule(pf);
 	}
 	mutex_unlock(&i40e_device_mutex);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 307f5bf65708..68b51761c509 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2634,8 +2634,8 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
 	netdev->mtu = new_mtu;
 	if (netif_running(netdev))
 		i40e_vsi_reinit_locked(vsi);
-	pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
-		      I40E_FLAG_CLIENT_L2_CHANGE);
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
 	return 0;
 }
 
@@ -4722,9 +4722,9 @@ static void i40e_vsi_close(struct i40e_vsi *vsi)
 	i40e_vsi_free_tx_resources(vsi);
 	i40e_vsi_free_rx_resources(vsi);
 	vsi->current_netdev_flags = 0;
-	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
 	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
-		pf->flags |=  I40E_FLAG_CLIENT_RESET;
+		set_bit(__I40E_CLIENT_RESET, pf->state);
 }
 
 /**
@@ -6495,7 +6495,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
 	/* On the next run of the service_task, notify any clients of the new
 	 * opened netdev
 	 */
-	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
 	i40e_service_event_schedule(pf);
 
 	return 0;
@@ -8037,8 +8037,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
 		i40e_service_event_schedule(pf);
 	} else {
 		i40e_pf_unquiesce_all_vsi(pf);
-	pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
-		      I40E_FLAG_CLIENT_L2_CHANGE);
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
 	}
 
 exit:
@@ -9785,17 +9785,15 @@ static void i40e_service_task(struct work_struct *work)
 	i40e_vc_process_vflr_event(pf);
 	i40e_watchdog_subtask(pf);
 	i40e_fdir_reinit_subtask(pf);
-	if (pf->flags & I40E_FLAG_CLIENT_RESET) {
+	if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
 		/* Client subtask will reopen next time through. */
 		i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true);
-		pf->flags &= ~I40E_FLAG_CLIENT_RESET;
 	} else {
 		i40e_client_subtask(pf);
-		if (pf->flags & I40E_FLAG_CLIENT_L2_CHANGE) {
+		if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
+				       pf->state))
 			i40e_notify_client_of_l2_param_changes(
 							pf->vsi[pf->lan_vsi]);
-			pf->flags &= ~I40E_FLAG_CLIENT_L2_CHANGE;
-		}
 	}
 	i40e_sync_filters_subtask(pf);
 	i40e_sync_udp_filters_subtask(pf);

From f0ee70a042e267a517e943220e18ae62d3c1995a Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:35 -0700
Subject: [PATCH 1286/1678] i40e: hold the RTNL lock while changing interrupt
 schemes

When we suspend and resume, we need to clear and re-enable the interrupt
scheme. This was previously not done while holding the RTNL lock, which
could be problematic, because we are actually destroying and re-creating
queues.

Hold the RTNL lock for the entire sequence of preparing for reset, and
when resuming. This additionally protects the flags related to interrupt
scheme under RTNL lock so that their modification is properly threaded.

This is part of a larger effort to remove the need for cmpxchg64 in
i40e_set_priv_flags().

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 68b51761c509..5efd6d7bfa59 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -14348,7 +14348,13 @@ static int __maybe_unused i40e_suspend(struct device *dev)
 	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
 		i40e_enable_mc_magic_wake(pf);
 
-	i40e_prep_for_reset(pf, false);
+	/* Since we're going to destroy queues during the
+	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
+	 * whole section
+	 */
+	rtnl_lock();
+
+	i40e_prep_for_reset(pf, true);
 
 	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
 	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
@@ -14360,6 +14366,8 @@ static int __maybe_unused i40e_suspend(struct device *dev)
 	 */
 	i40e_clear_interrupt_scheme(pf);
 
+	rtnl_unlock();
+
 	return 0;
 }
 
@@ -14377,6 +14385,11 @@ static int __maybe_unused i40e_resume(struct device *dev)
 	if (!test_bit(__I40E_SUSPENDED, pf->state))
 		return 0;
 
+	/* We need to hold the RTNL lock prior to restoring interrupt schemes,
+	 * since we're going to be restoring queues
+	 */
+	rtnl_lock();
+
 	/* We cleared the interrupt scheme when we suspended, so we need to
 	 * restore it now to resume device functionality.
 	 */
@@ -14387,7 +14400,9 @@ static int __maybe_unused i40e_resume(struct device *dev)
 	}
 
 	clear_bit(__I40E_DOWN, pf->state);
-	i40e_reset_and_rebuild(pf, false, false);
+	i40e_reset_and_rebuild(pf, false, true);
+
+	rtnl_unlock();
 
 	/* Clear suspended state last after everything is recovered */
 	clear_bit(__I40E_SUSPENDED, pf->state);

From 886ff146a73627c287262a7d92ddfb50baa29552 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:36 -0700
Subject: [PATCH 1287/1678] i40e: stop using cmpxchg flow in
 i40e_set_priv_flags()

Now that the only places which modify flags are either (a) during
initialization prior to creating a netdevice, or (b) while holding the
rtnl lock, we no longer need the cmpxchg64 call in i40e_set_priv_flags.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/i40e/i40e_ethtool.c    | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 2a9c93091e3d..b974482ff630 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4436,21 +4436,12 @@ flags_complete:
 		}
 	}
 
-	/* Compare and exchange the new flags into place. If we failed, that
-	 * is if cmpxchg returns anything but the old value, this means that
-	 * something else has modified the flags variable since we copied it
-	 * originally. We'll just punt with an error and log something in the
-	 * message buffer.
-	 *
-	 * This is the point of no return for this function.  We need to have
-	 * checked any discrepancies or misconfigurations and returned
-	 * EOPNOTSUPP before updating pf->flags here.
+	/* Now that we've checked to ensure that the new flags are valid, load
+	 * them into place. Since we only modify flags either (a) during
+	 * initialization or (b) while holding the RTNL lock, we don't need
+	 * anything fancy here.
 	 */
-	if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
-		dev_warn(&pf->pdev->dev,
-			 "Unable to update pf->flags as it was modified by another thread...\n");
-		return -EAGAIN;
-	}
+	pf->flags = new_flags;
 
 	/* Process any additional changes needed as a result of flag changes.
 	 * The changed_flags value reflects the list of bits that were

From 8f769dd14a4364bdce1f5afda267cf2f758f8a47 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 16 Mar 2018 01:26:37 -0700
Subject: [PATCH 1288/1678] i40e: re-number feature flags to remove gaps

Remove the gaps created by the recent refactor of various feature flags
that have moved to the state field. Use only a u32 now that we have
fewer than 32 flags in the field.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h | 57 ++++++++++++--------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 67518013ca4d..a44139c1de80 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -518,37 +518,32 @@ struct i40e_pf {
 #define I40E_HW_RESTART_AUTONEG			BIT(18)
 #define I40E_HW_STOPPABLE_FW_LLDP		BIT(19)
 
-	u64 flags;
-#define I40E_FLAG_RX_CSUM_ENABLED		BIT_ULL(0)
-#define I40E_FLAG_MSI_ENABLED			BIT_ULL(1)
-#define I40E_FLAG_MSIX_ENABLED			BIT_ULL(2)
-#define I40E_FLAG_RSS_ENABLED			BIT_ULL(3)
-#define I40E_FLAG_VMDQ_ENABLED			BIT_ULL(4)
-/* Gap for BIT_ULL(5) */
-#define I40E_FLAG_SRIOV_ENABLED			BIT_ULL(6)
-#define I40E_FLAG_DCB_CAPABLE			BIT_ULL(7)
-#define I40E_FLAG_DCB_ENABLED			BIT_ULL(8)
-#define I40E_FLAG_FD_SB_ENABLED			BIT_ULL(9)
-#define I40E_FLAG_FD_ATR_ENABLED		BIT_ULL(10)
-/* Gap for BIT_ULL(11) and BIT_ULL(12) */
-#define I40E_FLAG_MFP_ENABLED			BIT_ULL(13)
-/* Gap for BIT_ULL(14) */
-#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT_ULL(15)
-#define I40E_FLAG_VEB_MODE_ENABLED		BIT_ULL(16)
-#define I40E_FLAG_VEB_STATS_ENABLED		BIT_ULL(17)
-#define I40E_FLAG_LINK_POLLING_ENABLED		BIT_ULL(18)
-#define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT_ULL(19)
-/* Gap for BIT_ULL(20) */
-#define I40E_FLAG_LEGACY_RX			BIT_ULL(21)
-#define I40E_FLAG_PTP				BIT_ULL(22)
-#define I40E_FLAG_IWARP_ENABLED			BIT_ULL(23)
-/* Gap for BIT_ULL(24) through BIT_ULL(26) */
-#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED	BIT_ULL(27)
-#define I40E_FLAG_SOURCE_PRUNING_DISABLED	BIT_ULL(28)
-#define I40E_FLAG_TC_MQPRIO			BIT_ULL(29)
-#define I40E_FLAG_FD_SB_INACTIVE		BIT_ULL(30)
-#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER		BIT_ULL(31)
-#define I40E_FLAG_DISABLE_FW_LLDP		BIT_ULL(32)
+	u32 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED		BIT(0)
+#define I40E_FLAG_MSI_ENABLED			BIT(1)
+#define I40E_FLAG_MSIX_ENABLED			BIT(2)
+#define I40E_FLAG_RSS_ENABLED			BIT(3)
+#define I40E_FLAG_VMDQ_ENABLED			BIT(4)
+#define I40E_FLAG_SRIOV_ENABLED			BIT(5)
+#define I40E_FLAG_DCB_CAPABLE			BIT(6)
+#define I40E_FLAG_DCB_ENABLED			BIT(7)
+#define I40E_FLAG_FD_SB_ENABLED			BIT(8)
+#define I40E_FLAG_FD_ATR_ENABLED		BIT(9)
+#define I40E_FLAG_MFP_ENABLED			BIT(10)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT(11)
+#define I40E_FLAG_VEB_MODE_ENABLED		BIT(12)
+#define I40E_FLAG_VEB_STATS_ENABLED		BIT(13)
+#define I40E_FLAG_LINK_POLLING_ENABLED		BIT(14)
+#define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT(15)
+#define I40E_FLAG_LEGACY_RX			BIT(16)
+#define I40E_FLAG_PTP				BIT(17)
+#define I40E_FLAG_IWARP_ENABLED			BIT(18)
+#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED	BIT(19)
+#define I40E_FLAG_SOURCE_PRUNING_DISABLED       BIT(20)
+#define I40E_FLAG_TC_MQPRIO			BIT(21)
+#define I40E_FLAG_FD_SB_INACTIVE		BIT(22)
+#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER		BIT(23)
+#define I40E_FLAG_DISABLE_FW_LLDP		BIT(24)
 
 	struct i40e_client_instance *cinst;
 	bool stat_offsets_loaded;

From 8ce29c679a6ecefb88dc794260f4e91a284eed9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Thu, 22 Mar 2018 16:14:33 +0100
Subject: [PATCH 1289/1678] i40e: tweak page counting for XDP_REDIRECT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit tweaks the page counting for XDP_REDIRECT to function
properly. XDP_REDIRECT support will be added in a future commit.

The current page counting scheme assumes that the reference count
cannot decrease until the received frame is sent to the upper layers
of the networking stack. This assumption does not hold for the
XDP_REDIRECT action, since a page (pointed out by xdp_buff) can have
its reference count decreased via the xdp_do_redirect call.

To work around that, we now start off by a large page count and then
don't allow a refcount less than two.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 797bcdd3504e..9c338cef8315 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1588,9 +1588,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
 	bi->dma = dma;
 	bi->page = page;
 	bi->page_offset = i40e_rx_offset(rx_ring);
-
-	/* initialize pagecnt_bias to 1 representing we fully own page */
-	bi->pagecnt_bias = 1;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
 
 	return true;
 }
@@ -1956,8 +1955,8 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer)
 	 * the pagecnt_bias and page count so that we fully restock the
 	 * number of references the driver holds.
 	 */
-	if (unlikely(!pagecnt_bias)) {
-		page_ref_add(page, USHRT_MAX);
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
 		rx_buffer->pagecnt_bias = USHRT_MAX;
 	}
 

From d9314c474d4fc1985e836b92fba4c40dd84885a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Thu, 22 Mar 2018 16:14:34 +0100
Subject: [PATCH 1290/1678] i40e: add support for XDP_REDIRECT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver now acts upon the XDP_REDIRECT return action. Two new ndos
are implemented, ndo_xdp_xmit and ndo_xdp_flush.

XDP_REDIRECT action enables XDP program to redirect frames to other
netdevs.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c |  2 +
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 74 ++++++++++++++++++---
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |  2 +
 3 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5efd6d7bfa59..16229998fb1e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11815,6 +11815,8 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_bridge_getlink	= i40e_ndo_bridge_getlink,
 	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
 	.ndo_bpf		= i40e_xdp,
+	.ndo_xdp_xmit		= i40e_xdp_xmit,
+	.ndo_xdp_flush		= i40e_xdp_flush,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9c338cef8315..f174c72480ab 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2214,7 +2214,7 @@ static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
 static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 				    struct xdp_buff *xdp)
 {
-	int result = I40E_XDP_PASS;
+	int err, result = I40E_XDP_PASS;
 	struct i40e_ring *xdp_ring;
 	struct bpf_prog *xdp_prog;
 	u32 act;
@@ -2233,6 +2233,10 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 		result = i40e_xmit_xdp_ring(xdp, xdp_ring);
 		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED;
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 	case XDP_ABORTED:
@@ -2268,6 +2272,15 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
 #endif
 }
 
+static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
+{
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
 /**
  * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: rx descriptor ring to transact packets on
@@ -2402,16 +2415,11 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	}
 
 	if (xdp_xmit) {
-		struct i40e_ring *xdp_ring;
+		struct i40e_ring *xdp_ring =
+			rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 
-		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-
-		/* Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.
-		 */
-		wmb();
-
-		writel(xdp_ring->next_to_use, xdp_ring->tail);
+		i40e_xdp_ring_update_tail(xdp_ring);
+		xdp_do_flush_map();
 	}
 
 	rx_ring->skb = skb;
@@ -3659,3 +3667,49 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 	return i40e_xmit_frame_ring(skb, tx_ring);
 }
+
+/**
+ * i40e_xdp_xmit - Implements ndo_xdp_xmit
+ * @dev: netdev
+ * @xdp: XDP buffer
+ *
+ * Returns Zero if sent, else an error code
+ **/
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct i40e_vsi *vsi = np->vsi;
+	int err;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+		return -ENXIO;
+
+	err = i40e_xmit_xdp_ring(xdp, vsi->xdp_rings[queue_index]);
+	if (err != I40E_XDP_TX)
+		return -ENOSPC;
+
+	return 0;
+}
+
+/**
+ * i40e_xdp_flush - Implements ndo_xdp_flush
+ * @dev: netdev
+ **/
+void i40e_xdp_flush(struct net_device *dev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct i40e_vsi *vsi = np->vsi;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return;
+
+	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+		return;
+
+	i40e_xdp_ring_update_tail(vsi->xdp_rings[queue_index]);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 7f8220e65374..3043483ec426 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -510,6 +510,8 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
+int i40e_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp);
+void i40e_xdp_flush(struct net_device *dev);
 
 /**
  * i40e_get_head - Retrieve head from head writeback

From 255dd5b79d5474dad3ecebe99be7818684921abd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Mar 2018 22:18:24 +0100
Subject: [PATCH 1291/1678] Bluetooth: btrsi: rework dependencies

The linkage between the bluetooth driver and the wireless
driver is not defined properly, leading to build problems
such as:

warning: (BT_HCIRSI) selects RSI_COEX which has unmet direct dependencies (NETDEVICES && WLAN && WLAN_VENDOR_RSI && BT_HCIRSI && RSI_91X)
drivers/net/wireless/rsi/rsi_91x_main.o: In function `rsi_read_pkt':
(.text+0x205): undefined reference to `rsi_bt_ops'

As the dependency is actually the reverse (RSI_91X uses
the BT_RSI driver, not the other way round), this changes
the dependency to match, and enables the bluetooth driver
from the RSI_COEX symbol.

Fixes: 38aa4da50483 ("Bluetooth: btrsi: add new rsi bluetooth driver")
Acked-by; Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bluetooth/Kconfig        | 4 +---
 drivers/net/wireless/rsi/Kconfig | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index d8bbd661dbdb..149a38ee1fce 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -393,9 +393,7 @@ config BT_QCOMSMD
 	  kernel or say M to compile as a module.
 
 config BT_HCIRSI
-	tristate "Redpine HCI support"
-	default n
-	select RSI_COEX
+	tristate
 	help
 	  Redpine BT driver.
 	  This driver handles BT traffic from upper layers and pass
diff --git a/drivers/net/wireless/rsi/Kconfig b/drivers/net/wireless/rsi/Kconfig
index f004be33fcfa..976c21866230 100644
--- a/drivers/net/wireless/rsi/Kconfig
+++ b/drivers/net/wireless/rsi/Kconfig
@@ -13,6 +13,7 @@ if WLAN_VENDOR_RSI
 
 config RSI_91X
 	tristate "Redpine Signals Inc 91x WLAN driver support"
+	select BT_HCIRSI if RSI_COEX
 	depends on MAC80211
 	---help---
 	  This option enabes support for RSI 1x1 devices.
@@ -44,7 +45,8 @@ config RSI_USB
 
 config RSI_COEX
 	bool "Redpine Signals WLAN BT Coexistence support"
-	depends on BT_HCIRSI && RSI_91X
+	depends on BT && RSI_91X
+	depends on !(BT=m && RSI_91X=y)
 	default y
 	---help---
 	  This option enables the WLAN BT coex support in rsi drivers.

From 2ef00c53049b6a8758d118188992da01d75f3628 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 23 Mar 2018 15:54:37 -0700
Subject: [PATCH 1292/1678] wireless: Use octal not symbolic permissions

Prefer the direct use of octal for permissions.

Done with checkpatch -f --types=SYMBOLIC_PERMS --fix-inplace
and some typing.

Miscellanea:

o Whitespace neatening around these conversions.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath5k/base.c         |   6 +-
 drivers/net/wireless/ath/ath5k/debug.c        |  37 ++----
 drivers/net/wireless/ath/ath5k/sysfs.c        |   8 +-
 drivers/net/wireless/ath/ath6kl/debug.c       |  43 ++++---
 drivers/net/wireless/ath/ath9k/common-debug.c |   9 +-
 .../net/wireless/ath/ath9k/common-spectral.c  |  10 +-
 drivers/net/wireless/ath/ath9k/debug.c        |  40 +++----
 drivers/net/wireless/ath/ath9k/debug_sta.c    |   6 +-
 drivers/net/wireless/ath/ath9k/dfs_debug.c    |   4 +-
 .../net/wireless/ath/ath9k/htc_drv_debug.c    |  16 +--
 drivers/net/wireless/ath/ath9k/tx99.c         |   4 +-
 drivers/net/wireless/ath/carl9170/debug.c     |   8 +-
 drivers/net/wireless/ath/carl9170/main.c      |   4 +-
 drivers/net/wireless/ath/wcn36xx/debug.c      |   5 +-
 .../broadcom/brcm80211/brcmfmac/common.c      |   6 +-
 .../broadcom/brcm80211/brcmsmac/debug.c       |   2 +-
 .../broadcom/brcm80211/brcmsmac/mac80211_if.c |   2 +-
 drivers/net/wireless/cisco/airo.c             |   6 +-
 drivers/net/wireless/intel/ipw2x00/ipw2100.c  |  29 +++--
 drivers/net/wireless/intel/ipw2x00/ipw2200.c  |  51 ++++----
 .../wireless/intel/ipw2x00/libipw_module.c    |   2 +-
 .../net/wireless/intel/iwlegacy/3945-mac.c    |  35 +++---
 .../net/wireless/intel/iwlegacy/4965-mac.c    |  19 ++-
 drivers/net/wireless/intel/iwlegacy/4965-rs.c |   8 +-
 drivers/net/wireless/intel/iwlegacy/common.c  |   4 +-
 drivers/net/wireless/intel/iwlegacy/debug.c   |  58 +++++----
 .../net/wireless/intel/iwlwifi/dvm/debugfs.c  |  76 ++++++------
 drivers/net/wireless/intel/iwlwifi/dvm/rs.c   |  16 +--
 .../net/wireless/intel/iwlwifi/fw/debugfs.c   |   2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c  |  43 +++----
 .../wireless/intel/iwlwifi/mvm/debugfs-vif.c  |  38 +++---
 .../net/wireless/intel/iwlwifi/mvm/debugfs.c  | 110 +++++++++---------
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c  |   6 +-
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c   |  12 +-
 .../net/wireless/intel/iwlwifi/pcie/trans.c   |  12 +-
 drivers/net/wireless/intersil/p54/main.c      |   2 +-
 drivers/net/wireless/mediatek/mt76/debugfs.c  |  10 +-
 .../wireless/mediatek/mt76/mt76x2_debugfs.c   |   8 +-
 .../net/wireless/mediatek/mt7601u/debugfs.c   |  14 +--
 .../net/wireless/ralink/rt2x00/rt2500usb.c    |   2 +-
 .../net/wireless/ralink/rt2x00/rt2800pci.c    |   2 +-
 .../net/wireless/ralink/rt2x00/rt2800soc.c    |   2 +-
 .../net/wireless/ralink/rt2x00/rt2800usb.c    |   2 +-
 .../net/wireless/ralink/rt2x00/rt2x00debug.c  |  64 +++++-----
 drivers/net/wireless/ralink/rt2x00/rt61pci.c  |   2 +-
 drivers/net/wireless/ralink/rt2x00/rt73usb.c  |   2 +-
 drivers/net/wireless/ray_cs.c                 |   8 +-
 drivers/net/wireless/st/cw1200/debug.c        |   6 +-
 drivers/net/wireless/st/cw1200/main.c         |   2 +-
 drivers/net/wireless/ti/wl18xx/main.c         |  27 ++---
 drivers/net/wireless/ti/wlcore/main.c         |   8 +-
 drivers/net/wireless/ti/wlcore/sdio.c         |   2 +-
 drivers/net/wireless/ti/wlcore/sysfs.c        |   7 +-
 53 files changed, 424 insertions(+), 483 deletions(-)

diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index 527afcf39246..a2351ef45ae0 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -73,16 +73,16 @@
 #include "trace.h"
 
 bool ath5k_modparam_nohwcrypt;
-module_param_named(nohwcrypt, ath5k_modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, ath5k_modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 static bool modparam_fastchanswitch;
-module_param_named(fastchanswitch, modparam_fastchanswitch, bool, S_IRUGO);
+module_param_named(fastchanswitch, modparam_fastchanswitch, bool, 0444);
 MODULE_PARM_DESC(fastchanswitch, "Enable fast channel switching for AR2413/AR5413 radios.");
 
 static bool ath5k_modparam_no_hw_rfkill_switch;
 module_param_named(no_hw_rfkill_switch, ath5k_modparam_no_hw_rfkill_switch,
-								bool, S_IRUGO);
+		   bool, 0444);
 MODULE_PARM_DESC(no_hw_rfkill_switch, "Ignore the GPIO RFKill switch state");
 
 
diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c
index bd7f6d7b199e..3513bbec4639 100644
--- a/drivers/net/wireless/ath/ath5k/debug.c
+++ b/drivers/net/wireless/ath/ath5k/debug.c
@@ -1004,32 +1004,17 @@ ath5k_debug_init_device(struct ath5k_hw *ah)
 	if (!phydir)
 		return;
 
-	debugfs_create_file("debug", S_IWUSR | S_IRUSR, phydir, ah,
-			    &fops_debug);
-
-	debugfs_create_file("registers", S_IRUSR, phydir, ah, &fops_registers);
-
-	debugfs_create_file("beacon", S_IWUSR | S_IRUSR, phydir, ah,
-			    &fops_beacon);
-
-	debugfs_create_file("reset", S_IWUSR, phydir, ah, &fops_reset);
-
-	debugfs_create_file("antenna", S_IWUSR | S_IRUSR, phydir, ah,
-			    &fops_antenna);
-
-	debugfs_create_file("misc", S_IRUSR, phydir, ah, &fops_misc);
-
-	debugfs_create_file("eeprom", S_IRUSR, phydir, ah, &fops_eeprom);
-
-	debugfs_create_file("frameerrors", S_IWUSR | S_IRUSR, phydir, ah,
-			    &fops_frameerrors);
-
-	debugfs_create_file("ani", S_IWUSR | S_IRUSR, phydir, ah, &fops_ani);
-
-	debugfs_create_file("queue", S_IWUSR | S_IRUSR, phydir, ah,
-			    &fops_queue);
-
-	debugfs_create_bool("32khz_clock", S_IWUSR | S_IRUSR, phydir,
+	debugfs_create_file("debug", 0600, phydir, ah, &fops_debug);
+	debugfs_create_file("registers", 0400, phydir, ah, &fops_registers);
+	debugfs_create_file("beacon", 0600, phydir, ah, &fops_beacon);
+	debugfs_create_file("reset", 0200, phydir, ah, &fops_reset);
+	debugfs_create_file("antenna", 0600, phydir, ah, &fops_antenna);
+	debugfs_create_file("misc", 0400, phydir, ah, &fops_misc);
+	debugfs_create_file("eeprom", 0400, phydir, ah, &fops_eeprom);
+	debugfs_create_file("frameerrors", 0600, phydir, ah, &fops_frameerrors);
+	debugfs_create_file("ani", 0600, phydir, ah, &fops_ani);
+	debugfs_create_file("queue", 0600, phydir, ah, &fops_queue);
+	debugfs_create_bool("32khz_clock", 0600, phydir,
 			    &ah->ah_use_32khz_clock);
 }
 
diff --git a/drivers/net/wireless/ath/ath5k/sysfs.c b/drivers/net/wireless/ath/ath5k/sysfs.c
index 25978c732fe1..8113baddd8fc 100644
--- a/drivers/net/wireless/ath/ath5k/sysfs.c
+++ b/drivers/net/wireless/ath/ath5k/sysfs.c
@@ -31,7 +31,7 @@ static ssize_t ath5k_attr_store_##name(struct device *dev,		\
 	set(ah, val);						\
 	return count;							\
 }									\
-static DEVICE_ATTR(name, S_IRUGO | S_IWUSR,				\
+static DEVICE_ATTR(name, 0644,						\
 		   ath5k_attr_show_##name, ath5k_attr_store_##name)
 
 #define SIMPLE_SHOW(name, get)						\
@@ -43,7 +43,7 @@ static ssize_t ath5k_attr_show_##name(struct device *dev,		\
 	struct ath5k_hw *ah = hw->priv;				\
 	return snprintf(buf, PAGE_SIZE, "%d\n", get);			\
 }									\
-static DEVICE_ATTR(name, S_IRUGO, ath5k_attr_show_##name, NULL)
+static DEVICE_ATTR(name, 0444, ath5k_attr_show_##name, NULL)
 
 /*** ANI ***/
 
@@ -66,7 +66,7 @@ static ssize_t ath5k_attr_show_noise_immunity_level_max(struct device *dev,
 {
 	return snprintf(buf, PAGE_SIZE, "%d\n", ATH5K_ANI_MAX_NOISE_IMM_LVL);
 }
-static DEVICE_ATTR(noise_immunity_level_max, S_IRUGO,
+static DEVICE_ATTR(noise_immunity_level_max, 0444,
 		   ath5k_attr_show_noise_immunity_level_max, NULL);
 
 static ssize_t ath5k_attr_show_firstep_level_max(struct device *dev,
@@ -75,7 +75,7 @@ static ssize_t ath5k_attr_show_firstep_level_max(struct device *dev,
 {
 	return snprintf(buf, PAGE_SIZE, "%d\n", ATH5K_ANI_MAX_FIRSTEP_LVL);
 }
-static DEVICE_ATTR(firstep_level_max, S_IRUGO,
+static DEVICE_ATTR(firstep_level_max, 0444,
 		   ath5k_attr_show_firstep_level_max, NULL);
 
 static struct attribute *ath5k_sysfs_entries_ani[] = {
diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index 1eea6c23976f..0f965e9f38a4 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c
@@ -1794,69 +1794,68 @@ int ath6kl_debug_init_fs(struct ath6kl *ar)
 	if (!ar->debugfs_phy)
 		return -ENOMEM;
 
-	debugfs_create_file("tgt_stats", S_IRUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("tgt_stats", 0400, ar->debugfs_phy, ar,
 			    &fops_tgt_stats);
 
 	if (ar->hif_type == ATH6KL_HIF_TYPE_SDIO)
-		debugfs_create_file("credit_dist_stats", S_IRUSR,
+		debugfs_create_file("credit_dist_stats", 0400,
 				    ar->debugfs_phy, ar,
 				    &fops_credit_dist_stats);
 
-	debugfs_create_file("endpoint_stats", S_IRUSR | S_IWUSR,
+	debugfs_create_file("endpoint_stats", 0600,
 			    ar->debugfs_phy, ar, &fops_endpoint_stats);
 
-	debugfs_create_file("fwlog", S_IRUSR, ar->debugfs_phy, ar,
-			    &fops_fwlog);
+	debugfs_create_file("fwlog", 0400, ar->debugfs_phy, ar, &fops_fwlog);
 
-	debugfs_create_file("fwlog_block", S_IRUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("fwlog_block", 0400, ar->debugfs_phy, ar,
 			    &fops_fwlog_block);
 
-	debugfs_create_file("fwlog_mask", S_IRUSR | S_IWUSR, ar->debugfs_phy,
+	debugfs_create_file("fwlog_mask", 0600, ar->debugfs_phy,
 			    ar, &fops_fwlog_mask);
 
-	debugfs_create_file("reg_addr", S_IRUSR | S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("reg_addr", 0600, ar->debugfs_phy, ar,
 			    &fops_diag_reg_read);
 
-	debugfs_create_file("reg_dump", S_IRUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("reg_dump", 0400, ar->debugfs_phy, ar,
 			    &fops_reg_dump);
 
-	debugfs_create_file("lrssi_roam_threshold", S_IRUSR | S_IWUSR,
+	debugfs_create_file("lrssi_roam_threshold", 0600,
 			    ar->debugfs_phy, ar, &fops_lrssi_roam_threshold);
 
-	debugfs_create_file("reg_write", S_IRUSR | S_IWUSR,
+	debugfs_create_file("reg_write", 0600,
 			    ar->debugfs_phy, ar, &fops_diag_reg_write);
 
-	debugfs_create_file("war_stats", S_IRUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("war_stats", 0400, ar->debugfs_phy, ar,
 			    &fops_war_stats);
 
-	debugfs_create_file("roam_table", S_IRUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("roam_table", 0400, ar->debugfs_phy, ar,
 			    &fops_roam_table);
 
-	debugfs_create_file("force_roam", S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("force_roam", 0200, ar->debugfs_phy, ar,
 			    &fops_force_roam);
 
-	debugfs_create_file("roam_mode", S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("roam_mode", 0200, ar->debugfs_phy, ar,
 			    &fops_roam_mode);
 
-	debugfs_create_file("keepalive", S_IRUSR | S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("keepalive", 0600, ar->debugfs_phy, ar,
 			    &fops_keepalive);
 
-	debugfs_create_file("disconnect_timeout", S_IRUSR | S_IWUSR,
+	debugfs_create_file("disconnect_timeout", 0600,
 			    ar->debugfs_phy, ar, &fops_disconnect_timeout);
 
-	debugfs_create_file("create_qos", S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("create_qos", 0200, ar->debugfs_phy, ar,
 			    &fops_create_qos);
 
-	debugfs_create_file("delete_qos", S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("delete_qos", 0200, ar->debugfs_phy, ar,
 			    &fops_delete_qos);
 
-	debugfs_create_file("bgscan_interval", S_IWUSR,
+	debugfs_create_file("bgscan_interval", 0200,
 			    ar->debugfs_phy, ar, &fops_bgscan_int);
 
-	debugfs_create_file("listen_interval", S_IRUSR | S_IWUSR,
+	debugfs_create_file("listen_interval", 0600,
 			    ar->debugfs_phy, ar, &fops_listen_int);
 
-	debugfs_create_file("power_params", S_IWUSR, ar->debugfs_phy, ar,
+	debugfs_create_file("power_params", 0200, ar->debugfs_phy, ar,
 			    &fops_power_params);
 
 	return 0;
diff --git a/drivers/net/wireless/ath/ath9k/common-debug.c b/drivers/net/wireless/ath/ath9k/common-debug.c
index 84afcf78151f..239429f10378 100644
--- a/drivers/net/wireless/ath/ath9k/common-debug.c
+++ b/drivers/net/wireless/ath/ath9k/common-debug.c
@@ -47,7 +47,7 @@ static const struct file_operations fops_modal_eeprom = {
 void ath9k_cmn_debug_modal_eeprom(struct dentry *debugfs_phy,
 				  struct ath_hw *ah)
 {
-	debugfs_create_file("modal_eeprom", S_IRUSR, debugfs_phy, ah,
+	debugfs_create_file("modal_eeprom", 0400, debugfs_phy, ah,
 			    &fops_modal_eeprom);
 }
 EXPORT_SYMBOL(ath9k_cmn_debug_modal_eeprom);
@@ -82,7 +82,7 @@ static const struct file_operations fops_base_eeprom = {
 void ath9k_cmn_debug_base_eeprom(struct dentry *debugfs_phy,
 				 struct ath_hw *ah)
 {
-	debugfs_create_file("base_eeprom", S_IRUSR, debugfs_phy, ah,
+	debugfs_create_file("base_eeprom", 0400, debugfs_phy, ah,
 			    &fops_base_eeprom);
 }
 EXPORT_SYMBOL(ath9k_cmn_debug_base_eeprom);
@@ -178,8 +178,7 @@ static const struct file_operations fops_recv = {
 void ath9k_cmn_debug_recv(struct dentry *debugfs_phy,
 			  struct ath_rx_stats *rxstats)
 {
-	debugfs_create_file("recv", S_IRUSR, debugfs_phy, rxstats,
-			    &fops_recv);
+	debugfs_create_file("recv", 0400, debugfs_phy, rxstats, &fops_recv);
 }
 EXPORT_SYMBOL(ath9k_cmn_debug_recv);
 
@@ -255,7 +254,7 @@ static const struct file_operations fops_phy_err = {
 void ath9k_cmn_debug_phy_err(struct dentry *debugfs_phy,
 			     struct ath_rx_stats *rxstats)
 {
-	debugfs_create_file("phy_err", S_IRUSR, debugfs_phy, rxstats,
+	debugfs_create_file("phy_err", 0400, debugfs_phy, rxstats,
 			    &fops_phy_err);
 }
 EXPORT_SYMBOL(ath9k_cmn_debug_phy_err);
diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c
index a41bcbda1d9e..440e16e641e4 100644
--- a/drivers/net/wireless/ath/ath9k/common-spectral.c
+++ b/drivers/net/wireless/ath/ath9k/common-spectral.c
@@ -1098,23 +1098,23 @@ void ath9k_cmn_spectral_init_debug(struct ath_spec_scan_priv *spec_priv,
 		return;
 
 	debugfs_create_file("spectral_scan_ctl",
-			    S_IRUSR | S_IWUSR,
+			    0600,
 			    debugfs_phy, spec_priv,
 			    &fops_spec_scan_ctl);
 	debugfs_create_file("spectral_short_repeat",
-			    S_IRUSR | S_IWUSR,
+			    0600,
 			    debugfs_phy, spec_priv,
 			    &fops_spectral_short_repeat);
 	debugfs_create_file("spectral_count",
-			    S_IRUSR | S_IWUSR,
+			    0600,
 			    debugfs_phy, spec_priv,
 			    &fops_spectral_count);
 	debugfs_create_file("spectral_period",
-			    S_IRUSR | S_IWUSR,
+			    0600,
 			    debugfs_phy, spec_priv,
 			    &fops_spectral_period);
 	debugfs_create_file("spectral_fft_period",
-			    S_IRUSR | S_IWUSR,
+			    0600,
 			    debugfs_phy, spec_priv,
 			    &fops_spectral_fft_period);
 }
diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c
index 9e8aed5c478c..f685843a2ff3 100644
--- a/drivers/net/wireless/ath/ath9k/debug.c
+++ b/drivers/net/wireless/ath/ath9k/debug.c
@@ -1385,7 +1385,7 @@ int ath9k_init_debug(struct ath_hw *ah)
 		return -ENOMEM;
 
 #ifdef CONFIG_ATH_DEBUG
-	debugfs_create_file("debug", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
+	debugfs_create_file("debug", 0600, sc->debug.debugfs_phy,
 			    sc, &fops_debug);
 #endif
 
@@ -1409,22 +1409,22 @@ int ath9k_init_debug(struct ath_hw *ah)
 	ath9k_cmn_debug_recv(sc->debug.debugfs_phy, &sc->debug.stats.rxstats);
 	ath9k_cmn_debug_phy_err(sc->debug.debugfs_phy, &sc->debug.stats.rxstats);
 
-	debugfs_create_u8("rx_chainmask", S_IRUSR, sc->debug.debugfs_phy,
+	debugfs_create_u8("rx_chainmask", 0400, sc->debug.debugfs_phy,
 			  &ah->rxchainmask);
-	debugfs_create_u8("tx_chainmask", S_IRUSR, sc->debug.debugfs_phy,
+	debugfs_create_u8("tx_chainmask", 0400, sc->debug.debugfs_phy,
 			  &ah->txchainmask);
-	debugfs_create_file("ani", S_IRUSR | S_IWUSR,
+	debugfs_create_file("ani", 0600,
 			    sc->debug.debugfs_phy, sc, &fops_ani);
-	debugfs_create_bool("paprd", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
+	debugfs_create_bool("paprd", 0600, sc->debug.debugfs_phy,
 			    &sc->sc_ah->config.enable_paprd);
-	debugfs_create_file("regidx", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
+	debugfs_create_file("regidx", 0600, sc->debug.debugfs_phy,
 			    sc, &fops_regidx);
-	debugfs_create_file("regval", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
+	debugfs_create_file("regval", 0600, sc->debug.debugfs_phy,
 			    sc, &fops_regval);
-	debugfs_create_bool("ignore_extcca", S_IRUSR | S_IWUSR,
+	debugfs_create_bool("ignore_extcca", 0600,
 			    sc->debug.debugfs_phy,
 			    &ah->config.cwm_ignore_extcca);
-	debugfs_create_file("regdump", S_IRUSR, sc->debug.debugfs_phy, sc,
+	debugfs_create_file("regdump", 0400, sc->debug.debugfs_phy, sc,
 			    &fops_regdump);
 	debugfs_create_devm_seqfile(sc->dev, "dump_nfcal",
 				    sc->debug.debugfs_phy,
@@ -1433,35 +1433,33 @@ int ath9k_init_debug(struct ath_hw *ah)
 	ath9k_cmn_debug_base_eeprom(sc->debug.debugfs_phy, sc->sc_ah);
 	ath9k_cmn_debug_modal_eeprom(sc->debug.debugfs_phy, sc->sc_ah);
 
-	debugfs_create_u32("gpio_mask", S_IRUSR | S_IWUSR,
+	debugfs_create_u32("gpio_mask", 0600,
 			   sc->debug.debugfs_phy, &sc->sc_ah->gpio_mask);
-	debugfs_create_u32("gpio_val", S_IRUSR | S_IWUSR,
+	debugfs_create_u32("gpio_val", 0600,
 			   sc->debug.debugfs_phy, &sc->sc_ah->gpio_val);
-	debugfs_create_file("antenna_diversity", S_IRUSR,
+	debugfs_create_file("antenna_diversity", 0400,
 			    sc->debug.debugfs_phy, sc, &fops_antenna_diversity);
 #ifdef CONFIG_ATH9K_BTCOEX_SUPPORT
-	debugfs_create_file("bt_ant_diversity", S_IRUSR | S_IWUSR,
+	debugfs_create_file("bt_ant_diversity", 0600,
 			    sc->debug.debugfs_phy, sc, &fops_bt_ant_diversity);
-	debugfs_create_file("btcoex", S_IRUSR, sc->debug.debugfs_phy, sc,
+	debugfs_create_file("btcoex", 0400, sc->debug.debugfs_phy, sc,
 			    &fops_btcoex);
 #endif
 
 #ifdef CONFIG_ATH9K_WOW
-	debugfs_create_file("wow", S_IRUSR | S_IWUSR,
-			    sc->debug.debugfs_phy, sc, &fops_wow);
+	debugfs_create_file("wow", 0600, sc->debug.debugfs_phy, sc, &fops_wow);
 #endif
 
 #ifdef CONFIG_ATH9K_DYNACK
-	debugfs_create_file("ack_to", S_IRUSR, sc->debug.debugfs_phy,
+	debugfs_create_file("ack_to", 0400, sc->debug.debugfs_phy,
 			    sc, &fops_ackto);
 #endif
-	debugfs_create_file("tpc", S_IRUSR | S_IWUSR,
-			    sc->debug.debugfs_phy, sc, &fops_tpc);
+	debugfs_create_file("tpc", 0600, sc->debug.debugfs_phy, sc, &fops_tpc);
 
-	debugfs_create_u16("airtime_flags", S_IRUSR | S_IWUSR,
+	debugfs_create_u16("airtime_flags", 0600,
 			   sc->debug.debugfs_phy, &sc->airtime_flags);
 
-	debugfs_create_file("nf_override", S_IRUSR | S_IWUSR,
+	debugfs_create_file("nf_override", 0600,
 			    sc->debug.debugfs_phy, sc, &fops_nf_override);
 
 	return 0;
diff --git a/drivers/net/wireless/ath/ath9k/debug_sta.c b/drivers/net/wireless/ath/ath9k/debug_sta.c
index efc692ee67d4..a6f45f1bb5bb 100644
--- a/drivers/net/wireless/ath/ath9k/debug_sta.c
+++ b/drivers/net/wireless/ath/ath9k/debug_sta.c
@@ -302,7 +302,7 @@ void ath9k_sta_add_debugfs(struct ieee80211_hw *hw,
 {
 	struct ath_node *an = (struct ath_node *)sta->drv_priv;
 
-	debugfs_create_file("node_aggr", S_IRUGO, dir, an, &fops_node_aggr);
-	debugfs_create_file("node_recv", S_IRUGO, dir, an, &fops_node_recv);
-	debugfs_create_file("airtime", S_IRUGO, dir, an, &fops_airtime);
+	debugfs_create_file("node_aggr", 0444, dir, an, &fops_node_aggr);
+	debugfs_create_file("node_recv", 0444, dir, an, &fops_node_recv);
+	debugfs_create_file("airtime", 0444, dir, an, &fops_airtime);
 }
diff --git a/drivers/net/wireless/ath/ath9k/dfs_debug.c b/drivers/net/wireless/ath/ath9k/dfs_debug.c
index 8824610c21fb..3251c9abe270 100644
--- a/drivers/net/wireless/ath/ath9k/dfs_debug.c
+++ b/drivers/net/wireless/ath/ath9k/dfs_debug.c
@@ -144,8 +144,8 @@ static const struct file_operations fops_dfs_stats = {
 
 void ath9k_dfs_init_debug(struct ath_softc *sc)
 {
-	debugfs_create_file("dfs_stats", S_IRUSR,
+	debugfs_create_file("dfs_stats", 0400,
 			    sc->debug.debugfs_phy, sc, &fops_dfs_stats);
-	debugfs_create_file("dfs_simulate_radar", S_IWUSR,
+	debugfs_create_file("dfs_simulate_radar", 0200,
 			    sc->debug.debugfs_phy, sc, &fops_simulate_radar);
 }
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
index dc79afd7e151..b3ed65e5c4da 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c
@@ -496,25 +496,25 @@ int ath9k_htc_init_debug(struct ath_hw *ah)
 
 	ath9k_cmn_spectral_init_debug(&priv->spec_priv, priv->debug.debugfs_phy);
 
-	debugfs_create_file("tgt_int_stats", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("tgt_int_stats", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_tgt_int_stats);
-	debugfs_create_file("tgt_tx_stats", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("tgt_tx_stats", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_tgt_tx_stats);
-	debugfs_create_file("tgt_rx_stats", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("tgt_rx_stats", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_tgt_rx_stats);
-	debugfs_create_file("xmit", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("xmit", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_xmit);
-	debugfs_create_file("skb_rx", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("skb_rx", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_skb_rx);
 
 	ath9k_cmn_debug_recv(priv->debug.debugfs_phy, &priv->debug.rx_stats);
 	ath9k_cmn_debug_phy_err(priv->debug.debugfs_phy, &priv->debug.rx_stats);
 
-	debugfs_create_file("slot", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("slot", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_slot);
-	debugfs_create_file("queue", S_IRUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("queue", 0400, priv->debug.debugfs_phy,
 			    priv, &fops_queue);
-	debugfs_create_file("debug", S_IRUSR | S_IWUSR, priv->debug.debugfs_phy,
+	debugfs_create_file("debug", 0600, priv->debug.debugfs_phy,
 			    priv, &fops_debug);
 
 	ath9k_cmn_debug_base_eeprom(priv->debug.debugfs_phy, priv->ah);
diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c
index fe3a8263b224..ce50d8f5835e 100644
--- a/drivers/net/wireless/ath/ath9k/tx99.c
+++ b/drivers/net/wireless/ath/ath9k/tx99.c
@@ -278,10 +278,10 @@ void ath9k_tx99_init_debug(struct ath_softc *sc)
 	if (!AR_SREV_9280_20_OR_LATER(sc->sc_ah))
 		return;
 
-	debugfs_create_file("tx99", S_IRUSR | S_IWUSR,
+	debugfs_create_file("tx99", 0600,
 			    sc->debug.debugfs_phy, sc,
 			    &fops_tx99);
-	debugfs_create_file("tx99_power", S_IRUSR | S_IWUSR,
+	debugfs_create_file("tx99_power", 0600,
 			    sc->debug.debugfs_phy, sc,
 			    &fops_tx99_power);
 }
diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c
index ec3a64e5d2bb..a9b6dc17e408 100644
--- a/drivers/net/wireless/ath/carl9170/debug.c
+++ b/drivers/net/wireless/ath/carl9170/debug.c
@@ -187,21 +187,21 @@ static const struct carl9170_debugfs_fops carl_debugfs_##name ##_ops = {\
 
 #define DEBUGFS_DECLARE_RO_FILE(name, _read_bufsize)			\
 	DEBUGFS_DECLARE_FILE(name, carl9170_debugfs_##name ##_read,	\
-			     NULL, _read_bufsize, S_IRUSR)
+			     NULL, _read_bufsize, 0400)
 
 #define DEBUGFS_DECLARE_WO_FILE(name)					\
 	DEBUGFS_DECLARE_FILE(name, NULL, carl9170_debugfs_##name ##_write,\
-			     0, S_IWUSR)
+			     0, 0200)
 
 #define DEBUGFS_DECLARE_RW_FILE(name, _read_bufsize)			\
 	DEBUGFS_DECLARE_FILE(name, carl9170_debugfs_##name ##_read,	\
 			     carl9170_debugfs_##name ##_write,		\
-			     _read_bufsize, S_IRUSR | S_IWUSR)
+			     _read_bufsize, 0600)
 
 #define __DEBUGFS_DECLARE_RW_FILE(name, _read_bufsize, _dstate)		\
 	__DEBUGFS_DECLARE_FILE(name, carl9170_debugfs_##name ##_read,	\
 			     carl9170_debugfs_##name ##_write,		\
-			     _read_bufsize, S_IRUSR | S_IWUSR, _dstate)
+			     _read_bufsize, 0600, _dstate)
 
 #define DEBUGFS_READONLY_FILE(name, _read_bufsize, fmt, value...)	\
 static char *carl9170_debugfs_ ##name ## _read(struct ar9170 *ar,	\
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index 988c8857d78c..29e93c953d93 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -48,11 +48,11 @@
 #include "cmd.h"
 
 static bool modparam_nohwcrypt;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware crypto offload.");
 
 int modparam_noht;
-module_param_named(noht, modparam_noht, int, S_IRUGO);
+module_param_named(noht, modparam_noht, int, 0444);
 MODULE_PARM_DESC(noht, "Disable MPDU aggregation.");
 
 #define RATE(_bitrate, _hw_rate, _txpidx, _flags) {	\
diff --git a/drivers/net/wireless/ath/wcn36xx/debug.c b/drivers/net/wireless/ath/wcn36xx/debug.c
index 2a6bb62e785c..389b5e7129a6 100644
--- a/drivers/net/wireless/ath/wcn36xx/debug.c
+++ b/drivers/net/wireless/ath/wcn36xx/debug.c
@@ -161,9 +161,8 @@ void wcn36xx_debugfs_init(struct wcn36xx *wcn)
 		dfs->rootdir = NULL;
 	}
 
-	ADD_FILE(bmps_switcher, S_IRUSR | S_IWUSR,
-		 &fops_wcn36xx_bmps, wcn);
-	ADD_FILE(dump, S_IWUSR, &fops_wcn36xx_dump, wcn);
+	ADD_FILE(bmps_switcher, 0600, &fops_wcn36xx_bmps, wcn);
+	ADD_FILE(dump, 0200, &fops_wcn36xx_dump, wcn);
 }
 
 void wcn36xx_debugfs_exit(struct wcn36xx *wcn)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 70ef9835b647..74f7b56bc7d4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -51,7 +51,7 @@ MODULE_PARM_DESC(txglomsz, "Maximum tx packet chain size [SDIO]");
 
 /* Debug level configuration. See debug.h for bits, sysfs modifiable */
 int brcmf_msg_level;
-module_param_named(debug, brcmf_msg_level, int, S_IRUSR | S_IWUSR);
+module_param_named(debug, brcmf_msg_level, int, 0600);
 MODULE_PARM_DESC(debug, "Level of debug output");
 
 static int brcmf_p2p_enable;
@@ -64,7 +64,7 @@ MODULE_PARM_DESC(feature_disable, "Disable features");
 
 static char brcmf_firmware_path[BRCMF_FW_ALTPATH_LEN];
 module_param_string(alternative_fw_path, brcmf_firmware_path,
-		    BRCMF_FW_ALTPATH_LEN, S_IRUSR);
+		    BRCMF_FW_ALTPATH_LEN, 0400);
 MODULE_PARM_DESC(alternative_fw_path, "Alternative firmware path");
 
 static int brcmf_fcmode;
@@ -72,7 +72,7 @@ module_param_named(fcmode, brcmf_fcmode, int, 0);
 MODULE_PARM_DESC(fcmode, "Mode of firmware signalled flow control");
 
 static int brcmf_roamoff;
-module_param_named(roamoff, brcmf_roamoff, int, S_IRUSR);
+module_param_named(roamoff, brcmf_roamoff, int, 0400);
 MODULE_PARM_DESC(roamoff, "Do not use internal roaming engine");
 
 #ifdef DEBUG
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c
index 7a1fbb2e3a71..2fe1f6863278 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/debug.c
@@ -214,7 +214,7 @@ brcms_debugfs_add_entry(struct brcms_pub *drvr, const char *fn,
 	entry->read = read_fn;
 	entry->drvr = drvr;
 
-	dentry = debugfs_create_file(fn, S_IRUGO, dentry, entry,
+	dentry = debugfs_create_file(fn, 0444, dentry, entry,
 				     &brcms_debugfs_def_ops);
 
 	return PTR_ERR_OR_ZERO(dentry);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
index ddfdfe177e24..8e58f6800483 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
@@ -108,7 +108,7 @@ MODULE_DEVICE_TABLE(bcma, brcms_coreid_table);
  * flags are specified by the BRCM_DL_* macros in
  * drivers/net/wireless/brcm80211/include/defs.h.
  */
-module_param_named(debug, brcm_msg_level, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug, brcm_msg_level, uint, 0644);
 #endif
 
 static struct ieee80211_channel brcms_2ghz_chantable[] = {
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 54201c02fdb8..ce0fbf83285f 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -4519,21 +4519,21 @@ static int setup_proc_entry( struct net_device *dev,
 	proc_set_user(apriv->proc_entry, proc_kuid, proc_kgid);
 
 	/* Setup the StatsDelta */
-	entry = proc_create_data("StatsDelta", S_IRUGO & proc_perm,
+	entry = proc_create_data("StatsDelta", 0444 & proc_perm,
 				 apriv->proc_entry, &proc_statsdelta_ops, dev);
 	if (!entry)
 		goto fail;
 	proc_set_user(entry, proc_kuid, proc_kgid);
 
 	/* Setup the Stats */
-	entry = proc_create_data("Stats", S_IRUGO & proc_perm,
+	entry = proc_create_data("Stats", 0444 & proc_perm,
 				 apriv->proc_entry, &proc_stats_ops, dev);
 	if (!entry)
 		goto fail;
 	proc_set_user(entry, proc_kuid, proc_kgid);
 
 	/* Setup the Status */
-	entry = proc_create_data("Status", S_IRUGO & proc_perm,
+	entry = proc_create_data("Status", 0444 & proc_perm,
 				 apriv->proc_entry, &proc_status_ops, dev);
 	if (!entry)
 		goto fail;
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index 19c442cb93e4..236b52423506 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -3538,7 +3538,7 @@ static ssize_t show_pci(struct device *d, struct device_attribute *attr,
 	return out - buf;
 }
 
-static DEVICE_ATTR(pci, S_IRUGO, show_pci, NULL);
+static DEVICE_ATTR(pci, 0444, show_pci, NULL);
 
 static ssize_t show_cfg(struct device *d, struct device_attribute *attr,
 			char *buf)
@@ -3547,7 +3547,7 @@ static ssize_t show_cfg(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "0x%08x\n", (int)p->config);
 }
 
-static DEVICE_ATTR(cfg, S_IRUGO, show_cfg, NULL);
+static DEVICE_ATTR(cfg, 0444, show_cfg, NULL);
 
 static ssize_t show_status(struct device *d, struct device_attribute *attr,
 			   char *buf)
@@ -3556,7 +3556,7 @@ static ssize_t show_status(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "0x%08x\n", (int)p->status);
 }
 
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status, 0444, show_status, NULL);
 
 static ssize_t show_capability(struct device *d, struct device_attribute *attr,
 			       char *buf)
@@ -3565,7 +3565,7 @@ static ssize_t show_capability(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "0x%08x\n", (int)p->capability);
 }
 
-static DEVICE_ATTR(capability, S_IRUGO, show_capability, NULL);
+static DEVICE_ATTR(capability, 0444, show_capability, NULL);
 
 #define IPW2100_REG(x) { IPW_ ##x, #x }
 static const struct {
@@ -3822,7 +3822,7 @@ static ssize_t show_registers(struct device *d, struct device_attribute *attr,
 	return out - buf;
 }
 
-static DEVICE_ATTR(registers, S_IRUGO, show_registers, NULL);
+static DEVICE_ATTR(registers, 0444, show_registers, NULL);
 
 static ssize_t show_hardware(struct device *d, struct device_attribute *attr,
 			     char *buf)
@@ -3863,7 +3863,7 @@ static ssize_t show_hardware(struct device *d, struct device_attribute *attr,
 	return out - buf;
 }
 
-static DEVICE_ATTR(hardware, S_IRUGO, show_hardware, NULL);
+static DEVICE_ATTR(hardware, 0444, show_hardware, NULL);
 
 static ssize_t show_memory(struct device *d, struct device_attribute *attr,
 			   char *buf)
@@ -3957,7 +3957,7 @@ static ssize_t store_memory(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(memory, S_IWUSR | S_IRUGO, show_memory, store_memory);
+static DEVICE_ATTR(memory, 0644, show_memory, store_memory);
 
 static ssize_t show_ordinals(struct device *d, struct device_attribute *attr,
 			     char *buf)
@@ -3993,7 +3993,7 @@ static ssize_t show_ordinals(struct device *d, struct device_attribute *attr,
 	return len;
 }
 
-static DEVICE_ATTR(ordinals, S_IRUGO, show_ordinals, NULL);
+static DEVICE_ATTR(ordinals, 0444, show_ordinals, NULL);
 
 static ssize_t show_stats(struct device *d, struct device_attribute *attr,
 			  char *buf)
@@ -4014,7 +4014,7 @@ static ssize_t show_stats(struct device *d, struct device_attribute *attr,
 	return out - buf;
 }
 
-static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
+static DEVICE_ATTR(stats, 0444, show_stats, NULL);
 
 static int ipw2100_switch_mode(struct ipw2100_priv *priv, u32 mode)
 {
@@ -4112,7 +4112,7 @@ static ssize_t show_internals(struct device *d, struct device_attribute *attr,
 	return len;
 }
 
-static DEVICE_ATTR(internals, S_IRUGO, show_internals, NULL);
+static DEVICE_ATTR(internals, 0444, show_internals, NULL);
 
 static ssize_t show_bssinfo(struct device *d, struct device_attribute *attr,
 			    char *buf)
@@ -4157,7 +4157,7 @@ static ssize_t show_bssinfo(struct device *d, struct device_attribute *attr,
 	return out - buf;
 }
 
-static DEVICE_ATTR(bssinfo, S_IRUGO, show_bssinfo, NULL);
+static DEVICE_ATTR(bssinfo, 0444, show_bssinfo, NULL);
 
 #ifdef CONFIG_IPW2100_DEBUG
 static ssize_t debug_level_show(struct device_driver *d, char *buf)
@@ -4216,8 +4216,7 @@ static ssize_t store_fatal_error(struct device *d,
 	return count;
 }
 
-static DEVICE_ATTR(fatal_error, S_IWUSR | S_IRUGO, show_fatal_error,
-		   store_fatal_error);
+static DEVICE_ATTR(fatal_error, 0644, show_fatal_error, store_fatal_error);
 
 static ssize_t show_scan_age(struct device *d, struct device_attribute *attr,
 			     char *buf)
@@ -4250,7 +4249,7 @@ static ssize_t store_scan_age(struct device *d, struct device_attribute *attr,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(scan_age, S_IWUSR | S_IRUGO, show_scan_age, store_scan_age);
+static DEVICE_ATTR(scan_age, 0644, show_scan_age, store_scan_age);
 
 static ssize_t show_rf_kill(struct device *d, struct device_attribute *attr,
 			    char *buf)
@@ -4304,7 +4303,7 @@ static ssize_t store_rf_kill(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(rf_kill, S_IWUSR | S_IRUGO, show_rf_kill, store_rf_kill);
+static DEVICE_ATTR(rf_kill, 0644, show_rf_kill, store_rf_kill);
 
 static struct attribute *ipw2100_sysfs_entries[] = {
 	&dev_attr_hardware.attr,
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index 8da87496cb58..87a5e414c2f7 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -1303,7 +1303,7 @@ static ssize_t show_event_log(struct device *d,
 	return len;
 }
 
-static DEVICE_ATTR(event_log, S_IRUGO, show_event_log, NULL);
+static DEVICE_ATTR(event_log, 0444, show_event_log, NULL);
 
 static ssize_t show_error(struct device *d,
 			  struct device_attribute *attr, char *buf)
@@ -1351,7 +1351,7 @@ static ssize_t clear_error(struct device *d,
 	return count;
 }
 
-static DEVICE_ATTR(error, S_IRUGO | S_IWUSR, show_error, clear_error);
+static DEVICE_ATTR(error, 0644, show_error, clear_error);
 
 static ssize_t show_cmd_log(struct device *d,
 			    struct device_attribute *attr, char *buf)
@@ -1378,7 +1378,7 @@ static ssize_t show_cmd_log(struct device *d,
 	return len;
 }
 
-static DEVICE_ATTR(cmd_log, S_IRUGO, show_cmd_log, NULL);
+static DEVICE_ATTR(cmd_log, 0444, show_cmd_log, NULL);
 
 #ifdef CONFIG_IPW2200_PROMISCUOUS
 static void ipw_prom_free(struct ipw_priv *priv);
@@ -1443,8 +1443,7 @@ static ssize_t show_rtap_iface(struct device *d,
 	}
 }
 
-static DEVICE_ATTR(rtap_iface, S_IWUSR | S_IRUSR, show_rtap_iface,
-		   store_rtap_iface);
+static DEVICE_ATTR(rtap_iface, 0600, show_rtap_iface, store_rtap_iface);
 
 static ssize_t store_rtap_filter(struct device *d,
 			 struct device_attribute *attr,
@@ -1475,8 +1474,7 @@ static ssize_t show_rtap_filter(struct device *d,
 		       priv->prom_priv ? priv->prom_priv->filter : 0);
 }
 
-static DEVICE_ATTR(rtap_filter, S_IWUSR | S_IRUSR, show_rtap_filter,
-		   store_rtap_filter);
+static DEVICE_ATTR(rtap_filter, 0600, show_rtap_filter, store_rtap_filter);
 #endif
 
 static ssize_t show_scan_age(struct device *d, struct device_attribute *attr,
@@ -1520,7 +1518,7 @@ static ssize_t store_scan_age(struct device *d, struct device_attribute *attr,
 	return len;
 }
 
-static DEVICE_ATTR(scan_age, S_IWUSR | S_IRUGO, show_scan_age, store_scan_age);
+static DEVICE_ATTR(scan_age, 0644, show_scan_age, store_scan_age);
 
 static ssize_t show_led(struct device *d, struct device_attribute *attr,
 			char *buf)
@@ -1553,7 +1551,7 @@ static ssize_t store_led(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(led, S_IWUSR | S_IRUGO, show_led, store_led);
+static DEVICE_ATTR(led, 0644, show_led, store_led);
 
 static ssize_t show_status(struct device *d,
 			   struct device_attribute *attr, char *buf)
@@ -1562,7 +1560,7 @@ static ssize_t show_status(struct device *d,
 	return sprintf(buf, "0x%08x\n", (int)p->status);
 }
 
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status, 0444, show_status, NULL);
 
 static ssize_t show_cfg(struct device *d, struct device_attribute *attr,
 			char *buf)
@@ -1571,7 +1569,7 @@ static ssize_t show_cfg(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "0x%08x\n", (int)p->config);
 }
 
-static DEVICE_ATTR(cfg, S_IRUGO, show_cfg, NULL);
+static DEVICE_ATTR(cfg, 0444, show_cfg, NULL);
 
 static ssize_t show_nic_type(struct device *d,
 			     struct device_attribute *attr, char *buf)
@@ -1580,7 +1578,7 @@ static ssize_t show_nic_type(struct device *d,
 	return sprintf(buf, "TYPE: %d\n", priv->nic_type);
 }
 
-static DEVICE_ATTR(nic_type, S_IRUGO, show_nic_type, NULL);
+static DEVICE_ATTR(nic_type, 0444, show_nic_type, NULL);
 
 static ssize_t show_ucode_version(struct device *d,
 				  struct device_attribute *attr, char *buf)
@@ -1594,7 +1592,7 @@ static ssize_t show_ucode_version(struct device *d,
 	return sprintf(buf, "0x%08x\n", tmp);
 }
 
-static DEVICE_ATTR(ucode_version, S_IWUSR | S_IRUGO, show_ucode_version, NULL);
+static DEVICE_ATTR(ucode_version, 0644, show_ucode_version, NULL);
 
 static ssize_t show_rtc(struct device *d, struct device_attribute *attr,
 			char *buf)
@@ -1608,7 +1606,7 @@ static ssize_t show_rtc(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "0x%08x\n", tmp);
 }
 
-static DEVICE_ATTR(rtc, S_IWUSR | S_IRUGO, show_rtc, NULL);
+static DEVICE_ATTR(rtc, 0644, show_rtc, NULL);
 
 /*
  * Add a device attribute to view/control the delay between eeprom
@@ -1630,8 +1628,7 @@ static ssize_t store_eeprom_delay(struct device *d,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(eeprom_delay, S_IWUSR | S_IRUGO,
-		   show_eeprom_delay, store_eeprom_delay);
+static DEVICE_ATTR(eeprom_delay, 0644, show_eeprom_delay, store_eeprom_delay);
 
 static ssize_t show_command_event_reg(struct device *d,
 				      struct device_attribute *attr, char *buf)
@@ -1654,7 +1651,7 @@ static ssize_t store_command_event_reg(struct device *d,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(command_event_reg, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(command_event_reg, 0644,
 		   show_command_event_reg, store_command_event_reg);
 
 static ssize_t show_mem_gpio_reg(struct device *d,
@@ -1678,8 +1675,7 @@ static ssize_t store_mem_gpio_reg(struct device *d,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(mem_gpio_reg, S_IWUSR | S_IRUGO,
-		   show_mem_gpio_reg, store_mem_gpio_reg);
+static DEVICE_ATTR(mem_gpio_reg, 0644, show_mem_gpio_reg, store_mem_gpio_reg);
 
 static ssize_t show_indirect_dword(struct device *d,
 				   struct device_attribute *attr, char *buf)
@@ -1705,7 +1701,7 @@ static ssize_t store_indirect_dword(struct device *d,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(indirect_dword, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(indirect_dword, 0644,
 		   show_indirect_dword, store_indirect_dword);
 
 static ssize_t show_indirect_byte(struct device *d,
@@ -1732,7 +1728,7 @@ static ssize_t store_indirect_byte(struct device *d,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(indirect_byte, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(indirect_byte, 0644,
 		   show_indirect_byte, store_indirect_byte);
 
 static ssize_t show_direct_dword(struct device *d,
@@ -1759,8 +1755,7 @@ static ssize_t store_direct_dword(struct device *d,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(direct_dword, S_IWUSR | S_IRUGO,
-		   show_direct_dword, store_direct_dword);
+static DEVICE_ATTR(direct_dword, 0644, show_direct_dword, store_direct_dword);
 
 static int rf_kill_active(struct ipw_priv *priv)
 {
@@ -1831,7 +1826,7 @@ static ssize_t store_rf_kill(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(rf_kill, S_IWUSR | S_IRUGO, show_rf_kill, store_rf_kill);
+static DEVICE_ATTR(rf_kill, 0644, show_rf_kill, store_rf_kill);
 
 static ssize_t show_speed_scan(struct device *d, struct device_attribute *attr,
 			       char *buf)
@@ -1884,8 +1879,7 @@ static ssize_t store_speed_scan(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(speed_scan, S_IWUSR | S_IRUGO, show_speed_scan,
-		   store_speed_scan);
+static DEVICE_ATTR(speed_scan, 0644, show_speed_scan, store_speed_scan);
 
 static ssize_t show_net_stats(struct device *d, struct device_attribute *attr,
 			      char *buf)
@@ -1906,8 +1900,7 @@ static ssize_t store_net_stats(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(net_stats, S_IWUSR | S_IRUGO,
-		   show_net_stats, store_net_stats);
+static DEVICE_ATTR(net_stats, 0644, show_net_stats, store_net_stats);
 
 static ssize_t show_channels(struct device *d,
 			     struct device_attribute *attr,
@@ -1953,7 +1946,7 @@ static ssize_t show_channels(struct device *d,
 	return len;
 }
 
-static DEVICE_ATTR(channels, S_IRUSR, show_channels, NULL);
+static DEVICE_ATTR(channels, 0400, show_channels, NULL);
 
 static void notify_wx_assoc_event(struct ipw_priv *priv)
 {
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_module.c b/drivers/net/wireless/intel/ipw2x00/libipw_module.c
index c58c5b2dcce5..f00d45f54c76 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_module.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_module.c
@@ -276,7 +276,7 @@ static int __init libipw_init(void)
 				" proc directory\n");
 		return -EIO;
 	}
-	e = proc_create("debug_level", S_IRUGO | S_IWUSR, libipw_proc,
+	e = proc_create("debug_level", 0644, libipw_proc,
 			&debug_level_proc_fops);
 	if (!e) {
 		remove_proc_entry(DRV_PROCNAME, init_net.proc_net);
diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index 4b53ebf00c7f..62a9794f952b 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -3122,7 +3122,7 @@ il3945_store_debug_level(struct device *d, struct device_attribute *attr,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(debug_level, S_IWUSR | S_IRUGO, il3945_show_debug_level,
+static DEVICE_ATTR(debug_level, 0644, il3945_show_debug_level,
 		   il3945_store_debug_level);
 
 #endif /* CONFIG_IWLEGACY_DEBUG */
@@ -3139,7 +3139,7 @@ il3945_show_temperature(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "%d\n", il3945_hw_get_temperature(il));
 }
 
-static DEVICE_ATTR(temperature, S_IRUGO, il3945_show_temperature, NULL);
+static DEVICE_ATTR(temperature, 0444, il3945_show_temperature, NULL);
 
 static ssize_t
 il3945_show_tx_power(struct device *d, struct device_attribute *attr, char *buf)
@@ -3165,8 +3165,7 @@ il3945_store_tx_power(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(tx_power, S_IWUSR | S_IRUGO, il3945_show_tx_power,
-		   il3945_store_tx_power);
+static DEVICE_ATTR(tx_power, 0644, il3945_show_tx_power, il3945_store_tx_power);
 
 static ssize_t
 il3945_show_flags(struct device *d, struct device_attribute *attr, char *buf)
@@ -3199,8 +3198,7 @@ il3945_store_flags(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(flags, S_IWUSR | S_IRUGO, il3945_show_flags,
-		   il3945_store_flags);
+static DEVICE_ATTR(flags, 0644, il3945_show_flags, il3945_store_flags);
 
 static ssize_t
 il3945_show_filter_flags(struct device *d, struct device_attribute *attr,
@@ -3235,7 +3233,7 @@ il3945_store_filter_flags(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(filter_flags, S_IWUSR | S_IRUGO, il3945_show_filter_flags,
+static DEVICE_ATTR(filter_flags, 0644, il3945_show_filter_flags,
 		   il3945_store_filter_flags);
 
 static ssize_t
@@ -3306,7 +3304,7 @@ il3945_store_measurement(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(measurement, S_IRUSR | S_IWUSR, il3945_show_measurement,
+static DEVICE_ATTR(measurement, 0600, il3945_show_measurement,
 		   il3945_store_measurement);
 
 static ssize_t
@@ -3330,7 +3328,7 @@ il3945_show_retry_rate(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "%d", il->retry_rate);
 }
 
-static DEVICE_ATTR(retry_rate, S_IWUSR | S_IRUSR, il3945_show_retry_rate,
+static DEVICE_ATTR(retry_rate, 0600, il3945_show_retry_rate,
 		   il3945_store_retry_rate);
 
 static ssize_t
@@ -3340,7 +3338,7 @@ il3945_show_channels(struct device *d, struct device_attribute *attr, char *buf)
 	return 0;
 }
 
-static DEVICE_ATTR(channels, S_IRUSR, il3945_show_channels, NULL);
+static DEVICE_ATTR(channels, 0400, il3945_show_channels, NULL);
 
 static ssize_t
 il3945_show_antenna(struct device *d, struct device_attribute *attr, char *buf)
@@ -3377,8 +3375,7 @@ il3945_store_antenna(struct device *d, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(antenna, S_IWUSR | S_IRUGO, il3945_show_antenna,
-		   il3945_store_antenna);
+static DEVICE_ATTR(antenna, 0644, il3945_show_antenna, il3945_store_antenna);
 
 static ssize_t
 il3945_show_status(struct device *d, struct device_attribute *attr, char *buf)
@@ -3389,7 +3386,7 @@ il3945_show_status(struct device *d, struct device_attribute *attr, char *buf)
 	return sprintf(buf, "0x%08x\n", (int)il->status);
 }
 
-static DEVICE_ATTR(status, S_IRUGO, il3945_show_status, NULL);
+static DEVICE_ATTR(status, 0444, il3945_show_status, NULL);
 
 static ssize_t
 il3945_dump_error_log(struct device *d, struct device_attribute *attr,
@@ -3404,7 +3401,7 @@ il3945_dump_error_log(struct device *d, struct device_attribute *attr,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(dump_errors, S_IWUSR, NULL, il3945_dump_error_log);
+static DEVICE_ATTR(dump_errors, 0200, NULL, il3945_dump_error_log);
 
 /*****************************************************************************
  *
@@ -3943,18 +3940,18 @@ il3945_exit(void)
 
 MODULE_FIRMWARE(IL3945_MODULE_FIRMWARE(IL3945_UCODE_API_MAX));
 
-module_param_named(antenna, il3945_mod_params.antenna, int, S_IRUGO);
+module_param_named(antenna, il3945_mod_params.antenna, int, 0444);
 MODULE_PARM_DESC(antenna, "select antenna (1=Main, 2=Aux, default 0 [both])");
-module_param_named(swcrypto, il3945_mod_params.sw_crypto, int, S_IRUGO);
+module_param_named(swcrypto, il3945_mod_params.sw_crypto, int, 0444);
 MODULE_PARM_DESC(swcrypto, "using software crypto (default 1 [software])");
 module_param_named(disable_hw_scan, il3945_mod_params.disable_hw_scan, int,
-		   S_IRUGO);
+		   0444);
 MODULE_PARM_DESC(disable_hw_scan, "disable hardware scanning (default 1)");
 #ifdef CONFIG_IWLEGACY_DEBUG
-module_param_named(debug, il_debug_level, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug, il_debug_level, uint, 0644);
 MODULE_PARM_DESC(debug, "debug output mask");
 #endif
-module_param_named(fw_restart, il3945_mod_params.restart_fw, int, S_IRUGO);
+module_param_named(fw_restart, il3945_mod_params.restart_fw, int, 0444);
 MODULE_PARM_DESC(fw_restart, "restart firmware in case of error");
 
 module_exit(il3945_exit);
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index de63f2518f23..562e94870a9c 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -4591,7 +4591,7 @@ il4965_store_debug_level(struct device *d, struct device_attribute *attr,
 	return strnlen(buf, count);
 }
 
-static DEVICE_ATTR(debug_level, S_IWUSR | S_IRUGO, il4965_show_debug_level,
+static DEVICE_ATTR(debug_level, 0644, il4965_show_debug_level,
 		   il4965_store_debug_level);
 
 #endif /* CONFIG_IWLEGACY_DEBUG */
@@ -4608,7 +4608,7 @@ il4965_show_temperature(struct device *d, struct device_attribute *attr,
 	return sprintf(buf, "%d\n", il->temperature);
 }
 
-static DEVICE_ATTR(temperature, S_IRUGO, il4965_show_temperature, NULL);
+static DEVICE_ATTR(temperature, 0444, il4965_show_temperature, NULL);
 
 static ssize_t
 il4965_show_tx_power(struct device *d, struct device_attribute *attr, char *buf)
@@ -4642,7 +4642,7 @@ il4965_store_tx_power(struct device *d, struct device_attribute *attr,
 	return ret;
 }
 
-static DEVICE_ATTR(tx_power, S_IWUSR | S_IRUGO, il4965_show_tx_power,
+static DEVICE_ATTR(tx_power, 0644, il4965_show_tx_power,
 		   il4965_store_tx_power);
 
 static struct attribute *il_sysfs_entries[] = {
@@ -6859,18 +6859,17 @@ module_exit(il4965_exit);
 module_init(il4965_init);
 
 #ifdef CONFIG_IWLEGACY_DEBUG
-module_param_named(debug, il_debug_level, uint, S_IRUGO | S_IWUSR);
+module_param_named(debug, il_debug_level, uint, 0644);
 MODULE_PARM_DESC(debug, "debug output mask");
 #endif
 
-module_param_named(swcrypto, il4965_mod_params.sw_crypto, int, S_IRUGO);
+module_param_named(swcrypto, il4965_mod_params.sw_crypto, int, 0444);
 MODULE_PARM_DESC(swcrypto, "using crypto in software (default 0 [hardware])");
-module_param_named(queues_num, il4965_mod_params.num_of_queues, int, S_IRUGO);
+module_param_named(queues_num, il4965_mod_params.num_of_queues, int, 0444);
 MODULE_PARM_DESC(queues_num, "number of hw queues.");
-module_param_named(11n_disable, il4965_mod_params.disable_11n, int, S_IRUGO);
+module_param_named(11n_disable, il4965_mod_params.disable_11n, int, 0444);
 MODULE_PARM_DESC(11n_disable, "disable 11n functionality");
-module_param_named(amsdu_size_8K, il4965_mod_params.amsdu_size_8K, int,
-		   S_IRUGO);
+module_param_named(amsdu_size_8K, il4965_mod_params.amsdu_size_8K, int, 0444);
 MODULE_PARM_DESC(amsdu_size_8K, "enable 8K amsdu size (default 0 [disabled])");
-module_param_named(fw_restart, il4965_mod_params.restart_fw, int, S_IRUGO);
+module_param_named(fw_restart, il4965_mod_params.restart_fw, int, 0444);
 MODULE_PARM_DESC(fw_restart, "restart firmware in case of error");
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
index 365a4187fc37..54ff83829afb 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c
@@ -2768,16 +2768,16 @@ il4965_rs_add_debugfs(void *il, void *il_sta, struct dentry *dir)
 {
 	struct il_lq_sta *lq_sta = il_sta;
 	lq_sta->rs_sta_dbgfs_scale_table_file =
-	    debugfs_create_file("rate_scale_table", S_IRUSR | S_IWUSR, dir,
+	    debugfs_create_file("rate_scale_table", 0600, dir,
 				lq_sta, &rs_sta_dbgfs_scale_table_ops);
 	lq_sta->rs_sta_dbgfs_stats_table_file =
-	    debugfs_create_file("rate_stats_table", S_IRUSR, dir, lq_sta,
+	    debugfs_create_file("rate_stats_table", 0400, dir, lq_sta,
 				&rs_sta_dbgfs_stats_table_ops);
 	lq_sta->rs_sta_dbgfs_rate_scale_data_file =
-	    debugfs_create_file("rate_scale_data", S_IRUSR, dir, lq_sta,
+	    debugfs_create_file("rate_scale_data", 0400, dir, lq_sta,
 				&rs_sta_dbgfs_rate_scale_data_ops);
 	lq_sta->rs_sta_dbgfs_tx_agg_tid_en_file =
-	    debugfs_create_u8("tx_agg_tid_enable", S_IRUSR | S_IWUSR, dir,
+	    debugfs_create_u8("tx_agg_tid_enable", 0600, dir,
 			      &lq_sta->tx_agg_tid_en);
 
 }
diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
index 558bb16bfd46..063e19ced7c8 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.c
+++ b/drivers/net/wireless/intel/iwlegacy/common.c
@@ -435,7 +435,7 @@ EXPORT_SYMBOL(il_send_cmd_pdu_async);
 
 /* default: IL_LED_BLINK(0) using blinking idx table */
 static int led_mode;
-module_param(led_mode, int, S_IRUGO);
+module_param(led_mode, int, 0444);
 MODULE_PARM_DESC(led_mode,
 		 "0=system default, " "1=On(RF On)/Off(RF Off), 2=blinking");
 
@@ -3372,7 +3372,7 @@ MODULE_LICENSE("GPL");
  * default: bt_coex_active = true (BT_COEX_ENABLE)
  */
 static bool bt_coex_active = true;
-module_param(bt_coex_active, bool, S_IRUGO);
+module_param(bt_coex_active, bool, 0444);
 MODULE_PARM_DESC(bt_coex_active, "enable wifi/bluetooth co-exist");
 
 u32 il_debug_level;
diff --git a/drivers/net/wireless/intel/iwlegacy/debug.c b/drivers/net/wireless/intel/iwlegacy/debug.c
index 6fc6b7ff9849..d76073def677 100644
--- a/drivers/net/wireless/intel/iwlegacy/debug.c
+++ b/drivers/net/wireless/intel/iwlegacy/debug.c
@@ -135,16 +135,14 @@ EXPORT_SYMBOL(il_update_stats);
 
 #define DEBUGFS_ADD_BOOL(name, parent, ptr) do {			\
 	struct dentry *__tmp;						\
-	__tmp = debugfs_create_bool(#name, S_IWUSR | S_IRUSR,		\
-				    parent, ptr);			\
+	__tmp = debugfs_create_bool(#name, 0600, parent, ptr);		\
 	if (IS_ERR(__tmp) || !__tmp)					\
 		goto err;						\
 } while (0)
 
 #define DEBUGFS_ADD_X32(name, parent, ptr) do {				\
 	struct dentry *__tmp;						\
-	__tmp = debugfs_create_x32(#name, S_IWUSR | S_IRUSR,		\
-				   parent, ptr);			\
+	__tmp = debugfs_create_x32(#name, 0600, parent, ptr);		\
 	if (IS_ERR(__tmp) || !__tmp)					\
 		goto err;						\
 } while (0)
@@ -1365,35 +1363,35 @@ il_dbgfs_register(struct il_priv *il, const char *name)
 	if (!dir_debug)
 		goto err;
 
-	DEBUGFS_ADD_FILE(nvm, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(sram, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(stations, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(channels, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(status, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(interrupt, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(qos, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(disable_ht40, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(rx_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(tx_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(rx_queue, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(tx_queue, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(power_save_status, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(clear_ucode_stats, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(clear_traffic_stats, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(fh_reg, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(missed_beacon, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(force_reset, dir_debug, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_rx_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_tx_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_general_stats, dir_debug, S_IRUSR);
+	DEBUGFS_ADD_FILE(nvm, dir_data, 0400);
+	DEBUGFS_ADD_FILE(sram, dir_data, 0600);
+	DEBUGFS_ADD_FILE(stations, dir_data, 0400);
+	DEBUGFS_ADD_FILE(channels, dir_data, 0400);
+	DEBUGFS_ADD_FILE(status, dir_data, 0400);
+	DEBUGFS_ADD_FILE(interrupt, dir_data, 0600);
+	DEBUGFS_ADD_FILE(qos, dir_data, 0400);
+	DEBUGFS_ADD_FILE(disable_ht40, dir_data, 0600);
+	DEBUGFS_ADD_FILE(rx_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(tx_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(rx_queue, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(tx_queue, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(power_save_status, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(clear_ucode_stats, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(clear_traffic_stats, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(fh_reg, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(missed_beacon, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(force_reset, dir_debug, 0600);
+	DEBUGFS_ADD_FILE(ucode_rx_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(ucode_tx_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(ucode_general_stats, dir_debug, 0400);
 
 	if (il->cfg->sensitivity_calib_by_driver)
-		DEBUGFS_ADD_FILE(sensitivity, dir_debug, S_IRUSR);
+		DEBUGFS_ADD_FILE(sensitivity, dir_debug, 0400);
 	if (il->cfg->chain_noise_calib_by_driver)
-		DEBUGFS_ADD_FILE(chain_noise, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(rxon_flags, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(rxon_filter_flags, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(wd_timeout, dir_debug, S_IWUSR);
+		DEBUGFS_ADD_FILE(chain_noise, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(rxon_flags, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(rxon_filter_flags, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(wd_timeout, dir_debug, 0200);
 	if (il->cfg->sensitivity_calib_by_driver)
 		DEBUGFS_ADD_BOOL(disable_sensitivity, dir_rf,
 				 &il->disable_sens_cal);
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c
index 482ac8fdc67b..096a07c5a33f 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/debugfs.c
@@ -48,16 +48,14 @@
 
 #define DEBUGFS_ADD_BOOL(name, parent, ptr) do {			\
 	struct dentry *__tmp;						\
-	__tmp = debugfs_create_bool(#name, S_IWUSR | S_IRUSR,		\
-				    parent, ptr);			\
+	__tmp = debugfs_create_bool(#name, 0600, parent, ptr);		\
 	if (IS_ERR(__tmp) || !__tmp)					\
 		goto err;						\
 } while (0)
 
 #define DEBUGFS_ADD_X32(name, parent, ptr) do {				\
 	struct dentry *__tmp;						\
-	__tmp = debugfs_create_x32(#name, S_IWUSR | S_IRUSR,		\
-				   parent, ptr);			\
+	__tmp = debugfs_create_x32(#name, 0600, parent, ptr);		\
 	if (IS_ERR(__tmp) || !__tmp)					\
 		goto err;						\
 } while (0)
@@ -2370,48 +2368,48 @@ int iwl_dbgfs_register(struct iwl_priv *priv, struct dentry *dbgfs_dir)
 	if (!dir_debug)
 		goto err;
 
-	DEBUGFS_ADD_FILE(nvm, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(sram, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(wowlan_sram, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(stations, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(channels, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(status, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(rx_handlers, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(qos, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(sleep_level_override, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(current_sleep_command, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(thermal_throttling, dir_data, S_IRUSR);
-	DEBUGFS_ADD_FILE(disable_ht40, dir_data, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(temperature, dir_data, S_IRUSR);
+	DEBUGFS_ADD_FILE(nvm, dir_data, 0400);
+	DEBUGFS_ADD_FILE(sram, dir_data, 0600);
+	DEBUGFS_ADD_FILE(wowlan_sram, dir_data, 0400);
+	DEBUGFS_ADD_FILE(stations, dir_data, 0400);
+	DEBUGFS_ADD_FILE(channels, dir_data, 0400);
+	DEBUGFS_ADD_FILE(status, dir_data, 0400);
+	DEBUGFS_ADD_FILE(rx_handlers, dir_data, 0600);
+	DEBUGFS_ADD_FILE(qos, dir_data, 0400);
+	DEBUGFS_ADD_FILE(sleep_level_override, dir_data, 0600);
+	DEBUGFS_ADD_FILE(current_sleep_command, dir_data, 0400);
+	DEBUGFS_ADD_FILE(thermal_throttling, dir_data, 0400);
+	DEBUGFS_ADD_FILE(disable_ht40, dir_data, 0600);
+	DEBUGFS_ADD_FILE(temperature, dir_data, 0400);
 
-	DEBUGFS_ADD_FILE(power_save_status, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(clear_ucode_statistics, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(missed_beacon, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(plcp_delta, dir_debug, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(rf_reset, dir_debug, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_rx_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_tx_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_general_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(txfifo_flush, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(protection_mode, dir_debug, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(sensitivity, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(chain_noise, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_tracing, dir_debug, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(ucode_bt_stats, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(reply_tx_error, dir_debug, S_IRUSR);
-	DEBUGFS_ADD_FILE(rxon_flags, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(rxon_filter_flags, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(echo_test, dir_debug, S_IWUSR);
-	DEBUGFS_ADD_FILE(fw_restart, dir_debug, S_IWUSR);
+	DEBUGFS_ADD_FILE(power_save_status, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(clear_ucode_statistics, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(missed_beacon, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(plcp_delta, dir_debug, 0600);
+	DEBUGFS_ADD_FILE(rf_reset, dir_debug, 0600);
+	DEBUGFS_ADD_FILE(ucode_rx_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(ucode_tx_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(ucode_general_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(txfifo_flush, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(protection_mode, dir_debug, 0600);
+	DEBUGFS_ADD_FILE(sensitivity, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(chain_noise, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(ucode_tracing, dir_debug, 0600);
+	DEBUGFS_ADD_FILE(ucode_bt_stats, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(reply_tx_error, dir_debug, 0400);
+	DEBUGFS_ADD_FILE(rxon_flags, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(rxon_filter_flags, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(echo_test, dir_debug, 0200);
+	DEBUGFS_ADD_FILE(fw_restart, dir_debug, 0200);
 #ifdef CONFIG_IWLWIFI_DEBUG
-	DEBUGFS_ADD_FILE(log_event, dir_debug, S_IWUSR | S_IRUSR);
+	DEBUGFS_ADD_FILE(log_event, dir_debug, 0600);
 #endif
 
 	if (iwl_advanced_bt_coexist(priv))
-		DEBUGFS_ADD_FILE(bt_traffic, dir_debug, S_IRUSR);
+		DEBUGFS_ADD_FILE(bt_traffic, dir_debug, 0400);
 
 	/* Calibrations disabled/enabled status*/
-	DEBUGFS_ADD_FILE(calib_disabled, dir_rf, S_IWUSR | S_IRUSR);
+	DEBUGFS_ADD_FILE(calib_disabled, dir_rf, 0600);
 
 	/*
 	 * Create a symlink with mac80211. This is not very robust, as it does
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
index ddcd8c2d66cd..98050d7be411 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
@@ -3276,17 +3276,17 @@ static void rs_add_debugfs(void *priv, void *priv_sta,
 {
 	struct iwl_lq_sta *lq_sta = priv_sta;
 	lq_sta->rs_sta_dbgfs_scale_table_file =
-		debugfs_create_file("rate_scale_table", S_IRUSR | S_IWUSR, dir,
-				lq_sta, &rs_sta_dbgfs_scale_table_ops);
+		debugfs_create_file("rate_scale_table", 0600, dir,
+				    lq_sta, &rs_sta_dbgfs_scale_table_ops);
 	lq_sta->rs_sta_dbgfs_stats_table_file =
-		debugfs_create_file("rate_stats_table", S_IRUSR, dir,
-			lq_sta, &rs_sta_dbgfs_stats_table_ops);
+		debugfs_create_file("rate_stats_table", 0400, dir,
+				    lq_sta, &rs_sta_dbgfs_stats_table_ops);
 	lq_sta->rs_sta_dbgfs_rate_scale_data_file =
-		debugfs_create_file("rate_scale_data", S_IRUSR, dir,
-			lq_sta, &rs_sta_dbgfs_rate_scale_data_ops);
+		debugfs_create_file("rate_scale_data", 0400, dir,
+				    lq_sta, &rs_sta_dbgfs_rate_scale_data_ops);
 	lq_sta->rs_sta_dbgfs_tx_agg_tid_en_file =
-		debugfs_create_u8("tx_agg_tid_enable", S_IRUSR | S_IWUSR, dir,
-		&lq_sta->tx_agg_tid_en);
+		debugfs_create_u8("tx_agg_tid_enable", 0600, dir,
+				  &lq_sta->tx_agg_tid_en);
 
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
index e2ded29a145d..baec2fbaaf68 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
@@ -187,7 +187,7 @@ int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
 			    struct dentry *dbgfs_dir)
 {
 	INIT_DELAYED_WORK(&fwrt->timestamp.wk, iwl_fw_timestamp_marker_wk);
-	FWRT_DEBUGFS_ADD_FILE(timestamp_marker, dbgfs_dir, S_IWUSR);
+	FWRT_DEBUGFS_ADD_FILE(timestamp_marker, dbgfs_dir, 0200);
 	return 0;
 err:
 	IWL_ERR(fwrt, "Can't create the fwrt debugfs directory\n");
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 9c4a7f648a44..aa2d5c14e202 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1768,41 +1768,36 @@ static void __exit iwl_drv_exit(void)
 module_exit(iwl_drv_exit);
 
 #ifdef CONFIG_IWLWIFI_DEBUG
-module_param_named(debug, iwlwifi_mod_params.debug_level, uint,
-		   S_IRUGO | S_IWUSR);
+module_param_named(debug, iwlwifi_mod_params.debug_level, uint, 0644);
 MODULE_PARM_DESC(debug, "debug output mask");
 #endif
 
-module_param_named(swcrypto, iwlwifi_mod_params.swcrypto, int, S_IRUGO);
+module_param_named(swcrypto, iwlwifi_mod_params.swcrypto, int, 0444);
 MODULE_PARM_DESC(swcrypto, "using crypto in software (default 0 [hardware])");
-module_param_named(11n_disable, iwlwifi_mod_params.disable_11n, uint, S_IRUGO);
+module_param_named(11n_disable, iwlwifi_mod_params.disable_11n, uint, 0444);
 MODULE_PARM_DESC(11n_disable,
 	"disable 11n functionality, bitmap: 1: full, 2: disable agg TX, 4: disable agg RX, 8 enable agg TX");
-module_param_named(amsdu_size, iwlwifi_mod_params.amsdu_size,
-		   int, S_IRUGO);
+module_param_named(amsdu_size, iwlwifi_mod_params.amsdu_size, int, 0444);
 MODULE_PARM_DESC(amsdu_size,
 		 "amsdu size 0: 12K for multi Rx queue devices, 4K for other devices 1:4K 2:8K 3:12K (default 0)");
-module_param_named(fw_restart, iwlwifi_mod_params.fw_restart, bool, S_IRUGO);
+module_param_named(fw_restart, iwlwifi_mod_params.fw_restart, bool, 0444);
 MODULE_PARM_DESC(fw_restart, "restart firmware in case of error (default true)");
 
 module_param_named(antenna_coupling, iwlwifi_mod_params.antenna_coupling,
-		   int, S_IRUGO);
+		   int, 0444);
 MODULE_PARM_DESC(antenna_coupling,
 		 "specify antenna coupling in dB (default: 0 dB)");
 
-module_param_named(nvm_file, iwlwifi_mod_params.nvm_file, charp, S_IRUGO);
+module_param_named(nvm_file, iwlwifi_mod_params.nvm_file, charp, 0444);
 MODULE_PARM_DESC(nvm_file, "NVM file name");
 
-module_param_named(d0i3_disable, iwlwifi_mod_params.d0i3_disable,
-		   bool, S_IRUGO);
+module_param_named(d0i3_disable, iwlwifi_mod_params.d0i3_disable, bool, 0444);
 MODULE_PARM_DESC(d0i3_disable, "disable d0i3 functionality (default: Y)");
 
-module_param_named(lar_disable, iwlwifi_mod_params.lar_disable,
-		   bool, S_IRUGO);
+module_param_named(lar_disable, iwlwifi_mod_params.lar_disable, bool, 0444);
 MODULE_PARM_DESC(lar_disable, "disable LAR functionality (default: N)");
 
-module_param_named(uapsd_disable, iwlwifi_mod_params.uapsd_disable,
-		   uint, S_IRUGO | S_IWUSR);
+module_param_named(uapsd_disable, iwlwifi_mod_params.uapsd_disable, uint, 0644);
 MODULE_PARM_DESC(uapsd_disable,
 		 "disable U-APSD functionality bitmap 1: BSS 2: P2P Client (default: 3)");
 
@@ -1823,31 +1818,27 @@ MODULE_PARM_DESC(uapsd_disable,
  * default: bt_coex_active = true (BT_COEX_ENABLE)
  */
 module_param_named(bt_coex_active, iwlwifi_mod_params.bt_coex_active,
-		bool, S_IRUGO);
+		   bool, 0444);
 MODULE_PARM_DESC(bt_coex_active, "enable wifi/bt co-exist (default: enable)");
 
-module_param_named(led_mode, iwlwifi_mod_params.led_mode, int, S_IRUGO);
+module_param_named(led_mode, iwlwifi_mod_params.led_mode, int, 0444);
 MODULE_PARM_DESC(led_mode, "0=system default, "
 		"1=On(RF On)/Off(RF Off), 2=blinking, 3=Off (default: 0)");
 
-module_param_named(power_save, iwlwifi_mod_params.power_save,
-		bool, S_IRUGO);
+module_param_named(power_save, iwlwifi_mod_params.power_save, bool, 0444);
 MODULE_PARM_DESC(power_save,
 		 "enable WiFi power management (default: disable)");
 
-module_param_named(power_level, iwlwifi_mod_params.power_level,
-		int, S_IRUGO);
+module_param_named(power_level, iwlwifi_mod_params.power_level, int, 0444);
 MODULE_PARM_DESC(power_level,
 		 "default power save level (range from 1 - 5, default: 1)");
 
-module_param_named(fw_monitor, iwlwifi_mod_params.fw_monitor, bool, S_IRUGO);
+module_param_named(fw_monitor, iwlwifi_mod_params.fw_monitor, bool, 0444);
 MODULE_PARM_DESC(fw_monitor,
 		 "firmware monitor - to debug FW (default: false - needs lots of memory)");
 
-module_param_named(d0i3_timeout, iwlwifi_mod_params.d0i3_timeout,
-		   uint, S_IRUGO);
+module_param_named(d0i3_timeout, iwlwifi_mod_params.d0i3_timeout, uint, 0444);
 MODULE_PARM_DESC(d0i3_timeout, "Timeout to D0i3 entry when idle (ms)");
 
-module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool,
-		   S_IRUGO);
+module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
 MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 4228fac77f41..4e9d1792baf3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -1506,44 +1506,36 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 	if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
 	    ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) ||
 	     (vif->type == NL80211_IFTYPE_STATION && vif->p2p)))
-		MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, S_IWUSR |
-					 S_IRUSR);
+		MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, 0600);
 
-	MVM_DEBUGFS_ADD_FILE_VIF(tx_pwr_lmt, mvmvif->dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(mac_params, mvmvif->dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(low_latency, mvmvif->dbgfs_dir,
-				 S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(uapsd_misbehaving, mvmvif->dbgfs_dir,
-				 S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(rx_phyinfo, mvmvif->dbgfs_dir,
-				 S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir,
-				 S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff,
-				 mvmvif->dbgfs_dir, S_IRUSR);
+	MVM_DEBUGFS_ADD_FILE_VIF(tx_pwr_lmt, mvmvif->dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE_VIF(mac_params, mvmvif->dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE_VIF(low_latency, mvmvif->dbgfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE_VIF(uapsd_misbehaving, mvmvif->dbgfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE_VIF(rx_phyinfo, mvmvif->dbgfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff, mvmvif->dbgfs_dir, 0400);
 
 	if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
 	    mvmvif == mvm->bf_allowed_vif)
-		MVM_DEBUGFS_ADD_FILE_VIF(bf_params, mvmvif->dbgfs_dir,
-					 S_IRUSR | S_IWUSR);
+		MVM_DEBUGFS_ADD_FILE_VIF(bf_params, mvmvif->dbgfs_dir, 0600);
 
 	if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TOF_SUPPORT) &&
 	    !vif->p2p && (vif->type != NL80211_IFTYPE_P2P_DEVICE)) {
 		if (IWL_MVM_TOF_IS_RESPONDER && vif->type == NL80211_IFTYPE_AP)
 			MVM_DEBUGFS_ADD_FILE_VIF(tof_responder_params,
-						 mvmvif->dbgfs_dir,
-						 S_IRUSR | S_IWUSR);
+						 mvmvif->dbgfs_dir, 0600);
 
 		MVM_DEBUGFS_ADD_FILE_VIF(tof_range_request, mvmvif->dbgfs_dir,
-					 S_IRUSR | S_IWUSR);
+					 0600);
 		MVM_DEBUGFS_ADD_FILE_VIF(tof_range_req_ext, mvmvif->dbgfs_dir,
-					 S_IRUSR | S_IWUSR);
+					 0600);
 		MVM_DEBUGFS_ADD_FILE_VIF(tof_enable, mvmvif->dbgfs_dir,
-					 S_IRUSR | S_IWUSR);
+					 0600);
 		MVM_DEBUGFS_ADD_FILE_VIF(tof_range_abort, mvmvif->dbgfs_dir,
-					 S_IRUSR | S_IWUSR);
+					 0600);
 		MVM_DEBUGFS_ADD_FILE_VIF(tof_range_response, mvmvif->dbgfs_dir,
-					 S_IRUSR);
+					 0400);
 	}
 
 	/*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 9c436d8d001d..0e6401cd7ccc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1914,7 +1914,7 @@ void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw,
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 
 	if (iwl_mvm_has_tlc_offload(mvm))
-		MVM_DEBUGFS_ADD_STA_FILE(rs_data, dir, S_IRUSR);
+		MVM_DEBUGFS_ADD_STA_FILE(rs_data, dir, 0400);
 
 	return;
 err:
@@ -1930,48 +1930,45 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
 
 	mvm->debugfs_dir = dbgfs_dir;
 
-	MVM_DEBUGFS_ADD_FILE(tx_flush, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(sta_drain, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(sram, mvm->debugfs_dir, S_IWUSR | S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(set_nic_temperature, mvm->debugfs_dir,
-			     S_IWUSR | S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(nic_temp, dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(ctdp_budget, dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(stop_ctdp, dbgfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(force_ctkill, dbgfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(stations, dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(bt_notif, dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(bt_cmd, dbgfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(disable_power_off, mvm->debugfs_dir,
-			     S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(fw_ver, mvm->debugfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(fw_rx_stats, mvm->debugfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(drv_rx_stats, mvm->debugfs_dir, S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(fw_restart, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(fw_nmi, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(bt_tx_prio, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(bt_force_ant, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(scan_ant_rxchain, mvm->debugfs_dir,
-			     S_IWUSR | S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(prph_reg, mvm->debugfs_dir, S_IWUSR | S_IRUSR);
-	MVM_DEBUGFS_ADD_FILE(d0i3_refs, mvm->debugfs_dir, S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(fw_dbg_conf, mvm->debugfs_dir, S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(fw_dbg_collect, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(max_amsdu_len, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, S_IWUSR);
+	MVM_DEBUGFS_ADD_FILE(tx_flush, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(sta_drain, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(sram, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(set_nic_temperature, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(nic_temp, dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(ctdp_budget, dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(stop_ctdp, dbgfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(force_ctkill, dbgfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(stations, dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(bt_notif, dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(bt_cmd, dbgfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(disable_power_off, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(fw_ver, mvm->debugfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(fw_rx_stats, mvm->debugfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(drv_rx_stats, mvm->debugfs_dir, 0400);
+	MVM_DEBUGFS_ADD_FILE(fw_restart, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(fw_nmi, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(bt_tx_prio, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(bt_force_ant, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(scan_ant_rxchain, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(prph_reg, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(d0i3_refs, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(fw_dbg_conf, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(fw_dbg_collect, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(max_amsdu_len, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, 0200);
+	MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, 0200);
 #ifdef CONFIG_ACPI
-	MVM_DEBUGFS_ADD_FILE(sar_geo_profile, dbgfs_dir, S_IRUSR);
+	MVM_DEBUGFS_ADD_FILE(sar_geo_profile, dbgfs_dir, 0400);
 #endif
 
 	if (!debugfs_create_bool("enable_scan_iteration_notif",
-				 S_IRUSR | S_IWUSR,
+				 0600,
 				 mvm->debugfs_dir,
 				 &mvm->scan_iter_notif_enabled))
 		goto err;
-	if (!debugfs_create_bool("drop_bcn_ap_mode", S_IRUSR | S_IWUSR,
+	if (!debugfs_create_bool("drop_bcn_ap_mode", 0600,
 				 mvm->debugfs_dir, &mvm->drop_bcn_ap_mode))
 		goto err;
 
@@ -1982,50 +1979,49 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
 		if (!bcast_dir)
 			goto err;
 
-		if (!debugfs_create_bool("override", S_IRUSR | S_IWUSR,
-				bcast_dir,
-				&mvm->dbgfs_bcast_filtering.override))
+		if (!debugfs_create_bool("override", 0600,
+					 bcast_dir,
+					 &mvm->dbgfs_bcast_filtering.override))
 			goto err;
 
 		MVM_DEBUGFS_ADD_FILE_ALIAS("filters", bcast_filters,
-					   bcast_dir, S_IWUSR | S_IRUSR);
+					   bcast_dir, 0600);
 		MVM_DEBUGFS_ADD_FILE_ALIAS("macs", bcast_filters_macs,
-					   bcast_dir, S_IWUSR | S_IRUSR);
+					   bcast_dir, 0600);
 	}
 #endif
 
 #ifdef CONFIG_PM_SLEEP
-	MVM_DEBUGFS_ADD_FILE(d3_sram, mvm->debugfs_dir, S_IRUSR | S_IWUSR);
-	MVM_DEBUGFS_ADD_FILE(d3_test, mvm->debugfs_dir, S_IRUSR);
-	if (!debugfs_create_bool("d3_wake_sysassert", S_IRUSR | S_IWUSR,
+	MVM_DEBUGFS_ADD_FILE(d3_sram, mvm->debugfs_dir, 0600);
+	MVM_DEBUGFS_ADD_FILE(d3_test, mvm->debugfs_dir, 0400);
+	if (!debugfs_create_bool("d3_wake_sysassert", 0600,
 				 mvm->debugfs_dir, &mvm->d3_wake_sysassert))
 		goto err;
-	if (!debugfs_create_u32("last_netdetect_scans", S_IRUSR,
+	if (!debugfs_create_u32("last_netdetect_scans", 0400,
 				mvm->debugfs_dir, &mvm->last_netdetect_scans))
 		goto err;
 #endif
 
-	if (!debugfs_create_u8("ps_disabled", S_IRUSR,
+	if (!debugfs_create_u8("ps_disabled", 0400,
 			       mvm->debugfs_dir, &mvm->ps_disabled))
 		goto err;
-	if (!debugfs_create_blob("nvm_hw", S_IRUSR,
-				  mvm->debugfs_dir, &mvm->nvm_hw_blob))
+	if (!debugfs_create_blob("nvm_hw", 0400,
+				 mvm->debugfs_dir, &mvm->nvm_hw_blob))
 		goto err;
-	if (!debugfs_create_blob("nvm_sw", S_IRUSR,
-				  mvm->debugfs_dir, &mvm->nvm_sw_blob))
+	if (!debugfs_create_blob("nvm_sw", 0400,
+				 mvm->debugfs_dir, &mvm->nvm_sw_blob))
 		goto err;
-	if (!debugfs_create_blob("nvm_calib", S_IRUSR,
-				  mvm->debugfs_dir, &mvm->nvm_calib_blob))
+	if (!debugfs_create_blob("nvm_calib", 0400,
+				 mvm->debugfs_dir, &mvm->nvm_calib_blob))
 		goto err;
-	if (!debugfs_create_blob("nvm_prod", S_IRUSR,
-				  mvm->debugfs_dir, &mvm->nvm_prod_blob))
+	if (!debugfs_create_blob("nvm_prod", 0400,
+				 mvm->debugfs_dir, &mvm->nvm_prod_blob))
 		goto err;
-	if (!debugfs_create_blob("nvm_phy_sku", S_IRUSR,
+	if (!debugfs_create_blob("nvm_phy_sku", 0400,
 				 mvm->debugfs_dir, &mvm->nvm_phy_sku_blob))
 		goto err;
 
-	debugfs_create_file("mem", S_IRUSR | S_IWUSR, dbgfs_dir, mvm,
-			    &iwl_dbgfs_mem_ops);
+	debugfs_create_file("mem", 0600, dbgfs_dir, mvm, &iwl_dbgfs_mem_ops);
 
 	/*
 	 * Create a symlink with mac80211. It will be removed when mac80211
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index ab7fb5aad984..224bfa1bcf53 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -104,14 +104,14 @@ struct iwl_mvm_mod_params iwlmvm_mod_params = {
 	/* rest of fields are 0 by default */
 };
 
-module_param_named(init_dbg, iwlmvm_mod_params.init_dbg, bool, S_IRUGO);
+module_param_named(init_dbg, iwlmvm_mod_params.init_dbg, bool, 0444);
 MODULE_PARM_DESC(init_dbg,
 		 "set to true to debug an ASSERT in INIT fw (default: false");
-module_param_named(power_scheme, iwlmvm_mod_params.power_scheme, int, S_IRUGO);
+module_param_named(power_scheme, iwlmvm_mod_params.power_scheme, int, 0444);
 MODULE_PARM_DESC(power_scheme,
 		 "power management scheme: 1-active, 2-balanced, 3-low power, default: 2");
 module_param_named(tfd_q_hang_detect, iwlmvm_mod_params.tfd_q_hang_detect,
-		   bool, S_IRUGO);
+		   bool, 0444);
 MODULE_PARM_DESC(tfd_q_hang_detect,
 		 "TFD queues hang detection (default: true");
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 47f4c7a1d80d..5d776ec1840f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -4010,18 +4010,18 @@ static void rs_drv_add_sta_debugfs(void *mvm, void *priv_sta,
 	if (!mvmsta->vif)
 		return;
 
-	debugfs_create_file("rate_scale_table", S_IRUSR | S_IWUSR, dir,
+	debugfs_create_file("rate_scale_table", 0600, dir,
 			    lq_sta, &rs_sta_dbgfs_scale_table_ops);
-	debugfs_create_file("rate_stats_table", S_IRUSR, dir,
+	debugfs_create_file("rate_stats_table", 0400, dir,
 			    lq_sta, &rs_sta_dbgfs_stats_table_ops);
-	debugfs_create_file("drv_tx_stats", S_IRUSR | S_IWUSR, dir,
+	debugfs_create_file("drv_tx_stats", 0600, dir,
 			    lq_sta, &rs_sta_dbgfs_drv_tx_stats_ops);
-	debugfs_create_u8("tx_agg_tid_enable", S_IRUSR | S_IWUSR, dir,
+	debugfs_create_u8("tx_agg_tid_enable", 0600, dir,
 			  &lq_sta->tx_agg_tid_en);
-	debugfs_create_u8("reduced_tpc", S_IRUSR | S_IWUSR, dir,
+	debugfs_create_u8("reduced_tpc", 0600, dir,
 			  &lq_sta->pers.dbg_fixed_txp_reduction);
 
-	MVM_DEBUGFS_ADD_FILE_RS(ss_force, dir, S_IRUSR | S_IWUSR);
+	MVM_DEBUGFS_ADD_FILE_RS(ss_force, dir, 0600);
 	return;
 err:
 	IWL_ERR((struct iwl_mvm *)mvm, "Can't create debugfs entity\n");
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index b406b536c850..f8a0234d332c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -2616,12 +2616,12 @@ int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
 {
 	struct dentry *dir = trans->dbgfs_dir;
 
-	DEBUGFS_ADD_FILE(rx_queue, dir, S_IRUSR);
-	DEBUGFS_ADD_FILE(tx_queue, dir, S_IRUSR);
-	DEBUGFS_ADD_FILE(interrupt, dir, S_IWUSR | S_IRUSR);
-	DEBUGFS_ADD_FILE(csr, dir, S_IWUSR);
-	DEBUGFS_ADD_FILE(fh_reg, dir, S_IRUSR);
-	DEBUGFS_ADD_FILE(rfkill, dir, S_IWUSR | S_IRUSR);
+	DEBUGFS_ADD_FILE(rx_queue, dir, 0400);
+	DEBUGFS_ADD_FILE(tx_queue, dir, 0400);
+	DEBUGFS_ADD_FILE(interrupt, dir, 0600);
+	DEBUGFS_ADD_FILE(csr, dir, 0200);
+	DEBUGFS_ADD_FILE(fh_reg, dir, 0400);
+	DEBUGFS_ADD_FILE(rfkill, dir, 0600);
 	return 0;
 
 err:
diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c
index ab6d39e12069..1c6d428515a4 100644
--- a/drivers/net/wireless/intersil/p54/main.c
+++ b/drivers/net/wireless/intersil/p54/main.c
@@ -27,7 +27,7 @@
 #include "lmac.h"
 
 static bool modparam_nohwcrypt;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 MODULE_AUTHOR("Michael Wu <flamingice@sourmilk.net>");
 MODULE_DESCRIPTION("Softmac Prism54 common code");
diff --git a/drivers/net/wireless/mediatek/mt76/debugfs.c b/drivers/net/wireless/mediatek/mt76/debugfs.c
index c121b502a462..a38d05dea599 100644
--- a/drivers/net/wireless/mediatek/mt76/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/debugfs.c
@@ -64,13 +64,13 @@ struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
 	if (!dir)
 		return NULL;
 
-	debugfs_create_u8("led_pin", S_IRUSR | S_IWUSR, dir, &dev->led_pin);
-	debugfs_create_u32("regidx", S_IRUSR | S_IWUSR, dir, &dev->debugfs_reg);
-	debugfs_create_file_unsafe("regval", S_IRUSR | S_IWUSR, dir, dev,
+	debugfs_create_u8("led_pin", 0600, dir, &dev->led_pin);
+	debugfs_create_u32("regidx", 0600, dir, &dev->debugfs_reg);
+	debugfs_create_file_unsafe("regval", 0600, dir, dev,
 				   &fops_regval);
-	debugfs_create_blob("eeprom", S_IRUSR, dir, &dev->eeprom);
+	debugfs_create_blob("eeprom", 0400, dir, &dev->eeprom);
 	if (dev->otp.data)
-		debugfs_create_blob("otp", S_IRUSR, dir, &dev->otp);
+		debugfs_create_blob("otp", 0400, dir, &dev->otp);
 	debugfs_create_devm_seqfile(dev->dev, "queues", dir, mt76_queues_read);
 
 	return dir;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_debugfs.c b/drivers/net/wireless/mediatek/mt76/mt76x2_debugfs.c
index 612feb593d7d..955ea3e692dd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_debugfs.c
@@ -123,11 +123,11 @@ void mt76x2_init_debugfs(struct mt76x2_dev *dev)
 	if (!dir)
 		return;
 
-	debugfs_create_u8("temperature", S_IRUSR, dir, &dev->cal.temp);
-	debugfs_create_bool("tpc", S_IRUSR | S_IWUSR, dir, &dev->enable_tpc);
+	debugfs_create_u8("temperature", 0400, dir, &dev->cal.temp);
+	debugfs_create_bool("tpc", 0600, dir, &dev->enable_tpc);
 
-	debugfs_create_file("ampdu_stat", S_IRUSR, dir, dev, &fops_ampdu_stat);
-	debugfs_create_file("dfs_stats", S_IRUSR, dir, dev, &fops_dfs_stat);
+	debugfs_create_file("ampdu_stat", 0400, dir, dev, &fops_ampdu_stat);
+	debugfs_create_file("dfs_stats", 0400, dir, dev, &fops_dfs_stat);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "txpower", dir,
 				    read_txpower);
 }
diff --git a/drivers/net/wireless/mediatek/mt7601u/debugfs.c b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
index fc008475a03b..991a6a729b1e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
@@ -160,13 +160,11 @@ void mt7601u_init_debugfs(struct mt7601u_dev *dev)
 	if (!dir)
 		return;
 
-	debugfs_create_u8("temperature", S_IRUSR, dir, &dev->raw_temp);
-	debugfs_create_u32("temp_mode", S_IRUSR, dir, &dev->temp_mode);
+	debugfs_create_u8("temperature", 0400, dir, &dev->raw_temp);
+	debugfs_create_u32("temp_mode", 0400, dir, &dev->temp_mode);
 
-	debugfs_create_u32("regidx", S_IRUSR | S_IWUSR, dir, &dev->debugfs_reg);
-	debugfs_create_file("regval", S_IRUSR | S_IWUSR, dir, dev,
-			    &fops_regval);
-	debugfs_create_file("ampdu_stat", S_IRUSR, dir, dev, &fops_ampdu_stat);
-	debugfs_create_file("eeprom_param", S_IRUSR, dir, dev,
-			    &fops_eeprom_param);
+	debugfs_create_u32("regidx", 0600, dir, &dev->debugfs_reg);
+	debugfs_create_file("regval", 0600, dir, dev, &fops_regval);
+	debugfs_create_file("ampdu_stat", 0400, dir, dev, &fops_ampdu_stat);
+	debugfs_create_file("eeprom_param", 0400, dir, dev, &fops_eeprom_param);
 }
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
index f4b48b77c491..3df8c4b895e7 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
@@ -37,7 +37,7 @@
  * Allow hardware encryption to be disabled.
  */
 static bool modparam_nohwcrypt;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 /*
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c
index 5cf655ff1430..1172eefd1c1a 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800pci.c
@@ -49,7 +49,7 @@
  * Allow hardware encryption to be disabled.
  */
 static bool modparam_nohwcrypt = false;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 static bool rt2800pci_hwcrypt_disabled(struct rt2x00_dev *rt2x00dev)
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c
index a985a5a7945e..6848ebc83534 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800soc.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800soc.c
@@ -41,7 +41,7 @@
 
 /* Allow hardware encryption to be disabled. */
 static bool modparam_nohwcrypt;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 static bool rt2800soc_hwcrypt_disabled(struct rt2x00_dev *rt2x00dev)
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
index 24fc6d2045ef..d901a41d36e4 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
@@ -43,7 +43,7 @@
  * Allow hardware encryption to be disabled.
  */
 static bool modparam_nohwcrypt;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 static bool rt2800usb_hwcrypt_disabled(struct rt2x00_dev *rt2x00dev)
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
index ac2572943ed0..0eee479583b8 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
@@ -606,7 +606,7 @@ static struct dentry *rt2x00debug_create_file_driver(const char *name,
 	data += sprintf(data, "version:\t%s\n", DRV_VERSION);
 	blob->size = strlen(blob->data);
 
-	return debugfs_create_blob(name, S_IRUSR, intf->driver_folder, blob);
+	return debugfs_create_blob(name, 0400, intf->driver_folder, blob);
 }
 
 static struct dentry *rt2x00debug_create_file_chipset(const char *name,
@@ -647,7 +647,7 @@ static struct dentry *rt2x00debug_create_file_chipset(const char *name,
 
 	blob->size = strlen(blob->data);
 
-	return debugfs_create_blob(name, S_IRUSR, intf->driver_folder, blob);
+	return debugfs_create_blob(name, 0400, intf->driver_folder, blob);
 }
 
 void rt2x00debug_register(struct rt2x00_dev *rt2x00dev)
@@ -682,13 +682,13 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev)
 	if (IS_ERR(intf->chipset_entry) || !intf->chipset_entry)
 		goto exit;
 
-	intf->dev_flags = debugfs_create_file("dev_flags", S_IRUSR,
+	intf->dev_flags = debugfs_create_file("dev_flags", 0400,
 					      intf->driver_folder, intf,
 					      &rt2x00debug_fop_dev_flags);
 	if (IS_ERR(intf->dev_flags) || !intf->dev_flags)
 		goto exit;
 
-	intf->cap_flags = debugfs_create_file("cap_flags", S_IRUSR,
+	intf->cap_flags = debugfs_create_file("cap_flags", 0400,
 					      intf->driver_folder, intf,
 					      &rt2x00debug_fop_cap_flags);
 	if (IS_ERR(intf->cap_flags) || !intf->cap_flags)
@@ -699,27 +699,28 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev)
 	if (IS_ERR(intf->register_folder) || !intf->register_folder)
 		goto exit;
 
-#define RT2X00DEBUGFS_CREATE_REGISTER_ENTRY(__intf, __name)			\
-({										\
-	if (debug->__name.read) {						\
-		(__intf)->__name##_off_entry =					\
-		debugfs_create_u32(__stringify(__name) "_offset",		\
-				       S_IRUSR | S_IWUSR,			\
-				       (__intf)->register_folder,		\
-				       &(__intf)->offset_##__name);		\
-		if (IS_ERR((__intf)->__name##_off_entry)			\
-				|| !(__intf)->__name##_off_entry)		\
-			goto exit;						\
-										\
-		(__intf)->__name##_val_entry =					\
-		debugfs_create_file(__stringify(__name) "_value",		\
-					S_IRUSR | S_IWUSR,			\
-					(__intf)->register_folder,		\
-					(__intf), &rt2x00debug_fop_##__name);	\
-		if (IS_ERR((__intf)->__name##_val_entry)			\
-				|| !(__intf)->__name##_val_entry)		\
-			goto exit;						\
-	}									\
+#define RT2X00DEBUGFS_CREATE_REGISTER_ENTRY(__intf, __name)		\
+({									\
+	if (debug->__name.read) {					\
+		(__intf)->__name##_off_entry =				\
+			debugfs_create_u32(__stringify(__name) "_offset", \
+					   0600,			\
+					   (__intf)->register_folder,	\
+					   &(__intf)->offset_##__name);	\
+		if (IS_ERR((__intf)->__name##_off_entry) ||		\
+		    !(__intf)->__name##_off_entry)			\
+			goto exit;					\
+									\
+		(__intf)->__name##_val_entry =				\
+			debugfs_create_file(__stringify(__name) "_value", \
+					    0600,			\
+					    (__intf)->register_folder,	\
+					    (__intf),			\
+					    &rt2x00debug_fop_##__name); \
+		if (IS_ERR((__intf)->__name##_val_entry) ||		\
+		    !(__intf)->__name##_val_entry)			\
+			goto exit;					\
+	}								\
 })
 
 	RT2X00DEBUGFS_CREATE_REGISTER_ENTRY(intf, csr);
@@ -736,8 +737,8 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev)
 		goto exit;
 
 	intf->queue_frame_dump_entry =
-	    debugfs_create_file("dump", S_IRUSR, intf->queue_folder,
-				intf, &rt2x00debug_fop_queue_dump);
+		debugfs_create_file("dump", 0400, intf->queue_folder,
+				    intf, &rt2x00debug_fop_queue_dump);
 	if (IS_ERR(intf->queue_frame_dump_entry)
 		|| !intf->queue_frame_dump_entry)
 		goto exit;
@@ -746,14 +747,15 @@ void rt2x00debug_register(struct rt2x00_dev *rt2x00dev)
 	init_waitqueue_head(&intf->frame_dump_waitqueue);
 
 	intf->queue_stats_entry =
-	    debugfs_create_file("queue", S_IRUSR, intf->queue_folder,
-				intf, &rt2x00debug_fop_queue_stats);
+		debugfs_create_file("queue", 0400, intf->queue_folder,
+				    intf, &rt2x00debug_fop_queue_stats);
 
 #ifdef CONFIG_RT2X00_LIB_CRYPTO
 	if (rt2x00_has_cap_hw_crypto(rt2x00dev))
 		intf->crypto_stats_entry =
-		    debugfs_create_file("crypto", S_IRUGO, intf->queue_folder,
-					intf, &rt2x00debug_fop_crypto_stats);
+			debugfs_create_file("crypto", 0444, intf->queue_folder,
+					    intf,
+					    &rt2x00debug_fop_crypto_stats);
 #endif
 
 	return;
diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c
index 234310200759..cb0e1196f2c2 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c
@@ -40,7 +40,7 @@
  * Allow hardware encryption to be disabled.
  */
 static bool modparam_nohwcrypt = false;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 /*
diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
index 9a212823f42c..319ec4f2d9d2 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
@@ -38,7 +38,7 @@
  * Allow hardware encryption to be disabled.
  */
 static bool modparam_nohwcrypt;
-module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO);
+module_param_named(nohwcrypt, modparam_nohwcrypt, bool, 0444);
 MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption.");
 
 /*
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 0133fcd4601b..7f9b16b97ea3 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2815,9 +2815,11 @@ static int __init init_ray_cs(void)
 	proc_mkdir("driver/ray_cs", NULL);
 
 	proc_create("driver/ray_cs/ray_cs", 0, NULL, &ray_cs_proc_fops);
-	proc_create("driver/ray_cs/essid", S_IWUSR, NULL, &ray_cs_essid_proc_fops);
-	proc_create_data("driver/ray_cs/net_type", S_IWUSR, NULL, &int_proc_fops, &net_type);
-	proc_create_data("driver/ray_cs/translate", S_IWUSR, NULL, &int_proc_fops, &translate);
+	proc_create("driver/ray_cs/essid", 0200, NULL, &ray_cs_essid_proc_fops);
+	proc_create_data("driver/ray_cs/net_type", 0200, NULL, &int_proc_fops,
+			 &net_type);
+	proc_create_data("driver/ray_cs/translate", 0200, NULL, &int_proc_fops,
+			 &translate);
 #endif
 	if (translate != 0)
 		translate = 1;
diff --git a/drivers/net/wireless/st/cw1200/debug.c b/drivers/net/wireless/st/cw1200/debug.c
index 34f97c31eecf..295cb1a29f25 100644
--- a/drivers/net/wireless/st/cw1200/debug.c
+++ b/drivers/net/wireless/st/cw1200/debug.c
@@ -398,15 +398,15 @@ int cw1200_debug_init(struct cw1200_common *priv)
 	if (!d->debugfs_phy)
 		goto err;
 
-	if (!debugfs_create_file("status", S_IRUSR, d->debugfs_phy,
+	if (!debugfs_create_file("status", 0400, d->debugfs_phy,
 				 priv, &fops_status))
 		goto err;
 
-	if (!debugfs_create_file("counters", S_IRUSR, d->debugfs_phy,
+	if (!debugfs_create_file("counters", 0400, d->debugfs_phy,
 				 priv, &fops_counters))
 		goto err;
 
-	if (!debugfs_create_file("wsm_dumps", S_IWUSR, d->debugfs_phy,
+	if (!debugfs_create_file("wsm_dumps", 0200, d->debugfs_phy,
 				 priv, &fops_wsm_dumps))
 		goto err;
 
diff --git a/drivers/net/wireless/st/cw1200/main.c b/drivers/net/wireless/st/cw1200/main.c
index a186d1df1f29..90dc979f260b 100644
--- a/drivers/net/wireless/st/cw1200/main.c
+++ b/drivers/net/wireless/st/cw1200/main.c
@@ -46,7 +46,7 @@ MODULE_ALIAS("cw1200_core");
 
 /* Accept MAC address of the form macaddr=0x00,0x80,0xE1,0x30,0x40,0x50 */
 static u8 cw1200_mac_template[ETH_ALEN] = {0x02, 0x80, 0xe1, 0x00, 0x00, 0x00};
-module_param_array_named(macaddr, cw1200_mac_template, byte, NULL, S_IRUGO);
+module_param_array_named(macaddr, cw1200_mac_template, byte, NULL, 0444);
 MODULE_PARM_DESC(macaddr, "Override platform_data MAC address");
 
 static char *cw1200_sdd_path;
diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c
index 0cf3b4013dd6..ca0f936fc119 100644
--- a/drivers/net/wireless/ti/wl18xx/main.c
+++ b/drivers/net/wireless/ti/wl18xx/main.c
@@ -2092,54 +2092,51 @@ static struct platform_driver wl18xx_driver = {
 };
 
 module_platform_driver(wl18xx_driver);
-module_param_named(ht_mode, ht_mode_param, charp, S_IRUSR);
+module_param_named(ht_mode, ht_mode_param, charp, 0400);
 MODULE_PARM_DESC(ht_mode, "Force HT mode: wide or siso20");
 
-module_param_named(board_type, board_type_param, charp, S_IRUSR);
+module_param_named(board_type, board_type_param, charp, 0400);
 MODULE_PARM_DESC(board_type, "Board type: fpga, hdk (default), evb, com8 or "
 		 "dvp");
 
-module_param_named(checksum, checksum_param, bool, S_IRUSR);
+module_param_named(checksum, checksum_param, bool, 0400);
 MODULE_PARM_DESC(checksum, "Enable TCP checksum: boolean (defaults to false)");
 
-module_param_named(dc2dc, dc2dc_param, int, S_IRUSR);
+module_param_named(dc2dc, dc2dc_param, int, 0400);
 MODULE_PARM_DESC(dc2dc, "External DC2DC: u8 (defaults to 0)");
 
-module_param_named(n_antennas_2, n_antennas_2_param, int, S_IRUSR);
+module_param_named(n_antennas_2, n_antennas_2_param, int, 0400);
 MODULE_PARM_DESC(n_antennas_2,
 		 "Number of installed 2.4GHz antennas: 1 (default) or 2");
 
-module_param_named(n_antennas_5, n_antennas_5_param, int, S_IRUSR);
+module_param_named(n_antennas_5, n_antennas_5_param, int, 0400);
 MODULE_PARM_DESC(n_antennas_5,
 		 "Number of installed 5GHz antennas: 1 (default) or 2");
 
-module_param_named(low_band_component, low_band_component_param, int,
-		   S_IRUSR);
+module_param_named(low_band_component, low_band_component_param, int, 0400);
 MODULE_PARM_DESC(low_band_component, "Low band component: u8 "
 		 "(default is 0x01)");
 
 module_param_named(low_band_component_type, low_band_component_type_param,
-		   int, S_IRUSR);
+		   int, 0400);
 MODULE_PARM_DESC(low_band_component_type, "Low band component type: u8 "
 		 "(default is 0x05 or 0x06 depending on the board_type)");
 
-module_param_named(high_band_component, high_band_component_param, int,
-		   S_IRUSR);
+module_param_named(high_band_component, high_band_component_param, int, 0400);
 MODULE_PARM_DESC(high_band_component, "High band component: u8, "
 		 "(default is 0x01)");
 
 module_param_named(high_band_component_type, high_band_component_type_param,
-		   int, S_IRUSR);
+		   int, 0400);
 MODULE_PARM_DESC(high_band_component_type, "High band component type: u8 "
 		 "(default is 0x09)");
 
 module_param_named(pwr_limit_reference_11_abg,
-		   pwr_limit_reference_11_abg_param, int, S_IRUSR);
+		   pwr_limit_reference_11_abg_param, int, 0400);
 MODULE_PARM_DESC(pwr_limit_reference_11_abg, "Power limit reference: u8 "
 		 "(default is 0xc8)");
 
-module_param_named(num_rx_desc,
-		   num_rx_desc_param, int, S_IRUSR);
+module_param_named(num_rx_desc, num_rx_desc_param, int, 0400);
 MODULE_PARM_DESC(num_rx_desc_param,
 		 "Number of Rx descriptors: u8 (default is 32)");
 
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 09714034dbf1..3a51ab116e79 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -6630,20 +6630,20 @@ EXPORT_SYMBOL_GPL(wlcore_remove);
 
 u32 wl12xx_debug_level = DEBUG_NONE;
 EXPORT_SYMBOL_GPL(wl12xx_debug_level);
-module_param_named(debug_level, wl12xx_debug_level, uint, S_IRUSR | S_IWUSR);
+module_param_named(debug_level, wl12xx_debug_level, uint, 0600);
 MODULE_PARM_DESC(debug_level, "wl12xx debugging level");
 
 module_param_named(fwlog, fwlog_param, charp, 0);
 MODULE_PARM_DESC(fwlog,
 		 "FW logger options: continuous, dbgpins or disable");
 
-module_param(fwlog_mem_blocks, int, S_IRUSR | S_IWUSR);
+module_param(fwlog_mem_blocks, int, 0600);
 MODULE_PARM_DESC(fwlog_mem_blocks, "fwlog mem_blocks");
 
-module_param(bug_on_recovery, int, S_IRUSR | S_IWUSR);
+module_param(bug_on_recovery, int, 0600);
 MODULE_PARM_DESC(bug_on_recovery, "BUG() on fw recovery");
 
-module_param(no_recovery, int, S_IRUSR | S_IWUSR);
+module_param(no_recovery, int, 0600);
 MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck.");
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index f8a1fea64e25..1f727babbea0 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -469,7 +469,7 @@ static void __exit wl1271_exit(void)
 module_init(wl1271_init);
 module_exit(wl1271_exit);
 
-module_param(dump, bool, S_IRUSR | S_IWUSR);
+module_param(dump, bool, 0600);
 MODULE_PARM_DESC(dump, "Enable sdio read/write dumps.");
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/wireless/ti/wlcore/sysfs.c b/drivers/net/wireless/ti/wlcore/sysfs.c
index b72e2101488b..d31eb775e023 100644
--- a/drivers/net/wireless/ti/wlcore/sysfs.c
+++ b/drivers/net/wireless/ti/wlcore/sysfs.c
@@ -80,7 +80,7 @@ static ssize_t wl1271_sysfs_store_bt_coex_state(struct device *dev,
 	return count;
 }
 
-static DEVICE_ATTR(bt_coex_state, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(bt_coex_state, 0644,
 		   wl1271_sysfs_show_bt_coex_state,
 		   wl1271_sysfs_store_bt_coex_state);
 
@@ -103,8 +103,7 @@ static ssize_t wl1271_sysfs_show_hw_pg_ver(struct device *dev,
 	return len;
 }
 
-static DEVICE_ATTR(hw_pg_ver, S_IRUGO,
-		   wl1271_sysfs_show_hw_pg_ver, NULL);
+static DEVICE_ATTR(hw_pg_ver, 0444, wl1271_sysfs_show_hw_pg_ver, NULL);
 
 static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj,
 				       struct bin_attribute *bin_attr,
@@ -139,7 +138,7 @@ static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj,
 }
 
 static const struct bin_attribute fwlog_attr = {
-	.attr = {.name = "fwlog", .mode = S_IRUSR},
+	.attr = { .name = "fwlog", .mode = 0400 },
 	.read = wl1271_sysfs_read_fwlog,
 };
 

From 6c20495b7debf1f937027526306df3d5daeeb7bc Mon Sep 17 00:00:00 2001
From: Ganapathi Bhat <gbhat@marvell.com>
Date: Thu, 15 Mar 2018 11:50:32 +0530
Subject: [PATCH 1293/1678] mwifiex: remove warnings in
 mwifiex_cmd_append_11n_tlv()

Fix the following sparse warning in mwifiex_cmd_append_11n_tlv:

drivers/net/wireless/marvell/mwifiex/11n.c:358:65: warning: invalid assignment: &=
drivers/net/wireless/marvell/mwifiex/11n.c:358:65:    left side has type restricted __le16
drivers/net/wireless/marvell/mwifiex/11n.c:358:65:    right side has type int
drivers/net/wireless/marvell/mwifiex/11n.c:360:65: warning: invalid assignment: &=
drivers/net/wireless/marvell/mwifiex/11n.c:360:65:    left side has type restricted __le16
drivers/net/wireless/marvell/mwifiex/11n.c:360:65:    right side has type int
drivers/net/wireless/marvell/mwifiex/11n.c:366:65: warning: invalid assignment: &=
drivers/net/wireless/marvell/mwifiex/11n.c:366:65:    left side has type restricted __le16
drivers/net/wireless/marvell/mwifiex/11n.c:366:65:    right side has type int
drivers/net/wireless/marvell/mwifiex/11n.c:368:65: warning: invalid assignment: &=
drivers/net/wireless/marvell/mwifiex/11n.c:368:65:    left side has type restricted __le16
drivers/net/wireless/marvell/mwifiex/11n.c:368:65:    right side has type int

Fixes: 77423fa73927 ("mwifiex: fix incorrect ht capability problem")
Signed-off-by: Ganapathi Bhat <gbhat@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/marvell/mwifiex/11n.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index feebfdcf025a..5d75c971004b 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -356,17 +356,19 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv,
 			case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
 				if (chan->flags & IEEE80211_CHAN_NO_HT40PLUS) {
 					ht_cap->ht_cap.cap_info &=
-					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+					cpu_to_le16
+					(~IEEE80211_HT_CAP_SUP_WIDTH_20_40);
 					ht_cap->ht_cap.cap_info &=
-					~IEEE80211_HT_CAP_SGI_40;
+					cpu_to_le16(~IEEE80211_HT_CAP_SGI_40);
 				}
 				break;
 			case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
 				if (chan->flags & IEEE80211_CHAN_NO_HT40MINUS) {
 					ht_cap->ht_cap.cap_info &=
-					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+					cpu_to_le16
+					(~IEEE80211_HT_CAP_SUP_WIDTH_20_40);
 					ht_cap->ht_cap.cap_info &=
-					~IEEE80211_HT_CAP_SGI_40;
+					cpu_to_le16(~IEEE80211_HT_CAP_SGI_40);
 				}
 				break;
 			}

From 44f98a9332e4b97b0215a1c38e647bc0cd8e5a5f Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Thu, 15 Mar 2018 13:31:25 +1100
Subject: [PATCH 1294/1678] rsi: Remove stack VLA usage

The use of stack Variable Length Arrays needs to be avoided, as they
can be a vector for stack exhaustion, which can be both a runtime bug
(kernel Oops) or a security flaw (overwriting memory beyond the
stack). Also, in general, as code evolves it is easy to lose track of
how big a VLA can get. Thus, we can end up having runtime failures
that are hard to debug. As part of the directive[1] to remove all VLAs
from the kernel, and build with -Wvla.

Currently rsi code uses a VLA based on a function argument to
`rsi_sdio_load_data_master_write()`.  The function call chain is

Both these functions

	rsi_sdio_reinit_device()
	rsi_probe()

start the call chain:

	rsi_hal_device_init()
	rsi_load_fw()
	auto_fw_upgrade()
	ping_pong_write()
	rsi_sdio_load_data_master_write()

[Without familiarity with the code] it appears that none of the 4 locks

	mutex
	rx_mutex
	tx_mutex
	tx_bus_mutex

are held when `rsi_sdio_load_data_master_write()` is called.  It is therefore
safe to use kmalloc with GFP_KERNEL.

We can avoid using the VLA by using `kmalloc()` and free'ing the memory on all
exit paths.

Change buffer from 'u8 array' to 'u8 *'.  Call `kmalloc()` to allocate memory for
the buffer.  Using goto statement to call `kfree()` on all return paths.

It can be expected that this patch will result in a small increase in overhead
due to the use of `kmalloc()` however this code is only called on initialization
(and re-initialization) so this overhead should not degrade performance.

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Tobin C. Harding <me@tobin.cc>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 98c7d1dae18e..13705fca59dd 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -576,7 +576,7 @@ static int rsi_sdio_load_data_master_write(struct rsi_hw *adapter,
 {
 	u32 num_blocks, offset, i;
 	u16 msb_address, lsb_address;
-	u8 temp_buf[block_size];
+	u8 *temp_buf;
 	int status;
 
 	num_blocks = instructions_sz / block_size;
@@ -585,11 +585,15 @@ static int rsi_sdio_load_data_master_write(struct rsi_hw *adapter,
 	rsi_dbg(INFO_ZONE, "ins_size: %d, num_blocks: %d\n",
 		instructions_sz, num_blocks);
 
+	temp_buf = kmalloc(block_size, GFP_KERNEL);
+	if (!temp_buf)
+		return -ENOMEM;
+
 	/* Loading DM ms word in the sdio slave */
 	status = rsi_sdio_master_access_msword(adapter, msb_address);
 	if (status < 0) {
 		rsi_dbg(ERR_ZONE, "%s: Unable to set ms word reg\n", __func__);
-		return status;
+		goto out_free;
 	}
 
 	for (offset = 0, i = 0; i < num_blocks; i++, offset += block_size) {
@@ -601,7 +605,7 @@ static int rsi_sdio_load_data_master_write(struct rsi_hw *adapter,
 					 temp_buf, block_size);
 		if (status < 0) {
 			rsi_dbg(ERR_ZONE, "%s: failed to write\n", __func__);
-			return status;
+			goto out_free;
 		}
 		rsi_dbg(INFO_ZONE, "%s: loading block: %d\n", __func__, i);
 		base_address += block_size;
@@ -616,7 +620,7 @@ static int rsi_sdio_load_data_master_write(struct rsi_hw *adapter,
 				rsi_dbg(ERR_ZONE,
 					"%s: Unable to set ms word reg\n",
 					__func__);
-				return status;
+				goto out_free;
 			}
 		}
 	}
@@ -632,12 +636,16 @@ static int rsi_sdio_load_data_master_write(struct rsi_hw *adapter,
 					 temp_buf,
 					 instructions_sz % block_size);
 		if (status < 0)
-			return status;
+			goto out_free;
 		rsi_dbg(INFO_ZONE,
 			"Written Last Block in Address 0x%x Successfully\n",
 			offset | RSI_SD_REQUEST_MASTER);
 	}
-	return 0;
+
+	status = 0;
+out_free:
+	kfree(temp_buf);
+	return status;
 }
 
 #define FLASH_SIZE_ADDR                 0x04000016

From a31f9314114cc884b9979c16b6e147e37ac729d6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 14 Mar 2018 14:12:39 +0000
Subject: [PATCH 1295/1678] rsi: remove redundant duplicate assignment of
 buffer_size

Variable buffer_size is re-assigned the same value, this duplicated
assignment is redundant and can be removed.

Cleans up clang warning:
drivers/net/wireless/rsi/rsi_91x_usb.c:140:4: warning: Value stored
to 'buffer_size' is never read

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_usb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index be8236f404b5..7b8bae313aa9 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -140,7 +140,6 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface,
 			buffer_size = endpoint->wMaxPacketSize;
 			dev->bulkout_endpoint_addr[bout_found] =
 				endpoint->bEndpointAddress;
-			buffer_size = endpoint->wMaxPacketSize;
 			dev->bulkout_size[bout_found] = buffer_size;
 			bout_found++;
 		}

From 90b12aebe54b9157a28f5131203f48ad943ba79a Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Date: Tue, 20 Mar 2018 19:10:40 +0530
Subject: [PATCH 1296/1678] rsi: fix error path handling in SDIO probe

We miss to release IRQ in certain error path in SDIO probe which
causes following kernel panic. This patch corrects error path
handling

BUG: unable to handle kernel NULL pointer dereference at        (null)
IP:           (null)
PGD 0 P4D 0
Oops: 0010 [#1] SMP PTI
Call Trace:
 <IRQ>
 ? call_timer_fn+0x29/0x120
 ? run_timer_softirq+0x1da/0x420
 ? timer_interrupt+0x11/0x20
 ? __do_softirq+0xef/0x26e
 ? irq_exit+0xbe/0xd0
 ? do_IRQ+0x4a/0xc0
 ? common_interrupt+0xa2/0xa2
 </IRQ>
 ? cpuidle_enter_state+0x118/0x250
 ? do_idle+0x186/0x1e0
 ? cpu_startup_entry+0x6f/0x80
 ? start_kernel+0x47c/0x49c
 ? secondary_startup_64+0xa5/0xb0

Fixes: 50117605770c ("rsi: improve RX handling in SDIO interface")
Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 13705fca59dd..9ce2edb77cc0 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -967,7 +967,7 @@ static int rsi_probe(struct sdio_func *pfunction,
 				    rsi_sdio_rx_thread, "SDIO-RX-Thread");
 	if (status) {
 		rsi_dbg(ERR_ZONE, "%s: Unable to init rx thrd\n", __func__);
-		goto fail_free_adapter;
+		goto fail_kill_thread;
 	}
 	skb_queue_head_init(&sdev->rx_q.head);
 	sdev->rx_q.num_rx_pkts = 0;
@@ -977,7 +977,7 @@ static int rsi_probe(struct sdio_func *pfunction,
 		rsi_dbg(ERR_ZONE, "%s: Failed to request IRQ\n", __func__);
 		sdio_release_host(pfunction);
 		status = -EIO;
-		goto fail_kill_thread;
+		goto fail_claim_irq;
 	}
 	sdio_release_host(pfunction);
 	rsi_dbg(INIT_ZONE, "%s: Registered Interrupt handler\n", __func__);
@@ -985,7 +985,7 @@ static int rsi_probe(struct sdio_func *pfunction,
 	if (rsi_hal_device_init(adapter)) {
 		rsi_dbg(ERR_ZONE, "%s: Failed in device init\n", __func__);
 		status = -EINVAL;
-		goto fail_kill_thread;
+		goto fail_dev_init;
 	}
 	rsi_dbg(INFO_ZONE, "===> RSI Device Init Done <===\n");
 
@@ -1002,10 +1002,13 @@ static int rsi_probe(struct sdio_func *pfunction,
 fail_dev_init:
 	sdio_claim_host(pfunction);
 	sdio_release_irq(pfunction);
+	sdio_release_host(pfunction);
+fail_claim_irq:
+	rsi_kill_thread(&sdev->rx_thread);
+fail_kill_thread:
+	sdio_claim_host(pfunction);
 	sdio_disable_func(pfunction);
 	sdio_release_host(pfunction);
-fail_kill_thread:
-	rsi_kill_thread(&sdev->rx_thread);
 fail_free_adapter:
 	rsi_91x_deinit(adapter);
 	rsi_dbg(ERR_ZONE, "%s: Failed in probe...Exiting\n", __func__);

From 864db4d5085349fcfa1f260b5bcd2adde3d7f2ed Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Date: Tue, 20 Mar 2018 19:10:41 +0530
Subject: [PATCH 1297/1678] rsi: fix kernel panic observed on 64bit machine

Following kernel panic is observed on 64bit machine while loading
the driver. It is fixed if we pass dynamically allocated memory to
SDIO for DMA.

BUG: unable to handle kernel paging request at ffffeb04000172e0
IP: sg_miter_stop+0x56/0x70
PGD 0 P4D 0
Oops: 0000 [#1] SMP PTI
Modules linked in: rsi_sdio(OE+) rsi_91x(OE) btrsi(OE) rfcomm bluetooth
ecdh_generic mac80211 mmc_block fuse xt_CHECKSUM iptable_mangle
drm_kms_helper mmc_core serio_raw drm firewire_ohci tg3
CPU: 0 PID: 4003 Comm: insmod Tainted: G           OE    4.16.0-rc1+ #27
Hardware name: Dell Inc. Latitude E5500                  /0DW634, BIOS
A19 06/13/2013
RIP: 0010:sg_miter_stop+0x56/0x70
RSP: 0018:ffff88007d003e78 EFLAGS: 00010002
RAX: 0000000000000003 RBX: 0000000000000004 RCX: 0000000000000000
RDX: ffffeb04000172c0 RSI: ffff88002f58002c RDI: ffff88007d003e80
RBP: 0000000000000004 R08: ffff88007d003e80 R09: 0000000000000008
R10: 0000000000000003 R11: 0000000000000001 R12: 0000000000000004
R13: ffff88002f580028 R14: 0000000000000000 R15: 0000000000000004
FS:  00007f35c29db700(0000) GS:ffff88007d000000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffeb04000172e0 CR3: 000000007038e000 CR4: 00000000000406f0
Call Trace:
<IRQ>
sg_copy_buffer+0xc6/0xf0
sdhci_tasklet_finish+0x170/0x260 [sdhci]
tasklet_action+0xf4/0x100
__do_softirq+0xef/0x26e
irq_exit+0xbe/0xd0
do_IRQ+0x4a/0xc0
common_interrupt+0xa2/0xa2
</IRQ>

Signed-off-by: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 32 ++++++++++++++++---------
 drivers/net/wireless/rsi/rsi_sdio.h     |  2 ++
 2 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 9ce2edb77cc0..d76e69c0beaa 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -653,11 +653,14 @@ static int rsi_sdio_master_reg_read(struct rsi_hw *adapter, u32 addr,
 				    u32 *read_buf, u16 size)
 {
 	u32 addr_on_bus, *data;
-	u32 align[2] = {};
 	u16 ms_addr;
 	int status;
 
-	data = PTR_ALIGN(&align[0], 8);
+	data = kzalloc(RSI_MASTER_REG_BUF_SIZE, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data = PTR_ALIGN(data, 8);
 
 	ms_addr = (addr >> 16);
 	status = rsi_sdio_master_access_msword(adapter, ms_addr);
@@ -665,7 +668,7 @@ static int rsi_sdio_master_reg_read(struct rsi_hw *adapter, u32 addr,
 		rsi_dbg(ERR_ZONE,
 			"%s: Unable to set ms word to common reg\n",
 			__func__);
-		return status;
+		goto err;
 	}
 	addr &= 0xFFFF;
 
@@ -683,7 +686,7 @@ static int rsi_sdio_master_reg_read(struct rsi_hw *adapter, u32 addr,
 					 (u8 *)data, 4);
 	if (status < 0) {
 		rsi_dbg(ERR_ZONE, "%s: AHB register read failed\n", __func__);
-		return status;
+		goto err;
 	}
 	if (size == 2) {
 		if ((addr & 0x3) == 0)
@@ -705,17 +708,23 @@ static int rsi_sdio_master_reg_read(struct rsi_hw *adapter, u32 addr,
 		*read_buf = *data;
 	}
 
-	return 0;
+err:
+	kfree(data);
+	return status;
 }
 
 static int rsi_sdio_master_reg_write(struct rsi_hw *adapter,
 				     unsigned long addr,
 				     unsigned long data, u16 size)
 {
-	unsigned long data1[2], *data_aligned;
+	unsigned long *data_aligned;
 	int status;
 
-	data_aligned = PTR_ALIGN(&data1[0], 8);
+	data_aligned = kzalloc(RSI_MASTER_REG_BUF_SIZE, GFP_KERNEL);
+	if (!data_aligned)
+		return -ENOMEM;
+
+	data_aligned = PTR_ALIGN(data_aligned, 8);
 
 	if (size == 2) {
 		*data_aligned = ((data << 16) | (data & 0xFFFF));
@@ -734,6 +743,7 @@ static int rsi_sdio_master_reg_write(struct rsi_hw *adapter,
 		rsi_dbg(ERR_ZONE,
 			"%s: Unable to set ms word to common reg\n",
 			__func__);
+		kfree(data_aligned);
 		return -EIO;
 	}
 	addr = addr & 0xFFFF;
@@ -743,12 +753,12 @@ static int rsi_sdio_master_reg_write(struct rsi_hw *adapter,
 					(adapter,
 					 (addr | RSI_SD_REQUEST_MASTER),
 					 (u8 *)data_aligned, size);
-	if (status < 0) {
+	if (status < 0)
 		rsi_dbg(ERR_ZONE,
 			"%s: Unable to do AHB reg write\n", __func__);
-		return status;
-	}
-	return 0;
+
+	kfree(data_aligned);
+	return status;
 }
 
 /**
diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h
index ba649be284af..ead8e7c4df3a 100644
--- a/drivers/net/wireless/rsi/rsi_sdio.h
+++ b/drivers/net/wireless/rsi/rsi_sdio.h
@@ -46,6 +46,8 @@ enum sdio_interrupt_type {
 #define PKT_BUFF_AVAILABLE                      1
 #define FW_ASSERT_IND                           2
 
+#define RSI_MASTER_REG_BUF_SIZE			12
+
 #define RSI_DEVICE_BUFFER_STATUS_REGISTER       0xf3
 #define RSI_FN1_INT_REGISTER                    0xf9
 #define RSI_INT_ENABLE_REGISTER			0x04

From 91b9e6844240fc79e1155ac6fe36d2cebc292718 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:40 +0800
Subject: [PATCH 1298/1678] rtlwifi: Add modifier static to functions reported
 by sparse

sparse reports some functions were not declared, so add 'static' as
modifier. Remove an unused function btc8821a1ant_is_wifi_status_changed()
exposed due to 'static'.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../rtlwifi/btcoexist/halbtc8723b1ant.c       |  1 +
 .../rtlwifi/btcoexist/halbtc8723b2ant.c       |  6 ++--
 .../rtlwifi/btcoexist/halbtc8821a1ant.c       | 33 -------------------
 .../rtlwifi/btcoexist/halbtc8821a2ant.c       |  4 +--
 4 files changed, 6 insertions(+), 38 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
index 05beb16f0a0a..59553db020ef 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
@@ -1436,6 +1436,7 @@ static void halbtc8723b1ant_ps_tdma(struct btc_coexist *btcoexist,
 	coex_dm->pre_ps_tdma = coex_dm->cur_ps_tdma;
 }
 
+static
 void btc8723b1ant_tdma_dur_adj_for_acl(struct btc_coexist *btcoexist,
 				       u8 wifi_status)
 {
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
index 4907c2ffadfe..73ec31972944 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b2ant.c
@@ -833,9 +833,9 @@ static void btc8723b2ant_set_sw_fulltime_dac_swing(struct btc_coexist *btcoex,
 		btc8723b2ant_set_dac_swing_reg(btcoex, 0x18);
 }
 
-void btc8723b2ant_dac_swing(struct btc_coexist *btcoexist,
-			    bool force_exec, bool dac_swing_on,
-			    u32 dac_swing_lvl)
+static void btc8723b2ant_dac_swing(struct btc_coexist *btcoexist,
+				   bool force_exec, bool dac_swing_on,
+				   u32 dac_swing_lvl)
 {
 	struct rtl_priv *rtlpriv = btcoexist->adapter;
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c
index 0b26419881c0..202597cf8915 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a1ant.c
@@ -426,39 +426,6 @@ static void btc8821a1ant_query_bt_info(struct btc_coexist *btcoexist)
 	btcoexist->btc_fill_h2c(btcoexist, 0x61, 1, h2c_parameter);
 }
 
-bool btc8821a1ant_is_wifi_status_changed(struct btc_coexist *btcoexist)
-{
-	static bool pre_wifi_busy = true;
-	static bool pre_under_4way = true;
-	static bool pre_bt_hs_on = true;
-	bool wifi_busy = false, under_4way = false, bt_hs_on = false;
-	bool wifi_connected = false;
-
-	btcoexist->btc_get(btcoexist, BTC_GET_BL_WIFI_CONNECTED,
-			   &wifi_connected);
-	btcoexist->btc_get(btcoexist, BTC_GET_BL_WIFI_BUSY, &wifi_busy);
-	btcoexist->btc_get(btcoexist, BTC_GET_BL_HS_OPERATION, &bt_hs_on);
-	btcoexist->btc_get(btcoexist, BTC_GET_BL_WIFI_4_WAY_PROGRESS,
-			   &under_4way);
-
-	if (wifi_connected) {
-		if (wifi_busy != pre_wifi_busy) {
-			pre_wifi_busy = wifi_busy;
-			return true;
-		}
-		if (under_4way != pre_under_4way) {
-			pre_under_4way = under_4way;
-			return true;
-		}
-		if (bt_hs_on != pre_bt_hs_on) {
-			pre_bt_hs_on = bt_hs_on;
-			return true;
-		}
-	}
-
-	return false;
-}
-
 static void btc8821a1ant_update_bt_link_info(struct btc_coexist *btcoexist)
 {
 	struct btc_bt_link_info	*bt_link_info = &btcoexist->bt_link_info;
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
index d5f282cb9d24..2202d5e18977 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8821a2ant.c
@@ -359,7 +359,7 @@ static void btc8821a2ant_query_bt_info(struct btc_coexist *btcoexist)
 	btcoexist->btc_fill_h2c(btcoexist, 0x61, 1, h2c_parameter);
 }
 
-bool btc8821a2ant_is_wifi_status_changed(struct btc_coexist *btcoexist)
+static bool btc8821a2ant_is_wifi_status_changed(struct btc_coexist *btcoexist)
 {
 	static bool pre_wifi_busy = true;
 	static bool pre_under_4way = true;
@@ -1517,7 +1517,7 @@ static void btc8821a2ant_action_bt_inquiry(struct btc_coexist *btcoexist)
 	btc8821a2ant_sw_mechanism2(btcoexist, false, false, false, 0x18);
 }
 
-void btc8821a2ant_action_wifi_link_process(struct btc_coexist *btcoexist)
+static void btc8821a2ant_action_wifi_link_process(struct btc_coexist *btcoexist)
 {
 	struct rtl_priv *rtlpriv = btcoexist->adapter;
 	u8 u8tmpa, u8tmpb;

From 757a9eb8df1f224991a3358b59781d842ebce4ff Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:41 +0800
Subject: [PATCH 1299/1678] rtlwifi: remove redundant statement found by static
 checker

smatch reports "rtl_is_special_data() warn: inconsistent indenting", but
it is an obvious redundant statement so remove it.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 6db3389e2ced..762a29cdf7ad 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1549,7 +1549,6 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx,
 		/* EAPOL is seens as in-4way */
 		rtlpriv->btcoexist.btc_info.in_4way = true;
 		rtlpriv->btcoexist.btc_info.in_4way_ts = jiffies;
-	rtlpriv->btcoexist.btc_info.in_4way_ts = jiffies;
 
 		RT_TRACE(rtlpriv, (COMP_SEND | COMP_RECV), DBG_DMESG,
 			 "802.1X %s EAPOL pkt!!\n", (is_tx) ? "Tx" : "Rx");

From 5ffd1155d536d337058dfc677b1256e7d11e4e59 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:42 +0800
Subject: [PATCH 1300/1678] rtlwifi: btcoex: Add enum DM_INFO for btcoex to
 query dm's counters

btcoex uses dm's counters to check the environment is noisy or not. If it
is clean, btcoex set more time slots to WiFi so that it can transmit as
soon as possible. That will save time, and BT will have more time slots
after WiFi traffic is complete.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/wifi.h | 32 +++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 4f48b934ec01..438678c2d14a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -977,6 +977,38 @@ enum rtl_spec_ver {
 	RTL_SPEC_EXT_C2H = BIT(2),	/* extend FW C2H (e.g. TX REPORT) */
 };
 
+enum dm_info_query {
+	DM_INFO_FA_OFDM,
+	DM_INFO_FA_CCK,
+	DM_INFO_FA_TOTAL,
+	DM_INFO_CCA_OFDM,
+	DM_INFO_CCA_CCK,
+	DM_INFO_CCA_ALL,
+	DM_INFO_CRC32_OK_VHT,
+	DM_INFO_CRC32_OK_HT,
+	DM_INFO_CRC32_OK_LEGACY,
+	DM_INFO_CRC32_OK_CCK,
+	DM_INFO_CRC32_ERROR_VHT,
+	DM_INFO_CRC32_ERROR_HT,
+	DM_INFO_CRC32_ERROR_LEGACY,
+	DM_INFO_CRC32_ERROR_CCK,
+	DM_INFO_EDCCA_FLAG,
+	DM_INFO_OFDM_ENABLE,
+	DM_INFO_CCK_ENABLE,
+	DM_INFO_CRC32_OK_HT_AGG,
+	DM_INFO_CRC32_ERROR_HT_AGG,
+	DM_INFO_DBG_PORT_0,
+	DM_INFO_CURR_IGI,
+	DM_INFO_RSSI_MIN,
+	DM_INFO_RSSI_MAX,
+	DM_INFO_CLM_RATIO,
+	DM_INFO_NHM_RATIO,
+	DM_INFO_IQK_ALL,
+	DM_INFO_IQK_OK,
+	DM_INFO_IQK_NG,
+	DM_INFO_SIZE,
+};
+
 struct octet_string {
 	u8 *octet;
 	u16 length;

From a1ee1a09cbba06a5927030df466536d556914e0e Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:43 +0800
Subject: [PATCH 1301/1678] rtlwifi: btcoex: Add customer_id to do special deal
 to oem vendor

Add customer_id field in structure, and then this is a clue to implement
the behavior defined by vendor LENOVO_CHINA.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h | 1 +
 drivers/net/wireless/realtek/rtlwifi/wifi.h                   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index f5d8159a88eb..3c6a1e8851ec 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -154,6 +154,7 @@ struct btc_board_info {
 
 	u8 rfe_type;
 	u8 ant_div_cfg;
+	u8 customer_id;
 };
 
 enum btc_dbg_opcode {
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 438678c2d14a..d27e33960e77 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -556,6 +556,7 @@ enum rt_oem_id {
 	RT_CID_NETGEAR = 36,
 	RT_CID_PLANEX = 37,
 	RT_CID_CC_C = 38,
+	RT_CID_LENOVO_CHINA = 40,
 };
 
 enum hw_descs {

From 123068f2eb30ad4e05dc9d0732d8cc05a82e6983 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:44 +0800
Subject: [PATCH 1302/1678] rtlwifi: btcoex: Get status of multichannel
 concurrence

btcoex does different decision according to MCC or SCC status, but
driver is still SCC currently.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c | 3 +++
 .../net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h | 8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 823694cb4fdb..1a24aed34094 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -577,6 +577,9 @@ static bool halbtc_get(void *void_btcoexist, u8 get_type, void *out_buf)
 			tmp = true;
 		*bool_tmp = tmp;
 		break;
+	case BTC_GET_BL_WIFI_DUAL_BAND_CONNECTED:
+		*u8_tmp = BTC_MULTIPORT_SCC;
+		break;
 	case BTC_GET_BL_WIFI_BUSY:
 		if (halbtc_is_wifi_busy(rtlpriv))
 			*bool_tmp = true;
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index 3c6a1e8851ec..fe793b787716 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -251,6 +251,7 @@ enum btc_get_type {
 	BTC_GET_BL_HS_OPERATION,
 	BTC_GET_BL_HS_CONNECTING,
 	BTC_GET_BL_WIFI_CONNECTED,
+	BTC_GET_BL_WIFI_DUAL_BAND_CONNECTED,
 	BTC_GET_BL_WIFI_BUSY,
 	BTC_GET_BL_WIFI_SCAN,
 	BTC_GET_BL_WIFI_LINK,
@@ -436,6 +437,13 @@ enum btc_notify_type_stack_operation {
 	BTC_STACK_OP_MAX
 };
 
+enum {
+	BTC_MULTIPORT_SCC,
+	BTC_MULTIPORT_MCC_2CHANNEL,
+	BTC_MULTIPORT_MCC_2BAND,
+	BTC_MULTIPORT_MAX
+};
+
 struct btc_bt_info {
 	bool bt_disabled;
 	u8 rssi_adjust_for_agc_table_on;

From 1f0ab4fbacc60d24e76e311bdd8975a32312672f Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:45 +0800
Subject: [PATCH 1303/1678] rtlwifi: btcoex: Add rate table for the use of
 btcoex

The btcoex use the rate to rsolve IOT issue that some APs reduce TX
rate quickly, so it uses the RX rate as a clue to decide TDMA.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger#lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.h  | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index fe793b787716..bc2388f3f953 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -437,6 +437,100 @@ enum btc_notify_type_stack_operation {
 	BTC_STACK_OP_MAX
 };
 
+enum {
+	BTC_CCK_1,
+	BTC_CCK_2,
+	BTC_CCK_5_5,
+	BTC_CCK_11,
+	BTC_OFDM_6,
+	BTC_OFDM_9,
+	BTC_OFDM_12,
+	BTC_OFDM_18,
+	BTC_OFDM_24,
+	BTC_OFDM_36,
+	BTC_OFDM_48,
+	BTC_OFDM_54,
+	BTC_MCS_0,
+	BTC_MCS_1,
+	BTC_MCS_2,
+	BTC_MCS_3,
+	BTC_MCS_4,
+	BTC_MCS_5,
+	BTC_MCS_6,
+	BTC_MCS_7,
+	BTC_MCS_8,
+	BTC_MCS_9,
+	BTC_MCS_10,
+	BTC_MCS_11,
+	BTC_MCS_12,
+	BTC_MCS_13,
+	BTC_MCS_14,
+	BTC_MCS_15,
+	BTC_MCS_16,
+	BTC_MCS_17,
+	BTC_MCS_18,
+	BTC_MCS_19,
+	BTC_MCS_20,
+	BTC_MCS_21,
+	BTC_MCS_22,
+	BTC_MCS_23,
+	BTC_MCS_24,
+	BTC_MCS_25,
+	BTC_MCS_26,
+	BTC_MCS_27,
+	BTC_MCS_28,
+	BTC_MCS_29,
+	BTC_MCS_30,
+	BTC_MCS_31,
+	BTC_VHT_1SS_MCS_0,
+	BTC_VHT_1SS_MCS_1,
+	BTC_VHT_1SS_MCS_2,
+	BTC_VHT_1SS_MCS_3,
+	BTC_VHT_1SS_MCS_4,
+	BTC_VHT_1SS_MCS_5,
+	BTC_VHT_1SS_MCS_6,
+	BTC_VHT_1SS_MCS_7,
+	BTC_VHT_1SS_MCS_8,
+	BTC_VHT_1SS_MCS_9,
+	BTC_VHT_2SS_MCS_0,
+	BTC_VHT_2SS_MCS_1,
+	BTC_VHT_2SS_MCS_2,
+	BTC_VHT_2SS_MCS_3,
+	BTC_VHT_2SS_MCS_4,
+	BTC_VHT_2SS_MCS_5,
+	BTC_VHT_2SS_MCS_6,
+	BTC_VHT_2SS_MCS_7,
+	BTC_VHT_2SS_MCS_8,
+	BTC_VHT_2SS_MCS_9,
+	BTC_VHT_3SS_MCS_0,
+	BTC_VHT_3SS_MCS_1,
+	BTC_VHT_3SS_MCS_2,
+	BTC_VHT_3SS_MCS_3,
+	BTC_VHT_3SS_MCS_4,
+	BTC_VHT_3SS_MCS_5,
+	BTC_VHT_3SS_MCS_6,
+	BTC_VHT_3SS_MCS_7,
+	BTC_VHT_3SS_MCS_8,
+	BTC_VHT_3SS_MCS_9,
+	BTC_VHT_4SS_MCS_0,
+	BTC_VHT_4SS_MCS_1,
+	BTC_VHT_4SS_MCS_2,
+	BTC_VHT_4SS_MCS_3,
+	BTC_VHT_4SS_MCS_4,
+	BTC_VHT_4SS_MCS_5,
+	BTC_VHT_4SS_MCS_6,
+	BTC_VHT_4SS_MCS_7,
+	BTC_VHT_4SS_MCS_8,
+	BTC_VHT_4SS_MCS_9,
+	BTC_MCS_32,
+	BTC_UNKNOWN,
+	BTC_PKT_MGNT,
+	BTC_PKT_CTRL,
+	BTC_PKT_UNKNOWN,
+	BTC_PKT_NOT_FOR_ME,
+	BTC_RATE_MAX
+};
+
 enum {
 	BTC_MULTIPORT_SCC,
 	BTC_MULTIPORT_MCC_2CHANNEL,

From 7fd34dc4d827d3ba87e650cbb36aa0cbeca50d79 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:46 +0800
Subject: [PATCH 1304/1678] rtlwifi: btcoex: Add interaction with phydm

Get phydm's counter and version from the module phydm that is not
submitted so we implement dummy functions.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.c  | 38 +++++++++++++++++++
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.h  |  6 +++
 2 files changed, 44 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 1a24aed34094..bfb32d476a70 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -680,6 +680,21 @@ static bool halbtc_get(void *void_btcoexist, u8 get_type, void *out_buf)
 	case BTC_GET_U4_BT_FORBIDDEN_SLOT_VAL:
 		*u32_tmp = halbtc_get_bt_forbidden_slot_val(btcoexist);
 		break;
+	case BTC_GET_U4_WIFI_IQK_TOTAL:
+		*u32_tmp =
+			btcoexist->btc_phydm_query_phy_counter(btcoexist,
+							       DM_INFO_IQK_ALL);
+		break;
+	case BTC_GET_U4_WIFI_IQK_OK:
+		*u32_tmp =
+			btcoexist->btc_phydm_query_phy_counter(btcoexist,
+							       DM_INFO_IQK_OK);
+		break;
+	case BTC_GET_U4_WIFI_IQK_FAIL:
+		*u32_tmp =
+			btcoexist->btc_phydm_query_phy_counter(btcoexist,
+							       DM_INFO_IQK_NG);
+		break;
 	case BTC_GET_U1_WIFI_DOT11_CHNL:
 		*u8_tmp = rtlphy->current_channel;
 		break;
@@ -1122,6 +1137,25 @@ static bool halbtc_under_ips(struct btc_coexist *btcoexist)
 	return false;
 }
 
+static
+u32 halbtc_get_phydm_version(void *btc_context)
+{
+	return 0;
+}
+
+static
+void halbtc_phydm_modify_ra_pcr_threshold(void *btc_context,
+					  u8 ra_offset_direction,
+					  u8 ra_threshold_offset)
+{
+}
+
+static
+u32 halbtc_phydm_query_phy_counter(void *btc_context, enum dm_info_query dm_id)
+{
+	return 0;
+}
+
 static u8 halbtc_get_ant_det_val_from_bt(void *btc_context)
 {
 	struct btc_coexist *btcoexist = (struct btc_coexist *)btc_context;
@@ -1245,6 +1279,10 @@ bool exhalbtc_initlize_variables(struct rtl_priv *rtlpriv)
 					halbtc_get_bt_coex_supported_feature;
 	btcoexist->btc_get_bt_coex_supported_version =
 					halbtc_get_bt_coex_supported_version;
+	btcoexist->btc_get_bt_phydm_version = halbtc_get_phydm_version;
+	btcoexist->btc_phydm_modify_ra_pcr_threshold =
+					halbtc_phydm_modify_ra_pcr_threshold;
+	btcoexist->btc_phydm_query_phy_counter = halbtc_phydm_query_phy_counter;
 	btcoexist->btc_get_ant_det_val_from_bt = halbtc_get_ant_det_val_from_bt;
 	btcoexist->btc_get_ble_scan_type_from_bt =
 					halbtc_get_ble_scan_type_from_bt;
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index bc2388f3f953..1f12449363c2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -730,6 +730,12 @@ struct btc_coexist {
 			       u32 value);
 	u32 (*btc_get_bt_coex_supported_feature)(void *btcoexist);
 	u32 (*btc_get_bt_coex_supported_version)(void *btcoexist);
+	u32 (*btc_get_bt_phydm_version)(void *btcoexist);
+	void (*btc_phydm_modify_ra_pcr_threshold)(void *btcoexist,
+						  u8 ra_offset_direction,
+						  u8 ra_threshold_offset);
+	u32 (*btc_phydm_query_phy_counter)(void *btcoexist,
+					   enum dm_info_query dm_id);
 	u8 (*btc_get_ant_det_val_from_bt)(void *btcoexist);
 	u8 (*btc_get_ble_scan_type_from_bt)(void *btcoexist);
 	u32 (*btc_get_ble_scan_para_from_bt)(void *btcoexist, u8 scan_type);

From c3468950ef802c659c0ac215e757163833bc563f Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:47 +0800
Subject: [PATCH 1305/1678] rtlwifi: btcoex: Add pre- and post- normal LPS
 function

Normal LPS is decomposed into pre- and post- parts, so we can issue H2C
with TDMA parameters in the "critical section" that dirver can't change
LPS state at the moment.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.c  | 22 +++++++++++++++++++
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.h  |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index bfb32d476a70..0f664a0490a7 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -363,6 +363,22 @@ static void halbtc_normal_lps(struct btc_coexist *btcoexist)
 	}
 }
 
+static void halbtc_pre_normal_lps(struct btc_coexist *btcoexist)
+{
+	struct rtl_priv *rtlpriv = btcoexist->adapter;
+
+	if (btcoexist->bt_info.bt_ctrl_lps) {
+		btcoexist->bt_info.bt_lps_on = false;
+		rtl_lps_leave(rtlpriv->mac80211.hw);
+	}
+}
+
+static void halbtc_post_normal_lps(struct btc_coexist *btcoexist)
+{
+	if (btcoexist->bt_info.bt_ctrl_lps)
+		btcoexist->bt_info.bt_ctrl_lps = false;
+}
+
 static void halbtc_leave_low_power(struct btc_coexist *btcoexist)
 {
 }
@@ -806,6 +822,12 @@ static bool halbtc_set(void *void_btcoexist, u8 set_type, void *in_buf)
 	case BTC_SET_ACT_NORMAL_LPS:
 		halbtc_normal_lps(btcoexist);
 		break;
+	case BTC_SET_ACT_PRE_NORMAL_LPS:
+		halbtc_pre_normal_lps(btcoexist);
+		break;
+	case BTC_SET_ACT_POST_NORMAL_LPS:
+		halbtc_post_normal_lps(btcoexist);
+		break;
 	case BTC_SET_ACT_DISABLE_LOW_POWER:
 		halbtc_disable_low_power(btcoexist, *bool_tmp);
 		break;
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index 1f12449363c2..ad80ec05975e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -349,6 +349,8 @@ enum btc_set_type {
 	BTC_SET_ACT_LEAVE_LPS,
 	BTC_SET_ACT_ENTER_LPS,
 	BTC_SET_ACT_NORMAL_LPS,
+	BTC_SET_ACT_PRE_NORMAL_LPS,
+	BTC_SET_ACT_POST_NORMAL_LPS,
 	BTC_SET_ACT_INC_FORCE_EXEC_PWR_CMD_CNT,
 	BTC_SET_ACT_DISABLE_LOW_POWER,
 	BTC_SET_ACT_UPDATE_RAMASK,

From 0843e98a3b9a373f63465fcfbcac31108fd6c3a3 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:48 +0800
Subject: [PATCH 1306/1678] rtlwifi: btcoex: add assoc type v2 to connection
 notify

In connection notify v1, btcoex only need to know start/end association,
and it will discriminate 2G and 5G band in v2.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.c       | 14 +++++++++++---
 .../realtek/rtlwifi/btcoexist/halbtcoutsrc.h       |  2 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 0f664a0490a7..05300b466d34 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -1610,7 +1610,8 @@ void exhalbtc_scan_notify_wifi_only(struct wifi_only_cfg *wifionly_cfg,
 
 void exhalbtc_connect_notify(struct btc_coexist *btcoexist, u8 action)
 {
-	u8 asso_type;
+	u8 asso_type, asso_type_v2;
+	bool wifi_under_5g;
 
 	if (!halbtc_is_bt_coexist_available(btcoexist))
 		return;
@@ -1618,10 +1619,17 @@ void exhalbtc_connect_notify(struct btc_coexist *btcoexist, u8 action)
 	if (btcoexist->manual_control)
 		return;
 
-	if (action)
+	btcoexist->btc_get(btcoexist, BTC_GET_BL_WIFI_UNDER_5G, &wifi_under_5g);
+
+	if (action) {
 		asso_type = BTC_ASSOCIATE_START;
-	else
+		asso_type_v2 = wifi_under_5g ? BTC_ASSOCIATE_5G_START :
+					       BTC_ASSOCIATE_START;
+	} else {
 		asso_type = BTC_ASSOCIATE_FINISH;
+		asso_type_v2 = wifi_under_5g ? BTC_ASSOCIATE_5G_FINISH :
+					       BTC_ASSOCIATE_FINISH;
+	}
 
 	halbtc_leave_low_power(btcoexist);
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index ad80ec05975e..0a7b09305bc3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -401,6 +401,8 @@ enum btc_notify_type_switchband {
 enum btc_notify_type_associate {
 	BTC_ASSOCIATE_FINISH = 0x0,
 	BTC_ASSOCIATE_START = 0x1,
+	BTC_ASSOCIATE_5G_FINISH = 0x2,
+	BTC_ASSOCIATE_5G_START = 0x3,
 	BTC_ASSOCIATE_MAX
 };
 

From b82f5e0fd2a224df544d3383836274a195f2549d Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:49 +0800
Subject: [PATCH 1307/1678] rtlwifi: btcoex: new definitions introduced by
 8822be

New constant and variables definitions are used by btcoex of 8822b, so
this commit add them.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h   | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index 0a7b09305bc3..049e92a8afbe 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -62,6 +62,8 @@
 #define		BTC_ANT_PATH_WIFI			0
 #define		BTC_ANT_PATH_BT				1
 #define		BTC_ANT_PATH_PTA			2
+#define		BTC_ANT_PATH_WIFI5G			3
+#define		BTC_ANT_PATH_AUTO			4
 /* dual Antenna definition */
 #define		BTC_ANT_WIFI_AT_MAIN			0
 #define		BTC_ANT_WIFI_AT_AUX			1
@@ -205,6 +207,7 @@ enum btc_wifi_traffic_dir {
 enum btc_wifi_pnp {
 	BTC_WIFI_PNP_WAKE_UP = 0x0,
 	BTC_WIFI_PNP_SLEEP = 0x1,
+	BTC_WIFI_PNP_SLEEP_KEEP_ANT = 0x2,
 	BTC_WIFI_PNP_MAX
 };
 
@@ -387,6 +390,7 @@ enum btc_notify_type_lps {
 enum btc_notify_type_scan {
 	BTC_SCAN_FINISH = 0x0,
 	BTC_SCAN_START = 0x1,
+	BTC_SCAN_START_2G = 0x2,
 	BTC_SCAN_MAX
 };
 
@@ -561,6 +565,7 @@ struct btc_bt_info {
 	u16 bt_hci_ver;
 	u16 bt_real_fw_ver;
 	u8 bt_fw_ver;
+	u32 bt_get_fw_ver;
 
 	bool bt_disable_low_pwr;
 
@@ -632,6 +637,7 @@ struct btc_bt_link_info {
 	bool pan_exist;
 	bool pan_only;
 	bool slave_role;
+	bool acl_busy;
 };
 
 enum btc_antenna_pos {

From c3788947251565efa69ae0e1cfa9a1b697429ae9 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 6 Mar 2018 09:25:50 +0800
Subject: [PATCH 1308/1678] rtlwifi: btcoex: Add new but dummy definitions
 introduced by 8822b

btcoex support multiple platforms, but this drivers doesn't support full
functions yet, so this commit adds dummy definitions.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c    | 9 +++++++++
 .../wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h    | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index 05300b466d34..8b6b07a936f5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -656,6 +656,9 @@ static bool halbtc_get(void *void_btcoexist, u8 get_type, void *out_buf)
 	case BTC_GET_BL_IS_ASUS_8723B:
 		*bool_tmp = false;
 		break;
+	case BTC_GET_BL_RF4CE_CONNECTED:
+		*bool_tmp = false;
+		break;
 	case BTC_GET_S4_WIFI_RSSI:
 		*s32_tmp = halbtc_get_wifi_rssi(rtlpriv);
 		break;
@@ -1141,6 +1144,11 @@ static void halbtc_display_dbg_msg(void *bt_context, u8 disp_type,
 	}
 }
 
+static u32 halbtc_get_bt_reg(void *btc_context, u8 reg_type, u32 offset)
+{
+	return 0;
+}
+
 static bool halbtc_under_ips(struct btc_coexist *btcoexist)
 {
 	struct rtl_priv *rtlpriv = btcoexist->adapter;
@@ -1291,6 +1299,7 @@ bool exhalbtc_initlize_variables(struct rtl_priv *rtlpriv)
 	btcoexist->btc_get = halbtc_get;
 	btcoexist->btc_set = halbtc_set;
 	btcoexist->btc_set_bt_reg = halbtc_set_bt_reg;
+	btcoexist->btc_get_bt_reg = halbtc_get_bt_reg;
 
 	btcoexist->bt_info.bt_ctrl_buf_size = false;
 	btcoexist->bt_info.agg_buf_size = 5;
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
index 049e92a8afbe..9eae87d19120 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.h
@@ -338,6 +338,7 @@ enum btc_set_type {
 	BTC_SET_ACT_GET_BT_RSSI,
 	BTC_SET_ACT_AGGREGATE_CTRL,
 	BTC_SET_ACT_ANTPOSREGRISTRY_CTRL,
+	BTC_SET_MIMO_PS_MODE,
 
 	/********* for 1Ant **********/
 	/* type bool */
@@ -356,6 +357,7 @@ enum btc_set_type {
 	BTC_SET_ACT_POST_NORMAL_LPS,
 	BTC_SET_ACT_INC_FORCE_EXEC_PWR_CMD_CNT,
 	BTC_SET_ACT_DISABLE_LOW_POWER,
+	BTC_SET_BL_BT_LNA_CONSTRAIN_LEVEL,
 	BTC_SET_ACT_UPDATE_RAMASK,
 	BTC_SET_ACT_SEND_MIMO_PS,
 	/* BT Coex related */
@@ -738,6 +740,7 @@ struct btc_coexist {
 
 	void (*btc_set_bt_reg)(void *btc_context, u8 reg_type, u32 offset,
 			       u32 value);
+	u32 (*btc_get_bt_reg)(void *btc_context, u8 reg_type, u32 offset);
 	u32 (*btc_get_bt_coex_supported_feature)(void *btcoexist);
 	u32 (*btc_get_bt_coex_supported_version)(void *btcoexist);
 	u32 (*btc_get_bt_phydm_version)(void *btcoexist);

From 692f5deccdae665925cd9542d460fe4771835be5 Mon Sep 17 00:00:00 2001
From: Kevin Lo <kevlo@kevlo.org>
Date: Sat, 17 Mar 2018 22:26:07 +0800
Subject: [PATCH 1309/1678] rtlwifi: correct comment

Correct comment.  Set bit 3 and bit 4 of 0x0005 register (REG_APS_FSMCO + 1)
to 0 which means disable WL suspend, not enable WL suspend.

Signed-off-by: Kevin Lo <kevlo@kevlo.org>
Acked-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/rtl8188ee/pwrseq.h | 4 ++--
 drivers/net/wireless/realtek/rtlwifi/rtl8192ee/pwrseq.h | 4 ++--
 drivers/net/wireless/realtek/rtlwifi/rtl8723ae/pwrseq.h | 4 ++--
 drivers/net/wireless/realtek/rtlwifi/rtl8723be/pwrseq.h | 4 ++--
 drivers/net/wireless/realtek/rtlwifi/rtl8821ae/pwrseq.h | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/pwrseq.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/pwrseq.h
index f2d9c6116e5c..8379a3e5198c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/pwrseq.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/pwrseq.h
@@ -142,7 +142,7 @@
 	/*wait power state to suspend*/},				\
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
 	PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(3) | BIT(4), 0		\
-	/*0x04[12:11] = 2b'01enable WL suspend*/},
+	/*0x04[12:11] = 2b'00 disable WL suspend*/},
 
 #define RTL8188EE_TRANS_CARDEMU_TO_CARDDIS				\
 	{0x0026, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
@@ -179,7 +179,7 @@
 	/*wait power state to suspend*/},				\
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
 	PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(3)|BIT(4), 0		\
-	/*0x04[12:11] = 2b'01enable WL suspend*/},
+	/*0x04[12:11] = 2b'00 disable WL suspend*/},
 
 #define RTL8188EE_TRANS_CARDEMU_TO_PDN					\
 	{0x0006, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/pwrseq.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/pwrseq.h
index 781eeaa6af49..c570801508cc 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/pwrseq.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/pwrseq.h
@@ -134,7 +134,7 @@
 	/*wait power state to suspend*/					\
 	{0x0086, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_SDIO_MSK,	\
 	 PWR_BASEADDR_SDIO , PWR_CMD_POLLING, BIT(1), BIT(1)},		\
-	/*0x04[12:11] = 2b'01enable WL suspend*/			\
+	/*0x04[12:11] = 2b'00 disable WL suspend*/			\
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
 	 PWR_BASEADDR_MAC , PWR_CMD_WRITE, BIT(3) | BIT(4), 0},
 
@@ -181,7 +181,7 @@
 	/*Lock small LDO Register*/					\
 	{0x00CC, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_SDIO_MSK,	\
 	 PWR_BASEADDR_MAC , PWR_CMD_WRITE, BIT(2), 0},			\
-	/*0x04[12:11] = 2b'01enable WL suspend*/			\
+	/*0x04[12:11] = 2b'00 disable WL suspend*/			\
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
 	 PWR_BASEADDR_MAC , PWR_CMD_WRITE, BIT(3) | BIT(4), 0},
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/pwrseq.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/pwrseq.h
index 4ac7db526f15..e6c3aac3e9fd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/pwrseq.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/pwrseq.h
@@ -135,7 +135,7 @@
  /*wait power state to suspend*/ \
 	{0x0086, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_SDIO_MSK,\
 		PWR_BASEADDR_SDIO, PWR_CMD_POLLING, BIT(1), BIT(1)},\
- /*0x04[12:11] = 2b'01enable WL suspend*/ \
+ /*0x04[12:11] = 2b'00 disable WL suspend*/ \
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,\
 		PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(3)|BIT(4), 0},
 
@@ -172,7 +172,7 @@
 	{0x0086, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, \
 		PWR_INTF_SDIO_MSK, PWR_BASEADDR_SDIO,\
 		PWR_CMD_POLLING, BIT(1), BIT(1)},\
- /*0x04[12:11] = 2b'00enable WL suspend*/ \
+ /*0x04[12:11] = 2b'00 disable WL suspend*/ \
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, \
 		PWR_INTF_ALL_MSK, PWR_BASEADDR_MAC,\
 		PWR_CMD_WRITE, BIT(3)|BIT(4), 0},\
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/pwrseq.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/pwrseq.h
index 0fee5e0e55c2..3367cfbc9502 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/pwrseq.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/pwrseq.h
@@ -204,7 +204,7 @@
 	/*0x23[4] = 1b'0 12H LDO enter normal mode*/			\
 	{0x0023, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_SDIO_MSK,	\
 	 PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(4), 0},			\
-	/*0x04[12:11] = 2b'01enable WL suspend*/			\
+	/*0x04[12:11] = 2b'00 disable WL suspend*/			\
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
 	 PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(3)|BIT(4), 0},
 
@@ -251,7 +251,7 @@
 	/*0x48[16] = 0 to disable GPIO9 as EXT WAKEUP*/			\
 	{0x004A, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_USB_MSK,	\
 	 PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(0), 0},			\
-	/*0x04[12:11] = 2b'01enable WL suspend*/			\
+	/*0x04[12:11] = 2b'00 disable WL suspend*/			\
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,	\
 	 PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT(3)|BIT(4), 0},		\
 	/*0x23[4] = 1b'0 12H LDO enter normal mode*/			\
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/pwrseq.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/pwrseq.h
index 36b3e91d996e..6dd575435c63 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/pwrseq.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/pwrseq.h
@@ -531,7 +531,7 @@ extern struct wlan_pwr_cfg  rtl8812_leave_lps_flow
 	 /*0x23[4] = 1b'0 12H LDO enter normal mode*/},   \
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,\
 	PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT3|BIT4, 0 \
-	 /*0x04[12:11] = 2b'01enable WL suspend*/},
+	 /*0x04[12:11] = 2b'00 disable WL suspend*/},
 
 #define RTL8821A_TRANS_CARDEMU_TO_CARDDIS				\
 	{0x0007, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_SDIO_MSK,\
@@ -572,7 +572,7 @@ extern struct wlan_pwr_cfg  rtl8812_leave_lps_flow
 	 /*0x48[16] = 0 to disable GPIO9 as EXT WAKEUP*/},   \
 	{0x0005, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_ALL_MSK,\
 	PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT3|BIT4, 0 \
-	 /*0x04[12:11] = 2b'01enable WL suspend*/},\
+	 /*0x04[12:11] = 2b'00 disable WL suspend*/},\
 	{0x0023, PWR_CUT_ALL_MSK, PWR_FAB_ALL_MSK, PWR_INTF_SDIO_MSK,\
 	PWR_BASEADDR_MAC, PWR_CMD_WRITE, BIT4, 0 \
 	 /*0x23[4] = 1b'0 12H LDO enter normal mode*/},   \

From e153292ae4d1e0c5b27677db9356725f5b614134 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 19 Mar 2018 10:40:54 +0000
Subject: [PATCH 1310/1678] rtlwifi: rtl8821ae: fix spelling mistake:
 "Aboslute" -> "Absolute"

Trivial fix to spelling mistake in RT_TRACE message text.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net?
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/realtek/rtlwifi/rtl8821ae/dm.c  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c
index b11365a5ee1f..9111ba7ff0a1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c
@@ -1475,7 +1475,7 @@ void rtl8812ae_dm_txpwr_track_set_pwr(struct ieee80211_hw *hw,
 		}
 	} else if (method == MIX_MODE) {
 		RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-			 "pDM_Odm->DefaultOfdmIndex=%d, pDM_Odm->Aboslute_OFDMSwingIdx[RFPath]=%d, RF_Path = %d\n",
+			 "pDM_Odm->DefaultOfdmIndex=%d, pDM_Odm->Absolute_OFDMSwingIdx[RFPath]=%d, RF_Path = %d\n",
 			 rtldm->default_ofdm_index,
 			 rtldm->absolute_ofdm_swing_idx[rf_path],
 			 rf_path);
@@ -1750,7 +1750,7 @@ void rtl8812ae_dm_txpower_tracking_callback_thermalmeter(
 			/*Record delta swing for mix mode power tracking*/
 
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-				 "******Temp is higher and pDM_Odm->Aboslute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
+				 "******Temp is higher and pDM_Odm->Absolute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
 			rtldm->absolute_ofdm_swing_idx[RF90_PATH_A]);
 
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
@@ -1766,7 +1766,7 @@ void rtl8812ae_dm_txpower_tracking_callback_thermalmeter(
 			/*Record delta swing for mix mode power tracking*/
 
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-				 "******Temp is higher and pDM_Odm->Aboslute_OFDMSwingIdx[ODM_RF_PATH_B] = %d\n",
+				 "******Temp is higher and pDM_Odm->Absolute_OFDMSwingIdx[ODM_RF_PATH_B] = %d\n",
 				 rtldm->absolute_ofdm_swing_idx[RF90_PATH_B]);
 		} else {
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
@@ -1782,7 +1782,7 @@ void rtl8812ae_dm_txpower_tracking_callback_thermalmeter(
 				-1 * delta_swing_table_idx_tdown_a[delta];
 			/* Record delta swing for mix mode power tracking*/
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-				 "******Temp is lower and pDM_Odm->Aboslute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
+				 "******Temp is lower and pDM_Odm->Absolute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
 				 rtldm->absolute_ofdm_swing_idx[RF90_PATH_A]);
 
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
@@ -1799,7 +1799,7 @@ void rtl8812ae_dm_txpower_tracking_callback_thermalmeter(
 			/*Record delta swing for mix mode power tracking*/
 
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-				 "******Temp is lower and pDM_Odm->Aboslute_OFDMSwingIdx[ODM_RF_PATH_B] = %d\n",
+				 "******Temp is lower and pDM_Odm->Absolute_OFDMSwingIdx[ODM_RF_PATH_B] = %d\n",
 				 rtldm->absolute_ofdm_swing_idx[RF90_PATH_B]);
 		}
 
@@ -2115,7 +2115,7 @@ void rtl8821ae_dm_txpwr_track_set_pwr(struct ieee80211_hw *hw,
 		}
 	} else if (method == MIX_MODE) {
 		RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-			 "pDM_Odm->DefaultOfdmIndex=%d,pDM_Odm->Aboslute_OFDMSwingIdx[RFPath]=%d, RF_Path = %d\n",
+			 "pDM_Odm->DefaultOfdmIndex=%d,pDM_Odm->Absolute_OFDMSwingIdx[RFPath]=%d, RF_Path = %d\n",
 			 rtldm->default_ofdm_index,
 			 rtldm->absolute_ofdm_swing_idx[rf_path],
 			 rf_path);
@@ -2329,7 +2329,7 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter(
 			/*Record delta swing for mix mode power tracking*/
 
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-				 "******Temp is higher and pDM_Odm->Aboslute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
+				 "******Temp is higher and pDM_Odm->Absolute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
 				 rtldm->absolute_ofdm_swing_idx[RF90_PATH_A]);
 		} else {
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
@@ -2345,7 +2345,7 @@ void rtl8821ae_dm_txpower_tracking_callback_thermalmeter(
 				-1 * delta_swing_table_idx_tdown_a[delta];
 			/* Record delta swing for mix mode power tracking*/
 			RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-				 "******Temp is lower and pDM_Odm->Aboslute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
+				 "******Temp is lower and pDM_Odm->Absolute_OFDMSwingIdx[ODM_RF_PATH_A] = %d\n",
 				 rtldm->absolute_ofdm_swing_idx[RF90_PATH_A]);
 		}
 

From 1170f6d1be6a39e1a115a2c0f50923eb4ce2a7ec Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:20 +0100
Subject: [PATCH 1311/1678] brcmfmac: do not convert linux error to firmware
 error string

In case of a linux error brcmf_fil_cmd_data() blurts an error message
in which the error code is translated to an error string. However, it
maps it to a firmware error string which should not happen. Simply
print only the numeric error code and be done with it.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
index fc5751116d99..802d7cb73b80 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
@@ -124,8 +124,7 @@ brcmf_fil_cmd_data(struct brcmf_if *ifp, u32 cmd, void *data, u32 len, bool set)
 					     data, len, &fwerr);
 
 	if (err) {
-		brcmf_dbg(FIL, "Failed: %s (%d)\n",
-			  brcmf_fil_get_errstr((u32)(-err)), err);
+		brcmf_dbg(FIL, "Failed: error=%d\n", err);
 	} else if (fwerr < 0) {
 		brcmf_dbg(FIL, "Firmware error: %s (%d)\n",
 			  brcmf_fil_get_errstr((u32)(-fwerr)), fwerr);

From 756a2b390874d274f2f615921318ef0856ff9313 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:21 +0100
Subject: [PATCH 1312/1678] brcmfmac: use brcmf_chip_name() to store name in
 revinfo

The chip id can either be four or five digits. For the chip name either
the hexadecimal value needs to be taken (four digits) or the decimal
value (five digits). The function brcmf_chip_name() does this conversion
so use it to store the name in driver revision info.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/chip.c        |  9 ++++----
 .../broadcom/brcm80211/brcmfmac/chip.h        |  3 ++-
 .../broadcom/brcm80211/brcmfmac/common.c      | 23 ++++++++++++++-----
 .../broadcom/brcm80211/brcmfmac/core.c        | 10 +-------
 .../broadcom/brcm80211/brcmfmac/core.h        |  3 +--
 5 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
index f7b30ce2300d..bf453699decb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
@@ -464,12 +464,12 @@ static void brcmf_chip_ai_resetcore(struct brcmf_core_priv *core, u32 prereset,
 	ci->ops->read32(ci->ctx, core->wrapbase + BCMA_IOCTL);
 }
 
-static char *brcmf_chip_name(uint chipid, char *buf, uint len)
+char *brcmf_chip_name(u32 id, u32 rev, char *buf, uint len)
 {
 	const char *fmt;
 
-	fmt = ((chipid > 0xa000) || (chipid < 0x4000)) ? "%d" : "%x";
-	snprintf(buf, len, fmt, chipid);
+	fmt = ((id > 0xa000) || (id < 0x4000)) ? "BCM%d/%u" : "BCM%x/%u";
+	snprintf(buf, len, fmt, id, rev);
 	return buf;
 }
 
@@ -924,7 +924,8 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
 	ci->pub.chiprev = (regdata & CID_REV_MASK) >> CID_REV_SHIFT;
 	socitype = (regdata & CID_TYPE_MASK) >> CID_TYPE_SHIFT;
 
-	brcmf_chip_name(ci->pub.chip, ci->pub.name, sizeof(ci->pub.name));
+	brcmf_chip_name(ci->pub.chip, ci->pub.chiprev,
+			ci->pub.name, sizeof(ci->pub.name));
 	brcmf_dbg(INFO, "found %s chip: BCM%s, rev=%d\n",
 		  socitype == SOCI_SB ? "SB" : "AXI", ci->pub.name,
 		  ci->pub.chiprev);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
index dd0ec3eba6a9..0ae3b33bab62 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
@@ -45,7 +45,7 @@ struct brcmf_chip {
 	u32 rambase;
 	u32 ramsize;
 	u32 srsize;
-	char name[8];
+	char name[12];
 };
 
 /**
@@ -93,5 +93,6 @@ void brcmf_chip_resetcore(struct brcmf_core *core, u32 prereset, u32 reset,
 void brcmf_chip_set_passive(struct brcmf_chip *ci);
 bool brcmf_chip_set_active(struct brcmf_chip *ci, u32 rstvec);
 bool brcmf_chip_sr_capable(struct brcmf_chip *pub);
+char *brcmf_chip_name(u32 chipid, u32 chiprev, char *buf, uint len);
 
 #endif /* BRCMF_AXIDMP_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 74f7b56bc7d4..21b0629fdbb3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -30,6 +30,7 @@
 #include "common.h"
 #include "of.h"
 #include "firmware.h"
+#include "chip.h"
 
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Broadcom 802.11 wireless LAN fullmac driver.");
@@ -127,14 +128,13 @@ static int brcmf_c_download(struct brcmf_if *ifp, u16 flag,
 static int brcmf_c_get_clm_name(struct brcmf_if *ifp, u8 *clm_name)
 {
 	struct brcmf_bus *bus = ifp->drvr->bus_if;
-	struct brcmf_rev_info *ri = &ifp->drvr->revinfo;
 	u8 fw_name[BRCMF_FW_NAME_LEN];
 	u8 *ptr;
 	size_t len;
 	s32 err;
 
 	memset(fw_name, 0, BRCMF_FW_NAME_LEN);
-	err = brcmf_bus_get_fwname(bus, ri->chipnum, ri->chiprev, fw_name);
+	err = brcmf_bus_get_fwname(bus, bus->chip, bus->chiprev, fw_name);
 	if (err) {
 		brcmf_err("get firmware name failed (%d)\n", err);
 		goto done;
@@ -234,6 +234,7 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 {
 	s8 eventmask[BRCMF_EVENTING_MASK_LEN];
 	u8 buf[BRCMF_DCMD_SMLEN];
+	struct brcmf_bus *bus;
 	struct brcmf_rev_info_le revinfo;
 	struct brcmf_rev_info *ri;
 	char *clmver;
@@ -249,16 +250,18 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 	}
 	memcpy(ifp->drvr->mac, ifp->mac_addr, sizeof(ifp->drvr->mac));
 
+	bus = ifp->drvr->bus_if;
+	ri = &ifp->drvr->revinfo;
+
 	err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_REVINFO,
 				     &revinfo, sizeof(revinfo));
-	ri = &ifp->drvr->revinfo;
 	if (err < 0) {
 		brcmf_err("retrieving revision info failed, %d\n", err);
+		strlcpy(ri->chipname, "UNKNOWN", sizeof(ri->chipname));
 	} else {
 		ri->vendorid = le32_to_cpu(revinfo.vendorid);
 		ri->deviceid = le32_to_cpu(revinfo.deviceid);
 		ri->radiorev = le32_to_cpu(revinfo.radiorev);
-		ri->chiprev = le32_to_cpu(revinfo.chiprev);
 		ri->corerev = le32_to_cpu(revinfo.corerev);
 		ri->boardid = le32_to_cpu(revinfo.boardid);
 		ri->boardvendor = le32_to_cpu(revinfo.boardvendor);
@@ -266,15 +269,23 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 		ri->driverrev = le32_to_cpu(revinfo.driverrev);
 		ri->ucoderev = le32_to_cpu(revinfo.ucoderev);
 		ri->bus = le32_to_cpu(revinfo.bus);
-		ri->chipnum = le32_to_cpu(revinfo.chipnum);
 		ri->phytype = le32_to_cpu(revinfo.phytype);
 		ri->phyrev = le32_to_cpu(revinfo.phyrev);
 		ri->anarev = le32_to_cpu(revinfo.anarev);
 		ri->chippkg = le32_to_cpu(revinfo.chippkg);
 		ri->nvramrev = le32_to_cpu(revinfo.nvramrev);
+
+		if (!bus->chip) {
+			bus->chip = le32_to_cpu(revinfo.chipnum);
+			bus->chiprev = le32_to_cpu(revinfo.chiprev);
+		}
 	}
 	ri->result = err;
 
+	if (bus->chip)
+		brcmf_chip_name(bus->chip, bus->chiprev,
+				ri->chipname, sizeof(ri->chipname));
+
 	/* Do any CLM downloading */
 	err = brcmf_c_process_clm_blob(ifp);
 	if (err < 0) {
@@ -295,7 +306,7 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 	strsep(&ptr, "\n");
 
 	/* Print fw version info */
-	brcmf_info("Firmware version = %s\n", buf);
+	brcmf_info("Firmware: %s %s\n", ri->chipname, buf);
 
 	/* locate firmware version number for ethtool */
 	ptr = strrchr(buf, ' ') + 1;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 19048526b4af..e8c16d7f2341 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -924,8 +924,7 @@ static int brcmf_revinfo_read(struct seq_file *s, void *data)
 	seq_printf(s, "vendorid: 0x%04x\n", ri->vendorid);
 	seq_printf(s, "deviceid: 0x%04x\n", ri->deviceid);
 	seq_printf(s, "radiorev: %s\n", brcmu_dotrev_str(ri->radiorev, drev));
-	seq_printf(s, "chipnum: %u (%x)\n", ri->chipnum, ri->chipnum);
-	seq_printf(s, "chiprev: %u\n", ri->chiprev);
+	seq_printf(s, "chip: %s\n", ri->chipname);
 	seq_printf(s, "chippkg: %u\n", ri->chippkg);
 	seq_printf(s, "corerev: %u\n", ri->corerev);
 	seq_printf(s, "boardid: 0x%04x\n", ri->boardid);
@@ -975,13 +974,6 @@ static int brcmf_bus_started(struct brcmf_pub *drvr)
 
 	brcmf_debugfs_add_entry(drvr, "revinfo", brcmf_revinfo_read);
 
-	/* assure we have chipid before feature attach */
-	if (!bus_if->chip) {
-		bus_if->chip = drvr->revinfo.chipnum;
-		bus_if->chiprev = drvr->revinfo.chiprev;
-		brcmf_dbg(INFO, "firmware revinfo: chip %x (%d) rev %d\n",
-			  bus_if->chip, bus_if->chip, bus_if->chiprev);
-	}
 	brcmf_feat_attach(drvr);
 
 	ret = brcmf_proto_init_done(drvr);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
index 232dcbb83311..0bb67aeb3afd 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
@@ -87,7 +87,6 @@ struct brcmf_rev_info {
 	u32 vendorid;
 	u32 deviceid;
 	u32 radiorev;
-	u32 chiprev;
 	u32 corerev;
 	u32 boardid;
 	u32 boardvendor;
@@ -95,7 +94,7 @@ struct brcmf_rev_info {
 	u32 driverrev;
 	u32 ucoderev;
 	u32 bus;
-	u32 chipnum;
+	char chipname[12];
 	u32 phytype;
 	u32 phyrev;
 	u32 anarev;

From c88cfa075de356ddf40c668896b2126340f19ba4 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:22 +0100
Subject: [PATCH 1313/1678] brcmfmac: use brcmf_chip_name() for consistency

When logging the chip id/revision information make use of
brcmf_chip_name() so it is always the same.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c    | 5 ++---
 .../net/wireless/broadcom/brcm80211/brcmfmac/firmware.c    | 7 +++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
index bf453699decb..3b829fed8631 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
@@ -926,9 +926,8 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
 
 	brcmf_chip_name(ci->pub.chip, ci->pub.chiprev,
 			ci->pub.name, sizeof(ci->pub.name));
-	brcmf_dbg(INFO, "found %s chip: BCM%s, rev=%d\n",
-		  socitype == SOCI_SB ? "SB" : "AXI", ci->pub.name,
-		  ci->pub.chiprev);
+	brcmf_dbg(INFO, "found %s chip: %s\n",
+		  socitype == SOCI_SB ? "SB" : "AXI", ci->pub.name);
 
 	if (socitype == SOCI_SB) {
 		if (ci->pub.chip != BRCM_CC_4329_CHIP_ID) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index 091b52979e03..e2199f53ebef 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -25,6 +25,7 @@
 #include "firmware.h"
 #include "core.h"
 #include "common.h"
+#include "chip.h"
 
 #define BRCMF_FW_MAX_NVRAM_SIZE			64000
 #define BRCMF_FW_NVRAM_DEVPATH_LEN		19	/* devpath0=pcie/1/4/ */
@@ -567,6 +568,7 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 			      u32 table_size, char fw_name[BRCMF_FW_NAME_LEN],
 			      char nvram_name[BRCMF_FW_NAME_LEN])
 {
+	char chipname[12];
 	u32 i;
 	char end;
 
@@ -581,6 +583,8 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 		return -ENODEV;
 	}
 
+	brcmf_chip_name(chip, chiprev, chipname, sizeof(chipname));
+
 	/* check if firmware path is provided by module parameter */
 	if (brcmf_mp_global.firmware_path[0] != '\0') {
 		strlcpy(fw_name, brcmf_mp_global.firmware_path,
@@ -601,8 +605,7 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 	if ((nvram_name) && (mapping_table[i].nvram))
 		strlcat(nvram_name, mapping_table[i].nvram, BRCMF_FW_NAME_LEN);
 
-	brcmf_info("using %s for chip %#08x(%d) rev %#08x\n",
-		   fw_name, chip, chip, chiprev);
+	brcmf_info("using %s for chip %s\n", fw_name, chipname);
 
 	return 0;
 }

From 856d5a011c86b59f6564be4508912fb1d866adfc Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:23 +0100
Subject: [PATCH 1314/1678] brcmfmac: allocate struct brcmf_pub instance using
 wiphy_new()

Rework the driver so the wiphy instance holds the main driver information
in its private buffer. Previously it held struct brcmf_cfg80211_info
instance so a bit of reorg was needed. This was done so that the wiphy
name or its parent device can be shown in debug output.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/btcoex.c      |  2 +-
 .../broadcom/brcm80211/brcmfmac/cfg80211.c    | 86 +++++++++----------
 .../broadcom/brcm80211/brcmfmac/cfg80211.h    | 17 ++--
 .../broadcom/brcm80211/brcmfmac/common.c      |  2 +
 .../broadcom/brcm80211/brcmfmac/core.c        | 27 ++++--
 .../broadcom/brcm80211/brcmfmac/core.h        |  1 +
 .../broadcom/brcm80211/brcmfmac/p2p.c         |  2 +-
 7 files changed, 76 insertions(+), 61 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c
index 03aae6bc1838..372363a6e752 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/btcoex.c
@@ -462,7 +462,7 @@ static void brcmf_btcoex_dhcp_end(struct brcmf_btcoex_info *btci)
 int brcmf_btcoex_set_mode(struct brcmf_cfg80211_vif *vif,
 			  enum brcmf_btcoex_mode mode, u16 duration)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(vif->wdev.wiphy);
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(vif->wdev.wiphy);
 	struct brcmf_btcoex_info *btci = cfg->btcoex;
 	struct brcmf_if *ifp = brcmf_get_ifp(cfg->pub, 0);
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 74a83020c073..4133d662acc3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -753,7 +753,7 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg,
 static int brcmf_cfg80211_del_ap_iface(struct wiphy *wiphy,
 				       struct wireless_dev *wdev)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 	struct net_device *ndev = wdev->netdev;
 	struct brcmf_if *ifp = netdev_priv(ndev);
 	int ret;
@@ -786,7 +786,7 @@ err_unarm:
 static
 int brcmf_cfg80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 	struct net_device *ndev = wdev->netdev;
 
 	if (ndev && ndev == cfg_to_ndev(cfg))
@@ -831,7 +831,7 @@ brcmf_cfg80211_change_iface(struct wiphy *wiphy, struct net_device *ndev,
 			 enum nl80211_iftype type,
 			 struct vif_params *params)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 	struct brcmf_if *ifp = netdev_priv(ndev);
 	struct brcmf_cfg80211_vif *vif = ifp->vif;
 	s32 infra = 0;
@@ -2127,17 +2127,15 @@ static s32
 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev,
 			    s32 *dbm)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
-	struct net_device *ndev = cfg_to_ndev(cfg);
-	struct brcmf_if *ifp = netdev_priv(ndev);
+	struct brcmf_cfg80211_vif *vif = wdev_to_vif(wdev);
 	s32 qdbm = 0;
 	s32 err;
 
 	brcmf_dbg(TRACE, "Enter\n");
-	if (!check_vif_up(ifp->vif))
+	if (!check_vif_up(vif))
 		return -EIO;
 
-	err = brcmf_fil_iovar_int_get(ifp, "qtxpower", &qdbm);
+	err = brcmf_fil_iovar_int_get(vif->ifp, "qtxpower", &qdbm);
 	if (err) {
 		brcmf_err("error (%d)\n", err);
 		goto done;
@@ -3358,7 +3356,7 @@ brcmf_cfg80211_sched_scan_start(struct wiphy *wiphy,
 				struct cfg80211_sched_scan_request *req)
 {
 	struct brcmf_if *ifp = netdev_priv(ndev);
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 
 	brcmf_dbg(SCAN, "Enter: n_match_sets=%d n_ssids=%d\n",
 		  req->n_match_sets, req->n_ssids);
@@ -5190,6 +5188,12 @@ static struct cfg80211_ops brcmf_cfg80211_ops = {
 	.del_pmk = brcmf_cfg80211_del_pmk,
 };
 
+struct cfg80211_ops *brcmf_cfg80211_get_ops(void)
+{
+	return kmemdup(&brcmf_cfg80211_ops, sizeof(brcmf_cfg80211_ops),
+		       GFP_KERNEL);
+}
+
 struct brcmf_cfg80211_vif *brcmf_alloc_vif(struct brcmf_cfg80211_info *cfg,
 					   enum nl80211_iftype type)
 {
@@ -5897,7 +5901,7 @@ static void brcmf_update_bw40_channel_flag(struct ieee80211_channel *channel,
 static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg,
 				    u32 bw_cap[])
 {
-	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
+	struct brcmf_if *ifp = brcmf_get_ifp(cfg->pub, 0);
 	struct ieee80211_supported_band *band;
 	struct ieee80211_channel *channel;
 	struct wiphy *wiphy;
@@ -6012,7 +6016,7 @@ fail_pbuf:
 
 static int brcmf_enable_bw40_2g(struct brcmf_cfg80211_info *cfg)
 {
-	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
+	struct brcmf_if *ifp = brcmf_get_ifp(cfg->pub, 0);
 	struct ieee80211_supported_band *band;
 	struct brcmf_fil_bwcap_le band_bwcap;
 	struct brcmf_chanspec_list *list;
@@ -6197,10 +6201,10 @@ static void brcmf_update_vht_cap(struct ieee80211_supported_band *band,
 	}
 }
 
-static int brcmf_setup_wiphybands(struct wiphy *wiphy)
+static int brcmf_setup_wiphybands(struct brcmf_cfg80211_info *cfg)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
-	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
+	struct brcmf_if *ifp = brcmf_get_ifp(cfg->pub, 0);
+	struct wiphy *wiphy;
 	u32 nmode = 0;
 	u32 vhtmode = 0;
 	u32 bw_cap[2] = { WLC_BW_20MHZ_BIT, WLC_BW_20MHZ_BIT };
@@ -6794,8 +6798,8 @@ static s32 brcmf_translate_country_code(struct brcmf_pub *drvr, char alpha2[2],
 static void brcmf_cfg80211_reg_notifier(struct wiphy *wiphy,
 					struct regulatory_request *req)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
-	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
+	struct brcmf_if *ifp = brcmf_get_ifp(cfg->pub, 0);
 	struct brcmf_fil_country_le ccreq;
 	s32 err;
 	int i;
@@ -6830,7 +6834,7 @@ static void brcmf_cfg80211_reg_notifier(struct wiphy *wiphy,
 		brcmf_err("Firmware rejected country setting\n");
 		return;
 	}
-	brcmf_setup_wiphybands(wiphy);
+	brcmf_setup_wiphybands(cfg);
 }
 
 static void brcmf_free_wiphy(struct wiphy *wiphy)
@@ -6857,17 +6861,15 @@ static void brcmf_free_wiphy(struct wiphy *wiphy)
 	if (wiphy->wowlan != &brcmf_wowlan_support)
 		kfree(wiphy->wowlan);
 #endif
-	wiphy_free(wiphy);
 }
 
 struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
-						  struct device *busdev,
+						  struct cfg80211_ops *ops,
 						  bool p2pdev_forced)
 {
+	struct wiphy *wiphy = drvr->wiphy;
 	struct net_device *ndev = brcmf_get_ifp(drvr, 0)->ndev;
 	struct brcmf_cfg80211_info *cfg;
-	struct wiphy *wiphy;
-	struct cfg80211_ops *ops;
 	struct brcmf_cfg80211_vif *vif;
 	struct brcmf_if *ifp;
 	s32 err = 0;
@@ -6879,26 +6881,13 @@ struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
 		return NULL;
 	}
 
-	ops = kmemdup(&brcmf_cfg80211_ops, sizeof(*ops), GFP_KERNEL);
-	if (!ops)
-		return NULL;
-
-	ifp = netdev_priv(ndev);
-#ifdef CONFIG_PM
-	if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_WOWL_GTK))
-		ops->set_rekey_data = brcmf_cfg80211_set_rekey_data;
-#endif
-	wiphy = wiphy_new(ops, sizeof(struct brcmf_cfg80211_info));
-	if (!wiphy) {
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg) {
 		brcmf_err("Could not allocate wiphy device\n");
-		goto ops_out;
+		return NULL;
 	}
-	memcpy(wiphy->perm_addr, drvr->mac, ETH_ALEN);
-	set_wiphy_dev(wiphy, busdev);
 
-	cfg = wiphy_priv(wiphy);
 	cfg->wiphy = wiphy;
-	cfg->ops = ops;
 	cfg->pub = drvr;
 	init_vif_event(&cfg->vif_event);
 	INIT_LIST_HEAD(&cfg->vif_list);
@@ -6907,6 +6896,7 @@ struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
 	if (IS_ERR(vif))
 		goto wiphy_out;
 
+	ifp = netdev_priv(ndev);
 	vif->ifp = ifp;
 	vif->wdev.netdev = ndev;
 	ndev->ieee80211_ptr = &vif->wdev;
@@ -6933,6 +6923,11 @@ struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
 	if (err < 0)
 		goto priv_out;
 
+	/* regulatory notifer below needs access to cfg so
+	 * assign it now.
+	 */
+	drvr->config = cfg;
+
 	brcmf_dbg(INFO, "Registering custom regulatory\n");
 	wiphy->reg_notifier = brcmf_cfg80211_reg_notifier;
 	wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG;
@@ -6946,13 +6941,17 @@ struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
 		cap = &wiphy->bands[NL80211_BAND_2GHZ]->ht_cap.cap;
 		*cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40;
 	}
+#ifdef CONFIG_PM
+	if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_WOWL_GTK))
+		ops->set_rekey_data = brcmf_cfg80211_set_rekey_data;
+#endif
 	err = wiphy_register(wiphy);
 	if (err < 0) {
 		brcmf_err("Could not register wiphy device (%d)\n", err);
 		goto priv_out;
 	}
 
-	err = brcmf_setup_wiphybands(wiphy);
+	err = brcmf_setup_wiphybands(cfg);
 	if (err) {
 		brcmf_err("Setting wiphy bands failed (%d)\n", err);
 		goto wiphy_unreg_out;
@@ -6969,12 +6968,7 @@ struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
 		else
 			*cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
 	}
-	/* p2p might require that "if-events" get processed by fweh. So
-	 * activate the already registered event handlers now and activate
-	 * the rest when initialization has completed. drvr->config needs to
-	 * be assigned before activating events.
-	 */
-	drvr->config = cfg;
+
 	err = brcmf_fweh_activate_events(ifp);
 	if (err) {
 		brcmf_err("FWEH activation failed (%d)\n", err);
@@ -7042,8 +7036,7 @@ priv_out:
 	ifp->vif = NULL;
 wiphy_out:
 	brcmf_free_wiphy(wiphy);
-ops_out:
-	kfree(ops);
+	kfree(cfg);
 	return NULL;
 }
 
@@ -7058,4 +7051,5 @@ void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg)
 	kfree(cfg->ops);
 	wl_deinit_priv(cfg);
 	brcmf_free_wiphy(cfg->wiphy);
+	kfree(cfg);
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
index b5b5f0f10b63..a4aec0004e4f 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
@@ -355,20 +355,24 @@ static inline struct wiphy *cfg_to_wiphy(struct brcmf_cfg80211_info *cfg)
 
 static inline struct brcmf_cfg80211_info *wiphy_to_cfg(struct wiphy *w)
 {
-	return (struct brcmf_cfg80211_info *)(wiphy_priv(w));
+	struct brcmf_pub *drvr = wiphy_priv(w);
+	return drvr->config;
 }
 
 static inline struct brcmf_cfg80211_info *wdev_to_cfg(struct wireless_dev *wd)
 {
-	return (struct brcmf_cfg80211_info *)(wdev_priv(wd));
+	return wiphy_to_cfg(wd->wiphy);
+}
+
+static inline struct brcmf_cfg80211_vif *wdev_to_vif(struct wireless_dev *wdev)
+{
+	return container_of(wdev, struct brcmf_cfg80211_vif, wdev);
 }
 
 static inline
 struct net_device *cfg_to_ndev(struct brcmf_cfg80211_info *cfg)
 {
-	struct brcmf_cfg80211_vif *vif;
-	vif = list_first_entry(&cfg->vif_list, struct brcmf_cfg80211_vif, list);
-	return vif->wdev.netdev;
+	return brcmf_get_ifp(cfg->pub, 0)->ndev;
 }
 
 static inline struct brcmf_cfg80211_info *ndev_to_cfg(struct net_device *ndev)
@@ -395,11 +399,12 @@ brcmf_cfg80211_connect_info *cfg_to_conn(struct brcmf_cfg80211_info *cfg)
 }
 
 struct brcmf_cfg80211_info *brcmf_cfg80211_attach(struct brcmf_pub *drvr,
-						  struct device *busdev,
+						  struct cfg80211_ops *ops,
 						  bool p2pdev_forced);
 void brcmf_cfg80211_detach(struct brcmf_cfg80211_info *cfg);
 s32 brcmf_cfg80211_up(struct net_device *ndev);
 s32 brcmf_cfg80211_down(struct net_device *ndev);
+struct cfg80211_ops *brcmf_cfg80211_get_ops(void);
 enum nl80211_iftype brcmf_cfg80211_get_iftype(struct brcmf_if *ifp);
 
 struct brcmf_cfg80211_vif *brcmf_alloc_vif(struct brcmf_cfg80211_info *cfg,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 21b0629fdbb3..bb584c525ec6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -248,6 +248,7 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 		brcmf_err("Retreiving cur_etheraddr failed, %d\n", err);
 		goto done;
 	}
+	memcpy(ifp->drvr->wiphy->perm_addr, ifp->drvr->mac, ETH_ALEN);
 	memcpy(ifp->drvr->mac, ifp->mac_addr, sizeof(ifp->drvr->mac));
 
 	bus = ifp->drvr->bus_if;
@@ -275,6 +276,7 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
 		ri->chippkg = le32_to_cpu(revinfo.chippkg);
 		ri->nvramrev = le32_to_cpu(revinfo.nvramrev);
 
+		/* use revinfo if not known yet */
 		if (!bus->chip) {
 			bus->chip = le32_to_cpu(revinfo.chipnum);
 			bus->chiprev = le32_to_cpu(revinfo.chiprev);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index e8c16d7f2341..8039732161f1 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -943,7 +943,7 @@ static int brcmf_revinfo_read(struct seq_file *s, void *data)
 	return 0;
 }
 
-static int brcmf_bus_started(struct brcmf_pub *drvr)
+static int brcmf_bus_started(struct brcmf_pub *drvr, struct cfg80211_ops *ops)
 {
 	int ret = -1;
 	struct brcmf_bus *bus_if = drvr->bus_if;
@@ -982,7 +982,7 @@ static int brcmf_bus_started(struct brcmf_pub *drvr)
 
 	brcmf_proto_add_if(drvr, ifp);
 
-	drvr->config = brcmf_cfg80211_attach(drvr, bus_if->dev,
+	drvr->config = brcmf_cfg80211_attach(drvr, ops,
 					     drvr->settings->p2p_enable);
 	if (drvr->config == NULL) {
 		ret = -ENOMEM;
@@ -1037,17 +1037,26 @@ fail:
 
 int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
 {
+	struct wiphy *wiphy;
+	struct cfg80211_ops *ops;
 	struct brcmf_pub *drvr = NULL;
 	int ret = 0;
 	int i;
 
 	brcmf_dbg(TRACE, "Enter\n");
 
-	/* Allocate primary brcmf_info */
-	drvr = kzalloc(sizeof(*drvr), GFP_ATOMIC);
-	if (!drvr)
+	ops = brcmf_cfg80211_get_ops();
+	if (!ops)
 		return -ENOMEM;
 
+	wiphy = wiphy_new(ops, sizeof(*drvr));
+	if (!wiphy)
+		return -ENOMEM;
+
+	set_wiphy_dev(wiphy, dev);
+	drvr = wiphy_priv(wiphy);
+	drvr->wiphy = wiphy;
+
 	for (i = 0; i < ARRAY_SIZE(drvr->if2bss); i++)
 		drvr->if2bss[i] = BRCMF_BSSIDX_INVALID;
 
@@ -1076,15 +1085,18 @@ int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
 	/* attach firmware event handler */
 	brcmf_fweh_attach(drvr);
 
-	ret = brcmf_bus_started(drvr);
+	ret = brcmf_bus_started(drvr, ops);
 	if (ret != 0) {
 		brcmf_err("dongle is not responding: err=%d\n", ret);
 		goto fail;
 	}
+
+	drvr->config->ops = ops;
 	return 0;
 
 fail:
 	brcmf_detach(dev);
+	kfree(ops);
 
 	return ret;
 }
@@ -1142,6 +1154,7 @@ void brcmf_detach(struct device *dev)
 		brcmf_remove_interface(drvr->iflist[i], false);
 
 	brcmf_cfg80211_detach(drvr->config);
+	drvr->config = NULL;
 
 	brcmf_bus_stop(drvr->bus_if);
 
@@ -1149,7 +1162,7 @@ void brcmf_detach(struct device *dev)
 
 	brcmf_debug_detach(drvr);
 	bus_if->drvr = NULL;
-	kfree(drvr);
+	wiphy_free(drvr->wiphy);
 }
 
 s32 brcmf_iovar_data_set(struct device *dev, char *name, void *data, u32 len)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
index 0bb67aeb3afd..401f50458686 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
@@ -107,6 +107,7 @@ struct brcmf_pub {
 	/* Linkage ponters */
 	struct brcmf_bus *bus_if;
 	struct brcmf_proto *proto;
+	struct wiphy *wiphy;
 	struct brcmf_cfg80211_info *config;
 
 	/* Internal brcmf items */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 82064e909784..bcef208a81a5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2227,7 +2227,7 @@ fail:
  */
 int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
 {
-	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
+	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 	struct brcmf_p2p_info *p2p = &cfg->p2p;
 	struct brcmf_cfg80211_vif *vif;
 	enum nl80211_iftype iftype;

From 34789d0cf682c643862792750a06c31ccf016cbc Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:24 +0100
Subject: [PATCH 1315/1678] brcmfmac: use wiphy debugfs dir entry

The driver used to create a brcmfmac dir entry at the top level
debugfs mount point. This moves the debugfs entries into the
wiphy debugfs dir entry.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/bcdc.c        |  6 +++
 .../broadcom/brcm80211/brcmfmac/common.c      |  5 ---
 .../broadcom/brcm80211/brcmfmac/core.c        | 11 +++--
 .../broadcom/brcm80211/brcmfmac/debug.c       | 42 ++-----------------
 .../broadcom/brcm80211/brcmfmac/debug.h       | 17 --------
 .../broadcom/brcm80211/brcmfmac/feature.c     |  3 ++
 .../broadcom/brcm80211/brcmfmac/feature.h     |  7 ++++
 .../broadcom/brcm80211/brcmfmac/fwsignal.c    | 11 +++--
 .../broadcom/brcm80211/brcmfmac/fwsignal.h    |  1 +
 .../broadcom/brcm80211/brcmfmac/msgbuf.c      |  8 +++-
 .../broadcom/brcm80211/brcmfmac/proto.c       |  3 +-
 .../broadcom/brcm80211/brcmfmac/proto.h       |  7 ++++
 12 files changed, 47 insertions(+), 74 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c
index 2d3a5dd07a3f..1068a2a4494c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c
@@ -445,6 +445,11 @@ brcmf_proto_bcdc_init_done(struct brcmf_pub *drvr)
 	return 0;
 }
 
+static void brcmf_proto_bcdc_debugfs_create(struct brcmf_pub *drvr)
+{
+	brcmf_fws_debugfs_create(drvr);
+}
+
 int brcmf_proto_bcdc_attach(struct brcmf_pub *drvr)
 {
 	struct brcmf_bcdc *bcdc;
@@ -472,6 +477,7 @@ int brcmf_proto_bcdc_attach(struct brcmf_pub *drvr)
 	drvr->proto->del_if = brcmf_proto_bcdc_del_if;
 	drvr->proto->reset_if = brcmf_proto_bcdc_reset_if;
 	drvr->proto->init_done = brcmf_proto_bcdc_init_done;
+	drvr->proto->debugfs_create = brcmf_proto_bcdc_debugfs_create;
 	drvr->proto->pd = bcdc;
 
 	drvr->hdrlen += BCDC_HEADER_LEN + BRCMF_PROT_FW_SIGNAL_MAX_TXBYTES;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index bb584c525ec6..3979f7814021 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -524,9 +524,6 @@ static int __init brcmfmac_module_init(void)
 {
 	int err;
 
-	/* Initialize debug system first */
-	brcmf_debugfs_init();
-
 	/* Get the platform data (if available) for our devices */
 	err = platform_driver_probe(&brcmf_pd, brcmf_common_pd_probe);
 	if (err == -ENODEV)
@@ -538,7 +535,6 @@ static int __init brcmfmac_module_init(void)
 	/* Continue the initialization by registering the different busses */
 	err = brcmf_core_init();
 	if (err) {
-		brcmf_debugfs_exit();
 		if (brcmfmac_pdata)
 			platform_driver_unregister(&brcmf_pd);
 	}
@@ -551,7 +547,6 @@ static void __exit brcmfmac_module_exit(void)
 	brcmf_core_exit();
 	if (brcmfmac_pdata)
 		platform_driver_unregister(&brcmf_pd);
-	brcmf_debugfs_exit();
 }
 
 module_init(brcmfmac_module_init);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 8039732161f1..eb5478513a1b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -972,8 +972,6 @@ static int brcmf_bus_started(struct brcmf_pub *drvr, struct cfg80211_ops *ops)
 	if (ret < 0)
 		goto fail;
 
-	brcmf_debugfs_add_entry(drvr, "revinfo", brcmf_revinfo_read);
-
 	brcmf_feat_attach(drvr);
 
 	ret = brcmf_proto_init_done(drvr);
@@ -1016,6 +1014,11 @@ static int brcmf_bus_started(struct brcmf_pub *drvr, struct cfg80211_ops *ops)
 #endif
 #endif /* CONFIG_INET */
 
+	/* populate debugfs */
+	brcmf_debugfs_add_entry(drvr, "revinfo", brcmf_revinfo_read);
+	brcmf_feat_debugfs_create(drvr);
+	brcmf_proto_debugfs_create(drvr);
+
 	return 0;
 
 fail:
@@ -1068,9 +1071,6 @@ int brcmf_attach(struct device *dev, struct brcmf_mp_device *settings)
 	drvr->bus_if->drvr = drvr;
 	drvr->settings = settings;
 
-	/* attach debug facilities */
-	brcmf_debug_attach(drvr);
-
 	/* Attach and link in the protocol */
 	ret = brcmf_proto_attach(drvr);
 	if (ret != 0) {
@@ -1160,7 +1160,6 @@ void brcmf_detach(struct device *dev)
 
 	brcmf_proto_detach(drvr);
 
-	brcmf_debug_detach(drvr);
 	bus_if->drvr = NULL;
 	wiphy_free(drvr->wiphy);
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
index 2d3e5e263a32..504832084eca 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
@@ -25,8 +25,6 @@
 #include "fweh.h"
 #include "debug.h"
 
-static struct dentry *root_folder;
-
 int brcmf_debug_create_memdump(struct brcmf_bus *bus, const void *data,
 			       size_t len)
 {
@@ -54,44 +52,9 @@ int brcmf_debug_create_memdump(struct brcmf_bus *bus, const void *data,
 	return 0;
 }
 
-void brcmf_debugfs_init(void)
-{
-	root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	if (IS_ERR(root_folder))
-		root_folder = NULL;
-}
-
-void brcmf_debugfs_exit(void)
-{
-	if (!root_folder)
-		return;
-
-	debugfs_remove_recursive(root_folder);
-	root_folder = NULL;
-}
-
-int brcmf_debug_attach(struct brcmf_pub *drvr)
-{
-	struct device *dev = drvr->bus_if->dev;
-
-	if (!root_folder)
-		return -ENODEV;
-
-	drvr->dbgfs_dir = debugfs_create_dir(dev_name(dev), root_folder);
-	return PTR_ERR_OR_ZERO(drvr->dbgfs_dir);
-}
-
-void brcmf_debug_detach(struct brcmf_pub *drvr)
-{
-	brcmf_fweh_unregister(drvr, BRCMF_E_PSM_WATCHDOG);
-
-	if (!IS_ERR_OR_NULL(drvr->dbgfs_dir))
-		debugfs_remove_recursive(drvr->dbgfs_dir);
-}
-
 struct dentry *brcmf_debugfs_get_devdir(struct brcmf_pub *drvr)
 {
-	return drvr->dbgfs_dir;
+	return drvr->wiphy->debugfsdir;
 }
 
 int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
@@ -99,7 +62,8 @@ int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
 {
 	struct dentry *e;
 
+	WARN(!drvr->wiphy->debugfsdir, "wiphy not (yet) registered\n");
 	e = debugfs_create_devm_seqfile(drvr->bus_if->dev, fn,
-					drvr->dbgfs_dir, read_fn);
+					drvr->wiphy->debugfsdir, read_fn);
 	return PTR_ERR_OR_ZERO(e);
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
index 35919d9e8e13..cfed0626bf5a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
@@ -113,29 +113,12 @@ extern int brcmf_msg_level;
 struct brcmf_bus;
 struct brcmf_pub;
 #ifdef DEBUG
-void brcmf_debugfs_init(void);
-void brcmf_debugfs_exit(void);
-int brcmf_debug_attach(struct brcmf_pub *drvr);
-void brcmf_debug_detach(struct brcmf_pub *drvr);
 struct dentry *brcmf_debugfs_get_devdir(struct brcmf_pub *drvr);
 int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
 			    int (*read_fn)(struct seq_file *seq, void *data));
 int brcmf_debug_create_memdump(struct brcmf_bus *bus, const void *data,
 			       size_t len);
 #else
-static inline void brcmf_debugfs_init(void)
-{
-}
-static inline void brcmf_debugfs_exit(void)
-{
-}
-static inline int brcmf_debug_attach(struct brcmf_pub *drvr)
-{
-	return 0;
-}
-static inline void brcmf_debug_detach(struct brcmf_pub *drvr)
-{
-}
 static inline
 int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
 			    int (*read_fn)(struct seq_file *seq, void *data))
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
index bede7b7fd996..876731c57bf5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
@@ -228,7 +228,10 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
 		/* no quirks */
 		break;
 	}
+}
 
+void brcmf_feat_debugfs_create(struct brcmf_pub *drvr)
+{
 	brcmf_debugfs_add_entry(drvr, "features", brcmf_feat_debugfs_read);
 }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
index 1ab4f1617112..d1193825e559 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
@@ -89,6 +89,13 @@ enum brcmf_feat_quirk {
  */
 void brcmf_feat_attach(struct brcmf_pub *drvr);
 
+/**
+ * brcmf_feat_debugfs_create() - create debugfs entries.
+ *
+ * @drvr: driver instance.
+ */
+void brcmf_feat_debugfs_create(struct brcmf_pub *drvr);
+
 /**
  * brcmf_feat_is_enabled() - query feature.
  *
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
index f59642b2c935..f3cbf78c8899 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
@@ -2399,10 +2399,6 @@ struct brcmf_fws_info *brcmf_fws_attach(struct brcmf_pub *drvr)
 	brcmu_pktq_init(&fws->desc.other.psq, BRCMF_FWS_PSQ_PREC_COUNT,
 			BRCMF_FWS_PSQ_LEN);
 
-	/* create debugfs file for statistics */
-	brcmf_debugfs_add_entry(drvr, "fws_stats",
-				brcmf_debugfs_fws_stats_read);
-
 	brcmf_dbg(INFO, "%s bdcv2 tlv signaling [%x]\n",
 		  fws->fw_signals ? "enabled" : "disabled", tlv);
 	return fws;
@@ -2429,6 +2425,13 @@ void brcmf_fws_detach(struct brcmf_fws_info *fws)
 	kfree(fws);
 }
 
+void brcmf_fws_debugfs_create(struct brcmf_pub *drvr)
+{
+	/* create debugfs file for statistics */
+	brcmf_debugfs_add_entry(drvr, "fws_stats",
+				brcmf_debugfs_fws_stats_read);
+}
+
 bool brcmf_fws_queue_skbs(struct brcmf_fws_info *fws)
 {
 	return !fws->avoid_queueing;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h
index ba07bd972002..4e6835766d5d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h
@@ -20,6 +20,7 @@
 
 struct brcmf_fws_info *brcmf_fws_attach(struct brcmf_pub *drvr);
 void brcmf_fws_detach(struct brcmf_fws_info *fws);
+void brcmf_fws_debugfs_create(struct brcmf_pub *drvr);
 bool brcmf_fws_queue_skbs(struct brcmf_fws_info *fws);
 bool brcmf_fws_fc_active(struct brcmf_fws_info *fws);
 void brcmf_fws_hdrpull(struct brcmf_if *ifp, s16 siglen, struct sk_buff *skb);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
index e212a791a072..49d37ad96958 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
@@ -1418,6 +1418,11 @@ static int brcmf_msgbuf_stats_read(struct seq_file *seq, void *data)
 }
 #endif
 
+static void brcmf_msgbuf_debugfs_create(struct brcmf_pub *drvr)
+{
+	brcmf_debugfs_add_entry(drvr, "msgbuf_stats", brcmf_msgbuf_stats_read);
+}
+
 int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
 {
 	struct brcmf_bus_msgbuf *if_msgbuf;
@@ -1472,6 +1477,7 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
 	drvr->proto->delete_peer = brcmf_msgbuf_delete_peer;
 	drvr->proto->add_tdls_peer = brcmf_msgbuf_add_tdls_peer;
 	drvr->proto->rxreorder = brcmf_msgbuf_rxreorder;
+	drvr->proto->debugfs_create = brcmf_msgbuf_debugfs_create;
 	drvr->proto->pd = msgbuf;
 
 	init_waitqueue_head(&msgbuf->ioctl_resp_wait);
@@ -1525,8 +1531,6 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
 	spin_lock_init(&msgbuf->flowring_work_lock);
 	INIT_LIST_HEAD(&msgbuf->work_queue);
 
-	brcmf_debugfs_add_entry(drvr, "msgbuf_stats", brcmf_msgbuf_stats_read);
-
 	return 0;
 
 fail:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.c
index d26ff219ef66..c5ff551ec659 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.c
@@ -54,7 +54,8 @@ int brcmf_proto_attach(struct brcmf_pub *drvr)
 	if (!proto->tx_queue_data || (proto->hdrpull == NULL) ||
 	    (proto->query_dcmd == NULL) || (proto->set_dcmd == NULL) ||
 	    (proto->configure_addr_mode == NULL) ||
-	    (proto->delete_peer == NULL) || (proto->add_tdls_peer == NULL)) {
+	    (proto->delete_peer == NULL) || (proto->add_tdls_peer == NULL) ||
+	    (proto->debugfs_create == NULL)) {
 		brcmf_err("Not all proto handlers have been installed\n");
 		goto fail;
 	}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.h
index 8a8e08f09ea0..d3c3b9a815ad 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/proto.h
@@ -48,6 +48,7 @@ struct brcmf_proto {
 	void (*del_if)(struct brcmf_if *ifp);
 	void (*reset_if)(struct brcmf_if *ifp);
 	int (*init_done)(struct brcmf_pub *drvr);
+	void (*debugfs_create)(struct brcmf_pub *drvr);
 	void *pd;
 };
 
@@ -156,4 +157,10 @@ brcmf_proto_init_done(struct brcmf_pub *drvr)
 	return drvr->proto->init_done(drvr);
 }
 
+static inline void
+brcmf_proto_debugfs_create(struct brcmf_pub *drvr)
+{
+	drvr->proto->debugfs_create(drvr);
+}
+
 #endif /* BRCMFMAC_PROTO_H */

From 41f573dbb534f14e62a4a5411f602c970cad1d77 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:25 +0100
Subject: [PATCH 1316/1678] brcmfmac: derive firmware filenames from basename
 mapping

Instead of defining individual filenames for firmware and nvram
use a basename and derive the names from that.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/firmware.c    | 31 +++++---
 .../broadcom/brcm80211/brcmfmac/firmware.h    | 24 ++----
 .../broadcom/brcm80211/brcmfmac/pcie.c        | 56 +++++++-------
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 73 +++++++++----------
 .../broadcom/brcm80211/brcmfmac/usb.c         | 10 +--
 5 files changed, 96 insertions(+), 98 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index e2199f53ebef..6945f582dbb7 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -563,6 +563,13 @@ int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
 					   0);
 }
 
+static void brcmf_fw_get_full_name(char fw_name[BRCMF_FW_NAME_LEN],
+				   const char *fw_base, const char *extension)
+{
+	strlcat(fw_name, fw_base, BRCMF_FW_NAME_LEN);
+	strlcat(fw_name, extension, BRCMF_FW_NAME_LEN);
+}
+
 int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 			      struct brcmf_firmware_mapping mapping_table[],
 			      u32 table_size, char fw_name[BRCMF_FW_NAME_LEN],
@@ -587,25 +594,31 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 
 	/* check if firmware path is provided by module parameter */
 	if (brcmf_mp_global.firmware_path[0] != '\0') {
-		strlcpy(fw_name, brcmf_mp_global.firmware_path,
-			BRCMF_FW_NAME_LEN);
-		if ((nvram_name) && (mapping_table[i].nvram))
+		if (fw_name)
+			strlcpy(fw_name, brcmf_mp_global.firmware_path,
+				BRCMF_FW_NAME_LEN);
+		if (nvram_name)
 			strlcpy(nvram_name, brcmf_mp_global.firmware_path,
 				BRCMF_FW_NAME_LEN);
 
 		end = brcmf_mp_global.firmware_path[
 				strlen(brcmf_mp_global.firmware_path) - 1];
 		if (end != '/') {
-			strlcat(fw_name, "/", BRCMF_FW_NAME_LEN);
-			if ((nvram_name) && (mapping_table[i].nvram))
+			if (fw_name)
+				strlcat(fw_name, "/", BRCMF_FW_NAME_LEN);
+			if (nvram_name)
 				strlcat(nvram_name, "/", BRCMF_FW_NAME_LEN);
 		}
 	}
-	strlcat(fw_name, mapping_table[i].fw, BRCMF_FW_NAME_LEN);
-	if ((nvram_name) && (mapping_table[i].nvram))
-		strlcat(nvram_name, mapping_table[i].nvram, BRCMF_FW_NAME_LEN);
 
-	brcmf_info("using %s for chip %s\n", fw_name, chipname);
+	brcmf_info("using %s for chip %s\n",
+		   mapping_table[i].fw_base, chipname);
+	if (fw_name)
+		brcmf_fw_get_full_name(fw_name,
+				       mapping_table[i].fw_base, ".bin");
+	if (nvram_name)
+		brcmf_fw_get_full_name(nvram_name,
+				       mapping_table[i].fw_base, ".txt");
 
 	return 0;
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index 8fa4b7e1ab3d..50218e9b39ce 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -38,28 +38,16 @@
 struct brcmf_firmware_mapping {
 	u32 chipid;
 	u32 revmask;
-	const char *fw;
-	const char *nvram;
+	const char *fw_base;
 };
 
-#define BRCMF_FW_NVRAM_DEF(fw_nvram_name, fw, nvram) \
-static const char BRCM_ ## fw_nvram_name ## _FIRMWARE_NAME[] = \
-	BRCMF_FW_DEFAULT_PATH fw; \
-static const char BRCM_ ## fw_nvram_name ## _NVRAM_NAME[] = \
-	BRCMF_FW_DEFAULT_PATH nvram; \
-MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw);
-
-#define BRCMF_FW_DEF(fw_name, fw) \
-static const char BRCM_ ## fw_name ## _FIRMWARE_NAME[] = \
-	BRCMF_FW_DEFAULT_PATH fw; \
-MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw) \
-
-#define BRCMF_FW_NVRAM_ENTRY(chipid, mask, name) \
-	{ chipid, mask, \
-	  BRCM_ ## name ## _FIRMWARE_NAME, BRCM_ ## name ## _NVRAM_NAME }
+#define BRCMF_FW_DEF(fw_name, fw_base) \
+static const char BRCM_ ## fw_name ## _FIRMWARE_BASENAME[] = \
+	BRCMF_FW_DEFAULT_PATH fw_base; \
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw_base ".bin")
 
 #define BRCMF_FW_ENTRY(chipid, mask, name) \
-	{ chipid, mask, BRCM_ ## name ## _FIRMWARE_NAME, NULL }
+	{ chipid, mask, BRCM_ ## name ## _FIRMWARE_BASENAME }
 
 int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 			      struct brcmf_firmware_mapping mapping_table[],
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index a7d827ce1684..ca95f6c9f42d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -46,36 +46,36 @@ enum brcmf_pcie_state {
 	BRCMFMAC_PCIE_STATE_UP
 };
 
-BRCMF_FW_NVRAM_DEF(43602, "brcmfmac43602-pcie.bin", "brcmfmac43602-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4350, "brcmfmac4350-pcie.bin", "brcmfmac4350-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4350C, "brcmfmac4350c2-pcie.bin", "brcmfmac4350c2-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4356, "brcmfmac4356-pcie.bin", "brcmfmac4356-pcie.txt");
-BRCMF_FW_NVRAM_DEF(43570, "brcmfmac43570-pcie.bin", "brcmfmac43570-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4358, "brcmfmac4358-pcie.bin", "brcmfmac4358-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4359, "brcmfmac4359-pcie.bin", "brcmfmac4359-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4365B, "brcmfmac4365b-pcie.bin", "brcmfmac4365b-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4365C, "brcmfmac4365c-pcie.bin", "brcmfmac4365c-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4366B, "brcmfmac4366b-pcie.bin", "brcmfmac4366b-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4366C, "brcmfmac4366c-pcie.bin", "brcmfmac4366c-pcie.txt");
-BRCMF_FW_NVRAM_DEF(4371, "brcmfmac4371-pcie.bin", "brcmfmac4371-pcie.txt");
+BRCMF_FW_DEF(43602, "brcmfmac43602-pcie");
+BRCMF_FW_DEF(4350, "brcmfmac4350-pcie");
+BRCMF_FW_DEF(4350C, "brcmfmac4350c2-pcie");
+BRCMF_FW_DEF(4356, "brcmfmac4356-pcie");
+BRCMF_FW_DEF(43570, "brcmfmac43570-pcie");
+BRCMF_FW_DEF(4358, "brcmfmac4358-pcie");
+BRCMF_FW_DEF(4359, "brcmfmac4359-pcie");
+BRCMF_FW_DEF(4365B, "brcmfmac4365b-pcie");
+BRCMF_FW_DEF(4365C, "brcmfmac4365c-pcie");
+BRCMF_FW_DEF(4366B, "brcmfmac4366b-pcie");
+BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie");
+BRCMF_FW_DEF(4371, "brcmfmac4371-pcie");
 
 static struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43465_CHIP_ID, 0xFFFFFFF0, 4366C),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4350_CHIP_ID, 0x000000FF, 4350C),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4350_CHIP_ID, 0xFFFFFF00, 4350),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43525_CHIP_ID, 0xFFFFFFF0, 4365C),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43567_CHIP_ID, 0xFFFFFFFF, 43570),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43569_CHIP_ID, 0xFFFFFFFF, 43570),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43570_CHIP_ID, 0xFFFFFFFF, 43570),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4358_CHIP_ID, 0xFFFFFFFF, 4358),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFFFF, 4359),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4365_CHIP_ID, 0x0000000F, 4365B),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4365_CHIP_ID, 0xFFFFFFF0, 4365C),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4366_CHIP_ID, 0x0000000F, 4366B),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4366_CHIP_ID, 0xFFFFFFF0, 4366C),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4371_CHIP_ID, 0xFFFFFFFF, 4371),
+	BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602),
+	BRCMF_FW_ENTRY(BRCM_CC_43465_CHIP_ID, 0xFFFFFFF0, 4366C),
+	BRCMF_FW_ENTRY(BRCM_CC_4350_CHIP_ID, 0x000000FF, 4350C),
+	BRCMF_FW_ENTRY(BRCM_CC_4350_CHIP_ID, 0xFFFFFF00, 4350),
+	BRCMF_FW_ENTRY(BRCM_CC_43525_CHIP_ID, 0xFFFFFFF0, 4365C),
+	BRCMF_FW_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
+	BRCMF_FW_ENTRY(BRCM_CC_43567_CHIP_ID, 0xFFFFFFFF, 43570),
+	BRCMF_FW_ENTRY(BRCM_CC_43569_CHIP_ID, 0xFFFFFFFF, 43570),
+	BRCMF_FW_ENTRY(BRCM_CC_43570_CHIP_ID, 0xFFFFFFFF, 43570),
+	BRCMF_FW_ENTRY(BRCM_CC_4358_CHIP_ID, 0xFFFFFFFF, 4358),
+	BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFFFF, 4359),
+	BRCMF_FW_ENTRY(BRCM_CC_4365_CHIP_ID, 0x0000000F, 4365B),
+	BRCMF_FW_ENTRY(BRCM_CC_4365_CHIP_ID, 0xFFFFFFF0, 4365C),
+	BRCMF_FW_ENTRY(BRCM_CC_4366_CHIP_ID, 0x0000000F, 4366B),
+	BRCMF_FW_ENTRY(BRCM_CC_4366_CHIP_ID, 0xFFFFFFF0, 4366C),
+	BRCMF_FW_ENTRY(BRCM_CC_4371_CHIP_ID, 0xFFFFFFFF, 4371),
 };
 
 #define BRCMF_PCIE_FW_UP_TIMEOUT		2000 /* msec */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 4a6459a429ec..3f7fd6f817a8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -600,47 +600,44 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = {
 	{4,  0x1}
 };
 
-BRCMF_FW_NVRAM_DEF(43143, "brcmfmac43143-sdio.bin", "brcmfmac43143-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43241B0, "brcmfmac43241b0-sdio.bin",
-		   "brcmfmac43241b0-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43241B4, "brcmfmac43241b4-sdio.bin",
-		   "brcmfmac43241b4-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43241B5, "brcmfmac43241b5-sdio.bin",
-		   "brcmfmac43241b5-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4329, "brcmfmac4329-sdio.bin", "brcmfmac4329-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4330, "brcmfmac4330-sdio.bin", "brcmfmac4330-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4334, "brcmfmac4334-sdio.bin", "brcmfmac4334-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43340, "brcmfmac43340-sdio.bin", "brcmfmac43340-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4335, "brcmfmac4335-sdio.bin", "brcmfmac4335-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43362, "brcmfmac43362-sdio.bin", "brcmfmac43362-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4339, "brcmfmac4339-sdio.bin", "brcmfmac4339-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43430A0, "brcmfmac43430a0-sdio.bin", "brcmfmac43430a0-sdio.txt");
+BRCMF_FW_DEF(43143, "brcmfmac43143-sdio");
+BRCMF_FW_DEF(43241B0, "brcmfmac43241b0-sdio");
+BRCMF_FW_DEF(43241B4, "brcmfmac43241b4-sdio");
+BRCMF_FW_DEF(43241B5, "brcmfmac43241b5-sdio");
+BRCMF_FW_DEF(4329, "brcmfmac4329-sdio");
+BRCMF_FW_DEF(4330, "brcmfmac4330-sdio");
+BRCMF_FW_DEF(4334, "brcmfmac4334-sdio");
+BRCMF_FW_DEF(43340, "brcmfmac43340-sdio");
+BRCMF_FW_DEF(4335, "brcmfmac4335-sdio");
+BRCMF_FW_DEF(43362, "brcmfmac43362-sdio");
+BRCMF_FW_DEF(4339, "brcmfmac4339-sdio");
+BRCMF_FW_DEF(43430A0, "brcmfmac43430a0-sdio");
 /* Note the names are not postfixed with a1 for backward compatibility */
-BRCMF_FW_NVRAM_DEF(43430A1, "brcmfmac43430-sdio.bin", "brcmfmac43430-sdio.txt");
-BRCMF_FW_NVRAM_DEF(43455, "brcmfmac43455-sdio.bin", "brcmfmac43455-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4354, "brcmfmac4354-sdio.bin", "brcmfmac4354-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4356, "brcmfmac4356-sdio.bin", "brcmfmac4356-sdio.txt");
-BRCMF_FW_NVRAM_DEF(4373, "brcmfmac4373-sdio.bin", "brcmfmac4373-sdio.txt");
+BRCMF_FW_DEF(43430A1, "brcmfmac43430-sdio");
+BRCMF_FW_DEF(43455, "brcmfmac43455-sdio");
+BRCMF_FW_DEF(4354, "brcmfmac4354-sdio");
+BRCMF_FW_DEF(4356, "brcmfmac4356-sdio");
+BRCMF_FW_DEF(4373, "brcmfmac4373-sdio");
 
 static struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43241_CHIP_ID, 0x0000001F, 43241B0),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43241_CHIP_ID, 0x00000020, 43241B4),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43241_CHIP_ID, 0xFFFFFFC0, 43241B5),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4329_CHIP_ID, 0xFFFFFFFF, 4329),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4330_CHIP_ID, 0xFFFFFFFF, 4330),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4334_CHIP_ID, 0xFFFFFFFF, 4334),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43340_CHIP_ID, 0xFFFFFFFF, 43340),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43341_CHIP_ID, 0xFFFFFFFF, 43340),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, 4335),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, 43362),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, 4339),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000001, 43430A0),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFE, 43430A1),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFFC0, 43455),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, 4354),
-	BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
-	BRCMF_FW_NVRAM_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373)
+	BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
+	BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0x0000001F, 43241B0),
+	BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0x00000020, 43241B4),
+	BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0xFFFFFFC0, 43241B5),
+	BRCMF_FW_ENTRY(BRCM_CC_4329_CHIP_ID, 0xFFFFFFFF, 4329),
+	BRCMF_FW_ENTRY(BRCM_CC_4330_CHIP_ID, 0xFFFFFFFF, 4330),
+	BRCMF_FW_ENTRY(BRCM_CC_4334_CHIP_ID, 0xFFFFFFFF, 4334),
+	BRCMF_FW_ENTRY(BRCM_CC_43340_CHIP_ID, 0xFFFFFFFF, 43340),
+	BRCMF_FW_ENTRY(BRCM_CC_43341_CHIP_ID, 0xFFFFFFFF, 43340),
+	BRCMF_FW_ENTRY(BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, 4335),
+	BRCMF_FW_ENTRY(BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, 43362),
+	BRCMF_FW_ENTRY(BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, 4339),
+	BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000001, 43430A0),
+	BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFE, 43430A1),
+	BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFFC0, 43455),
+	BRCMF_FW_ENTRY(BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, 4354),
+	BRCMF_FW_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
+	BRCMF_FW_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373)
 };
 
 static void pkt_align(struct sk_buff *p, int len, int align)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 41642dda40fd..f8968cf66bd0 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -46,11 +46,11 @@
 #define BRCMF_USB_CBCTL_READ		1
 #define BRCMF_USB_MAX_PKT_SIZE		1600
 
-BRCMF_FW_DEF(43143, "brcmfmac43143.bin");
-BRCMF_FW_DEF(43236B, "brcmfmac43236b.bin");
-BRCMF_FW_DEF(43242A, "brcmfmac43242a.bin");
-BRCMF_FW_DEF(43569, "brcmfmac43569.bin");
-BRCMF_FW_DEF(4373, "brcmfmac4373.bin");
+BRCMF_FW_DEF(43143, "brcmfmac43143");
+BRCMF_FW_DEF(43236B, "brcmfmac43236b");
+BRCMF_FW_DEF(43242A, "brcmfmac43242a");
+BRCMF_FW_DEF(43569, "brcmfmac43569");
+BRCMF_FW_DEF(4373, "brcmfmac4373");
 
 static struct brcmf_firmware_mapping brcmf_usb_fwnames[] = {
 	BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),

From d09ae51a4b676151edaf572bcd5f272b5532639f Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:26 +0100
Subject: [PATCH 1317/1678] brcmfmac: pass struct in brcmf_fw_get_firmwares()

Make the function brcmf_fw_get_firmwares() a bit more easy to extend
using a structure to pass the request parameters.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/firmware.c    | 175 ++++++++++++------
 .../broadcom/brcm80211/brcmfmac/firmware.h    |  43 +++--
 .../broadcom/brcm80211/brcmfmac/pcie.c        |  38 +++-
 .../broadcom/brcm80211/brcmfmac/sdio.c        |  32 +++-
 .../broadcom/brcm80211/brcmfmac/usb.c         |  43 ++++-
 5 files changed, 245 insertions(+), 86 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index 6945f582dbb7..95004dbba942 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -438,18 +438,31 @@ void brcmf_fw_nvram_free(void *nvram)
 
 struct brcmf_fw {
 	struct device *dev;
-	u16 flags;
-	const struct firmware *code;
-	const char *nvram_name;
-	u16 domain_nr;
-	u16 bus_nr;
-	void (*done)(struct device *dev, int err, const struct firmware *fw,
-		     void *nvram_image, u32 nvram_len);
+	struct brcmf_fw_request *req;
+	u32 curpos;
+	void (*done)(struct device *dev, int err, struct brcmf_fw_request *req);
 };
 
+static void brcmf_fw_request_done(const struct firmware *fw, void *ctx);
+
+static void brcmf_fw_free_request(struct brcmf_fw_request *req)
+{
+	struct brcmf_fw_item *item;
+	int i;
+
+	for (i = 0, item = &req->items[0]; i < req->n_items; i++, item++) {
+		if (item->type == BRCMF_FW_TYPE_BINARY)
+			release_firmware(item->binary);
+		else if (item->type == BRCMF_FW_TYPE_NVRAM)
+			brcmf_fw_nvram_free(item->nv_data.data);
+	}
+	kfree(req);
+}
+
 static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
 {
 	struct brcmf_fw *fwctx = ctx;
+	struct brcmf_fw_item *cur;
 	u32 nvram_length = 0;
 	void *nvram = NULL;
 	u8 *data = NULL;
@@ -457,83 +470,150 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx)
 	bool raw_nvram;
 
 	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
+
+	cur = &fwctx->req->items[fwctx->curpos];
+
 	if (fw && fw->data) {
 		data = (u8 *)fw->data;
 		data_len = fw->size;
 		raw_nvram = false;
 	} else {
 		data = bcm47xx_nvram_get_contents(&data_len);
-		if (!data && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
+		if (!data && !(cur->flags & BRCMF_FW_REQF_OPTIONAL))
 			goto fail;
 		raw_nvram = true;
 	}
 
 	if (data)
 		nvram = brcmf_fw_nvram_strip(data, data_len, &nvram_length,
-					     fwctx->domain_nr, fwctx->bus_nr);
+					     fwctx->req->domain_nr,
+					     fwctx->req->bus_nr);
 
 	if (raw_nvram)
 		bcm47xx_nvram_release_contents(data);
 	release_firmware(fw);
-	if (!nvram && !(fwctx->flags & BRCMF_FW_REQ_NV_OPTIONAL))
+	if (!nvram && !(cur->flags & BRCMF_FW_REQF_OPTIONAL))
 		goto fail;
 
-	fwctx->done(fwctx->dev, 0, fwctx->code, nvram, nvram_length);
-	kfree(fwctx);
+	brcmf_dbg(TRACE, "nvram %p len %d\n", nvram, nvram_length);
+	cur->nv_data.data = nvram;
+	cur->nv_data.len = nvram_length;
 	return;
 
 fail:
 	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
-	release_firmware(fwctx->code);
-	fwctx->done(fwctx->dev, -ENOENT, NULL, NULL, 0);
+	fwctx->done(fwctx->dev, -ENOENT, NULL);
+	brcmf_fw_free_request(fwctx->req);
 	kfree(fwctx);
 }
 
-static void brcmf_fw_request_code_done(const struct firmware *fw, void *ctx)
+static int brcmf_fw_request_next_item(struct brcmf_fw *fwctx, bool async)
+{
+	struct brcmf_fw_item *cur;
+	const struct firmware *fw = NULL;
+	int ret;
+
+	cur = &fwctx->req->items[fwctx->curpos];
+
+	brcmf_dbg(TRACE, "%srequest for %s\n", async ? "async " : "",
+		  cur->path);
+
+	if (async)
+		ret = request_firmware_nowait(THIS_MODULE, true, cur->path,
+					      fwctx->dev, GFP_KERNEL, fwctx,
+					      brcmf_fw_request_done);
+	else
+		ret = request_firmware(&fw, cur->path, fwctx->dev);
+
+	if (ret < 0) {
+		brcmf_fw_request_done(NULL, fwctx);
+	} else if (!async && fw) {
+		brcmf_dbg(TRACE, "firmware %s %sfound\n", cur->path,
+			  fw ? "" : "not ");
+		if (cur->type == BRCMF_FW_TYPE_BINARY)
+			cur->binary = fw;
+		else if (cur->type == BRCMF_FW_TYPE_NVRAM)
+			brcmf_fw_request_nvram_done(fw, fwctx);
+		else
+			release_firmware(fw);
+
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static void brcmf_fw_request_done(const struct firmware *fw, void *ctx)
 {
 	struct brcmf_fw *fwctx = ctx;
+	struct brcmf_fw_item *cur;
 	int ret = 0;
 
-	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(fwctx->dev));
-	if (!fw) {
+	cur = &fwctx->req->items[fwctx->curpos];
+
+	brcmf_dbg(TRACE, "enter: firmware %s %sfound\n", cur->path,
+		  fw ? "" : "not ");
+
+	if (fw) {
+		if (cur->type == BRCMF_FW_TYPE_BINARY)
+			cur->binary = fw;
+		else if (cur->type == BRCMF_FW_TYPE_NVRAM)
+			brcmf_fw_request_nvram_done(fw, fwctx);
+		else
+			release_firmware(fw);
+	} else if (cur->type == BRCMF_FW_TYPE_NVRAM) {
+		brcmf_fw_request_nvram_done(NULL, fwctx);
+	} else if (!(cur->flags & BRCMF_FW_REQF_OPTIONAL)) {
 		ret = -ENOENT;
 		goto fail;
 	}
-	/* only requested code so done here */
-	if (!(fwctx->flags & BRCMF_FW_REQUEST_NVRAM))
-		goto done;
 
-	fwctx->code = fw;
-	ret = request_firmware_nowait(THIS_MODULE, true, fwctx->nvram_name,
-				      fwctx->dev, GFP_KERNEL, fwctx,
-				      brcmf_fw_request_nvram_done);
+	do {
+		if (++fwctx->curpos == fwctx->req->n_items) {
+			ret = 0;
+			goto done;
+		}
+
+		ret = brcmf_fw_request_next_item(fwctx, false);
+	} while (ret == -EAGAIN);
 
-	/* pass NULL to nvram callback for bcm47xx fallback */
-	if (ret)
-		brcmf_fw_request_nvram_done(NULL, fwctx);
 	return;
 
 fail:
-	brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev));
+	brcmf_dbg(TRACE, "failed err=%d: dev=%s, fw=%s\n", ret,
+		  dev_name(fwctx->dev), cur->path);
+	brcmf_fw_free_request(fwctx->req);
+	fwctx->req = NULL;
 done:
-	fwctx->done(fwctx->dev, ret, fw, NULL, 0);
+	fwctx->done(fwctx->dev, ret, fwctx->req);
 	kfree(fwctx);
 }
 
-int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
-				const char *code, const char *nvram,
-				void (*fw_cb)(struct device *dev, int err,
-					      const struct firmware *fw,
-					      void *nvram_image, u32 nvram_len),
-				u16 domain_nr, u16 bus_nr)
+static bool brcmf_fw_request_is_valid(struct brcmf_fw_request *req)
+{
+	struct brcmf_fw_item *item;
+	int i;
+
+	if (!req->n_items)
+		return false;
+
+	for (i = 0, item = &req->items[0]; i < req->n_items; i++, item++) {
+		if (!item->path)
+			return false;
+	}
+	return true;
+}
+
+int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
+			   void (*fw_cb)(struct device *dev, int err,
+					 struct brcmf_fw_request *req))
 {
 	struct brcmf_fw *fwctx;
 
 	brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(dev));
-	if (!fw_cb || !code)
+	if (!fw_cb)
 		return -EINVAL;
 
-	if ((flags & BRCMF_FW_REQUEST_NVRAM) && !nvram)
+	if (!brcmf_fw_request_is_valid(req))
 		return -EINVAL;
 
 	fwctx = kzalloc(sizeof(*fwctx), GFP_KERNEL);
@@ -541,26 +621,11 @@ int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
 		return -ENOMEM;
 
 	fwctx->dev = dev;
-	fwctx->flags = flags;
+	fwctx->req = req;
 	fwctx->done = fw_cb;
-	if (flags & BRCMF_FW_REQUEST_NVRAM)
-		fwctx->nvram_name = nvram;
-	fwctx->domain_nr = domain_nr;
-	fwctx->bus_nr = bus_nr;
 
-	return request_firmware_nowait(THIS_MODULE, true, code, dev,
-				       GFP_KERNEL, fwctx,
-				       brcmf_fw_request_code_done);
-}
-
-int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
-			   const char *code, const char *nvram,
-			   void (*fw_cb)(struct device *dev, int err,
-					 const struct firmware *fw,
-					 void *nvram_image, u32 nvram_len))
-{
-	return brcmf_fw_get_firmwares_pcie(dev, flags, code, nvram, fw_cb, 0,
-					   0);
+	brcmf_fw_request_next_item(fwctx, true);
+	return 0;
 }
 
 static void brcmf_fw_get_full_name(char fw_name[BRCMF_FW_NAME_LEN],
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index 50218e9b39ce..1ca630e237c8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -16,10 +16,7 @@
 #ifndef BRCMFMAC_FIRMWARE_H
 #define BRCMFMAC_FIRMWARE_H
 
-#define BRCMF_FW_REQUEST		0x000F
-#define  BRCMF_FW_REQUEST_NVRAM		0x0001
-#define BRCMF_FW_REQ_FLAGS		0x00F0
-#define  BRCMF_FW_REQ_NV_OPTIONAL	0x0010
+#define BRCMF_FW_REQF_OPTIONAL		0x0001
 
 #define	BRCMF_FW_NAME_LEN		320
 
@@ -54,21 +51,39 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 			      u32 table_size, char fw_name[BRCMF_FW_NAME_LEN],
 			      char nvram_name[BRCMF_FW_NAME_LEN]);
 void brcmf_fw_nvram_free(void *nvram);
+
+enum brcmf_fw_type {
+	BRCMF_FW_TYPE_BINARY,
+	BRCMF_FW_TYPE_NVRAM
+};
+
+struct brcmf_fw_item {
+	const char *path;
+	enum brcmf_fw_type type;
+	u16 flags;
+	union {
+		const struct firmware *binary;
+		struct {
+			void *data;
+			u32 len;
+		} nv_data;
+	};
+};
+
+struct brcmf_fw_request {
+	u16 domain_nr;
+	u16 bus_nr;
+	u32 n_items;
+	struct brcmf_fw_item items[0];
+};
+
 /*
  * Request firmware(s) asynchronously. When the asynchronous request
  * fails it will not use the callback, but call device_release_driver()
  * instead which will call the driver .remove() callback.
  */
-int brcmf_fw_get_firmwares_pcie(struct device *dev, u16 flags,
-				const char *code, const char *nvram,
-				void (*fw_cb)(struct device *dev, int err,
-					      const struct firmware *fw,
-					      void *nvram_image, u32 nvram_len),
-				u16 domain_nr, u16 bus_nr);
-int brcmf_fw_get_firmwares(struct device *dev, u16 flags,
-			   const char *code, const char *nvram,
+int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
 			   void (*fw_cb)(struct device *dev, int err,
-					 const struct firmware *fw,
-					 void *nvram_image, u32 nvram_len));
+					 struct brcmf_fw_request *req));
 
 #endif /* BRCMFMAC_FIRMWARE_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index ca95f6c9f42d..a3fe29e65bd9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1651,15 +1651,19 @@ static const struct brcmf_buscore_ops brcmf_pcie_buscore_ops = {
 	.write32 = brcmf_pcie_buscore_write32,
 };
 
+#define BRCMF_PCIE_FW_CODE	0
+#define BRCMF_PCIE_FW_NVRAM	1
+
 static void brcmf_pcie_setup(struct device *dev, int ret,
-			     const struct firmware *fw,
-			     void *nvram, u32 nvram_len)
+			     struct brcmf_fw_request *fwreq)
 {
+	const struct firmware *fw;
+	void *nvram;
 	struct brcmf_bus *bus;
 	struct brcmf_pciedev *pcie_bus_dev;
 	struct brcmf_pciedev_info *devinfo;
 	struct brcmf_commonring **flowrings;
-	u32 i;
+	u32 i, nvram_len;
 
 	/* check firmware loading result */
 	if (ret)
@@ -1670,6 +1674,11 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
 	devinfo = pcie_bus_dev->devinfo;
 	brcmf_pcie_attach(devinfo);
 
+	fw = fwreq->items[BRCMF_PCIE_FW_CODE].binary;
+	nvram = fwreq->items[BRCMF_PCIE_FW_NVRAM].nv_data.data;
+	nvram_len = fwreq->items[BRCMF_PCIE_FW_NVRAM].nv_data.len;
+	kfree(fwreq);
+
 	/* Some of the firmwares have the size of the memory of the device
 	 * defined inside the firmware. This is because part of the memory in
 	 * the device is shared and the devision is determined by FW. Parse
@@ -1730,6 +1739,7 @@ static int
 brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int ret;
+	struct brcmf_fw_request *fwreq;
 	struct brcmf_pciedev_info *devinfo;
 	struct brcmf_pciedev *pcie_bus_dev;
 	struct brcmf_bus *bus;
@@ -1800,12 +1810,26 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		goto fail_bus;
 
-	ret = brcmf_fw_get_firmwares_pcie(bus->dev, BRCMF_FW_REQUEST_NVRAM |
-						    BRCMF_FW_REQ_NV_OPTIONAL,
-					  devinfo->fw_name, devinfo->nvram_name,
-					  brcmf_pcie_setup, domain_nr, bus_nr);
+	fwreq = kzalloc(sizeof(*fwreq) + 2 * sizeof(struct brcmf_fw_item),
+			GFP_KERNEL);
+	if (!fwreq) {
+		ret = -ENOMEM;
+		goto fail_bus;
+	}
+
+	fwreq->items[BRCMF_PCIE_FW_CODE].path = devinfo->fw_name;
+	fwreq->items[BRCMF_PCIE_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+	fwreq->items[BRCMF_PCIE_FW_NVRAM].path = devinfo->nvram_name;
+	fwreq->items[BRCMF_PCIE_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
+	fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL;
+	fwreq->n_items = 2;
+	fwreq->domain_nr = domain_nr;
+	fwreq->bus_nr = bus_nr;
+	ret = brcmf_fw_get_firmwares(bus->dev, fwreq, brcmf_pcie_setup);
 	if (ret == 0)
 		return 0;
+
+	kfree(fwreq);
 fail_bus:
 	kfree(bus->msgbuf);
 	kfree(bus);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 3f7fd6f817a8..ab68eb1f04e4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4031,14 +4031,19 @@ static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
 	.get_fwname = brcmf_sdio_get_fwname,
 };
 
+#define BRCMF_SDIO_FW_CODE	0
+#define BRCMF_SDIO_FW_NVRAM	1
+
 static void brcmf_sdio_firmware_callback(struct device *dev, int err,
-					 const struct firmware *code,
-					 void *nvram, u32 nvram_len)
+					 struct brcmf_fw_request *fwreq)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
 	struct brcmf_sdio_dev *sdiod = bus_if->bus_priv.sdio;
 	struct brcmf_sdio *bus = sdiod->bus;
 	struct brcmf_core *core = bus->sdio_core;
+	const struct firmware *code;
+	void *nvram;
+	u32 nvram_len;
 	u8 saveclk;
 
 	brcmf_dbg(TRACE, "Enter: dev=%s, err=%d\n", dev_name(dev), err);
@@ -4046,6 +4051,11 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 	if (err)
 		goto fail;
 
+	code = fwreq->items[BRCMF_SDIO_FW_CODE].binary;
+	nvram = fwreq->items[BRCMF_SDIO_FW_NVRAM].nv_data.data;
+	nvram_len = fwreq->items[BRCMF_SDIO_FW_NVRAM].nv_data.len;
+	kfree(fwreq);
+
 	/* try to download image and nvram to the dongle */
 	bus->alp_only = true;
 	err = brcmf_sdio_download_firmware(bus, code, nvram, nvram_len);
@@ -4150,6 +4160,7 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 	int ret;
 	struct brcmf_sdio *bus;
 	struct workqueue_struct *wq;
+	struct brcmf_fw_request *fwreq;
 
 	brcmf_dbg(TRACE, "Enter\n");
 
@@ -4239,11 +4250,24 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 	if (ret)
 		goto fail;
 
-	ret = brcmf_fw_get_firmwares(sdiodev->dev, BRCMF_FW_REQUEST_NVRAM,
-				     sdiodev->fw_name, sdiodev->nvram_name,
+	fwreq = kzalloc(sizeof(fwreq) + 2 * sizeof(struct brcmf_fw_item),
+			GFP_KERNEL);
+	if (!fwreq) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	fwreq->items[BRCMF_SDIO_FW_CODE].path = sdiodev->fw_name;
+	fwreq->items[BRCMF_SDIO_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+	fwreq->items[BRCMF_SDIO_FW_NVRAM].path = sdiodev->nvram_name;
+	fwreq->items[BRCMF_SDIO_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
+	fwreq->n_items = 2;
+
+	ret = brcmf_fw_get_firmwares(sdiodev->dev, fwreq,
 				     brcmf_sdio_firmware_callback);
 	if (ret != 0) {
 		brcmf_err("async firmware request failed: %d\n", ret);
+		kfree(fwreq);
 		goto fail;
 	}
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index f8968cf66bd0..055db1191445 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1155,18 +1155,23 @@ static const struct brcmf_bus_ops brcmf_usb_bus_ops = {
 	.get_fwname = brcmf_usb_get_fwname,
 };
 
+#define BRCMF_USB_FW_CODE	0
+
 static void brcmf_usb_probe_phase2(struct device *dev, int ret,
-				   const struct firmware *fw,
-				   void *nvram, u32 nvlen)
+				   struct brcmf_fw_request *fwreq)
 {
 	struct brcmf_bus *bus = dev_get_drvdata(dev);
 	struct brcmf_usbdev_info *devinfo = bus->bus_priv.usb->devinfo;
+	const struct firmware *fw;
 
 	if (ret)
 		goto error;
 
 	brcmf_dbg(USB, "Start fw downloading\n");
 
+	fw = fwreq->items[BRCMF_USB_FW_CODE].binary;
+	kfree(fwreq);
+
 	ret = check_file(fw->data);
 	if (ret < 0) {
 		brcmf_err("invalid firmware\n");
@@ -1200,6 +1205,7 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
 	struct brcmf_bus *bus = NULL;
 	struct brcmf_usbdev *bus_pub = NULL;
 	struct device *dev = devinfo->dev;
+	struct brcmf_fw_request *fwreq;
 	int ret;
 
 	brcmf_dbg(USB, "Enter\n");
@@ -1250,11 +1256,22 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
 	if (ret)
 		goto fail;
 
+	fwreq = kzalloc(sizeof(*fwreq) + sizeof(struct brcmf_fw_item),
+			GFP_KERNEL);
+	if (!fwreq) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	fwreq->items[BRCMF_USB_FW_CODE].path = devinfo->fw_name;
+	fwreq->items[BRCMF_USB_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+	fwreq->n_items = 1;
+
 	/* request firmware here */
-	ret = brcmf_fw_get_firmwares(dev, 0, devinfo->fw_name, NULL,
-				     brcmf_usb_probe_phase2);
+	ret = brcmf_fw_get_firmwares(dev, fwreq, brcmf_usb_probe_phase2);
 	if (ret) {
 		brcmf_err("firmware request failed: %d\n", ret);
+		kfree(fwreq);
 		goto fail;
 	}
 
@@ -1447,11 +1464,25 @@ static int brcmf_usb_reset_resume(struct usb_interface *intf)
 {
 	struct usb_device *usb = interface_to_usbdev(intf);
 	struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(&usb->dev);
+	struct brcmf_fw_request *fwreq;
+	int ret;
 
 	brcmf_dbg(USB, "Enter\n");
 
-	return brcmf_fw_get_firmwares(&usb->dev, 0, devinfo->fw_name, NULL,
-				      brcmf_usb_probe_phase2);
+	fwreq = kzalloc(sizeof(*fwreq) + sizeof(struct brcmf_fw_item),
+			GFP_KERNEL);
+	if (!fwreq)
+		return -ENOMEM;
+
+	fwreq->items[BRCMF_USB_FW_CODE].path = devinfo->fw_name;
+	fwreq->items[BRCMF_USB_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+	fwreq->n_items = 1;
+
+	ret = brcmf_fw_get_firmwares(&usb->dev, fwreq, brcmf_usb_probe_phase2);
+	if (ret < 0)
+		kfree(fwreq);
+
+	return ret;
 }
 
 #define BRCMF_USB_DEVICE(dev_id)	\

From 2baa3aaee27f137b8db9a9224d0fe9b281d28e34 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:27 +0100
Subject: [PATCH 1318/1678] brcmfmac: introduce brcmf_fw_alloc_request()
 function

The function brcmf_fw_alloc_request() takes a list of required files
and allocated the struct brcmf_fw_request instance accordingly. The
request can be modified by the caller before being passed to the
brcmf_fw_request_firmwares() function.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/firmware.c    | 58 +++++++++++++++++++
 .../broadcom/brcm80211/brcmfmac/firmware.h    | 11 ++++
 .../broadcom/brcm80211/brcmfmac/pcie.c        | 58 ++++++++++---------
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 38 +++++++-----
 .../broadcom/brcm80211/brcmfmac/usb.c         | 42 ++++++++------
 5 files changed, 147 insertions(+), 60 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index 95004dbba942..f495d2252ab5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -688,3 +688,61 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
 	return 0;
 }
 
+struct brcmf_fw_request *
+brcmf_fw_alloc_request(u32 chip, u32 chiprev,
+		       struct brcmf_firmware_mapping mapping_table[],
+		       u32 table_size, struct brcmf_fw_name *fwnames,
+		       u32 n_fwnames)
+{
+	struct brcmf_fw_request *fwreq;
+	char chipname[12];
+	const char *mp_path;
+	u32 i, j;
+	char end;
+	size_t reqsz;
+
+	for (i = 0; i < table_size; i++) {
+		if (mapping_table[i].chipid == chip &&
+		    mapping_table[i].revmask & BIT(chiprev))
+			break;
+	}
+
+	if (i == table_size) {
+		brcmf_err("Unknown chipid %d [%d]\n", chip, chiprev);
+		return NULL;
+	}
+
+	reqsz = sizeof(*fwreq) + n_fwnames * sizeof(struct brcmf_fw_item);
+	fwreq = kzalloc(reqsz, GFP_KERNEL);
+	if (!fwreq)
+		return NULL;
+
+	brcmf_chip_name(chip, chiprev, chipname, sizeof(chipname));
+
+	brcmf_info("using %s for chip %s\n",
+		   mapping_table[i].fw_base, chipname);
+
+	mp_path = brcmf_mp_global.firmware_path;
+	end = mp_path[strlen(mp_path) - 1];
+	fwreq->n_items = n_fwnames;
+
+	for (j = 0; j < n_fwnames; j++) {
+		fwreq->items[j].path = fwnames[j].path;
+		/* check if firmware path is provided by module parameter */
+		if (brcmf_mp_global.firmware_path[0] != '\0') {
+			strlcpy(fwnames[j].path, mp_path,
+				BRCMF_FW_NAME_LEN);
+
+			if (end != '/') {
+				strlcat(fwnames[j].path, "/",
+					BRCMF_FW_NAME_LEN);
+			}
+		}
+		brcmf_fw_get_full_name(fwnames[j].path,
+				       mapping_table[i].fw_base,
+				       fwnames[j].extension);
+		fwreq->items[j].path = fwnames[j].path;
+	}
+
+	return fwreq;
+}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index 1ca630e237c8..1416fbbefebd 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -77,6 +77,17 @@ struct brcmf_fw_request {
 	struct brcmf_fw_item items[0];
 };
 
+struct brcmf_fw_name {
+	const char *extension;
+	char *path;
+};
+
+struct brcmf_fw_request *
+brcmf_fw_alloc_request(u32 chip, u32 chiprev,
+		       struct brcmf_firmware_mapping mapping_table[],
+		       u32 table_size, struct brcmf_fw_name *fwnames,
+		       u32 n_fwnames);
+
 /*
  * Request firmware(s) asynchronously. When the asynchronous request
  * fails it will not use the callback, but call device_release_driver()
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index a3fe29e65bd9..179177a1bba9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1735,6 +1735,31 @@ fail:
 	device_release_driver(dev);
 }
 
+static struct brcmf_fw_request *
+brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo)
+{
+	struct brcmf_fw_request *fwreq;
+	struct brcmf_fw_name fwnames[] = {
+		{ ".bin", devinfo->fw_name },
+		{ ".txt", devinfo->nvram_name },
+	};
+
+	fwreq = brcmf_fw_alloc_request(devinfo->ci->chip, devinfo->ci->chiprev,
+				       brcmf_pcie_fwnames,
+				       ARRAY_SIZE(brcmf_pcie_fwnames),
+				       fwnames, ARRAY_SIZE(fwnames));
+	if (!fwreq)
+		return NULL;
+
+	fwreq->items[BRCMF_PCIE_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+	fwreq->items[BRCMF_PCIE_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
+	fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL;
+	fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus);
+	fwreq->bus_nr = devinfo->pdev->bus->number;
+
+	return fwreq;
+}
+
 static int
 brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -1743,13 +1768,8 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct brcmf_pciedev_info *devinfo;
 	struct brcmf_pciedev *pcie_bus_dev;
 	struct brcmf_bus *bus;
-	u16 domain_nr;
-	u16 bus_nr;
 
-	domain_nr = pci_domain_nr(pdev->bus) + 1;
-	bus_nr = pdev->bus->number;
-	brcmf_dbg(PCIE, "Enter %x:%x (%d/%d)\n", pdev->vendor, pdev->device,
-		  domain_nr, bus_nr);
+	brcmf_dbg(PCIE, "Enter %x:%x\n", pdev->vendor, pdev->device);
 
 	ret = -ENOMEM;
 	devinfo = kzalloc(sizeof(*devinfo), GFP_KERNEL);
@@ -1803,33 +1823,19 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	bus->wowl_supported = pci_pme_capable(pdev, PCI_D3hot);
 	dev_set_drvdata(&pdev->dev, bus);
 
-	ret = brcmf_fw_map_chip_to_name(devinfo->ci->chip, devinfo->ci->chiprev,
-					brcmf_pcie_fwnames,
-					ARRAY_SIZE(brcmf_pcie_fwnames),
-					devinfo->fw_name, devinfo->nvram_name);
-	if (ret)
-		goto fail_bus;
-
-	fwreq = kzalloc(sizeof(*fwreq) + 2 * sizeof(struct brcmf_fw_item),
-			GFP_KERNEL);
+	fwreq = brcmf_pcie_prepare_fw_request(devinfo);
 	if (!fwreq) {
 		ret = -ENOMEM;
 		goto fail_bus;
 	}
 
-	fwreq->items[BRCMF_PCIE_FW_CODE].path = devinfo->fw_name;
-	fwreq->items[BRCMF_PCIE_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
-	fwreq->items[BRCMF_PCIE_FW_NVRAM].path = devinfo->nvram_name;
-	fwreq->items[BRCMF_PCIE_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
-	fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL;
-	fwreq->n_items = 2;
-	fwreq->domain_nr = domain_nr;
-	fwreq->bus_nr = bus_nr;
 	ret = brcmf_fw_get_firmwares(bus->dev, fwreq, brcmf_pcie_setup);
-	if (ret == 0)
-		return 0;
+	if (ret < 0) {
+		kfree(fwreq);
+		goto fail_bus;
+	}
+	return 0;
 
-	kfree(fwreq);
 fail_bus:
 	kfree(bus->msgbuf);
 	kfree(bus);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index ab68eb1f04e4..24dbf7ea2301 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4155,6 +4155,28 @@ fail:
 	device_release_driver(dev);
 }
 
+static struct brcmf_fw_request *
+brcmf_sdio_prepare_fw_request(struct brcmf_sdio *bus)
+{
+	struct brcmf_fw_request *fwreq;
+	struct brcmf_fw_name fwnames[] = {
+		{ ".bin", bus->sdiodev->fw_name },
+		{ ".txt", bus->sdiodev->nvram_name },
+	};
+
+	fwreq = brcmf_fw_alloc_request(bus->ci->chip, bus->ci->chiprev,
+				       brcmf_sdio_fwnames,
+				       ARRAY_SIZE(brcmf_sdio_fwnames),
+				       fwnames, ARRAY_SIZE(fwnames));
+	if (!fwreq)
+		return NULL;
+
+	fwreq->items[BRCMF_SDIO_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+	fwreq->items[BRCMF_SDIO_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
+
+	return fwreq;
+}
+
 struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 {
 	int ret;
@@ -4243,26 +4265,12 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
 
 	brcmf_dbg(INFO, "completed!!\n");
 
-	ret = brcmf_fw_map_chip_to_name(bus->ci->chip, bus->ci->chiprev,
-					brcmf_sdio_fwnames,
-					ARRAY_SIZE(brcmf_sdio_fwnames),
-					sdiodev->fw_name, sdiodev->nvram_name);
-	if (ret)
-		goto fail;
-
-	fwreq = kzalloc(sizeof(fwreq) + 2 * sizeof(struct brcmf_fw_item),
-			GFP_KERNEL);
+	fwreq = brcmf_sdio_prepare_fw_request(bus);
 	if (!fwreq) {
 		ret = -ENOMEM;
 		goto fail;
 	}
 
-	fwreq->items[BRCMF_SDIO_FW_CODE].path = sdiodev->fw_name;
-	fwreq->items[BRCMF_SDIO_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
-	fwreq->items[BRCMF_SDIO_FW_NVRAM].path = sdiodev->nvram_name;
-	fwreq->items[BRCMF_SDIO_FW_NVRAM].type = BRCMF_FW_TYPE_NVRAM;
-	fwreq->n_items = 2;
-
 	ret = brcmf_fw_get_firmwares(sdiodev->dev, fwreq,
 				     brcmf_sdio_firmware_callback);
 	if (ret != 0) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 055db1191445..1c5f13546556 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1200,6 +1200,27 @@ error:
 	device_release_driver(dev);
 }
 
+static struct brcmf_fw_request *
+brcmf_usb_prepare_fw_request(struct brcmf_usbdev_info *devinfo)
+{
+	struct brcmf_fw_request *fwreq;
+	struct brcmf_fw_name fwnames[] = {
+		{ ".bin", devinfo->fw_name },
+	};
+
+	fwreq = brcmf_fw_alloc_request(devinfo->bus_pub.devid,
+				       devinfo->bus_pub.chiprev,
+				       brcmf_usb_fwnames,
+				       ARRAY_SIZE(brcmf_usb_fwnames),
+				       fwnames, ARRAY_SIZE(fwnames));
+	if (!fwreq)
+		return NULL;
+
+	fwreq->items[BRCMF_USB_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
+
+	return fwreq;
+}
+
 static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
 {
 	struct brcmf_bus *bus = NULL;
@@ -1249,24 +1270,12 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
 	bus->chip = bus_pub->devid;
 	bus->chiprev = bus_pub->chiprev;
 
-	ret = brcmf_fw_map_chip_to_name(bus_pub->devid, bus_pub->chiprev,
-					brcmf_usb_fwnames,
-					ARRAY_SIZE(brcmf_usb_fwnames),
-					devinfo->fw_name, NULL);
-	if (ret)
-		goto fail;
-
-	fwreq = kzalloc(sizeof(*fwreq) + sizeof(struct brcmf_fw_item),
-			GFP_KERNEL);
+	fwreq = brcmf_usb_prepare_fw_request(devinfo);
 	if (!fwreq) {
 		ret = -ENOMEM;
 		goto fail;
 	}
 
-	fwreq->items[BRCMF_USB_FW_CODE].path = devinfo->fw_name;
-	fwreq->items[BRCMF_USB_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
-	fwreq->n_items = 1;
-
 	/* request firmware here */
 	ret = brcmf_fw_get_firmwares(dev, fwreq, brcmf_usb_probe_phase2);
 	if (ret) {
@@ -1469,15 +1478,10 @@ static int brcmf_usb_reset_resume(struct usb_interface *intf)
 
 	brcmf_dbg(USB, "Enter\n");
 
-	fwreq = kzalloc(sizeof(*fwreq) + sizeof(struct brcmf_fw_item),
-			GFP_KERNEL);
+	fwreq = brcmf_usb_prepare_fw_request(devinfo);
 	if (!fwreq)
 		return -ENOMEM;
 
-	fwreq->items[BRCMF_USB_FW_CODE].path = devinfo->fw_name;
-	fwreq->items[BRCMF_USB_FW_CODE].type = BRCMF_FW_TYPE_BINARY;
-	fwreq->n_items = 1;
-
 	ret = brcmf_fw_get_firmwares(&usb->dev, fwreq, brcmf_usb_probe_phase2);
 	if (ret < 0)
 		kfree(fwreq);

From bf7a7b37f6ef5090a2bae7e7ae23cd26b741cca4 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:28 +0100
Subject: [PATCH 1319/1678] brcmfmac: add extension to .get_fwname() callbacks

This changes the bus layer api by having the caller provide an
extension. With this the callback can use brcmf_fw_alloc_request()
to get the needed firmware name.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/bus.h         |  6 +--
 .../broadcom/brcm80211/brcmfmac/common.c      | 43 +++----------------
 .../broadcom/brcm80211/brcmfmac/pcie.c        | 27 ++++++------
 .../broadcom/brcm80211/brcmfmac/sdio.c        | 26 +++++------
 .../broadcom/brcm80211/brcmfmac/usb.c         | 27 ++++++------
 5 files changed, 51 insertions(+), 78 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 0b90a63bdeb1..31858a20b97d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -88,7 +88,7 @@ struct brcmf_bus_ops {
 	void (*wowl_config)(struct device *dev, bool enabled);
 	size_t (*get_ramsize)(struct device *dev);
 	int (*get_memdump)(struct device *dev, void *data, size_t len);
-	int (*get_fwname)(struct device *dev, uint chip, uint chiprev,
+	int (*get_fwname)(struct device *dev, const char *ext,
 			  unsigned char *fw_name);
 };
 
@@ -228,10 +228,10 @@ int brcmf_bus_get_memdump(struct brcmf_bus *bus, void *data, size_t len)
 }
 
 static inline
-int brcmf_bus_get_fwname(struct brcmf_bus *bus, uint chip, uint chiprev,
+int brcmf_bus_get_fwname(struct brcmf_bus *bus, const char *ext,
 			 unsigned char *fw_name)
 {
-	return bus->ops->get_fwname(bus->dev, chip, chiprev, fw_name);
+	return bus->ops->get_fwname(bus->dev, ext, fw_name);
 }
 
 /*
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 3979f7814021..5975b3d8c0e6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -125,42 +125,9 @@ static int brcmf_c_download(struct brcmf_if *ifp, u16 flag,
 	return err;
 }
 
-static int brcmf_c_get_clm_name(struct brcmf_if *ifp, u8 *clm_name)
-{
-	struct brcmf_bus *bus = ifp->drvr->bus_if;
-	u8 fw_name[BRCMF_FW_NAME_LEN];
-	u8 *ptr;
-	size_t len;
-	s32 err;
-
-	memset(fw_name, 0, BRCMF_FW_NAME_LEN);
-	err = brcmf_bus_get_fwname(bus, bus->chip, bus->chiprev, fw_name);
-	if (err) {
-		brcmf_err("get firmware name failed (%d)\n", err);
-		goto done;
-	}
-
-	/* generate CLM blob file name */
-	ptr = strrchr(fw_name, '.');
-	if (!ptr) {
-		err = -ENOENT;
-		goto done;
-	}
-
-	len = ptr - fw_name + 1;
-	if (len + strlen(".clm_blob") > BRCMF_FW_NAME_LEN) {
-		err = -E2BIG;
-	} else {
-		strlcpy(clm_name, fw_name, len);
-		strlcat(clm_name, ".clm_blob", BRCMF_FW_NAME_LEN);
-	}
-done:
-	return err;
-}
-
 static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
 {
-	struct device *dev = ifp->drvr->bus_if->dev;
+	struct brcmf_bus *bus = ifp->drvr->bus_if;
 	struct brcmf_dload_data_le *chunk_buf;
 	const struct firmware *clm = NULL;
 	u8 clm_name[BRCMF_FW_NAME_LEN];
@@ -173,16 +140,16 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
 
 	brcmf_dbg(TRACE, "Enter\n");
 
-	memset(clm_name, 0, BRCMF_FW_NAME_LEN);
-	err = brcmf_c_get_clm_name(ifp, clm_name);
+	memset(clm_name, 0, sizeof(clm_name));
+	err = brcmf_bus_get_fwname(bus, ".clm_blob", clm_name);
 	if (err) {
 		brcmf_err("get CLM blob file name failed (%d)\n", err);
 		return err;
 	}
 
-	err = request_firmware(&clm, clm_name, dev);
+	err = request_firmware(&clm, clm_name, bus->dev);
 	if (err) {
-		brcmf_info("no clm_blob available(err=%d), device may have limited channels available\n",
+		brcmf_info("no clm_blob available (err=%d), device may have limited channels available\n",
 			   err);
 		return 0;
 	}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 179177a1bba9..091c191ce259 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1350,23 +1350,24 @@ static int brcmf_pcie_get_memdump(struct device *dev, void *data, size_t len)
 	return 0;
 }
 
-static int brcmf_pcie_get_fwname(struct device *dev, u32 chip, u32 chiprev,
-				 u8 *fw_name)
+static
+int brcmf_pcie_get_fwname(struct device *dev, const char *ext, u8 *fw_name)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_pciedev *buspub = bus_if->bus_priv.pcie;
-	struct brcmf_pciedev_info *devinfo = buspub->devinfo;
-	int ret = 0;
+	struct brcmf_fw_request *fwreq;
+	struct brcmf_fw_name fwnames[] = {
+		{ ext, fw_name },
+	};
 
-	if (devinfo->fw_name[0] != '\0')
-		strlcpy(fw_name, devinfo->fw_name, BRCMF_FW_NAME_LEN);
-	else
-		ret = brcmf_fw_map_chip_to_name(chip, chiprev,
-						brcmf_pcie_fwnames,
-						ARRAY_SIZE(brcmf_pcie_fwnames),
-						fw_name, NULL);
+	fwreq = brcmf_fw_alloc_request(bus_if->chip, bus_if->chiprev,
+				       brcmf_pcie_fwnames,
+				       ARRAY_SIZE(brcmf_pcie_fwnames),
+				       fwnames, ARRAY_SIZE(fwnames));
+	if (!fwreq)
+		return -ENOMEM;
 
-	return ret;
+	kfree(fwreq);
+	return 0;
 }
 
 static const struct brcmf_bus_ops brcmf_pcie_bus_ops = {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 24dbf7ea2301..1037df7297bb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4000,22 +4000,24 @@ brcmf_sdio_watchdog(struct timer_list *t)
 	}
 }
 
-static int brcmf_sdio_get_fwname(struct device *dev, u32 chip, u32 chiprev,
-				 u8 *fw_name)
+static
+int brcmf_sdio_get_fwname(struct device *dev, const char *ext, u8 *fw_name)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
-	int ret = 0;
+	struct brcmf_fw_request *fwreq;
+	struct brcmf_fw_name fwnames[] = {
+		{ ext, fw_name },
+	};
 
-	if (sdiodev->fw_name[0] != '\0')
-		strlcpy(fw_name, sdiodev->fw_name, BRCMF_FW_NAME_LEN);
-	else
-		ret = brcmf_fw_map_chip_to_name(chip, chiprev,
-						brcmf_sdio_fwnames,
-						ARRAY_SIZE(brcmf_sdio_fwnames),
-						fw_name, NULL);
+	fwreq = brcmf_fw_alloc_request(bus_if->chip, bus_if->chiprev,
+				       brcmf_sdio_fwnames,
+				       ARRAY_SIZE(brcmf_sdio_fwnames),
+				       fwnames, ARRAY_SIZE(fwnames));
+	if (!fwreq)
+		return -ENOMEM;
 
-	return ret;
+	kfree(fwreq);
+	return 0;
 }
 
 static const struct brcmf_bus_ops brcmf_sdio_bus_ops = {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 1c5f13546556..a0873adcc01c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1128,21 +1128,24 @@ static void brcmf_usb_wowl_config(struct device *dev, bool enabled)
 		device_set_wakeup_enable(devinfo->dev, false);
 }
 
-static int brcmf_usb_get_fwname(struct device *dev, u32 chip, u32 chiprev,
-				u8 *fw_name)
+static
+int brcmf_usb_get_fwname(struct device *dev, const char *ext, u8 *fw_name)
 {
-	struct brcmf_usbdev_info *devinfo = brcmf_usb_get_businfo(dev);
-	int ret = 0;
+	struct brcmf_bus *bus = dev_get_drvdata(dev);
+	struct brcmf_fw_request *fwreq;
+	struct brcmf_fw_name fwnames[] = {
+		{ ext, fw_name },
+	};
 
-	if (devinfo->fw_name[0] != '\0')
-		strlcpy(fw_name, devinfo->fw_name, BRCMF_FW_NAME_LEN);
-	else
-		ret = brcmf_fw_map_chip_to_name(chip, chiprev,
-						brcmf_usb_fwnames,
-						ARRAY_SIZE(brcmf_usb_fwnames),
-						fw_name, NULL);
+	fwreq = brcmf_fw_alloc_request(bus->chip, bus->chiprev,
+				       brcmf_usb_fwnames,
+				       ARRAY_SIZE(brcmf_usb_fwnames),
+				       fwnames, ARRAY_SIZE(fwnames));
+	if (!fwreq)
+		return -ENOMEM;
 
-	return ret;
+	kfree(fwreq);
+	return 0;
 }
 
 static const struct brcmf_bus_ops brcmf_usb_bus_ops = {

From 18c2b20e276e04476a3350b4a92c1dfad725d45d Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:29 +0100
Subject: [PATCH 1320/1678] brcmfmac: get rid of brcmf_fw_map_chip_to_name()

The function is no longer used so removing it.

Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/firmware.c    | 53 -------------------
 .../broadcom/brcm80211/brcmfmac/firmware.h    |  4 --
 2 files changed, 57 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index f495d2252ab5..c14a02ae68cb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -635,59 +635,6 @@ static void brcmf_fw_get_full_name(char fw_name[BRCMF_FW_NAME_LEN],
 	strlcat(fw_name, extension, BRCMF_FW_NAME_LEN);
 }
 
-int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
-			      struct brcmf_firmware_mapping mapping_table[],
-			      u32 table_size, char fw_name[BRCMF_FW_NAME_LEN],
-			      char nvram_name[BRCMF_FW_NAME_LEN])
-{
-	char chipname[12];
-	u32 i;
-	char end;
-
-	for (i = 0; i < table_size; i++) {
-		if (mapping_table[i].chipid == chip &&
-		    mapping_table[i].revmask & BIT(chiprev))
-			break;
-	}
-
-	if (i == table_size) {
-		brcmf_err("Unknown chipid %d [%d]\n", chip, chiprev);
-		return -ENODEV;
-	}
-
-	brcmf_chip_name(chip, chiprev, chipname, sizeof(chipname));
-
-	/* check if firmware path is provided by module parameter */
-	if (brcmf_mp_global.firmware_path[0] != '\0') {
-		if (fw_name)
-			strlcpy(fw_name, brcmf_mp_global.firmware_path,
-				BRCMF_FW_NAME_LEN);
-		if (nvram_name)
-			strlcpy(nvram_name, brcmf_mp_global.firmware_path,
-				BRCMF_FW_NAME_LEN);
-
-		end = brcmf_mp_global.firmware_path[
-				strlen(brcmf_mp_global.firmware_path) - 1];
-		if (end != '/') {
-			if (fw_name)
-				strlcat(fw_name, "/", BRCMF_FW_NAME_LEN);
-			if (nvram_name)
-				strlcat(nvram_name, "/", BRCMF_FW_NAME_LEN);
-		}
-	}
-
-	brcmf_info("using %s for chip %s\n",
-		   mapping_table[i].fw_base, chipname);
-	if (fw_name)
-		brcmf_fw_get_full_name(fw_name,
-				       mapping_table[i].fw_base, ".bin");
-	if (nvram_name)
-		brcmf_fw_get_full_name(nvram_name,
-				       mapping_table[i].fw_base, ".txt");
-
-	return 0;
-}
-
 struct brcmf_fw_request *
 brcmf_fw_alloc_request(u32 chip, u32 chiprev,
 		       struct brcmf_firmware_mapping mapping_table[],
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
index 1416fbbefebd..79a21095c349 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.h
@@ -46,10 +46,6 @@ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH fw_base ".bin")
 #define BRCMF_FW_ENTRY(chipid, mask, name) \
 	{ chipid, mask, BRCM_ ## name ## _FIRMWARE_BASENAME }
 
-int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
-			      struct brcmf_firmware_mapping mapping_table[],
-			      u32 table_size, char fw_name[BRCMF_FW_NAME_LEN],
-			      char nvram_name[BRCMF_FW_NAME_LEN]);
 void brcmf_fw_nvram_free(void *nvram);
 
 enum brcmf_fw_type {

From bf291b7247e53f52a4236c0b55a5df046d6177df Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:30 +0100
Subject: [PATCH 1321/1678] brcmfmac: get rid of brcmf_fw_get_full_name()

The function was pretty minimal and now it is called only from one
place so just get rid of it.

Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../broadcom/brcm80211/brcmfmac/firmware.c         | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index c14a02ae68cb..9277f4c2bfeb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -628,13 +628,6 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
 	return 0;
 }
 
-static void brcmf_fw_get_full_name(char fw_name[BRCMF_FW_NAME_LEN],
-				   const char *fw_base, const char *extension)
-{
-	strlcat(fw_name, fw_base, BRCMF_FW_NAME_LEN);
-	strlcat(fw_name, extension, BRCMF_FW_NAME_LEN);
-}
-
 struct brcmf_fw_request *
 brcmf_fw_alloc_request(u32 chip, u32 chiprev,
 		       struct brcmf_firmware_mapping mapping_table[],
@@ -685,9 +678,10 @@ brcmf_fw_alloc_request(u32 chip, u32 chiprev,
 					BRCMF_FW_NAME_LEN);
 			}
 		}
-		brcmf_fw_get_full_name(fwnames[j].path,
-				       mapping_table[i].fw_base,
-				       fwnames[j].extension);
+		strlcat(fwnames[j].path, mapping_table[i].fw_base,
+			BRCMF_FW_NAME_LEN);
+		strlcat(fwnames[j].path, fwnames[j].extension,
+			BRCMF_FW_NAME_LEN);
 		fwreq->items[j].path = fwnames[j].path;
 	}
 

From 48eaee3f272a5bfe6986d07c51f6975d3c2f74d1 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Thu, 22 Mar 2018 21:28:31 +0100
Subject: [PATCH 1322/1678] brcmfmac: add kerneldoc for struct
 brcmf_bus::msgbuf

This field did not have kerneldoc description so adding it now.

Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 31858a20b97d..27e693e93f21 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -140,6 +140,7 @@ struct brcmf_bus_stats {
  * @always_use_fws_queue: bus wants use queue also when fwsignal is inactive.
  * @wowl_supported: is wowl supported by bus driver.
  * @chiprev: revision of the dongle chip.
+ * @msgbuf: msgbuf protocol parameters provided by bus layer.
  */
 struct brcmf_bus {
 	union {

From b1c2d0f2507bf56d9f4dbd46dc4b99240fbd187c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 16 Mar 2018 16:41:49 +0100
Subject: [PATCH 1323/1678] brcmsmac: allocate ucode with GFP_KERNEL

The brcms_ucode_init_buf() duplicates the ucode chunks via kmemdup()
with GFP_ATOMIC as a precondition of wl->lock acquired.  This caused
allocation failures sometimes as reported in the bugzilla below.

When looking at the the real usage, one can find that it's called
solely from brcms_request_fw(), and it's obviously outside the lock.
Hence we can use GFP_KERNEL there safely for avoiding such allocation
errors.

Bugzilla: http://bugzilla.suse.com/show_bug.cgi?id=1085174
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 .../net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
index 8e58f6800483..ecc89e718b9c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
@@ -1563,7 +1563,7 @@ void brcms_free_timer(struct brcms_timer *t)
 }
 
 /*
- * precondition: perimeter lock has been acquired
+ * precondition: no locking required
  */
 int brcms_ucode_init_buf(struct brcms_info *wl, void **pbuf, u32 idx)
 {
@@ -1578,7 +1578,7 @@ int brcms_ucode_init_buf(struct brcms_info *wl, void **pbuf, u32 idx)
 			if (le32_to_cpu(hdr->idx) == idx) {
 				pdata = wl->fw.fw_bin[i]->data +
 					le32_to_cpu(hdr->offset);
-				*pbuf = kmemdup(pdata, len, GFP_ATOMIC);
+				*pbuf = kmemdup(pdata, len, GFP_KERNEL);
 				if (*pbuf == NULL)
 					goto fail;
 

From db2ad7c25a9a01aaa53f8a74ff8ce6f35e61179f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 15 Mar 2018 18:24:20 +0100
Subject: [PATCH 1324/1678] mt76: use mt76_poll_msec routine in
 mt76pci_load_firmware()

Use mt76_poll_msec() in mt76pci_load_firmware to check if the firmware
has been started instead of explicitly poll MCU running register

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt76x2_mcu.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x2_mcu.c
index 15820b11f9db..dfd36d736b06 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_mcu.c
@@ -187,7 +187,7 @@ mt76pci_load_firmware(struct mt76x2_dev *dev)
 {
 	const struct firmware *fw;
 	const struct mt76x2_fw_header *hdr;
-	int i, len, ret;
+	int len, ret;
 	__le32 *cur;
 	u32 offset, val;
 
@@ -240,16 +240,7 @@ mt76pci_load_firmware(struct mt76x2_dev *dev)
 
 	/* trigger firmware */
 	mt76_wr(dev, MT_MCU_INT_LEVEL, 2);
-	for (i = 200; i > 0; i--) {
-		val = mt76_rr(dev, MT_MCU_COM_REG0);
-
-		if (val & 1)
-			break;
-
-		msleep(10);
-	}
-
-	if (!i) {
+	if (!mt76_poll_msec(dev, MT_MCU_COM_REG0, 1, 1, 200)) {
 		dev_err(dev->mt76.dev, "Firmware failed to start\n");
 		release_firmware(fw);
 		return -ETIMEDOUT;

From 6958b027435aa54d82bbef09a007fd287f439977 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 17 Mar 2018 12:29:27 +0100
Subject: [PATCH 1325/1678] mt76x2: fix possible NULL pointer dereferencing in
 mt76x2_tx()

Fix a theoretical NULL pointer dereferencing in mt76x2_tx routine that
can occurs for injected frames in a monitor vif since vif pointer could
be NULL for that interfaces

Fixes: 23405236460b ("mt76: fix transmission of encrypted mgmt frames")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Acked-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt76x2_tx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_tx.c b/drivers/net/wireless/mediatek/mt76/mt76x2_tx.c
index 534e4bf9a34c..e46eafc4c436 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_tx.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_tx.c
@@ -36,9 +36,12 @@ void mt76x2_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control,
 
 		msta = (struct mt76x2_sta *) control->sta->drv_priv;
 		wcid = &msta->wcid;
+		/* sw encrypted frames */
+		if (!info->control.hw_key && wcid->hw_key_idx != -1)
+			control->sta = NULL;
 	}
 
-	if (vif || (!info->control.hw_key && wcid->hw_key_idx != -1)) {
+	if (vif && !control->sta) {
 		struct mt76x2_vif *mvif;
 
 		mvif = (struct mt76x2_vif *) vif->drv_priv;

From c03a5aacde0c86f6dabab8f17a6d1911ee13b6c4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 17 Mar 2018 15:01:39 +0100
Subject: [PATCH 1326/1678] mt76x2: fix warning in ieee80211_get_key_rx_seq()

Fall back to software encryption for hw unsupported ciphers in order
to fix the following warning in ieee80211_get_key_rx_seq routine:

WARNING: CPU: 1 PID: 1277 at backports-2017-11-01/net/mac80211/key.c:
1010 mt76_wcid_key_setup+0x6c/0x138 [mt76]
CPU: 1 PID: 1277 Comm: hostapd Tainted: G        W       4.9.86 #0
Stack : 00000000 00000000 80527b4a 00000042 80523824 00000000 00000000 80520000
        8fd79a9c 804bbda7 80454c84 00000001 000004fd 80523824 8f7e4ba0 8eceda12
        00000010 8006af94 00000001 80520000 804c1f04 804c1f08 80459890 8ec999b4
        00000003 800a7840 8f7e4ba0 8eceda12 8121de20 00000000 00000001 00c999b4
        00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
        ...
Call Trace:
[<8000f52c>] show_stack+0x70/0x8c
[<801d8d04>] dump_stack+0x94/0xd0
[<8002bcd4>] __warn+0x110/0x118
[<8002bd70>] warn_slowpath_null+0x1c/0x2c
[<8f0415cc>] mt76_wcid_key_setup+0x6c/0x138 [mt76]
[<8f1311b4>] mt76x2_dma_cleanup+0xa38/0x1048 [mt76x2e]

Fixes: 30ce7f4456ae ("mt76: validate rx CCMP PN")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Acked-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt76x2_main.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
index 25f4cebef26d..73c127f92613 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2_main.c
@@ -336,6 +336,17 @@ mt76x2_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	int idx = key->keyidx;
 	int ret;
 
+	/* fall back to sw encryption for unsupported ciphers */
+	switch (key->cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+	case WLAN_CIPHER_SUITE_TKIP:
+	case WLAN_CIPHER_SUITE_CCMP:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
 	/*
 	 * The hardware does not support per-STA RX GTK, fall back
 	 * to software mode for these.

From 5306653850b444452937834adc5a5ac63bae275e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Mar 2018 16:55:00 +0800
Subject: [PATCH 1327/1678] sctp: remove unnecessary asoc in
 sctp_has_association

After Commit dae399d7fdee ("sctp: hold transport instead of assoc
when lookup assoc in rx path"), it put transport instead of asoc
in sctp_has_association. Variable 'asoc' is not used any more.

So this patch is to remove it, while at it,  it also changes the
return type of sctp_has_association to bool, and does the same
for it's caller sctp_endpoint_is_peeled_off.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  8 ++++----
 net/sctp/endpointola.c     |  8 ++++----
 net/sctp/input.c           | 13 ++++++-------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 012fb3e2f4cf..c63249ea34c3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1341,12 +1341,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
 	const struct sctp_endpoint *ep,
 	const union sctp_addr *paddr,
 	struct sctp_transport **);
-int sctp_endpoint_is_peeled_off(struct sctp_endpoint *,
-				const union sctp_addr *);
+bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
+				 const union sctp_addr *paddr);
 struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
 					struct net *, const union sctp_addr *);
-int sctp_has_association(struct net *net, const union sctp_addr *laddr,
-			 const union sctp_addr *paddr);
+bool sctp_has_association(struct net *net, const union sctp_addr *laddr,
+			  const union sctp_addr *paddr);
 
 int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
 		     const struct sctp_association *asoc,
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8b3146816519..e2f5a3ee41a7 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -349,8 +349,8 @@ out:
 /* Look for any peeled off association from the endpoint that matches the
  * given peer address.
  */
-int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
-				const union sctp_addr *paddr)
+bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
+				 const union sctp_addr *paddr)
 {
 	struct sctp_sockaddr_entry *addr;
 	struct sctp_bind_addr *bp;
@@ -362,10 +362,10 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
 	 */
 	list_for_each_entry(addr, &bp->address_list, list) {
 		if (sctp_has_association(net, &addr->a, paddr))
-			return 1;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 /* Do delayed input processing.  This is scheduled by sctp_rcv().
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b381d78548ac..ba8a6e6c36fa 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1010,19 +1010,18 @@ struct sctp_association *sctp_lookup_association(struct net *net,
 }
 
 /* Is there an association matching the given local and peer addresses? */
-int sctp_has_association(struct net *net,
-			 const union sctp_addr *laddr,
-			 const union sctp_addr *paddr)
+bool sctp_has_association(struct net *net,
+			  const union sctp_addr *laddr,
+			  const union sctp_addr *paddr)
 {
-	struct sctp_association *asoc;
 	struct sctp_transport *transport;
 
-	if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
+	if (sctp_lookup_association(net, laddr, paddr, &transport)) {
 		sctp_transport_put(transport);
-		return 1;
+		return true;
 	}
 
-	return 0;
+	return false;
 }
 
 /*

From 8daf1a2d7e40685054ebae680733d822ced6df62 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Mar 2018 12:27:12 +0100
Subject: [PATCH 1328/1678] net/ncsi: check for null return from call to
 nla_nest_start

The call to nla_nest_start calls nla_put which can lead to a NULL
return so it's possible for attr to become NULL and we can potentially
get a NULL pointer dereference on attr.  Fix this by checking for
a NULL return.

Detected by CoverityScan, CID#1466125 ("Dereference null return")

Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ncsi/ncsi-netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 05fcfb4fbe1d..8d7e849d4825 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -190,6 +190,10 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
 	package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
 
 	attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST);
+	if (!attr) {
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
 	rc = ncsi_write_package_info(skb, ndp, package_id);
 
 	if (rc) {

From 47e0e14eb1a688c0868385e02db263093d2df6db Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 26 Mar 2018 15:34:22 +0200
Subject: [PATCH 1329/1678] net: mvpp2: Make mvpp2_prs_hw_read a parser entry
 init function

The mvpp2_prs_hw_read function uses the 'index' field of the struct
mvpp2_prs_entry to initialize the rest of the fields. This makes it
unclear from a caller's perspective, who needs to manipulate a struct
that is not entirely initialized.

This commit makes it an init function for prs_entry, by passing it the
index as a parameter. The function now zeroes the entry, and sets the
index field before doing all other init from HW.

The function is renamed 'mvpp2_prs_init_from_hw' to make that clear.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 48 +++++++++++-----------------
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index f8bc3d4a39ff..f51dcb3b09d7 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1582,14 +1582,18 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe)
 	return 0;
 }
 
-/* Read tcam entry from hw */
-static int mvpp2_prs_hw_read(struct mvpp2 *priv, struct mvpp2_prs_entry *pe)
+/* Initialize tcam entry from hw */
+static int mvpp2_prs_init_from_hw(struct mvpp2 *priv,
+				  struct mvpp2_prs_entry *pe, int tid)
 {
 	int i;
 
 	if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 		return -EINVAL;
 
+	memset(pe, 0, sizeof(*pe));
+	pe->index = tid;
+
 	/* Write tcam index - indirect access */
 	mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index);
 
@@ -1931,8 +1935,7 @@ static struct mvpp2_prs_entry *mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_FLOWS)
 			continue;
 
-		pe->index = tid;
-		mvpp2_prs_hw_read(priv, pe);
+		mvpp2_prs_init_from_hw(priv, pe, tid);
 		bits = mvpp2_prs_sram_ai_get(pe);
 
 		/* Sram store classification lookup ID in AI bits [5:0] */
@@ -1971,8 +1974,7 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add)
 
 	if (priv->prs_shadow[MVPP2_PE_DROP_ALL].valid) {
 		/* Entry exist - update port only */
-		pe.index = MVPP2_PE_DROP_ALL;
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, MVPP2_PE_DROP_ALL);
 	} else {
 		/* Entry doesn't exist - create new */
 		memset(&pe, 0, sizeof(pe));
@@ -2020,8 +2022,7 @@ static void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port,
 
 	/* promiscuous mode - Accept unknown unicast or multicast packets */
 	if (priv->prs_shadow[tid].valid) {
-		pe.index = tid;
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	} else {
 		memset(&pe, 0, sizeof(pe));
 		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
@@ -2071,8 +2072,7 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add,
 
 	if (priv->prs_shadow[tid].valid) {
 		/* Entry exist - update port only */
-		pe.index = tid;
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	} else {
 		/* Entry doesn't exist - create new */
 		memset(&pe, 0, sizeof(pe));
@@ -2140,8 +2140,7 @@ static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port,
 
 	if (priv->prs_shadow[tid].valid) {
 		/* Entry exist - update port only */
-		pe.index = tid;
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	} else {
 		/* Entry doesn't exist - create new */
 		memset(&pe, 0, sizeof(pe));
@@ -2210,9 +2209,7 @@ static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
 			continue;
 
-		pe->index = tid;
-
-		mvpp2_prs_hw_read(priv, pe);
+		mvpp2_prs_init_from_hw(priv, pe, tid);
 		match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid));
 		if (!match)
 			continue;
@@ -2268,8 +2265,7 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 			    priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
 				continue;
 
-			pe->index = tid_aux;
-			mvpp2_prs_hw_read(priv, pe);
+			mvpp2_prs_init_from_hw(priv, pe, tid_aux);
 			ri_bits = mvpp2_prs_sram_ri_get(pe);
 			if ((ri_bits & MVPP2_PRS_RI_VLAN_MASK) ==
 			    MVPP2_PRS_RI_VLAN_DOUBLE)
@@ -2351,8 +2347,7 @@ static struct mvpp2_prs_entry *mvpp2_prs_double_vlan_find(struct mvpp2 *priv,
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
 			continue;
 
-		pe->index = tid;
-		mvpp2_prs_hw_read(priv, pe);
+		mvpp2_prs_init_from_hw(priv, pe, tid);
 
 		match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid1))
 			&& mvpp2_prs_tcam_data_cmp(pe, 4, swab16(tpid2));
@@ -2406,8 +2401,7 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
 			    priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
 				continue;
 
-			pe->index = tid_aux;
-			mvpp2_prs_hw_read(priv, pe);
+			mvpp2_prs_init_from_hw(priv, pe, tid_aux);
 			ri_bits = mvpp2_prs_sram_ri_get(pe);
 			ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
 			if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
@@ -3513,9 +3507,7 @@ static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID)
 			continue;
 
-		pe.index = tid;
-
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 		mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
 		mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
 
@@ -3569,7 +3561,7 @@ static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
 		/* Mask all ports */
 		mvpp2_prs_tcam_port_map_set(&pe, 0);
 	} else {
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	}
 
 	/* Enable the current port */
@@ -3793,8 +3785,7 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 		    (priv->prs_shadow[tid].udf != udf_type))
 			continue;
 
-		pe->index = tid;
-		mvpp2_prs_hw_read(priv, pe);
+		mvpp2_prs_init_from_hw(priv, pe, tid);
 		entry_pmap = mvpp2_prs_tcam_port_map_get(pe);
 
 		if (mvpp2_prs_mac_range_equals(pe, da, mask) &&
@@ -3935,8 +3926,7 @@ static void mvpp2_prs_mac_del_all(struct mvpp2_port *port)
 		    (priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF))
 			continue;
 
-		pe.index = tid;
-		mvpp2_prs_hw_read(priv, &pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 
 		pmap = mvpp2_prs_tcam_port_map_get(&pe);
 

From 0c6d9b44145d8134ebe4e9ebfa02e0dd23744723 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 26 Mar 2018 15:34:23 +0200
Subject: [PATCH 1330/1678] net: mvpp2: Don't use dynamic allocs for local
 variables

Some helper functions that search for given entries in the TCAM filter
on PPv2 controller make use of dynamically alloced temporary variables,
allocated with GFP_KERNEL. These functions can be called in atomic
context, and dynamic alloc is not really needed in these cases anyways.

This commit gets rid of dynamic allocs and use stack allocation in the
following functions, and where they're used :
 - mvpp2_prs_flow_find
 - mvpp2_prs_vlan_find
 - mvpp2_prs_double_vlan_find
 - mvpp2_prs_mac_da_range_find

For all these functions, instead of returning an temporary object
representing the TCAM entry, we simply return the TCAM id that matches
the requested entry.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 286 ++++++++++++---------------
 1 file changed, 125 insertions(+), 161 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index f51dcb3b09d7..7075e5ab78f3 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1917,16 +1917,11 @@ static void mvpp2_prs_sram_offset_set(struct mvpp2_prs_entry *pe,
 }
 
 /* Find parser flow entry */
-static struct mvpp2_prs_entry *mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
+static int mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
 {
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid;
 
-	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
-	if (!pe)
-		return NULL;
-	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_FLOWS);
-
 	/* Go through the all entires with MVPP2_PRS_LU_FLOWS */
 	for (tid = MVPP2_PRS_TCAM_SRAM_SIZE - 1; tid >= 0; tid--) {
 		u8 bits;
@@ -1935,16 +1930,15 @@ static struct mvpp2_prs_entry *mvpp2_prs_flow_find(struct mvpp2 *priv, int flow)
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_FLOWS)
 			continue;
 
-		mvpp2_prs_init_from_hw(priv, pe, tid);
-		bits = mvpp2_prs_sram_ai_get(pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
+		bits = mvpp2_prs_sram_ai_get(&pe);
 
 		/* Sram store classification lookup ID in AI bits [5:0] */
 		if ((bits & MVPP2_PRS_FLOW_ID_MASK) == flow)
-			return pe;
+			return tid;
 	}
-	kfree(pe);
 
-	return NULL;
+	return -ENOENT;
 }
 
 /* Return first free tcam index, seeking from start to end */
@@ -2188,17 +2182,11 @@ static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port,
 }
 
 /* Search for existing single/triple vlan entry */
-static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
-						   unsigned short tpid, int ai)
+static int mvpp2_prs_vlan_find(struct mvpp2 *priv, unsigned short tpid, int ai)
 {
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid;
 
-	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
-	if (!pe)
-		return NULL;
-	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
-
 	/* Go through the all entries with MVPP2_PRS_LU_VLAN */
 	for (tid = MVPP2_PE_FIRST_FREE_TID;
 	     tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
@@ -2209,17 +2197,17 @@ static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
 			continue;
 
-		mvpp2_prs_init_from_hw(priv, pe, tid);
-		match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid));
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
+		match = mvpp2_prs_tcam_data_cmp(&pe, 0, swab16(tpid));
 		if (!match)
 			continue;
 
 		/* Get vlan type */
-		ri_bits = mvpp2_prs_sram_ri_get(pe);
+		ri_bits = mvpp2_prs_sram_ri_get(&pe);
 		ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
 
 		/* Get current ai value from tcam */
-		ai_bits = mvpp2_prs_tcam_ai_get(pe);
+		ai_bits = mvpp2_prs_tcam_ai_get(&pe);
 		/* Clear double vlan bit */
 		ai_bits &= ~MVPP2_PRS_DBL_VLAN_AI_BIT;
 
@@ -2228,34 +2216,31 @@ static struct mvpp2_prs_entry *mvpp2_prs_vlan_find(struct mvpp2 *priv,
 
 		if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
 		    ri_bits == MVPP2_PRS_RI_VLAN_TRIPLE)
-			return pe;
+			return tid;
 	}
-	kfree(pe);
 
-	return NULL;
+	return -ENOENT;
 }
 
 /* Add/update single/triple vlan entry */
 static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 			      unsigned int port_map)
 {
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid_aux, tid;
 	int ret = 0;
 
-	pe = mvpp2_prs_vlan_find(priv, tpid, ai);
+	memset(&pe, 0, sizeof(pe));
 
-	if (!pe) {
+	tid = mvpp2_prs_vlan_find(priv, tpid, ai);
+
+	if (tid < 0) {
 		/* Create new tcam entry */
 		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_LAST_FREE_TID,
 						MVPP2_PE_FIRST_FREE_TID);
 		if (tid < 0)
 			return tid;
 
-		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
-		if (!pe)
-			return -ENOMEM;
-
 		/* Get last double vlan tid */
 		for (tid_aux = MVPP2_PE_LAST_FREE_TID;
 		     tid_aux >= MVPP2_PE_FIRST_FREE_TID; tid_aux--) {
@@ -2265,48 +2250,46 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai,
 			    priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
 				continue;
 
-			mvpp2_prs_init_from_hw(priv, pe, tid_aux);
-			ri_bits = mvpp2_prs_sram_ri_get(pe);
+			mvpp2_prs_init_from_hw(priv, &pe, tid_aux);
+			ri_bits = mvpp2_prs_sram_ri_get(&pe);
 			if ((ri_bits & MVPP2_PRS_RI_VLAN_MASK) ==
 			    MVPP2_PRS_RI_VLAN_DOUBLE)
 				break;
 		}
 
-		if (tid <= tid_aux) {
-			ret = -EINVAL;
-			goto free_pe;
-		}
+		if (tid <= tid_aux)
+			return -EINVAL;
 
-		memset(pe, 0, sizeof(*pe));
-		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
-		pe->index = tid;
+		memset(&pe, 0, sizeof(pe));
+		pe.index = tid;
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
 
-		mvpp2_prs_match_etype(pe, 0, tpid);
+		mvpp2_prs_match_etype(&pe, 0, tpid);
 
 		/* VLAN tag detected, proceed with VID filtering */
-		mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID);
+		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID);
 
 		/* Clear all ai bits for next iteration */
-		mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK);
+		mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK);
 
 		if (ai == MVPP2_PRS_SINGLE_VLAN_AI) {
-			mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_SINGLE,
+			mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_SINGLE,
 						 MVPP2_PRS_RI_VLAN_MASK);
 		} else {
 			ai |= MVPP2_PRS_DBL_VLAN_AI_BIT;
-			mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_TRIPLE,
+			mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_TRIPLE,
 						 MVPP2_PRS_RI_VLAN_MASK);
 		}
-		mvpp2_prs_tcam_ai_update(pe, ai, MVPP2_PRS_SRAM_AI_MASK);
+		mvpp2_prs_tcam_ai_update(&pe, ai, MVPP2_PRS_SRAM_AI_MASK);
 
-		mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_VLAN);
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN);
+	} else {
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	}
 	/* Update ports' mask */
-	mvpp2_prs_tcam_port_map_set(pe, port_map);
+	mvpp2_prs_tcam_port_map_set(&pe, port_map);
 
-	mvpp2_prs_hw_write(priv, pe);
-free_pe:
-	kfree(pe);
+	mvpp2_prs_hw_write(priv, &pe);
 
 	return ret;
 }
@@ -2325,18 +2308,12 @@ static int mvpp2_prs_double_vlan_ai_free_get(struct mvpp2 *priv)
 }
 
 /* Search for existing double vlan entry */
-static struct mvpp2_prs_entry *mvpp2_prs_double_vlan_find(struct mvpp2 *priv,
-							  unsigned short tpid1,
-							  unsigned short tpid2)
+static int mvpp2_prs_double_vlan_find(struct mvpp2 *priv, unsigned short tpid1,
+				      unsigned short tpid2)
 {
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid;
 
-	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
-	if (!pe)
-		return NULL;
-	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
-
 	/* Go through the all entries with MVPP2_PRS_LU_VLAN */
 	for (tid = MVPP2_PE_FIRST_FREE_TID;
 	     tid <= MVPP2_PE_LAST_FREE_TID; tid++) {
@@ -2347,21 +2324,20 @@ static struct mvpp2_prs_entry *mvpp2_prs_double_vlan_find(struct mvpp2 *priv,
 		    priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN)
 			continue;
 
-		mvpp2_prs_init_from_hw(priv, pe, tid);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 
-		match = mvpp2_prs_tcam_data_cmp(pe, 0, swab16(tpid1))
-			&& mvpp2_prs_tcam_data_cmp(pe, 4, swab16(tpid2));
+		match = mvpp2_prs_tcam_data_cmp(&pe, 0, swab16(tpid1)) &&
+			mvpp2_prs_tcam_data_cmp(&pe, 4, swab16(tpid2));
 
 		if (!match)
 			continue;
 
-		ri_mask = mvpp2_prs_sram_ri_get(pe) & MVPP2_PRS_RI_VLAN_MASK;
+		ri_mask = mvpp2_prs_sram_ri_get(&pe) & MVPP2_PRS_RI_VLAN_MASK;
 		if (ri_mask == MVPP2_PRS_RI_VLAN_DOUBLE)
-			return pe;
+			return tid;
 	}
-	kfree(pe);
 
-	return NULL;
+	return -ENOENT;
 }
 
 /* Add or update double vlan entry */
@@ -2369,28 +2345,24 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
 				     unsigned short tpid2,
 				     unsigned int port_map)
 {
-	struct mvpp2_prs_entry *pe;
 	int tid_aux, tid, ai, ret = 0;
+	struct mvpp2_prs_entry pe;
 
-	pe = mvpp2_prs_double_vlan_find(priv, tpid1, tpid2);
+	memset(&pe, 0, sizeof(pe));
 
-	if (!pe) {
+	tid = mvpp2_prs_double_vlan_find(priv, tpid1, tpid2);
+
+	if (tid < 0) {
 		/* Create new tcam entry */
 		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
 				MVPP2_PE_LAST_FREE_TID);
 		if (tid < 0)
 			return tid;
 
-		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
-		if (!pe)
-			return -ENOMEM;
-
 		/* Set ai value for new double vlan entry */
 		ai = mvpp2_prs_double_vlan_ai_free_get(priv);
-		if (ai < 0) {
-			ret = ai;
-			goto free_pe;
-		}
+		if (ai < 0)
+			return ai;
 
 		/* Get first single/triple vlan tid */
 		for (tid_aux = MVPP2_PE_FIRST_FREE_TID;
@@ -2401,45 +2373,44 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1,
 			    priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN)
 				continue;
 
-			mvpp2_prs_init_from_hw(priv, pe, tid_aux);
-			ri_bits = mvpp2_prs_sram_ri_get(pe);
+			mvpp2_prs_init_from_hw(priv, &pe, tid_aux);
+			ri_bits = mvpp2_prs_sram_ri_get(&pe);
 			ri_bits &= MVPP2_PRS_RI_VLAN_MASK;
 			if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE ||
 			    ri_bits == MVPP2_PRS_RI_VLAN_TRIPLE)
 				break;
 		}
 
-		if (tid >= tid_aux) {
-			ret = -ERANGE;
-			goto free_pe;
-		}
+		if (tid >= tid_aux)
+			return -ERANGE;
 
-		memset(pe, 0, sizeof(*pe));
-		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_VLAN);
-		pe->index = tid;
+		memset(&pe, 0, sizeof(pe));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN);
+		pe.index = tid;
 
 		priv->prs_double_vlans[ai] = true;
 
-		mvpp2_prs_match_etype(pe, 0, tpid1);
-		mvpp2_prs_match_etype(pe, 4, tpid2);
+		mvpp2_prs_match_etype(&pe, 0, tpid1);
+		mvpp2_prs_match_etype(&pe, 4, tpid2);
 
-		mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN);
+		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN);
 		/* Shift 4 bytes - skip outer vlan tag */
-		mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN,
+		mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN,
 					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-		mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE,
+		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE,
 					 MVPP2_PRS_RI_VLAN_MASK);
-		mvpp2_prs_sram_ai_update(pe, ai | MVPP2_PRS_DBL_VLAN_AI_BIT,
+		mvpp2_prs_sram_ai_update(&pe, ai | MVPP2_PRS_DBL_VLAN_AI_BIT,
 					 MVPP2_PRS_SRAM_AI_MASK);
 
-		mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_VLAN);
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN);
+	} else {
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	}
 
 	/* Update ports' mask */
-	mvpp2_prs_tcam_port_map_set(pe, port_map);
-	mvpp2_prs_hw_write(priv, pe);
-free_pe:
-	kfree(pe);
+	mvpp2_prs_tcam_port_map_set(&pe, port_map);
+	mvpp2_prs_hw_write(priv, &pe);
+
 	return ret;
 }
 
@@ -3508,6 +3479,7 @@ static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
 			continue;
 
 		mvpp2_prs_init_from_hw(priv, &pe, tid);
+
 		mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]);
 		mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]);
 
@@ -3520,7 +3492,7 @@ static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid,
 		return tid;
 	}
 
-	return 0;
+	return -ENOENT;
 }
 
 /* Write parser entry for VID filtering */
@@ -3533,6 +3505,8 @@ static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
 	struct mvpp2_prs_entry pe;
 	int tid;
 
+	memset(&pe, 0, sizeof(pe));
+
 	/* Scan TCAM and see if entry with this <vid,port> already exist */
 	tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask);
 
@@ -3543,8 +3517,7 @@ static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid)
 		shift = MVPP2_VLAN_TAG_LEN;
 
 	/* No such entry */
-	if (!tid) {
-		memset(&pe, 0, sizeof(pe));
+	if (tid < 0) {
 
 		/* Go through all entries from first to last in vlan range */
 		tid = mvpp2_prs_tcam_first_free(priv, vid_start,
@@ -3596,7 +3569,7 @@ static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid)
 	tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff);
 
 	/* No such entry */
-	if (tid)
+	if (tid < 0)
 		return;
 
 	mvpp2_prs_hw_inv(priv, tid);
@@ -3763,18 +3736,13 @@ static bool mvpp2_prs_mac_range_equals(struct mvpp2_prs_entry *pe,
 }
 
 /* Find tcam entry with matched pair <MAC DA, port> */
-static struct mvpp2_prs_entry *
+static int
 mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 			    unsigned char *mask, int udf_type)
 {
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid;
 
-	pe = kzalloc(sizeof(*pe), GFP_ATOMIC);
-	if (!pe)
-		return NULL;
-	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
-
 	/* Go through the all entires with MVPP2_PRS_LU_MAC */
 	for (tid = MVPP2_PE_MAC_RANGE_START;
 	     tid <= MVPP2_PE_MAC_RANGE_END; tid++) {
@@ -3785,16 +3753,15 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 		    (priv->prs_shadow[tid].udf != udf_type))
 			continue;
 
-		mvpp2_prs_init_from_hw(priv, pe, tid);
-		entry_pmap = mvpp2_prs_tcam_port_map_get(pe);
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
+		entry_pmap = mvpp2_prs_tcam_port_map_get(&pe);
 
-		if (mvpp2_prs_mac_range_equals(pe, da, mask) &&
+		if (mvpp2_prs_mac_range_equals(&pe, da, mask) &&
 		    entry_pmap == pmap)
-			return pe;
+			return tid;
 	}
-	kfree(pe);
 
-	return NULL;
+	return -ENOENT;
 }
 
 /* Update parser's mac da entry */
@@ -3804,15 +3771,17 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
 	unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	struct mvpp2 *priv = port->priv;
 	unsigned int pmap, len, ri;
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid;
 
+	memset(&pe, 0, sizeof(pe));
+
 	/* Scan TCAM and see if entry with this <MAC DA, port> already exist */
-	pe = mvpp2_prs_mac_da_range_find(priv, BIT(port->id), da, mask,
-					 MVPP2_PRS_UDF_MAC_DEF);
+	tid = mvpp2_prs_mac_da_range_find(priv, BIT(port->id), da, mask,
+					  MVPP2_PRS_UDF_MAC_DEF);
 
 	/* No such entry */
-	if (!pe) {
+	if (tid < 0) {
 		if (!add)
 			return 0;
 
@@ -3824,39 +3793,37 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
 		if (tid < 0)
 			return tid;
 
-		pe = kzalloc(sizeof(*pe), GFP_ATOMIC);
-		if (!pe)
-			return -ENOMEM;
-		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
-		pe->index = tid;
+		pe.index = tid;
 
 		/* Mask all ports */
-		mvpp2_prs_tcam_port_map_set(pe, 0);
+		mvpp2_prs_tcam_port_map_set(&pe, 0);
+	} else {
+		mvpp2_prs_init_from_hw(priv, &pe, tid);
 	}
 
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC);
+
 	/* Update port mask */
-	mvpp2_prs_tcam_port_set(pe, port->id, add);
+	mvpp2_prs_tcam_port_set(&pe, port->id, add);
 
 	/* Invalidate the entry if no ports are left enabled */
-	pmap = mvpp2_prs_tcam_port_map_get(pe);
+	pmap = mvpp2_prs_tcam_port_map_get(&pe);
 	if (pmap == 0) {
-		if (add) {
-			kfree(pe);
+		if (add)
 			return -EINVAL;
-		}
-		mvpp2_prs_hw_inv(priv, pe->index);
-		priv->prs_shadow[pe->index].valid = false;
-		kfree(pe);
+
+		mvpp2_prs_hw_inv(priv, pe.index);
+		priv->prs_shadow[pe.index].valid = false;
 		return 0;
 	}
 
 	/* Continue - set next lookup */
-	mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_DSA);
+	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_DSA);
 
 	/* Set match on DA */
 	len = ETH_ALEN;
 	while (len--)
-		mvpp2_prs_tcam_data_byte_set(pe, len, da[len], 0xff);
+		mvpp2_prs_tcam_data_byte_set(&pe, len, da[len], 0xff);
 
 	/* Set result info bits */
 	if (is_broadcast_ether_addr(da)) {
@@ -3870,21 +3837,19 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da,
 			ri |= MVPP2_PRS_RI_MAC_ME_MASK;
 	}
 
-	mvpp2_prs_sram_ri_update(pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
+	mvpp2_prs_sram_ri_update(&pe, ri, MVPP2_PRS_RI_L2_CAST_MASK |
 				 MVPP2_PRS_RI_MAC_ME_MASK);
-	mvpp2_prs_shadow_ri_set(priv, pe->index, ri, MVPP2_PRS_RI_L2_CAST_MASK |
+	mvpp2_prs_shadow_ri_set(priv, pe.index, ri, MVPP2_PRS_RI_L2_CAST_MASK |
 				MVPP2_PRS_RI_MAC_ME_MASK);
 
 	/* Shift to ethertype */
-	mvpp2_prs_sram_shift_set(pe, 2 * ETH_ALEN,
+	mvpp2_prs_sram_shift_set(&pe, 2 * ETH_ALEN,
 				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
 
 	/* Update shadow table and hw entry */
-	priv->prs_shadow[pe->index].udf = MVPP2_PRS_UDF_MAC_DEF;
-	mvpp2_prs_shadow_set(priv, pe->index, MVPP2_PRS_LU_MAC);
-	mvpp2_prs_hw_write(priv, pe);
-
-	kfree(pe);
+	priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_MAC_DEF;
+	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC);
+	mvpp2_prs_hw_write(priv, &pe);
 
 	return 0;
 }
@@ -4004,13 +3969,15 @@ static int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type)
 /* Set prs flow for the port */
 static int mvpp2_prs_def_flow(struct mvpp2_port *port)
 {
-	struct mvpp2_prs_entry *pe;
+	struct mvpp2_prs_entry pe;
 	int tid;
 
-	pe = mvpp2_prs_flow_find(port->priv, port->id);
+	memset(&pe, 0, sizeof(pe));
+
+	tid = mvpp2_prs_flow_find(port->priv, port->id);
 
 	/* Such entry not exist */
-	if (!pe) {
+	if (tid < 0) {
 		/* Go through the all entires from last to first */
 		tid = mvpp2_prs_tcam_first_free(port->priv,
 						MVPP2_PE_LAST_FREE_TID,
@@ -4018,24 +3985,21 @@ static int mvpp2_prs_def_flow(struct mvpp2_port *port)
 		if (tid < 0)
 			return tid;
 
-		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
-		if (!pe)
-			return -ENOMEM;
-
-		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_FLOWS);
-		pe->index = tid;
+		pe.index = tid;
 
 		/* Set flow ID*/
-		mvpp2_prs_sram_ai_update(pe, port->id, MVPP2_PRS_FLOW_ID_MASK);
-		mvpp2_prs_sram_bits_set(pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
+		mvpp2_prs_sram_ai_update(&pe, port->id, MVPP2_PRS_FLOW_ID_MASK);
+		mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_DONE_BIT, 1);
 
 		/* Update shadow table */
-		mvpp2_prs_shadow_set(port->priv, pe->index, MVPP2_PRS_LU_FLOWS);
+		mvpp2_prs_shadow_set(port->priv, pe.index, MVPP2_PRS_LU_FLOWS);
+	} else {
+		mvpp2_prs_init_from_hw(port->priv, &pe, tid);
 	}
 
-	mvpp2_prs_tcam_port_map_set(pe, (1 << port->id));
-	mvpp2_prs_hw_write(port->priv, pe);
-	kfree(pe);
+	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
+	mvpp2_prs_tcam_port_map_set(&pe, (1 << port->id));
+	mvpp2_prs_hw_write(port->priv, &pe);
 
 	return 0;
 }

From 6a91ded32d6c8a6d0aee1928bb741e31577af24f Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 26 Mar 2018 14:32:27 +0000
Subject: [PATCH 1331/1678] net: aquantia: Make function
 hw_atl_utils_mpi_set_speed() static

Fixes the following sparse warning:

drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c:508:5: warning:
 symbol 'hw_atl_utils_mpi_set_speed' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index d3b847ec7465..84d7f4dd4ce1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -505,7 +505,7 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
 err_exit:;
 }
 
-int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
+static int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
 {
 	u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
 

From c76f2481b60a62814f4cb1678e48efa3385895aa Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 26 Mar 2018 14:32:44 +0000
Subject: [PATCH 1332/1678] tipc: fix error handling in tipc_udp_enable()

Release alloced resource before return from the error handling
case in tipc_udp_enable(), otherwise will cause memory leak.

Fixes: 52dfae5c85a4 ("tipc: obtain node identity from interface by default")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/udp_media.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 2c13b18426d9..e7d91f5d5cae 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -687,7 +687,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 	}
 	if (!tipc_own_id(net)) {
 		pr_warn("Failed to set node id, please configure manually\n");
-		return -EINVAL;
+		err = -EINVAL;
+		goto err;
 	}
 
 	b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;

From e1a22d13eb1f302afd692583777e27828d375a39 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 26 Mar 2018 14:33:13 +0000
Subject: [PATCH 1333/1678] tipc: tipc_node_create() can be static

Fixes the following sparse warning:

net/tipc/node.c:336:18: warning:
 symbol 'tipc_node_create' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4a95c8c155c6..4fb4327311bb 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -333,8 +333,8 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	}
 }
 
-struct tipc_node *tipc_node_create(struct net *net, u32 addr,
-				   u8 *peer_id, u16 capabilities)
+static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
+					  u8 *peer_id, u16 capabilities)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_node *n, *temp_node;

From cd464197f2378499db134d6c44af3b4e3c0c14b5 Mon Sep 17 00:00:00 2001
From: Lucas Bates <lucasb@mojatatu.com>
Date: Mon, 26 Mar 2018 10:46:14 -0400
Subject: [PATCH 1334/1678] tc-testing: Correct compound statements for
 namespace execution

If tdc is executing test cases inside a namespace, only the
first command in a compound statement will be executed inside
the namespace by tdc. As a result, the subsequent commands
are not executed inside the namespace and the test will fail.

Example:

for i in {x..y}; do args="foo"; done && tc actions add $args

The namespace execution feature will prepend 'ip netns exec'
to the command:

ip netns exec tcut for i in {x..y}; do args="foo"; done && \
  tc actions add $args

So the actual tc command is not parsed by the shell as being
part of the namespace execution.

Enclosing these compound statements inside a bash invocation
with proper escape characters resolves the problem by creating
a subshell inside the namespace.

Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/actions/gact.json    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index ae96d0350d7e..68c91023cdb9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -481,7 +481,7 @@
                 255
             ]
         ],
-        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action pass index $i \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action pass index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action gact",
         "matchPattern": "^[ \t]+index [0-9]+ ref",
@@ -505,7 +505,7 @@
                 255
             ]
         ],
-        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action continue index $i cookie aabbccddeeff112233445566778800a1 \"; args=\"$args$cmd\"; done && $TC actions add $args",
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action gact",
         "matchPattern": "^[ \t]+index [0-9]+ ref",
@@ -528,13 +528,13 @@
                 1,
                 255
             ],
-            "for i in `seq 1 32`; do cmd=\"action continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args"
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
         ],
-        "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action gact index $i \"; args=\"$args$cmd\"; done && $TC actions del $args",
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action gact index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
         "expExitCode": "0",
         "verifyCmd": "$TC actions list action gact",
         "matchPattern": "^[ \t]+index [0-9]+ ref",
         "matchCount": "0",
         "teardown": []
     }
-]
\ No newline at end of file
+]

From e32ac25018558f7bcab387708187ab5aa2733cf8 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 26 Mar 2018 08:35:01 -0700
Subject: [PATCH 1335/1678] ipv6: addrconf: Use normal debugging style

Remove local ADBG macro and use netdev_dbg/pr_debug

Miscellanea:

o Remove unnecessary debug message after allocation failure as there
  already is a dump_stack() on the failure paths
o Leave the allocation failure message on snmp6_alloc_dev as there
  is one code path that does not do a dump_stack()

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ca90cd9fba4..189eac80f4ef 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
 #include <linux/seq_file.h>
 #include <linux/export.h>
 
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
 
 #define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	dev_hold(dev);
 
 	if (snmp6_alloc_dev(ndev) < 0) {
-		ADBG(KERN_WARNING
-			"%s: cannot allocate memory for statistics; dev=%s.\n",
-			__func__, dev->name);
+		netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+			   __func__);
 		neigh_parms_release(&nd_tbl, ndev->nd_parms);
 		dev_put(dev);
 		kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	}
 
 	if (snmp6_register_dev(ndev) < 0) {
-		ADBG(KERN_WARNING
-			"%s: cannot create /proc/net/dev_snmp6/%s\n",
-			__func__, dev->name);
+		netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+			   __func__, dev->name);
 		goto err_release;
 	}
 
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
 
 	/* Ignore adding duplicate addresses on an interface */
 	if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
-		ADBG("ipv6_add_addr: already assigned\n");
+		netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
 		err = -EEXIST;
 	} else {
 		hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	ifa = kzalloc(sizeof(*ifa), gfp_flags);
 	if (!ifa) {
-		ADBG("ipv6_add_addr: malloc failed\n");
 		err = -ENOBUFS;
 		goto out;
 	}
@@ -2618,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	pinfo = (struct prefix_info *) opt;
 
 	if (len < sizeof(struct prefix_info)) {
-		ADBG("addrconf: prefix option too short\n");
+		netdev_dbg(dev, "addrconf: prefix option too short\n");
 		return;
 	}
 
@@ -4446,8 +4434,8 @@ restart:
 	if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
 		next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
 
-	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
-	      now, next, next_sec, next_sched);
+	pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+		 now, next, next_sec, next_sched);
 	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
 	rcu_read_unlock_bh();
 }

From 4171ec060073e1789cb868f43585983c69ff767b Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Mon, 26 Mar 2018 13:40:27 -0700
Subject: [PATCH 1336/1678] liquidio: Removed duplicate Tx queue status check

Napi is checking Tx queue status and waking the Tx queue if required.
Same operation is being done while freeing every Tx buffer.
So removed the duplicate operation of checking Tx queue status from the Tx
buffer free functions.

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_main.c   | 28 ------------------
 .../ethernet/cavium/liquidio/lio_vf_main.c    | 29 -------------------
 2 files changed, 57 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 58b5c75fd2ee..43c5ba0af12b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1634,28 +1634,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 	return 0;
 }
 
-/**
- * \brief Check Tx queue state for a given network buffer
- * @param lio per-network private data
- * @param skb network buffer
- */
-static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
-{
-	int q, iq;
-
-	q = skb->queue_mapping;
-	iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
-
-	if (octnet_iq_is_full(lio->oct_dev, iq))
-		return 0;
-
-	if (__netif_subqueue_stopped(lio->netdev, q)) {
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
-		netif_wake_subqueue(lio->netdev, q);
-	}
-	return 1;
-}
-
 /**
  * \brief Unmap and free network buffer
  * @param buf buffer
@@ -1673,8 +1651,6 @@ static void free_netbuf(void *buf)
 	dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
 			 DMA_TO_DEVICE);
 
-	check_txq_state(lio, skb);
-
 	tx_buffer_free(skb);
 }
 
@@ -1715,8 +1691,6 @@ static void free_netsgbuf(void *buf)
 	list_add_tail(&g->list, &lio->glist[iq]);
 	spin_unlock(&lio->glist_lock[iq]);
 
-	check_txq_state(lio, skb);     /* mq support: sub-queue state check */
-
 	tx_buffer_free(skb);
 }
 
@@ -1762,8 +1736,6 @@ static void free_netsgbuf_with_resp(void *buf)
 	spin_unlock(&lio->glist_lock[iq]);
 
 	/* Don't free the skb yet */
-
-	check_txq_state(lio, skb);
 }
 
 /**
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index d5f5c9a693ee..dc62698bdaf7 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -953,29 +953,6 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 	return 0;
 }
 
-/**
- * \brief Check Tx queue state for a given network buffer
- * @param lio per-network private data
- * @param skb network buffer
- */
-static int check_txq_state(struct lio *lio, struct sk_buff *skb)
-{
-	int q, iq;
-
-	q = skb->queue_mapping;
-	iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
-
-	if (octnet_iq_is_full(lio->oct_dev, iq))
-		return 0;
-
-	if (__netif_subqueue_stopped(lio->netdev, q)) {
-		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
-		netif_wake_subqueue(lio->netdev, q);
-	}
-
-	return 1;
-}
-
 /**
  * \brief Unmap and free network buffer
  * @param buf buffer
@@ -993,8 +970,6 @@ static void free_netbuf(void *buf)
 	dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
 			 DMA_TO_DEVICE);
 
-	check_txq_state(lio, skb);
-
 	tx_buffer_free(skb);
 }
 
@@ -1036,8 +1011,6 @@ static void free_netsgbuf(void *buf)
 	list_add_tail(&g->list, &lio->glist[iq]);
 	spin_unlock(&lio->glist_lock[iq]);
 
-	check_txq_state(lio, skb); /* mq support: sub-queue state check */
-
 	tx_buffer_free(skb);
 }
 
@@ -1083,8 +1056,6 @@ static void free_netsgbuf_with_resp(void *buf)
 	spin_unlock(&lio->glist_lock[iq]);
 
 	/* Don't free the skb yet */
-
-	check_txq_state(lio, skb);
 }
 
 /**

From cdcfeb0fb473e34e012b9a78b5cb377a6ad1434d Mon Sep 17 00:00:00 2001
From: Yan Markman <ymarkman@marvell.com>
Date: Tue, 27 Mar 2018 16:49:05 +0200
Subject: [PATCH 1337/1678] net: mvpp2: Use relaxed I/O in data path

Use relaxed I/O on the hot path. This achieves significant performance
improvements. On a 10G link, this makes a basic iperf TCP test go from
an average of 4.5 Gbits/sec to about 9.40 Gbits/sec.

Signed-off-by: Yan Markman <ymarkman@marvell.com>
[Maxime: Commit message, cosmetic changes]
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 43 +++++++++++++++++++---------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 7075e5ab78f3..7fc1bbf51c44 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1359,6 +1359,10 @@ static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
 	return readl(priv->swth_base[0] + offset);
 }
 
+static u32 mvpp2_read_relaxed(struct mvpp2 *priv, u32 offset)
+{
+	return readl_relaxed(priv->swth_base[0] + offset);
+}
 /* These accessors should be used to access:
  *
  * - per-CPU registers, where each CPU has its own copy of the
@@ -1407,6 +1411,18 @@ static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
 	return readl(priv->swth_base[cpu] + offset);
 }
 
+static void mvpp2_percpu_write_relaxed(struct mvpp2 *priv, int cpu,
+				       u32 offset, u32 data)
+{
+	writel_relaxed(data, priv->swth_base[cpu] + offset);
+}
+
+static u32 mvpp2_percpu_read_relaxed(struct mvpp2 *priv, int cpu,
+				     u32 offset)
+{
+	return readl_relaxed(priv->swth_base[cpu] + offset);
+}
+
 static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
 					    struct mvpp2_tx_desc *tx_desc)
 {
@@ -4442,8 +4458,8 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 				<< MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT) &
 				MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK;
 
-		mvpp2_percpu_write(port->priv, cpu,
-				   MVPP22_BM_ADDR_HIGH_RLS_REG, val);
+		mvpp2_percpu_write_relaxed(port->priv, cpu,
+					   MVPP22_BM_ADDR_HIGH_RLS_REG, val);
 	}
 
 	/* MVPP2_BM_VIRT_RLS_REG is not interpreted by HW, and simply
@@ -4451,10 +4467,10 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool,
 	 * descriptor. Instead of storing the virtual address, we
 	 * store the physical address
 	 */
-	mvpp2_percpu_write(port->priv, cpu,
-			   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
-	mvpp2_percpu_write(port->priv, cpu,
-			   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
+	mvpp2_percpu_write_relaxed(port->priv, cpu,
+				   MVPP2_BM_VIRT_RLS_REG, buf_phys_addr);
+	mvpp2_percpu_write_relaxed(port->priv, cpu,
+				   MVPP2_BM_PHY_RLS_REG(pool), buf_dma_addr);
 
 	put_cpu();
 }
@@ -5546,7 +5562,8 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv,
 	if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) {
 		/* Update number of occupied aggregated Tx descriptors */
 		int cpu = smp_processor_id();
-		u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu));
+		u32 val = mvpp2_read_relaxed(priv,
+					     MVPP2_AGGR_TXQ_STATUS_REG(cpu));
 
 		aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK;
 	}
@@ -5570,9 +5587,9 @@ static int mvpp2_txq_alloc_reserved_desc(struct mvpp2 *priv,
 	int cpu = smp_processor_id();
 
 	val = (txq->id << MVPP2_TXQ_RSVD_REQ_Q_OFFSET) | num;
-	mvpp2_percpu_write(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
+	mvpp2_percpu_write_relaxed(priv, cpu, MVPP2_TXQ_RSVD_REQ_REG, val);
 
-	val = mvpp2_percpu_read(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
+	val = mvpp2_percpu_read_relaxed(priv, cpu, MVPP2_TXQ_RSVD_RSLT_REG);
 
 	return val & MVPP2_TXQ_RSVD_RSLT_MASK;
 }
@@ -5677,8 +5694,8 @@ static inline int mvpp2_txq_sent_desc_proc(struct mvpp2_port *port,
 	u32 val;
 
 	/* Reading status reg resets transmitted descriptor counter */
-	val = mvpp2_percpu_read(port->priv, smp_processor_id(),
-				MVPP2_TXQ_SENT_REG(txq->id));
+	val = mvpp2_percpu_read_relaxed(port->priv, smp_processor_id(),
+					MVPP2_TXQ_SENT_REG(txq->id));
 
 	return (val & MVPP2_TRANSMITTED_COUNT_MASK) >>
 		MVPP2_TRANSMITTED_COUNT_OFFSET;
@@ -7044,8 +7061,8 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
 	 *
 	 * Each CPU has its own Rx/Tx cause register
 	 */
-	cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
-					MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
+	cause_rx_tx = mvpp2_percpu_read_relaxed(port->priv, qv->sw_thread_id,
+						MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
 
 	cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
 	if (cause_misc) {

From 67441c2472ddd19f75ef900ac93e71543066f25e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 27 Mar 2018 18:01:51 +0300
Subject: [PATCH 1338/1678] net: Convert nfsd_net_ops

These pernet_operations look similar to rpcsec_gss_net_ops,
they just create and destroy another caches. So, they also
can be async.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/nfsd/nfsctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d107b4426f7e..1e3824e6cce0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1263,6 +1263,7 @@ static struct pernet_operations nfsd_net_ops = {
 	.exit = nfsd_exit_net,
 	.id   = &nfsd_net_id,
 	.size = sizeof(struct nfsd_net),
+	.async = true,
 };
 
 static int __init init_nfsd(void)

From 094374e5e173c6639eccf6a2af5e1357a0869848 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 27 Mar 2018 18:02:01 +0300
Subject: [PATCH 1339/1678] net: Reflect all pernet_operations are converted

All pernet_operations are reviewed and converted, hooray!
Reflect this in core code: setup_net() and cleanup_net()
will take down_read() always.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 43 ++++++----------------------------------
 1 file changed, 6 insertions(+), 37 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 95ba2c53bd9a..0f614523a13f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -40,9 +40,8 @@ struct net init_net = {
 EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
-static unsigned nr_sync_pernet_ops;
 /*
- * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops,
+ * net_sem: protects: pernet_list, net_generic_ids,
  * init_net_initialized and first_device pointer.
  */
 DECLARE_RWSEM(net_sem);
@@ -406,7 +405,6 @@ struct net *copy_net_ns(unsigned long flags,
 {
 	struct ucounts *ucounts;
 	struct net *net;
-	unsigned write;
 	int rv;
 
 	if (!(flags & CLONE_NEWNET))
@@ -424,25 +422,14 @@ struct net *copy_net_ns(unsigned long flags,
 	refcount_set(&net->passive, 1);
 	net->ucounts = ucounts;
 	get_user_ns(user_ns);
-again:
-	write = READ_ONCE(nr_sync_pernet_ops);
-	if (write)
-		rv = down_write_killable(&net_sem);
-	else
-		rv = down_read_killable(&net_sem);
+
+	rv = down_read_killable(&net_sem);
 	if (rv < 0)
 		goto put_userns;
 
-	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
-		up_read(&net_sem);
-		goto again;
-	}
 	rv = setup_net(net, user_ns);
 
-	if (write)
-		up_write(&net_sem);
-	else
-		up_read(&net_sem);
+	up_read(&net_sem);
 
 	if (rv < 0) {
 put_userns:
@@ -490,21 +477,11 @@ static void cleanup_net(struct work_struct *work)
 	struct net *net, *tmp, *last;
 	struct llist_node *net_kill_list;
 	LIST_HEAD(net_exit_list);
-	unsigned write;
 
 	/* Atomically snapshot the list of namespaces to cleanup */
 	net_kill_list = llist_del_all(&cleanup_list);
-again:
-	write = READ_ONCE(nr_sync_pernet_ops);
-	if (write)
-		down_write(&net_sem);
-	else
-		down_read(&net_sem);
 
-	if (!write && unlikely(READ_ONCE(nr_sync_pernet_ops))) {
-		up_read(&net_sem);
-		goto again;
-	}
+	down_read(&net_sem);
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
@@ -543,10 +520,7 @@ again:
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	if (write)
-		up_write(&net_sem);
-	else
-		up_read(&net_sem);
+	up_read(&net_sem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -1006,9 +980,6 @@ again:
 		rcu_barrier();
 		if (ops->id)
 			ida_remove(&net_generic_ids, *ops->id);
-	} else if (!ops->async) {
-		pr_info_once("Pernet operations %ps are sync.\n", ops);
-		nr_sync_pernet_ops++;
 	}
 
 	return error;
@@ -1016,8 +987,6 @@ again:
 
 static void unregister_pernet_operations(struct pernet_operations *ops)
 {
-	if (!ops->async)
-		BUG_ON(nr_sync_pernet_ops-- == 0);
 	__unregister_pernet_operations(ops);
 	rcu_barrier();
 	if (ops->id)

From 2f635ceeb22ba13c307236d69795fbb29cfa3e7c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 27 Mar 2018 18:02:13 +0300
Subject: [PATCH 1340/1678] net: Drop pernet_operations::async

Synchronous pernet_operations are not allowed anymore.
All are asynchronous. So, drop the structure member.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/cma.c                  | 1 -
 drivers/net/bonding/bond_main.c                | 1 -
 drivers/net/geneve.c                           | 1 -
 drivers/net/gtp.c                              | 1 -
 drivers/net/ipvlan/ipvlan_main.c               | 1 -
 drivers/net/loopback.c                         | 1 -
 drivers/net/ppp/ppp_generic.c                  | 1 -
 drivers/net/ppp/pppoe.c                        | 1 -
 drivers/net/vrf.c                              | 1 -
 drivers/net/vxlan.c                            | 1 -
 drivers/net/wireless/mac80211_hwsim.c          | 1 -
 fs/lockd/svc.c                                 | 1 -
 fs/nfs/blocklayout/rpc_pipefs.c                | 1 -
 fs/nfs/dns_resolve.c                           | 1 -
 fs/nfs/inode.c                                 | 1 -
 fs/nfs_common/grace.c                          | 1 -
 fs/nfsd/nfsctl.c                               | 1 -
 fs/proc/proc_net.c                             | 1 -
 include/net/net_namespace.h                    | 6 ------
 kernel/audit.c                                 | 1 -
 lib/kobject_uevent.c                           | 1 -
 net/8021q/vlan.c                               | 1 -
 net/bridge/br.c                                | 1 -
 net/bridge/br_netfilter_hooks.c                | 1 -
 net/bridge/netfilter/ebtable_broute.c          | 1 -
 net/bridge/netfilter/ebtable_filter.c          | 1 -
 net/bridge/netfilter/ebtable_nat.c             | 1 -
 net/bridge/netfilter/nf_log_bridge.c           | 1 -
 net/caif/caif_dev.c                            | 1 -
 net/can/af_can.c                               | 1 -
 net/can/bcm.c                                  | 1 -
 net/can/gw.c                                   | 1 -
 net/core/dev.c                                 | 2 --
 net/core/fib_notifier.c                        | 1 -
 net/core/fib_rules.c                           | 1 -
 net/core/net-procfs.c                          | 2 --
 net/core/net_namespace.c                       | 2 --
 net/core/pktgen.c                              | 1 -
 net/core/rtnetlink.c                           | 1 -
 net/core/sock.c                                | 2 --
 net/core/sock_diag.c                           | 1 -
 net/core/sysctl_net_core.c                     | 1 -
 net/dccp/ipv4.c                                | 1 -
 net/dccp/ipv6.c                                | 1 -
 net/ieee802154/6lowpan/reassembly.c            | 1 -
 net/ieee802154/core.c                          | 1 -
 net/ipv4/af_inet.c                             | 2 --
 net/ipv4/arp.c                                 | 1 -
 net/ipv4/devinet.c                             | 1 -
 net/ipv4/fib_frontend.c                        | 1 -
 net/ipv4/fou.c                                 | 1 -
 net/ipv4/icmp.c                                | 1 -
 net/ipv4/igmp.c                                | 1 -
 net/ipv4/ip_fragment.c                         | 1 -
 net/ipv4/ip_gre.c                              | 3 ---
 net/ipv4/ip_vti.c                              | 1 -
 net/ipv4/ipip.c                                | 1 -
 net/ipv4/ipmr.c                                | 1 -
 net/ipv4/netfilter/arp_tables.c                | 1 -
 net/ipv4/netfilter/arptable_filter.c           | 1 -
 net/ipv4/netfilter/ip_tables.c                 | 1 -
 net/ipv4/netfilter/ipt_CLUSTERIP.c             | 1 -
 net/ipv4/netfilter/iptable_filter.c            | 1 -
 net/ipv4/netfilter/iptable_mangle.c            | 1 -
 net/ipv4/netfilter/iptable_nat.c               | 1 -
 net/ipv4/netfilter/iptable_raw.c               | 1 -
 net/ipv4/netfilter/iptable_security.c          | 1 -
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1 -
 net/ipv4/netfilter/nf_defrag_ipv4.c            | 1 -
 net/ipv4/netfilter/nf_log_arp.c                | 1 -
 net/ipv4/netfilter/nf_log_ipv4.c               | 1 -
 net/ipv4/ping.c                                | 1 -
 net/ipv4/proc.c                                | 1 -
 net/ipv4/raw.c                                 | 1 -
 net/ipv4/route.c                               | 4 ----
 net/ipv4/sysctl_net_ipv4.c                     | 1 -
 net/ipv4/tcp_ipv4.c                            | 2 --
 net/ipv4/tcp_metrics.c                         | 1 -
 net/ipv4/udp.c                                 | 2 --
 net/ipv4/udplite.c                             | 1 -
 net/ipv4/xfrm4_policy.c                        | 1 -
 net/ipv6/addrconf.c                            | 2 --
 net/ipv6/addrlabel.c                           | 1 -
 net/ipv6/af_inet6.c                            | 1 -
 net/ipv6/fib6_rules.c                          | 1 -
 net/ipv6/icmp.c                                | 1 -
 net/ipv6/ila/ila_xlat.c                        | 1 -
 net/ipv6/ip6_fib.c                             | 1 -
 net/ipv6/ip6_flowlabel.c                       | 1 -
 net/ipv6/ip6_gre.c                             | 1 -
 net/ipv6/ip6_tunnel.c                          | 1 -
 net/ipv6/ip6_vti.c                             | 1 -
 net/ipv6/ip6mr.c                               | 1 -
 net/ipv6/mcast.c                               | 1 -
 net/ipv6/ndisc.c                               | 1 -
 net/ipv6/netfilter/ip6_tables.c                | 1 -
 net/ipv6/netfilter/ip6table_filter.c           | 1 -
 net/ipv6/netfilter/ip6table_mangle.c           | 1 -
 net/ipv6/netfilter/ip6table_nat.c              | 1 -
 net/ipv6/netfilter/ip6table_raw.c              | 1 -
 net/ipv6/netfilter/ip6table_security.c         | 1 -
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 1 -
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 1 -
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      | 1 -
 net/ipv6/netfilter/nf_log_ipv6.c               | 1 -
 net/ipv6/ping.c                                | 1 -
 net/ipv6/proc.c                                | 1 -
 net/ipv6/raw.c                                 | 1 -
 net/ipv6/reassembly.c                          | 1 -
 net/ipv6/route.c                               | 3 ---
 net/ipv6/seg6.c                                | 1 -
 net/ipv6/sit.c                                 | 1 -
 net/ipv6/sysctl_net_ipv6.c                     | 1 -
 net/ipv6/tcp_ipv6.c                            | 1 -
 net/ipv6/udplite.c                             | 1 -
 net/ipv6/xfrm6_policy.c                        | 1 -
 net/ipv6/xfrm6_tunnel.c                        | 1 -
 net/kcm/kcmproc.c                              | 1 -
 net/kcm/kcmsock.c                              | 1 -
 net/key/af_key.c                               | 1 -
 net/l2tp/l2tp_core.c                           | 1 -
 net/l2tp/l2tp_ppp.c                            | 1 -
 net/mpls/af_mpls.c                             | 1 -
 net/netfilter/core.c                           | 1 -
 net/netfilter/ipset/ip_set_core.c              | 1 -
 net/netfilter/ipvs/ip_vs_core.c                | 2 --
 net/netfilter/ipvs/ip_vs_ftp.c                 | 1 -
 net/netfilter/ipvs/ip_vs_lblc.c                | 1 -
 net/netfilter/ipvs/ip_vs_lblcr.c               | 1 -
 net/netfilter/nf_conntrack_netlink.c           | 1 -
 net/netfilter/nf_conntrack_proto_gre.c         | 1 -
 net/netfilter/nf_conntrack_standalone.c        | 1 -
 net/netfilter/nf_log.c                         | 1 -
 net/netfilter/nf_log_netdev.c                  | 1 -
 net/netfilter/nf_synproxy_core.c               | 1 -
 net/netfilter/nf_tables_api.c                  | 1 -
 net/netfilter/nfnetlink.c                      | 1 -
 net/netfilter/nfnetlink_acct.c                 | 1 -
 net/netfilter/nfnetlink_cttimeout.c            | 1 -
 net/netfilter/nfnetlink_log.c                  | 1 -
 net/netfilter/nfnetlink_queue.c                | 1 -
 net/netfilter/x_tables.c                       | 1 -
 net/netfilter/xt_hashlimit.c                   | 1 -
 net/netfilter/xt_recent.c                      | 1 -
 net/netlink/af_netlink.c                       | 2 --
 net/netlink/genetlink.c                        | 1 -
 net/openvswitch/datapath.c                     | 1 -
 net/packet/af_packet.c                         | 1 -
 net/phonet/pn_dev.c                            | 1 -
 net/rds/tcp.c                                  | 1 -
 net/rxrpc/net_ns.c                             | 1 -
 net/sched/act_api.c                            | 1 -
 net/sched/act_bpf.c                            | 1 -
 net/sched/act_connmark.c                       | 1 -
 net/sched/act_csum.c                           | 1 -
 net/sched/act_gact.c                           | 1 -
 net/sched/act_ife.c                            | 1 -
 net/sched/act_ipt.c                            | 2 --
 net/sched/act_mirred.c                         | 1 -
 net/sched/act_nat.c                            | 1 -
 net/sched/act_pedit.c                          | 1 -
 net/sched/act_police.c                         | 1 -
 net/sched/act_sample.c                         | 1 -
 net/sched/act_simple.c                         | 1 -
 net/sched/act_skbedit.c                        | 1 -
 net/sched/act_skbmod.c                         | 1 -
 net/sched/act_tunnel_key.c                     | 1 -
 net/sched/act_vlan.c                           | 1 -
 net/sched/cls_api.c                            | 1 -
 net/sched/sch_api.c                            | 1 -
 net/sctp/protocol.c                            | 2 --
 net/sunrpc/auth_gss/auth_gss.c                 | 1 -
 net/sunrpc/sunrpc_syms.c                       | 1 -
 net/sysctl_net.c                               | 1 -
 net/tipc/core.c                                | 1 -
 net/unix/af_unix.c                             | 1 -
 net/wireless/core.c                            | 1 -
 net/wireless/wext-core.c                       | 1 -
 net/xfrm/xfrm_policy.c                         | 1 -
 net/xfrm/xfrm_user.c                           | 1 -
 security/selinux/hooks.c                       | 1 -
 security/smack/smack_netfilter.c               | 1 -
 182 files changed, 206 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 66f203730e80..6ab1059fed66 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4554,7 +4554,6 @@ static struct pernet_operations cma_pernet_operations = {
 	.exit = cma_exit_net,
 	.id = &cma_pernet_id,
 	.size = sizeof(struct cma_pernet),
-	.async = true,
 };
 
 static int __init cma_init(void)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4c19d23dd282..c669554d70bb 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4791,7 +4791,6 @@ static struct pernet_operations bond_net_ops = {
 	.exit = bond_net_exit,
 	.id   = &bond_net_id,
 	.size = sizeof(struct bond_net),
-	.async = true,
 };
 
 static int __init bonding_init(void)
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 516dd59249d7..b919e89a9b93 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1694,7 +1694,6 @@ static struct pernet_operations geneve_net_ops = {
 	.exit_batch = geneve_exit_batch_net,
 	.id   = &geneve_net_id,
 	.size = sizeof(struct geneve_net),
-	.async = true,
 };
 
 static int __init geneve_init_module(void)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 127edd23018f..f38e32a7ec9c 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1325,7 +1325,6 @@ static struct pernet_operations gtp_net_ops = {
 	.exit	= gtp_net_exit,
 	.id	= &gtp_net_id,
 	.size	= sizeof(struct gtp_net),
-	.async	= true,
 };
 
 static int __init gtp_init(void)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 743d37fb034a..450eec264a5e 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -1040,7 +1040,6 @@ static struct pernet_operations ipvlan_net_ops = {
 	.id = &ipvlan_netid,
 	.size = sizeof(struct ipvlan_netns),
 	.exit = ipvlan_ns_exit,
-	.async = true,
 };
 
 static int __init ipvlan_init_module(void)
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index b97a907ea5aa..30612497643c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -230,5 +230,4 @@ out:
 /* Registered in net/core/dev.c */
 struct pernet_operations __net_initdata loopback_net_ops = {
 	.init = loopback_net_init,
-	.async = true,
 };
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 22fcff3c7a9a..dc7c7ec43202 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -970,7 +970,6 @@ static struct pernet_operations ppp_net_ops = {
 	.exit = ppp_exit_net,
 	.id   = &ppp_net_id,
 	.size = sizeof(struct ppp_net),
-	.async = true,
 };
 
 static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index f9552a400271..1483bc7b01e1 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1161,7 +1161,6 @@ static struct pernet_operations pppoe_net_ops = {
 	.exit = pppoe_exit_net,
 	.id   = &pppoe_net_id,
 	.size = sizeof(struct pppoe_net),
-	.async = true,
 };
 
 static int __init pppoe_init(void)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index c6be49d3a9eb..102582459bef 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1435,7 +1435,6 @@ static struct pernet_operations vrf_net_ops __net_initdata = {
 	.init = vrf_netns_init,
 	.id   = &vrf_net_id,
 	.size = sizeof(bool),
-	.async = true,
 };
 
 static int __init vrf_init_module(void)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index aa5f034d6ad1..fab7a4db249e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3752,7 +3752,6 @@ static struct pernet_operations vxlan_net_ops = {
 	.exit_batch = vxlan_exit_batch_net,
 	.id   = &vxlan_net_id,
 	.size = sizeof(struct vxlan_net),
-	.async = true,
 };
 
 static int __init vxlan_init_module(void)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 100cf42db65d..a37f4b1d9d30 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3542,7 +3542,6 @@ static struct pernet_operations hwsim_net_ops = {
 	.exit = hwsim_exit_net,
 	.id   = &hwsim_net_id,
 	.size = sizeof(struct hwsim_net),
-	.async = true,
 };
 
 static void hwsim_exit_netlink(void)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2dee4e03ff1c..9c36d614bf89 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -709,7 +709,6 @@ static struct pernet_operations lockd_net_ops = {
 	.exit = lockd_exit_net,
 	.id = &lockd_net_id,
 	.size = sizeof(struct lockd_net),
-	.async = true,
 };
 
 
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index ef9fa111b009..9fb067a6f7e0 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -261,7 +261,6 @@ static void nfs4blocklayout_net_exit(struct net *net)
 static struct pernet_operations nfs4blocklayout_net_ops = {
 	.init = nfs4blocklayout_net_init,
 	.exit = nfs4blocklayout_net_exit,
-	.async = true,
 };
 
 int __init bl_init_pipefs(void)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index e90bd69ab653..060c658eab66 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -410,7 +410,6 @@ static void nfs4_dns_net_exit(struct net *net)
 static struct pernet_operations nfs4_dns_resolver_ops = {
 	.init = nfs4_dns_net_init,
 	.exit = nfs4_dns_net_exit,
-	.async = true,
 };
 
 static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6c3083c992e5..7d893543cf3b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2122,7 +2122,6 @@ static struct pernet_operations nfs_net_ops = {
 	.exit = nfs_net_exit,
 	.id   = &nfs_net_id,
 	.size = sizeof(struct nfs_net),
-	.async = true,
 };
 
 /*
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 8c743a405df6..5be08f02a76b 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -118,7 +118,6 @@ static struct pernet_operations grace_net_ops = {
 	.exit = grace_exit_net,
 	.id   = &grace_net_id,
 	.size = sizeof(struct list_head),
-	.async = true,
 };
 
 static int __init
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1e3824e6cce0..d107b4426f7e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1263,7 +1263,6 @@ static struct pernet_operations nfsd_net_ops = {
 	.exit = nfsd_exit_net,
 	.id   = &nfsd_net_id,
 	.size = sizeof(struct nfsd_net),
-	.async = true,
 };
 
 static int __init init_nfsd(void)
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index da6f8733c9c5..68c06ae7888c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -237,7 +237,6 @@ static __net_exit void proc_net_ns_exit(struct net *net)
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
 	.init = proc_net_ns_init,
 	.exit = proc_net_ns_exit,
-	.async = true,
 };
 
 int __init proc_net_init(void)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 09e30bdc7876..37bcf8382b61 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -333,12 +333,6 @@ struct pernet_operations {
 	void (*exit_batch)(struct list_head *net_exit_list);
 	unsigned int *id;
 	size_t size;
-	/*
-	 * Indicates above methods are allowed to be executed in parallel
-	 * with methods of any other pernet_operations, i.e. they are not
-	 * need write locked net_sem.
-	 */
-	bool async;
 };
 
 /*
diff --git a/kernel/audit.c b/kernel/audit.c
index 5e49b614d0e6..227db99b0f19 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1526,7 +1526,6 @@ static struct pernet_operations audit_net_ops __net_initdata = {
 	.exit = audit_net_exit,
 	.id = &audit_net_id,
 	.size = sizeof(struct audit_net),
-	.async = true,
 };
 
 /* Initialize audit support at boot time. */
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index fa10ad8e9b17..15ea216a67ce 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -724,7 +724,6 @@ static void uevent_net_exit(struct net *net)
 static struct pernet_operations uevent_net_ops = {
 	.init	= uevent_net_init,
 	.exit	= uevent_net_exit,
-	.async  = true,
 };
 
 static int __init kobject_uevent_init(void)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bd0ed39f65fb..bad01b14a4ad 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -729,7 +729,6 @@ static struct pernet_operations vlan_net_ops = {
 	.exit = vlan_exit_net,
 	.id   = &vlan_net_id,
 	.size = sizeof(struct vlan_net),
-	.async = true,
 };
 
 static int __init vlan_proto_init(void)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a3f95ab9d6a3..26e1616b2c90 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -188,7 +188,6 @@ static void __net_exit br_net_exit(struct net *net)
 
 static struct pernet_operations br_net_ops = {
 	.exit	= br_net_exit,
-	.async	= true,
 };
 
 static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c2120eb889a9..9b16eaf33819 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -969,7 +969,6 @@ static struct pernet_operations brnf_net_ops __read_mostly = {
 	.exit = brnf_exit_net,
 	.id   = &brnf_net_id,
 	.size = sizeof(struct brnf_net),
-	.async = true,
 };
 
 static struct notifier_block brnf_notifier __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index f070b5e5b9dd..276b60262981 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -77,7 +77,6 @@ static void __net_exit broute_net_exit(struct net *net)
 static struct pernet_operations broute_net_ops = {
 	.init = broute_net_init,
 	.exit = broute_net_exit,
-	.async = true,
 };
 
 static int __init ebtable_broute_init(void)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 4151afc8efcc..c41da5fac84f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -105,7 +105,6 @@ static void __net_exit frame_filter_net_exit(struct net *net)
 static struct pernet_operations frame_filter_net_ops = {
 	.init = frame_filter_net_init,
 	.exit = frame_filter_net_exit,
-	.async = true,
 };
 
 static int __init ebtable_filter_init(void)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index b8da2dfe2ec5..08df7406ecb3 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -105,7 +105,6 @@ static void __net_exit frame_nat_net_exit(struct net *net)
 static struct pernet_operations frame_nat_net_ops = {
 	.init = frame_nat_net_init,
 	.exit = frame_nat_net_exit,
-	.async = true,
 };
 
 static int __init ebtable_nat_init(void)
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index 91bfc2ac055a..bd2b3c78f59b 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -48,7 +48,6 @@ static void __net_exit nf_log_bridge_net_exit(struct net *net)
 static struct pernet_operations nf_log_bridge_net_ops = {
 	.init = nf_log_bridge_net_init,
 	.exit = nf_log_bridge_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_bridge_init(void)
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 7a78268cc572..e0adcd123f48 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -544,7 +544,6 @@ static struct pernet_operations caif_net_ops = {
 	.exit = caif_exit_net,
 	.id   = &caif_net_id,
 	.size = sizeof(struct caif_net),
-	.async = true,
 };
 
 /* Initialize Caif devices list */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 2f0d0a72e4b5..1684ba5b51eb 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -954,7 +954,6 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
 static struct pernet_operations can_pernet_ops __read_mostly = {
 	.init = can_pernet_init,
 	.exit = can_pernet_exit,
-	.async = true,
 };
 
 static __init int can_init(void)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 26730d39e048..ac5e5e34fee3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1717,7 +1717,6 @@ static void canbcm_pernet_exit(struct net *net)
 static struct pernet_operations canbcm_pernet_ops __read_mostly = {
 	.init = canbcm_pernet_init,
 	.exit = canbcm_pernet_exit,
-	.async = true,
 };
 
 static int __init bcm_module_init(void)
diff --git a/net/can/gw.c b/net/can/gw.c
index 8d71e199d5b3..faa3da88a127 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1010,7 +1010,6 @@ static void __net_exit cangw_pernet_exit(struct net *net)
 static struct pernet_operations cangw_pernet_ops = {
 	.init = cangw_pernet_init,
 	.exit = cangw_pernet_exit,
-	.async = true,
 };
 
 static __init int cgw_module_init(void)
diff --git a/net/core/dev.c b/net/core/dev.c
index 97a96df4b6da..e13807b5c84d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8883,7 +8883,6 @@ static void __net_exit netdev_exit(struct net *net)
 static struct pernet_operations __net_initdata netdev_net_ops = {
 	.init = netdev_init,
 	.exit = netdev_exit,
-	.async = true,
 };
 
 static void __net_exit default_device_exit(struct net *net)
@@ -8984,7 +8983,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 static struct pernet_operations __net_initdata default_device_ops = {
 	.exit = default_device_exit,
 	.exit_batch = default_device_exit_batch,
-	.async = true,
 };
 
 /*
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 5ace0705a3f9..0c048bdeb016 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -171,7 +171,6 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
 static struct pernet_operations fib_notifier_net_ops = {
 	.init = fib_notifier_net_init,
 	.exit = fib_notifier_net_exit,
-	.async = true,
 };
 
 static int __init fib_notifier_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f6f04fc0f629..9d87ce868402 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1130,7 +1130,6 @@ static void __net_exit fib_rules_net_exit(struct net *net)
 static struct pernet_operations fib_rules_net_ops = {
 	.init = fib_rules_net_init,
 	.exit = fib_rules_net_exit,
-	.async = true,
 };
 
 static int __init fib_rules_init(void)
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index dd6ae431d038..9737302907b1 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -349,7 +349,6 @@ static void __net_exit dev_proc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_proc_ops = {
 	.init = dev_proc_net_init,
 	.exit = dev_proc_net_exit,
-	.async = true,
 };
 
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
@@ -406,7 +405,6 @@ static void __net_exit dev_mc_net_exit(struct net *net)
 static struct pernet_operations __net_initdata dev_mc_net_ops = {
 	.init = dev_mc_net_init,
 	.exit = dev_mc_net_exit,
-	.async = true,
 };
 
 int __init dev_proc_init(void)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 0f614523a13f..eef17ad29dea 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -338,7 +338,6 @@ static int __net_init net_defaults_init_net(struct net *net)
 
 static struct pernet_operations net_defaults_ops = {
 	.init = net_defaults_init_net,
-	.async = true,
 };
 
 static __init int net_defaults_init(void)
@@ -628,7 +627,6 @@ static __net_exit void net_ns_net_exit(struct net *net)
 static struct pernet_operations __net_initdata net_ns_ops = {
 	.init = net_ns_net_init,
 	.exit = net_ns_net_exit,
-	.async = true,
 };
 
 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 545cf08cd558..7e4ede34cc52 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3852,7 +3852,6 @@ static struct pernet_operations pg_net_ops = {
 	.exit = pg_net_exit,
 	.id   = &pg_net_id,
 	.size = sizeof(struct pktgen_net),
-	.async = true,
 };
 
 static int __init pg_init(void)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 87079eaa871b..31438b63d4b4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4730,7 +4730,6 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
 static struct pernet_operations rtnetlink_net_ops = {
 	.init = rtnetlink_net_init,
 	.exit = rtnetlink_net_exit,
-	.async = true,
 };
 
 void __init rtnetlink_init(void)
diff --git a/net/core/sock.c b/net/core/sock.c
index 8cee2920a47f..6444525f610c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3175,7 +3175,6 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
 static struct pernet_operations net_inuse_ops = {
 	.init = sock_inuse_init_net,
 	.exit = sock_inuse_exit_net,
-	.async = true,
 };
 
 static __init int net_inuse_init(void)
@@ -3470,7 +3469,6 @@ static __net_exit void proto_exit_net(struct net *net)
 static __net_initdata struct pernet_operations proto_net_ops = {
 	.init = proto_init_net,
 	.exit = proto_exit_net,
-	.async = true,
 };
 
 static int __init proto_init(void)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a3392a8f9276..c37b5be7c5e4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -324,7 +324,6 @@ static void __net_exit diag_net_exit(struct net *net)
 static struct pernet_operations diag_net_ops = {
 	.init = diag_net_init,
 	.exit = diag_net_exit,
-	.async = true,
 };
 
 static int __init sock_diag_init(void)
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4f47f92459cc..b3b609f0eeb5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -584,7 +584,6 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_core_ops = {
 	.init = sysctl_core_net_init,
 	.exit = sysctl_core_net_exit,
-	.async = true,
 };
 
 static __init int sysctl_core_init(void)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 13ad28ab1e79..e65fcb45c3f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1031,7 +1031,6 @@ static struct pernet_operations dccp_v4_ops = {
 	.init	= dccp_v4_init_net,
 	.exit	= dccp_v4_exit_net,
 	.exit_batch = dccp_v4_exit_batch,
-	.async	= true,
 };
 
 static int __init dccp_v4_init(void)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 2f48c020f8c3..5df7857fc0f3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1116,7 +1116,6 @@ static struct pernet_operations dccp_v6_ops = {
 	.init   = dccp_v6_init_net,
 	.exit   = dccp_v6_exit_net,
 	.exit_batch = dccp_v6_exit_batch,
-	.async	= true,
 };
 
 static int __init dccp_v6_init(void)
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index a9ccb1322f69..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -603,7 +603,6 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
 static struct pernet_operations lowpan_frags_ops = {
 	.init = lowpan_frags_init_net,
 	.exit = lowpan_frags_exit_net,
-	.async = true,
 };
 
 int __init lowpan_net_frag_init(void)
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 9104943c15ba..cb7176cd4cd6 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -345,7 +345,6 @@ static void __net_exit cfg802154_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg802154_pernet_ops = {
 	.exit = cfg802154_pernet_exit,
-	.async = true,
 };
 
 static int __init wpan_phy_class_init(void)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e8c7fad8c329..f98e2f0db841 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1735,7 +1735,6 @@ static __net_exit void ipv4_mib_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
 	.init = ipv4_mib_init_net,
 	.exit = ipv4_mib_exit_net,
-	.async = true,
 };
 
 static int __init init_ipv4_mibs(void)
@@ -1789,7 +1788,6 @@ static __net_exit void inet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations af_inet_ops = {
 	.init = inet_init_net,
 	.exit = inet_exit_net,
-	.async = true,
 };
 
 static int __init init_inet_pernet_ops(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4c6ba0fb2630..be4c595edccb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1447,7 +1447,6 @@ static void __net_exit arp_net_exit(struct net *net)
 static struct pernet_operations arp_net_ops = {
 	.init = arp_net_init,
 	.exit = arp_net_exit,
-	.async = true,
 };
 
 static int __init arp_proc_init(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5ae0d1f097ca..40f001782c1b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2469,7 +2469,6 @@ static __net_exit void devinet_exit_net(struct net *net)
 static __net_initdata struct pernet_operations devinet_ops = {
 	.init = devinet_init_net,
 	.exit = devinet_exit_net,
-	.async = true,
 };
 
 static struct rtnl_af_ops inet_af_ops __read_mostly = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ac71c3d496c0..f05afaf3235c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1362,7 +1362,6 @@ static void __net_exit fib_net_exit(struct net *net)
 static struct pernet_operations fib_net_ops = {
 	.init = fib_net_init,
 	.exit = fib_net_exit,
-	.async = true,
 };
 
 void __init ip_fib_init(void)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index d3e1a9af478b..1540db65241a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1081,7 +1081,6 @@ static struct pernet_operations fou_net_ops = {
 	.exit = fou_exit_net,
 	.id   = &fou_net_id,
 	.size = sizeof(struct fou_net),
-	.async = true,
 };
 
 static int __init fou_init(void)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index cc56efa64d5c..1617604c9284 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1257,7 +1257,6 @@ fail:
 static struct pernet_operations __net_initdata icmp_sk_ops = {
        .init = icmp_sk_init,
        .exit = icmp_sk_exit,
-       .async = true,
 };
 
 int __init icmp_init(void)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f17cd83ba164..b26a81a7de42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -3028,7 +3028,6 @@ static void __net_exit igmp_net_exit(struct net *net)
 static struct pernet_operations igmp_net_ops = {
 	.init = igmp_net_init,
 	.exit = igmp_net_exit,
-	.async = true,
 };
 #endif
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 5e843ae5e468..bbf1b94942c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -885,7 +885,6 @@ static void __net_exit ipv4_frags_exit_net(struct net *net)
 static struct pernet_operations ip4_frags_ops = {
 	.init = ipv4_frags_init_net,
 	.exit = ipv4_frags_exit_net,
-	.async = true,
 };
 
 void __init ipfrag_init(void)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9ab1aa2f7660..a8772a978224 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1044,7 +1044,6 @@ static struct pernet_operations ipgre_net_ops = {
 	.exit_batch = ipgre_exit_batch_net,
 	.id   = &ipgre_net_id,
 	.size = sizeof(struct ip_tunnel_net),
-	.async = true,
 };
 
 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1628,7 +1627,6 @@ static struct pernet_operations ipgre_tap_net_ops = {
 	.exit_batch = ipgre_tap_exit_batch_net,
 	.id   = &gre_tap_net_id,
 	.size = sizeof(struct ip_tunnel_net),
-	.async = true,
 };
 
 static int __net_init erspan_init_net(struct net *net)
@@ -1647,7 +1645,6 @@ static struct pernet_operations erspan_net_ops = {
 	.exit_batch = erspan_exit_batch_net,
 	.id   = &erspan_net_id,
 	.size = sizeof(struct ip_tunnel_net),
-	.async = true,
 };
 
 static int __init ipgre_init(void)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index b10bf563afd9..51b1669334fe 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -454,7 +454,6 @@ static struct pernet_operations vti_net_ops = {
 	.exit_batch = vti_exit_batch_net,
 	.id   = &vti_net_id,
 	.size = sizeof(struct ip_tunnel_net),
-	.async = true,
 };
 
 static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 9c5a4d164f09..c891235b4966 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -669,7 +669,6 @@ static struct pernet_operations ipip_net_ops = {
 	.exit_batch = ipip_exit_batch_net,
 	.id   = &ipip_net_id,
 	.size = sizeof(struct ip_tunnel_net),
-	.async = true,
 };
 
 static int __init ipip_init(void)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index e79211a8537c..2fb4de3f7f66 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3009,7 +3009,6 @@ static void __net_exit ipmr_net_exit(struct net *net)
 static struct pernet_operations ipmr_net_ops = {
 	.init = ipmr_net_init,
 	.exit = ipmr_net_exit,
-	.async = true,
 };
 
 int __init ip_mr_init(void)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c36ffce3c812..e3e420f3ba7b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1635,7 +1635,6 @@ static void __net_exit arp_tables_net_exit(struct net *net)
 static struct pernet_operations arp_tables_net_ops = {
 	.init = arp_tables_net_init,
 	.exit = arp_tables_net_exit,
-	.async = true,
 };
 
 static int __init arp_tables_init(void)
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 49c2490193ae..8f8713b4388f 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -65,7 +65,6 @@ static void __net_exit arptable_filter_net_exit(struct net *net)
 
 static struct pernet_operations arptable_filter_net_ops = {
 	.exit = arptable_filter_net_exit,
-	.async = true,
 };
 
 static int __init arptable_filter_init(void)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d4f7584d2dbe..e38395a8dcf2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1916,7 +1916,6 @@ static void __net_exit ip_tables_net_exit(struct net *net)
 static struct pernet_operations ip_tables_net_ops = {
 	.init = ip_tables_net_init,
 	.exit = ip_tables_net_exit,
-	.async = true,
 };
 
 static int __init ip_tables_init(void)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 31b4cca588d0..2c8d313ae216 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -845,7 +845,6 @@ static struct pernet_operations clusterip_net_ops = {
 	.exit = clusterip_net_exit,
 	.id   = &clusterip_net_id,
 	.size = sizeof(struct clusterip_net),
-	.async = true,
 };
 
 static int __init clusterip_tg_init(void)
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c1c136a93911..9ac92ea7b93c 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -87,7 +87,6 @@ static void __net_exit iptable_filter_net_exit(struct net *net)
 static struct pernet_operations iptable_filter_net_ops = {
 	.init = iptable_filter_net_init,
 	.exit = iptable_filter_net_exit,
-	.async = true,
 };
 
 static int __init iptable_filter_init(void)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index f6074059531a..dea138ca8925 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -113,7 +113,6 @@ static void __net_exit iptable_mangle_net_exit(struct net *net)
 
 static struct pernet_operations iptable_mangle_net_ops = {
 	.exit = iptable_mangle_net_exit,
-	.async = true,
 };
 
 static int __init iptable_mangle_init(void)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index b771af74be79..0f7255cc65ee 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -129,7 +129,6 @@ static void __net_exit iptable_nat_net_exit(struct net *net)
 
 static struct pernet_operations iptable_nat_net_ops = {
 	.exit	= iptable_nat_net_exit,
-	.async	= true,
 };
 
 static int __init iptable_nat_init(void)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 963753e50842..960625aabf04 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -76,7 +76,6 @@ static void __net_exit iptable_raw_net_exit(struct net *net)
 
 static struct pernet_operations iptable_raw_net_ops = {
 	.exit = iptable_raw_net_exit,
-	.async = true,
 };
 
 static int __init iptable_raw_init(void)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index c40d6b3d8b6a..e5379fe57b64 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -76,7 +76,6 @@ static void __net_exit iptable_security_net_exit(struct net *net)
 
 static struct pernet_operations iptable_security_net_ops = {
 	.exit = iptable_security_net_exit,
-	.async = true,
 };
 
 static int __init iptable_security_init(void)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 6531f69db010..b50721d9d30e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -399,7 +399,6 @@ static struct pernet_operations ipv4_net_ops = {
 	.exit = ipv4_net_exit,
 	.id = &conntrack4_net_id,
 	.size = sizeof(struct conntrack4_net),
-	.async = true,
 };
 
 static int __init nf_conntrack_l3proto_ipv4_init(void)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 57244b62a4fc..a0d3ad60a411 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -118,7 +118,6 @@ static void __net_exit defrag4_net_exit(struct net *net)
 
 static struct pernet_operations defrag4_net_ops = {
 	.exit = defrag4_net_exit,
-	.async = true,
 };
 
 static int __init nf_defrag_init(void)
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 162293469ac2..df5c2a2061a4 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -122,7 +122,6 @@ static void __net_exit nf_log_arp_net_exit(struct net *net)
 static struct pernet_operations nf_log_arp_net_ops = {
 	.init = nf_log_arp_net_init,
 	.exit = nf_log_arp_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_arp_init(void)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 7a06de140f3c..4388de0e5380 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -358,7 +358,6 @@ static void __net_exit nf_log_ipv4_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv4_net_ops = {
 	.init = nf_log_ipv4_net_init,
 	.exit = nf_log_ipv4_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_ipv4_init(void)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1f24bc8273a0..05e47d777009 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1204,7 +1204,6 @@ static void __net_exit ping_v4_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v4_net_ops = {
 	.init = ping_v4_proc_init_net,
 	.exit = ping_v4_proc_exit_net,
-	.async = true,
 };
 
 int __init ping_proc_init(void)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 80de2e659dcb..adfb75340275 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -549,7 +549,6 @@ static __net_exit void ip_proc_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ip_proc_ops = {
 	.init = ip_proc_init_net,
 	.exit = ip_proc_exit_net,
-	.async = true,
 };
 
 int __init ip_misc_proc_init(void)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 0ee2501a9027..1b4d3355624a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1154,7 +1154,6 @@ static __net_exit void raw_exit_net(struct net *net)
 static __net_initdata struct pernet_operations raw_net_ops = {
 	.init = raw_init_net,
 	.exit = raw_exit_net,
-	.async = true,
 };
 
 int __init raw_proc_init(void)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ce9bd5380d21..8322e479f299 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -418,7 +418,6 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
 	.init = ip_rt_do_proc_init,
 	.exit = ip_rt_do_proc_exit,
-	.async = true,
 };
 
 static int __init ip_rt_proc_init(void)
@@ -3017,7 +3016,6 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
 static __net_initdata struct pernet_operations sysctl_route_ops = {
 	.init = sysctl_route_net_init,
 	.exit = sysctl_route_net_exit,
-	.async = true,
 };
 #endif
 
@@ -3031,7 +3029,6 @@ static __net_init int rt_genid_init(struct net *net)
 
 static __net_initdata struct pernet_operations rt_genid_ops = {
 	.init = rt_genid_init,
-	.async = true,
 };
 
 static int __net_init ipv4_inetpeer_init(struct net *net)
@@ -3057,7 +3054,6 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net)
 static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
 	.init	=	ipv4_inetpeer_init,
 	.exit	=	ipv4_inetpeer_exit,
-	.async	=	true,
 };
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5b72d97693f8..4b195bac8ac0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1219,7 +1219,6 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
 	.init = ipv4_sysctl_init_net,
 	.exit = ipv4_sysctl_exit_net,
-	.async = true,
 };
 
 static __init int sysctl_ipv4_init(void)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fec8b1fd7b63..9639334ebb7c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2391,7 +2391,6 @@ static void __net_exit tcp4_proc_exit_net(struct net *net)
 static struct pernet_operations tcp4_net_ops = {
 	.init = tcp4_proc_init_net,
 	.exit = tcp4_proc_exit_net,
-	.async = true,
 };
 
 int __init tcp4_proc_init(void)
@@ -2578,7 +2577,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
        .init	   = tcp_sk_init,
        .exit	   = tcp_sk_exit,
        .exit_batch = tcp_sk_exit_batch,
-       .async	   = true,
 };
 
 void __init tcp_v4_init(void)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index aa6fea9f3328..03b51cdcc731 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1024,7 +1024,6 @@ static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_lis
 static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
 	.init		=	tcp_net_metrics_init,
 	.exit_batch	=	tcp_net_metrics_exit_batch,
-	.async		=	true,
 };
 
 void __init tcp_metrics_init(void)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fb8f3a36bd14..f49e14cd3891 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2756,7 +2756,6 @@ static void __net_exit udp4_proc_exit_net(struct net *net)
 static struct pernet_operations udp4_net_ops = {
 	.init = udp4_proc_init_net,
 	.exit = udp4_proc_exit_net,
-	.async = true,
 };
 
 int __init udp4_proc_init(void)
@@ -2843,7 +2842,6 @@ static int __net_init udp_sysctl_init(struct net *net)
 
 static struct pernet_operations __net_initdata udp_sysctl_ops = {
 	.init	= udp_sysctl_init,
-	.async	= true,
 };
 
 void __init udp_init(void)
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 72f2c3806408..f96614e9b9a5 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -104,7 +104,6 @@ static void __net_exit udplite4_proc_exit_net(struct net *net)
 static struct pernet_operations udplite4_net_ops = {
 	.init = udplite4_proc_init_net,
 	.exit = udplite4_proc_exit_net,
-	.async = true,
 };
 
 static __init int udplite4_proc_init(void)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6c76a757fa4a..d73a6d6652f6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -367,7 +367,6 @@ static void __net_exit xfrm4_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm4_net_ops = {
 	.init	= xfrm4_net_init,
 	.exit	= xfrm4_net_exit,
-	.async	= true,
 };
 
 static void __init xfrm4_policy_init(void)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 189eac80f4ef..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4282,7 +4282,6 @@ static void __net_exit if6_proc_net_exit(struct net *net)
 static struct pernet_operations if6_proc_net_ops = {
 	.init = if6_proc_net_init,
 	.exit = if6_proc_net_exit,
-	.async = true,
 };
 
 int __init if6_proc_init(void)
@@ -6592,7 +6591,6 @@ static void __net_exit addrconf_exit_net(struct net *net)
 static struct pernet_operations addrconf_ops = {
 	.init = addrconf_init_net,
 	.exit = addrconf_exit_net,
-	.async = true,
 };
 
 static struct rtnl_af_ops inet6_ops __read_mostly = {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ba2e63633370..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -344,7 +344,6 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
 static struct pernet_operations ipv6_addr_label_ops = {
 	.init = ip6addrlbl_net_init,
 	.exit = ip6addrlbl_net_exit,
-	.async = true,
 };
 
 int __init ipv6_addr_label_init(void)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dbbe04018813..c1e292db04db 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -857,7 +857,6 @@ static void __net_exit inet6_net_exit(struct net *net)
 static struct pernet_operations inet6_net_ops = {
 	.init = inet6_net_init,
 	.exit = inet6_net_exit,
-	.async = true,
 };
 
 static const struct ipv6_stub ipv6_stub_impl = {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 00ef9467f3c0..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -397,7 +397,6 @@ static void __net_exit fib6_rules_net_exit(struct net *net)
 static struct pernet_operations fib6_rules_net_ops = {
 	.init = fib6_rules_net_init,
 	.exit = fib6_rules_net_exit,
-	.async = true,
 };
 
 int __init fib6_rules_init(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6f84668be6ea..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -998,7 +998,6 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
 static struct pernet_operations icmpv6_sk_ops = {
 	.init = icmpv6_sk_init,
 	.exit = icmpv6_sk_exit,
-	.async = true,
 };
 
 int __init icmpv6_init(void)
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e438699f000f..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -613,7 +613,6 @@ static struct pernet_operations ila_net_ops = {
 	.exit = ila_exit_net,
 	.id   = &ila_net_id,
 	.size = sizeof(struct ila_net),
-	.async = true,
 };
 
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2f995e9e3050..908b8e5b615a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2161,7 +2161,6 @@ static void fib6_net_exit(struct net *net)
 static struct pernet_operations fib6_net_ops = {
 	.init = fib6_net_init,
 	.exit = fib6_net_exit,
-	.async = true,
 };
 
 int __init fib6_init(void)
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f75b06ba8325..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -873,7 +873,6 @@ static void __net_exit ip6_flowlabel_net_exit(struct net *net)
 static struct pernet_operations ip6_flowlabel_net_ops = {
 	.init = ip6_flowlabel_proc_init,
 	.exit = ip6_flowlabel_net_exit,
-	.async = true,
 };
 
 int ip6_flowlabel_init(void)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3a98c694da5f..22e86557aca4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1528,7 +1528,6 @@ static struct pernet_operations ip6gre_net_ops = {
 	.exit_batch = ip6gre_exit_batch_net,
 	.id   = &ip6gre_net_id,
 	.size = sizeof(struct ip6gre_net),
-	.async = true,
 };
 
 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 456fcf942f95..df4c29f7d59f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2260,7 +2260,6 @@ static struct pernet_operations ip6_tnl_net_ops = {
 	.exit_batch = ip6_tnl_exit_batch_net,
 	.id   = &ip6_tnl_net_id,
 	.size = sizeof(struct ip6_tnl_net),
-	.async = true,
 };
 
 /**
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index a482b854eeea..60b771f49fb5 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1148,7 +1148,6 @@ static struct pernet_operations vti6_net_ops = {
 	.exit_batch = vti6_exit_batch_net,
 	.id   = &vti6_net_id,
 	.size = sizeof(struct vti6_net),
-	.async = true,
 };
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 1c8fa29d155a..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1348,7 +1348,6 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 static struct pernet_operations ip6mr_net_ops = {
 	.init = ip6mr_net_init,
 	.exit = ip6mr_net_exit,
-	.async = true,
 };
 
 int __init ip6_mr_init(void)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1e4c2b6ebd78..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2997,7 +2997,6 @@ static void __net_exit igmp6_net_exit(struct net *net)
 static struct pernet_operations igmp6_net_ops = {
 	.init = igmp6_net_init,
 	.exit = igmp6_net_exit,
-	.async = true,
 };
 
 int __init igmp6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d1d0b2fa7a07..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1883,7 +1883,6 @@ static void __net_exit ndisc_net_exit(struct net *net)
 static struct pernet_operations ndisc_net_ops = {
 	.init = ndisc_net_init,
 	.exit = ndisc_net_exit,
-	.async = true,
 };
 
 int __init ndisc_init(void)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4de8ac1e5af4..62358b93bbac 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1928,7 +1928,6 @@ static void __net_exit ip6_tables_net_exit(struct net *net)
 static struct pernet_operations ip6_tables_net_ops = {
 	.init = ip6_tables_net_init,
 	.exit = ip6_tables_net_exit,
-	.async = true,
 };
 
 static int __init ip6_tables_init(void)
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 06561c84c0bc..1343077dde93 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -87,7 +87,6 @@ static void __net_exit ip6table_filter_net_exit(struct net *net)
 static struct pernet_operations ip6table_filter_net_ops = {
 	.init = ip6table_filter_net_init,
 	.exit = ip6table_filter_net_exit,
-	.async = true,
 };
 
 static int __init ip6table_filter_init(void)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a11e25936b45..b0524b18c4fb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -107,7 +107,6 @@ static void __net_exit ip6table_mangle_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_mangle_net_ops = {
 	.exit = ip6table_mangle_net_exit,
-	.async = true,
 };
 
 static int __init ip6table_mangle_init(void)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 4475fd300bb6..47306e45a80a 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -131,7 +131,6 @@ static void __net_exit ip6table_nat_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_nat_net_ops = {
 	.exit	= ip6table_nat_net_exit,
-	.async	= true,
 };
 
 static int __init ip6table_nat_init(void)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index a88f3b1995b1..710fa0806c37 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -75,7 +75,6 @@ static void __net_exit ip6table_raw_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_raw_net_ops = {
 	.exit = ip6table_raw_net_exit,
-	.async = true,
 };
 
 static int __init ip6table_raw_init(void)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 320048c008dc..cf26ccb04056 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -74,7 +74,6 @@ static void __net_exit ip6table_security_net_exit(struct net *net)
 
 static struct pernet_operations ip6table_security_net_ops = {
 	.exit = ip6table_security_net_exit,
-	.async = true,
 };
 
 static int __init ip6table_security_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ba54bb3bd1e4..663827ee3cf8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,7 +401,6 @@ static struct pernet_operations ipv6_net_ops = {
 	.exit = ipv6_net_exit,
 	.id = &conntrack6_net_id,
 	.size = sizeof(struct conntrack6_net),
-	.async = true,
 };
 
 static int __init nf_conntrack_l3proto_ipv6_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 34136fe80ed5..b84ce3e6d728 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -646,7 +646,6 @@ static void nf_ct_net_exit(struct net *net)
 static struct pernet_operations nf_ct_net_ops = {
 	.init = nf_ct_net_init,
 	.exit = nf_ct_net_exit,
-	.async = true,
 };
 
 int nf_ct_frag6_init(void)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 32f98bc06900..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -103,7 +103,6 @@ static void __net_exit defrag6_net_exit(struct net *net)
 
 static struct pernet_operations defrag6_net_ops = {
 	.exit = defrag6_net_exit,
-	.async = true,
 };
 
 static int __init nf_defrag_init(void)
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 0220e584589c..b397a8fe88b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -390,7 +390,6 @@ static void __net_exit nf_log_ipv6_net_exit(struct net *net)
 static struct pernet_operations nf_log_ipv6_net_ops = {
 	.init = nf_log_ipv6_net_init,
 	.exit = nf_log_ipv6_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_ipv6_init(void)
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 318c6e914234..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -240,7 +240,6 @@ static void __net_init ping_v6_proc_exit_net(struct net *net)
 static struct pernet_operations ping_v6_net_ops = {
 	.init = ping_v6_proc_init_net,
 	.exit = ping_v6_proc_exit_net,
-	.async = true,
 };
 #endif
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index f9891fa672f3..6e57028d2e91 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -343,7 +343,6 @@ static void __net_exit ipv6_proc_exit_net(struct net *net)
 static struct pernet_operations ipv6_proc_ops = {
 	.init = ipv6_proc_init_net,
 	.exit = ipv6_proc_exit_net,
-	.async = true,
 };
 
 int __init ipv6_misc_proc_init(void)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index b5e5de732494..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1332,7 +1332,6 @@ static void __net_exit raw6_exit_net(struct net *net)
 static struct pernet_operations raw6_net_ops = {
 	.init = raw6_init_net,
 	.exit = raw6_exit_net,
-	.async = true,
 };
 
 int __init raw6_proc_init(void)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index b5da69c83123..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -733,7 +733,6 @@ static void __net_exit ipv6_frags_exit_net(struct net *net)
 static struct pernet_operations ip6_frags_ops = {
 	.init = ipv6_frags_init_net,
 	.exit = ipv6_frags_exit_net,
-	.async = true,
 };
 
 int __init ipv6_frag_init(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1d0eaa69874d..ba8d5df50ebe 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5083,7 +5083,6 @@ static void __net_exit ip6_route_net_exit_late(struct net *net)
 static struct pernet_operations ip6_route_net_ops = {
 	.init = ip6_route_net_init,
 	.exit = ip6_route_net_exit,
-	.async = true,
 };
 
 static int __net_init ipv6_inetpeer_init(struct net *net)
@@ -5109,13 +5108,11 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net)
 static struct pernet_operations ipv6_inetpeer_ops = {
 	.init	=	ipv6_inetpeer_init,
 	.exit	=	ipv6_inetpeer_exit,
-	.async	=	true,
 };
 
 static struct pernet_operations ip6_route_net_late_ops = {
 	.init = ip6_route_net_init_late,
 	.exit = ip6_route_net_exit_late,
-	.async = true,
 };
 
 static struct notifier_block ip6_route_dev_notifier = {
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c3f13c3bd8a9..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -395,7 +395,6 @@ static void __net_exit seg6_net_exit(struct net *net)
 static struct pernet_operations ip6_segments_ops = {
 	.init = seg6_net_init,
 	.exit = seg6_net_exit,
-	.async = true,
 };
 
 static const struct genl_ops seg6_genl_ops[] = {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8a4f8fddd812..1522bcfd253f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1888,7 +1888,6 @@ static struct pernet_operations sit_net_ops = {
 	.exit_batch = sit_exit_batch_net,
 	.id   = &sit_net_id,
 	.size = sizeof(struct sit_net),
-	.async = true,
 };
 
 static void __exit sit_cleanup(void)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 966c42af92f4..6fbdef630152 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -278,7 +278,6 @@ static void __net_exit ipv6_sysctl_net_exit(struct net *net)
 static struct pernet_operations ipv6_sysctl_net_ops = {
 	.init = ipv6_sysctl_net_init,
 	.exit = ipv6_sysctl_net_exit,
-	.async = true,
 };
 
 static struct ctl_table_header *ip6_header;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5425d7b100ee..883df0ad5bfe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2007,7 +2007,6 @@ static struct pernet_operations tcpv6_net_ops = {
 	.init	    = tcpv6_net_init,
 	.exit	    = tcpv6_net_exit,
 	.exit_batch = tcpv6_net_exit_batch,
-	.async	    = true,
 };
 
 int __init tcpv6_init(void)
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f3839780dc31..14ae32bb1f3d 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -123,7 +123,6 @@ static void __net_exit udplite6_proc_exit_net(struct net *net)
 static struct pernet_operations udplite6_net_ops = {
 	.init = udplite6_proc_init_net,
 	.exit = udplite6_proc_exit_net,
-	.async = true,
 };
 
 int __init udplite6_proc_init(void)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cbb270bd81b0..416fe67271a9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -400,7 +400,6 @@ static void __net_exit xfrm6_net_exit(struct net *net)
 static struct pernet_operations xfrm6_net_ops = {
 	.init	= xfrm6_net_init,
 	.exit	= xfrm6_net_exit,
-	.async	= true,
 };
 
 int __init xfrm6_init(void)
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index a9673619e0e9..f85f0d7480ac 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -353,7 +353,6 @@ static struct pernet_operations xfrm6_tunnel_net_ops = {
 	.exit	= xfrm6_tunnel_net_exit,
 	.id	= &xfrm6_tunnel_net_id,
 	.size	= sizeof(struct xfrm6_tunnel_net),
-	.async	= true,
 };
 
 static int __init xfrm6_tunnel_init(void)
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 4c2e9907f254..1fac92543094 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -433,7 +433,6 @@ static void kcm_proc_exit_net(struct net *net)
 static struct pernet_operations kcm_net_ops = {
 	.init = kcm_proc_init_net,
 	.exit = kcm_proc_exit_net,
-	.async = true,
 };
 
 int __init kcm_proc_init(void)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 516cfad71b85..dc76bc346829 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -2028,7 +2028,6 @@ static struct pernet_operations kcm_net_ops = {
 	.exit = kcm_exit_net,
 	.id   = &kcm_net_id,
 	.size = sizeof(struct kcm_net),
-	.async = true,
 };
 
 static int __init kcm_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3ac08ab26207..7e2e7188e7f4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3863,7 +3863,6 @@ static struct pernet_operations pfkey_net_ops = {
 	.exit = pfkey_net_exit,
 	.id   = &pfkey_net_id,
 	.size = sizeof(struct netns_pfkey),
-	.async = true,
 };
 
 static void __exit ipsec_pfkey_exit(void)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b86868da50d4..14b67dfacc4b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1789,7 +1789,6 @@ static struct pernet_operations l2tp_net_ops = {
 	.exit = l2tp_exit_net,
 	.id   = &l2tp_net_id,
 	.size = sizeof(struct l2tp_net),
-	.async = true,
 };
 
 static int __init l2tp_init(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f24504efe729..d6deca11da19 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1762,7 +1762,6 @@ static struct pernet_operations pppol2tp_net_ops = {
 	.init = pppol2tp_init_net,
 	.exit = pppol2tp_exit_net,
 	.id   = &pppol2tp_net_id,
-	.async = true,
 };
 
 /*****************************************************************************
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d4a89a8be013..7a4de6d618b1 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2488,7 +2488,6 @@ static void mpls_net_exit(struct net *net)
 static struct pernet_operations mpls_net_ops = {
 	.init = mpls_net_init,
 	.exit = mpls_net_exit,
-	.async = true,
 };
 
 static struct rtnl_af_ops mpls_af_ops __read_mostly = {
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index d72cc786c7b7..0f6b8172fb9a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -629,7 +629,6 @@ static void __net_exit netfilter_net_exit(struct net *net)
 static struct pernet_operations netfilter_net_ops = {
 	.init = netfilter_net_init,
 	.exit = netfilter_net_exit,
-	.async = true,
 };
 
 int __init netfilter_init(void)
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 2523ebe2b3cc..bc4bd247bb7d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2095,7 +2095,6 @@ static struct pernet_operations ip_set_net_ops = {
 	.exit   = ip_set_net_exit,
 	.id	= &ip_set_net_id,
 	.size	= sizeof(struct ip_set_net),
-	.async	= true,
 };
 
 static int __init
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 6a6cb9db030b..5f6f73cf2174 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2289,12 +2289,10 @@ static struct pernet_operations ipvs_core_ops = {
 	.exit = __ip_vs_cleanup,
 	.id   = &ip_vs_net_id,
 	.size = sizeof(struct netns_ipvs),
-	.async = true,
 };
 
 static struct pernet_operations ipvs_core_dev_ops = {
 	.exit = __ip_vs_dev_cleanup,
-	.async = true,
 };
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 8b25aab41928..58d5d05aec24 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -479,7 +479,6 @@ static void __ip_vs_ftp_exit(struct net *net)
 static struct pernet_operations ip_vs_ftp_ops = {
 	.init = __ip_vs_ftp_init,
 	.exit = __ip_vs_ftp_exit,
-	.async = true,
 };
 
 static int __init ip_vs_ftp_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 6a340c94c4b8..d625179de485 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -604,7 +604,6 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblc_ops = {
 	.init = __ip_vs_lblc_init,
 	.exit = __ip_vs_lblc_exit,
-	.async = true,
 };
 
 static int __init ip_vs_lblc_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0627881128da..84c57b62a588 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -789,7 +789,6 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
 static struct pernet_operations ip_vs_lblcr_ops = {
 	.init = __ip_vs_lblcr_init,
 	.exit = __ip_vs_lblcr_exit,
-	.async = true,
 };
 
 static int __init ip_vs_lblcr_init(void)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8884d302d33a..dd177ebee9aa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3417,7 +3417,6 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
 static struct pernet_operations ctnetlink_net_ops = {
 	.init		= ctnetlink_net_init,
 	.exit_batch	= ctnetlink_net_exit_batch,
-	.async		= true,
 };
 
 static int __init ctnetlink_init(void)
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9bcd72fe91f9..d049ea5a3770 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -406,7 +406,6 @@ static struct pernet_operations proto_gre_net_ops = {
 	.exit = proto_gre_net_exit,
 	.id   = &proto_gre_net_id,
 	.size = sizeof(struct netns_proto_gre),
-	.async = true,
 };
 
 static int __init nf_ct_proto_gre_init(void)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 98844c87d01e..037fec54c850 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -705,7 +705,6 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 static struct pernet_operations nf_conntrack_net_ops = {
 	.init		= nf_conntrack_pernet_init,
 	.exit_batch	= nf_conntrack_pernet_exit,
-	.async		= true,
 };
 
 static int __init nf_conntrack_standalone_init(void)
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index a964e4d356cc..6d0357817cda 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -577,7 +577,6 @@ static void __net_exit nf_log_net_exit(struct net *net)
 static struct pernet_operations nf_log_net_ops = {
 	.init = nf_log_net_init,
 	.exit = nf_log_net_exit,
-	.async = true,
 };
 
 int __init netfilter_log_init(void)
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
index 254c2c6bde48..350eb147754d 100644
--- a/net/netfilter/nf_log_netdev.c
+++ b/net/netfilter/nf_log_netdev.c
@@ -47,7 +47,6 @@ static void __net_exit nf_log_netdev_net_exit(struct net *net)
 static struct pernet_operations nf_log_netdev_net_ops = {
 	.init = nf_log_netdev_net_init,
 	.exit = nf_log_netdev_net_exit,
-	.async = true,
 };
 
 static int __init nf_log_netdev_init(void)
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 8f16fd27132d..6039b350abbe 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -398,7 +398,6 @@ static struct pernet_operations synproxy_net_ops = {
 	.exit		= synproxy_net_exit,
 	.id		= &synproxy_net_id,
 	.size		= sizeof(struct synproxy_net),
-	.async		= true,
 };
 
 static int __init synproxy_core_init(void)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fd13d28e4ca7..c4acc7340eb1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6597,7 +6597,6 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 static struct pernet_operations nf_tables_net_ops = {
 	.init	= nf_tables_init_net,
 	.exit	= nf_tables_exit_net,
-	.async	= true,
 };
 
 static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 84fc4954862d..03ead8a9e90c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -566,7 +566,6 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 static struct pernet_operations nfnetlink_net_ops = {
 	.init		= nfnetlink_net_init,
 	.exit_batch	= nfnetlink_net_exit_batch,
-	.async		= true,
 };
 
 static int __init nfnetlink_init(void)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 8d9f18bb8840..88d427f9f9e6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -515,7 +515,6 @@ static void __net_exit nfnl_acct_net_exit(struct net *net)
 static struct pernet_operations nfnl_acct_ops = {
         .init   = nfnl_acct_net_init,
         .exit   = nfnl_acct_net_exit,
-	.async	= true,
 };
 
 static int __init nfnl_acct_init(void)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 6819300f7fb7..95b04702a655 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -586,7 +586,6 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 static struct pernet_operations cttimeout_ops = {
 	.init	= cttimeout_net_init,
 	.exit	= cttimeout_net_exit,
-	.async	= true,
 };
 
 static int __init cttimeout_init(void)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b21ef79849a1..7b46aa4c478d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1108,7 +1108,6 @@ static struct pernet_operations nfnl_log_net_ops = {
 	.exit	= nfnl_log_net_exit,
 	.id	= &nfnl_log_net_id,
 	.size	= sizeof(struct nfnl_log_net),
-	.async	= true,
 };
 
 static int __init nfnetlink_log_init(void)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 9f572ed56208..0b839c38800f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1525,7 +1525,6 @@ static struct pernet_operations nfnl_queue_net_ops = {
 	.exit_batch	= nfnl_queue_net_exit_batch,
 	.id		= &nfnl_queue_net_id,
 	.size		= sizeof(struct nfnl_queue_net),
-	.async		= true,
 };
 
 static int __init nfnetlink_queue_init(void)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 6de1f6a4cb80..4aa01c90e9d1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1789,7 +1789,6 @@ static void __net_exit xt_net_exit(struct net *net)
 static struct pernet_operations xt_net_ops = {
 	.init = xt_net_init,
 	.exit = xt_net_exit,
-	.async = true,
 };
 
 static int __init xt_init(void)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index ef65b7a9173e..3360f13dc208 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1349,7 +1349,6 @@ static struct pernet_operations hashlimit_net_ops = {
 	.exit	= hashlimit_net_exit,
 	.id	= &hashlimit_net_id,
 	.size	= sizeof(struct hashlimit_net),
-	.async	= true,
 };
 
 static int __init hashlimit_mt_init(void)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 434e35ce940b..9bbfc17ce3ec 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -687,7 +687,6 @@ static struct pernet_operations recent_net_ops = {
 	.exit	= recent_net_exit,
 	.id	= &recent_net_id,
 	.size	= sizeof(struct recent_net),
-	.async	= true,
 };
 
 static struct xt_match recent_mt_reg[] __read_mostly = {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5d10dcfe6411..f1b02d87e336 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -253,7 +253,6 @@ static struct pernet_operations netlink_tap_net_ops = {
 	.exit = netlink_tap_exit_net,
 	.id   = &netlink_tap_net_id,
 	.size = sizeof(struct netlink_tap_net),
-	.async = true,
 };
 
 static bool netlink_filter_tap(const struct sk_buff *skb)
@@ -2726,7 +2725,6 @@ static void __init netlink_add_usersock_entry(void)
 static struct pernet_operations __net_initdata netlink_net_ops = {
 	.init = netlink_net_init,
 	.exit = netlink_net_exit,
-	.async = true,
 };
 
 static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index af51b8c0a2cb..b9ce82c9440f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1035,7 +1035,6 @@ static void __net_exit genl_pernet_exit(struct net *net)
 static struct pernet_operations genl_pernet_ops = {
 	.init = genl_pernet_init,
 	.exit = genl_pernet_exit,
-	.async = true,
 };
 
 static int __init genl_init(void)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 100191df0371..ef38e5aecd28 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2384,7 +2384,6 @@ static struct pernet_operations ovs_net_ops = {
 	.exit = ovs_exit_net,
 	.id   = &ovs_net_id,
 	.size = sizeof(struct ovs_net),
-	.async = true,
 };
 
 static int __init dp_init(void)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2c5a6fe5d749..616cb9c18f88 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4557,7 +4557,6 @@ static void __net_exit packet_net_exit(struct net *net)
 static struct pernet_operations packet_net_ops = {
 	.init = packet_net_init,
 	.exit = packet_net_exit,
-	.async = true,
 };
 
 
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 9454e8393793..77787512fc32 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -342,7 +342,6 @@ static struct pernet_operations phonet_net_ops = {
 	.exit = phonet_exit_net,
 	.id   = &phonet_net_id,
 	.size = sizeof(struct phonet_net),
-	.async = true,
 };
 
 /* Initialize Phonet devices list */
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 4f3a32c38bf5..351a28474667 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -530,7 +530,6 @@ static struct pernet_operations rds_tcp_net_ops = {
 	.exit = rds_tcp_exit_net,
 	.id = &rds_tcp_netid,
 	.size = sizeof(struct rds_tcp_net),
-	.async = true,
 };
 
 void *rds_tcp_listen_sock_def_readable(struct net *net)
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 5fd939dabf41..f18c9248e0d4 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -106,5 +106,4 @@ struct pernet_operations rxrpc_net_ops = {
 	.exit	= rxrpc_exit_net,
 	.id	= &rxrpc_net_id,
 	.size	= sizeof(struct rxrpc_net),
-	.async	= true,
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 7bd1b964f021..0d78b58e1898 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1533,7 +1533,6 @@ static struct pernet_operations tcf_action_net_ops = {
 	.exit = tcf_action_net_exit,
 	.id = &tcf_action_net_id,
 	.size = sizeof(struct tcf_action_net),
-	.async = true,
 };
 
 static int __init tc_action_init(void)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5cb9b268e8ff..9092531d45d8 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -413,7 +413,6 @@ static struct pernet_operations bpf_net_ops = {
 	.exit_batch = bpf_exit_net,
 	.id   = &bpf_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init bpf_init_module(void)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 371e5e4ab3e2..e4b880fa51fe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -222,7 +222,6 @@ static struct pernet_operations connmark_net_ops = {
 	.exit_batch = connmark_exit_net,
 	.id   = &connmark_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init connmark_init_module(void)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index a527e287c086..7e28b2ce1437 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -678,7 +678,6 @@ static struct pernet_operations csum_net_ops = {
 	.exit_batch = csum_exit_net,
 	.id   = &csum_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_DESCRIPTION("Checksum updating actions");
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 88fbb8403565..4dc4f153cad8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -261,7 +261,6 @@ static struct pernet_operations gact_net_ops = {
 	.exit_batch = gact_exit_net,
 	.id   = &gact_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 555b1caeff72..a5994cf0512b 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -870,7 +870,6 @@ static struct pernet_operations ife_net_ops = {
 	.exit_batch = ife_exit_net,
 	.id   = &ife_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init ife_init_module(void)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index b5e8565b89c7..14c312d7908f 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -352,7 +352,6 @@ static struct pernet_operations ipt_net_ops = {
 	.exit_batch = ipt_exit_net,
 	.id   = &ipt_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
@@ -403,7 +402,6 @@ static struct pernet_operations xt_net_ops = {
 	.exit_batch = xt_exit_net,
 	.id   = &xt_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 64c86579c3d9..fd34015331ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -353,7 +353,6 @@ static struct pernet_operations mirred_net_ops = {
 	.exit_batch = mirred_exit_net,
 	.id   = &mirred_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002)");
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index b1bc757f6491..4b5848b6c252 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -323,7 +323,6 @@ static struct pernet_operations nat_net_ops = {
 	.exit_batch = nat_exit_net,
 	.id   = &nat_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_DESCRIPTION("Stateless NAT actions");
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index f392ccaaa0d8..8a925c72db5f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -465,7 +465,6 @@ static struct pernet_operations pedit_net_ops = {
 	.exit_batch = pedit_exit_net,
 	.id   = &pedit_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 7081ec75e696..4e72bc2a0dfb 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -347,7 +347,6 @@ static struct pernet_operations police_net_ops = {
 	.exit_batch = police_exit_net,
 	.id   = &police_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init police_init_module(void)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3a89f98f17e6..5db358497c9e 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -249,7 +249,6 @@ static struct pernet_operations sample_net_ops = {
 	.exit_batch = sample_exit_net,
 	.id   = &sample_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init sample_init_module(void)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e84768ae610a..9618b4a83cee 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -216,7 +216,6 @@ static struct pernet_operations simp_net_ops = {
 	.exit_batch = simp_exit_net,
 	.id   = &simp_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim(2005)");
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 7971510fe61b..ddf69fc01bdf 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -253,7 +253,6 @@ static struct pernet_operations skbedit_net_ops = {
 	.exit_batch = skbedit_exit_net,
 	.id   = &skbedit_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 142a996ac776..bbcbdce732cc 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -279,7 +279,6 @@ static struct pernet_operations skbmod_net_ops = {
 	.exit_batch = skbmod_exit_net,
 	.id   = &skbmod_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index a1c8dd406a04..626dac81a48a 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -339,7 +339,6 @@ static struct pernet_operations tunnel_key_net_ops = {
 	.exit_batch = tunnel_key_exit_net,
 	.id   = &tunnel_key_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init tunnel_key_init_module(void)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 41a66effeb5f..853604685965 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -314,7 +314,6 @@ static struct pernet_operations vlan_net_ops = {
 	.exit_batch = vlan_exit_net,
 	.id   = &vlan_net_id,
 	.size = sizeof(struct tc_action_net),
-	.async = true,
 };
 
 static int __init vlan_init_module(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ec5fe8ec0c3e..b66754f52a9f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1619,7 +1619,6 @@ static struct pernet_operations tcf_net_ops = {
 	.exit = tcf_net_exit,
 	.id   = &tcf_net_id,
 	.size = sizeof(struct tcf_net),
-	.async = true,
 };
 
 static int __init tc_filter_init(void)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 68f9d942bed4..106dae7e4818 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2133,7 +2133,6 @@ static void __net_exit psched_net_exit(struct net *net)
 static struct pernet_operations psched_net_ops = {
 	.init = psched_net_init,
 	.exit = psched_net_exit,
-	.async = true,
 };
 
 static int __init pktsched_init(void)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 493b817f6a2a..84a09f599131 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1283,7 +1283,6 @@ static void __net_exit sctp_defaults_exit(struct net *net)
 static struct pernet_operations sctp_defaults_ops = {
 	.init = sctp_defaults_init,
 	.exit = sctp_defaults_exit,
-	.async = true,
 };
 
 static int __net_init sctp_ctrlsock_init(struct net *net)
@@ -1307,7 +1306,6 @@ static void __net_init sctp_ctrlsock_exit(struct net *net)
 static struct pernet_operations sctp_ctrlsock_ops = {
 	.init = sctp_ctrlsock_init,
 	.exit = sctp_ctrlsock_exit,
-	.async = true,
 };
 
 /* Initialize the universe into something sensible.  */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 44f939cb6bc8..9463af4b32e8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2063,7 +2063,6 @@ static __net_exit void rpcsec_gss_exit_net(struct net *net)
 static struct pernet_operations rpcsec_gss_net_ops = {
 	.init = rpcsec_gss_init_net,
 	.exit = rpcsec_gss_exit_net,
-	.async = true,
 };
 
 /*
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 68287e921847..56f9eff74150 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -79,7 +79,6 @@ static struct pernet_operations sunrpc_net_ops = {
 	.exit = sunrpc_exit_net,
 	.id = &sunrpc_net_id,
 	.size = sizeof(struct sunrpc_net),
-	.async = true,
 };
 
 static int __init
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index f424539829b7..9aed6fe1bf1a 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -89,7 +89,6 @@ static void __net_exit sysctl_net_exit(struct net *net)
 static struct pernet_operations sysctl_pernet_ops = {
 	.init = sysctl_net_init,
 	.exit = sysctl_net_exit,
-	.async = true,
 };
 
 static struct ctl_table_header *net_header;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 52dfc51ac4d5..5b38f5164281 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -109,7 +109,6 @@ static struct pernet_operations tipc_net_ops = {
 	.exit = tipc_exit_net,
 	.id   = &tipc_net_id,
 	.size = sizeof(struct tipc_net),
-	.async = true,
 };
 
 static int __init tipc_init(void)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index bc2970a8e7f3..aded82da1aea 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2913,7 +2913,6 @@ static void __net_exit unix_net_exit(struct net *net)
 static struct pernet_operations unix_net_ops = {
 	.init = unix_net_init,
 	.exit = unix_net_exit,
-	.async = true,
 };
 
 static int __init af_unix_init(void)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 670aa229168a..a6f3cac8c640 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1340,7 +1340,6 @@ static void __net_exit cfg80211_pernet_exit(struct net *net)
 
 static struct pernet_operations cfg80211_pernet_ops = {
 	.exit = cfg80211_pernet_exit,
-	.async = true,
 };
 
 static int __init cfg80211_init(void)
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index bc7064486b15..9efbfc753347 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -390,7 +390,6 @@ static void __net_exit wext_pernet_exit(struct net *net)
 static struct pernet_operations wext_pernet_ops = {
 	.init = wext_pernet_init,
 	.exit = wext_pernet_exit,
-	.async = true,
 };
 
 static int __init wireless_nlevent_init(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cb3bb9ae4407..625b3fca5704 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2985,7 +2985,6 @@ static void __net_exit xfrm_net_exit(struct net *net)
 static struct pernet_operations __net_initdata xfrm_net_ops = {
 	.init = xfrm_net_init,
 	.exit = xfrm_net_exit,
-	.async = true,
 };
 
 void __init xfrm_init(void)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e92b8c019c88..080035f056d9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3253,7 +3253,6 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
 static struct pernet_operations xfrm_user_net_ops = {
 	.init	    = xfrm_user_net_init,
 	.exit_batch = xfrm_user_net_exit,
-	.async	    = true,
 };
 
 static int __init xfrm_user_init(void)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b4d7b6242a40..8644d864e3c1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6743,7 +6743,6 @@ static void __net_exit selinux_nf_unregister(struct net *net)
 static struct pernet_operations selinux_net_ops = {
 	.init = selinux_nf_register,
 	.exit = selinux_nf_unregister,
-	.async = true,
 };
 
 static int __init selinux_nf_ip_init(void)
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index 3f29c03162ca..e36d17835d4f 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -89,7 +89,6 @@ static void __net_exit smack_nf_unregister(struct net *net)
 static struct pernet_operations smack_net_ops = {
 	.init = smack_nf_register,
 	.exit = smack_nf_unregister,
-	.async = true,
 };
 
 static int __init smack_nf_ip_init(void)

From 4420bf21fb6c0306e36ad58ade1e741fba57ce65 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 27 Mar 2018 18:02:23 +0300
Subject: [PATCH 1341/1678] net: Rename net_sem to pernet_ops_rwsem

net_sem is some undefined area name, so it will be better
to make the area more defined.

Rename it to pernet_ops_rwsem for better readability and
better intelligibility.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h   |  2 +-
 include/net/net_namespace.h | 12 ++++++-----
 net/core/net_namespace.c    | 40 ++++++++++++++++++-------------------
 net/core/rtnetlink.c        |  4 ++--
 4 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 562a175c35a9..c7d1e4689325 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -36,7 +36,7 @@ extern int rtnl_is_locked(void);
 extern int rtnl_lock_killable(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
-extern struct rw_semaphore net_sem;
+extern struct rw_semaphore pernet_ops_rwsem;
 
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 37bcf8382b61..922e8b6fb422 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,9 +60,10 @@ struct net {
 
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	exit_list;	/* To linked to call pernet exit
-						 * methods on dead net (net_sem
-						 * read locked), or to unregister
-						 * pernet ops (net_sem wr locked).
+						 * methods on dead net (
+						 * pernet_ops_rwsem read locked),
+						 * or to unregister pernet ops
+						 * (pernet_ops_rwsem write locked).
 						 */
 	struct llist_node	cleanup_list;	/* namespaces on death row */
 
@@ -95,8 +96,9 @@ struct net {
 	/* core fib_rules */
 	struct list_head	rules_ops;
 
-	struct list_head	fib_notifier_ops;  /* protected by net_sem */
-
+	struct list_head	fib_notifier_ops;  /* Populated by
+						    * register_pernet_subsys()
+						    */
 	struct net_device       *loopback_dev;          /* The loopback */
 	struct netns_core	core;
 	struct netns_mib	mib;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index eef17ad29dea..9e8ee4640451 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -41,10 +41,10 @@ EXPORT_SYMBOL(init_net);
 
 static bool init_net_initialized;
 /*
- * net_sem: protects: pernet_list, net_generic_ids,
+ * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
  * init_net_initialized and first_device pointer.
  */
-DECLARE_RWSEM(net_sem);
+DECLARE_RWSEM(pernet_ops_rwsem);
 
 #define MIN_PERNET_OPS_ID	\
 	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
@@ -72,7 +72,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
 	BUG_ON(id < MIN_PERNET_OPS_ID);
 
 	old_ng = rcu_dereference_protected(net->gen,
-					   lockdep_is_held(&net_sem));
+					   lockdep_is_held(&pernet_ops_rwsem));
 	if (old_ng->s.len > id) {
 		old_ng->ptr[id] = data;
 		return 0;
@@ -289,7 +289,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
  */
 static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 {
-	/* Must be called with net_sem held */
+	/* Must be called with pernet_ops_rwsem held */
 	const struct pernet_operations *ops, *saved_ops;
 	int error = 0;
 	LIST_HEAD(net_exit_list);
@@ -422,13 +422,13 @@ struct net *copy_net_ns(unsigned long flags,
 	net->ucounts = ucounts;
 	get_user_ns(user_ns);
 
-	rv = down_read_killable(&net_sem);
+	rv = down_read_killable(&pernet_ops_rwsem);
 	if (rv < 0)
 		goto put_userns;
 
 	rv = setup_net(net, user_ns);
 
-	up_read(&net_sem);
+	up_read(&pernet_ops_rwsem);
 
 	if (rv < 0) {
 put_userns:
@@ -480,7 +480,7 @@ static void cleanup_net(struct work_struct *work)
 	/* Atomically snapshot the list of namespaces to cleanup */
 	net_kill_list = llist_del_all(&cleanup_list);
 
-	down_read(&net_sem);
+	down_read(&pernet_ops_rwsem);
 
 	/* Don't let anyone else find us. */
 	rtnl_lock();
@@ -519,7 +519,7 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry_reverse(ops, &pernet_list, list)
 		ops_free_list(ops, &net_exit_list);
 
-	up_read(&net_sem);
+	up_read(&pernet_ops_rwsem);
 
 	/* Ensure there are no outstanding rcu callbacks using this
 	 * network namespace.
@@ -546,8 +546,8 @@ static void cleanup_net(struct work_struct *work)
  */
 void net_ns_barrier(void)
 {
-	down_write(&net_sem);
-	up_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
+	up_write(&pernet_ops_rwsem);
 }
 EXPORT_SYMBOL(net_ns_barrier);
 
@@ -869,12 +869,12 @@ static int __init net_ns_init(void)
 
 	rcu_assign_pointer(init_net.gen, ng);
 
-	down_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
 	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
 	init_net_initialized = true;
-	up_write(&net_sem);
+	up_write(&pernet_ops_rwsem);
 
 	register_pernet_subsys(&net_ns_ops);
 
@@ -1013,9 +1013,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
 int register_pernet_subsys(struct pernet_operations *ops)
 {
 	int error;
-	down_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
 	error =  register_pernet_operations(first_device, ops);
-	up_write(&net_sem);
+	up_write(&pernet_ops_rwsem);
 	return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_subsys);
@@ -1031,9 +1031,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
  */
 void unregister_pernet_subsys(struct pernet_operations *ops)
 {
-	down_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
 	unregister_pernet_operations(ops);
-	up_write(&net_sem);
+	up_write(&pernet_ops_rwsem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 
@@ -1059,11 +1059,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 int register_pernet_device(struct pernet_operations *ops)
 {
 	int error;
-	down_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
 	error = register_pernet_operations(&pernet_list, ops);
 	if (!error && (first_device == &pernet_list))
 		first_device = &ops->list;
-	up_write(&net_sem);
+	up_write(&pernet_ops_rwsem);
 	return error;
 }
 EXPORT_SYMBOL_GPL(register_pernet_device);
@@ -1079,11 +1079,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
  */
 void unregister_pernet_device(struct pernet_operations *ops)
 {
-	down_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
 	if (&ops->list == first_device)
 		first_device = first_device->next;
 	unregister_pernet_operations(ops);
-	up_write(&net_sem);
+	up_write(&pernet_ops_rwsem);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 31438b63d4b4..73011a60434c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -460,11 +460,11 @@ static void rtnl_lock_unregistering_all(void)
 void rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	/* Close the race with cleanup_net() */
-	down_write(&net_sem);
+	down_write(&pernet_ops_rwsem);
 	rtnl_lock_unregistering_all();
 	__rtnl_link_unregister(ops);
 	rtnl_unlock();
-	up_write(&net_sem);
+	up_write(&pernet_ops_rwsem);
 }
 EXPORT_SYMBOL_GPL(rtnl_link_unregister);
 

From 8518e9bb98b602eca0717d5aaad63ccbe56539d2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 27 Mar 2018 18:02:32 +0300
Subject: [PATCH 1342/1678] net: Add more comments

This adds comments to different places to improve
readability.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 4 ++++
 net/core/net_namespace.c    | 2 ++
 net/core/rtnetlink.c        | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 922e8b6fb422..1ab4f920f109 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -323,6 +323,10 @@ struct pernet_operations {
 	 * have to keep in mind all other pernet_operations and
 	 * to introduce a locking, if they share common resources.
 	 *
+	 * The only time they are called with exclusive lock is
+	 * from register_pernet_subsys(), unregister_pernet_subsys()
+	 * register_pernet_device() and unregister_pernet_device().
+	 *
 	 * Exit methods using blocking RCU primitives, such as
 	 * synchronize_rcu(), should be implemented via exit_batch.
 	 * Then, destruction of a group of net requires single
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9e8ee4640451..b5796d17a302 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -43,6 +43,8 @@ static bool init_net_initialized;
 /*
  * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
  * init_net_initialized and first_device pointer.
+ * This is internal net namespace object. Please, don't use it
+ * outside.
  */
 DECLARE_RWSEM(pernet_ops_rwsem);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 73011a60434c..2d3949789cef 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -459,7 +459,7 @@ static void rtnl_lock_unregistering_all(void)
  */
 void rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
-	/* Close the race with cleanup_net() */
+	/* Close the race with setup_net() and cleanup_net() */
 	down_write(&pernet_ops_rwsem);
 	rtnl_lock_unregistering_all();
 	__rtnl_link_unregister(ops);

From 3af0f34290f6192756ee1d9c2d5fe27222267035 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 27 Mar 2018 17:41:59 +0100
Subject: [PATCH 1343/1678] sfc: replace asynchronous filter operations

Instead of having an efx->type->filter_rfs_insert() method, just use
 workitems with a worker function that calls efx->type->filter_insert().
The only user of this is efx_filter_rfs(), which now queues a call to
 efx_filter_rfs_work().
Similarly, efx_filter_rfs_expire() is now a worker function called on a
 new channel->filter_work work_struct, so the method
 efx->type->filter_rfs_expire_one() is no longer called in atomic context.
 We also add a new mutex efx->rps_mutex to protect the RPS state (efx->
 rps_expire_channel, efx->rps_expire_index, and channel->rps_flow_id) so
 that the taking of efx->filter_lock can be moved to
 efx->type->filter_rfs_expire_one().
Thus, all filter table functions are now called in a sleepable context,
 allowing them to use sleeping locks in a future patch.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 161 +++-----------------------
 drivers/net/ethernet/sfc/efx.c        |  15 ++-
 drivers/net/ethernet/sfc/efx.h        |   7 +-
 drivers/net/ethernet/sfc/farch.c      |  17 ++-
 drivers/net/ethernet/sfc/net_driver.h |  10 +-
 drivers/net/ethernet/sfc/nic.h        |   2 -
 drivers/net/ethernet/sfc/rx.c         | 119 ++++++++++++-------
 drivers/net/ethernet/sfc/siena.c      |   1 -
 8 files changed, 124 insertions(+), 208 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index e100273b623d..bcbaba330fb5 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4758,143 +4758,6 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
 
 #ifdef CONFIG_RFS_ACCEL
 
-static efx_mcdi_async_completer efx_ef10_filter_rfs_insert_complete;
-
-static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
-				      struct efx_filter_spec *spec)
-{
-	struct efx_ef10_filter_table *table = efx->filter_state;
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
-	struct efx_filter_spec *saved_spec;
-	unsigned int hash, i, depth = 1;
-	bool replacing = false;
-	int ins_index = -1;
-	u64 cookie;
-	s32 rc;
-
-	/* Must be an RX filter without RSS and not for a multicast
-	 * destination address (RFS only works for connected sockets).
-	 * These restrictions allow us to pass only a tiny amount of
-	 * data through to the completion function.
-	 */
-	EFX_WARN_ON_PARANOID(spec->flags !=
-			     (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_SCATTER));
-	EFX_WARN_ON_PARANOID(spec->priority != EFX_FILTER_PRI_HINT);
-	EFX_WARN_ON_PARANOID(efx_filter_is_mc_recipient(spec));
-
-	hash = efx_ef10_filter_hash(spec);
-
-	spin_lock_bh(&efx->filter_lock);
-
-	/* Find any existing filter with the same match tuple or else
-	 * a free slot to insert at.  If an existing filter is busy,
-	 * we have to give up.
-	 */
-	for (;;) {
-		i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
-		saved_spec = efx_ef10_filter_entry_spec(table, i);
-
-		if (!saved_spec) {
-			if (ins_index < 0)
-				ins_index = i;
-		} else if (efx_ef10_filter_equal(spec, saved_spec)) {
-			if (table->entry[i].spec & EFX_EF10_FILTER_FLAG_BUSY) {
-				rc = -EBUSY;
-				goto fail_unlock;
-			}
-			if (spec->priority < saved_spec->priority) {
-				rc = -EPERM;
-				goto fail_unlock;
-			}
-			ins_index = i;
-			break;
-		}
-
-		/* Once we reach the maximum search depth, use the
-		 * first suitable slot or return -EBUSY if there was
-		 * none
-		 */
-		if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
-			if (ins_index < 0) {
-				rc = -EBUSY;
-				goto fail_unlock;
-			}
-			break;
-		}
-
-		++depth;
-	}
-
-	/* Create a software table entry if necessary, and mark it
-	 * busy.  We might yet fail to insert, but any attempt to
-	 * insert a conflicting filter while we're waiting for the
-	 * firmware must find the busy entry.
-	 */
-	saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
-	if (saved_spec) {
-		replacing = true;
-	} else {
-		saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
-		if (!saved_spec) {
-			rc = -ENOMEM;
-			goto fail_unlock;
-		}
-		*saved_spec = *spec;
-	}
-	efx_ef10_filter_set_entry(table, ins_index, saved_spec,
-				  EFX_EF10_FILTER_FLAG_BUSY);
-
-	spin_unlock_bh(&efx->filter_lock);
-
-	/* Pack up the variables needed on completion */
-	cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id;
-
-	efx_ef10_filter_push_prep(efx, spec, inbuf,
-				  table->entry[ins_index].handle, NULL,
-				  replacing);
-	efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
-			   MC_CMD_FILTER_OP_OUT_LEN,
-			   efx_ef10_filter_rfs_insert_complete, cookie);
-
-	return ins_index;
-
-fail_unlock:
-	spin_unlock_bh(&efx->filter_lock);
-	return rc;
-}
-
-static void
-efx_ef10_filter_rfs_insert_complete(struct efx_nic *efx, unsigned long cookie,
-				    int rc, efx_dword_t *outbuf,
-				    size_t outlen_actual)
-{
-	struct efx_ef10_filter_table *table = efx->filter_state;
-	unsigned int ins_index, dmaq_id;
-	struct efx_filter_spec *spec;
-	bool replacing;
-
-	/* Unpack the cookie */
-	replacing = cookie >> 31;
-	ins_index = (cookie >> 16) & (HUNT_FILTER_TBL_ROWS - 1);
-	dmaq_id = cookie & 0xffff;
-
-	spin_lock_bh(&efx->filter_lock);
-	spec = efx_ef10_filter_entry_spec(table, ins_index);
-	if (rc == 0) {
-		table->entry[ins_index].handle =
-			MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
-		if (replacing)
-			spec->dmaq_id = dmaq_id;
-	} else if (!replacing) {
-		kfree(spec);
-		spec = NULL;
-	}
-	efx_ef10_filter_set_entry(table, ins_index, spec, 0);
-	spin_unlock_bh(&efx->filter_lock);
-
-	wake_up_all(&table->waitq);
-}
-
 static void
 efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
 				    unsigned long filter_idx,
@@ -4905,18 +4768,22 @@ static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 					   unsigned int filter_idx)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
-	struct efx_filter_spec *spec =
-		efx_ef10_filter_entry_spec(table, filter_idx);
+	struct efx_filter_spec *spec;
 	MCDI_DECLARE_BUF(inbuf,
 			 MC_CMD_FILTER_OP_IN_HANDLE_OFST +
 			 MC_CMD_FILTER_OP_IN_HANDLE_LEN);
+	bool ret = true;
 
+	spin_lock_bh(&efx->filter_lock);
+	spec = efx_ef10_filter_entry_spec(table, filter_idx);
 	if (!spec ||
 	    (table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) ||
 	    spec->priority != EFX_FILTER_PRI_HINT ||
 	    !rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
-				 flow_id, filter_idx))
-		return false;
+				 flow_id, filter_idx)) {
+		ret = false;
+		goto out_unlock;
+	}
 
 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
 		       MC_CMD_FILTER_OP_IN_OP_REMOVE);
@@ -4924,10 +4791,12 @@ static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 		       table->entry[filter_idx].handle);
 	if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0,
 			       efx_ef10_filter_rfs_expire_complete, filter_idx))
-		return false;
-
-	table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
-	return true;
+		ret = false;
+	else
+		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
+out_unlock:
+	spin_unlock_bh(&efx->filter_lock);
+	return ret;
 }
 
 static void
@@ -6784,7 +6653,6 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
 	.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
 	.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
 #ifdef CONFIG_RFS_ACCEL
-	.filter_rfs_insert = efx_ef10_filter_rfs_insert,
 	.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
 #endif
 #ifdef CONFIG_SFC_MTD
@@ -6897,7 +6765,6 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
 	.filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
 #ifdef CONFIG_RFS_ACCEL
-	.filter_rfs_insert = efx_ef10_filter_rfs_insert,
 	.filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
 #endif
 #ifdef CONFIG_SFC_MTD
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 7321a4cf6f4d..0be93abdb2ad 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -340,7 +340,10 @@ static int efx_poll(struct napi_struct *napi, int budget)
 			efx_update_irq_mod(efx, channel);
 		}
 
-		efx_filter_rfs_expire(channel);
+#ifdef CONFIG_RFS_ACCEL
+		/* Perhaps expire some ARFS filters */
+		schedule_work(&channel->filter_work);
+#endif
 
 		/* There is no race here; although napi_disable() will
 		 * only wait for napi_complete(), this isn't a problem
@@ -470,6 +473,10 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 		tx_queue->channel = channel;
 	}
 
+#ifdef CONFIG_RFS_ACCEL
+	INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
+
 	rx_queue = &channel->rx_queue;
 	rx_queue->efx = efx;
 	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
@@ -512,6 +519,9 @@ efx_copy_channel(const struct efx_channel *old_channel)
 	rx_queue->buffer = NULL;
 	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
 	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
+#ifdef CONFIG_RFS_ACCEL
+	INIT_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
 
 	return channel;
 }
@@ -3012,6 +3022,9 @@ static int efx_init_struct(struct efx_nic *efx,
 	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
 	BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
 	mutex_init(&efx->mac_lock);
+#ifdef CONFIG_RFS_ACCEL
+	mutex_init(&efx->rps_mutex);
+#endif
 	efx->phy_op = &efx_dummy_phy_operations;
 	efx->mdio.dev = net_dev;
 	INIT_WORK(&efx->mac_work, efx_mac_work);
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 3429ae3f3b08..545c2ea1622e 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -170,15 +170,18 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
 int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 		   u16 rxq_index, u32 flow_id);
 bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota);
-static inline void efx_filter_rfs_expire(struct efx_channel *channel)
+static inline void efx_filter_rfs_expire(struct work_struct *data)
 {
+	struct efx_channel *channel = container_of(data, struct efx_channel,
+						   filter_work);
+
 	if (channel->rfs_filters_added >= 60 &&
 	    __efx_filter_rfs_expire(channel->efx, 100))
 		channel->rfs_filters_added -= 60;
 }
 #define efx_filter_rfs_enabled() 1
 #else
-static inline void efx_filter_rfs_expire(struct efx_channel *channel) {}
+static inline void efx_filter_rfs_expire(struct work_struct *data) {}
 #define efx_filter_rfs_enabled() 0
 #endif
 bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index ad001e77d554..6b5ca569db5e 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -2901,28 +2901,25 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
 
 #ifdef CONFIG_RFS_ACCEL
 
-s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
-				struct efx_filter_spec *gen_spec)
-{
-	return efx_farch_filter_insert(efx, gen_spec, true);
-}
-
 bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 				     unsigned int index)
 {
 	struct efx_farch_filter_state *state = efx->filter_state;
-	struct efx_farch_filter_table *table =
-		&state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+	struct efx_farch_filter_table *table;
+	bool ret = false;
 
+	spin_lock_bh(&efx->filter_lock);
+	table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
 	if (test_bit(index, table->used_bitmap) &&
 	    table->spec[index].priority == EFX_FILTER_PRI_HINT &&
 	    rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
 				flow_id, index)) {
 		efx_farch_filter_table_clear_entry(efx, table, index);
-		return true;
+		ret = true;
 	}
 
-	return false;
+	spin_unlock_bh(&efx->filter_lock);
+	return ret;
 }
 
 #endif /* CONFIG_RFS_ACCEL */
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 2453f3849e72..dad78e3dde70 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -430,6 +430,7 @@ enum efx_sync_events_state {
  * @event_test_cpu: Last CPU to handle interrupt or test event for this channel
  * @irq_count: Number of IRQs since last adaptive moderation decision
  * @irq_mod_score: IRQ moderation score
+ * @filter_work: Work item for efx_filter_rfs_expire()
  * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
  *      indexed by filter ID
  * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
@@ -475,6 +476,7 @@ struct efx_channel {
 	unsigned int irq_mod_score;
 #ifdef CONFIG_RFS_ACCEL
 	unsigned int rfs_filters_added;
+	struct work_struct filter_work;
 #define RPS_FLOW_ID_INVALID 0xFFFFFFFF
 	u32 *rps_flow_id;
 #endif
@@ -844,6 +846,7 @@ struct efx_rss_context {
  * @filter_sem: Filter table rw_semaphore, for freeing the table
  * @filter_lock: Filter table lock, for mere content changes
  * @filter_state: Architecture-dependent filter table state
+ * @rps_mutex: Protects RPS state of all channels
  * @rps_expire_channel: Next channel to check for expiry
  * @rps_expire_index: Next index to check for expiry in
  *	@rps_expire_channel's @rps_flow_id
@@ -998,6 +1001,7 @@ struct efx_nic {
 	spinlock_t filter_lock;
 	void *filter_state;
 #ifdef CONFIG_RFS_ACCEL
+	struct mutex rps_mutex;
 	unsigned int rps_expire_channel;
 	unsigned int rps_expire_index;
 #endif
@@ -1152,10 +1156,6 @@ struct efx_udp_tunnel {
  * @filter_count_rx_used: Get the number of filters in use at a given priority
  * @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
  * @filter_get_rx_ids: Get list of RX filters at a given priority
- * @filter_rfs_insert: Add or replace a filter for RFS.  This must be
- *	atomic.  The hardware change may be asynchronous but should
- *	not be delayed for long.  It may fail if this can't be done
- *	atomically.
  * @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
  *	This must check whether the specified table entry is used by RFS
  *	and that rps_may_expire_flow() returns true for it.
@@ -1306,8 +1306,6 @@ struct efx_nic_type {
 				 enum efx_filter_priority priority,
 				 u32 *buf, u32 size);
 #ifdef CONFIG_RFS_ACCEL
-	s32 (*filter_rfs_insert)(struct efx_nic *efx,
-				 struct efx_filter_spec *spec);
 	bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id,
 				      unsigned int index);
 #endif
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index d080a414e8f2..ac59afad6e5c 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -601,8 +601,6 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
 				enum efx_filter_priority priority, u32 *buf,
 				u32 size);
 #ifdef CONFIG_RFS_ACCEL
-s32 efx_farch_filter_rfs_insert(struct efx_nic *efx,
-				struct efx_filter_spec *spec);
 bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 				     unsigned int index);
 #endif
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index cfe76aad79ee..95682831484e 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -827,14 +827,67 @@ MODULE_PARM_DESC(rx_refill_threshold,
 
 #ifdef CONFIG_RFS_ACCEL
 
+/**
+ * struct efx_async_filter_insertion - Request to asynchronously insert a filter
+ * @net_dev: Reference to the netdevice
+ * @spec: The filter to insert
+ * @work: Workitem for this request
+ * @rxq_index: Identifies the channel for which this request was made
+ * @flow_id: Identifies the kernel-side flow for which this request was made
+ */
+struct efx_async_filter_insertion {
+	struct net_device *net_dev;
+	struct efx_filter_spec spec;
+	struct work_struct work;
+	u16 rxq_index;
+	u32 flow_id;
+};
+
+static void efx_filter_rfs_work(struct work_struct *data)
+{
+	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
+							      work);
+	struct efx_nic *efx = netdev_priv(req->net_dev);
+	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
+	int rc;
+
+	rc = efx->type->filter_insert(efx, &req->spec, false);
+	if (rc >= 0) {
+		/* Remember this so we can check whether to expire the filter
+		 * later.
+		 */
+		mutex_lock(&efx->rps_mutex);
+		channel->rps_flow_id[rc] = req->flow_id;
+		++channel->rfs_filters_added;
+		mutex_unlock(&efx->rps_mutex);
+
+		if (req->spec.ether_type == htons(ETH_P_IP))
+			netif_info(efx, rx_status, efx->net_dev,
+				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
+				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+				   req->spec.rem_host, ntohs(req->spec.rem_port),
+				   req->spec.loc_host, ntohs(req->spec.loc_port),
+				   req->rxq_index, req->flow_id, rc);
+		else
+			netif_info(efx, rx_status, efx->net_dev,
+				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
+				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+				   req->spec.rem_host, ntohs(req->spec.rem_port),
+				   req->spec.loc_host, ntohs(req->spec.loc_port),
+				   req->rxq_index, req->flow_id, rc);
+	}
+
+	/* Release references */
+	dev_put(req->net_dev);
+	kfree(req);
+}
+
 int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 		   u16 rxq_index, u32 flow_id)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	struct efx_channel *channel;
-	struct efx_filter_spec spec;
+	struct efx_async_filter_insertion *req;
 	struct flow_keys fk;
-	int rc;
 
 	if (flow_id == RPS_FLOW_ID_INVALID)
 		return -EINVAL;
@@ -847,50 +900,39 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
 		return -EPROTONOSUPPORT;
 
-	efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT,
+	req = kmalloc(sizeof(*req), GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+
+	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
 			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
 			   rxq_index);
-	spec.match_flags =
+	req->spec.match_flags =
 		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
 		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
 		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
-	spec.ether_type = fk.basic.n_proto;
-	spec.ip_proto = fk.basic.ip_proto;
+	req->spec.ether_type = fk.basic.n_proto;
+	req->spec.ip_proto = fk.basic.ip_proto;
 
 	if (fk.basic.n_proto == htons(ETH_P_IP)) {
-		spec.rem_host[0] = fk.addrs.v4addrs.src;
-		spec.loc_host[0] = fk.addrs.v4addrs.dst;
+		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
+		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
 	} else {
-		memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
-		memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
+		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
+		       sizeof(struct in6_addr));
+		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
+		       sizeof(struct in6_addr));
 	}
 
-	spec.rem_port = fk.ports.src;
-	spec.loc_port = fk.ports.dst;
+	req->spec.rem_port = fk.ports.src;
+	req->spec.loc_port = fk.ports.dst;
 
-	rc = efx->type->filter_rfs_insert(efx, &spec);
-	if (rc < 0)
-		return rc;
-
-	/* Remember this so we can check whether to expire the filter later */
-	channel = efx_get_channel(efx, rxq_index);
-	channel->rps_flow_id[rc] = flow_id;
-	++channel->rfs_filters_added;
-
-	if (spec.ether_type == htons(ETH_P_IP))
-		netif_info(efx, rx_status, efx->net_dev,
-			   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
-			   (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
-			   spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
-			   ntohs(spec.loc_port), rxq_index, flow_id, rc);
-	else
-		netif_info(efx, rx_status, efx->net_dev,
-			   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
-			   (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
-			   spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
-			   ntohs(spec.loc_port), rxq_index, flow_id, rc);
-
-	return rc;
+	dev_hold(req->net_dev = net_dev);
+	INIT_WORK(&req->work, efx_filter_rfs_work);
+	req->rxq_index = rxq_index;
+	req->flow_id = flow_id;
+	schedule_work(&req->work);
+	return 0;
 }
 
 bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
@@ -899,9 +941,8 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
 	unsigned int channel_idx, index, size;
 	u32 flow_id;
 
-	if (!spin_trylock_bh(&efx->filter_lock))
+	if (!mutex_trylock(&efx->rps_mutex))
 		return false;
-
 	expire_one = efx->type->filter_rfs_expire_one;
 	channel_idx = efx->rps_expire_channel;
 	index = efx->rps_expire_index;
@@ -926,7 +967,7 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)
 	efx->rps_expire_channel = channel_idx;
 	efx->rps_expire_index = index;
 
-	spin_unlock_bh(&efx->filter_lock);
+	mutex_unlock(&efx->rps_mutex);
 	return true;
 }
 
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 18aab25234ba..65161f68265a 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -1035,7 +1035,6 @@ const struct efx_nic_type siena_a0_nic_type = {
 	.filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
 	.filter_get_rx_ids = efx_farch_filter_get_rx_ids,
 #ifdef CONFIG_RFS_ACCEL
-	.filter_rfs_insert = efx_farch_filter_rfs_insert,
 	.filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
 #endif
 #ifdef CONFIG_SFC_MTD

From c2bebe37c6b686817f795b6b63599ed4472775fa Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 27 Mar 2018 17:42:28 +0100
Subject: [PATCH 1344/1678] sfc: give ef10 its own rwsem in the filter table
 instead of filter_lock

efx->filter_lock remains in place for use on farch, but EF10 now ignores it.
EFX_EF10_FILTER_FLAG_BUSY is no longer needed, hence it is removed.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 363 +++++++++++---------------
 drivers/net/ethernet/sfc/efx.c        |   1 -
 drivers/net/ethernet/sfc/net_driver.h |   2 +-
 3 files changed, 152 insertions(+), 214 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index bcbaba330fb5..9db1b9144e70 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -96,17 +96,15 @@ struct efx_ef10_filter_table {
 		MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2];
 	unsigned int rx_match_count;
 
+	struct rw_semaphore lock; /* Protects entries */
 	struct {
 		unsigned long spec;	/* pointer to spec plus flag bits */
-/* BUSY flag indicates that an update is in progress.  AUTO_OLD is
- * used to mark and sweep MAC filters for the device address lists.
- */
-#define EFX_EF10_FILTER_FLAG_BUSY	1UL
+/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. */
+/* unused flag	1UL */
 #define EFX_EF10_FILTER_FLAG_AUTO_OLD	2UL
 #define EFX_EF10_FILTER_FLAGS		3UL
 		u64 handle;		/* firmware handle */
 	} *entry;
-	wait_queue_head_t waitq;
 /* Shadow of net_device address lists, guarded by mac_lock */
 	struct efx_ef10_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX];
 	struct efx_ef10_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX];
@@ -4302,26 +4300,33 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 				  struct efx_filter_spec *spec,
 				  bool replace_equal)
 {
-	struct efx_ef10_filter_table *table = efx->filter_state;
 	DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+	struct efx_ef10_filter_table *table;
 	struct efx_filter_spec *saved_spec;
 	struct efx_rss_context *ctx = NULL;
 	unsigned int match_pri, hash;
 	unsigned int priv_flags;
 	bool replacing = false;
+	unsigned int depth, i;
 	int ins_index = -1;
 	DEFINE_WAIT(wait);
 	bool is_mc_recip;
 	s32 rc;
 
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_write(&table->lock);
+
 	/* For now, only support RX filters */
 	if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) !=
-	    EFX_FILTER_FLAG_RX)
-		return -EINVAL;
+	    EFX_FILTER_FLAG_RX) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
 
 	rc = efx_ef10_filter_pri(table, spec);
 	if (rc < 0)
-		return rc;
+		goto out_unlock;
 	match_pri = rc;
 
 	hash = efx_ef10_filter_hash(spec);
@@ -4335,86 +4340,64 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 							 &efx->rss_context.list);
 		else
 			ctx = &efx->rss_context;
-		if (!ctx)
-			return -ENOENT;
-		if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
-			return -EOPNOTSUPP;
+		if (!ctx) {
+			rc = -ENOENT;
+			goto out_unlock;
+		}
+		if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+			rc = -EOPNOTSUPP;
+			goto out_unlock;
+		}
 	}
 
 	/* Find any existing filters with the same match tuple or
-	 * else a free slot to insert at.  If any of them are busy,
-	 * we have to wait and retry.
+	 * else a free slot to insert at.
 	 */
-	for (;;) {
-		unsigned int depth = 1;
-		unsigned int i;
+	for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
+		i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
+		saved_spec = efx_ef10_filter_entry_spec(table, i);
 
-		spin_lock_bh(&efx->filter_lock);
-
-		for (;;) {
-			i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
-			saved_spec = efx_ef10_filter_entry_spec(table, i);
-
-			if (!saved_spec) {
+		if (!saved_spec) {
+			if (ins_index < 0)
+				ins_index = i;
+		} else if (efx_ef10_filter_equal(spec, saved_spec)) {
+			if (spec->priority < saved_spec->priority &&
+			    spec->priority != EFX_FILTER_PRI_AUTO) {
+				rc = -EPERM;
+				goto out_unlock;
+			}
+			if (!is_mc_recip) {
+				/* This is the only one */
+				if (spec->priority ==
+				    saved_spec->priority &&
+				    !replace_equal) {
+					rc = -EEXIST;
+					goto out_unlock;
+				}
+				ins_index = i;
+				break;
+			} else if (spec->priority >
+				   saved_spec->priority ||
+				   (spec->priority ==
+				    saved_spec->priority &&
+				    replace_equal)) {
 				if (ins_index < 0)
 					ins_index = i;
-			} else if (efx_ef10_filter_equal(spec, saved_spec)) {
-				if (table->entry[i].spec &
-				    EFX_EF10_FILTER_FLAG_BUSY)
-					break;
-				if (spec->priority < saved_spec->priority &&
-				    spec->priority != EFX_FILTER_PRI_AUTO) {
-					rc = -EPERM;
-					goto out_unlock;
-				}
-				if (!is_mc_recip) {
-					/* This is the only one */
-					if (spec->priority ==
-					    saved_spec->priority &&
-					    !replace_equal) {
-						rc = -EEXIST;
-						goto out_unlock;
-					}
-					ins_index = i;
-					goto found;
-				} else if (spec->priority >
-					   saved_spec->priority ||
-					   (spec->priority ==
-					    saved_spec->priority &&
-					    replace_equal)) {
-					if (ins_index < 0)
-						ins_index = i;
-					else
-						__set_bit(depth, mc_rem_map);
-				}
+				else
+					__set_bit(depth, mc_rem_map);
 			}
-
-			/* Once we reach the maximum search depth, use
-			 * the first suitable slot or return -EBUSY if
-			 * there was none
-			 */
-			if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) {
-				if (ins_index < 0) {
-					rc = -EBUSY;
-					goto out_unlock;
-				}
-				goto found;
-			}
-
-			++depth;
 		}
-
-		prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock_bh(&efx->filter_lock);
-		schedule();
 	}
 
-found:
-	/* Create a software table entry if necessary, and mark it
-	 * busy.  We might yet fail to insert, but any attempt to
-	 * insert a conflicting filter while we're waiting for the
-	 * firmware must find the busy entry.
+	/* Once we reach the maximum search depth, use the first suitable
+	 * slot, or return -EBUSY if there was none
 	 */
+	if (ins_index < 0) {
+		rc = -EBUSY;
+		goto out_unlock;
+	}
+
+	/* Create a software table entry if necessary. */
 	saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
 	if (saved_spec) {
 		if (spec->priority == EFX_FILTER_PRI_AUTO &&
@@ -4438,28 +4421,13 @@ found:
 		*saved_spec = *spec;
 		priv_flags = 0;
 	}
-	efx_ef10_filter_set_entry(table, ins_index, saved_spec,
-				  priv_flags | EFX_EF10_FILTER_FLAG_BUSY);
-
-	/* Mark lower-priority multicast recipients busy prior to removal */
-	if (is_mc_recip) {
-		unsigned int depth, i;
-
-		for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
-			i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
-			if (test_bit(depth, mc_rem_map))
-				table->entry[i].spec |=
-					EFX_EF10_FILTER_FLAG_BUSY;
-		}
-	}
-
-	spin_unlock_bh(&efx->filter_lock);
+	efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
 
+	/* Actually insert the filter on the HW */
 	rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
 				  ctx, replacing);
 
 	/* Finalise the software table entry */
-	spin_lock_bh(&efx->filter_lock);
 	if (rc == 0) {
 		if (replacing) {
 			/* Update the fields that may differ */
@@ -4475,6 +4443,12 @@ found:
 	} else if (!replacing) {
 		kfree(saved_spec);
 		saved_spec = NULL;
+	} else {
+		/* We failed to replace, so the old filter is still present.
+		 * Roll back the software table to reflect this.  In fact the
+		 * efx_ef10_filter_set_entry() call below will do the right
+		 * thing, so nothing extra is needed here.
+		 */
 	}
 	efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
 
@@ -4496,7 +4470,6 @@ found:
 			priv_flags = efx_ef10_filter_entry_flags(table, i);
 
 			if (rc == 0) {
-				spin_unlock_bh(&efx->filter_lock);
 				MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
 					       MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
 				MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
@@ -4504,15 +4477,12 @@ found:
 				rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
 						  inbuf, sizeof(inbuf),
 						  NULL, 0, NULL);
-				spin_lock_bh(&efx->filter_lock);
 			}
 
 			if (rc == 0) {
 				kfree(saved_spec);
 				saved_spec = NULL;
 				priv_flags = 0;
-			} else {
-				priv_flags &= ~EFX_EF10_FILTER_FLAG_BUSY;
 			}
 			efx_ef10_filter_set_entry(table, i, saved_spec,
 						  priv_flags);
@@ -4523,10 +4493,9 @@ found:
 	if (rc == 0)
 		rc = efx_ef10_make_filter_id(match_pri, ins_index);
 
-	wake_up_all(&table->waitq);
 out_unlock:
-	spin_unlock_bh(&efx->filter_lock);
-	finish_wait(&table->waitq, &wait);
+	up_write(&table->lock);
+	up_read(&efx->filter_sem);
 	return rc;
 }
 
@@ -4539,6 +4508,8 @@ static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx)
  * If !by_index, remove by ID
  * If by_index, remove by index
  * Filter ID may come from userland and must be range-checked.
+ * Caller must hold efx->filter_sem for read, and efx->filter_state->lock
+ * for write.
  */
 static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 					   unsigned int priority_mask,
@@ -4553,45 +4524,23 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 	DEFINE_WAIT(wait);
 	int rc;
 
-	/* Find the software table entry and mark it busy.  Don't
-	 * remove it yet; any attempt to update while we're waiting
-	 * for the firmware must find the busy entry.
-	 */
-	for (;;) {
-		spin_lock_bh(&efx->filter_lock);
-		if (!(table->entry[filter_idx].spec &
-		      EFX_EF10_FILTER_FLAG_BUSY))
-			break;
-		prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock_bh(&efx->filter_lock);
-		schedule();
-	}
-
 	spec = efx_ef10_filter_entry_spec(table, filter_idx);
 	if (!spec ||
 	    (!by_index &&
 	     efx_ef10_filter_pri(table, spec) !=
-	     efx_ef10_filter_get_unsafe_pri(filter_id))) {
-		rc = -ENOENT;
-		goto out_unlock;
-	}
+	     efx_ef10_filter_get_unsafe_pri(filter_id)))
+		return -ENOENT;
 
 	if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO &&
 	    priority_mask == (1U << EFX_FILTER_PRI_AUTO)) {
 		/* Just remove flags */
 		spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO;
 		table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
-		rc = 0;
-		goto out_unlock;
+		return 0;
 	}
 
-	if (!(priority_mask & (1U << spec->priority))) {
-		rc = -ENOENT;
-		goto out_unlock;
-	}
-
-	table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
-	spin_unlock_bh(&efx->filter_lock);
+	if (!(priority_mask & (1U << spec->priority)))
+		return -ENOENT;
 
 	if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) {
 		/* Reset to an automatic filter */
@@ -4609,7 +4558,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 					  &efx->rss_context,
 					  true);
 
-		spin_lock_bh(&efx->filter_lock);
 		if (rc == 0)
 			*spec = new_spec;
 	} else {
@@ -4624,7 +4572,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 		rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP,
 					inbuf, sizeof(inbuf), NULL, 0, NULL);
 
-		spin_lock_bh(&efx->filter_lock);
 		if ((rc == 0) || (rc == -ENOENT)) {
 			/* Filter removed OK or didn't actually exist */
 			kfree(spec);
@@ -4636,11 +4583,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
 		}
 	}
 
-	table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY;
-	wake_up_all(&table->waitq);
-out_unlock:
-	spin_unlock_bh(&efx->filter_lock);
-	finish_wait(&table->waitq, &wait);
 	return rc;
 }
 
@@ -4648,17 +4590,33 @@ static int efx_ef10_filter_remove_safe(struct efx_nic *efx,
 				       enum efx_filter_priority priority,
 				       u32 filter_id)
 {
-	return efx_ef10_filter_remove_internal(efx, 1U << priority,
-					       filter_id, false);
+	struct efx_ef10_filter_table *table;
+	int rc;
+
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_write(&table->lock);
+	rc = efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id,
+					     false);
+	up_write(&table->lock);
+	up_read(&efx->filter_sem);
+	return rc;
 }
 
+/* Caller must hold efx->filter_sem for read */
 static void efx_ef10_filter_remove_unsafe(struct efx_nic *efx,
 					  enum efx_filter_priority priority,
 					  u32 filter_id)
 {
+	struct efx_ef10_filter_table *table = efx->filter_state;
+
 	if (filter_id == EFX_EF10_FILTER_ID_INVALID)
 		return;
-	efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, true);
+
+	down_write(&table->lock);
+	efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id,
+					true);
+	up_write(&table->lock);
 }
 
 static int efx_ef10_filter_get_safe(struct efx_nic *efx,
@@ -4666,11 +4624,13 @@ static int efx_ef10_filter_get_safe(struct efx_nic *efx,
 				    u32 filter_id, struct efx_filter_spec *spec)
 {
 	unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id);
-	struct efx_ef10_filter_table *table = efx->filter_state;
 	const struct efx_filter_spec *saved_spec;
+	struct efx_ef10_filter_table *table;
 	int rc;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_read(&table->lock);
 	saved_spec = efx_ef10_filter_entry_spec(table, filter_idx);
 	if (saved_spec && saved_spec->priority == priority &&
 	    efx_ef10_filter_pri(table, saved_spec) ==
@@ -4680,13 +4640,15 @@ static int efx_ef10_filter_get_safe(struct efx_nic *efx,
 	} else {
 		rc = -ENOENT;
 	}
-	spin_unlock_bh(&efx->filter_lock);
+	up_read(&table->lock);
+	up_read(&efx->filter_sem);
 	return rc;
 }
 
 static int efx_ef10_filter_clear_rx(struct efx_nic *efx,
-				     enum efx_filter_priority priority)
+				    enum efx_filter_priority priority)
 {
+	struct efx_ef10_filter_table *table;
 	unsigned int priority_mask;
 	unsigned int i;
 	int rc;
@@ -4694,31 +4656,40 @@ static int efx_ef10_filter_clear_rx(struct efx_nic *efx,
 	priority_mask = (((1U << (priority + 1)) - 1) &
 			 ~(1U << EFX_FILTER_PRI_AUTO));
 
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_write(&table->lock);
 	for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
 		rc = efx_ef10_filter_remove_internal(efx, priority_mask,
 						     i, true);
 		if (rc && rc != -ENOENT)
-			return rc;
+			break;
+		rc = 0;
 	}
 
-	return 0;
+	up_write(&table->lock);
+	up_read(&efx->filter_sem);
+	return rc;
 }
 
 static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx,
 					 enum efx_filter_priority priority)
 {
-	struct efx_ef10_filter_table *table = efx->filter_state;
+	struct efx_ef10_filter_table *table;
 	unsigned int filter_idx;
 	s32 count = 0;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_read(&table->lock);
 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
 		if (table->entry[filter_idx].spec &&
 		    efx_ef10_filter_entry_spec(table, filter_idx)->priority ==
 		    priority)
 			++count;
 	}
-	spin_unlock_bh(&efx->filter_lock);
+	up_read(&table->lock);
+	up_read(&efx->filter_sem);
 	return count;
 }
 
@@ -4733,12 +4704,15 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
 				      enum efx_filter_priority priority,
 				      u32 *buf, u32 size)
 {
-	struct efx_ef10_filter_table *table = efx->filter_state;
+	struct efx_ef10_filter_table *table;
 	struct efx_filter_spec *spec;
 	unsigned int filter_idx;
 	s32 count = 0;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_read(&table->lock);
+
 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
 		spec = efx_ef10_filter_entry_spec(table, filter_idx);
 		if (spec && spec->priority == priority) {
@@ -4752,73 +4726,44 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
 					filter_idx);
 		}
 	}
-	spin_unlock_bh(&efx->filter_lock);
+	up_read(&table->lock);
+	up_read(&efx->filter_sem);
 	return count;
 }
 
 #ifdef CONFIG_RFS_ACCEL
 
-static void
-efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
-				    unsigned long filter_idx,
-				    int rc, efx_dword_t *outbuf,
-				    size_t outlen_actual);
-
 static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 					   unsigned int filter_idx)
 {
-	struct efx_ef10_filter_table *table = efx->filter_state;
+	struct efx_ef10_filter_table *table;
 	struct efx_filter_spec *spec;
-	MCDI_DECLARE_BUF(inbuf,
-			 MC_CMD_FILTER_OP_IN_HANDLE_OFST +
-			 MC_CMD_FILTER_OP_IN_HANDLE_LEN);
-	bool ret = true;
+	bool ret;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_read(&efx->filter_sem);
+	table = efx->filter_state;
+	down_write(&table->lock);
 	spec = efx_ef10_filter_entry_spec(table, filter_idx);
-	if (!spec ||
-	    (table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) ||
-	    spec->priority != EFX_FILTER_PRI_HINT ||
-	    !rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
+
+	if (!spec || spec->priority != EFX_FILTER_PRI_HINT) {
+		ret = true;
+		goto out_unlock;
+	}
+
+	if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id,
 				 flow_id, filter_idx)) {
 		ret = false;
 		goto out_unlock;
 	}
 
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
-		       MC_CMD_FILTER_OP_IN_OP_REMOVE);
-	MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
-		       table->entry[filter_idx].handle);
-	if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0,
-			       efx_ef10_filter_rfs_expire_complete, filter_idx))
-		ret = false;
-	else
-		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
+	ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
+					      filter_idx, true) == 0;
 out_unlock:
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&table->lock);
+	up_read(&efx->filter_sem);
 	return ret;
 }
 
-static void
-efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx,
-				    unsigned long filter_idx,
-				    int rc, efx_dword_t *outbuf,
-				    size_t outlen_actual)
-{
-	struct efx_ef10_filter_table *table = efx->filter_state;
-	struct efx_filter_spec *spec =
-		efx_ef10_filter_entry_spec(table, filter_idx);
-
-	spin_lock_bh(&efx->filter_lock);
-	if (rc == 0) {
-		kfree(spec);
-		efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
-	}
-	table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY;
-	wake_up_all(&table->waitq);
-	spin_unlock_bh(&efx->filter_lock);
-}
-
 #endif /* CONFIG_RFS_ACCEL */
 
 static int efx_ef10_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags)
@@ -5011,9 +4956,9 @@ static int efx_ef10_filter_table_probe(struct efx_nic *efx)
 	table->vlan_filter =
 		!!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
 	INIT_LIST_HEAD(&table->vlan_list);
+	init_rwsem(&table->lock);
 
 	efx->filter_state = table;
-	init_waitqueue_head(&table->waitq);
 
 	list_for_each_entry(vlan, &nic_data->vlan_list, list) {
 		rc = efx_ef10_filter_add_vlan(efx, vlan->vid);
@@ -5055,7 +5000,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 	if (!table)
 		return;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_write(&table->lock);
 
 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
 		spec = efx_ef10_filter_entry_spec(table, filter_idx);
@@ -5093,15 +5038,11 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 			}
 		}
 
-		table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY;
-		spin_unlock_bh(&efx->filter_lock);
-
 		rc = efx_ef10_filter_push(efx, spec,
 					  &table->entry[filter_idx].handle,
 					  ctx, false);
 		if (rc)
 			failed++;
-		spin_lock_bh(&efx->filter_lock);
 
 		if (rc) {
 not_restored:
@@ -5113,13 +5054,10 @@ not_restored:
 
 			kfree(spec);
 			efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
-		} else {
-			table->entry[filter_idx].spec &=
-				~EFX_EF10_FILTER_FLAG_BUSY;
 		}
 	}
 
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&table->lock);
 
 	/* This can happen validly if the MC's capabilities have changed, so
 	 * is not an error.
@@ -5187,6 +5125,8 @@ static void efx_ef10_filter_mark_one_old(struct efx_nic *efx, uint16_t *id)
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	unsigned int filter_idx;
 
+	efx_rwsem_assert_write_locked(&table->lock);
+
 	if (*id != EFX_EF10_FILTER_ID_INVALID) {
 		filter_idx = efx_ef10_filter_get_unsafe_id(*id);
 		if (!table->entry[filter_idx].spec)
@@ -5222,10 +5162,10 @@ static void efx_ef10_filter_mark_old(struct efx_nic *efx)
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	struct efx_ef10_filter_vlan *vlan;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_write(&table->lock);
 	list_for_each_entry(vlan, &table->vlan_list, list)
 		_efx_ef10_filter_vlan_mark_old(efx, vlan);
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&table->lock);
 }
 
 static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
@@ -5502,10 +5442,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx,
 	return rc;
 }
 
-/* Remove filters that weren't renewed.  Since nothing else changes the AUTO_OLD
- * flag or removes these filters, we don't need to hold the filter_lock while
- * scanning for these filters.
- */
+/* Remove filters that weren't renewed. */
 static void efx_ef10_filter_remove_old(struct efx_nic *efx)
 {
 	struct efx_ef10_filter_table *table = efx->filter_state;
@@ -5514,6 +5451,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx)
 	int rc;
 	int i;
 
+	down_write(&table->lock);
 	for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
 		if (READ_ONCE(table->entry[i].spec) &
 		    EFX_EF10_FILTER_FLAG_AUTO_OLD) {
@@ -5525,6 +5463,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx)
 				remove_failed++;
 		}
 	}
+	up_write(&table->lock);
 
 	if (remove_failed)
 		netif_info(efx, drv, efx->net_dev,
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 0be93abdb2ad..cb9273016916 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1783,7 +1783,6 @@ static int efx_probe_filters(struct efx_nic *efx)
 {
 	int rc;
 
-	spin_lock_init(&efx->filter_lock);
 	init_rwsem(&efx->filter_sem);
 	mutex_lock(&efx->mac_lock);
 	down_write(&efx->filter_sem);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index dad78e3dde70..e2901dda4e32 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -843,7 +843,7 @@ struct efx_rss_context {
  * @loopback_mode: Loopback status
  * @loopback_modes: Supported loopback mode bitmask
  * @loopback_selftest: Offline self-test private state
- * @filter_sem: Filter table rw_semaphore, for freeing the table
+ * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
  * @filter_lock: Filter table lock, for mere content changes
  * @filter_state: Architecture-dependent filter table state
  * @rps_mutex: Protects RPS state of all channels

From fc7a6c287ff395eb64745292b4d398e64152cfb6 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 27 Mar 2018 17:42:57 +0100
Subject: [PATCH 1345/1678] sfc: use a semaphore to lock farch filters too

With this change, the spinlock efx->filter_lock is no longer used and is
 thus removed.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/farch.c      | 67 +++++++++++++--------------
 drivers/net/ethernet/sfc/net_driver.h |  2 -
 2 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 6b5ca569db5e..4a19c7efdf8d 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1878,6 +1878,7 @@ struct efx_farch_filter_table {
 };
 
 struct efx_farch_filter_state {
+	struct rw_semaphore lock; /* Protects table contents */
 	struct efx_farch_filter_table table[EFX_FARCH_FILTER_TABLE_COUNT];
 };
 
@@ -2397,9 +2398,13 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
 	if (rc)
 		return rc;
 
+	down_write(&state->lock);
+
 	table = &state->table[efx_farch_filter_spec_table_id(&spec)];
-	if (table->size == 0)
-		return -EINVAL;
+	if (table->size == 0) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
 
 	netif_vdbg(efx, hw, efx->net_dev,
 		   "%s: type %d search_limit=%d", __func__, spec.type,
@@ -2412,8 +2417,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
 			     EFX_FARCH_FILTER_MC_DEF - EFX_FARCH_FILTER_UC_DEF);
 		rep_index = spec.type - EFX_FARCH_FILTER_UC_DEF;
 		ins_index = rep_index;
-
-		spin_lock_bh(&efx->filter_lock);
 	} else {
 		/* Search concurrently for
 		 * (1) a filter to be replaced (rep_index): any filter
@@ -2443,8 +2446,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
 		ins_index = -1;
 		depth = 1;
 
-		spin_lock_bh(&efx->filter_lock);
-
 		for (;;) {
 			if (!test_bit(i, table->used_bitmap)) {
 				if (ins_index < 0)
@@ -2463,7 +2464,7 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
 				/* Case (b) */
 				if (ins_index < 0) {
 					rc = -EBUSY;
-					goto out;
+					goto out_unlock;
 				}
 				rep_index = -1;
 				break;
@@ -2483,11 +2484,11 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
 
 		if (spec.priority == saved_spec->priority && !replace_equal) {
 			rc = -EEXIST;
-			goto out;
+			goto out_unlock;
 		}
 		if (spec.priority < saved_spec->priority) {
 			rc = -EPERM;
-			goto out;
+			goto out_unlock;
 		}
 		if (saved_spec->priority == EFX_FILTER_PRI_AUTO ||
 		    saved_spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)
@@ -2528,8 +2529,8 @@ s32 efx_farch_filter_insert(struct efx_nic *efx,
 		   __func__, spec.type, ins_index, spec.dmaq_id);
 	rc = efx_farch_filter_make_id(&spec, ins_index);
 
-out:
-	spin_unlock_bh(&efx->filter_lock);
+out_unlock:
+	up_write(&state->lock);
 	return rc;
 }
 
@@ -2604,11 +2605,11 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx,
 	filter_idx = efx_farch_filter_id_index(filter_id);
 	if (filter_idx >= table->size)
 		return -ENOENT;
+	down_write(&state->lock);
 	spec = &table->spec[filter_idx];
 
-	spin_lock_bh(&efx->filter_lock);
 	rc = efx_farch_filter_remove(efx, table, filter_idx, priority);
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&state->lock);
 
 	return rc;
 }
@@ -2622,30 +2623,28 @@ int efx_farch_filter_get_safe(struct efx_nic *efx,
 	struct efx_farch_filter_table *table;
 	struct efx_farch_filter_spec *spec;
 	unsigned int filter_idx;
-	int rc;
+	int rc = -ENOENT;
+
+	down_read(&state->lock);
 
 	table_id = efx_farch_filter_id_table_id(filter_id);
 	if ((unsigned int)table_id >= EFX_FARCH_FILTER_TABLE_COUNT)
-		return -ENOENT;
+		goto out_unlock;
 	table = &state->table[table_id];
 
 	filter_idx = efx_farch_filter_id_index(filter_id);
 	if (filter_idx >= table->size)
-		return -ENOENT;
+		goto out_unlock;
 	spec = &table->spec[filter_idx];
 
-	spin_lock_bh(&efx->filter_lock);
-
 	if (test_bit(filter_idx, table->used_bitmap) &&
 	    spec->priority == priority) {
 		efx_farch_filter_to_gen_spec(spec_buf, spec);
 		rc = 0;
-	} else {
-		rc = -ENOENT;
 	}
 
-	spin_unlock_bh(&efx->filter_lock);
-
+out_unlock:
+	up_read(&state->lock);
 	return rc;
 }
 
@@ -2658,13 +2657,13 @@ efx_farch_filter_table_clear(struct efx_nic *efx,
 	struct efx_farch_filter_table *table = &state->table[table_id];
 	unsigned int filter_idx;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_write(&state->lock);
 	for (filter_idx = 0; filter_idx < table->size; ++filter_idx) {
 		if (table->spec[filter_idx].priority != EFX_FILTER_PRI_AUTO)
 			efx_farch_filter_remove(efx, table,
 						filter_idx, priority);
 	}
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&state->lock);
 }
 
 int efx_farch_filter_clear_rx(struct efx_nic *efx,
@@ -2688,7 +2687,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
 	unsigned int filter_idx;
 	u32 count = 0;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_read(&state->lock);
 
 	for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
 	     table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
@@ -2701,7 +2700,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
 		}
 	}
 
-	spin_unlock_bh(&efx->filter_lock);
+	up_read(&state->lock);
 
 	return count;
 }
@@ -2716,7 +2715,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
 	unsigned int filter_idx;
 	s32 count = 0;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_read(&state->lock);
 
 	for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
 	     table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
@@ -2735,7 +2734,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
 		}
 	}
 out:
-	spin_unlock_bh(&efx->filter_lock);
+	up_read(&state->lock);
 
 	return count;
 }
@@ -2749,7 +2748,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx)
 	efx_oword_t filter;
 	unsigned int filter_idx;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_write(&state->lock);
 
 	for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
 		table = &state->table[table_id];
@@ -2770,7 +2769,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx)
 	efx_farch_filter_push_rx_config(efx);
 	efx_farch_filter_push_tx_limits(efx);
 
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&state->lock);
 }
 
 void efx_farch_filter_table_remove(struct efx_nic *efx)
@@ -2864,7 +2863,7 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
 	efx_oword_t filter;
 	unsigned int filter_idx;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_write(&state->lock);
 
 	for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
 	     table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
@@ -2896,7 +2895,7 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
 
 	efx_farch_filter_push_rx_config(efx);
 
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&state->lock);
 }
 
 #ifdef CONFIG_RFS_ACCEL
@@ -2908,7 +2907,7 @@ bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 	struct efx_farch_filter_table *table;
 	bool ret = false;
 
-	spin_lock_bh(&efx->filter_lock);
+	down_write(&state->lock);
 	table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
 	if (test_bit(index, table->used_bitmap) &&
 	    table->spec[index].priority == EFX_FILTER_PRI_HINT &&
@@ -2918,7 +2917,7 @@ bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
 		ret = true;
 	}
 
-	spin_unlock_bh(&efx->filter_lock);
+	up_write(&state->lock);
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index e2901dda4e32..92ee55d84b7d 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -844,7 +844,6 @@ struct efx_rss_context {
  * @loopback_modes: Supported loopback mode bitmask
  * @loopback_selftest: Offline self-test private state
  * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state
- * @filter_lock: Filter table lock, for mere content changes
  * @filter_state: Architecture-dependent filter table state
  * @rps_mutex: Protects RPS state of all channels
  * @rps_expire_channel: Next channel to check for expiry
@@ -998,7 +997,6 @@ struct efx_nic {
 	void *loopback_selftest;
 
 	struct rw_semaphore filter_sem;
-	spinlock_t filter_lock;
 	void *filter_state;
 #ifdef CONFIG_RFS_ACCEL
 	struct mutex rps_mutex;

From 31b842955211f427a099f78a6a63d18a6bbc8d55 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 27 Mar 2018 17:44:21 +0100
Subject: [PATCH 1346/1678] sfc: return a better error if filter insertion
 collides with MC reboot

If some other operation gets the MCDI lock ahead of us and performs an MC
 reboot, then our attempt to insert the filter will fail with EINVAL,
 because the destination VI (spec->dmaq_id, MC_CMD_FILTER_OP_IN_RX_QUEUE) does
 not exist.  But the caller's request (which might e.g. be an ethtool ntuple
 request from userland) isn't invalid, it just got unlucky; so return EAGAIN.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 9db1b9144e70..9a139c390037 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4301,6 +4301,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 				  bool replace_equal)
 {
 	DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	struct efx_ef10_filter_table *table;
 	struct efx_filter_spec *saved_spec;
 	struct efx_rss_context *ctx = NULL;
@@ -4427,6 +4428,12 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 	rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
 				  ctx, replacing);
 
+	if (rc == -EINVAL && nic_data->must_realloc_vis)
+		/* The MC rebooted under us, causing it to reject our filter
+		 * insertion as pointing to an invalid VI (spec->dmaq_id).
+		 */
+		rc = -EAGAIN;
+
 	/* Finalise the software table entry */
 	if (rc == 0) {
 		if (replacing) {

From e0a65e3c5e3b7b11ec9320524b8fcc210f2026e9 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 27 Mar 2018 17:44:36 +0100
Subject: [PATCH 1347/1678] sfc: protect list of RSS contexts under a mutex

Otherwise races are possible between ethtool ops and
 efx_ef10_rx_restore_rss_contexts().
Also, don't try to perform the restore on every reset, only after an MC
 reboot, otherwise we'll leak RSS contexts on the NIC.

Fixes: 42356d9a137b ("sfc: support RSS spreading of ethtool ntuple filters")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 32 +++++++++++--
 drivers/net/ethernet/sfc/efx.c        | 14 +++++-
 drivers/net/ethernet/sfc/efx.h        |  4 +-
 drivers/net/ethernet/sfc/ethtool.c    | 66 +++++++++++++++++----------
 drivers/net/ethernet/sfc/net_driver.h |  2 +
 drivers/net/ethernet/sfc/nic.h        |  3 ++
 6 files changed, 89 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 9a139c390037..c4c45c94da77 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1499,6 +1499,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
 
 	/* All our allocations have been reset */
 	nic_data->must_realloc_vis = true;
+	nic_data->must_restore_rss_contexts = true;
 	nic_data->must_restore_filters = true;
 	nic_data->must_restore_piobufs = true;
 	efx_ef10_forget_old_piobufs(efx);
@@ -2899,6 +2900,8 @@ static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
 {
 	int rc;
 
+	WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
 	if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
 		rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
 		if (rc)
@@ -2929,6 +2932,8 @@ static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
 	size_t outlen;
 	int rc, i;
 
+	WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
 	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
 		     MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
 
@@ -2972,14 +2977,25 @@ static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
 
 static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
 {
-	return efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
+	int rc;
+
+	mutex_lock(&efx->rss_lock);
+	rc = efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
+	mutex_unlock(&efx->rss_lock);
+	return rc;
 }
 
 static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
 {
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	struct efx_rss_context *ctx;
 	int rc;
 
+	WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+	if (!nic_data->must_restore_rss_contexts)
+		return;
+
 	list_for_each_entry(ctx, &efx->rss_context.list, list) {
 		/* previous NIC RSS context is gone */
 		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
@@ -2993,6 +3009,7 @@ static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
 				   "; RSS filters may fail to be applied\n",
 				   ctx->user_id, rc);
 	}
+	nic_data->must_restore_rss_contexts = false;
 }
 
 static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
@@ -4307,6 +4324,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 	struct efx_rss_context *ctx = NULL;
 	unsigned int match_pri, hash;
 	unsigned int priv_flags;
+	bool rss_locked = false;
 	bool replacing = false;
 	unsigned int depth, i;
 	int ins_index = -1;
@@ -4336,9 +4354,10 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 		bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
 
 	if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+		mutex_lock(&efx->rss_lock);
+		rss_locked = true;
 		if (spec->rss_context)
-			ctx = efx_find_rss_context_entry(spec->rss_context,
-							 &efx->rss_context.list);
+			ctx = efx_find_rss_context_entry(efx, spec->rss_context);
 		else
 			ctx = &efx->rss_context;
 		if (!ctx) {
@@ -4501,6 +4520,8 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx,
 		rc = efx_ef10_make_filter_id(match_pri, ins_index);
 
 out_unlock:
+	if (rss_locked)
+		mutex_unlock(&efx->rss_lock);
 	up_write(&table->lock);
 	up_read(&efx->filter_sem);
 	return rc;
@@ -5008,6 +5029,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 		return;
 
 	down_write(&table->lock);
+	mutex_lock(&efx->rss_lock);
 
 	for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
 		spec = efx_ef10_filter_entry_spec(table, filter_idx);
@@ -5024,8 +5046,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
 			goto not_restored;
 		}
 		if (spec->rss_context)
-			ctx = efx_find_rss_context_entry(spec->rss_context,
-							 &efx->rss_context.list);
+			ctx = efx_find_rss_context_entry(efx, spec->rss_context);
 		else
 			ctx = &efx->rss_context;
 		if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
@@ -5064,6 +5085,7 @@ not_restored:
 		}
 	}
 
+	mutex_unlock(&efx->rss_lock);
 	up_write(&table->lock);
 
 	/* This can happen validly if the MC's capabilities have changed, so
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index cb9273016916..692dd729ee2a 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -2657,6 +2657,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method)
 	efx_disable_interrupts(efx);
 
 	mutex_lock(&efx->mac_lock);
+	mutex_lock(&efx->rss_lock);
 	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
 	    method != RESET_TYPE_DATAPATH)
 		efx->phy_op->fini(efx);
@@ -2712,6 +2713,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
 
 	if (efx->type->rx_restore_rss_contexts)
 		efx->type->rx_restore_rss_contexts(efx);
+	mutex_unlock(&efx->rss_lock);
 	down_read(&efx->filter_sem);
 	efx_restore_filters(efx);
 	up_read(&efx->filter_sem);
@@ -2730,6 +2732,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
 fail:
 	efx->port_initialized = false;
 
+	mutex_unlock(&efx->rss_lock);
 	mutex_unlock(&efx->mac_lock);
 
 	return rc;
@@ -3016,6 +3019,7 @@ static int efx_init_struct(struct efx_nic *efx,
 	efx->rx_packet_ts_offset =
 		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
 	INIT_LIST_HEAD(&efx->rss_context.list);
+	mutex_init(&efx->rss_lock);
 	spin_lock_init(&efx->stats_lock);
 	efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
 	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
@@ -3091,11 +3095,14 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
 /* RSS contexts.  We're using linked lists and crappy O(n) algorithms, because
  * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
  */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
 {
+	struct list_head *head = &efx->rss_context.list;
 	struct efx_rss_context *ctx, *new;
 	u32 id = 1; /* Don't use zero, that refers to the master RSS context */
 
+	WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
 	/* Search for first gap in the numbering */
 	list_for_each_entry(ctx, head, list) {
 		if (ctx->user_id != id)
@@ -3121,10 +3128,13 @@ struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head)
 	return new;
 }
 
-struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *head)
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
 {
+	struct list_head *head = &efx->rss_context.list;
 	struct efx_rss_context *ctx;
 
+	WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
 	list_for_each_entry(ctx, head, list)
 		if (ctx->user_id == id)
 			return ctx;
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 545c2ea1622e..a3140e16fcef 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -187,8 +187,8 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) {}
 bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
 
 /* RSS contexts */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *list);
-struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *list);
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
 void efx_free_rss_context_entry(struct efx_rss_context *ctx);
 static inline bool efx_rss_active(struct efx_rss_context *ctx)
 {
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index bb1c80d48d12..e07c8bc8f4bd 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -979,7 +979,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 	u32 rss_context = 0;
-	s32 rc;
+	s32 rc = 0;
 
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
@@ -989,15 +989,17 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 	case ETHTOOL_GRXFH: {
 		struct efx_rss_context *ctx = &efx->rss_context;
 
+		mutex_lock(&efx->rss_lock);
 		if (info->flow_type & FLOW_RSS && info->rss_context) {
-			ctx = efx_find_rss_context_entry(info->rss_context,
-							 &efx->rss_context.list);
-			if (!ctx)
-				return -ENOENT;
+			ctx = efx_find_rss_context_entry(efx, info->rss_context);
+			if (!ctx) {
+				rc = -ENOENT;
+				goto out_unlock;
+			}
 		}
 		info->data = 0;
 		if (!efx_rss_active(ctx)) /* No RSS */
-			return 0;
+			goto out_unlock;
 		switch (info->flow_type & ~FLOW_RSS) {
 		case UDP_V4_FLOW:
 			if (ctx->rx_hash_udp_4tuple)
@@ -1024,7 +1026,9 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 		default:
 			break;
 		}
-		return 0;
+out_unlock:
+		mutex_unlock(&efx->rss_lock);
+		return rc;
 	}
 
 	case ETHTOOL_GRXCLSRLCNT:
@@ -1366,16 +1370,20 @@ static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct efx_rss_context *ctx;
-	int rc;
+	int rc = 0;
 
 	if (!efx->type->rx_pull_rss_context_config)
 		return -EOPNOTSUPP;
-	ctx = efx_find_rss_context_entry(rss_context, &efx->rss_context.list);
-	if (!ctx)
-		return -ENOENT;
+
+	mutex_lock(&efx->rss_lock);
+	ctx = efx_find_rss_context_entry(efx, rss_context);
+	if (!ctx) {
+		rc = -ENOENT;
+		goto out_unlock;
+	}
 	rc = efx->type->rx_pull_rss_context_config(efx, ctx);
 	if (rc)
-		return rc;
+		goto out_unlock;
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
@@ -1383,7 +1391,9 @@ static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
 		memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table));
 	if (key)
 		memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size);
-	return 0;
+out_unlock:
+	mutex_unlock(&efx->rss_lock);
+	return rc;
 }
 
 static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
@@ -1401,23 +1411,31 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
 	/* Hash function is Toeplitz, cannot be changed */
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
+
+	mutex_lock(&efx->rss_lock);
+
 	if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
-		if (delete)
+		if (delete) {
 			/* alloc + delete == Nothing to do */
-			return -EINVAL;
-		ctx = efx_alloc_rss_context_entry(&efx->rss_context.list);
-		if (!ctx)
-			return -ENOMEM;
+			rc = -EINVAL;
+			goto out_unlock;
+		}
+		ctx = efx_alloc_rss_context_entry(efx);
+		if (!ctx) {
+			rc = -ENOMEM;
+			goto out_unlock;
+		}
 		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
 		/* Initialise indir table and key to defaults */
 		efx_set_default_rx_indir_table(efx, ctx);
 		netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
 		allocated = true;
 	} else {
-		ctx = efx_find_rss_context_entry(*rss_context,
-						 &efx->rss_context.list);
-		if (!ctx)
-			return -ENOENT;
+		ctx = efx_find_rss_context_entry(efx, *rss_context);
+		if (!ctx) {
+			rc = -ENOENT;
+			goto out_unlock;
+		}
 	}
 
 	if (delete) {
@@ -1425,7 +1443,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
 		rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL);
 		if (!rc)
 			efx_free_rss_context_entry(ctx);
-		return rc;
+		goto out_unlock;
 	}
 
 	if (!key)
@@ -1438,6 +1456,8 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
 		efx_free_rss_context_entry(ctx);
 	else
 		*rss_context = ctx->user_id;
+out_unlock:
+	mutex_unlock(&efx->rss_lock);
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 92ee55d84b7d..5e379a83c729 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -796,6 +796,7 @@ struct efx_rss_context {
  * @rx_scatter: Scatter mode enabled for receives
  * @rss_context: Main RSS context.  Its @list member is the head of the list of
  *	RSS contexts created by user requests
+ * @rss_lock: Protects custom RSS context software state in @rss_context.list
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -940,6 +941,7 @@ struct efx_nic {
 	int rx_packet_ts_offset;
 	bool rx_scatter;
 	struct efx_rss_context rss_context;
+	struct mutex rss_lock;
 
 	unsigned int_error_count;
 	unsigned long int_error_expire;
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index ac59afad6e5c..5640034bda10 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -365,6 +365,8 @@ enum {
  * @vi_base: Absolute index of first VI in this function
  * @n_allocated_vis: Number of VIs allocated to this function
  * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
+ * @must_restore_rss_contexts: Flag: RSS contexts have yet to be restored after
+ *	MC reboot
  * @must_restore_filters: Flag: filters have yet to be restored after MC reboot
  * @n_piobufs: Number of PIO buffers allocated to this function
  * @wc_membase: Base address of write-combining mapping of the memory BAR
@@ -407,6 +409,7 @@ struct efx_ef10_nic_data {
 	unsigned int vi_base;
 	unsigned int n_allocated_vis;
 	bool must_realloc_vis;
+	bool must_restore_rss_contexts;
 	bool must_restore_filters;
 	unsigned int n_piobufs;
 	void __iomem *wc_membase, *pio_write_base;

From a8e8fbebde5ded18e94c36220397521021d941ce Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 27 Mar 2018 17:44:51 +0100
Subject: [PATCH 1348/1678] sfc: fix flow type handling for RSS filters

The FLOW_RSS flag was causing us to insert UDP filters when TCP was wanted.

Fixes: 42356d9a137b ("sfc: support RSS spreading of ethtool ntuple filters")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ethtool.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index e07c8bc8f4bd..3143588ffd77 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1088,6 +1088,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 	struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
 	struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
 	struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
+	u32 flow_type = rule->flow_type & ~(FLOW_EXT | FLOW_RSS);
 	struct ethhdr *mac_entry = &rule->h_u.ether_spec;
 	struct ethhdr *mac_mask = &rule->m_u.ether_spec;
 	enum efx_filter_flags flags = 0;
@@ -1121,14 +1122,14 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 	if (rule->flow_type & FLOW_RSS)
 		spec.rss_context = rss_context;
 
-	switch (rule->flow_type & ~(FLOW_EXT | FLOW_RSS)) {
+	switch (flow_type) {
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
 		spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
 				    EFX_FILTER_MATCH_IP_PROTO);
 		spec.ether_type = htons(ETH_P_IP);
-		spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V4_FLOW ?
-				 IPPROTO_TCP : IPPROTO_UDP);
+		spec.ip_proto = flow_type == TCP_V4_FLOW ? IPPROTO_TCP
+							 : IPPROTO_UDP;
 		if (ip_mask->ip4dst) {
 			if (ip_mask->ip4dst != IP4_ADDR_FULL_MASK)
 				return -EINVAL;
@@ -1162,8 +1163,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
 		spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE |
 				    EFX_FILTER_MATCH_IP_PROTO);
 		spec.ether_type = htons(ETH_P_IPV6);
-		spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ?
-				 IPPROTO_TCP : IPPROTO_UDP);
+		spec.ip_proto = flow_type == TCP_V6_FLOW ? IPPROTO_TCP
+							 : IPPROTO_UDP;
 		if (!ip6_mask_is_empty(ip6_mask->ip6dst)) {
 			if (!ip6_mask_is_full(ip6_mask->ip6dst))
 				return -EINVAL;

From b76354cdfefb68bc017065850a34d486887a4f7b Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Tue, 27 Mar 2018 11:53:21 -0700
Subject: [PATCH 1349/1678] bpf: follow idr code convention

Generally we do a preload before doing idr allocation. This also help
improve the allocation success rate in memory pressure.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dd172ee16716..77d45bd9f507 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -203,11 +203,13 @@ static int bpf_map_alloc_id(struct bpf_map *map)
 {
 	int id;
 
+	idr_preload(GFP_KERNEL);
 	spin_lock_bh(&map_idr_lock);
 	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
 	if (id > 0)
 		map->id = id;
 	spin_unlock_bh(&map_idr_lock);
+	idr_preload_end();
 
 	if (WARN_ON_ONCE(!id))
 		return -ENOSPC;
@@ -940,11 +942,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
 {
 	int id;
 
+	idr_preload(GFP_KERNEL);
 	spin_lock_bh(&prog_idr_lock);
 	id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
 	if (id > 0)
 		prog->aux->id = id;
 	spin_unlock_bh(&prog_idr_lock);
+	idr_preload_end();
 
 	/* id is in [1, INT_MAX) */
 	if (WARN_ON_ONCE(!id))

From 827efed6a66ef8a1c071400b5952fee4a5ffedf9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 27 Mar 2018 23:02:47 +0100
Subject: [PATCH 1350/1678] rxrpc: Trace resend

Add a tracepoint to trace packet resend events and to dump the Tx
annotation buffer for added illumination.

Signed-off-by: David Howells <dhowells@rdhat.com>
---
 include/trace/events/rxrpc.h | 24 ++++++++++++++++++++++++
 net/rxrpc/call_event.c       |  1 +
 2 files changed, 25 insertions(+)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 36cb50c111a6..41979f907575 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -420,6 +420,7 @@ rxrpc_rtt_rx_traces;
 rxrpc_timer_traces;
 rxrpc_propose_ack_traces;
 rxrpc_propose_ack_outcomes;
+rxrpc_congest_modes;
 rxrpc_congest_changes;
 
 /*
@@ -1229,6 +1230,29 @@ TRACE_EVENT(rxrpc_connect_call,
 		      __entry->call_id)
 	    );
 
+TRACE_EVENT(rxrpc_resend,
+	    TP_PROTO(struct rxrpc_call *call, int ix),
+
+	    TP_ARGS(call, ix),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(int,			ix		)
+		    __array(u8,				anno, 64	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->ix = ix;
+		    memcpy(__entry->anno, call->rxtx_annotations, 64);
+			   ),
+
+	    TP_printk("c=%p ix=%u a=%64phN",
+		      __entry->call,
+		      __entry->ix,
+		      __entry->anno)
+	    );
+
 #endif /* _TRACE_RXRPC_H */
 
 /* This part must be outside protection */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index ad2ab1103189..6a62e42e1d8d 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -195,6 +195,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 	 * the packets in the Tx buffer we're going to resend and what the new
 	 * resend timeout will be.
 	 */
+	trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK);
 	oldest = now;
 	for (seq = cursor + 1; before_eq(seq, top); seq++) {
 		ix = seq & RXRPC_RXTX_BUFF_MASK;

From a25e21f0bcd25673b91b97b9805db33350feec0f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 27 Mar 2018 23:03:00 +0100
Subject: [PATCH 1351/1678] rxrpc, afs: Use debug_ids rather than pointers in
 traces

In rxrpc and afs, use the debug_ids that are monotonically allocated to
various objects as they're allocated rather than pointers as kernel
pointers are now hashed making them less useful.  Further, the debug ids
aren't reused anywhere nearly as quickly.

In addition, allow kernel services that use rxrpc, such as afs, to take
numbers from the rxrpc counter, assign them to their own call struct and
pass them in to rxrpc for both client and service calls so that the trace
lines for each will have the same ID tag.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h            |   1 +
 fs/afs/rxrpc.c               |  12 ++-
 include/net/af_rxrpc.h       |  11 ++-
 include/trace/events/afs.h   |  69 +++++++---------
 include/trace/events/rxrpc.h | 155 ++++++++++++++++++-----------------
 net/rxrpc/af_rxrpc.c         |   7 +-
 net/rxrpc/ar-internal.h      |   8 +-
 net/rxrpc/call_accept.c      |  18 ++--
 net/rxrpc/call_object.c      |  15 ++--
 net/rxrpc/conn_event.c       |   3 +-
 net/rxrpc/input.c            |   6 +-
 net/rxrpc/sendmsg.c          |   3 +-
 12 files changed, 163 insertions(+), 145 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f38d6a561a84..72217170b155 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -118,6 +118,7 @@ struct afs_call {
 	bool			ret_reply0;	/* T if should return reply[0] on success */
 	bool			upgrade;	/* T to request service upgrade */
 	u16			service_id;	/* Actual service ID (after upgrade) */
+	unsigned int		debug_id;	/* Trace ID */
 	u32			operation_ID;	/* operation ID for an incoming call */
 	u32			count;		/* count for use in unmarshalling */
 	__be32			tmp;		/* place to extract temporary data */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e1126659f043..b819900916e6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -131,6 +131,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
 
 	call->type = type;
 	call->net = net;
+	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
 	atomic_set(&call->usage, 1);
 	INIT_WORK(&call->async_work, afs_process_async_call);
 	init_waitqueue_head(&call->waitq);
@@ -169,11 +170,12 @@ void afs_put_call(struct afs_call *call)
 		afs_put_server(call->net, call->cm_server);
 		afs_put_cb_interest(call->net, call->cbi);
 		kfree(call->request);
+
+		trace_afs_call(call, afs_call_trace_free, 0, o,
+			       __builtin_return_address(0));
 		kfree(call);
 
 		o = atomic_dec_return(&net->nr_outstanding_calls);
-		trace_afs_call(call, afs_call_trace_free, 0, o,
-			       __builtin_return_address(0));
 		if (o == 0)
 			wake_up_atomic_t(&net->nr_outstanding_calls);
 	}
@@ -378,7 +380,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 					 (async ?
 					  afs_wake_up_async_call :
 					  afs_wake_up_call_waiter),
-					 call->upgrade);
+					 call->upgrade,
+					 call->debug_id);
 	if (IS_ERR(rxcall)) {
 		ret = PTR_ERR(rxcall);
 		goto error_kill_call;
@@ -727,7 +730,8 @@ void afs_charge_preallocation(struct work_struct *work)
 					       afs_wake_up_async_call,
 					       afs_rx_attach,
 					       (unsigned long)call,
-					       GFP_KERNEL) < 0)
+					       GFP_KERNEL,
+					       call->debug_id) < 0)
 			break;
 		call = NULL;
 	}
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 2b3a6eec4570..8ae8ee004258 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -31,6 +31,11 @@ enum rxrpc_call_completion {
 	NR__RXRPC_CALL_COMPLETIONS
 };
 
+/*
+ * Debug ID counter for tracing.
+ */
+extern atomic_t rxrpc_debug_id;
+
 typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
 				  unsigned long);
 typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *,
@@ -50,7 +55,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
 					   s64,
 					   gfp_t,
 					   rxrpc_notify_rx_t,
-					   bool);
+					   bool,
+					   unsigned int);
 int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 			   struct msghdr *, size_t,
 			   rxrpc_notify_end_tx_t);
@@ -63,7 +69,8 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
 			   struct sockaddr_rxrpc *);
 u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
 int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
-			       rxrpc_user_attach_call_t, unsigned long, gfp_t);
+			       rxrpc_user_attach_call_t, unsigned long, gfp_t,
+			       unsigned int);
 void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
 int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
 			    struct sockaddr_rxrpc *, struct key *);
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 6b59c63a8e51..63815f66b274 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -133,8 +133,7 @@ TRACE_EVENT(afs_recv_data,
 	    TP_ARGS(call, count, offset, want_more, ret),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	rxcall		)
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(enum afs_call_state,	state		)
 		    __field(unsigned int,		count		)
 		    __field(unsigned int,		offset		)
@@ -144,8 +143,7 @@ TRACE_EVENT(afs_recv_data,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->rxcall	= call->rxcall;
-		    __entry->call	= call;
+		    __entry->call	= call->debug_id;
 		    __entry->state	= call->state;
 		    __entry->unmarshall	= call->unmarshall;
 		    __entry->count	= count;
@@ -154,8 +152,7 @@ TRACE_EVENT(afs_recv_data,
 		    __entry->ret	= ret;
 			   ),
 
-	    TP_printk("c=%p ac=%p s=%u u=%u %u/%u wm=%u ret=%d",
-		      __entry->rxcall,
+	    TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d",
 		      __entry->call,
 		      __entry->state, __entry->unmarshall,
 		      __entry->offset, __entry->count,
@@ -168,21 +165,18 @@ TRACE_EVENT(afs_notify_call,
 	    TP_ARGS(rxcall, call),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	rxcall		)
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(enum afs_call_state,	state		)
 		    __field(unsigned short,		unmarshall	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->rxcall	= rxcall;
-		    __entry->call	= call;
+		    __entry->call	= call->debug_id;
 		    __entry->state	= call->state;
 		    __entry->unmarshall	= call->unmarshall;
 			   ),
 
-	    TP_printk("c=%p ac=%p s=%u u=%u",
-		      __entry->rxcall,
+	    TP_printk("c=%08x s=%u u=%u",
 		      __entry->call,
 		      __entry->state, __entry->unmarshall)
 	    );
@@ -193,21 +187,18 @@ TRACE_EVENT(afs_cb_call,
 	    TP_ARGS(call),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	rxcall		)
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(const char *,		name		)
 		    __field(u32,			op		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->rxcall	= call->rxcall;
-		    __entry->call	= call;
+		    __entry->call	= call->debug_id;
 		    __entry->name	= call->type->name;
 		    __entry->op		= call->operation_ID;
 			   ),
 
-	    TP_printk("c=%p ac=%p %s o=%u",
-		      __entry->rxcall,
+	    TP_printk("c=%08x %s o=%u",
 		      __entry->call,
 		      __entry->name,
 		      __entry->op)
@@ -220,7 +211,7 @@ TRACE_EVENT(afs_call,
 	    TP_ARGS(call, op, usage, outstanding, where),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(int,			op		)
 		    __field(int,			usage		)
 		    __field(int,			outstanding	)
@@ -228,14 +219,14 @@ TRACE_EVENT(afs_call,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->op = op;
 		    __entry->usage = usage;
 		    __entry->outstanding = outstanding;
 		    __entry->where = where;
 			   ),
 
-	    TP_printk("c=%p %s u=%d o=%d sp=%pSR",
+	    TP_printk("c=%08x %s u=%d o=%d sp=%pSR",
 		      __entry->call,
 		      __print_symbolic(__entry->op, afs_call_traces),
 		      __entry->usage,
@@ -249,13 +240,13 @@ TRACE_EVENT(afs_make_fs_call,
 	    TP_ARGS(call, fid),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(enum afs_fs_operation,	op		)
 		    __field_struct(struct afs_fid,	fid		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->op = call->operation_ID;
 		    if (fid) {
 			    __entry->fid = *fid;
@@ -266,7 +257,7 @@ TRACE_EVENT(afs_make_fs_call,
 		    }
 			   ),
 
-	    TP_printk("c=%p %06x:%06x:%06x %s",
+	    TP_printk("c=%08x %06x:%06x:%06x %s",
 		      __entry->call,
 		      __entry->fid.vid,
 		      __entry->fid.vnode,
@@ -280,16 +271,16 @@ TRACE_EVENT(afs_make_vl_call,
 	    TP_ARGS(call),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(enum afs_vl_operation,	op		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->op = call->operation_ID;
 			   ),
 
-	    TP_printk("c=%p %s",
+	    TP_printk("c=%08x %s",
 		      __entry->call,
 		      __print_symbolic(__entry->op, afs_vl_operations))
 	    );
@@ -300,20 +291,20 @@ TRACE_EVENT(afs_call_done,
 	    TP_ARGS(call),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(struct rxrpc_call *,	rx_call		)
 		    __field(int,			ret		)
 		    __field(u32,			abort_code	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->rx_call = call->rxcall;
 		    __entry->ret = call->error;
 		    __entry->abort_code = call->abort_code;
 			   ),
 
-	    TP_printk("   c=%p ret=%d ab=%d [%p]",
+	    TP_printk("   c=%08x ret=%d ab=%d [%p]",
 		      __entry->call,
 		      __entry->ret,
 		      __entry->abort_code,
@@ -327,7 +318,7 @@ TRACE_EVENT(afs_send_pages,
 	    TP_ARGS(call, msg, first, last, offset),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(pgoff_t,			first		)
 		    __field(pgoff_t,			last		)
 		    __field(unsigned int,		nr		)
@@ -337,7 +328,7 @@ TRACE_EVENT(afs_send_pages,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->first = first;
 		    __entry->last = last;
 		    __entry->nr = msg->msg_iter.nr_segs;
@@ -346,7 +337,7 @@ TRACE_EVENT(afs_send_pages,
 		    __entry->flags = msg->msg_flags;
 			   ),
 
-	    TP_printk(" c=%p %lx-%lx-%lx b=%x o=%x f=%x",
+	    TP_printk(" c=%08x %lx-%lx-%lx b=%x o=%x f=%x",
 		      __entry->call,
 		      __entry->first, __entry->first + __entry->nr - 1, __entry->last,
 		      __entry->bytes, __entry->offset,
@@ -360,7 +351,7 @@ TRACE_EVENT(afs_sent_pages,
 	    TP_ARGS(call, first, last, cursor, ret),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(pgoff_t,			first		)
 		    __field(pgoff_t,			last		)
 		    __field(pgoff_t,			cursor		)
@@ -368,14 +359,14 @@ TRACE_EVENT(afs_sent_pages,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->first = first;
 		    __entry->last = last;
 		    __entry->cursor = cursor;
 		    __entry->ret = ret;
 			   ),
 
-	    TP_printk(" c=%p %lx-%lx c=%lx r=%d",
+	    TP_printk(" c=%08x %lx-%lx c=%lx r=%d",
 		      __entry->call,
 		      __entry->first, __entry->last,
 		      __entry->cursor, __entry->ret)
@@ -450,7 +441,7 @@ TRACE_EVENT(afs_call_state,
 	    TP_ARGS(call, from, to, ret, remote_abort),
 
 	    TP_STRUCT__entry(
-		    __field(struct afs_call *,		call		)
+		    __field(unsigned int,		call		)
 		    __field(enum afs_call_state,	from		)
 		    __field(enum afs_call_state,	to		)
 		    __field(int,			ret		)
@@ -458,14 +449,14 @@ TRACE_EVENT(afs_call_state,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->from = from;
 		    __entry->to = to;
 		    __entry->ret = ret;
 		    __entry->abort = remote_abort;
 			   ),
 
-	    TP_printk("c=%p %u->%u r=%d ab=%d",
+	    TP_printk("c=%08x %u->%u r=%d ab=%d",
 		      __entry->call,
 		      __entry->from, __entry->to,
 		      __entry->ret, __entry->abort)
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 41979f907575..4d2c2d35c5cb 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -439,20 +439,20 @@ TRACE_EVENT(rxrpc_conn,
 	    TP_ARGS(conn, op, usage, where),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_connection *,	conn		)
-		    __field(int,			op		)
-		    __field(int,			usage		)
-		    __field(const void *,		where		)
+		    __field(unsigned int,	conn		)
+		    __field(int,		op		)
+		    __field(int,		usage		)
+		    __field(const void *,	where		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->conn = conn;
+		    __entry->conn = conn->debug_id;
 		    __entry->op = op;
 		    __entry->usage = usage;
 		    __entry->where = where;
 			   ),
 
-	    TP_printk("C=%p %s u=%d sp=%pSR",
+	    TP_printk("C=%08x %s u=%d sp=%pSR",
 		      __entry->conn,
 		      __print_symbolic(__entry->op, rxrpc_conn_traces),
 		      __entry->usage,
@@ -466,7 +466,7 @@ TRACE_EVENT(rxrpc_client,
 	    TP_ARGS(conn, channel, op),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_connection *,	conn		)
+		    __field(unsigned int,		conn		)
 		    __field(u32,			cid		)
 		    __field(int,			channel		)
 		    __field(int,			usage		)
@@ -475,7 +475,7 @@ TRACE_EVENT(rxrpc_client,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->conn = conn;
+		    __entry->conn = conn->debug_id;
 		    __entry->channel = channel;
 		    __entry->usage = atomic_read(&conn->usage);
 		    __entry->op = op;
@@ -483,7 +483,7 @@ TRACE_EVENT(rxrpc_client,
 		    __entry->cs = conn->cache_state;
 			   ),
 
-	    TP_printk("C=%p h=%2d %s %s i=%08x u=%d",
+	    TP_printk("C=%08x h=%2d %s %s i=%08x u=%d",
 		      __entry->conn,
 		      __entry->channel,
 		      __print_symbolic(__entry->op, rxrpc_client_traces),
@@ -499,7 +499,7 @@ TRACE_EVENT(rxrpc_call,
 	    TP_ARGS(call, op, usage, where, aux),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(int,			op		)
 		    __field(int,			usage		)
 		    __field(const void *,		where		)
@@ -507,14 +507,14 @@ TRACE_EVENT(rxrpc_call,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->op = op;
 		    __entry->usage = usage;
 		    __entry->where = where;
 		    __entry->aux = aux;
 			   ),
 
-	    TP_printk("c=%p %s u=%d sp=%pSR a=%p",
+	    TP_printk("c=%08x %s u=%d sp=%pSR a=%p",
 		      __entry->call,
 		      __print_symbolic(__entry->op, rxrpc_call_traces),
 		      __entry->usage,
@@ -593,12 +593,13 @@ TRACE_EVENT(rxrpc_rx_done,
 	    );
 
 TRACE_EVENT(rxrpc_abort,
-	    TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq,
-		     int abort_code, int error),
+	    TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id,
+		     rxrpc_seq_t seq, int abort_code, int error),
 
-	    TP_ARGS(why, cid, call_id, seq, abort_code, error),
+	    TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error),
 
 	    TP_STRUCT__entry(
+		    __field(unsigned int,		call_nr		)
 		    __array(char,			why, 4		)
 		    __field(u32,			cid		)
 		    __field(u32,			call_id		)
@@ -609,6 +610,7 @@ TRACE_EVENT(rxrpc_abort,
 
 	    TP_fast_assign(
 		    memcpy(__entry->why, why, 4);
+		    __entry->call_nr = call_nr;
 		    __entry->cid = cid;
 		    __entry->call_id = call_id;
 		    __entry->abort_code = abort_code;
@@ -616,7 +618,8 @@ TRACE_EVENT(rxrpc_abort,
 		    __entry->seq = seq;
 			   ),
 
-	    TP_printk("%08x:%08x s=%u a=%d e=%d %s",
+	    TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s",
+		      __entry->call_nr,
 		      __entry->cid, __entry->call_id, __entry->seq,
 		      __entry->abort_code, __entry->error, __entry->why)
 	    );
@@ -627,7 +630,7 @@ TRACE_EVENT(rxrpc_transmit,
 	    TP_ARGS(call, why),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(enum rxrpc_transmit_trace,	why		)
 		    __field(rxrpc_seq_t,		tx_hard_ack	)
 		    __field(rxrpc_seq_t,		tx_top		)
@@ -635,14 +638,14 @@ TRACE_EVENT(rxrpc_transmit,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->why = why;
 		    __entry->tx_hard_ack = call->tx_hard_ack;
 		    __entry->tx_top = call->tx_top;
 		    __entry->tx_winsize = call->tx_winsize;
 			   ),
 
-	    TP_printk("c=%p %s f=%08x n=%u/%u",
+	    TP_printk("c=%08x %s f=%08x n=%u/%u",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_transmit_traces),
 		      __entry->tx_hard_ack + 1,
@@ -657,7 +660,7 @@ TRACE_EVENT(rxrpc_rx_data,
 	    TP_ARGS(call, seq, serial, flags, anno),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_seq_t,		seq		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(u8,				flags		)
@@ -665,14 +668,14 @@ TRACE_EVENT(rxrpc_rx_data,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->seq = seq;
 		    __entry->serial = serial;
 		    __entry->flags = flags;
 		    __entry->anno = anno;
 			   ),
 
-	    TP_printk("c=%p DATA %08x q=%08x fl=%02x a=%02x",
+	    TP_printk("c=%08x DATA %08x q=%08x fl=%02x a=%02x",
 		      __entry->call,
 		      __entry->serial,
 		      __entry->seq,
@@ -688,7 +691,7 @@ TRACE_EVENT(rxrpc_rx_ack,
 	    TP_ARGS(call, serial, ack_serial, first, prev, reason, n_acks),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(rxrpc_serial_t,		ack_serial	)
 		    __field(rxrpc_seq_t,		first		)
@@ -698,7 +701,7 @@ TRACE_EVENT(rxrpc_rx_ack,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->serial = serial;
 		    __entry->ack_serial = ack_serial;
 		    __entry->first = first;
@@ -707,7 +710,7 @@ TRACE_EVENT(rxrpc_rx_ack,
 		    __entry->n_acks = n_acks;
 			   ),
 
-	    TP_printk("c=%p %08x %s r=%08x f=%08x p=%08x n=%u",
+	    TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u",
 		      __entry->call,
 		      __entry->serial,
 		      __print_symbolic(__entry->reason, rxrpc_ack_names),
@@ -724,18 +727,18 @@ TRACE_EVENT(rxrpc_rx_abort,
 	    TP_ARGS(call, serial, abort_code),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(u32,			abort_code	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->serial = serial;
 		    __entry->abort_code = abort_code;
 			   ),
 
-	    TP_printk("c=%p ABORT %08x ac=%d",
+	    TP_printk("c=%08x ABORT %08x ac=%d",
 		      __entry->call,
 		      __entry->serial,
 		      __entry->abort_code)
@@ -748,20 +751,20 @@ TRACE_EVENT(rxrpc_rx_rwind_change,
 	    TP_ARGS(call, serial, rwind, wake),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(u32,			rwind		)
 		    __field(bool,			wake		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->serial = serial;
 		    __entry->rwind = rwind;
 		    __entry->wake = wake;
 			   ),
 
-	    TP_printk("c=%p %08x rw=%u%s",
+	    TP_printk("c=%08x %08x rw=%u%s",
 		      __entry->call,
 		      __entry->serial,
 		      __entry->rwind,
@@ -775,7 +778,7 @@ TRACE_EVENT(rxrpc_tx_data,
 	    TP_ARGS(call, seq, serial, flags, retrans, lose),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_seq_t,		seq		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(u8,				flags		)
@@ -784,7 +787,7 @@ TRACE_EVENT(rxrpc_tx_data,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->seq = seq;
 		    __entry->serial = serial;
 		    __entry->flags = flags;
@@ -792,7 +795,7 @@ TRACE_EVENT(rxrpc_tx_data,
 		    __entry->lose = lose;
 			   ),
 
-	    TP_printk("c=%p DATA %08x q=%08x fl=%02x%s%s",
+	    TP_printk("c=%08x DATA %08x q=%08x fl=%02x%s%s",
 		      __entry->call,
 		      __entry->serial,
 		      __entry->seq,
@@ -809,7 +812,7 @@ TRACE_EVENT(rxrpc_tx_ack,
 	    TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(rxrpc_seq_t,		ack_first	)
 		    __field(rxrpc_serial_t,		ack_serial	)
@@ -818,7 +821,7 @@ TRACE_EVENT(rxrpc_tx_ack,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call ? call->debug_id : 0;
 		    __entry->serial = serial;
 		    __entry->ack_first = ack_first;
 		    __entry->ack_serial = ack_serial;
@@ -826,7 +829,7 @@ TRACE_EVENT(rxrpc_tx_ack,
 		    __entry->n_acks = n_acks;
 			   ),
 
-	    TP_printk(" c=%p ACK  %08x %s f=%08x r=%08x n=%u",
+	    TP_printk(" c=%08x ACK  %08x %s f=%08x r=%08x n=%u",
 		      __entry->call,
 		      __entry->serial,
 		      __print_symbolic(__entry->reason, rxrpc_ack_names),
@@ -842,7 +845,7 @@ TRACE_EVENT(rxrpc_receive,
 	    TP_ARGS(call, why, serial, seq),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(enum rxrpc_receive_trace,	why		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(rxrpc_seq_t,		seq		)
@@ -851,7 +854,7 @@ TRACE_EVENT(rxrpc_receive,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->why = why;
 		    __entry->serial = serial;
 		    __entry->seq = seq;
@@ -859,7 +862,7 @@ TRACE_EVENT(rxrpc_receive,
 		    __entry->top = call->rx_top;
 			   ),
 
-	    TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x",
+	    TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_receive_traces),
 		      __entry->serial,
@@ -876,7 +879,7 @@ TRACE_EVENT(rxrpc_recvmsg,
 	    TP_ARGS(call, why, seq, offset, len, ret),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(enum rxrpc_recvmsg_trace,	why		)
 		    __field(rxrpc_seq_t,		seq		)
 		    __field(unsigned int,		offset		)
@@ -885,7 +888,7 @@ TRACE_EVENT(rxrpc_recvmsg,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->why = why;
 		    __entry->seq = seq;
 		    __entry->offset = offset;
@@ -893,7 +896,7 @@ TRACE_EVENT(rxrpc_recvmsg,
 		    __entry->ret = ret;
 			   ),
 
-	    TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d",
+	    TP_printk("c=%08x %s q=%08x o=%u l=%u ret=%d",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_recvmsg_traces),
 		      __entry->seq,
@@ -909,18 +912,18 @@ TRACE_EVENT(rxrpc_rtt_tx,
 	    TP_ARGS(call, why, send_serial),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(enum rxrpc_rtt_tx_trace,	why		)
 		    __field(rxrpc_serial_t,		send_serial	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->why = why;
 		    __entry->send_serial = send_serial;
 			   ),
 
-	    TP_printk("c=%p %s sr=%08x",
+	    TP_printk("c=%08x %s sr=%08x",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_rtt_tx_traces),
 		      __entry->send_serial)
@@ -934,7 +937,7 @@ TRACE_EVENT(rxrpc_rtt_rx,
 	    TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(enum rxrpc_rtt_rx_trace,	why		)
 		    __field(u8,				nr		)
 		    __field(rxrpc_serial_t,		send_serial	)
@@ -944,7 +947,7 @@ TRACE_EVENT(rxrpc_rtt_rx,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->why = why;
 		    __entry->send_serial = send_serial;
 		    __entry->resp_serial = resp_serial;
@@ -953,7 +956,7 @@ TRACE_EVENT(rxrpc_rtt_rx,
 		    __entry->avg = avg;
 			   ),
 
-	    TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld",
+	    TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_rtt_rx_traces),
 		      __entry->send_serial,
@@ -970,7 +973,7 @@ TRACE_EVENT(rxrpc_timer,
 	    TP_ARGS(call, why, now),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,		call		)
+		    __field(unsigned int,			call		)
 		    __field(enum rxrpc_timer_trace,		why		)
 		    __field(long,				now		)
 		    __field(long,				ack_at		)
@@ -984,7 +987,7 @@ TRACE_EVENT(rxrpc_timer,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call		= call;
+		    __entry->call		= call->debug_id;
 		    __entry->why		= why;
 		    __entry->now		= now;
 		    __entry->ack_at		= call->ack_at;
@@ -996,7 +999,7 @@ TRACE_EVENT(rxrpc_timer,
 		    __entry->timer		= call->timer.expires;
 			   ),
 
-	    TP_printk("c=%p %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld",
+	    TP_printk("c=%08x %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_timer_traces),
 		      __entry->ack_at - __entry->now,
@@ -1039,7 +1042,7 @@ TRACE_EVENT(rxrpc_propose_ack,
 		    outcome),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,		call		)
+		    __field(unsigned int,			call		)
 		    __field(enum rxrpc_propose_ack_trace,	why		)
 		    __field(rxrpc_serial_t,			serial		)
 		    __field(u8,					ack_reason	)
@@ -1049,7 +1052,7 @@ TRACE_EVENT(rxrpc_propose_ack,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call	= call;
+		    __entry->call	= call->debug_id;
 		    __entry->why	= why;
 		    __entry->serial	= serial;
 		    __entry->ack_reason	= ack_reason;
@@ -1058,7 +1061,7 @@ TRACE_EVENT(rxrpc_propose_ack,
 		    __entry->outcome	= outcome;
 			   ),
 
-	    TP_printk("c=%p %s %s r=%08x i=%u b=%u%s",
+	    TP_printk("c=%08x %s %s r=%08x i=%u b=%u%s",
 		      __entry->call,
 		      __print_symbolic(__entry->why, rxrpc_propose_ack_traces),
 		      __print_symbolic(__entry->ack_reason, rxrpc_ack_names),
@@ -1075,20 +1078,20 @@ TRACE_EVENT(rxrpc_retransmit,
 	    TP_ARGS(call, seq, annotation, expiry),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_seq_t,		seq		)
 		    __field(u8,				annotation	)
 		    __field(s64,			expiry		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->seq = seq;
 		    __entry->annotation = annotation;
 		    __entry->expiry = expiry;
 			   ),
 
-	    TP_printk("c=%p q=%x a=%02x xp=%lld",
+	    TP_printk("c=%08x q=%x a=%02x xp=%lld",
 		      __entry->call,
 		      __entry->seq,
 		      __entry->annotation,
@@ -1102,7 +1105,7 @@ TRACE_EVENT(rxrpc_congest,
 	    TP_ARGS(call, summary, ack_serial, change),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,		call		)
+		    __field(unsigned int,			call		)
 		    __field(enum rxrpc_congest_change,		change		)
 		    __field(rxrpc_seq_t,			hard_ack	)
 		    __field(rxrpc_seq_t,			top		)
@@ -1112,7 +1115,7 @@ TRACE_EVENT(rxrpc_congest,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call	= call;
+		    __entry->call	= call->debug_id;
 		    __entry->change	= change;
 		    __entry->hard_ack	= call->tx_hard_ack;
 		    __entry->top	= call->tx_top;
@@ -1121,7 +1124,7 @@ TRACE_EVENT(rxrpc_congest,
 		    memcpy(&__entry->sum, summary, sizeof(__entry->sum));
 			   ),
 
-	    TP_printk("c=%p r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
+	    TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
 		      __entry->call,
 		      __entry->ack_serial,
 		      __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
@@ -1146,16 +1149,16 @@ TRACE_EVENT(rxrpc_disconnect_call,
 	    TP_ARGS(call),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(u32,			abort_code	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->abort_code = call->abort_code;
 			   ),
 
-	    TP_printk("c=%p ab=%08x",
+	    TP_printk("c=%08x ab=%08x",
 		      __entry->call,
 		      __entry->abort_code)
 	    );
@@ -1166,16 +1169,16 @@ TRACE_EVENT(rxrpc_improper_term,
 	    TP_ARGS(call),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(u32,			abort_code	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->abort_code = call->abort_code;
 			   ),
 
-	    TP_printk("c=%p ab=%08x",
+	    TP_printk("c=%08x ab=%08x",
 		      __entry->call,
 		      __entry->abort_code)
 	    );
@@ -1187,18 +1190,18 @@ TRACE_EVENT(rxrpc_rx_eproto,
 	    TP_ARGS(call, serial, why),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(const char *,		why		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->serial = serial;
 		    __entry->why = why;
 			   ),
 
-	    TP_printk("c=%p EPROTO %08x %s",
+	    TP_printk("c=%08x EPROTO %08x %s",
 		      __entry->call,
 		      __entry->serial,
 		      __entry->why)
@@ -1210,20 +1213,20 @@ TRACE_EVENT(rxrpc_connect_call,
 	    TP_ARGS(call),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(unsigned long,		user_call_ID	)
 		    __field(u32,			cid		)
 		    __field(u32,			call_id		)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->user_call_ID = call->user_call_ID;
 		    __entry->cid = call->cid;
 		    __entry->call_id = call->call_id;
 			   ),
 
-	    TP_printk("c=%p u=%p %08x:%08x",
+	    TP_printk("c=%08x u=%p %08x:%08x",
 		      __entry->call,
 		      (void *)__entry->user_call_ID,
 		      __entry->cid,
@@ -1236,18 +1239,18 @@ TRACE_EVENT(rxrpc_resend,
 	    TP_ARGS(call, ix),
 
 	    TP_STRUCT__entry(
-		    __field(struct rxrpc_call *,	call		)
+		    __field(unsigned int,		call		)
 		    __field(int,			ix		)
 		    __array(u8,				anno, 64	)
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call;
+		    __entry->call = call->debug_id;
 		    __entry->ix = ix;
 		    memcpy(__entry->anno, call->rxtx_annotations, 64);
 			   ),
 
-	    TP_printk("c=%p ix=%u a=%64phN",
+	    TP_printk("c=%08x ix=%u a=%64phN",
 		      __entry->call,
 		      __entry->ix,
 		      __entry->anno)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 9e1c2c6b6a67..ec5ec68be1aa 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -40,6 +40,7 @@ static const struct proto_ops rxrpc_rpc_ops;
 
 /* current debugging ID */
 atomic_t rxrpc_debug_id;
+EXPORT_SYMBOL(rxrpc_debug_id);
 
 /* count of skbs currently in use */
 atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
@@ -267,6 +268,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
  * @gfp: The allocation constraints
  * @notify_rx: Where to send notifications instead of socket queue
  * @upgrade: Request service upgrade for call
+ * @debug_id: The debug ID for tracing to be assigned to the call
  *
  * Allow a kernel service to begin a call on the nominated socket.  This just
  * sets up all the internal tracking structures and allocates connection and
@@ -282,7 +284,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 					   s64 tx_total_len,
 					   gfp_t gfp,
 					   rxrpc_notify_rx_t notify_rx,
-					   bool upgrade)
+					   bool upgrade,
+					   unsigned int debug_id)
 {
 	struct rxrpc_conn_parameters cp;
 	struct rxrpc_call_params p;
@@ -314,7 +317,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 	cp.exclusive		= false;
 	cp.upgrade		= upgrade;
 	cp.service_id		= srx->srx_service;
-	call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp);
+	call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id);
 	/* The socket has been unlocked. */
 	if (!IS_ERR(call)) {
 		call->notify_rx = notify_rx;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 416688381eb7..9c9817ddafc5 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -691,7 +691,6 @@ struct rxrpc_send_params {
  * af_rxrpc.c
  */
 extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
-extern atomic_t rxrpc_debug_id;
 extern struct workqueue_struct *rxrpc_workqueue;
 
 /*
@@ -732,11 +731,12 @@ extern unsigned int rxrpc_max_call_lifetime;
 extern struct kmem_cache *rxrpc_call_jar;
 
 struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
-struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t);
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
 					 struct sockaddr_rxrpc *,
-					 struct rxrpc_call_params *, gfp_t);
+					 struct rxrpc_call_params *, gfp_t,
+					 unsigned int);
 int rxrpc_retry_client_call(struct rxrpc_sock *,
 			    struct rxrpc_call *,
 			    struct rxrpc_conn_parameters *,
@@ -822,7 +822,7 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
 				      rxrpc_seq_t seq,
 				      u32 abort_code, int error)
 {
-	trace_rxrpc_abort(why, call->cid, call->call_id, seq,
+	trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
 			  abort_code, error);
 	return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
 					   abort_code, error);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3028298ca561..92ebd1d7e0bb 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -34,7 +34,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 				      struct rxrpc_backlog *b,
 				      rxrpc_notify_rx_t notify_rx,
 				      rxrpc_user_attach_call_t user_attach_call,
-				      unsigned long user_call_ID, gfp_t gfp)
+				      unsigned long user_call_ID, gfp_t gfp,
+				      unsigned int debug_id)
 {
 	const void *here = __builtin_return_address(0);
 	struct rxrpc_call *call;
@@ -94,7 +95,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 	/* Now it gets complicated, because calls get registered with the
 	 * socket here, particularly if a user ID is preassigned by the user.
 	 */
-	call = rxrpc_alloc_call(rx, gfp);
+	call = rxrpc_alloc_call(rx, gfp, debug_id);
 	if (!call)
 		return -ENOMEM;
 	call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
@@ -174,7 +175,8 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
 	if (rx->discard_new_call)
 		return 0;
 
-	while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0)
+	while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp,
+					  atomic_inc_return(&rxrpc_debug_id)) == 0)
 		;
 
 	return 0;
@@ -347,7 +349,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
 		   service_id == rx->second_service))
 		goto found_service;
 
-	trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+	trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_INVALID_OPERATION, EOPNOTSUPP);
 	skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
 	skb->priority = RX_INVALID_OPERATION;
@@ -358,7 +360,7 @@ found_service:
 	spin_lock(&rx->incoming_lock);
 	if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
 	    rx->sk.sk_state == RXRPC_CLOSE) {
-		trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber,
+		trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber,
 				  sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
 		skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
 		skb->priority = RX_INVALID_OPERATION;
@@ -635,6 +637,7 @@ out_discard:
  * @user_attach_call: Func to attach call to user_call_ID
  * @user_call_ID: The tag to attach to the preallocated call
  * @gfp: The allocation conditions.
+ * @debug_id: The tracing debug ID.
  *
  * Charge up the socket with preallocated calls, each with a user ID.  A
  * function should be provided to effect the attachment from the user's side.
@@ -645,7 +648,8 @@ out_discard:
 int rxrpc_kernel_charge_accept(struct socket *sock,
 			       rxrpc_notify_rx_t notify_rx,
 			       rxrpc_user_attach_call_t user_attach_call,
-			       unsigned long user_call_ID, gfp_t gfp)
+			       unsigned long user_call_ID, gfp_t gfp,
+			       unsigned int debug_id)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 	struct rxrpc_backlog *b = rx->backlog;
@@ -655,6 +659,6 @@ int rxrpc_kernel_charge_accept(struct socket *sock,
 
 	return rxrpc_service_prealloc_one(rx, b, notify_rx,
 					  user_attach_call, user_call_ID,
-					  gfp);
+					  gfp, debug_id);
 }
 EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 0b2db38dd32d..147657dfe757 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -99,7 +99,8 @@ found_extant_call:
 /*
  * allocate a new call
  */
-struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
+				    unsigned int debug_id)
 {
 	struct rxrpc_call *call;
 
@@ -138,7 +139,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp)
 	spin_lock_init(&call->notify_lock);
 	rwlock_init(&call->state_lock);
 	atomic_set(&call->usage, 1);
-	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	call->debug_id = debug_id;
 	call->tx_total_len = -1;
 	call->next_rx_timo = 20 * HZ;
 	call->next_req_timo = 1 * HZ;
@@ -166,14 +167,15 @@ nomem:
  */
 static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
 						  struct sockaddr_rxrpc *srx,
-						  gfp_t gfp)
+						  gfp_t gfp,
+						  unsigned int debug_id)
 {
 	struct rxrpc_call *call;
 	ktime_t now;
 
 	_enter("");
 
-	call = rxrpc_alloc_call(rx, gfp);
+	call = rxrpc_alloc_call(rx, gfp, debug_id);
 	if (!call)
 		return ERR_PTR(-ENOMEM);
 	call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
@@ -214,7 +216,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 					 struct rxrpc_conn_parameters *cp,
 					 struct sockaddr_rxrpc *srx,
 					 struct rxrpc_call_params *p,
-					 gfp_t gfp)
+					 gfp_t gfp,
+					 unsigned int debug_id)
 	__releases(&rx->sk.sk_lock.slock)
 {
 	struct rxrpc_call *call, *xcall;
@@ -225,7 +228,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 
 	_enter("%p,%lx", rx, p->user_call_ID);
 
-	call = rxrpc_alloc_client_call(rx, srx, gfp);
+	call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
 	if (IS_ERR(call)) {
 		release_sock(&rx->sk);
 		_leave(" = %ld", PTR_ERR(call));
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b1dfae107431..d2ec3fd593e8 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -160,7 +160,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
 			lockdep_is_held(&conn->channel_lock));
 		if (call) {
 			if (compl == RXRPC_CALL_LOCALLY_ABORTED)
-				trace_rxrpc_abort("CON", call->cid,
+				trace_rxrpc_abort(call->debug_id,
+						  "CON", call->cid,
 						  call->call_id, 0,
 						  abort_code, error);
 			if (rxrpc_set_call_completion(call, compl,
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 6fc61400337f..2a868fdab0ae 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1307,21 +1307,21 @@ out_unlock:
 
 wrong_security:
 	rcu_read_unlock();
-	trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+	trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RXKADINCONSISTENCY, EBADMSG);
 	skb->priority = RXKADINCONSISTENCY;
 	goto post_abort;
 
 reupgrade:
 	rcu_read_unlock();
-	trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+	trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_PROTOCOL_ERROR, EBADMSG);
 	goto protocol_error;
 
 bad_message_unlock:
 	rcu_read_unlock();
 bad_message:
-	trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+	trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
 			  RX_PROTOCOL_ERROR, EBADMSG);
 protocol_error:
 	skb->priority = RX_PROTOCOL_ERROR;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 09f2a3e05221..8503f279b467 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -579,7 +579,8 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 	cp.exclusive		= rx->exclusive | p->exclusive;
 	cp.upgrade		= p->upgrade;
 	cp.service_id		= srx->srx_service;
-	call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL);
+	call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL,
+				     atomic_inc_return(&rxrpc_debug_id));
 	/* The socket is now unlocked */
 
 	_leave(" = %p\n", call);

From 1bae5d229532b4e8dfd5728cb3b8373bc9eec9eb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 27 Mar 2018 23:08:20 +0100
Subject: [PATCH 1352/1678] rxrpc: Trace call completion

Add a tracepoint to track rxrpc calls moving into the completed state and
to log the completion type and the recorded error value and abort code.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 33 +++++++++++++++++++++++++++++++++
 net/rxrpc/ar-internal.h      |  1 +
 2 files changed, 34 insertions(+)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 4d2c2d35c5cb..2ea788f6f95d 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -400,6 +400,13 @@ enum rxrpc_congest_change {
 	EM(RXRPC_ACK_IDLE,			"IDL") \
 	E_(RXRPC_ACK__INVALID,			"-?-")
 
+#define rxrpc_completions \
+	EM(RXRPC_CALL_SUCCEEDED,		"Succeeded") \
+	EM(RXRPC_CALL_REMOTELY_ABORTED,		"RemoteAbort") \
+	EM(RXRPC_CALL_LOCALLY_ABORTED,		"LocalAbort") \
+	EM(RXRPC_CALL_LOCAL_ERROR,		"LocalError") \
+	E_(RXRPC_CALL_NETWORK_ERROR,		"NetError")
+
 /*
  * Export enum symbols via userspace.
  */
@@ -624,6 +631,32 @@ TRACE_EVENT(rxrpc_abort,
 		      __entry->abort_code, __entry->error, __entry->why)
 	    );
 
+TRACE_EVENT(rxrpc_call_complete,
+	    TP_PROTO(struct rxrpc_call *call),
+
+	    TP_ARGS(call),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		call		)
+		    __field(enum rxrpc_call_completion,	compl		)
+		    __field(int,			error		)
+		    __field(u32,			abort_code	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call->debug_id;
+		    __entry->compl = call->completion;
+		    __entry->error = call->error;
+		    __entry->abort_code = call->abort_code;
+			   ),
+
+	    TP_printk("c=%08x %s r=%d ac=%d",
+		      __entry->call,
+		      __print_symbolic(__entry->compl, rxrpc_completions),
+		      __entry->error,
+		      __entry->abort_code)
+	    );
+
 TRACE_EVENT(rxrpc_transmit,
 	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why),
 
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 9c9817ddafc5..21cf164b6d85 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -778,6 +778,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
 		call->error = error;
 		call->completion = compl,
 		call->state = RXRPC_CALL_COMPLETE;
+		trace_rxrpc_call_complete(call);
 		wake_up(&call->waitq);
 		return true;
 	}

From 0608d4dbaf4ee30a33def4c31b78bc664c80ff79 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 17 Jan 2018 17:39:07 +0200
Subject: [PATCH 1353/1678] net/mlx5e: Unify slow PCI heuristic

Get the link/pci speed query and logic into a single function.
Unify the heuristics and use a single PCI threshold (16G) for all.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 31 +++++++++----------
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  5 +++
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1d36d7569f44..46707826f27e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3902,16 +3902,20 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
 	return 0;
 }
 
-static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
+static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
 {
-	return (link_speed && pci_bw &&
-		(pci_bw < 40000) && (pci_bw < link_speed));
-}
+	u32 link_speed = 0;
+	u32 pci_bw = 0;
 
-static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw)
-{
-	return !(link_speed && pci_bw &&
-		 (pci_bw <= 16000) && (pci_bw < link_speed));
+	mlx5e_get_max_linkspeed(mdev, &link_speed);
+	mlx5e_get_pci_bw(mdev, &pci_bw);
+	mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+			   link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
+	return link_speed && pci_bw &&
+		link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
 }
 
 void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
@@ -3980,17 +3984,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 			    u16 max_channels)
 {
 	u8 cq_period_mode = 0;
-	u32 link_speed = 0;
-	u32 pci_bw = 0;
 
 	params->num_channels = max_channels;
 	params->num_tc       = 1;
 
-	mlx5e_get_max_linkspeed(mdev, &link_speed);
-	mlx5e_get_pci_bw(mdev, &pci_bw);
-	mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
-		      link_speed, pci_bw);
-
 	/* SQ */
 	params->log_sq_size = is_kdump_kernel() ?
 		MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
@@ -4000,7 +3997,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 	params->rx_cqe_compress_def = false;
 	if (MLX5_CAP_GEN(mdev, cqe_compression) &&
 	    MLX5_CAP_GEN(mdev, vport_group_manager))
-		params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw);
+		params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
 
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
 
@@ -4011,7 +4008,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 
 	/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-		params->lro_en = hw_lro_heuristic(link_speed, pci_bw);
+		params->lro_en = !slow_pci_heuristic(mdev);
 	params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
 	/* CQ moderation params */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 4e25f2b2e0bc..7d001fe6e631 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -50,6 +50,11 @@ extern uint mlx5_core_debug_mask;
 		 __func__, __LINE__, current->pid,			\
 		 ##__VA_ARGS__)
 
+#define mlx5_core_dbg_once(__dev, format, ...)				\
+	dev_dbg_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,	\
+		     __func__, __LINE__, current->pid,			\
+		     ##__VA_ARGS__)
+
 #define mlx5_core_dbg_mask(__dev, mask, format, ...)			\
 do {									\
 	if ((mask) & mlx5_core_debug_mask)				\

From 291f445eab9b2b11c8dd05ae1a0943c328cb9b6b Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 11 Feb 2018 11:58:30 +0200
Subject: [PATCH 1354/1678] net/mlx5e: Disable Striding RQ when PCI is slower
 than link

We turn the feature off for servers with PCI BW bounded
by a threshold (16G) and lower than MAX LINK BW.
This improves the effectiveness of CQE compression feature,
that is defaulted to ON for the same case.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 46707826f27e..d4dd00089eb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -113,13 +113,16 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
+static bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
+
 static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params)
 {
 	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-		    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
-		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
-		    MLX5_WQ_TYPE_LINKED_LIST;
+		!slow_pci_heuristic(mdev) &&
+		!params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
+		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+		MLX5_WQ_TYPE_LINKED_LIST;
 	mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 

From 73faf36c5ae6b300490f0a5f2835fa702adef0e1 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Nov 2017 17:43:50 +0200
Subject: [PATCH 1355/1678] net/mlx5e: Remove unused define
 MLX5_MPWRQ_STRIDES_PER_PAGE

Clean it up as it's not in use.

Fixes: d9d9f156f380 ("net/mlx5e: Expand WQE stride when CQE compression is enabled")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 294bc9f175a5..85767f0869d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -93,8 +93,6 @@
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
 				    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_MPWRQ_STRIDES_PER_PAGE		(MLX5_MPWRQ_NUM_STRIDES >> \
-						 MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
 #define MLX5E_REQUIRED_MTTS(wqes)		\

From bd658dda423781dbe775a2d87d662a3145ec05a6 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 27 Nov 2017 10:35:12 +0200
Subject: [PATCH 1356/1678] net/mlx5e: Separate dma base address and offset in
 dma_sync call

Pass the base dma address and offset to dma_sync_single_range_for_cpu(),
instead of doing the pre-calculation.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 8cce90dc461d..ffcbe5c3818a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -870,10 +870,8 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	data           = va + rx_headroom;
 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 
-	dma_sync_single_range_for_cpu(rq->pdev,
-				      di->addr + wi->offset,
-				      0, frag_size,
-				      DMA_FROM_DEVICE);
+	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
+				      frag_size, DMA_FROM_DEVICE);
 	prefetch(data);
 	wi->offset += frag_size;
 

From 24fd07abfa3584b91f65002956d3d5a5db169185 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 15 Feb 2018 18:27:06 +0200
Subject: [PATCH 1357/1678] net/mlx5e: Use no-offset function in skb header
 copy

In copying skb header to skb->data, replace the call to
skb_copy_to_linear_data_offset() with a zero offset with
the call to the no-offset function skb_copy_to_linear_data().

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ffcbe5c3818a..781b8f21d6d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -333,9 +333,8 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
 	len = ALIGN(headlen_pg, sizeof(long));
 	dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len,
 				DMA_FROM_DEVICE);
-	skb_copy_to_linear_data_offset(skb, 0,
-				       page_address(dma_info->page) + offset,
-				       len);
+	skb_copy_to_linear_data(skb, page_address(dma_info->page) + offset, len);
+
 	if (unlikely(offset + headlen > PAGE_SIZE)) {
 		dma_info++;
 		headlen_pg = len;

From f1e4fc9b4b02ec728b06ad09d6cdf5a9bb7ec372 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 7 Feb 2018 13:21:30 +0200
Subject: [PATCH 1358/1678] net/mlx5e: Remove RQ MPWQE fields from params

Introduce functions to calculate them when needed.
They can be derived from other params.
This will simplify transition between RQ configurations.

In general, any parameter that is not explicitly set
or controlled, but derived from other parameters,
should not have a control-path field itself, but a
getter function.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  7 +++-
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 13 ++-----
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 38 +++++++++++++------
 3 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 85767f0869d8..ba7f1ceb6dcd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -231,8 +231,6 @@ struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
 	u16 rq_headroom;
-	u8  mpwqe_log_stride_sz;
-	u8  mpwqe_log_num_strides;
 	u8  log_rq_size;
 	u16 num_channels;
 	u8  num_tc;
@@ -840,6 +838,11 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params);
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params);
+
 void mlx5e_update_stats(struct mlx5e_priv *priv);
 
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d415e67b557b..234b5b2ebf0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -231,8 +231,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
 	if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
 		return num_wqe;
 
-	stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz;
-	num_strides = 1 << priv->channels.params.mpwqe_log_num_strides;
+	stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
+	num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
 	wqe_size = stride_size * num_strides;
 
 	packets_per_wqe = wqe_size /
@@ -252,8 +252,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
 	if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
 		return num_packets;
 
-	stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz;
-	num_strides = 1 << priv->channels.params.mpwqe_log_num_strides;
+	stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
+	num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
 	wqe_size = stride_size * num_strides;
 
 	num_packets = (1 << order_base_2(num_packets));
@@ -1561,11 +1561,6 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	new_channels.params = priv->channels.params;
 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-	new_channels.params.mpwqe_log_stride_sz =
-		MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
-	new_channels.params.mpwqe_log_num_strides =
-		MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
-
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d4dd00089eb1..65e6955713e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -78,6 +78,20 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 		MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
+u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params)
+{
+	return MLX5E_MPWQE_STRIDE_SZ(mdev,
+		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+				   struct mlx5e_params *params)
+{
+	return MLX5_MPWRQ_LOG_WQE_SZ -
+		mlx5e_mpwqe_get_log_stride_size(mdev, params);
+}
+
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params, u8 rq_type)
 {
@@ -88,10 +102,6 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 		params->log_rq_size = is_kdump_kernel() ?
 			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
 			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-		params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
-			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
-		params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
-			params->mpwqe_log_stride_sz;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		params->log_rq_size = is_kdump_kernel() ?
@@ -109,7 +119,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
 		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
 		       BIT(params->log_rq_size),
-		       BIT(params->mpwqe_log_stride_sz),
+		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
@@ -453,8 +463,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			goto err_rq_wq_destroy;
 		}
 
-		rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz;
-		rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides);
+		rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
+		rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
 
 		byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
 
@@ -1745,13 +1755,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 				 struct mlx5e_params *params,
 				 struct mlx5e_rq_param *param)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9);
-		MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6);
+		MLX5_SET(wq, wq, log_wqe_num_of_strides,
+			 mlx5e_mpwqe_get_log_num_strides(mdev, params) - 9);
+		MLX5_SET(wq, wq, log_wqe_stride_size,
+			 mlx5e_mpwqe_get_log_stride_size(mdev, params) - 6);
 		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1761,12 +1774,12 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 	MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
 	MLX5_SET(wq, wq, log_wq_sz,        params->log_rq_size);
-	MLX5_SET(wq, wq, pd,               priv->mdev->mlx5e_res.pdn);
+	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
 	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
 
-	param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev);
+	param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev);
 	param->wq.linear = 1;
 }
 
@@ -1825,7 +1838,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides;
+		log_cq_size = params->log_rq_size +
+			mlx5e_mpwqe_get_log_num_strides(priv->mdev, params);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		log_cq_size = params->log_rq_size;

From b0cedc844c00991d323c13b82d651d372dcbbb12 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 7 Feb 2018 13:28:35 +0200
Subject: [PATCH 1359/1678] net/mlx5e: Remove rq_headroom field from params

It can be derived from other params, calculate it
via the dedicated function when needed.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 -
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 20 ++++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ba7f1ceb6dcd..ff9aeda186a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -230,7 +230,6 @@ enum mlx5e_priv_flag {
 struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
-	u16 rq_headroom;
 	u8  log_rq_size;
 	u16 num_channels;
 	u8  num_tc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 65e6955713e7..4907b7bb08e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -92,6 +92,19 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
 		mlx5e_mpwqe_get_log_stride_size(mdev, params);
 }
 
+static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params)
+{
+	u16 linear_rq_headroom = params->xdp_prog ?
+		XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
+
+	linear_rq_headroom += NET_IP_ALIGN;
+
+	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)
+		return linear_rq_headroom;
+
+	return 0;
+}
+
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params, u8 rq_type)
 {
@@ -107,12 +120,9 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 		params->log_rq_size = is_kdump_kernel() ?
 			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
 			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
-		params->rq_headroom = params->xdp_prog ?
-			XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
-		params->rq_headroom += NET_IP_ALIGN;
 
 		/* Extra room needed for build_skb */
-		params->lro_wqe_sz -= params->rq_headroom +
+		params->lro_wqe_sz -= mlx5e_get_rq_headroom(params) +
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	}
 
@@ -441,7 +451,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		goto err_rq_wq_destroy;
 
 	rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-	rq->buff.headroom = params->rq_headroom;
+	rq->buff.headroom = mlx5e_get_rq_headroom(params);
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:

From 2a0f561bf81c13d36910c7312ac8c67f83320a07 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 18 Feb 2018 11:37:06 +0200
Subject: [PATCH 1360/1678] net/mlx5e: Do not reset Receive Queue params on
 every type change

Do not implicit a call to mlx5e_init_rq_type_params() upon every
change in RQ type. It should be called only on channels creation.

Fixes: 2fc4bfb7250d ("net/mlx5e: Dynamic RQ type infrastructure")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++--------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c |  3 ++-
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ff9aeda186a1..45d0c64e77e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -918,8 +918,7 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
-			       struct mlx5e_params *params,
-			       u8 rq_type);
+			       struct mlx5e_params *params);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4907b7bb08e0..ffe3b2469032 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -106,9 +106,8 @@ static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params)
 }
 
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
-			       struct mlx5e_params *params, u8 rq_type)
+			       struct mlx5e_params *params)
 {
-	params->rq_wq_type = rq_type;
 	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -135,15 +134,14 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 
 static bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
-				struct mlx5e_params *params)
+static void mlx5e_set_rq_type(struct mlx5_core_dev *mdev,
+			      struct mlx5e_params *params)
 {
-	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
+	params->rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
 		!slow_pci_heuristic(mdev) &&
 		!params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
 		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 		MLX5_WQ_TYPE_LINKED_LIST;
-	mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -3736,7 +3734,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 		bpf_prog_put(old_prog);
 
 	if (reset) /* change RQ type according to priv->xdp_prog */
-		mlx5e_set_rq_params(priv->mdev, &priv->channels.params);
+		mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
 
 	if (was_opened && reset)
 		mlx5e_open_locked(netdev);
@@ -4029,7 +4027,8 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
 
 	/* RQ */
-	mlx5e_set_rq_params(mdev, params);
+	mlx5e_set_rq_type(mdev, params);
+	mlx5e_init_rq_type_params(mdev, params);
 
 	/* HW LRO */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index f953378bd13d..870584a07c48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -56,7 +56,8 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 				   struct mlx5e_params *params)
 {
 	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-	mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+	params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
+	mlx5e_init_rq_type_params(mdev, params);
 
 	/* RQ size in ipoib by default is 512 */
 	params->log_rq_size = is_kdump_kernel() ?

From 2ccb0a79018c9fafa913654163adc9dbac1280c5 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 7 Feb 2018 14:51:45 +0200
Subject: [PATCH 1361/1678] net/mlx5e: Add ethtool priv-flag for Striding RQ

Add a control private flag in ethtool to enable/disable
Striding RQ feature.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  7 ++++
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 38 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 22 +++++++----
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c |  3 +-
 4 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 45d0c64e77e5..13dd7a97ae04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -205,12 +205,14 @@ static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
 	"rx_cqe_moder",
 	"tx_cqe_moder",
 	"rx_cqe_compress",
+	"rx_striding_rq",
 };
 
 enum mlx5e_priv_flag {
 	MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
 	MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
 	MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
+	MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
 };
 
 #define MLX5E_SET_PFLAG(params, pflag, enable)			\
@@ -827,6 +829,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
 void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
 
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+				struct mlx5e_params *params);
+
 void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
 			bool recycle);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
@@ -917,6 +923,7 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 				 u8 cq_period_mode);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 234b5b2ebf0f..7bfe17b7c279 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1598,6 +1598,38 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
 	return 0;
 }
 
+static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_channels new_channels = {};
+	int err;
+
+	if (enable) {
+		if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+			return -EOPNOTSUPP;
+		if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
+			return -EINVAL;
+	}
+
+	new_channels.params = priv->channels.params;
+
+	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
+	mlx5e_set_rq_type(mdev, &new_channels.params);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		priv->channels.params = new_channels.params;
+		return 0;
+	}
+
+	err = mlx5e_open_channels(priv, &new_channels);
+	if (err)
+		return err;
+
+	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	return 0;
+}
+
 static int mlx5e_handle_pflag(struct net_device *netdev,
 			      u32 wanted_flags,
 			      enum mlx5e_priv_flag flag,
@@ -1643,6 +1675,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
 	err = mlx5e_handle_pflag(netdev, pflags,
 				 MLX5E_PFLAG_RX_CQE_COMPRESS,
 				 set_pflag_rx_cqe_compress);
+	if (err)
+		goto out;
+
+	err = mlx5e_handle_pflag(netdev, pflags,
+				 MLX5E_PFLAG_RX_STRIDING_RQ,
+				 set_pflag_rx_striding_rq);
 
 out:
 	mutex_unlock(&priv->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ffe3b2469032..7610a7916e96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -71,7 +71,7 @@ struct mlx5e_channel_param {
 	struct mlx5e_cq_param      icosq_cq;
 };
 
-static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
 	return MLX5_CAP_GEN(mdev, striding_rq) &&
 		MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
@@ -132,14 +132,17 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
-
-static void mlx5e_set_rq_type(struct mlx5_core_dev *mdev,
-			      struct mlx5e_params *params)
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+				struct mlx5e_params *params)
 {
-	params->rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-		!slow_pci_heuristic(mdev) &&
-		!params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
+	return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
+		!params->xdp_prog && !MLX5_IPSEC_DEV(mdev);
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+	params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
+		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
 		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 		MLX5_WQ_TYPE_LINKED_LIST;
 }
@@ -4027,6 +4030,9 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
 
 	/* RQ */
+	if (mlx5e_striding_rq_possible(mdev, params))
+		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ,
+				!slow_pci_heuristic(mdev));
 	mlx5e_set_rq_type(mdev, params);
 	mlx5e_init_rq_type_params(mdev, params);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 870584a07c48..a35608faf8d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -56,7 +56,8 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 				   struct mlx5e_params *params)
 {
 	/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-	params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, false);
+	mlx5e_set_rq_type(mdev, params);
 	mlx5e_init_rq_type_params(mdev, params);
 
 	/* RQ size in ipoib by default is 512 */

From c4554fbccaa3306f65954ed0f1dab7abce7889f8 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Sun, 21 Jan 2018 10:52:17 +0200
Subject: [PATCH 1362/1678] net/mlx5e: Remove unused max inline related code

Commit 58d522912ac7 ("net/mlx5e: Support TX packet copy into WQE")
introduced the max inline WQE as an ethtool tunable. One commit later,
that functionality was made dependent on BlueFlame.

Commit 6982ab609768 ("net/mlx5e: Xmit, no write combining") removed
BlueFlame support, and with it the max inline WQE.
This patch cleans up the leftovers from the removed feature.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  3 --
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 32 ++-----------------
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 11 -------
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  1 -
 4 files changed, 2 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 13dd7a97ae04..6898f5e26006 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -240,7 +240,6 @@ struct mlx5e_params {
 	struct net_dim_cq_moder tx_cq_moderation;
 	bool lro_en;
 	u32 lro_wqe_sz;
-	u16 tx_max_inline;
 	u8  tx_min_inline_mode;
 	u8  rss_hfunc;
 	u8  toeplitz_hash_key[40];
@@ -366,7 +365,6 @@ struct mlx5e_txqsq {
 	void __iomem              *uar_map;
 	struct netdev_queue       *txq;
 	u32                        sqn;
-	u16                        max_inline;
 	u8                         min_inline_mode;
 	u16                        edge;
 	struct device             *pdev;
@@ -1017,7 +1015,6 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 			u16 rxq_index, u32 flow_id);
 #endif
 
-u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);
 int mlx5e_create_tir(struct mlx5_core_dev *mdev,
 		     struct mlx5e_tir *tir, u32 *in, int inlen);
 void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 7bfe17b7c279..c57c929d7973 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1118,13 +1118,9 @@ static int mlx5e_get_tunable(struct net_device *dev,
 			     const struct ethtool_tunable *tuna,
 			     void *data)
 {
-	const struct mlx5e_priv *priv = netdev_priv(dev);
-	int err = 0;
+	int err;
 
 	switch (tuna->id) {
-	case ETHTOOL_TX_COPYBREAK:
-		*(u32 *)data = priv->channels.params.tx_max_inline;
-		break;
 	case ETHTOOL_PFC_PREVENTION_TOUT:
 		err = mlx5e_get_pfc_prevention_tout(dev, data);
 		break;
@@ -1141,35 +1137,11 @@ static int mlx5e_set_tunable(struct net_device *dev,
 			     const void *data)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
-	int err = 0;
-	u32 val;
+	int err;
 
 	mutex_lock(&priv->state_lock);
 
 	switch (tuna->id) {
-	case ETHTOOL_TX_COPYBREAK:
-		val = *(u32 *)data;
-		if (val > mlx5e_get_max_inline_cap(mdev)) {
-			err = -EINVAL;
-			break;
-		}
-
-		new_channels.params = priv->channels.params;
-		new_channels.params.tx_max_inline = val;
-
-		if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-			priv->channels.params = new_channels.params;
-			break;
-		}
-
-		err = mlx5e_open_channels(priv, &new_channels);
-		if (err)
-			break;
-		mlx5e_switch_priv_channels(priv, &new_channels, NULL);
-
-		break;
 	case ETHTOOL_PFC_PREVENTION_TOUT:
 		err = mlx5e_set_pfc_prevention_tout(dev, *(u16 *)data);
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7610a7916e96..5d8eb0a9c0f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -993,7 +993,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->channel   = c;
 	sq->txq_ix    = txq_ix;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
-	sq->max_inline      = params->tx_max_inline;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
@@ -3882,15 +3881,6 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
-u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
-{
-	int bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
-
-	return bf_buf_size -
-	       sizeof(struct mlx5e_tx_wqe) +
-	       2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
-}
-
 void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
 				   int num_channels)
 {
@@ -4052,7 +4042,6 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 	mlx5e_set_tx_cq_mode_params(params, cq_period_mode);
 
 	/* TX inline */
-	params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
 	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
 
 	/* RSS */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index ea4b255380a2..dd32f3e390ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -884,7 +884,6 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
 
-	params->tx_max_inline         = mlx5e_get_max_inline_cap(mdev);
 	params->num_tc                = 1;
 	params->lro_wqe_sz            = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
 

From bfc647d52e67dc756c605e9a50d45b71054c2533 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 16 Jan 2018 17:25:06 +0200
Subject: [PATCH 1363/1678] net/mlx5e: Move all TX timeout logic to be under
 state lock

Driver callback for handling TX timeout should access some internal
resources (SQ, CQ) in order to decide if the tx timeout work should be
scheduled.  These resources might be unavailable if channels are closed
in parallel (ifdown for example).

The state lock is the mechanism to protect from such races.
Move all TX timeout logic to be in the work under a state lock.

In addition, Move the work from the global WQ to mlx5e WQ to make sure
this work is flushed when device is detached..

Also, move the mlx5e_tx_timeout_work code to be next to the TX timeout
NDO for better code locality.

Fixes: 3947ca185999 ("net/mlx5e: Implement ndo_tx_timeout callback")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 61 +++++++++++--------
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5d8eb0a9c0f0..e0b75f52d556 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -177,26 +177,6 @@ static void mlx5e_update_carrier_work(struct work_struct *work)
 	mutex_unlock(&priv->state_lock);
 }
 
-static void mlx5e_tx_timeout_work(struct work_struct *work)
-{
-	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
-					       tx_timeout_work);
-	int err;
-
-	rtnl_lock();
-	mutex_lock(&priv->state_lock);
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-		goto unlock;
-	mlx5e_close_locked(priv->netdev);
-	err = mlx5e_open_locked(priv->netdev);
-	if (err)
-		netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
-			   err);
-unlock:
-	mutex_unlock(&priv->state_lock);
-	rtnl_unlock();
-}
-
 void mlx5e_update_stats(struct mlx5e_priv *priv)
 {
 	int i;
@@ -3658,13 +3638,19 @@ static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
 	return true;
 }
 
-static void mlx5e_tx_timeout(struct net_device *dev)
+static void mlx5e_tx_timeout_work(struct work_struct *work)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+					       tx_timeout_work);
+	struct net_device *dev = priv->netdev;
 	bool reopen_channels = false;
-	int i;
+	int i, err;
 
-	netdev_err(dev, "TX timeout detected\n");
+	rtnl_lock();
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto unlock;
 
 	for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
@@ -3672,7 +3658,9 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 
 		if (!netif_xmit_stopped(dev_queue))
 			continue;
-		netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+
+		netdev_err(dev,
+			   "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
 			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
 			   jiffies_to_usecs(jiffies - dev_queue->trans_start));
 
@@ -3685,8 +3673,27 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 		}
 	}
 
-	if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state))
-		schedule_work(&priv->tx_timeout_work);
+	if (!reopen_channels)
+		goto unlock;
+
+	mlx5e_close_locked(dev);
+	err = mlx5e_open_locked(dev);
+	if (err)
+		netdev_err(priv->netdev,
+			   "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+			   err);
+
+unlock:
+	mutex_unlock(&priv->state_lock);
+	rtnl_unlock();
+}
+
+static void mlx5e_tx_timeout(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	netdev_err(dev, "TX timeout detected\n");
+	queue_work(priv->wq, &priv->tx_timeout_work);
 }
 
 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)

From 2816077127230ef52cc7497903e71def45747611 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 26 Dec 2017 15:17:05 +0200
Subject: [PATCH 1364/1678] mlx5_{ib,core}: Add query SQ state helper function

Move query SQ state function from mlx5_ib to mlx5_core in order to
have it in shared code.

It will be used in a downstream patch from mlx5e.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/qp.c               | 14 +----------
 .../ethernet/mellanox/mlx5/core/transobj.c    | 25 +++++++++++++++++++
 include/linux/mlx5/transobj.h                 |  1 +
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 85c612ac547a..0d0b0b8dad98 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4739,26 +4739,14 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
 					struct mlx5_ib_sq *sq,
 					u8 *sq_state)
 {
-	void *out;
-	void *sqc;
-	int inlen;
 	int err;
 
-	inlen = MLX5_ST_SZ_BYTES(query_sq_out);
-	out = kvzalloc(inlen, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+	err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);
 	if (err)
 		goto out;
-
-	sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
-	*sq_state = MLX5_GET(sqc, sqc, state);
 	sq->state = *sq_state;
 
 out:
-	kvfree(out);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index 9e38343a951f..c64957b5ef47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -157,6 +157,31 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
 }
 EXPORT_SYMBOL(mlx5_core_query_sq);
 
+int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state)
+{
+	void *out;
+	void *sqc;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+	out = kvzalloc(inlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_core_query_sq(dev, sqn, out);
+	if (err)
+		goto out;
+
+	sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+	*state = MLX5_GET(sqc, sqc, state);
+
+out:
+	kvfree(out);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
+
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *tirn)
 {
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index 7e8f281f8c00..80d7aa8b2831 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -47,6 +47,7 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
 int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
 int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
+int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state);
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *tirn);
 int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,

From 1acae6b030164217b9c6a52245eade730057152b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 31 Dec 2017 12:55:26 +0200
Subject: [PATCH 1365/1678] mlx5: Move dump error CQE function out of mlx5_ib
 for code sharing

Move mlx5_ib dump error CQE implementation to mlx5 CQ header file in
order to use it in a downstream patch from mlx5e.

In addition, use print_hex_dump instead of manual dumping of the buffer.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cq.c | 8 +-------
 include/linux/mlx5/cq.h         | 6 ++++++
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 94a27d89a303..77d257ec899b 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -267,14 +267,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 
 static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
 {
-	__be32 *p = (__be32 *)cqe;
-	int i;
-
 	mlx5_ib_warn(dev, "dump error cqe\n");
-	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
-		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
-			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
-			be32_to_cpu(p[3]));
+	mlx5_dump_err_cqe(dev->mdev, cqe);
 }
 
 static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 445ad194e0fe..0ef6138eca49 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -193,6 +193,12 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
 				   struct mlx5_core_cq *cq, u16 cq_period,
 				   u16 cq_max_count);
+static inline void mlx5_dump_err_cqe(struct mlx5_core_dev *dev,
+				     struct mlx5_err_cqe *err_cqe)
+{
+	print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe,
+		       sizeof(*err_cqe), false);
+}
 int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 

From 16cc14d817338fc297970d2d9d146c88ec87474d Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 9 Jan 2018 16:21:16 +0200
Subject: [PATCH 1366/1678] net/mlx5e: Dump xmit error completions

Monitor and dump xmit error completions. In addition, add err_cqe
counter to track the number of error completion per send queue.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_stats.c    |  3 +++
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index c0dab9a8969e..ad91d9de0240 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -60,6 +60,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
@@ -153,6 +154,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
+			s->tx_cqe_err		+= sq_stats->cqe_err;
 			s->tx_xmit_more		+= sq_stats->xmit_more;
 			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
 			s->tx_csum_none		+= sq_stats->csum_none;
@@ -1103,6 +1105,7 @@ static const struct counter_desc sq_stats_desc[] = {
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
 };
 
 static const struct counter_desc ch_stats_desc[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 43a72efa28c0..43dc808684c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -78,6 +78,7 @@ struct mlx5e_sw_stats {
 	u64 tx_queue_wake;
 	u64 tx_queue_dropped;
 	u64 tx_xmit_more;
+	u64 tx_cqe_err;
 	u64 rx_wqe_err;
 	u64 rx_mpwqe_filler;
 	u64 rx_buff_alloc_err;
@@ -197,6 +198,7 @@ struct mlx5e_sq_stats {
 	u64 stopped;
 	u64 wake;
 	u64 dropped;
+	u64 cqe_err;
 };
 
 struct mlx5e_ch_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 11b4f1089d1c..88b5b7bfc9a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -417,6 +417,18 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
+static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
+				 struct mlx5_err_cqe *err_cqe)
+{
+	u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
+
+	netdev_err(sq->channel->netdev,
+		   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+		   sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome,
+		   err_cqe->vendor_err_synd);
+	mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_txqsq *sq;
@@ -456,6 +468,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
+		if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
+			if (!sq->stats.cqe_err)
+				mlx5e_dump_error_cqe(sq,
+						     (struct mlx5_err_cqe *)cqe);
+			sq->stats.cqe_err++;
+		}
+
 		do {
 			struct mlx5e_tx_wqe_info *wi;
 			struct sk_buff *skb;

From db75373c91b0cfb6a68ad6ae88721e4e21ae6261 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 26 Dec 2017 16:02:24 +0200
Subject: [PATCH 1367/1678] net/mlx5e: Recover Send Queue (SQ) from error state

An error TX completion (CQE) which arrived on a specific SQ indicates
that this SQ got moved by the hardware to error state, which means all
pending and incoming TX requests are dropped or will be dropped and no
further "Good" CQEs will be generated for that SQ.

Before this patch TX completions (CQEs) were not monitored and were
handled as a regular CQE. This caused the SQ to stay in an error state,
making it useless for xmiting new packets.

Mitigation plan:
In case of an error completion, schedule a recovery work which would do
the following:
- Mark the TXQ as DRV_XOFF to disable new packets to arrive from the
  stack
- NAPI to flush all pending SQ WQEs (via flush_in_error_en bit) to
  release SW and HW resources(SKB, DMA, etc) and have the SQ and CQ
  consumer/producer indices synced.
- Modify the SQ state ERR -> RST -> RDY (restart the SQ).
- Reactivate the SQ and reset SQ cc and pc

If we identify two consecutive requests for SQ recover in less than
500 msecs, drop the recover request to avoid CPU overload, as this
scenario most likely happened due to a severe repeated bug.

In addition, add SQ recover SW counter to monitor successful recoveries.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |   6 +
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 115 ++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en_stats.c    |   3 +
 .../ethernet/mellanox/mlx5/core/en_stats.h    |   2 +
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   |  10 +-
 5 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 6898f5e26006..353ac6daa3dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -122,6 +122,7 @@
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
 #define MLX5E_TX_CQ_POLL_BUDGET        128
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
+#define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
 #define MLX5E_ICOSQ_MAX_WQEBBS \
 	(DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
@@ -332,6 +333,7 @@ struct mlx5e_sq_dma {
 
 enum {
 	MLX5E_SQ_STATE_ENABLED,
+	MLX5E_SQ_STATE_RECOVERING,
 	MLX5E_SQ_STATE_IPSEC,
 };
 
@@ -378,6 +380,10 @@ struct mlx5e_txqsq {
 	struct mlx5e_channel      *channel;
 	int                        txq_ix;
 	u32                        rate_limit;
+	struct mlx5e_txqsq_recover {
+		struct work_struct         recover_work;
+		u64                        last_recover;
+	} recover;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_xdpsq {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e0b75f52d556..1b48dec67abf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -956,6 +956,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 	return 0;
 }
 
+static void mlx5e_sq_recover(struct work_struct *work);
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
 			     struct mlx5e_params *params,
@@ -974,6 +975,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->txq_ix    = txq_ix;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
+	INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
@@ -1040,6 +1042,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 		MLX5_SET(sqc,  sqc, min_wqe_inline_mode, csp->min_inline_mode);
 
 	MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
+	MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
 
 	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
 	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.bfreg.index);
@@ -1158,9 +1161,20 @@ err_free_txqsq:
 	return err;
 }
 
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+	WARN_ONCE(sq->cc != sq->pc,
+		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+		  sq->sqn, sq->cc, sq->pc);
+	sq->cc = 0;
+	sq->dma_fifo_cc = 0;
+	sq->pc = 0;
+}
+
 static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
 {
 	sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
 	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	netdev_tx_reset_queue(sq->txq);
 	netif_tx_start_queue(sq->txq);
@@ -1205,6 +1219,107 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
 	mlx5e_free_txqsq(sq);
 }
 
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+	while (time_before(jiffies, exp_time)) {
+		if (sq->cc == sq->pc)
+			return 0;
+
+		msleep(20);
+	}
+
+	netdev_err(sq->channel->netdev,
+		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+		   sq->sqn, sq->cc, sq->pc);
+
+	return -ETIMEDOUT;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
+{
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+	struct net_device *dev = sq->channel->netdev;
+	struct mlx5e_modify_sq_param msp = {0};
+	int err;
+
+	msp.curr_state = curr_state;
+	msp.next_state = MLX5_SQC_STATE_RST;
+
+	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+		return err;
+	}
+
+	memset(&msp, 0, sizeof(msp));
+	msp.curr_state = MLX5_SQC_STATE_RST;
+	msp.next_state = MLX5_SQC_STATE_RDY;
+
+	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work)
+{
+	struct mlx5e_txqsq_recover *recover =
+		container_of(work, struct mlx5e_txqsq_recover,
+			     recover_work);
+	struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
+					      recover);
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+	struct net_device *dev = sq->channel->netdev;
+	u8 state;
+	int err;
+
+	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+			   sq->sqn, err);
+		return;
+	}
+
+	if (state != MLX5_RQC_STATE_ERR) {
+		netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+		return;
+	}
+
+	netif_tx_disable_queue(sq->txq);
+
+	if (mlx5e_wait_for_sq_flush(sq))
+		return;
+
+	/* If the interval between two consecutive recovers per SQ is too
+	 * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
+	 * If we reached this state, there is probably a bug that needs to be
+	 * fixed. let's keep the queue close and let tx timeout cleanup.
+	 */
+	if (jiffies_to_msecs(jiffies - recover->last_recover) <
+	    MLX5E_SQ_RECOVER_MIN_INTERVAL) {
+		netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
+			   sq->sqn);
+		return;
+	}
+
+	/* At this point, no new packets will arrive from the stack as TXQ is
+	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+	 * pending WQEs.  SQ can safely reset the SQ.
+	 */
+	if (mlx5e_sq_to_ready(sq, state))
+		return;
+
+	mlx5e_reset_txqsq_cc_pc(sq);
+	sq->stats.recover++;
+	recover->last_recover = jiffies;
+	mlx5e_activate_txqsq(sq);
+}
+
 static int mlx5e_open_icosq(struct mlx5e_channel *c,
 			    struct mlx5e_params *params,
 			    struct mlx5e_sq_param *param,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index ad91d9de0240..b08c94422907 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -61,6 +61,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_recover) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
@@ -155,6 +156,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
 			s->tx_cqe_err		+= sq_stats->cqe_err;
+			s->tx_recover		+= sq_stats->recover;
 			s->tx_xmit_more		+= sq_stats->xmit_more;
 			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
 			s->tx_csum_none		+= sq_stats->csum_none;
@@ -1106,6 +1108,7 @@ static const struct counter_desc sq_stats_desc[] = {
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, recover) },
 };
 
 static const struct counter_desc ch_stats_desc[] = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 43dc808684c9..53111a2df587 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -79,6 +79,7 @@ struct mlx5e_sw_stats {
 	u64 tx_queue_dropped;
 	u64 tx_xmit_more;
 	u64 tx_cqe_err;
+	u64 tx_recover;
 	u64 rx_wqe_err;
 	u64 rx_mpwqe_filler;
 	u64 rx_buff_alloc_err;
@@ -199,6 +200,7 @@ struct mlx5e_sq_stats {
 	u64 wake;
 	u64 dropped;
 	u64 cqe_err;
+	u64 recover;
 };
 
 struct mlx5e_ch_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 88b5b7bfc9a9..20297108528a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -469,9 +469,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
 		if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) {
-			if (!sq->stats.cqe_err)
+			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
+					      &sq->state)) {
 				mlx5e_dump_error_cqe(sq,
 						     (struct mlx5_err_cqe *)cqe);
+				queue_work(cq->channel->priv->wq,
+					   &sq->recover.recover_work);
+			}
 			sq->stats.cqe_err++;
 		}
 
@@ -528,7 +532,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
 	if (netif_tx_queue_stopped(sq->txq) &&
-	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) {
+	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+				   MLX5E_SQ_STOP_ROOM) &&
+	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
 		netif_tx_wake_queue(sq->txq);
 		sq->stats.wake++;
 	}

From e4d7220813e986a99eff2b98d882d5290dd58078 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 19 Dec 2017 11:09:41 +0200
Subject: [PATCH 1368/1678] iwlwifi: mvm: flip AMSDU addresses only for 9000
 family

Hardware bug was fixed in later generation.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 580de5851fc7..4a4ccfd11e5b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -831,6 +831,16 @@ out:
 	rcu_read_unlock();
 }
 
+static void iwl_mvm_flip_address(u8 *addr)
+{
+	int i;
+	u8 mac_addr[ETH_ALEN];
+
+	for (i = 0; i < ETH_ALEN; i++)
+		mac_addr[i] = addr[ETH_ALEN - i - 1];
+	ether_addr_copy(addr, mac_addr);
+}
+
 void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
 			struct iwl_rx_cmd_buffer *rxb, int queue)
 {
@@ -985,21 +995,16 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
 		 */
 		if ((desc->mac_flags2 & IWL_RX_MPDU_MFLG2_AMSDU) &&
 		    !WARN_ON(!ieee80211_is_data_qos(hdr->frame_control))) {
-			int i;
 			u8 *qc = ieee80211_get_qos_ctl(hdr);
-			u8 mac_addr[ETH_ALEN];
 
 			*qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
 
-			for (i = 0; i < ETH_ALEN; i++)
-				mac_addr[i] = hdr->addr3[ETH_ALEN - i - 1];
-			ether_addr_copy(hdr->addr3, mac_addr);
+			if (mvm->trans->cfg->device_family ==
+			    IWL_DEVICE_FAMILY_9000) {
+				iwl_mvm_flip_address(hdr->addr3);
 
-			if (ieee80211_has_a4(hdr->frame_control)) {
-				for (i = 0; i < ETH_ALEN; i++)
-					mac_addr[i] =
-						hdr->addr4[ETH_ALEN - i - 1];
-				ether_addr_copy(hdr->addr4, mac_addr);
+				if (ieee80211_has_a4(hdr->frame_control))
+					iwl_mvm_flip_address(hdr->addr4);
 			}
 		}
 		if (baid != IWL_RX_REORDER_DATA_INVALID_BAID) {

From f4f155e5ec04d381b2f0870817d93dbdc259aa63 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 19 Dec 2017 09:19:32 +0200
Subject: [PATCH 1369/1678] iwlwifi: mvm: take RCU lock before dereferencing

RCU isn't properly locked.

Fixes: 46d372af9935 ("iwlwifi: mvm: rs: new rate scale API - add FW notifications")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index 55d1274c6092..fb5745660509 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -234,13 +234,15 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt)
 	struct iwl_mvm_sta *mvmsta;
 	struct iwl_lq_sta_rs_fw *lq_sta;
 
+	rcu_read_lock();
+
 	notif = (void *)pkt->data;
 	mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, notif->sta_id);
 
 	if (!mvmsta) {
 		IWL_ERR(mvm, "Invalid sta id (%d) in FW TLC notification\n",
 			notif->sta_id);
-		return;
+		goto out;
 	}
 
 	lq_sta = &mvmsta->lq_sta.rs_fw;
@@ -251,6 +253,8 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt)
 		IWL_DEBUG_RATE(mvm, "new rate_n_flags: 0x%X\n",
 			       lq_sta->last_rate_n_flags);
 	}
+out:
+	rcu_read_unlock();
 }
 
 void rs_fw_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,

From b0c9835c883c71013191e8ec1b8d006bb4c05697 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Mon, 18 Dec 2017 14:10:47 +0200
Subject: [PATCH 1370/1678] iwlwifi: mvm: move TSO segment to a separate
 function

This makes future bail-outs from transmitting an AMSDU more
readable.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 158 ++++++++++----------
 1 file changed, 83 insertions(+), 75 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 7dfe4cde55e3..795065974d78 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -687,6 +687,74 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 }
 
 #ifdef CONFIG_INET
+
+static int
+iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes,
+		       netdev_features_t netdev_flags,
+		       struct sk_buff_head *mpdus_skb)
+{
+	struct sk_buff *tmp, *next;
+	struct ieee80211_hdr *hdr = (void *)skb->data;
+	char cb[sizeof(skb->cb)];
+	u16 i = 0;
+	unsigned int tcp_payload_len;
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+	bool ipv4 = (skb->protocol == htons(ETH_P_IP));
+	u16 ip_base_id = ipv4 ? ntohs(ip_hdr(skb)->id) : 0;
+
+	skb_shinfo(skb)->gso_size = num_subframes * mss;
+	memcpy(cb, skb->cb, sizeof(cb));
+
+	next = skb_gso_segment(skb, netdev_flags);
+	skb_shinfo(skb)->gso_size = mss;
+	if (WARN_ON_ONCE(IS_ERR(next)))
+		return -EINVAL;
+	else if (next)
+		consume_skb(skb);
+
+	while (next) {
+		tmp = next;
+		next = tmp->next;
+
+		memcpy(tmp->cb, cb, sizeof(tmp->cb));
+		/*
+		 * Compute the length of all the data added for the A-MSDU.
+		 * This will be used to compute the length to write in the TX
+		 * command. We have: SNAP + IP + TCP for n -1 subframes and
+		 * ETH header for n subframes.
+		 */
+		tcp_payload_len = skb_tail_pointer(tmp) -
+			skb_transport_header(tmp) -
+			tcp_hdrlen(tmp) + tmp->data_len;
+
+		if (ipv4)
+			ip_hdr(tmp)->id = htons(ip_base_id + i * num_subframes);
+
+		if (tcp_payload_len > mss) {
+			skb_shinfo(tmp)->gso_size = mss;
+		} else {
+			if (ieee80211_is_data_qos(hdr->frame_control)) {
+				u8 *qc;
+
+				if (ipv4)
+					ip_send_check(ip_hdr(tmp));
+
+				qc = ieee80211_get_qos_ctl((void *)tmp->data);
+				*qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+			}
+			skb_shinfo(tmp)->gso_size = 0;
+		}
+
+		tmp->prev = NULL;
+		tmp->next = NULL;
+
+		__skb_queue_tail(mpdus_skb, tmp);
+		i++;
+	}
+
+	return 0;
+}
+
 static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 			  struct ieee80211_tx_info *info,
 			  struct ieee80211_sta *sta,
@@ -695,14 +763,10 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-	struct sk_buff *tmp, *next;
-	char cb[sizeof(skb->cb)];
 	unsigned int num_subframes, tcp_payload_len, subf_len, max_amsdu_len;
-	bool ipv4 = (skb->protocol == htons(ETH_P_IP));
-	u16 ip_base_id = ipv4 ? ntohs(ip_hdr(skb)->id) : 0;
-	u16 snap_ip_tcp, pad, i = 0;
+	u16 snap_ip_tcp, pad;
 	unsigned int dbg_max_amsdu_len;
-	netdev_features_t netdev_features = NETIF_F_CSUM_MASK | NETIF_F_SG;
+	netdev_features_t netdev_flags = NETIF_F_CSUM_MASK | NETIF_F_SG;
 	u8 *qc, tid, txf;
 
 	snap_ip_tcp = 8 + skb_transport_header(skb) - skb_network_header(skb) +
@@ -712,16 +776,8 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 
 	if (!sta->max_amsdu_len ||
 	    !ieee80211_is_data_qos(hdr->frame_control) ||
-	    (!mvmsta->tlc_amsdu && !dbg_max_amsdu_len)) {
-		num_subframes = 1;
-		pad = 0;
-		goto segment;
-	}
-
-	qc = ieee80211_get_qos_ctl(hdr);
-	tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
-	if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT))
-		return -EINVAL;
+	    (!mvmsta->tlc_amsdu && !dbg_max_amsdu_len))
+		return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
 
 	/*
 	 * Do not build AMSDU for IPv6 with extension headers.
@@ -730,22 +786,22 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 	if (skb->protocol == htons(ETH_P_IPV6) &&
 	    ((struct ipv6hdr *)skb_network_header(skb))->nexthdr !=
 	    IPPROTO_TCP) {
-		num_subframes = 1;
-		pad = 0;
-		netdev_features &= ~NETIF_F_CSUM_MASK;
-		goto segment;
+		netdev_flags &= ~NETIF_F_CSUM_MASK;
+		return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
 	}
 
+	qc = ieee80211_get_qos_ctl(hdr);
+	tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
+	if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT))
+		return -EINVAL;
+
 	/*
 	 * No need to lock amsdu_in_ampdu_allowed since it can't be modified
 	 * during an BA session.
 	 */
 	if (info->flags & IEEE80211_TX_CTL_AMPDU &&
-	    !mvmsta->tid_data[tid].amsdu_in_ampdu_allowed) {
-		num_subframes = 1;
-		pad = 0;
-		goto segment;
-	}
+	    !mvmsta->tid_data[tid].amsdu_in_ampdu_allowed)
+		return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
 
 	max_amsdu_len = sta->max_amsdu_len;
 
@@ -811,56 +867,8 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 	 * Trick the segmentation function to make it
 	 * create SKBs that can fit into one A-MSDU.
 	 */
-segment:
-	skb_shinfo(skb)->gso_size = num_subframes * mss;
-	memcpy(cb, skb->cb, sizeof(cb));
-
-	next = skb_gso_segment(skb, netdev_features);
-	skb_shinfo(skb)->gso_size = mss;
-	if (WARN_ON_ONCE(IS_ERR(next)))
-		return -EINVAL;
-	else if (next)
-		consume_skb(skb);
-
-	while (next) {
-		tmp = next;
-		next = tmp->next;
-
-		memcpy(tmp->cb, cb, sizeof(tmp->cb));
-		/*
-		 * Compute the length of all the data added for the A-MSDU.
-		 * This will be used to compute the length to write in the TX
-		 * command. We have: SNAP + IP + TCP for n -1 subframes and
-		 * ETH header for n subframes.
-		 */
-		tcp_payload_len = skb_tail_pointer(tmp) -
-			skb_transport_header(tmp) -
-			tcp_hdrlen(tmp) + tmp->data_len;
-
-		if (ipv4)
-			ip_hdr(tmp)->id = htons(ip_base_id + i * num_subframes);
-
-		if (tcp_payload_len > mss) {
-			skb_shinfo(tmp)->gso_size = mss;
-		} else {
-			if (ieee80211_is_data_qos(hdr->frame_control)) {
-				qc = ieee80211_get_qos_ctl((void *)tmp->data);
-
-				if (ipv4)
-					ip_send_check(ip_hdr(tmp));
-				*qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
-			}
-			skb_shinfo(tmp)->gso_size = 0;
-		}
-
-		tmp->prev = NULL;
-		tmp->next = NULL;
-
-		__skb_queue_tail(mpdus_skb, tmp);
-		i++;
-	}
-
-	return 0;
+	return iwl_mvm_tx_tso_segment(skb, num_subframes, netdev_flags,
+				      mpdus_skb);
 }
 #else /* CONFIG_INET */
 static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,

From 759931c79fc3c8c4748269a5c2e7d48563baa6d5 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Tue, 19 Dec 2017 10:24:44 +0000
Subject: [PATCH 1371/1678] iwlwifi: set default timstamp marker cmd

In case debug configuration is started with LDBG cmd also start timestamp
marker for syncing logs witn the FW.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c   |  4 ++++
 .../net/wireless/intel/iwlwifi/fw/debugfs.c   | 24 ++++++++++++-------
 .../net/wireless/intel/iwlwifi/fw/debugfs.h   |  5 ++++
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 7bd704a3e640..d27a43ea0d7c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -68,6 +68,7 @@
 #include "iwl-drv.h"
 #include "runtime.h"
 #include "dbg.h"
+#include "debugfs.h"
 #include "iwl-io.h"
 #include "iwl-prph.h"
 #include "iwl-csr.h"
@@ -1080,6 +1081,9 @@ int iwl_fw_start_dbg_conf(struct iwl_fw_runtime *fwrt, u8 conf_id)
 		IWL_WARN(fwrt, "FW already configured (%d) - re-configuring\n",
 			 fwrt->dump.conf);
 
+	/* start default config marker cmd for syncing logs */
+	iwl_fw_trigger_timestamp(fwrt, 1);
+
 	/* Send all HCMDs for configuring the FW debug */
 	ptr = (void *)&fwrt->fw->dbg_conf_tlv[conf_id]->hcmd;
 	for (i = 0; i < fwrt->fw->dbg_conf_tlv[conf_id]->num_of_hcmds; i++) {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
index baec2fbaaf68..8f005cd69559 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
@@ -157,6 +157,20 @@ static void iwl_fw_timestamp_marker_wk(struct work_struct *work)
 			 ret, jiffies_to_msecs(delay) / 1000);
 }
 
+void iwl_fw_trigger_timestamp(struct iwl_fw_runtime *fwrt, u32 delay)
+{
+	IWL_INFO(fwrt,
+		 "starting timestamp_marker trigger with delay: %us\n",
+		 delay);
+
+	iwl_fw_cancel_timestamp(fwrt);
+
+	fwrt->timestamp.delay = msecs_to_jiffies(delay * 1000);
+
+	schedule_delayed_work(&fwrt->timestamp.wk,
+			      round_jiffies_relative(fwrt->timestamp.delay));
+}
+
 static ssize_t iwl_dbgfs_timestamp_marker_write(struct iwl_fw_runtime *fwrt,
 						char *buf, size_t count,
 						loff_t *ppos)
@@ -168,16 +182,8 @@ static ssize_t iwl_dbgfs_timestamp_marker_write(struct iwl_fw_runtime *fwrt,
 	if (ret < 0)
 		return ret;
 
-	IWL_INFO(fwrt,
-		 "starting timestamp_marker trigger with delay: %us\n",
-		 delay);
+	iwl_fw_trigger_timestamp(fwrt, delay);
 
-	iwl_fw_cancel_timestamp(fwrt);
-
-	fwrt->timestamp.delay = msecs_to_jiffies(delay * 1000);
-
-	schedule_delayed_work(&fwrt->timestamp.wk,
-			      round_jiffies_relative(fwrt->timestamp.delay));
 	return count;
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
index 3da468d2cc92..d93f6a4bb22d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.h
@@ -89,6 +89,8 @@ static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt)
 			      round_jiffies_relative(fwrt->timestamp.delay));
 }
 
+void iwl_fw_trigger_timestamp(struct iwl_fw_runtime *fwrt, u32 delay);
+
 #else
 static inline int iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
 					  struct dentry *dbgfs_dir)
@@ -102,4 +104,7 @@ static inline void iwl_fw_suspend_timestamp(struct iwl_fw_runtime *fwrt) {}
 
 static inline void iwl_fw_resume_timestamp(struct iwl_fw_runtime *fwrt) {}
 
+static inline void iwl_fw_trigger_timestamp(struct iwl_fw_runtime *fwrt,
+					    u32 delay) {}
+
 #endif /* CONFIG_IWLWIFI_DEBUGFS */

From 9a233bb8025105db9a60b5d761005cc5a6c77f3d Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Mon, 18 Dec 2017 20:13:07 +0200
Subject: [PATCH 1372/1678] iwlwifi: mvm: check if mac80211_queue is valid in
 iwl_mvm_disable_txq

Sometimes iwl_mvm_disable_txq() may be called with mac80211_queue ==
IEEE80211_INVAL_HW_QUEUE, and this would cause us to use BIT(0xFF)
which is way too large for the u16 we used to store it in
hw_queue_to_mac820211.  If this happens the following UBSAN warning
will be generated:

[  167.185167] UBSAN: Undefined behaviour in drivers/net/wireless/intel/iwlwifi/mvm/utils.c:838:5
[  167.185171] shift exponent 255 is too large for 64-bit type 'long unsigned int'

Fix that by checking that it is not IEEE80211_INVAL_HW_QUEUE and,
while at it, add a warning if the queue number is larger than
IEEE80211_MAX_QUEUES.

Fixes: 34e10860ae8d ("iwlwifi: mvm: remove references to queue_info in new TX path")
Reported-by: Paul Menzel <pmenzel+linux-wireless@molgen.mpg.de>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index d65e1db7c097..70f8b8eb6117 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -800,12 +800,19 @@ int iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue,
 		.scd_queue = queue,
 		.action = SCD_CFG_DISABLE_QUEUE,
 	};
-	bool remove_mac_queue = true;
+	bool remove_mac_queue = mac80211_queue != IEEE80211_INVAL_HW_QUEUE;
 	int ret;
 
+	if (WARN_ON(remove_mac_queue && mac80211_queue >= IEEE80211_MAX_QUEUES))
+		return -EINVAL;
+
 	if (iwl_mvm_has_new_tx_api(mvm)) {
 		spin_lock_bh(&mvm->queue_info_lock);
-		mvm->hw_queue_to_mac80211[queue] &= ~BIT(mac80211_queue);
+
+		if (remove_mac_queue)
+			mvm->hw_queue_to_mac80211[queue] &=
+				~BIT(mac80211_queue);
+
 		spin_unlock_bh(&mvm->queue_info_lock);
 
 		iwl_trans_txq_free(mvm->trans, queue);

From 20cfb7a04f0a8b7f8f45cf630e23524d51bb3cd9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 28 Mar 2018 11:07:01 +0100
Subject: [PATCH 1373/1678] samples/bpf: fix spelling mistake: "revieve" ->
 "receive"

Trivial fix to spelling mistake in error message text

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/cookie_uid_helper_example.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index 9d751e209f31..8eca27e595ae 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -246,7 +246,7 @@ static void udp_client(void)
 		recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0,
 			     (struct sockaddr *)&si_me, &slen);
 		if (recv_len < 0)
-			error(1, errno, "revieve\n");
+			error(1, errno, "receive\n");
 		res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr),
 			   sizeof(si_me.sin_addr));
 		if (res != 0)

From 6f5c39fa5cd4a78c5432021e981aa8f79437a32c Mon Sep 17 00:00:00 2001
From: "Nikita V. Shirokov" <tehnerd@fb.com>
Date: Mon, 26 Mar 2018 08:36:57 -0700
Subject: [PATCH 1374/1678] bpf: Add sock_ops R/W access to ipv4 tos

Sample usage for tos ...

  bpf_getsockopt(skops, SOL_IP, IP_TOS, &v, sizeof(v))

... where skops is a pointer to the ctx (struct bpf_sock_ops).

Signed-off-by: Nikita V. Shirokov <tehnerd@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 00c711c5f1a2..afd825534ac4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3462,6 +3462,27 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 			ret = -EINVAL;
 		}
 #ifdef CONFIG_INET
+	} else if (level == SOL_IP) {
+		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+			return -EINVAL;
+
+		val = *((int *)optval);
+		/* Only some options are supported */
+		switch (optname) {
+		case IP_TOS:
+			if (val < -1 || val > 0xff) {
+				ret = -EINVAL;
+			} else {
+				struct inet_sock *inet = inet_sk(sk);
+
+				if (val == -1)
+					val = 0;
+				inet->tos = val;
+			}
+			break;
+		default:
+			ret = -EINVAL;
+		}
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (level == SOL_IPV6) {
 		if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
@@ -3561,6 +3582,20 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 		} else {
 			goto err_clear;
 		}
+	} else if (level == SOL_IP) {
+		struct inet_sock *inet = inet_sk(sk);
+
+		if (optlen != sizeof(int) || sk->sk_family != AF_INET)
+			goto err_clear;
+
+		/* Only some options are supported */
+		switch (optname) {
+		case IP_TOS:
+			*((int *)optval) = (int)inet->tos;
+			break;
+		default:
+			goto err_clear;
+		}
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (level == SOL_IPV6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);

From c1a7515393e403758a684fd0a2372af466675b15 Mon Sep 17 00:00:00 2001
From: "Zamir, Roee" <roee.zamir@intel.com>
Date: Thu, 30 Mar 2017 16:34:51 +0300
Subject: [PATCH 1375/1678] iwlwifi: mvm: add adaptive dwell support

Update the scan command API with support for adaptive dwell.  Adaptive
dwell is a type of scan that dynamically changes the time it remains
on each channel listening for beacons or probe responses.

Signed-off-by: Roee Zamir <roee.zamir@intel.com>
Signed-off-by: Beni Lev <beni.lev@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/mvm/constants.h    |   2 +
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 104 ++++++++++++------
 2 files changed, 70 insertions(+), 36 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 976640fed334..96b52a275ee3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -110,6 +110,8 @@
 #define IWL_MVM_SW_TX_CSUM_OFFLOAD		0
 #define IWL_MVM_HW_CSUM_DISABLE			0
 #define IWL_MVM_PARSE_NVM			0
+#define IWL_MVM_ADWELL_ENABLE			1
+#define IWL_MVM_ADWELL_MAX_BUDGET		0
 #define IWL_MVM_RS_NUM_TRY_BEFORE_ANT_TOGGLE    1
 #define IWL_MVM_RS_HT_VHT_RETRIES_PER_RATE      2
 #define IWL_MVM_RS_HT_VHT_RETRIES_PER_RATE_TW   1
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 356b16f40e78..f910098e9531 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -86,6 +86,15 @@ enum iwl_mvm_traffic_load {
 #define IWL_SCAN_DWELL_FRAGMENTED	44
 #define IWL_SCAN_DWELL_EXTENDED		90
 
+/* adaptive dwell max budget time [TU] for full scan */
+#define IWL_SCAN_ADWELL_MAX_BUDGET_FULL_SCAN 300
+/* adaptive dwell max budget time [TU] for directed scan */
+#define IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN 100
+/* adaptive dwell default APs number */
+#define IWL_SCAN_ADWELL_DEFAULT_N_APS 2
+/* adaptive dwell default APs number in social channels (1, 6, 11) */
+#define IWL_SCAN_ADWELL_DEFAULT_N_APS_SOCIAL 10
+
 struct iwl_mvm_scan_timing_params {
 	u32 suspend_time;
 	u32 max_out_time;
@@ -1115,11 +1124,6 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
 {
 	struct iwl_mvm_scan_timing_params *timing = &scan_timing[params->type];
 
-	if (iwl_mvm_is_regular_scan(params))
-		cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
-	else
-		cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_2);
-
 	if (iwl_mvm_is_adaptive_dwell_supported(mvm)) {
 		if (params->measurement_dwell) {
 			cmd->v7.active_dwell = params->measurement_dwell;
@@ -1129,39 +1133,27 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
 			cmd->v7.passive_dwell = IWL_SCAN_DWELL_PASSIVE;
 		}
 		cmd->v7.fragmented_dwell = IWL_SCAN_DWELL_FRAGMENTED;
+		cmd->v7.adwell_default_n_aps_social =
+			IWL_SCAN_ADWELL_DEFAULT_N_APS_SOCIAL;
+		cmd->v7.adwell_default_n_aps =
+			IWL_SCAN_ADWELL_DEFAULT_N_APS;
+
+		/* if custom max budget was configured with debugfs */
+		if (IWL_MVM_ADWELL_MAX_BUDGET)
+			cmd->v7.adwell_max_budget =
+				cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET);
+		else if (params->ssids && params->ssids[0].ssid_len)
+			cmd->v7.adwell_max_budget =
+				cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN);
+		else
+			cmd->v7.adwell_max_budget =
+				cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_FULL_SCAN);
 
 		cmd->v7.scan_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
 		cmd->v7.max_out_time[SCAN_LB_LMAC_IDX] =
 			cpu_to_le32(timing->max_out_time);
 		cmd->v7.suspend_time[SCAN_LB_LMAC_IDX] =
 			cpu_to_le32(timing->suspend_time);
-		if (iwl_mvm_is_cdb_supported(mvm)) {
-			cmd->v7.max_out_time[SCAN_HB_LMAC_IDX] =
-				cpu_to_le32(timing->max_out_time);
-			cmd->v7.suspend_time[SCAN_HB_LMAC_IDX] =
-				cpu_to_le32(timing->suspend_time);
-		}
-
-		return;
-	}
-
-	if (params->measurement_dwell) {
-		cmd->v1.active_dwell = params->measurement_dwell;
-		cmd->v1.passive_dwell = params->measurement_dwell;
-		cmd->v1.extended_dwell = params->measurement_dwell;
-	} else {
-		cmd->v1.active_dwell = IWL_SCAN_DWELL_ACTIVE;
-		cmd->v1.passive_dwell = IWL_SCAN_DWELL_PASSIVE;
-		cmd->v1.extended_dwell = IWL_SCAN_DWELL_EXTENDED;
-	}
-	cmd->v1.fragmented_dwell = IWL_SCAN_DWELL_FRAGMENTED;
-
-	if (iwl_mvm_has_new_tx_api(mvm)) {
-		cmd->v6.scan_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
-		cmd->v6.max_out_time[SCAN_LB_LMAC_IDX] =
-			cpu_to_le32(timing->max_out_time);
-		cmd->v6.suspend_time[SCAN_LB_LMAC_IDX] =
-			cpu_to_le32(timing->suspend_time);
 		if (iwl_mvm_is_cdb_supported(mvm)) {
 			cmd->v6.max_out_time[SCAN_HB_LMAC_IDX] =
 				cpu_to_le32(timing->max_out_time);
@@ -1169,10 +1161,41 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
 				cpu_to_le32(timing->suspend_time);
 		}
 	} else {
-		cmd->v1.max_out_time = cpu_to_le32(timing->max_out_time);
-		cmd->v1.suspend_time = cpu_to_le32(timing->suspend_time);
-		cmd->v1.scan_priority =
-			cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
+		if (params->measurement_dwell) {
+			cmd->v1.active_dwell = params->measurement_dwell;
+			cmd->v1.passive_dwell = params->measurement_dwell;
+			cmd->v1.extended_dwell = params->measurement_dwell;
+		} else {
+			cmd->v1.active_dwell = IWL_SCAN_DWELL_ACTIVE;
+			cmd->v1.passive_dwell = IWL_SCAN_DWELL_PASSIVE;
+			cmd->v1.extended_dwell = IWL_SCAN_DWELL_EXTENDED;
+		}
+		cmd->v1.fragmented_dwell = IWL_SCAN_DWELL_FRAGMENTED;
+		if (iwl_mvm_is_cdb_supported(mvm)) {
+			struct iwl_mvm_scan_timing_params *hb_timing =
+				&scan_timing[params->type];
+
+			cmd->v6.max_out_time[SCAN_HB_LMAC_IDX] =
+					cpu_to_le32(hb_timing->max_out_time);
+			cmd->v6.suspend_time[SCAN_HB_LMAC_IDX] =
+					cpu_to_le32(hb_timing->suspend_time);
+		}
+
+		if (iwl_mvm_has_new_tx_api(mvm)) {
+			cmd->v6.scan_priority =
+				cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
+			cmd->v6.max_out_time[SCAN_LB_LMAC_IDX] =
+				cpu_to_le32(timing->max_out_time);
+			cmd->v6.suspend_time[SCAN_LB_LMAC_IDX] =
+				cpu_to_le32(timing->suspend_time);
+		} else {
+			cmd->v1.scan_priority =
+				cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
+			cmd->v1.max_out_time =
+				cpu_to_le32(timing->max_out_time);
+			cmd->v1.suspend_time =
+				cpu_to_le32(timing->suspend_time);
+		}
 	}
 }
 
@@ -1234,6 +1257,15 @@ static u16 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm,
 	if (mvm->sched_scan_pass_all == SCHED_SCAN_PASS_ALL_ENABLED)
 		flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE;
 
+	if (iwl_mvm_is_adaptive_dwell_supported(mvm) && IWL_MVM_ADWELL_ENABLE &&
+	    vif->type != NL80211_IFTYPE_P2P_DEVICE)
+		flags |= IWL_UMAC_SCAN_GEN_FLAGS_ADAPTIVE_DWELL;
+
+	/*
+	 * Extended dwell is relevant only for low band to start with, as it is
+	 * being used for social channles only (1, 6, 11), so we can check
+	 * only scan type on low band also for CDB.
+	 */
 	if (iwl_mvm_is_regular_scan(params) &&
 	    vif->type != NL80211_IFTYPE_P2P_DEVICE &&
 	    params->type != IWL_SCAN_TYPE_FRAGMENTED)

From 8f691af967293319668058efdc10f560b65bd651 Mon Sep 17 00:00:00 2001
From: "Zamir, Roee" <roee.zamir@intel.com>
Date: Thu, 11 May 2017 11:56:15 +0300
Subject: [PATCH 1376/1678] iwlwifi: mvm: add support for oce

Add support for Optimized Connectivity Experience (OCE).  Get
capabilities from the fw, expose them with nl80211, and enable them in
UMAC scan if the relevant nl80211 flags are set by the userspace.

Signed-off-by: Roee Zamir <roee.zamir@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/scan.h  |  6 ++++++
 drivers/net/wireless/intel/iwlwifi/fw/file.h  |  1 +
 .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 11 ++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  |  6 ++++++
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 21 ++++++++++++++++++-
 5 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 3bfc657f6b42..620862c4c79b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -579,8 +579,14 @@ enum iwl_umac_scan_general_flags {
 	IWL_UMAC_SCAN_GEN_FLAGS_RRM_ENABLED		= BIT(8),
 	IWL_UMAC_SCAN_GEN_FLAGS_MATCH			= BIT(9),
 	IWL_UMAC_SCAN_GEN_FLAGS_EXTENDED_DWELL		= BIT(10),
+	/* Extended dwell is obselete when adaptive dwell is used, making this
+	 * bit reusable. Hence, probe request defer is used only when adaptive
+	 * dwell is supported. */
+	IWL_UMAC_SCAN_GEN_FLAGS_PROB_REQ_DEFER_SUPP	= BIT(10),
 	IWL_UMAC_SCAN_GEN_FLAGS_LMAC2_FRAGMENTED	= BIT(11),
 	IWL_UMAC_SCAN_GEN_FLAGS_ADAPTIVE_DWELL		= BIT(13),
+	IWL_UMAC_SCAN_GEN_FLAGS_MAX_CHNL_TIME		= BIT(14),
+	IWL_UMAC_SCAN_GEN_FLAGS_PROB_REQ_HIGH_TX_RATE	= BIT(15),
 };
 
 /**
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 2cb303c5c42e..f8c2dac12c66 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -265,6 +265,7 @@ enum iwl_ucode_tlv_api {
 	IWL_UCODE_TLV_API_NAN2_VER2		= (__force iwl_ucode_tlv_api_t)31,
 	/* API Set 1 */
 	IWL_UCODE_TLV_API_ADAPTIVE_DWELL	= (__force iwl_ucode_tlv_api_t)32,
+	IWL_UCODE_TLV_API_OCE			= (__force iwl_ucode_tlv_api_t)33,
 	IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE	= (__force iwl_ucode_tlv_api_t)34,
 	IWL_UCODE_TLV_API_NEW_RX_STATS		= (__force iwl_ucode_tlv_api_t)35,
 	IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY	= (__force iwl_ucode_tlv_api_t)38,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 7152fdc00fb1..82dc9dec4160 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -661,6 +661,17 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
 				      NL80211_EXT_FEATURE_SET_SCAN_DWELL);
 	}
 
+	if (iwl_mvm_is_oce_supported(mvm)) {
+		wiphy_ext_feature_set(hw->wiphy,
+			NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP);
+		wiphy_ext_feature_set(hw->wiphy,
+			NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME);
+		wiphy_ext_feature_set(hw->wiphy,
+			NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION);
+		wiphy_ext_feature_set(hw->wiphy,
+			NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE);
+	}
+
 	mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 625b238a3f0a..bd87cdb36fb2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1155,6 +1155,12 @@ static inline bool iwl_mvm_is_adaptive_dwell_supported(struct iwl_mvm *mvm)
 			  IWL_UCODE_TLV_API_ADAPTIVE_DWELL);
 }
 
+static inline bool iwl_mvm_is_oce_supported(struct iwl_mvm *mvm)
+{
+	/* OCE should never be enabled for LMAC scan FWs */
+	return fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_OCE);
+}
+
 static inline bool iwl_mvm_enter_d0i3_on_suspend(struct iwl_mvm *mvm)
 {
 	/* For now we only use this mode to differentiate between
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index f910098e9531..b756d9d0199f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1268,9 +1268,28 @@ static u16 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm,
 	 */
 	if (iwl_mvm_is_regular_scan(params) &&
 	    vif->type != NL80211_IFTYPE_P2P_DEVICE &&
-	    params->type != IWL_SCAN_TYPE_FRAGMENTED)
+	    params->type != IWL_SCAN_TYPE_FRAGMENTED &&
+	    !iwl_mvm_is_adaptive_dwell_supported(mvm) &&
+	    !iwl_mvm_is_oce_supported(mvm))
 		flags |= IWL_UMAC_SCAN_GEN_FLAGS_EXTENDED_DWELL;
 
+	if (iwl_mvm_is_oce_supported(mvm)) {
+		if ((params->flags &
+		     NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE))
+			flags |= IWL_UMAC_SCAN_GEN_FLAGS_PROB_REQ_HIGH_TX_RATE;
+		/* Since IWL_UMAC_SCAN_GEN_FLAGS_EXTENDED_DWELL and
+		 * NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION shares
+		 * the same bit, we need to make sure that we use this bit here
+		 * only when IWL_UMAC_SCAN_GEN_FLAGS_EXTENDED_DWELL cannot be
+		 * used. */
+		if ((params->flags &
+		     NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+		     !WARN_ON_ONCE(!iwl_mvm_is_adaptive_dwell_supported(mvm)))
+			flags |= IWL_UMAC_SCAN_GEN_FLAGS_PROB_REQ_DEFER_SUPP;
+		if ((params->flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME))
+			flags |= IWL_UMAC_SCAN_GEN_FLAGS_MAX_CHNL_TIME;
+	}
+
 	return flags;
 }
 

From d270e7b8fa8e68c6d08910e353b7674b182dc1c2 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 22 Jan 2018 16:44:09 +0200
Subject: [PATCH 1377/1678] iwlwifi: mvm: Allow iwl_mvm_mac_mgd_prepare_tx()
 when associated

The FW does not allocate quota air time for the binding of a station
MAC before iwlmvm indicates that it is associated. Currently iwlmvm
indicates that the MAC is associated only after hearing a beacon from
the AP. In case a deauthentication frame is sent before the MAC is
associated, the frame might not be sent as the corresponding binding
is not scheduled.

To handle such cases, set IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP in the
HW flags, requesting mac80211 to call the mgd_prepare_tx() callback
before transmitting a deauthentication frame if associated but no
beacon was heard from the AP.

In addition, do not warn in iwl_mvm_mac_mgd_prepare_tx() when already
associated as now the callback can be called also when associated.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 82dc9dec4160..51b30424575b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -421,6 +421,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
 	ieee80211_hw_set(hw, SUPPORTS_CLONED_SKBS);
 	ieee80211_hw_set(hw, SUPPORTS_AMSDU_IN_AMPDU);
 	ieee80211_hw_set(hw, NEEDS_UNIQUE_STA_ADDR);
+	ieee80211_hw_set(hw, DEAUTH_NEED_MGD_TX_PREP);
 
 	if (iwl_mvm_has_tlc_offload(mvm)) {
 		ieee80211_hw_set(hw, TX_AMPDU_SETUP_IN_HW);
@@ -2815,9 +2816,6 @@ static void iwl_mvm_mac_mgd_prepare_tx(struct ieee80211_hw *hw,
 	u32 duration = IWL_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS;
 	u32 min_duration = IWL_MVM_TE_SESSION_PROTECTION_MIN_TIME_MS;
 
-	if (WARN_ON_ONCE(vif->bss_conf.assoc))
-		return;
-
 	/*
 	 * iwl_mvm_protect_session() reads directly from the device
 	 * (the system time), so make sure it is available.

From 66fa2424df16b213b9ed4ea1b9edca49c89ae415 Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Wed, 13 Dec 2017 15:20:21 +0200
Subject: [PATCH 1378/1678] iwlwifi: fw api: support the new scan request FW
 API version

Remove fragmented_dwell_time and add num_of_fragments to support
the new API version.

Signed-off-by: Ayala Beker <ayala.beker@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/scan.h  |  67 +++++++---
 drivers/net/wireless/intel/iwlwifi/fw/file.h  |   3 +
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  |   6 +
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 116 ++++++++++++------
 4 files changed, 137 insertions(+), 55 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 620862c4c79b..7af3a0f51b77 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -30,6 +30,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -589,6 +590,15 @@ enum iwl_umac_scan_general_flags {
 	IWL_UMAC_SCAN_GEN_FLAGS_PROB_REQ_HIGH_TX_RATE	= BIT(15),
 };
 
+/**
+ * enum iwl_umac_scan_general_flags2 - UMAC scan general flags #2
+ * @IWL_UMAC_SCAN_GEN_FLAGS2_NOTIF_PER_CHNL: Whether to send a complete
+ *	notification per channel or not.
+ */
+enum iwl_umac_scan_general_flags2 {
+	IWL_UMAC_SCAN_GEN_FLAGS2_NOTIF_PER_CHNL	= BIT(0),
+};
+
 /**
  * struct iwl_scan_channel_cfg_umac
  * @flags:		bitmap - 0-19:	directed scan to i'th ssid.
@@ -634,6 +644,18 @@ struct iwl_scan_req_umac_tail {
 	struct iwl_ssid_ie direct_scan[PROBE_OPTION_MAX];
 } __packed;
 
+/**
+ * struct iwl_scan_umac_chan_param
+ * @flags: channel flags &enum iwl_scan_channel_flags
+ * @count: num of channels in scan request
+ * @reserved: for future use and alignment
+ */
+struct iwl_scan_umac_chan_param {
+	u8 flags;
+	u8 count;
+	__le16 reserved;
+} __packed; /*SCAN_CHANNEL_PARAMS_API_S_VER_1 */
+
 /**
  * struct iwl_scan_req_umac
  * @flags: &enum iwl_umac_scan_flags
@@ -642,23 +664,24 @@ struct iwl_scan_req_umac_tail {
  * @general_flags: &enum iwl_umac_scan_general_flags
  * @scan_start_mac_id: report the scan start TSF time according to this mac TSF
  * @extended_dwell: dwell time for channels 1, 6 and 11
- * @active_dwell: dwell time for active scan
- * @passive_dwell: dwell time for passive scan
+ * @active_dwell: dwell time for active scan per LMAC
+ * @passive_dwell: dwell time for passive scan per LMAC
  * @fragmented_dwell: dwell time for fragmented passive scan
  * @adwell_default_n_aps: for adaptive dwell the default number of APs
  *	per channel
  * @adwell_default_n_aps_social: for adaptive dwell the default
  *	number of APs per social (1,6,11) channel
+ * @general_flags2: &enum iwl_umac_scan_general_flags2
  * @adwell_max_budget: for adaptive dwell the maximal budget of TU to be added
  *	to total scan time
  * @max_out_time: max out of serving channel time, per LMAC - for CDB there
  *	are 2 LMACs
  * @suspend_time: max suspend time, per LMAC - for CDB there are 2 LMACs
  * @scan_priority: scan internal prioritization &enum iwl_scan_priority
- * @channel_flags: &enum iwl_scan_channel_flags
- * @n_channels: num of channels in scan request
+ * @num_of_fragments: Number of fragments needed for full coverage per band.
+ *	Relevant only for fragmented scan.
+ * @channel: &struct iwl_scan_umac_chan_param
  * @reserved: for future use and alignment
- * @reserved2: for future use and alignment
  * @reserved3: for future use and alignment
  * @data: &struct iwl_scan_channel_cfg_umac and
  *	&struct iwl_scan_req_umac_tail
@@ -679,10 +702,7 @@ struct iwl_scan_req_umac {
 			__le32 max_out_time;
 			__le32 suspend_time;
 			__le32 scan_priority;
-			/* SCAN_CHANNEL_PARAMS_API_S_VER_1 */
-			u8 channel_flags;
-			u8 n_channels;
-			__le16 reserved2;
+			struct iwl_scan_umac_chan_param channel;
 			u8 data[];
 		} v1; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_1 */
 		struct {
@@ -693,10 +713,7 @@ struct iwl_scan_req_umac {
 			__le32 max_out_time[SCAN_TWO_LMACS];
 			__le32 suspend_time[SCAN_TWO_LMACS];
 			__le32 scan_priority;
-			/* SCAN_CHANNEL_PARAMS_API_S_VER_1 */
-			u8 channel_flags;
-			u8 n_channels;
-			__le16 reserved2;
+			struct iwl_scan_umac_chan_param channel;
 			u8 data[];
 		} v6; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_6 */
 		struct {
@@ -710,16 +727,30 @@ struct iwl_scan_req_umac {
 			__le32 max_out_time[SCAN_TWO_LMACS];
 			__le32 suspend_time[SCAN_TWO_LMACS];
 			__le32 scan_priority;
-			/* SCAN_CHANNEL_PARAMS_API_S_VER_1 */
-			u8 channel_flags;
-			u8 n_channels;
-			__le16 reserved2;
+			struct iwl_scan_umac_chan_param channel;
 			u8 data[];
 		} v7; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_7 */
+		struct {
+			u8 active_dwell[SCAN_TWO_LMACS];
+			u8 reserved2;
+			u8 adwell_default_n_aps;
+			u8 adwell_default_n_aps_social;
+			u8 general_flags2;
+			__le16 adwell_max_budget;
+			__le32 max_out_time[SCAN_TWO_LMACS];
+			__le32 suspend_time[SCAN_TWO_LMACS];
+			__le32 scan_priority;
+			u8 passive_dwell[SCAN_TWO_LMACS];
+			u8 num_of_fragments[SCAN_TWO_LMACS];
+			struct iwl_scan_umac_chan_param channel;
+			u8 data[];
+		} v8; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_8 */
 	};
 } __packed;
 
-#define IWL_SCAN_REQ_UMAC_SIZE_V7 sizeof(struct iwl_scan_req_umac)
+#define IWL_SCAN_REQ_UMAC_SIZE_V8 sizeof(struct iwl_scan_req_umac)
+#define IWL_SCAN_REQ_UMAC_SIZE_V7 (sizeof(struct iwl_scan_req_umac) - \
+					 4 * sizeof(u8))
 #define IWL_SCAN_REQ_UMAC_SIZE_V6 (sizeof(struct iwl_scan_req_umac) - \
 				   2 * sizeof(u8) - sizeof(__le16))
 #define IWL_SCAN_REQ_UMAC_SIZE_V1 (sizeof(struct iwl_scan_req_umac) - \
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index f8c2dac12c66..2bea95bf4fc9 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -250,6 +250,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
  *	indicating low latency direction.
  * @IWL_UCODE_TLV_API_DEPRECATE_TTAK: RX status flag TTAK ok (bit 7) is
  *	deprecated.
+ * @IWL_UCODE_TLV_API_ADAPTIVE_DWELL_V2: This ucode supports version 8
+ *	of scan request: SCAN_REQUEST_CMD_UMAC_API_S_VER_8
  *
  * @NUM_IWL_UCODE_TLV_API: number of bits used
  */
@@ -270,6 +272,7 @@ enum iwl_ucode_tlv_api {
 	IWL_UCODE_TLV_API_NEW_RX_STATS		= (__force iwl_ucode_tlv_api_t)35,
 	IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY	= (__force iwl_ucode_tlv_api_t)38,
 	IWL_UCODE_TLV_API_DEPRECATE_TTAK	= (__force iwl_ucode_tlv_api_t)41,
+	IWL_UCODE_TLV_API_ADAPTIVE_DWELL_V2	= (__force iwl_ucode_tlv_api_t)42,
 
 	NUM_IWL_UCODE_TLV_API
 #ifdef __CHECKER__
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index bd87cdb36fb2..82445e12aacb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1155,6 +1155,12 @@ static inline bool iwl_mvm_is_adaptive_dwell_supported(struct iwl_mvm *mvm)
 			  IWL_UCODE_TLV_API_ADAPTIVE_DWELL);
 }
 
+static inline bool iwl_mvm_is_adaptive_dwell_v2_supported(struct iwl_mvm *mvm)
+{
+	return fw_has_api(&mvm->fw->ucode_capa,
+			  IWL_UCODE_TLV_API_ADAPTIVE_DWELL_V2);
+}
+
 static inline bool iwl_mvm_is_oce_supported(struct iwl_mvm *mvm)
 {
 	/* OCE should never be enabled for LMAC scan FWs */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index b756d9d0199f..b31f0ffbbbf0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -35,6 +35,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -85,6 +86,8 @@ enum iwl_mvm_traffic_load {
 #define IWL_SCAN_DWELL_PASSIVE		110
 #define IWL_SCAN_DWELL_FRAGMENTED	44
 #define IWL_SCAN_DWELL_EXTENDED		90
+#define IWL_SCAN_NUM_OF_FRAGS		3
+
 
 /* adaptive dwell max budget time [TU] for full scan */
 #define IWL_SCAN_ADWELL_MAX_BUDGET_FULL_SCAN 300
@@ -143,6 +146,9 @@ static inline void *iwl_mvm_get_scan_req_umac_data(struct iwl_mvm *mvm)
 {
 	struct iwl_scan_req_umac *cmd = mvm->scan_cmd;
 
+	if (iwl_mvm_is_adaptive_dwell_v2_supported(mvm))
+		return (void *)&cmd->v8.data;
+
 	if (iwl_mvm_is_adaptive_dwell_supported(mvm))
 		return (void *)&cmd->v7.data;
 
@@ -152,6 +158,23 @@ static inline void *iwl_mvm_get_scan_req_umac_data(struct iwl_mvm *mvm)
 	return (void *)&cmd->v1.data;
 }
 
+static inline struct iwl_scan_umac_chan_param *
+iwl_mvm_get_scan_req_umac_channel(struct iwl_mvm *mvm)
+{
+	struct iwl_scan_req_umac *cmd = mvm->scan_cmd;
+
+	if (iwl_mvm_is_adaptive_dwell_v2_supported(mvm))
+		return &cmd->v8.channel;
+
+	if (iwl_mvm_is_adaptive_dwell_supported(mvm))
+		return &cmd->v7.channel;
+
+	if (iwl_mvm_has_new_tx_api(mvm))
+		return &cmd->v6.channel;
+
+	return &cmd->v1.channel;
+}
+
 static u8 iwl_mvm_scan_rx_ant(struct iwl_mvm *mvm)
 {
 	if (mvm->scan_rx_ant != ANT_NONE)
@@ -1122,17 +1145,16 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
 				    struct iwl_scan_req_umac *cmd,
 				    struct iwl_mvm_scan_params *params)
 {
-	struct iwl_mvm_scan_timing_params *timing = &scan_timing[params->type];
+	struct iwl_mvm_scan_timing_params *timing, *hb_timing;
+	u8 active_dwell, passive_dwell;
+
+	timing = &scan_timing[params->type];
+	active_dwell = params->measurement_dwell ?
+		params->measurement_dwell : IWL_SCAN_DWELL_ACTIVE;
+	passive_dwell = params->measurement_dwell ?
+		params->measurement_dwell : IWL_SCAN_DWELL_PASSIVE;
 
 	if (iwl_mvm_is_adaptive_dwell_supported(mvm)) {
-		if (params->measurement_dwell) {
-			cmd->v7.active_dwell = params->measurement_dwell;
-			cmd->v7.passive_dwell = params->measurement_dwell;
-		} else {
-			cmd->v7.active_dwell = IWL_SCAN_DWELL_ACTIVE;
-			cmd->v7.passive_dwell = IWL_SCAN_DWELL_PASSIVE;
-		}
-		cmd->v7.fragmented_dwell = IWL_SCAN_DWELL_FRAGMENTED;
 		cmd->v7.adwell_default_n_aps_social =
 			IWL_SCAN_ADWELL_DEFAULT_N_APS_SOCIAL;
 		cmd->v7.adwell_default_n_aps =
@@ -1154,26 +1176,39 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
 			cpu_to_le32(timing->max_out_time);
 		cmd->v7.suspend_time[SCAN_LB_LMAC_IDX] =
 			cpu_to_le32(timing->suspend_time);
+
 		if (iwl_mvm_is_cdb_supported(mvm)) {
-			cmd->v6.max_out_time[SCAN_HB_LMAC_IDX] =
-				cpu_to_le32(timing->max_out_time);
-			cmd->v6.suspend_time[SCAN_HB_LMAC_IDX] =
-				cpu_to_le32(timing->suspend_time);
+			hb_timing = &scan_timing[params->type];
+
+			cmd->v7.max_out_time[SCAN_HB_LMAC_IDX] =
+				cpu_to_le32(hb_timing->max_out_time);
+			cmd->v7.suspend_time[SCAN_HB_LMAC_IDX] =
+				cpu_to_le32(hb_timing->suspend_time);
+		}
+
+		if (!iwl_mvm_is_adaptive_dwell_v2_supported(mvm)) {
+			cmd->v7.active_dwell = active_dwell;
+			cmd->v7.passive_dwell = passive_dwell;
+			cmd->v7.fragmented_dwell = IWL_SCAN_DWELL_FRAGMENTED;
+		} else {
+			cmd->v8.active_dwell[SCAN_LB_LMAC_IDX] = active_dwell;
+			cmd->v8.passive_dwell[SCAN_LB_LMAC_IDX] = passive_dwell;
+			if (iwl_mvm_is_cdb_supported(mvm)) {
+				cmd->v8.active_dwell[SCAN_HB_LMAC_IDX] =
+					active_dwell;
+				cmd->v8.passive_dwell[SCAN_HB_LMAC_IDX] =
+					passive_dwell;
+			}
 		}
 	} else {
-		if (params->measurement_dwell) {
-			cmd->v1.active_dwell = params->measurement_dwell;
-			cmd->v1.passive_dwell = params->measurement_dwell;
-			cmd->v1.extended_dwell = params->measurement_dwell;
-		} else {
-			cmd->v1.active_dwell = IWL_SCAN_DWELL_ACTIVE;
-			cmd->v1.passive_dwell = IWL_SCAN_DWELL_PASSIVE;
-			cmd->v1.extended_dwell = IWL_SCAN_DWELL_EXTENDED;
-		}
+		cmd->v1.extended_dwell = params->measurement_dwell ?
+			params->measurement_dwell : IWL_SCAN_DWELL_EXTENDED;
+		cmd->v1.active_dwell = active_dwell;
+		cmd->v1.passive_dwell = passive_dwell;
 		cmd->v1.fragmented_dwell = IWL_SCAN_DWELL_FRAGMENTED;
+
 		if (iwl_mvm_is_cdb_supported(mvm)) {
-			struct iwl_mvm_scan_timing_params *hb_timing =
-				&scan_timing[params->type];
+			hb_timing = &scan_timing[params->type];
 
 			cmd->v6.max_out_time[SCAN_HB_LMAC_IDX] =
 					cpu_to_le32(hb_timing->max_out_time);
@@ -1298,6 +1333,7 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 			     int type)
 {
 	struct iwl_scan_req_umac *cmd = mvm->scan_cmd;
+	struct iwl_scan_umac_chan_param *chan_param;
 	void *cmd_data = iwl_mvm_get_scan_req_umac_data(mvm);
 	struct iwl_scan_req_umac_tail *sec_part = cmd_data +
 		sizeof(struct iwl_scan_channel_cfg_umac) *
@@ -1305,8 +1341,11 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	int uid, i;
 	u32 ssid_bitmap = 0;
 	u8 channel_flags = 0;
+	u16 gen_flags;
 	struct iwl_mvm_vif *scan_vif = iwl_mvm_vif_from_mac80211(vif);
 
+	chan_param = iwl_mvm_get_scan_req_umac_channel(mvm);
+
 	lockdep_assert_held(&mvm->mutex);
 
 	if (WARN_ON(params->n_scan_plans > IWL_MAX_SCHED_SCAN_PLANS))
@@ -1323,8 +1362,17 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	mvm->scan_uid_status[uid] = type;
 
 	cmd->uid = cpu_to_le32(uid);
-	cmd->general_flags = cpu_to_le16(iwl_mvm_scan_umac_flags(mvm, params,
-								 vif));
+	gen_flags = iwl_mvm_scan_umac_flags(mvm, params, vif);
+	cmd->general_flags = cpu_to_le16(gen_flags);
+	if (iwl_mvm_is_adaptive_dwell_v2_supported(mvm)) {
+		if (gen_flags & IWL_UMAC_SCAN_GEN_FLAGS_FRAGMENTED)
+			cmd->v8.num_of_fragments[SCAN_LB_LMAC_IDX] =
+							IWL_SCAN_NUM_OF_FRAGS;
+		if (gen_flags & IWL_UMAC_SCAN_GEN_FLAGS_LMAC2_FRAGMENTED)
+			cmd->v8.num_of_fragments[SCAN_HB_LMAC_IDX] =
+							IWL_SCAN_NUM_OF_FRAGS;
+	}
+
 	cmd->scan_start_mac_id = scan_vif->id;
 
 	if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT)
@@ -1335,16 +1383,8 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				IWL_SCAN_CHANNEL_FLAG_EBS_ACCURATE |
 				IWL_SCAN_CHANNEL_FLAG_CACHE_ADD;
 
-	if (iwl_mvm_is_adaptive_dwell_supported(mvm)) {
-		cmd->v7.channel_flags = channel_flags;
-		cmd->v7.n_channels = params->n_channels;
-	} else if (iwl_mvm_has_new_tx_api(mvm)) {
-		cmd->v6.channel_flags = channel_flags;
-		cmd->v6.n_channels = params->n_channels;
-	} else {
-		cmd->v1.channel_flags = channel_flags;
-		cmd->v1.n_channels = params->n_channels;
-	}
+	chan_param->flags = channel_flags;
+	chan_param->count = params->n_channels;
 
 	iwl_scan_build_ssids(params, sec_part->direct_scan, &ssid_bitmap);
 
@@ -1783,7 +1823,9 @@ int iwl_mvm_scan_size(struct iwl_mvm *mvm)
 {
 	int base_size = IWL_SCAN_REQ_UMAC_SIZE_V1;
 
-	if (iwl_mvm_is_adaptive_dwell_supported(mvm))
+	if (iwl_mvm_is_adaptive_dwell_v2_supported(mvm))
+		base_size = IWL_SCAN_REQ_UMAC_SIZE_V8;
+	else if (iwl_mvm_is_adaptive_dwell_supported(mvm))
 		base_size = IWL_SCAN_REQ_UMAC_SIZE_V7;
 	else if (iwl_mvm_has_new_tx_api(mvm))
 		base_size = IWL_SCAN_REQ_UMAC_SIZE_V6;

From 9e5053ad9d590e095829a8bb07adbbdbd893f0f9 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Wed, 28 Mar 2018 11:15:09 +0300
Subject: [PATCH 1379/1678] iwlwifi: add a bunch of new 9000 PCI IDs

A lot of new PCI IDs were added for the 9000 series.  Add them to the
list of supported PCI IDs.

Cc: stable@vger.kernel.org # 4.13+
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 157 +++++++++++++++++-
 1 file changed, 155 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index e323d3abb6ac..959de2f8bb28 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -8,6 +8,7 @@
  * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016-2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -36,6 +37,7 @@
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  * Copyright(c) 2017 Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -517,9 +519,9 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)},
 
 /* 9000 Series */
-	{IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x0018, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_cfg)},
@@ -544,11 +546,15 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2526, 0x2034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)},
@@ -569,16 +575,42 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_cfg_shared_clk)},
@@ -595,12 +627,94 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x31DC, 0x0264, iwl9461_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x02A0, iwl9462_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x02A4, iwl9462_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x31DC, 0x1030, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x31DC, 0x2030, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x2034, iwl9560_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x4030, iwl9560_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x4034, iwl9560_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x31DC, 0x40A4, iwl9462_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x4234, iwl9560_2ac_cfg_shared_clk)},
+	{IWL_PCI_DEVICE(0x31DC, 0x42A4, iwl9462_2ac_cfg_shared_clk)},
 	{IWL_PCI_DEVICE(0x34F0, 0x0030, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x34F0, 0x0034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0038, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x003C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0060, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0064, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x00A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x00A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0230, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0238, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x023C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0260, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x0264, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x02A0, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x34F0, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x34F0, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x34F0, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x2034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x4030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x4034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg_soc)},
@@ -626,11 +740,44 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x1210, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x2034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x4030, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x4034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_cfg_soc)},
@@ -647,10 +794,16 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0xA370, 0x0264, iwl9461_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x02A0, iwl9462_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x02A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA370, 0x1010, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA370, 0x1210, iwl9260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA370, 0x2030, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA370, 0x2034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x4030, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x4034, iwl9560_2ac_cfg_soc)},
 	{IWL_PCI_DEVICE(0xA370, 0x40A4, iwl9462_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA370, 0x4234, iwl9560_2ac_cfg_soc)},
+	{IWL_PCI_DEVICE(0xA370, 0x42A4, iwl9462_2ac_cfg_soc)},
 
 /* 22000 Series */
 	{IWL_PCI_DEVICE(0x2720, 0x0A10, iwl22000_2ac_cfg_hr_cdb)},

From 976ea7b2c61c0cb91c5cd79fcc7df82a73b6a540 Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Sun, 29 Oct 2017 14:38:37 +0200
Subject: [PATCH 1380/1678] iwlwifi: api: Add geographic profile information to
 MCC_UPDATE_CMD

Some geographic profiles require specific handling.  For example ETSI
profile requires special channel access handling.  Add geographic
profile information to MCC_UPDATE response to allow it.

Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/fw/api/nvm-reg.h   | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index 3fd07bc80f54..37c57bcbfb4a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -8,6 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -30,6 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -310,6 +312,17 @@ struct iwl_mcc_update_resp_v1  {
 	__le32 channels[0];
 } __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_1 */
 
+/**
+ * enum iwl_geo_information - geographic information.
+ * @GEO_NO_INFO: no special info for this geo profile.
+ * @GEO_WMM_ETSI_5GHZ_INFO: this geo profile limits the WMM params
+ *	for the 5 GHz band.
+ */
+enum iwl_geo_information {
+	GEO_NO_INFO =			0,
+	GEO_WMM_ETSI_5GHZ_INFO =	BIT(0),
+};
+
 /**
  * struct iwl_mcc_update_resp - response to MCC_UPDATE_CMD.
  * Contains the new channel control profile map, if changed, and the new MCC
@@ -320,7 +333,8 @@ struct iwl_mcc_update_resp_v1  {
  * @cap: capabilities for all channels which matches the MCC
  * @source_id: the MCC source, see iwl_mcc_source
  * @time: time elapsed from the MCC test start (in 30 seconds TU)
- * @reserved: reserved.
+ * @geo_info: geographic specific profile information
+ *	see &enum iwl_geo_information.
  * @n_channels: number of channels in @channels_data (may be 14, 39, 50 or 51
  *		channels, depending on platform)
  * @channels: channel control data map, DWORD for each channel. Only the first
@@ -332,10 +346,10 @@ struct iwl_mcc_update_resp {
 	u8 cap;
 	u8 source_id;
 	__le16 time;
-	__le16 reserved;
+	__le16 geo_info;
 	__le32 n_channels;
 	__le32 channels[0];
-} __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_2 */
+} __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_3 */
 
 /**
  * struct iwl_mcc_chub_notif - chub notifies of mcc change

From 8f27036a0e380c6ede67a8d2a132327fcb760ace Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 5 Feb 2018 12:44:44 +0200
Subject: [PATCH 1381/1678] iwlwifi: bump the max API version for 9000 and
 22000 devices

We are now ready to load 38.ucode

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 4 +++-
 drivers/net/wireless/intel/iwlwifi/cfg/9000.c  | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 48f6f80eb24b..dffd9df782b0 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -19,6 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,7 +57,7 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX	36
+#define IWL_22000_UCODE_API_MAX	38
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN	24
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index ab1469473e4f..e1c869a1f8cc 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -19,6 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -56,7 +58,7 @@
 #include "fw/file.h"
 
 /* Highest firmware API version supported */
-#define IWL9000_UCODE_API_MAX	36
+#define IWL9000_UCODE_API_MAX	38
 
 /* Lowest firmware API version supported */
 #define IWL9000_UCODE_API_MIN	30

From 9b137866f9a2699698cfffb0b7729331a419d617 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 27 Dec 2017 12:16:33 +0200
Subject: [PATCH 1382/1678] iwlwifi: mvm: save low latency causes in an enum

Currently we have a boolean variable for each cause.

This costs space, and requires to check each separately
when determining low latency.

Since we have another cause incoming, convert it to an enum.

While at it, move the retrieval of the prev value and the
assignment of the new value to be inside iwl_mvm_update_low_latency
and save the need for each caller to do it separately.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../wireless/intel/iwlwifi/mvm/debugfs-vif.c  | 11 +++---
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h  | 36 ++++++++++++++-----
 .../net/wireless/intel/iwlwifi/mvm/utils.c    |  8 +++--
 3 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 4e9d1792baf3..f7fcf700196b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -1276,7 +1276,6 @@ static ssize_t iwl_dbgfs_low_latency_write(struct ieee80211_vif *vif, char *buf,
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_mvm *mvm = mvmvif->mvm;
-	bool prev;
 	u8 value;
 	int ret;
 
@@ -1287,9 +1286,7 @@ static ssize_t iwl_dbgfs_low_latency_write(struct ieee80211_vif *vif, char *buf,
 		return -EINVAL;
 
 	mutex_lock(&mvm->mutex);
-	prev = iwl_mvm_vif_low_latency(mvmvif);
-	mvmvif->low_latency_dbgfs = value;
-	iwl_mvm_update_low_latency(mvm, vif, prev);
+	iwl_mvm_update_low_latency(mvm, vif, value, LOW_LATENCY_DEBUGFS);
 	mutex_unlock(&mvm->mutex);
 
 	return count;
@@ -1306,9 +1303,9 @@ static ssize_t iwl_dbgfs_low_latency_read(struct file *file,
 
 	len = scnprintf(buf, sizeof(buf) - 1,
 			"traffic=%d\ndbgfs=%d\nvcmd=%d\n",
-			mvmvif->low_latency_traffic,
-			mvmvif->low_latency_dbgfs,
-			mvmvif->low_latency_vcmd);
+			!!(mvmvif->low_latency & LOW_LATENCY_TRAFFIC),
+			!!(mvmvif->low_latency & LOW_LATENCY_DEBUGFS),
+			!!(mvmvif->low_latency & LOW_LATENCY_VCMD));
 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 82445e12aacb..d2cf751db68d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -299,6 +299,18 @@ enum iwl_bt_force_ant_mode {
 	BT_FORCE_ANT_MAX,
 };
 
+/**
+* struct iwl_mvm_low_latency_cause - low latency set causes
+* @LOW_LATENCY_TRAFFIC: indicates low latency traffic was detected
+* @LOW_LATENCY_DEBUGFS: low latency mode set from debugfs
+* @LOW_LATENCY_VCMD: low latency mode set from vendor command
+*/
+enum iwl_mvm_low_latency_cause {
+	LOW_LATENCY_TRAFFIC = BIT(0),
+	LOW_LATENCY_DEBUGFS = BIT(1),
+	LOW_LATENCY_VCMD = BIT(2),
+};
+
 /**
 * struct iwl_mvm_vif_bf_data - beacon filtering related data
 * @bf_enabled: indicates if beacon filtering is enabled
@@ -335,9 +347,8 @@ struct iwl_mvm_vif_bf_data {
  * @pm_enabled - Indicate if MAC power management is allowed
  * @monitor_active: indicates that monitor context is configured, and that the
  *	interface should get quota etc.
- * @low_latency_traffic: indicates low latency traffic was detected
- * @low_latency_dbgfs: low latency mode set from debugfs
- * @low_latency_vcmd: low latency mode set from vendor command
+ * @low_latency: indicates low latency is set, see
+ *	enum &iwl_mvm_low_latency_cause for causes.
  * @ps_disabled: indicates that this interface requires PS to be disabled
  * @queue_params: QoS params for this MAC
  * @bcast_sta: station used for broadcast packets. Used by the following
@@ -367,7 +378,7 @@ struct iwl_mvm_vif {
 	bool ap_ibss_active;
 	bool pm_enabled;
 	bool monitor_active;
-	bool low_latency_traffic, low_latency_dbgfs, low_latency_vcmd;
+	u8 low_latency;
 	bool ps_disabled;
 	struct iwl_mvm_vif_bf_data bf_data;
 
@@ -1756,7 +1767,8 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm);
 
 /* Low latency */
 int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			       bool value);
+			      bool low_latency,
+			      enum iwl_mvm_low_latency_cause cause);
 /* get SystemLowLatencyMode - only needed for beacon threshold? */
 bool iwl_mvm_low_latency(struct iwl_mvm *mvm);
 /* get VMACLowLatencyMode */
@@ -1772,9 +1784,17 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif)
 	 * binding, so this has no real impact. For now, just return
 	 * the current desired low-latency state.
 	 */
-	return mvmvif->low_latency_dbgfs ||
-	       mvmvif->low_latency_traffic ||
-	       mvmvif->low_latency_vcmd;
+	return mvmvif->low_latency;
+}
+
+static inline
+void iwl_mvm_vif_set_low_latency(struct iwl_mvm_vif *mvmvif, bool set,
+				 enum iwl_mvm_low_latency_cause cause)
+{
+	if (set)
+		mvmvif->low_latency |= cause;
+	else
+		mvmvif->low_latency &= ~cause;
 }
 
 /* hw scheduler queue config */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 70f8b8eb6117..bebcfb44c8c2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1034,14 +1034,18 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm)
 }
 
 int iwl_mvm_update_low_latency(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			       bool prev)
+			       bool low_latency,
+			       enum iwl_mvm_low_latency_cause cause)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	int res;
-	bool low_latency;
+	bool prev;
 
 	lockdep_assert_held(&mvm->mutex);
 
+	prev = iwl_mvm_vif_low_latency(mvmvif);
+	iwl_mvm_vif_set_low_latency(mvmvif, low_latency, cause);
+
 	low_latency = iwl_mvm_vif_low_latency(mvmvif);
 
 	if (low_latency == prev)

From 378c8931342f99c5a6fce21e38ef46ac39395464 Mon Sep 17 00:00:00 2001
From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Date: Thu, 28 Dec 2017 10:19:43 +0200
Subject: [PATCH 1383/1678] iwlwifi: wrt: add fw force restart via triggers

We can set triggers that cause a debug data collection when something
of interest happens (e.g. when too many probes are lost conscutively).
Normally, this triggers don't cause the FW to be restarted, but in
some cases that may be desired, so we recover from the problem.  To
support this, add a flag that indicates that the FW should be
restarted when the trigger fires.

Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c  |  6 ++++++
 drivers/net/wireless/intel/iwlwifi/fw/file.h | 12 +++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index d27a43ea0d7c..fa283285fcbe 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1008,6 +1008,12 @@ int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
 {
 	struct iwl_fw_dump_desc *desc;
 
+	if (trigger && trigger->flags & IWL_FW_DBG_FORCE_RESTART) {
+		IWL_WARN(fwrt, "Force restart: trigger %d fired.\n", trig);
+		iwl_force_nmi(fwrt->trans);
+		return 0;
+	}
+
 	desc = kzalloc(sizeof(*desc) + len, GFP_ATOMIC);
 	if (!desc)
 		return -ENOMEM;
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 2bea95bf4fc9..9b2805e1e3b1 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -620,6 +620,14 @@ enum iwl_fw_dbg_trigger_mode {
 	IWL_FW_DBG_TRIGGER_MONITOR_ONLY = BIT(2),
 };
 
+/**
+ * enum iwl_fw_dbg_trigger_flags - the flags supported by wrt triggers
+ * @IWL_FW_DBG_FORCE_RESTART: force a firmware restart
+ */
+enum iwl_fw_dbg_trigger_flags {
+	IWL_FW_DBG_FORCE_RESTART = BIT(0),
+};
+
 /**
  * enum iwl_fw_dbg_trigger_vif_type - define the VIF type for a trigger
  * @IWL_FW_DBG_CONF_VIF_ANY: any vif type
@@ -656,6 +664,7 @@ enum iwl_fw_dbg_trigger_vif_type {
  * @occurrences: number of occurrences. 0 means the trigger will never fire.
  * @trig_dis_ms: the time, in milliseconds, after an occurrence of this
  *	trigger in which another occurrence should be ignored.
+ * @flags: &enum iwl_fw_dbg_trigger_flags
  */
 struct iwl_fw_dbg_trigger_tlv {
 	__le32 id;
@@ -666,7 +675,8 @@ struct iwl_fw_dbg_trigger_tlv {
 	u8 start_conf_id;
 	__le16 occurrences;
 	__le16 trig_dis_ms;
-	__le16 reserved[3];
+	u8 flags;
+	u8 reserved[5];
 
 	u8 data[0];
 } __packed;

From c105547501016897194358b11451608a8d5f9a02 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:32 -0700
Subject: [PATCH 1384/1678] treewide: remove large struct-pass-by-value from
 tracepoint arguments

- fix trace_hfi1_ctxt_info() to pass large struct by reference instead of by value
- convert 'type array[]' tracepoint arguments into 'type *array',
  since compiler will warn that sizeof('type array[]') == sizeof('type *array')
  and later should be used instead

The CAST_TO_U64 macro in the later patch will enforce that tracepoint
arguments can only be integers, pointers, or less than 8 byte structures.
Larger structures should be passed by reference.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/infiniband/hw/hfi1/file_ops.c    |  2 +-
 drivers/infiniband/hw/hfi1/trace_ctxts.h | 12 ++++++------
 include/trace/events/f2fs.h              |  2 +-
 net/wireless/trace.h                     |  2 +-
 sound/firewire/amdtp-stream-trace.h      |  2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 41fafebe3b0d..da4aa1a95b11 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
 	cinfo.sdma_ring_size = fd->cq->nentries;
 	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
 
-	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
+	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
 	if (copy_to_user((void __user *)arg, &cinfo, len))
 		return -EFAULT;
 
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 4eb4cc798035..e00c8a7d559c 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata,
 TRACE_EVENT(hfi1_ctxt_info,
 	    TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
 		     unsigned int subctxt,
-		     struct hfi1_ctxt_info cinfo),
+		     struct hfi1_ctxt_info *cinfo),
 	    TP_ARGS(dd, ctxt, subctxt, cinfo),
 	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
 			     __field(unsigned int, ctxt)
@@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info,
 	    TP_fast_assign(DD_DEV_ASSIGN(dd);
 			    __entry->ctxt = ctxt;
 			    __entry->subctxt = subctxt;
-			    __entry->egrtids = cinfo.egrtids;
-			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
-			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
-			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
-			    __entry->rcvegr_size = cinfo.rcvegr_size;
+			    __entry->egrtids = cinfo->egrtids;
+			    __entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
+			    __entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
+			    __entry->sdma_ring_size = cinfo->sdma_ring_size;
+			    __entry->rcvegr_size = cinfo->rcvegr_size;
 			    ),
 	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
 		      __get_str(dev),
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 06c87f9f720c..795698925d20 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node,
 
 TRACE_EVENT(f2fs_truncate_partial_nodes,
 
-	TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err),
+	TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err),
 
 	TP_ARGS(inode, nid, depth, err),
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5152938b358d..018c81fa72fb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -3137,7 +3137,7 @@ TRACE_EVENT(rdev_start_radar_detection,
 
 TRACE_EVENT(rdev_set_mcast_rate,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
-		 int mcast_rate[NUM_NL80211_BANDS]),
+		 int *mcast_rate),
 	TP_ARGS(wiphy, netdev, mcast_rate),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h
index ea0d486652c8..54cdd4ffa9ce 100644
--- a/sound/firewire/amdtp-stream-trace.h
+++ b/sound/firewire/amdtp-stream-trace.h
@@ -14,7 +14,7 @@
 #include <linux/tracepoint.h>
 
 TRACE_EVENT(in_packet,
-	TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 cip_header[2], unsigned int payload_length, unsigned int index),
+	TP_PROTO(const struct amdtp_stream *s, u32 cycles, u32 *cip_header, unsigned int payload_length, unsigned int index),
 	TP_ARGS(s, cycles, cip_header, payload_length, index),
 	TP_STRUCT__entry(
 		__field(unsigned int, second)

From d992ee6c1ec3c9949d87877bdb44faa7f9cc60ce Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:33 -0700
Subject: [PATCH 1385/1678] net/mediatek: disambiguate mt76 vs mt7601u trace
 events

two trace events defined with the same name and both unused.
They conflict in allyesconfig build. Rename one of them.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/wireless/mediatek/mt7601u/trace.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/trace.h b/drivers/net/wireless/mediatek/mt7601u/trace.h
index 289897300ef0..82c8898b9076 100644
--- a/drivers/net/wireless/mediatek/mt7601u/trace.h
+++ b/drivers/net/wireless/mediatek/mt7601u/trace.h
@@ -34,7 +34,7 @@
 #define REG_PR_FMT	"%04x=%08x"
 #define REG_PR_ARG	__entry->reg, __entry->val
 
-DECLARE_EVENT_CLASS(dev_reg_evt,
+DECLARE_EVENT_CLASS(dev_reg_evtu,
 	TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
 	TP_ARGS(dev, reg, val),
 	TP_STRUCT__entry(
@@ -51,12 +51,12 @@ DECLARE_EVENT_CLASS(dev_reg_evt,
 	)
 );
 
-DEFINE_EVENT(dev_reg_evt, reg_read,
+DEFINE_EVENT(dev_reg_evtu, reg_read,
 	TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
 	TP_ARGS(dev, reg, val)
 );
 
-DEFINE_EVENT(dev_reg_evt, reg_write,
+DEFINE_EVENT(dev_reg_evtu, reg_write,
 	TP_PROTO(struct mt7601u_dev *dev, u32 reg, u32 val),
 	TP_ARGS(dev, reg, val)
 );

From 14624a9387c4d80766d1fdd237b0d110a0a1b43d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:34 -0700
Subject: [PATCH 1386/1678] net/mac802154: disambiguate mac80215 vs mac802154
 trace events

two trace events defined with the same name and both unused.
They conflict in allyesconfig build. Rename one of them.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/mac802154/trace.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 2c8a43d3607f..df855c33daf2 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -33,7 +33,7 @@
 
 /* Tracing for driver callbacks */
 
-DECLARE_EVENT_CLASS(local_only_evt,
+DECLARE_EVENT_CLASS(local_only_evt4,
 	TP_PROTO(struct ieee802154_local *local),
 	TP_ARGS(local),
 	TP_STRUCT__entry(
@@ -45,7 +45,7 @@ DECLARE_EVENT_CLASS(local_only_evt,
 	TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
 );
 
-DEFINE_EVENT(local_only_evt, 802154_drv_return_void,
+DEFINE_EVENT(local_only_evt4, 802154_drv_return_void,
 	TP_PROTO(struct ieee802154_local *local),
 	TP_ARGS(local)
 );
@@ -65,12 +65,12 @@ TRACE_EVENT(802154_drv_return_int,
 		  __entry->ret)
 );
 
-DEFINE_EVENT(local_only_evt, 802154_drv_start,
+DEFINE_EVENT(local_only_evt4, 802154_drv_start,
 	TP_PROTO(struct ieee802154_local *local),
 	TP_ARGS(local)
 );
 
-DEFINE_EVENT(local_only_evt, 802154_drv_stop,
+DEFINE_EVENT(local_only_evt4, 802154_drv_stop,
 	TP_PROTO(struct ieee802154_local *local),
 	TP_ARGS(local)
 );

From 4fe43c2c00349557fdf4e6d61a67ebbe670412b8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:35 -0700
Subject: [PATCH 1387/1678] net/wireless/iwlwifi: fix iwlwifi_dev_ucode_error
 tracepoint

fix iwlwifi_dev_ucode_error tracepoint to pass pointer to a table
instead of all 17 arguments by value.
dvm/main.c and mvm/utils.c have 'struct iwl_error_event_table'
defined with very similar yet subtly different fields and offsets.
tracepoint is still common and using definition of 'struct iwl_error_event_table'
from dvm/commands.h while copying fields.
Long term this tracepoint probably should be split into two.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/wireless/intel/iwlwifi/dvm/main.c |  7 +---
 .../intel/iwlwifi/iwl-devtrace-iwlwifi.h      | 39 +++++++++----------
 .../net/wireless/intel/iwlwifi/iwl-devtrace.c |  1 +
 .../net/wireless/intel/iwlwifi/mvm/utils.c    |  7 +---
 4 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
index d11d72615de2..e68254e12764 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
@@ -1651,12 +1651,7 @@ static void iwl_dump_nic_error_log(struct iwl_priv *priv)
 			priv->status, table.valid);
 	}
 
-	trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
-				      table.data1, table.data2, table.line,
-				      table.blink2, table.ilink1, table.ilink2,
-				      table.bcon_time, table.gp1, table.gp2,
-				      table.gp3, table.ucode_ver, table.hw_ver,
-				      0, table.brd_ver);
+	trace_iwlwifi_dev_ucode_error(trans->dev, &table, 0, table.brd_ver);
 	IWL_ERR(priv, "0x%08X | %-28s\n", table.error_id,
 		desc_lookup(table.error_id));
 	IWL_ERR(priv, "0x%08X | uPc\n", table.pc);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h
index 9518a82f44c2..27e3e4e96aa2 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-iwlwifi.h
@@ -126,14 +126,11 @@ TRACE_EVENT(iwlwifi_dev_tx,
 		  __entry->framelen, __entry->skbaddr)
 );
 
+struct iwl_error_event_table;
 TRACE_EVENT(iwlwifi_dev_ucode_error,
-	TP_PROTO(const struct device *dev, u32 desc, u32 tsf_low,
-		 u32 data1, u32 data2, u32 line, u32 blink2, u32 ilink1,
-		 u32 ilink2, u32 bcon_time, u32 gp1, u32 gp2, u32 rev_type,
-		 u32 major, u32 minor, u32 hw_ver, u32 brd_ver),
-	TP_ARGS(dev, desc, tsf_low, data1, data2, line,
-		 blink2, ilink1, ilink2, bcon_time, gp1, gp2,
-		 rev_type, major, minor, hw_ver, brd_ver),
+	TP_PROTO(const struct device *dev, const struct iwl_error_event_table *table,
+		 u32 hw_ver, u32 brd_ver),
+	TP_ARGS(dev, table, hw_ver, brd_ver),
 	TP_STRUCT__entry(
 		DEV_ENTRY
 		__field(u32, desc)
@@ -155,20 +152,20 @@ TRACE_EVENT(iwlwifi_dev_ucode_error,
 	),
 	TP_fast_assign(
 		DEV_ASSIGN;
-		__entry->desc = desc;
-		__entry->tsf_low = tsf_low;
-		__entry->data1 = data1;
-		__entry->data2 = data2;
-		__entry->line = line;
-		__entry->blink2 = blink2;
-		__entry->ilink1 = ilink1;
-		__entry->ilink2 = ilink2;
-		__entry->bcon_time = bcon_time;
-		__entry->gp1 = gp1;
-		__entry->gp2 = gp2;
-		__entry->rev_type = rev_type;
-		__entry->major = major;
-		__entry->minor = minor;
+		__entry->desc = table->error_id;
+		__entry->tsf_low = table->tsf_low;
+		__entry->data1 = table->data1;
+		__entry->data2 = table->data2;
+		__entry->line = table->line;
+		__entry->blink2 = table->blink2;
+		__entry->ilink1 = table->ilink1;
+		__entry->ilink2 = table->ilink2;
+		__entry->bcon_time = table->bcon_time;
+		__entry->gp1 = table->gp1;
+		__entry->gp2 = table->gp2;
+		__entry->rev_type = table->gp3;
+		__entry->major = table->ucode_ver;
+		__entry->minor = table->hw_ver;
 		__entry->hw_ver = hw_ver;
 		__entry->brd_ver = brd_ver;
 	),
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
index 50510fb6ab8c..6aa719865a58 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
@@ -30,6 +30,7 @@
 #ifndef __CHECKER__
 #include "iwl-trans.h"
 
+#include "dvm/commands.h"
 #define CREATE_TRACE_POINTS
 #include "iwl-devtrace.h"
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index d65e1db7c097..5442ead876eb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -549,12 +549,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
 
 	IWL_ERR(mvm, "Loaded firmware version: %s\n", mvm->fw->fw_version);
 
-	trace_iwlwifi_dev_ucode_error(trans->dev, table.error_id, table.tsf_low,
-				      table.data1, table.data2, table.data3,
-				      table.blink2, table.ilink1,
-				      table.ilink2, table.bcon_time, table.gp1,
-				      table.gp2, table.fw_rev_type, table.major,
-				      table.minor, table.hw_ver, table.brd_ver);
+	trace_iwlwifi_dev_ucode_error(trans->dev, &table, table.hw_ver, table.brd_ver);
 	IWL_ERR(mvm, "0x%08X | %-28s\n", table.error_id,
 		desc_lookup(table.error_id));
 	IWL_ERR(mvm, "0x%08X | trm_hw_status0\n", table.trm_hw_status0);

From cf14f27f82af78e713f8a57c477cf9233faf8b30 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:36 -0700
Subject: [PATCH 1388/1678] macro: introduce COUNT_ARGS() macro

move COUNT_ARGS() macro from apparmor to generic header and extend it
to count till twelve.

COUNT() was an alternative name for this logic, but it's used for
different purpose in many other places.

Similarly for CONCATENATE() macro.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/kernel.h           | 7 +++++++
 security/apparmor/include/path.h | 7 +------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fd291503576..293fa0677fba 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #define swap(a, b) \
 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
 
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  * @ptr:	the pointer to the member.
diff --git a/security/apparmor/include/path.h b/security/apparmor/include/path.h
index 05fb3305671e..e042b994f2b8 100644
--- a/security/apparmor/include/path.h
+++ b/security/apparmor/include/path.h
@@ -43,15 +43,10 @@ struct aa_buffers {
 
 DECLARE_PER_CPU(struct aa_buffers, aa_buffers);
 
-#define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n
-#define CONCAT(X, Y) X ## Y
-#define CONCAT_AFTER(X, Y) CONCAT(X, Y)
-
 #define ASSIGN(FN, X, N) ((X) = FN(N))
 #define EVAL1(FN, X) ASSIGN(FN, X, 0) /*X = FN(0)*/
 #define EVAL2(FN, X, Y...) do { ASSIGN(FN, X, 1);  EVAL1(FN, Y); } while (0)
-#define EVAL(FN, X...) CONCAT_AFTER(EVAL, COUNT_ARGS(X))(FN, X)
+#define EVAL(FN, X...) CONCATENATE(EVAL, COUNT_ARGS(X))(FN, X)
 
 #define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++)
 

From c4f6699dfcb8558d138fe838f741b2c10f416cf9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:37 -0700
Subject: [PATCH 1389/1678] bpf: introduce BPF_RAW_TRACEPOINT

Introduce BPF_PROG_TYPE_RAW_TRACEPOINT bpf program type to access
kernel internal arguments of the tracepoints in their raw form.

>From bpf program point of view the access to the arguments look like:
struct bpf_raw_tracepoint_args {
       __u64 args[0];
};

int bpf_prog(struct bpf_raw_tracepoint_args *ctx)
{
  // program can read args[N] where N depends on tracepoint
  // and statically verified at program load+attach time
}

kprobe+bpf infrastructure allows programs access function arguments.
This feature allows programs access raw tracepoint arguments.

Similar to proposed 'dynamic ftrace events' there are no abi guarantees
to what the tracepoints arguments are and what their meaning is.
The program needs to type cast args properly and use bpf_probe_read()
helper to access struct fields when argument is a pointer.

For every tracepoint __bpf_trace_##call function is prepared.
In assembler it looks like:
(gdb) disassemble __bpf_trace_xdp_exception
Dump of assembler code for function __bpf_trace_xdp_exception:
   0xffffffff81132080 <+0>:     mov    %ecx,%ecx
   0xffffffff81132082 <+2>:     jmpq   0xffffffff811231f0 <bpf_trace_run3>

where

TRACE_EVENT(xdp_exception,
        TP_PROTO(const struct net_device *dev,
                 const struct bpf_prog *xdp, u32 act),

The above assembler snippet is casting 32-bit 'act' field into 'u64'
to pass into bpf_trace_run3(), while 'dev' and 'xdp' args are passed as-is.
All of ~500 of __bpf_trace_*() functions are only 5-10 byte long
and in total this approach adds 7k bytes to .text.

This approach gives the lowest possible overhead
while calling trace_xdp_exception() from kernel C code and
transitioning into bpf land.
Since tracepoint+bpf are used at speeds of 1M+ events per second
this is valuable optimization.

The new BPF_RAW_TRACEPOINT_OPEN sys_bpf command is introduced
that returns anon_inode FD of 'bpf-raw-tracepoint' object.

The user space looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint with prog attached
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);

Ctrl-C of tracing daemon or cmdline tool that uses this feature
will automatically detach bpf program, unload it and
unregister tracepoint probe.

On the kernel side the __bpf_raw_tp_map section of pointers to
tracepoint definition and to __bpf_trace_*() probe function is used
to find a tracepoint with "xdp_exception" name and
corresponding __bpf_trace_xdp_exception() probe function
which are passed to tracepoint_probe_register() to connect probe
with tracepoint.

Addition of bpf_raw_tracepoint doesn't interfere with ftrace and perf
tracepoint mechanisms. perf_event_open() can be used in parallel
on the same tracepoint.
Multiple bpf_raw_tracepoint_open("xdp_exception", prog_fd) are permitted.
Each with its own bpf program. The kernel will execute
all tracepoint probes and all attached bpf programs.

In the future bpf_raw_tracepoints can be extended with
query/introspection logic.

__bpf_raw_tp_map section logic was contributed by Steven Rostedt

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/asm-generic/vmlinux.lds.h |  10 ++
 include/linux/bpf_types.h         |   1 +
 include/linux/trace_events.h      |  42 +++++++
 include/linux/tracepoint-defs.h   |   6 +
 include/trace/bpf_probe.h         |  92 +++++++++++++++
 include/trace/define_trace.h      |   1 +
 include/uapi/linux/bpf.h          |  11 ++
 kernel/bpf/syscall.c              |  78 +++++++++++++
 kernel/trace/bpf_trace.c          | 183 ++++++++++++++++++++++++++++++
 9 files changed, 424 insertions(+)
 create mode 100644 include/trace/bpf_probe.h

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 1ab0e520d6fc..8add3493a202 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -178,6 +178,15 @@
 #define TRACE_SYSCALLS()
 #endif
 
+#ifdef CONFIG_BPF_EVENTS
+#define BPF_RAW_TP() STRUCT_ALIGN();					\
+			 VMLINUX_SYMBOL(__start__bpf_raw_tp) = .;	\
+			 KEEP(*(__bpf_raw_tp_map))			\
+			 VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .;
+#else
+#define BPF_RAW_TP()
+#endif
+
 #ifdef CONFIG_SERIAL_EARLYCON
 #define EARLYCON_TABLE() STRUCT_ALIGN();			\
 			 VMLINUX_SYMBOL(__earlycon_table) = .;	\
@@ -249,6 +258,7 @@
 	LIKELY_PROFILE()		       				\
 	BRANCH_PROFILE()						\
 	TRACE_PRINTKS()							\
+	BPF_RAW_TP()							\
 	TRACEPOINT_STR()
 
 /*
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5e2e8a49fb21..6d7243bfb0ff 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -19,6 +19,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
 BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
 BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8a1442c4e513..b0357cd198b0 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
 int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
 void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
+struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
 #else
 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {
@@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info)
 {
 	return -EOPNOTSUPP;
 }
+static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p)
+{
+	return -EOPNOTSUPP;
+}
+static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p)
+{
+	return -EOPNOTSUPP;
+}
+static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+{
+	return NULL;
+}
 #endif
 
 enum {
@@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
 void perf_trace_buf_update(void *record, u16 type);
 void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
 
+void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
+void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
+void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3);
+void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4);
+void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5);
+void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6);
+void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		    u64 arg8);
+void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		    u64 arg8, u64 arg9);
+void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10);
+void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11);
+void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
+		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
 void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
 			       struct trace_event_call *call, u64 count,
 			       struct pt_regs *regs, struct hlist_head *head,
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 64ed7064f1fa..22c5a46e9693 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -35,4 +35,10 @@ struct tracepoint {
 	struct tracepoint_func __rcu *funcs;
 };
 
+struct bpf_raw_event_map {
+	struct tracepoint	*tp;
+	void			*bpf_func;
+	u32			num_args;
+} __aligned(32);
+
 #endif
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
new file mode 100644
index 000000000000..505dae0bed80
--- /dev/null
+++ b/include/trace/bpf_probe.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef TRACE_SYSTEM_VAR
+
+#ifdef CONFIG_BPF_EVENTS
+
+#undef __entry
+#define __entry entry
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
+
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field)	\
+		((__entry->__data_loc_##field >> 16) & 0xffff)
+
+#undef __get_str
+#define __get_str(field) ((char *)__get_dynamic_array(field))
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __perf_count
+#define __perf_count(c)	(c)
+
+#undef __perf_task
+#define __perf_task(t)	(t)
+
+/* cast any integer, pointer, or small struct to u64 */
+#define UINTTYPE(size) \
+	__typeof__(__builtin_choose_expr(size == 1,  (u8)1, \
+		   __builtin_choose_expr(size == 2, (u16)2, \
+		   __builtin_choose_expr(size == 4, (u32)3, \
+		   __builtin_choose_expr(size == 8, (u64)4, \
+					 (void)5)))))
+#define __CAST_TO_U64(x) ({ \
+	typeof(x) __src = (x); \
+	UINTTYPE(sizeof(x)) __dst; \
+	memcpy(&__dst, &__src, sizeof(__dst)); \
+	(u64)__dst; })
+
+#define __CAST1(a,...) __CAST_TO_U64(a)
+#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__)
+#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__)
+#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__)
+#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__)
+#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__)
+#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__)
+#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__)
+#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__)
+#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__)
+#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__)
+#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__)
+/* tracepoints with more than 12 arguments will hit build error */
+#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+static notrace void							\
+__bpf_trace_##call(void *__data, proto)					\
+{									\
+	struct bpf_prog *prog = __data;					\
+	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args));	\
+}
+
+/*
+ * This part is compiled out, it is only here as a build time check
+ * to make sure that if the tracepoint handling changes, the
+ * bpf probe will fail to compile unless it too is updated.
+ */
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args)			\
+static inline void bpf_test_probe_##call(void)				\
+{									\
+	check_trace_callback_type_##call(__bpf_trace_##template);	\
+}									\
+static struct bpf_raw_event_map	__used					\
+	__attribute__((section("__bpf_raw_tp_map")))			\
+__bpf_trace_tp_map_##call = {						\
+	.tp		= &__tracepoint_##call,				\
+	.bpf_func	= (void *)__bpf_trace_##template,		\
+	.num_args	= COUNT_ARGS(args),				\
+};
+
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
+	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#endif /* CONFIG_BPF_EVENTS */
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index d9e3d4aa3f6e..cb30c5532144 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -95,6 +95,7 @@
 #ifdef TRACEPOINTS_ENABLED
 #include <trace/trace_events.h>
 #include <trace/perf.h>
+#include <trace/bpf_probe.h>
 #endif
 
 #undef TRACE_EVENT
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..1878201c2d77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@ enum bpf_cmd {
 	BPF_MAP_GET_FD_BY_ID,
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
+	BPF_RAW_TRACEPOINT_OPEN,
 };
 
 enum bpf_map_type {
@@ -134,6 +135,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_RAW_TRACEPOINT,
 };
 
 enum bpf_attach_type {
@@ -344,6 +346,11 @@ union bpf_attr {
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
 	} query;
+
+	struct {
+		__u64 name;
+		__u32 prog_fd;
+	} raw_tracepoint;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx {
 	__u32 minor;
 };
 
+struct bpf_raw_tracepoint_args {
+	__u64 args[0];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 77d45bd9f507..95ca2523fa6e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1315,6 +1315,81 @@ static int bpf_obj_get(const union bpf_attr *attr)
 				attr->file_flags);
 }
 
+struct bpf_raw_tracepoint {
+	struct bpf_raw_event_map *btp;
+	struct bpf_prog *prog;
+};
+
+static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_raw_tracepoint *raw_tp = filp->private_data;
+
+	if (raw_tp->prog) {
+		bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
+		bpf_prog_put(raw_tp->prog);
+	}
+	kfree(raw_tp);
+	return 0;
+}
+
+static const struct file_operations bpf_raw_tp_fops = {
+	.release	= bpf_raw_tracepoint_release,
+	.read		= bpf_dummy_read,
+	.write		= bpf_dummy_write,
+};
+
+#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
+
+static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
+{
+	struct bpf_raw_tracepoint *raw_tp;
+	struct bpf_raw_event_map *btp;
+	struct bpf_prog *prog;
+	char tp_name[128];
+	int tp_fd, err;
+
+	if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name),
+			      sizeof(tp_name) - 1) < 0)
+		return -EFAULT;
+	tp_name[sizeof(tp_name) - 1] = 0;
+
+	btp = bpf_find_raw_tracepoint(tp_name);
+	if (!btp)
+		return -ENOENT;
+
+	raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
+	if (!raw_tp)
+		return -ENOMEM;
+	raw_tp->btp = btp;
+
+	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
+				 BPF_PROG_TYPE_RAW_TRACEPOINT);
+	if (IS_ERR(prog)) {
+		err = PTR_ERR(prog);
+		goto out_free_tp;
+	}
+
+	err = bpf_probe_register(raw_tp->btp, prog);
+	if (err)
+		goto out_put_prog;
+
+	raw_tp->prog = prog;
+	tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
+				 O_CLOEXEC);
+	if (tp_fd < 0) {
+		bpf_probe_unregister(raw_tp->btp, prog);
+		err = tp_fd;
+		goto out_put_prog;
+	}
+	return tp_fd;
+
+out_put_prog:
+	bpf_prog_put(prog);
+out_free_tp:
+	kfree(raw_tp);
+	return err;
+}
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
@@ -1925,6 +2000,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_OBJ_GET_INFO_BY_FD:
 		err = bpf_obj_get_info_by_fd(&attr, uattr);
 		break;
+	case BPF_RAW_TRACEPOINT_OPEN:
+		err = bpf_raw_tracepoint_open(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 7f9691c86b6e..463e72d18c4c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -735,6 +735,86 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
 	}
 }
 
+/*
+ * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
+ * to avoid potential recursive reuse issue when/if tracepoints are added
+ * inside bpf_*_event_output and/or bpf_get_stack_id
+ */
+static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	return ____bpf_perf_event_output(regs, map, flags, data, size);
+}
+
+static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
+	.func		= bpf_perf_event_output_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   struct bpf_map *, map, u64, flags)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
+	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+			       flags, 0, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
+	.func		= bpf_get_stackid_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_perf_event_output:
+		return &bpf_perf_event_output_proto_raw_tp;
+	case BPF_FUNC_get_stackid:
+		return &bpf_get_stackid_proto_raw_tp;
+	default:
+		return tracing_func_proto(func_id);
+	}
+}
+
+static bool raw_tp_prog_is_valid_access(int off, int size,
+					enum bpf_access_type type,
+					struct bpf_insn_access_aux *info)
+{
+	/* largest tracepoint in the kernel has 12 args */
+	if (off < 0 || off >= sizeof(__u64) * 12)
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	return true;
+}
+
+const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
+	.get_func_proto  = raw_tp_prog_func_proto,
+	.is_valid_access = raw_tp_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_prog_ops = {
+};
+
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
 				    struct bpf_insn_access_aux *info)
 {
@@ -908,3 +988,106 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
 
 	return ret;
 }
+
+extern struct bpf_raw_event_map __start__bpf_raw_tp[];
+extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
+
+struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+{
+	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
+
+	for (; btp < __stop__bpf_raw_tp; btp++) {
+		if (!strcmp(btp->tp->name, name))
+			return btp;
+	}
+	return NULL;
+}
+
+static __always_inline
+void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
+{
+	rcu_read_lock();
+	preempt_disable();
+	(void) BPF_PROG_RUN(prog, args);
+	preempt_enable();
+	rcu_read_unlock();
+}
+
+#define UNPACK(...)			__VA_ARGS__
+#define REPEAT_1(FN, DL, X, ...)	FN(X)
+#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
+#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
+#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
+#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
+#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
+#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
+#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
+#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
+#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
+#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
+#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
+#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)
+
+#define SARG(X)		u64 arg##X
+#define COPY(X)		args[X] = arg##X
+
+#define __DL_COM	(,)
+#define __DL_SEM	(;)
+
+#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+
+#define BPF_TRACE_DEFN_x(x)						\
+	void bpf_trace_run##x(struct bpf_prog *prog,			\
+			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
+	{								\
+		u64 args[x];						\
+		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
+		__bpf_trace_run(prog, args);				\
+	}								\
+	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
+BPF_TRACE_DEFN_x(1);
+BPF_TRACE_DEFN_x(2);
+BPF_TRACE_DEFN_x(3);
+BPF_TRACE_DEFN_x(4);
+BPF_TRACE_DEFN_x(5);
+BPF_TRACE_DEFN_x(6);
+BPF_TRACE_DEFN_x(7);
+BPF_TRACE_DEFN_x(8);
+BPF_TRACE_DEFN_x(9);
+BPF_TRACE_DEFN_x(10);
+BPF_TRACE_DEFN_x(11);
+BPF_TRACE_DEFN_x(12);
+
+static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+{
+	struct tracepoint *tp = btp->tp;
+
+	/*
+	 * check that program doesn't access arguments beyond what's
+	 * available in this tracepoint
+	 */
+	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
+		return -EINVAL;
+
+	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
+}
+
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+{
+	int err;
+
+	mutex_lock(&bpf_event_mutex);
+	err = __bpf_probe_register(btp, prog);
+	mutex_unlock(&bpf_event_mutex);
+	return err;
+}
+
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+{
+	int err;
+
+	mutex_lock(&bpf_event_mutex);
+	err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
+	mutex_unlock(&bpf_event_mutex);
+	return err;
+}

From a0fe3e574b50461c4811ce81811f0eaefda62871 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:38 -0700
Subject: [PATCH 1390/1678] libbpf: add bpf_raw_tracepoint_open helper

add bpf_raw_tracepoint_open(const char *name, int prog_fd) api to libbpf

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 tools/lib/bpf/bpf.c            | 11 +++++++++++
 tools/lib/bpf/bpf.h            |  1 +
 3 files changed, 23 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d245c41213ac..58060bec999d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@ enum bpf_cmd {
 	BPF_MAP_GET_FD_BY_ID,
 	BPF_OBJ_GET_INFO_BY_FD,
 	BPF_PROG_QUERY,
+	BPF_RAW_TRACEPOINT_OPEN,
 };
 
 enum bpf_map_type {
@@ -134,6 +135,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_RAW_TRACEPOINT,
 };
 
 enum bpf_attach_type {
@@ -344,6 +346,11 @@ union bpf_attr {
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
 	} query;
+
+	struct {
+		__u64 name;
+		__u32 prog_fd;
+	} raw_tracepoint;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -1151,4 +1158,8 @@ struct bpf_cgroup_dev_ctx {
 	__u32 minor;
 };
 
+struct bpf_raw_tracepoint_args {
+	__u64 args[0];
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 592a58a2b681..e0500055f1a6 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -428,6 +428,17 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 	return err;
 }
 
+int bpf_raw_tracepoint_open(const char *name, int prog_fd)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.raw_tracepoint.name = ptr_to_u64(name);
+	attr.raw_tracepoint.prog_fd = prog_fd;
+
+	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+}
+
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 {
 	struct sockaddr_nl sa;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 8d18fb73d7fb..ee59342c6f42 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -79,4 +79,5 @@ int bpf_map_get_fd_by_id(__u32 id);
 int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
+int bpf_raw_tracepoint_open(const char *name, int prog_fd);
 #endif

From 4662a4e53890badf4da17e441606a2885f29d56d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:39 -0700
Subject: [PATCH 1391/1678] samples/bpf: raw tracepoint test

add empty raw_tracepoint bpf program to test overhead similar
to kprobe and traditional tracepoint tests

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/Makefile                    |  1 +
 samples/bpf/bpf_load.c                  | 14 ++++++++++++++
 samples/bpf/test_overhead_raw_tp_kern.c | 17 +++++++++++++++++
 samples/bpf/test_overhead_user.c        | 12 ++++++++++++
 4 files changed, 44 insertions(+)
 create mode 100644 samples/bpf/test_overhead_raw_tp_kern.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 2c2a587e0942..4d6a6edd4bf6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -119,6 +119,7 @@ always += offwaketime_kern.o
 always += spintest_kern.o
 always += map_perf_test_kern.o
 always += test_overhead_tp_kern.o
+always += test_overhead_raw_tp_kern.o
 always += test_overhead_kprobe_kern.o
 always += parse_varlen.o parse_simple.o parse_ldabs.o
 always += test_cgrp2_tc_kern.o
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index b1a310c3ae89..bebe4188b4b3 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -61,6 +61,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
 	bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
 	bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
+	bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0;
 	bool is_xdp = strncmp(event, "xdp", 3) == 0;
 	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
 	bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
@@ -85,6 +86,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		prog_type = BPF_PROG_TYPE_KPROBE;
 	} else if (is_tracepoint) {
 		prog_type = BPF_PROG_TYPE_TRACEPOINT;
+	} else if (is_raw_tracepoint) {
+		prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT;
 	} else if (is_xdp) {
 		prog_type = BPF_PROG_TYPE_XDP;
 	} else if (is_perf_event) {
@@ -131,6 +134,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		return populate_prog_array(event, fd);
 	}
 
+	if (is_raw_tracepoint) {
+		efd = bpf_raw_tracepoint_open(event + 15, fd);
+		if (efd < 0) {
+			printf("tracepoint %s %s\n", event + 15, strerror(errno));
+			return -1;
+		}
+		event_fd[prog_cnt - 1] = efd;
+		return 0;
+	}
+
 	if (is_kprobe || is_kretprobe) {
 		if (is_kprobe)
 			event += 7;
@@ -587,6 +600,7 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 		if (memcmp(shname, "kprobe/", 7) == 0 ||
 		    memcmp(shname, "kretprobe/", 10) == 0 ||
 		    memcmp(shname, "tracepoint/", 11) == 0 ||
+		    memcmp(shname, "raw_tracepoint/", 15) == 0 ||
 		    memcmp(shname, "xdp", 3) == 0 ||
 		    memcmp(shname, "perf_event", 10) == 0 ||
 		    memcmp(shname, "socket", 6) == 0 ||
diff --git a/samples/bpf/test_overhead_raw_tp_kern.c b/samples/bpf/test_overhead_raw_tp_kern.c
new file mode 100644
index 000000000000..d2af8bc1c805
--- /dev/null
+++ b/samples/bpf/test_overhead_raw_tp_kern.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("raw_tracepoint/task_rename")
+int prog(struct bpf_raw_tracepoint_args *ctx)
+{
+	return 0;
+}
+
+SEC("raw_tracepoint/urandom_read")
+int prog2(struct bpf_raw_tracepoint_args *ctx)
+{
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c
index d291167fd3c7..e1d35e07a10e 100644
--- a/samples/bpf/test_overhead_user.c
+++ b/samples/bpf/test_overhead_user.c
@@ -158,5 +158,17 @@ int main(int argc, char **argv)
 		unload_progs();
 	}
 
+	if (test_flags & 0xC0) {
+		snprintf(filename, sizeof(filename),
+			 "%s_raw_tp_kern.o", argv[0]);
+		if (load_bpf_file(filename)) {
+			printf("%s", bpf_log_buf);
+			return 1;
+		}
+		printf("w/RAW_TRACEPOINT\n");
+		run_perf_test(num_cpu, test_flags >> 6);
+		unload_progs();
+	}
+
 	return 0;
 }

From 3bbe0869884ceebffd59d5519c1d560207c6e116 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 28 Mar 2018 12:05:40 -0700
Subject: [PATCH 1392/1678] selftests/bpf: test for bpf_get_stackid() from raw
 tracepoints

similar to traditional traceopint test add bpf_get_stackid() test
from raw tracepoints
and reduce verbosity of existing stackmap test

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_progs.c | 91 ++++++++++++++++++------
 1 file changed, 70 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index e9df48b306df..faadbe233966 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -877,7 +877,7 @@ static void test_stacktrace_map()
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
-		goto out;
+		return;
 
 	/* Get the ID for the sched/sched_switch tracepoint */
 	snprintf(buf, sizeof(buf),
@@ -888,8 +888,7 @@ static void test_stacktrace_map()
 
 	bytes = read(efd, buf, sizeof(buf));
 	close(efd);
-	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
-		  "read", "bytes %d errno %d\n", bytes, errno))
+	if (bytes <= 0 || bytes >= sizeof(buf))
 		goto close_prog;
 
 	/* Open the perf event and attach bpf progrram */
@@ -906,29 +905,24 @@ static void test_stacktrace_map()
 		goto close_prog;
 
 	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
-		  err, errno))
-		goto close_pmu;
+	if (err)
+		goto disable_pmu;
 
 	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
-		  err, errno))
+	if (err)
 		goto disable_pmu;
 
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
-	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
-		  "err %d errno %d\n", err, errno))
+	if (control_map_fd < 0)
 		goto disable_pmu;
 
 	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
-	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
-		  "err %d errno %d\n", err, errno))
+	if (stackid_hmap_fd < 0)
 		goto disable_pmu;
 
 	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
-	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
-		  err, errno))
+	if (stackmap_fd < 0)
 		goto disable_pmu;
 
 	/* give some time for bpf program run */
@@ -945,24 +939,78 @@ static void test_stacktrace_map()
 	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
 	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto disable_pmu_noerr;
 
 	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
 	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
 		  "err %d errno %d\n", err, errno))
-		; /* fall through */
+		goto disable_pmu_noerr;
 
+	goto disable_pmu_noerr;
 disable_pmu:
+	error_cnt++;
+disable_pmu_noerr:
 	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-
-close_pmu:
 	close(pmu_fd);
-
 close_prog:
 	bpf_object__close(obj);
+}
 
-out:
-	return;
+static void test_stacktrace_map_raw_tp()
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	const char *file = "./test_stacktrace_map.o";
+	int efd, err, prog_fd;
+	__u32 key, val, duration = 0;
+	struct bpf_object *obj;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (control_map_fd < 0)
+		goto close_prog;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (stackid_hmap_fd < 0)
+		goto close_prog;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (stackmap_fd < 0)
+		goto close_prog;
+
+	/* give some time for bpf program run */
+	sleep(1);
+
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
 }
 
 static int extract_build_id(char *build_id, size_t size)
@@ -1138,6 +1186,7 @@ int main(void)
 	test_tp_attach_query();
 	test_stacktrace_map();
 	test_stacktrace_build_id();
+	test_stacktrace_map_raw_tp();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;

From be75923786aa28774bf2b5ef8184590a52429103 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Wed, 28 Mar 2018 17:48:25 -0700
Subject: [PATCH 1393/1678] nfp: bpf: read from packet data cache for
 PTR_TO_PACKET

This patch assumes there is a packet data cache, and would try to read
packet data from the cache instead of from memory.

This patch only implements the optimisation "backend", it doesn't build
the packet data cache, so this optimisation is not enabled.

This patch has only enabled aligned packet data read, i.e. when the read
offset to the start of cache is REG_WIDTH aligned.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 80 ++++++++++++++++++-
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  9 +++
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  1 +
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 56451edf01c2..0a2c1d87fed2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1838,6 +1838,74 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 					 tmp_reg, meta->insn.dst_reg * 2, size);
 }
 
+static void
+mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta)
+{
+	s16 range_start = meta->pkt_cache.range_start;
+	s16 range_end = meta->pkt_cache.range_end;
+	swreg src_base, off;
+	u8 xfer_num, len;
+	bool indir;
+
+	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
+	src_base = reg_a(meta->insn.src_reg * 2);
+	len = range_end - range_start;
+	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
+
+	indir = len > 8 * REG_WIDTH;
+	/* Setup PREV_ALU for indirect mode. */
+	if (indir)
+		wrp_immed(nfp_prog, reg_none(),
+			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+
+	/* Cache memory into transfer-in registers. */
+	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
+		     off, xfer_num - 1, true, indir);
+}
+
+static int
+mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
+				   struct nfp_insn_meta *meta,
+				   unsigned int size)
+{
+	swreg dst_lo, dst_hi, src_lo;
+	u8 dst_gpr, idx;
+
+	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
+	dst_gpr = meta->insn.dst_reg * 2;
+	dst_hi = reg_both(dst_gpr + 1);
+	dst_lo = reg_both(dst_gpr);
+	src_lo = reg_xfer(idx);
+
+	if (size < REG_WIDTH) {
+		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else if (size == REG_WIDTH) {
+		wrp_mov(nfp_prog, dst_lo, src_lo);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else {
+		swreg src_hi = reg_xfer(idx + 1);
+
+		wrp_mov(nfp_prog, dst_lo, src_lo);
+		wrp_mov(nfp_prog, dst_hi, src_hi);
+	}
+
+	return 0;
+}
+
+static int
+mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta, unsigned int size)
+{
+	u8 off = meta->insn.off - meta->pkt_cache.range_start;
+
+	if (WARN_ON_ONCE(!IS_ALIGNED(off, REG_WIDTH)))
+		return -EOPNOTSUPP;
+
+	return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
+}
+
 static int
 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	unsigned int size)
@@ -1852,8 +1920,16 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 			return mem_ldx_skb(nfp_prog, meta, size);
 	}
 
-	if (meta->ptr.type == PTR_TO_PACKET)
-		return mem_ldx_data(nfp_prog, meta, size);
+	if (meta->ptr.type == PTR_TO_PACKET) {
+		if (meta->pkt_cache.range_end) {
+			if (meta->pkt_cache.do_init)
+				mem_ldx_data_init_pktcache(nfp_prog, meta);
+
+			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
+		} else {
+			return mem_ldx_data(nfp_prog, meta, size);
+		}
+	}
 
 	if (meta->ptr.type == PTR_TO_STACK)
 		return mem_ldx_stack(nfp_prog, meta, size,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 054df3dc0698..861211e27ea6 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -199,6 +199,10 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
  * @ldst_gather_len: memcpy length gathered from load/store sequence
  * @paired_st: the paired store insn at the head of the sequence
  * @ptr_not_const: pointer is not always constant
+ * @pkt_cache: packet data cache information
+ * @pkt_cache.range_start: start offset for associated packet data cache
+ * @pkt_cache.range_end: end offset for associated packet data cache
+ * @pkt_cache.do_init: this read needs to initialize packet data cache
  * @jmp_dst: destination info for jump instructions
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
@@ -219,6 +223,11 @@ struct nfp_insn_meta {
 			struct bpf_insn *paired_st;
 			s16 ldst_gather_len;
 			bool ptr_not_const;
+			struct {
+				s16 range_start;
+				s16 range_end;
+				bool do_init;
+			} pkt_cache;
 		};
 		struct nfp_insn_meta *jmp_dst;
 		struct {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 5f9291db98e0..150d28f9cd52 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -39,6 +39,7 @@
 #include <linux/types.h>
 
 #define REG_NONE	0
+#define REG_WIDTH	4
 
 #define RE_REG_NO_DST	0x020
 #define RE_REG_IMM	0x020

From 91ff69e840f91016f464810e8940b99723abb5e8 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Wed, 28 Mar 2018 17:48:26 -0700
Subject: [PATCH 1394/1678] nfp: bpf: support unaligned read offset

This patch add the support for unaligned read offset, i.e. the read offset
to the start of packet cache area is not aligned to REG_WIDTH. In this
case, the read area might across maximum three transfer-in registers.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 73 +++++++++++++++++++-
 1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 0a2c1d87fed2..38ed2f72a4b0 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -553,6 +553,19 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
 	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
 }
 
+/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
+ * result to @dst from offset, there is no change on the other bits of @dst.
+ */
+static void
+wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
+		   u8 field_len, u8 offset)
+{
+	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
+	u8 mask = ((1 << field_len) - 1) << offset;
+
+	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
+}
+
 static void
 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
 	      swreg *rega, swreg *regb)
@@ -1864,6 +1877,60 @@ mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
 		     off, xfer_num - 1, true, indir);
 }
 
+static int
+mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
+				     struct nfp_insn_meta *meta,
+				     unsigned int size)
+{
+	s16 range_start = meta->pkt_cache.range_start;
+	s16 insn_off = meta->insn.off - range_start;
+	swreg dst_lo, dst_hi, src_lo, src_mid;
+	u8 dst_gpr = meta->insn.dst_reg * 2;
+	u8 len_lo = size, len_mid = 0;
+	u8 idx = insn_off / REG_WIDTH;
+	u8 off = insn_off % REG_WIDTH;
+
+	dst_hi = reg_both(dst_gpr + 1);
+	dst_lo = reg_both(dst_gpr);
+	src_lo = reg_xfer(idx);
+
+	/* The read length could involve as many as three registers. */
+	if (size > REG_WIDTH - off) {
+		/* Calculate the part in the second register. */
+		len_lo = REG_WIDTH - off;
+		len_mid = size - len_lo;
+
+		/* Calculate the part in the third register. */
+		if (size > 2 * REG_WIDTH - off)
+			len_mid = REG_WIDTH;
+	}
+
+	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
+
+	if (!len_mid) {
+		wrp_immed(nfp_prog, dst_hi, 0);
+		return 0;
+	}
+
+	src_mid = reg_xfer(idx + 1);
+
+	if (size <= REG_WIDTH) {
+		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else {
+		swreg src_hi = reg_xfer(idx + 2);
+
+		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
+				   REG_WIDTH - len_lo, len_lo);
+		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
+				REG_WIDTH - len_lo);
+		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
+				   len_lo);
+	}
+
+	return 0;
+}
+
 static int
 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
 				   struct nfp_insn_meta *meta,
@@ -1900,10 +1967,10 @@ mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
 {
 	u8 off = meta->insn.off - meta->pkt_cache.range_start;
 
-	if (WARN_ON_ONCE(!IS_ALIGNED(off, REG_WIDTH)))
-		return -EOPNOTSUPP;
+	if (IS_ALIGNED(off, REG_WIDTH))
+		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
 
-	return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
+	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
 }
 
 static int

From 87b10ecdced224dc0def123e1f57bc6c5ac4ac5c Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Wed, 28 Mar 2018 17:48:27 -0700
Subject: [PATCH 1395/1678] nfp: bpf: detect packet reads could be cached,
 enable the optimisation

This patch is the front end of this optimisation, it detects and marks
those packet reads that could be cached. Then the optimisation "backend"
will be activated automatically.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 115 ++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  30 +++++
 2 files changed, 145 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 38ed2f72a4b0..0701590e8302 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2964,6 +2964,120 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
 	}
 }
 
+static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *range_node = NULL;
+	s16 range_start = 0, range_end = 0;
+	bool cache_avail = false;
+	struct bpf_insn *insn;
+	s32 range_ptr_off = 0;
+	u32 range_ptr_id = 0;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
+			cache_avail = false;
+
+		if (meta->skip)
+			continue;
+
+		insn = &meta->insn;
+
+		if (is_mbpf_store_pkt(meta) ||
+		    insn->code == (BPF_JMP | BPF_CALL) ||
+		    is_mbpf_classic_store_pkt(meta) ||
+		    is_mbpf_classic_load(meta)) {
+			cache_avail = false;
+			continue;
+		}
+
+		if (!is_mbpf_load(meta))
+			continue;
+
+		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
+			cache_avail = false;
+			continue;
+		}
+
+		if (!cache_avail) {
+			cache_avail = true;
+			if (range_node)
+				goto end_current_then_start_new;
+			goto start_new;
+		}
+
+		/* Check ID to make sure two reads share the same
+		 * variable offset against PTR_TO_PACKET, and check OFF
+		 * to make sure they also share the same constant
+		 * offset.
+		 *
+		 * OFFs don't really need to be the same, because they
+		 * are the constant offsets against PTR_TO_PACKET, so
+		 * for different OFFs, we could canonicalize them to
+		 * offsets against original packet pointer. We don't
+		 * support this.
+		 */
+		if (meta->ptr.id == range_ptr_id &&
+		    meta->ptr.off == range_ptr_off) {
+			s16 new_start = range_start;
+			s16 end, off = insn->off;
+			s16 new_end = range_end;
+			bool changed = false;
+
+			if (off < range_start) {
+				new_start = off;
+				changed = true;
+			}
+
+			end = off + BPF_LDST_BYTES(insn);
+			if (end > range_end) {
+				new_end = end;
+				changed = true;
+			}
+
+			if (!changed)
+				continue;
+
+			if (new_end - new_start <= 64) {
+				/* Install new range. */
+				range_start = new_start;
+				range_end = new_end;
+				continue;
+			}
+		}
+
+end_current_then_start_new:
+		range_node->pkt_cache.range_start = range_start;
+		range_node->pkt_cache.range_end = range_end;
+start_new:
+		range_node = meta;
+		range_node->pkt_cache.do_init = true;
+		range_ptr_id = range_node->ptr.id;
+		range_ptr_off = range_node->ptr.off;
+		range_start = insn->off;
+		range_end = insn->off + BPF_LDST_BYTES(insn);
+	}
+
+	if (range_node) {
+		range_node->pkt_cache.range_start = range_start;
+		range_node->pkt_cache.range_end = range_end;
+	}
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->skip)
+			continue;
+
+		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
+			if (meta->pkt_cache.do_init) {
+				range_start = meta->pkt_cache.range_start;
+				range_end = meta->pkt_cache.range_end;
+			} else {
+				meta->pkt_cache.range_start = range_start;
+				meta->pkt_cache.range_end = range_end;
+			}
+		}
+	}
+}
+
 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
@@ -2971,6 +3085,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
 	nfp_bpf_opt_ldst_gather(nfp_prog);
+	nfp_bpf_opt_pkt_cache(nfp_prog);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 861211e27ea6..c220ac5a734a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -278,6 +278,36 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
 }
 
+static inline bool is_mbpf_load_pkt(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_load(meta) && meta->ptr.type == PTR_TO_PACKET;
+}
+
+static inline bool is_mbpf_store_pkt(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_store(meta) && meta->ptr.type == PTR_TO_PACKET;
+}
+
+static inline bool is_mbpf_classic_load(const struct nfp_insn_meta *meta)
+{
+	u8 code = meta->insn.code;
+
+	return BPF_CLASS(code) == BPF_LD &&
+	       (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND);
+}
+
+static inline bool is_mbpf_classic_store(const struct nfp_insn_meta *meta)
+{
+	u8 code = meta->insn.code;
+
+	return BPF_CLASS(code) == BPF_ST && BPF_MODE(code) == BPF_MEM;
+}
+
+static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta)
+{
+	return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET;
+}
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure

From fc4484970e7e4bf40ba958f7bc48474ddadfdbfc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:28 -0700
Subject: [PATCH 1396/1678] nfp: bpf: rename map_lookup_stack() to
 map_call_stack_common()

We will reuse most of map call code gen for other map calls.
Rename the lookup gen function and use meta->func_id instead
of hard-coding lookup.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 0701590e8302..b37c9f86a18f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1352,7 +1352,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 }
 
 static int
-map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	struct bpf_offloaded_map *offmap;
 	struct nfp_bpf_map *nfp_map;
@@ -1378,7 +1378,7 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	 */
 	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
 
-	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
+	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
 		     2, RELO_BR_HELPER);
 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
 
@@ -2326,7 +2326,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	case BPF_FUNC_xdp_adjust_head:
 		return adjust_head(nfp_prog, meta);
 	case BPF_FUNC_map_lookup_elem:
-		return map_lookup_stack(nfp_prog, meta);
+		return map_call_stack_common(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
 		return -EOPNOTSUPP;

From 2f46e0c1273512cbdb944f5a50cc93cf0888c3d7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:29 -0700
Subject: [PATCH 1397/1678] nfp: bpf: add helper for validating stack pointers

Our implementation has restriction on stack pointers for function
calls.  Move the common checks into a helper for reuse.  The state
has to be encapsulated into a structure to support parameters
other than BPF_REG_2.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  |  4 +-
 drivers/net/ethernet/netronome/nfp/bpf/main.h | 14 ++++-
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 59 ++++++++++++-------
 3 files changed, 50 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index b37c9f86a18f..3799ca9b9826 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1366,8 +1366,8 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 	/* We only have to reload LM0 if the key is not at start of stack */
 	lm_off = nfp_prog->stack_depth;
-	lm_off += meta->arg2.var_off.value + meta->arg2.off;
-	load_lm_ptr = meta->arg2_var_off || lm_off;
+	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
+	load_lm_ptr = meta->arg2.var_off || lm_off;
 
 	/* Set LM0 to start of key */
 	if (load_lm_ptr)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index c220ac5a734a..0f920bf48b01 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -190,6 +190,16 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
 #define nfp_meta_next(meta)	list_next_entry(meta, l)
 #define nfp_meta_prev(meta)	list_prev_entry(meta, l)
 
+/**
+ * struct nfp_bpf_reg_state - register state for calls
+ * @reg: BPF register state from latest path
+ * @var_off: for stack arg - changes stack offset on different paths
+ */
+struct nfp_bpf_reg_state {
+	struct bpf_reg_state reg;
+	bool var_off;
+};
+
 #define FLAG_INSN_IS_JUMP_DST	BIT(0)
 
 /**
@@ -207,7 +217,6 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
- * @arg2_var_off: arg2 changes stack offset on different paths
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @flags: eBPF instruction extra optimization flags
@@ -233,8 +242,7 @@ struct nfp_insn_meta {
 		struct {
 			u32 func_id;
 			struct bpf_reg_state arg1;
-			struct bpf_reg_state arg2;
-			bool arg2_var_off;
+			struct nfp_bpf_reg_state arg2;
 		};
 	};
 	unsigned int off;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 479f602887e9..45aa11fcdcad 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -97,7 +97,7 @@ nfp_record_adjust_head(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
 		if (nfp_prog->adjust_head_location != meta->n)
 			goto exit_set_location;
 
-		if (meta->arg2.var_off.value != imm)
+		if (meta->arg2.reg.var_off.value != imm)
 			goto exit_set_location;
 	}
 
@@ -106,6 +106,39 @@ exit_set_location:
 	nfp_prog->adjust_head_location = location;
 }
 
+static int
+nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
+		     const struct bpf_reg_state *reg,
+		     struct nfp_bpf_reg_state *old_arg)
+{
+	s64 off, old_off;
+
+	if (reg->type != PTR_TO_STACK) {
+		pr_vlog(env, "%s: unsupported ptr type %d\n",
+			fname, reg->type);
+		return false;
+	}
+	if (!tnum_is_const(reg->var_off)) {
+		pr_vlog(env, "%s: variable pointer\n", fname);
+		return false;
+	}
+
+	off = reg->var_off.value + reg->off;
+	if (-off % 4) {
+		pr_vlog(env, "%s: unaligned stack pointer %lld\n", fname, -off);
+		return false;
+	}
+
+	/* Rest of the checks is only if we re-parse the same insn */
+	if (!old_arg)
+		return true;
+
+	old_off = old_arg->reg.var_off.value + old_arg->reg.off;
+	old_arg->var_off |= off != old_off;
+
+	return true;
+}
+
 static int
 nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		   struct nfp_insn_meta *meta)
@@ -114,7 +147,6 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 	const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
 	struct nfp_app_bpf *bpf = nfp_prog->bpf;
 	u32 func_id = meta->insn.imm;
-	s64 off, old_off;
 
 	switch (func_id) {
 	case BPF_FUNC_xdp_adjust_head:
@@ -135,32 +167,15 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 			pr_vlog(env, "map_lookup: not supported by FW\n");
 			return -EOPNOTSUPP;
 		}
-		if (reg2->type != PTR_TO_STACK) {
-			pr_vlog(env,
-				"map_lookup: unsupported key ptr type %d\n",
-				reg2->type);
-			return -EOPNOTSUPP;
-		}
-		if (!tnum_is_const(reg2->var_off)) {
-			pr_vlog(env, "map_lookup: variable key pointer\n");
-			return -EOPNOTSUPP;
-		}
 
-		off = reg2->var_off.value + reg2->off;
-		if (-off % 4) {
-			pr_vlog(env,
-				"map_lookup: unaligned stack pointer %lld\n",
-				-off);
+		if (!nfp_bpf_stack_arg_ok("map_lookup", env, reg2,
+					  meta->func_id ? &meta->arg2 : NULL))
 			return -EOPNOTSUPP;
-		}
 
 		/* Rest of the checks is only if we re-parse the same insn */
 		if (!meta->func_id)
 			break;
 
-		old_off = meta->arg2.var_off.value + meta->arg2.off;
-		meta->arg2_var_off |= off != old_off;
-
 		if (meta->arg1.map_ptr != reg1->map_ptr) {
 			pr_vlog(env, "map_lookup: called for different map\n");
 			return -EOPNOTSUPP;
@@ -173,7 +188,7 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 
 	meta->func_id = func_id;
 	meta->arg1 = *reg1;
-	meta->arg2 = *reg2;
+	meta->arg2.reg = *reg2;
 
 	return 0;
 }

From 289c5b763018be846900da706dc582e572a13864 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:30 -0700
Subject: [PATCH 1398/1678] nfp: bpf: add helper for basic map call checks

Add a verifier helper for performing the basic state checks
before a call to a map helper.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 40 ++++++++++++-------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 45aa11fcdcad..acfc4d798116 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -139,6 +139,28 @@ nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env,
 	return true;
 }
 
+static bool
+nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
+		    struct nfp_insn_meta *meta,
+		    u32 helper_tgt, const struct bpf_reg_state *reg1)
+{
+	if (!helper_tgt) {
+		pr_vlog(env, "%s: not supported by FW\n", fname);
+		return false;
+	}
+
+	/* Rest of the checks is only if we re-parse the same insn */
+	if (!meta->func_id)
+		return true;
+
+	if (meta->arg1.map_ptr != reg1->map_ptr) {
+		pr_vlog(env, "%s: called for different map\n", fname);
+		return false;
+	}
+
+	return true;
+}
+
 static int
 nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		   struct nfp_insn_meta *meta)
@@ -163,23 +185,11 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		break;
 
 	case BPF_FUNC_map_lookup_elem:
-		if (!bpf->helpers.map_lookup) {
-			pr_vlog(env, "map_lookup: not supported by FW\n");
-			return -EOPNOTSUPP;
-		}
-
-		if (!nfp_bpf_stack_arg_ok("map_lookup", env, reg2,
+		if (!nfp_bpf_map_call_ok("map_lookup", env, meta,
+					 bpf->helpers.map_lookup, reg1) ||
+		    !nfp_bpf_stack_arg_ok("map_lookup", env, reg2,
 					  meta->func_id ? &meta->arg2 : NULL))
 			return -EOPNOTSUPP;
-
-		/* Rest of the checks is only if we re-parse the same insn */
-		if (!meta->func_id)
-			break;
-
-		if (meta->arg1.map_ptr != reg1->map_ptr) {
-			pr_vlog(env, "map_lookup: called for different map\n");
-			return -EOPNOTSUPP;
-		}
 		break;
 	default:
 		pr_vlog(env, "unsupported function id: %d\n", func_id);

From 44d65a47aeabc40619ad6d1900e0f54e5b5145b8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:31 -0700
Subject: [PATCH 1399/1678] nfp: bpf: add map updates from the datapath

Support calling map_update_elem() from the datapath programs
by calling into FW-provided helper.  Value pointer is passed
in LM pointer #2.  Keeping track of old state for arg3 is not
necessary, since LM pointer #2 will be always loaded in this
case, the trivial optimization for value at the bottom of the
stack can't be done here.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c      |  6 ++++++
 drivers/net/ethernet/netronome/nfp/bpf/main.c     |  3 +++
 drivers/net/ethernet/netronome/nfp/bpf/main.h     |  2 ++
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 10 ++++++++++
 4 files changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 3799ca9b9826..c63368fc28f6 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1372,6 +1372,8 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	/* Set LM0 to start of key */
 	if (load_lm_ptr)
 		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
+	if (meta->func_id == BPF_FUNC_map_update_elem)
+		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
 
 	/* Load map ID into a register, it should actually fit as an immediate
 	 * but in case it doesn't deal with it here, not in the delay slots.
@@ -2326,6 +2328,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	case BPF_FUNC_xdp_adjust_head:
 		return adjust_head(nfp_prog, meta);
 	case BPF_FUNC_map_lookup_elem:
+	case BPF_FUNC_map_update_elem:
 		return map_call_stack_common(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
@@ -3210,6 +3213,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
 			case BPF_FUNC_map_lookup_elem:
 				val = nfp_prog->bpf->helpers.map_lookup;
 				break;
+			case BPF_FUNC_map_update_elem:
+				val = nfp_prog->bpf->helpers.map_update;
+				break;
 			default:
 				pr_err("relocation of unknown helper %d\n",
 				       val);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 34e98aa6b956..db6940551b32 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -284,6 +284,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	case BPF_FUNC_map_lookup_elem:
 		bpf->helpers.map_lookup = readl(&cap->func_addr);
 		break;
+	case BPF_FUNC_map_update_elem:
+		bpf->helpers.map_update = readl(&cap->func_addr);
+		break;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 0f920bf48b01..0246bd88bff3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -128,6 +128,7 @@ enum pkt_vec {
  *
  * @helpers:		helper addressess for various calls
  * @helpers.map_lookup:		map lookup helper address
+ * @helpers.map_update:		map update helper address
  */
 struct nfp_app_bpf {
 	struct nfp_app *app;
@@ -162,6 +163,7 @@ struct nfp_app_bpf {
 
 	struct {
 		u32 map_lookup;
+		u32 map_update;
 	} helpers;
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index acfc4d798116..482a0ce6e755 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -167,6 +167,7 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 {
 	const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
 	const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
+	const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3;
 	struct nfp_app_bpf *bpf = nfp_prog->bpf;
 	u32 func_id = meta->insn.imm;
 
@@ -191,6 +192,15 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 					  meta->func_id ? &meta->arg2 : NULL))
 			return -EOPNOTSUPP;
 		break;
+
+	case BPF_FUNC_map_update_elem:
+		if (!nfp_bpf_map_call_ok("map_update", env, meta,
+					 bpf->helpers.map_update, reg1) ||
+		    !nfp_bpf_stack_arg_ok("map_update", env, reg2,
+					  meta->func_id ? &meta->arg2 : NULL) ||
+		    !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL))
+			return -EOPNOTSUPP;
+		break;
 	default:
 		pr_vlog(env, "unsupported function id: %d\n", func_id);
 		return -EOPNOTSUPP;

From bfee64deaa766ba88ff42fcbced82ad4ef6cb315 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:32 -0700
Subject: [PATCH 1400/1678] nfp: bpf: add map deletes from the datapath

Support calling map_delete_elem() FW helper from the datapath
programs.  For JIT checks and code are basically equivalent
to map lookups.  Similarly to other map helper key must be on
the stack.  Different pointer types are left for future extension.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c      | 4 ++++
 drivers/net/ethernet/netronome/nfp/bpf/main.c     | 3 +++
 drivers/net/ethernet/netronome/nfp/bpf/main.h     | 2 ++
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 8 ++++++++
 4 files changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index c63368fc28f6..d8df56087961 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2329,6 +2329,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		return adjust_head(nfp_prog, meta);
 	case BPF_FUNC_map_lookup_elem:
 	case BPF_FUNC_map_update_elem:
+	case BPF_FUNC_map_delete_elem:
 		return map_call_stack_common(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
@@ -3216,6 +3217,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
 			case BPF_FUNC_map_update_elem:
 				val = nfp_prog->bpf->helpers.map_update;
 				break;
+			case BPF_FUNC_map_delete_elem:
+				val = nfp_prog->bpf->helpers.map_delete;
+				break;
 			default:
 				pr_err("relocation of unknown helper %d\n",
 				       val);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index db6940551b32..a7e217c5204f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -287,6 +287,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	case BPF_FUNC_map_update_elem:
 		bpf->helpers.map_update = readl(&cap->func_addr);
 		break;
+	case BPF_FUNC_map_delete_elem:
+		bpf->helpers.map_delete = readl(&cap->func_addr);
+		break;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 0246bd88bff3..26bb491224b3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -129,6 +129,7 @@ enum pkt_vec {
  * @helpers:		helper addressess for various calls
  * @helpers.map_lookup:		map lookup helper address
  * @helpers.map_update:		map update helper address
+ * @helpers.map_delete:		map delete helper address
  */
 struct nfp_app_bpf {
 	struct nfp_app *app;
@@ -164,6 +165,7 @@ struct nfp_app_bpf {
 	struct {
 		u32 map_lookup;
 		u32 map_update;
+		u32 map_delete;
 	} helpers;
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 482a0ce6e755..7d67ffc897dd 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -201,6 +201,14 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 		    !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL))
 			return -EOPNOTSUPP;
 		break;
+
+	case BPF_FUNC_map_delete_elem:
+		if (!nfp_bpf_map_call_ok("map_delete", env, meta,
+					 bpf->helpers.map_delete, reg1) ||
+		    !nfp_bpf_stack_arg_ok("map_delete", env, reg2,
+					  meta->func_id ? &meta->arg2 : NULL))
+			return -EOPNOTSUPP;
+		break;
 	default:
 		pr_vlog(env, "unsupported function id: %d\n", func_id);
 		return -EOPNOTSUPP;

From e59ac634908f4ea90066e6db7dd7ae8ca02815ff Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:33 -0700
Subject: [PATCH 1401/1678] bpf: add parenthesis around argument of
 BPF_LDST_BYTES()

BPF_LDST_BYTES() does not put it's argument in parenthesis
when referencing it.  This makes it impossible to pass pointers
obtained by address-of operator (e.g. BPF_LDST_BYTES(&insn)).
Add the parenthesis.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 109d05ccea9a..c2f167db8bd5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -372,7 +372,7 @@ struct xdp_rxq_info;
 
 #define BPF_LDST_BYTES(insn)					\
 	({							\
-		const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \
+		const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \
 		WARN_ON(__size < 0);				\
 		__size;						\
 	})

From dcb0c27f3c989fecae42593f470a2413434aae28 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:34 -0700
Subject: [PATCH 1402/1678] nfp: bpf: add basic support for atomic adds

Implement atomic add operation for 32 and 64 bit values.  Depend
on the verifier to ensure alignment.  Values have to be kept in
big endian and swapped upon read/write.  For now only support
atomic add of a constant.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  |  45 ++++++++
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  17 +++
 .../net/ethernet/netronome/nfp/bpf/offload.c  |  45 +++++++-
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 105 ++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/nfp_asm.c  |   1 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |   2 +
 6 files changed, 212 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index d8df56087961..c7fdb8c7ae17 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2127,6 +2127,49 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return mem_stx(nfp_prog, meta, 8);
 }
 
+static int
+mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
+{
+	swreg addra, addrb, off, prev_alu = imm_a(nfp_prog);
+	u8 dst_gpr = meta->insn.dst_reg * 2;
+	u8 src_gpr = meta->insn.src_reg * 2;
+
+	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+	/* If insn has an offset add to the address */
+	if (!meta->insn.off) {
+		addra = reg_a(dst_gpr);
+		addrb = reg_b(dst_gpr + 1);
+	} else {
+		emit_alu(nfp_prog, imma_a(nfp_prog),
+			 reg_a(dst_gpr), ALU_OP_ADD, off);
+		emit_alu(nfp_prog, imma_b(nfp_prog),
+			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
+		addra = imma_a(nfp_prog);
+		addrb = imma_b(nfp_prog);
+	}
+
+	wrp_immed(nfp_prog, prev_alu,
+		  FIELD_PREP(CMD_OVE_DATA, 2) |
+		  CMD_OVE_LEN |
+		  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
+	wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
+	emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
+		       addra, addrb, 0, false);
+
+	return 0;
+}
+
+static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_xadd(nfp_prog, meta, false);
+}
+
+static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_xadd(nfp_prog, meta, true);
+}
+
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
@@ -2390,6 +2433,8 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
+	[BPF_STX | BPF_XADD | BPF_W] =	mem_xadd4,
+	[BPF_STX | BPF_XADD | BPF_DW] =	mem_xadd8,
 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 26bb491224b3..877be7143991 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -72,6 +72,7 @@ enum nfp_relo_type {
 #define BR_OFF_RELO		15000
 
 enum static_regs {
+	STATIC_REG_IMMA		= 20, /* Bank AB */
 	STATIC_REG_IMM		= 21, /* Bank AB */
 	STATIC_REG_STACK	= 22, /* Bank A */
 	STATIC_REG_PKT_LEN	= 22, /* Bank B */
@@ -91,6 +92,8 @@ enum pkt_vec {
 #define pptr_reg(np)	pv_ctm_ptr(np)
 #define imm_a(np)	reg_a(STATIC_REG_IMM)
 #define imm_b(np)	reg_b(STATIC_REG_IMM)
+#define imma_a(np)	reg_a(STATIC_REG_IMMA)
+#define imma_b(np)	reg_b(STATIC_REG_IMMA)
 #define imm_both(np)	reg_both(STATIC_REG_IMM)
 
 #define NFP_BPF_ABI_FLAGS	reg_imm(0)
@@ -169,18 +172,27 @@ struct nfp_app_bpf {
 	} helpers;
 };
 
+enum nfp_bpf_map_use {
+	NFP_MAP_UNUSED = 0,
+	NFP_MAP_USE_READ,
+	NFP_MAP_USE_WRITE,
+	NFP_MAP_USE_ATOMIC_CNT,
+};
+
 /**
  * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
  * @offmap:	pointer to the offloaded BPF map
  * @bpf:	back pointer to bpf app private structure
  * @tid:	table id identifying map on datapath
  * @l:		link on the nfp_app_bpf->map_list list
+ * @use_map:	map of how the value is used (in 4B chunks)
  */
 struct nfp_bpf_map {
 	struct bpf_offloaded_map *offmap;
 	struct nfp_app_bpf *bpf;
 	u32 tid;
 	struct list_head l;
+	enum nfp_bpf_map_use use_map[];
 };
 
 struct nfp_prog;
@@ -320,6 +332,11 @@ static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta)
 	return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET;
 }
 
+static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
+{
+	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
+}
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 0a7732385469..42d98792bd25 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -164,6 +164,41 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
 	return 0;
 }
 
+/* Atomic engine requires values to be in big endian, we need to byte swap
+ * the value words used with xadd.
+ */
+static void nfp_map_bpf_byte_swap(struct nfp_bpf_map *nfp_map, void *value)
+{
+	u32 *word = value;
+	unsigned int i;
+
+	for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++)
+		if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT)
+			word[i] = (__force u32)cpu_to_be32(word[i]);
+}
+
+static int
+nfp_bpf_map_lookup_entry(struct bpf_offloaded_map *offmap,
+			 void *key, void *value)
+{
+	int err;
+
+	err = nfp_bpf_ctrl_lookup_entry(offmap, key, value);
+	if (err)
+		return err;
+
+	nfp_map_bpf_byte_swap(offmap->dev_priv, value);
+	return 0;
+}
+
+static int
+nfp_bpf_map_update_entry(struct bpf_offloaded_map *offmap,
+			 void *key, void *value, u64 flags)
+{
+	nfp_map_bpf_byte_swap(offmap->dev_priv, value);
+	return nfp_bpf_ctrl_update_entry(offmap, key, value, flags);
+}
+
 static int
 nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
 			 void *key, void *next_key)
@@ -183,8 +218,8 @@ nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
 
 static const struct bpf_map_dev_ops nfp_bpf_map_ops = {
 	.map_get_next_key	= nfp_bpf_map_get_next_key,
-	.map_lookup_elem	= nfp_bpf_ctrl_lookup_entry,
-	.map_update_elem	= nfp_bpf_ctrl_update_entry,
+	.map_lookup_elem	= nfp_bpf_map_lookup_entry,
+	.map_update_elem	= nfp_bpf_map_update_entry,
 	.map_delete_elem	= nfp_bpf_map_delete_elem,
 };
 
@@ -192,6 +227,7 @@ static int
 nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 {
 	struct nfp_bpf_map *nfp_map;
+	unsigned int use_map_size;
 	long long int res;
 
 	if (!bpf->maps.types)
@@ -226,7 +262,10 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
 		return -ENOMEM;
 	}
 
-	nfp_map = kzalloc(sizeof(*nfp_map), GFP_USER);
+	use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) *
+		       FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]);
+
+	nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER);
 	if (!nfp_map)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 7d67ffc897dd..40619efea77d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -285,6 +285,72 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog,
 	return -EINVAL;
 }
 
+static const char *nfp_bpf_map_use_name(enum nfp_bpf_map_use use)
+{
+	static const char * const names[] = {
+		[NFP_MAP_UNUSED]	= "unused",
+		[NFP_MAP_USE_READ]	= "read",
+		[NFP_MAP_USE_WRITE]	= "write",
+		[NFP_MAP_USE_ATOMIC_CNT] = "atomic",
+	};
+
+	if (use >= ARRAY_SIZE(names) || !names[use])
+		return "unknown";
+	return names[use];
+}
+
+static int
+nfp_bpf_map_mark_used_one(struct bpf_verifier_env *env,
+			  struct nfp_bpf_map *nfp_map,
+			  unsigned int off, enum nfp_bpf_map_use use)
+{
+	if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED &&
+	    nfp_map->use_map[off / 4] != use) {
+		pr_vlog(env, "map value use type conflict %s vs %s off: %u\n",
+			nfp_bpf_map_use_name(nfp_map->use_map[off / 4]),
+			nfp_bpf_map_use_name(use), off);
+		return -EOPNOTSUPP;
+	}
+
+	nfp_map->use_map[off / 4] = use;
+
+	return 0;
+}
+
+static int
+nfp_bpf_map_mark_used(struct bpf_verifier_env *env, struct nfp_insn_meta *meta,
+		      const struct bpf_reg_state *reg,
+		      enum nfp_bpf_map_use use)
+{
+	struct bpf_offloaded_map *offmap;
+	struct nfp_bpf_map *nfp_map;
+	unsigned int size, off;
+	int i, err;
+
+	if (!tnum_is_const(reg->var_off)) {
+		pr_vlog(env, "map value offset is variable\n");
+		return -EOPNOTSUPP;
+	}
+
+	off = reg->var_off.value + meta->insn.off + reg->off;
+	size = BPF_LDST_BYTES(&meta->insn);
+	offmap = map_to_offmap(reg->map_ptr);
+	nfp_map = offmap->dev_priv;
+
+	if (off + size > offmap->map.value_size) {
+		pr_vlog(env, "map value access out-of-bounds\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < size; i += 4 - (off + i) % 4) {
+		err = nfp_bpf_map_mark_used_one(env, nfp_map, off + i, use);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int
 nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 		  struct bpf_verifier_env *env, u8 reg_no)
@@ -307,10 +373,22 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	}
 
 	if (reg->type == PTR_TO_MAP_VALUE) {
+		if (is_mbpf_load(meta)) {
+			err = nfp_bpf_map_mark_used(env, meta, reg,
+						    NFP_MAP_USE_READ);
+			if (err)
+				return err;
+		}
 		if (is_mbpf_store(meta)) {
 			pr_vlog(env, "map writes not supported\n");
 			return -EOPNOTSUPP;
 		}
+		if (is_mbpf_xadd(meta)) {
+			err = nfp_bpf_map_mark_used(env, meta, reg,
+						    NFP_MAP_USE_ATOMIC_CNT);
+			if (err)
+				return err;
+		}
 	}
 
 	if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
@@ -324,6 +402,31 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	return 0;
 }
 
+static int
+nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		   struct bpf_verifier_env *env)
+{
+	const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg;
+	const struct bpf_reg_state *dreg = cur_regs(env) + meta->insn.dst_reg;
+
+	if (dreg->type != PTR_TO_MAP_VALUE) {
+		pr_vlog(env, "atomic add not to a map value pointer: %d\n",
+			dreg->type);
+		return -EOPNOTSUPP;
+	}
+	if (sreg->type != SCALAR_VALUE ||
+	    sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff) {
+		char tn_buf[48];
+
+		tnum_strn(tn_buf, sizeof(tn_buf), sreg->var_off);
+		pr_vlog(env, "atomic add not of a small constant scalar: %s\n",
+			tn_buf);
+		return -EOPNOTSUPP;
+	}
+
+	return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
+}
+
 static int
 nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 {
@@ -356,6 +459,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	if (is_mbpf_store(meta))
 		return nfp_bpf_check_ptr(nfp_prog, meta, env,
 					 meta->insn.dst_reg);
+	if (is_mbpf_xadd(meta))
+		return nfp_bpf_check_xadd(nfp_prog, meta, env);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 1e597600c693..3c0107ac9a2c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
 	[CMD_TGT_READ32_SWAP] =		{ 0x02, 0x5c },
 	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
 	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+	[CMD_TGT_ADD_IMM] =		{ 0x02, 0x47 },
 };
 
 static bool unreg_is_imm(u16 reg)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 150d28f9cd52..185192590a17 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -238,6 +238,7 @@ enum cmd_tgt_map {
 	CMD_TGT_READ32_SWAP,
 	CMD_TGT_READ_LE,
 	CMD_TGT_READ_SWAP_LE,
+	CMD_TGT_ADD_IMM,
 	__CMD_TGT_MAP_SIZE,
 };
 
@@ -254,6 +255,7 @@ enum cmd_ctx_swap {
 	CMD_CTX_NO_SWAP = 3,
 };
 
+#define CMD_OVE_DATA	GENMASK(5, 3)
 #define CMD_OVE_LEN	BIT(7)
 #define CMD_OV_LEN	GENMASK(12, 8)
 

From b556ddd9c19983f3f13ab0d524f884349fead115 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:35 -0700
Subject: [PATCH 1403/1678] nfp: bpf: expose command delay slots

Allow callers to control the delay slots of commands, instead of
giving them just a wait/nowait choice.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c | 53 +++++++++-----------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index c7fdb8c7ae17..db73f56de59a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -103,23 +103,18 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
 /* --- Emitters --- */
 static void
 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
-	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
+	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
+	   bool indir)
 {
-	enum cmd_ctx_swap ctx;
 	u64 insn;
 
-	if (sync)
-		ctx = CMD_CTX_SWAP;
-	else
-		ctx = CMD_CTX_NO_SWAP;
-
 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
 		FIELD_PREP(OP_CMD_CTX, ctx) |
 		FIELD_PREP(OP_CMD_B_SRC, breg) |
 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
 		FIELD_PREP(OP_CMD_XFER, xfer) |
 		FIELD_PREP(OP_CMD_CNT, size) |
-		FIELD_PREP(OP_CMD_SIG, sync) |
+		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
 		FIELD_PREP(OP_CMD_INDIR, indir) |
 		FIELD_PREP(OP_CMD_MODE, mode);
@@ -129,7 +124,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
 
 static void
 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	     swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
+	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
 {
 	struct nfp_insn_re_regs reg;
 	int err;
@@ -150,22 +145,22 @@ emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
 		return;
 	}
 
-	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
+	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
 		   indir);
 }
 
 static void
 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	 swreg lreg, swreg rreg, u8 size, bool sync)
+	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
 {
-	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
+	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
 }
 
 static void
 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	       swreg lreg, swreg rreg, u8 size, bool sync)
+	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
 {
-	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
+	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
 }
 
 static void
@@ -610,7 +605,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	/* Memory read from source addr into transfer-in registers. */
 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
 		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
-		     src_base, off, xfer_num - 1, true, len > 32);
+		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
 
 	/* Move from transfer-in to transfer-out. */
 	for (i = 0; i < xfer_num; i++)
@@ -622,39 +617,39 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		/* Use single direct_ref write8. */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
-			 true);
+			 CMD_CTX_SWAP);
 	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
 		/* Use single direct_ref write32. */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
-			 true);
+			 CMD_CTX_SWAP);
 	} else if (len <= 32) {
 		/* Use single indirect_ref write8. */
 		wrp_immed(nfp_prog, reg_none(),
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       len - 1, true);
+			       len - 1, CMD_CTX_SWAP);
 	} else if (IS_ALIGNED(len, 4)) {
 		/* Use single indirect_ref write32. */
 		wrp_immed(nfp_prog, reg_none(),
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       xfer_num - 1, true);
+			       xfer_num - 1, CMD_CTX_SWAP);
 	} else if (len <= 40) {
 		/* Use one direct_ref write32 to write the first 32-bytes, then
 		 * another direct_ref write8 to write the remaining bytes.
 		 */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
-			 true);
+			 CMD_CTX_SWAP);
 
 		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
 				      imm_b(nfp_prog));
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
-			 true);
+			 CMD_CTX_SWAP);
 	} else {
 		/* Use one indirect_ref write32 to write 4-bytes aligned length,
 		 * then another direct_ref write8 to write the remaining bytes.
@@ -665,12 +660,12 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       xfer_num - 2, true);
+			       xfer_num - 2, CMD_CTX_SWAP);
 		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
 		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
 			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
-			 (len & 0x3) - 1, true);
+			 (len & 0x3) - 1, CMD_CTX_SWAP);
 	}
 
 	/* TODO: The following extra load is to make sure data flow be identical
@@ -731,7 +726,7 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
 	shift = size < 4 ? 4 - size : 0;
 
 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
-		 pptr_reg(nfp_prog), offset, sz - 1, true);
+		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
 
 	i = 0;
 	if (shift)
@@ -761,7 +756,7 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
 
 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
-		 lreg, rreg, sz / 4 - 1, true);
+		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
 
 	i = 0;
 	if (mask)
@@ -841,7 +836,7 @@ data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
 
 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
-		 reg_a(dst_gpr), offset, size - 1, true);
+		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
 
 	return 0;
 }
@@ -855,7 +850,7 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
 
 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
-		 reg_a(dst_gpr), offset, size - 1, true);
+		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
 
 	return 0;
 }
@@ -1876,7 +1871,7 @@ mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
 
 	/* Cache memory into transfer-in registers. */
 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
-		     off, xfer_num - 1, true, indir);
+		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
 }
 
 static int
@@ -2155,7 +2150,7 @@ mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
 		  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
 	wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
 	emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
-		       addra, addrb, 0, false);
+		       addra, addrb, 0, CMD_CTX_NO_SWAP);
 
 	return 0;
 }

From 41aed09cf61c00ef6c3b2648d5a193cbaf2a74d0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:36 -0700
Subject: [PATCH 1404/1678] nfp: bpf: add support for atomic add of unknown
 values

Allow atomic add to be used even when the value is not guaranteed
to fit into a 16 bit immediate.  This requires the value to be pulled
as data, and therefore use of a transfer register and a context swap.

Track the information about possible lengths of the value, if it's
guaranteed to be larger than 16bits don't generate the code for the
optimized case at all.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 78 +++++++++++++++++--
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  7 ++
 .../net/ethernet/netronome/nfp/bpf/verifier.c | 14 ++--
 drivers/net/ethernet/netronome/nfp/nfp_asm.c  |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  3 +
 5 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index db73f56de59a..62431a0aa0f5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2125,12 +2125,49 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 static int
 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
 {
-	swreg addra, addrb, off, prev_alu = imm_a(nfp_prog);
 	u8 dst_gpr = meta->insn.dst_reg * 2;
 	u8 src_gpr = meta->insn.src_reg * 2;
+	unsigned int full_add, out;
+	swreg addra, addrb, off;
 
 	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
 
+	/* We can fit 16 bits into command immediate, if we know the immediate
+	 * is guaranteed to either always or never fit into 16 bit we only
+	 * generate code to handle that particular case, otherwise generate
+	 * code for both.
+	 */
+	out = nfp_prog_current_offset(nfp_prog);
+	full_add = nfp_prog_current_offset(nfp_prog);
+
+	if (meta->insn.off) {
+		out += 2;
+		full_add += 2;
+	}
+	if (meta->xadd_maybe_16bit) {
+		out += 3;
+		full_add += 3;
+	}
+	if (meta->xadd_over_16bit)
+		out += 2 + is64;
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		out += 5;
+		full_add += 5;
+	}
+
+	/* Generate the branch for choosing add_imm vs add */
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		swreg max_imm = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, max_imm, 0xffff);
+		emit_alu(nfp_prog, reg_none(),
+			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
+		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
+		/* defer for add */
+	}
+
 	/* If insn has an offset add to the address */
 	if (!meta->insn.off) {
 		addra = reg_a(dst_gpr);
@@ -2144,13 +2181,38 @@ mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
 		addrb = imma_b(nfp_prog);
 	}
 
-	wrp_immed(nfp_prog, prev_alu,
-		  FIELD_PREP(CMD_OVE_DATA, 2) |
-		  CMD_OVE_LEN |
-		  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
-	wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
-	emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
-		       addra, addrb, 0, CMD_CTX_NO_SWAP);
+	/* Generate the add_imm if 16 bits are possible */
+	if (meta->xadd_maybe_16bit) {
+		swreg prev_alu = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, prev_alu,
+			  FIELD_PREP(CMD_OVE_DATA, 2) |
+			  CMD_OVE_LEN |
+			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
+		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
+		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
+			       addra, addrb, 0, CMD_CTX_NO_SWAP);
+
+		if (meta->xadd_over_16bit)
+			emit_br(nfp_prog, BR_UNC, out, 0);
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
+		return -EINVAL;
+
+	/* Generate the add if 16 bits are not guaranteed */
+	if (meta->xadd_over_16bit) {
+		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
+			 addra, addrb, is64 << 2,
+			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
+
+		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
+		if (is64)
+			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
+		return -EINVAL;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 877be7143991..a73b86c6ce52 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -229,6 +229,8 @@ struct nfp_bpf_reg_state {
  * @pkt_cache.range_start: start offset for associated packet data cache
  * @pkt_cache.range_end: end offset for associated packet data cache
  * @pkt_cache.do_init: this read needs to initialize packet data cache
+ * @xadd_over_16bit: 16bit immediate is not guaranteed
+ * @xadd_maybe_16bit: 16bit immediate is possible
  * @jmp_dst: destination info for jump instructions
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
@@ -243,6 +245,7 @@ struct nfp_bpf_reg_state {
 struct nfp_insn_meta {
 	struct bpf_insn insn;
 	union {
+		/* pointer ops (ld/st/xadd) */
 		struct {
 			struct bpf_reg_state ptr;
 			struct bpf_insn *paired_st;
@@ -253,8 +256,12 @@ struct nfp_insn_meta {
 				s16 range_end;
 				bool do_init;
 			} pkt_cache;
+			bool xadd_over_16bit;
+			bool xadd_maybe_16bit;
 		};
+		/* jump */
 		struct nfp_insn_meta *jmp_dst;
+		/* function calls */
 		struct {
 			u32 func_id;
 			struct bpf_reg_state arg1;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 40619efea77d..486ffd1d5913 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -414,16 +414,16 @@ nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 			dreg->type);
 		return -EOPNOTSUPP;
 	}
-	if (sreg->type != SCALAR_VALUE ||
-	    sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff) {
-		char tn_buf[48];
-
-		tnum_strn(tn_buf, sizeof(tn_buf), sreg->var_off);
-		pr_vlog(env, "atomic add not of a small constant scalar: %s\n",
-			tn_buf);
+	if (sreg->type != SCALAR_VALUE) {
+		pr_vlog(env, "atomic add not of a scalar: %d\n", sreg->type);
 		return -EOPNOTSUPP;
 	}
 
+	meta->xadd_over_16bit |=
+		sreg->var_off.value > 0xffff || sreg->var_off.mask > 0xffff;
+	meta->xadd_maybe_16bit |=
+		(sreg->var_off.value & ~sreg->var_off.mask) <= 0xffff;
+
 	return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 3c0107ac9a2c..cc6ace2be8a9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -48,6 +48,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
 	[CMD_TGT_READ32_SWAP] =		{ 0x02, 0x5c },
 	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
 	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+	[CMD_TGT_ADD] =			{ 0x00, 0x47 },
 	[CMD_TGT_ADD_IMM] =		{ 0x02, 0x47 },
 };
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 185192590a17..36524dd6021b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -238,6 +238,7 @@ enum cmd_tgt_map {
 	CMD_TGT_READ32_SWAP,
 	CMD_TGT_READ_LE,
 	CMD_TGT_READ_SWAP_LE,
+	CMD_TGT_ADD,
 	CMD_TGT_ADD_IMM,
 	__CMD_TGT_MAP_SIZE,
 };
@@ -252,6 +253,8 @@ enum cmd_mode {
 
 enum cmd_ctx_swap {
 	CMD_CTX_SWAP = 0,
+	CMD_CTX_SWAP_DEFER1 = 1,
+	CMD_CTX_SWAP_DEFER2 = 2,
 	CMD_CTX_NO_SWAP = 3,
 };
 

From df4a37d8b53f9fb9af722b056da5edbd9a531768 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:37 -0700
Subject: [PATCH 1405/1678] nfp: bpf: add support for bpf_get_prandom_u32()

NFP has a prng register, which we can read to obtain a u32 worth
of pseudo random data.  Generate code for it.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/fw.h   |  1 +
 drivers/net/ethernet/netronome/nfp/bpf/jit.c  | 24 +++++++++++++++++--
 drivers/net/ethernet/netronome/nfp/bpf/main.c | 12 ++++++++++
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 ++++
 .../net/ethernet/netronome/nfp/bpf/verifier.c |  7 ++++++
 drivers/net/ethernet/netronome/nfp/nfp_asm.h  |  1 +
 6 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index cfcc7bcb2c67..39639ac28b01 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -41,6 +41,7 @@ enum bpf_cap_tlv_type {
 	NFP_BPF_CAP_TYPE_FUNC		= 1,
 	NFP_BPF_CAP_TYPE_ADJUST_HEAD	= 2,
 	NFP_BPF_CAP_TYPE_MAPS		= 3,
+	NFP_BPF_CAP_TYPE_RANDOM		= 4,
 };
 
 struct nfp_bpf_cap_tlv_func {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 62431a0aa0f5..4b631e26f199 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -405,7 +405,7 @@ __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
 		FIELD_PREP(OP_LCSR_WRITE, wr) |
-		FIELD_PREP(OP_LCSR_ADDR, addr) |
+		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
 
@@ -433,10 +433,16 @@ static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
 		return;
 	}
 
-	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
+	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
 		    false, reg.src_lmextn);
 }
 
+/* CSR value is read in following immed[gpr, 0] */
+static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
+{
+	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
+}
+
 static void emit_nop(struct nfp_prog *nfp_prog)
 {
 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
@@ -1398,6 +1404,18 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int
+nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
+	/* CSR value is read in following immed[gpr, 0] */
+	emit_immed(nfp_prog, reg_both(0), 0,
+		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+	emit_immed(nfp_prog, reg_both(1), 0,
+		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+	return 0;
+}
+
 /* --- Callbacks --- */
 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
@@ -2431,6 +2449,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	case BPF_FUNC_map_update_elem:
 	case BPF_FUNC_map_delete_elem:
 		return map_call_stack_common(nfp_prog, meta);
+	case BPF_FUNC_get_prandom_u32:
+		return nfp_get_prandom_u32(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index a7e217c5204f..f2214101a1b5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -315,6 +315,14 @@ nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
 	return 0;
 }
 
+static int
+nfp_bpf_parse_cap_random(struct nfp_app_bpf *bpf, void __iomem *value,
+			 u32 length)
+{
+	bpf->pseudo_random = true;
+	return 0;
+}
+
 static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 {
 	struct nfp_cpp *cpp = app->pf->cpp;
@@ -353,6 +361,10 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
 			if (nfp_bpf_parse_cap_maps(app->priv, value, length))
 				goto err_release_free;
 			break;
+		case NFP_BPF_CAP_TYPE_RANDOM:
+			if (nfp_bpf_parse_cap_random(app->priv, value, length))
+				goto err_release_free;
+			break;
 		default:
 			nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
 			break;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index a73b86c6ce52..4981c8944ca3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -133,6 +133,8 @@ enum pkt_vec {
  * @helpers.map_lookup:		map lookup helper address
  * @helpers.map_update:		map update helper address
  * @helpers.map_delete:		map delete helper address
+ *
+ * @pseudo_random:	FW initialized the pseudo-random machinery (CSRs)
  */
 struct nfp_app_bpf {
 	struct nfp_app *app;
@@ -170,6 +172,8 @@ struct nfp_app_bpf {
 		u32 map_update;
 		u32 map_delete;
 	} helpers;
+
+	bool pseudo_random;
 };
 
 enum nfp_bpf_map_use {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 486ffd1d5913..06ad53ce4ad9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -209,6 +209,13 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
 					  meta->func_id ? &meta->arg2 : NULL))
 			return -EOPNOTSUPP;
 		break;
+
+	case BPF_FUNC_get_prandom_u32:
+		if (bpf->pseudo_random)
+			break;
+		pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
+		return -EOPNOTSUPP;
+
 	default:
 		pr_vlog(env, "unsupported function id: %d\n", func_id);
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 36524dd6021b..5f2b2f24f4fa 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -284,6 +284,7 @@ enum lcsr_wr_src {
 #define NFP_CSR_ACT_LM_ADDR1	0x6c
 #define NFP_CSR_ACT_LM_ADDR2	0x94
 #define NFP_CSR_ACT_LM_ADDR3	0x9c
+#define NFP_CSR_PSEUDO_RND_NUM	0x148
 
 /* Software register representation, independent of operand type */
 #define NN_REG_TYPE	GENMASK(31, 24)

From 7c095f5d9bf4cbadbd4adb0adb670a38e42cd793 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 28 Mar 2018 17:48:38 -0700
Subject: [PATCH 1406/1678] nfp: bpf: improve wrong FW response warnings

When FW responds with a message of wrong size or type make sure
the type is checked first and included in the wrong size message.
This makes it easier to figure out which FW command failed.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 80d3aa0fc9d3..7e298148ca26 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -218,17 +218,17 @@ nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
 		return skb;
 
 	hdr = (struct cmsg_hdr *)skb->data;
-	/* 0 reply_size means caller will do the validation */
-	if (reply_size && skb->len != reply_size) {
-		cmsg_warn(bpf, "cmsg drop - wrong size %d != %d!\n",
-			  skb->len, reply_size);
-		goto err_free;
-	}
 	if (hdr->type != __CMSG_REPLY(type)) {
 		cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
 			  hdr->type, __CMSG_REPLY(type));
 		goto err_free;
 	}
+	/* 0 reply_size means caller will do the validation */
+	if (reply_size && skb->len != reply_size) {
+		cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
+			  type, skb->len, reply_size);
+		goto err_free;
+	}
 
 	return skb;
 err_free:

From 5e78abd075e562fd5748ac3bfb067941e8baf6c7 Mon Sep 17 00:00:00 2001
From: "tamizhr@codeaurora.org" <tamizhr@codeaurora.org>
Date: Tue, 27 Mar 2018 19:16:15 +0530
Subject: [PATCH 1407/1678] cfg80211: fix data type of sta_opmode_info
 parameter

Currently bw and smps_mode are u8 type value in sta_opmode_info
structure. This values filled in mac80211 from ieee80211_sta_rx_bandwidth
and ieee80211_smps_mode. These enum values are specific to mac80211 and
userspace/cfg80211 doesn't know about that. This will lead to incorrect
result/assumption by the user space application.
Change bw and smps_mode parameters to their respective enums in nl80211.

Signed-off-by: Tamizh chelvam <tamizhr@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fc40843baed3..4341508bc6a4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3572,15 +3572,15 @@ enum wiphy_opmode_flag {
 /**
  * struct sta_opmode_info - Station's ht/vht operation mode information
  * @changed: contains value from &enum wiphy_opmode_flag
- * @smps_mode: New SMPS mode of a station
- * @bw: new max bandwidth value of a station
+ * @smps_mode: New SMPS mode value from &enum nl80211_smps_mode of a station
+ * @bw: new max bandwidth value from &enum nl80211_chan_width of a station
  * @rx_nss: new rx_nss value of a station
  */
 
 struct sta_opmode_info {
 	u32 changed;
-	u8 smps_mode;
-	u8 bw;
+	enum nl80211_smps_mode smps_mode;
+	enum nl80211_chan_width bw;
 	u8 rx_nss;
 };
 

From 57566b20033f23bdc9f25d5fa4c36a7287aa08d2 Mon Sep 17 00:00:00 2001
From: "tamizhr@codeaurora.org" <tamizhr@codeaurora.org>
Date: Tue, 27 Mar 2018 19:16:16 +0530
Subject: [PATCH 1408/1678] mac80211: Use proper smps_mode enum in sta opmode
 event

SMPS_MODE change value notified via nl80211 contains mac80211
specific value(ieee80211_smps_mode) and user space application
will not know those values. This patch add support to map
the mac80211 enum value to nl80211_smps_mode which will be
understood by the userspace application.

Signed-off-by: Tamizh chelvam <tamizhr@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ht.c          | 15 +++++++++++++++
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/rx.c          |  3 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d7523530d3f8..c78036a0ac94 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -466,6 +466,21 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
 		__ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_PEER_REQUEST);
 }
 
+enum nl80211_smps_mode
+ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps)
+{
+	switch (smps) {
+	case IEEE80211_SMPS_OFF:
+		return NL80211_SMPS_OFF;
+	case IEEE80211_SMPS_STATIC:
+		return NL80211_SMPS_STATIC;
+	case IEEE80211_SMPS_DYNAMIC:
+		return NL80211_SMPS_DYNAMIC;
+	default:
+		return NL80211_SMPS_OFF;
+	}
+}
+
 int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 			       enum ieee80211_smps_mode smps, const u8 *da,
 			       const u8 *bssid)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ae9c33cd8ada..9237ffb4e986 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1788,6 +1788,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
 void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
 
 u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs);
+enum nl80211_smps_mode
+ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps);
 
 /* VHT */
 void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 27bb1f0b5e52..f8c69acfda48 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2883,7 +2883,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			if (rx->sta->sta.smps_mode == smps_mode)
 				goto handled;
 			rx->sta->sta.smps_mode = smps_mode;
-			sta_opmode.smps_mode = smps_mode;
+			sta_opmode.smps_mode =
+				ieee80211_smps_mode_to_smps_mode(smps_mode);
 			sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED;
 
 			sband = rx->local->hw.wiphy->bands[status->band];

From 97f5f4254a61d7a104b5e609a6a0d9a10c73d29e Mon Sep 17 00:00:00 2001
From: "tamizhr@codeaurora.org" <tamizhr@codeaurora.org>
Date: Tue, 27 Mar 2018 19:16:17 +0530
Subject: [PATCH 1409/1678] mac80211: Use proper chan_width enum in sta opmode
 event

Bandwidth change value reported via nl80211 contains mac80211
specific enum value(ieee80211_sta_rx_bw) and which is not
understand by userspace application. Map the mac80211 specific
value to nl80211_chan_width enum value to avoid using wrong value
in the userspace application. And used station's ht/vht capability
to map IEEE80211_STA_RX_BW_20 and IEEE80211_STA_RX_BW_160 with
proper nl80211 value.

Signed-off-by: Tamizh chelvam <tamizhr@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/rx.c          |  3 ++-
 net/mac80211/vht.c         | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9237ffb4e986..6c341d89448c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1816,6 +1816,8 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_sta_vht_cap *vht_cap);
 void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
 				     u16 vht_mask[NL80211_VHT_NSS_MAX]);
+enum nl80211_chan_width
+ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f8c69acfda48..3a9f0c0a2de8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2922,7 +2922,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 
 			rx->sta->sta.bandwidth = new_bw;
 			sband = rx->local->hw.wiphy->bands[status->band];
-			sta_opmode.bw = new_bw;
+			sta_opmode.bw =
+				ieee80211_sta_rx_bw_to_chan_width(rx->sta);
 			sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
 
 			rate_control_rate_update(local, sband, rx->sta,
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 5714dee76b12..259325cbcc31 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -358,6 +358,36 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
 	return NL80211_CHAN_WIDTH_80;
 }
 
+enum nl80211_chan_width
+ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta)
+{
+	enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth;
+	struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+	u32 cap_width;
+
+	switch (cur_bw) {
+	case IEEE80211_STA_RX_BW_20:
+		if (!sta->sta.ht_cap.ht_supported)
+			return NL80211_CHAN_WIDTH_20_NOHT;
+		else
+			return NL80211_CHAN_WIDTH_20;
+	case IEEE80211_STA_RX_BW_40:
+		return NL80211_CHAN_WIDTH_40;
+	case IEEE80211_STA_RX_BW_80:
+		return NL80211_CHAN_WIDTH_80;
+	case IEEE80211_STA_RX_BW_160:
+		cap_width =
+			vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+
+		if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)
+			return NL80211_CHAN_WIDTH_160;
+
+		return NL80211_CHAN_WIDTH_80P80;
+	default:
+		return NL80211_CHAN_WIDTH_20;
+	}
+}
+
 enum ieee80211_sta_rx_bandwidth
 ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width)
 {
@@ -484,7 +514,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 	new_bw = ieee80211_sta_cur_vht_bw(sta);
 	if (new_bw != sta->sta.bandwidth) {
 		sta->sta.bandwidth = new_bw;
-		sta_opmode.bw = new_bw;
+		sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta);
 		changed |= IEEE80211_RC_BW_CHANGED;
 		sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
 	}

From 850964519a65a96aa8231ff31e28f30c2633484d Mon Sep 17 00:00:00 2001
From: Dmitry Lebed <dlebed@quantenna.com>
Date: Mon, 26 Mar 2018 16:36:31 +0300
Subject: [PATCH 1410/1678] cfg80211: fix CAC_STARTED event handling

Exclude CAC_STARTED event from !wdev->cac_started check,
since cac_started will be set later in the same function.

Signed-off-by: Dmitry Lebed <dlebed@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 6b6818dd76bd..12b3edf70a7b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -872,7 +872,7 @@ void cfg80211_cac_event(struct net_device *netdev,
 
 	trace_cfg80211_cac_event(netdev, event);
 
-	if (WARN_ON(!wdev->cac_started))
+	if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED))
 		return;
 
 	if (WARN_ON(!wdev->chandef.chan))

From 2c390e44e46675da0e7bba5c3b921a091a60ec2c Mon Sep 17 00:00:00 2001
From: Dmitry Lebed <dlebed@quantenna.com>
Date: Mon, 26 Mar 2018 16:36:32 +0300
Subject: [PATCH 1411/1678] cfg80211: enable use of non-cleared DFS channels
 for DFS offload

Currently channel switch/start_ap to DFS channel cannot be done to
non-CAC-cleared channel even if DFS offload if enabled.
Make non-cleared DFS channels available if DFS offload is enabled.
CAC will be started by HW after channel change, start_ap call, etc.

Signed-off-by: Dmitry Lebed <dlebed@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/chan.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index a48859982a32..2db713d18f71 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -579,6 +579,10 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
 {
 	struct ieee80211_channel *c;
 	u32 freq, start_freq, end_freq;
+	bool dfs_offload;
+
+	dfs_offload = wiphy_ext_feature_isset(wiphy,
+					      NL80211_EXT_FEATURE_DFS_OFFLOAD);
 
 	start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
 	end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
@@ -596,8 +600,9 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
 		if (c->flags & IEEE80211_CHAN_DISABLED)
 			return false;
 
-		if ((c->flags & IEEE80211_CHAN_RADAR)  &&
-		    (c->dfs_state != NL80211_DFS_AVAILABLE))
+		if ((c->flags & IEEE80211_CHAN_RADAR) &&
+		    (c->dfs_state != NL80211_DFS_AVAILABLE) &&
+		    !(c->dfs_state == NL80211_DFS_USABLE && dfs_offload))
 			return false;
 	}
 

From 37b1c004685a3cea420dd96aa3803da627359f60 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:44 -0500
Subject: [PATCH 1412/1678] cfg80211: Support all iftypes in autodisconnect_wk

Currently autodisconnect_wk assumes that only interface types of
P2P_CLIENT and STATION use conn_owner_nlportid.  Change this so all
interface types are supported.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/sme.c | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 701cfd7acc1b..5df6b33db786 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1239,17 +1239,38 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
 	wdev_lock(wdev);
 
 	if (wdev->conn_owner_nlportid) {
-		/*
-		 * Use disconnect_bssid if still connecting and ops->disconnect
-		 * not implemented.  Otherwise we can use cfg80211_disconnect.
-		 */
-		if (rdev->ops->disconnect || wdev->current_bss)
-			cfg80211_disconnect(rdev, wdev->netdev,
-					    WLAN_REASON_DEAUTH_LEAVING, true);
-		else
-			cfg80211_mlme_deauth(rdev, wdev->netdev,
-					     wdev->disconnect_bssid, NULL, 0,
-					     WLAN_REASON_DEAUTH_LEAVING, false);
+		switch (wdev->iftype) {
+		case NL80211_IFTYPE_ADHOC:
+			cfg80211_leave_ibss(rdev, wdev->netdev, false);
+			break;
+		case NL80211_IFTYPE_AP:
+		case NL80211_IFTYPE_P2P_GO:
+			cfg80211_stop_ap(rdev, wdev->netdev, false);
+			break;
+		case NL80211_IFTYPE_MESH_POINT:
+			cfg80211_leave_mesh(rdev, wdev->netdev);
+			break;
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_P2P_CLIENT:
+			/*
+			 * Use disconnect_bssid if still connecting and
+			 * ops->disconnect not implemented.  Otherwise we can
+			 * use cfg80211_disconnect.
+			 */
+			if (rdev->ops->disconnect || wdev->current_bss)
+				cfg80211_disconnect(rdev, wdev->netdev,
+						    WLAN_REASON_DEAUTH_LEAVING,
+						    true);
+			else
+				cfg80211_mlme_deauth(rdev, wdev->netdev,
+						     wdev->disconnect_bssid,
+						     NULL, 0,
+						     WLAN_REASON_DEAUTH_LEAVING,
+						     false);
+			break;
+		default:
+			break;
+		}
 	}
 
 	wdev_unlock(wdev);

From f8d16d3edb4dbae080df04318423c360de3c594d Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:45 -0500
Subject: [PATCH 1413/1678] nl80211: Add SOCKET_OWNER support to JOIN_IBSS

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[johannes: fix race with wdev lock/unlock by just acquiring once]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/wireless/core.h          |  8 ++++----
 net/wireless/ibss.c          | 27 ++++++---------------------
 net/wireless/nl80211.c       |  7 ++++++-
 4 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 60fefc5a2ea4..266b43c4f6e1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1962,6 +1962,8 @@ enum nl80211_commands {
  *	multicast group.
  *	If set during %NL80211_CMD_ASSOCIATE or %NL80211_CMD_CONNECT the
  *	station will deauthenticate when the socket is closed.
+ *	If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically
+ *	torn down when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
diff --git a/net/wireless/core.h b/net/wireless/core.h
index eaff636169c2..b5cf3ea8d4df 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -282,10 +282,10 @@ void cfg80211_bss_age(struct cfg80211_registered_device *rdev,
                       unsigned long age_secs);
 
 /* IBSS */
-int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
-		       struct net_device *dev,
-		       struct cfg80211_ibss_params *params,
-		       struct cfg80211_cached_keys *connkeys);
+int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 struct cfg80211_ibss_params *params,
+			 struct cfg80211_cached_keys *connkeys);
 void cfg80211_clear_ibss(struct net_device *dev, bool nowext);
 int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev, bool nowext);
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index a1d10993d08a..d1743e6abc34 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -84,14 +84,15 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
 }
 EXPORT_SYMBOL(cfg80211_ibss_joined);
 
-static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
-				struct net_device *dev,
-				struct cfg80211_ibss_params *params,
-				struct cfg80211_cached_keys *connkeys)
+int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 struct cfg80211_ibss_params *params,
+			 struct cfg80211_cached_keys *connkeys)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
+	ASSERT_RTNL();
 	ASSERT_WDEV_LOCK(wdev);
 
 	if (wdev->ssid_len)
@@ -146,23 +147,6 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
 	return 0;
 }
 
-int cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
-		       struct net_device *dev,
-		       struct cfg80211_ibss_params *params,
-		       struct cfg80211_cached_keys *connkeys)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	int err;
-
-	ASSERT_RTNL();
-
-	wdev_lock(wdev);
-	err = __cfg80211_join_ibss(rdev, dev, params, connkeys);
-	wdev_unlock(wdev);
-
-	return err;
-}
-
 static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -224,6 +208,7 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev,
 	if (err)
 		return err;
 
+	wdev->conn_owner_nlportid = 0;
 	__cfg80211_clear_ibss(dev, nowext);
 
 	return 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fe27ab443d01..13f7c002f562 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8679,9 +8679,14 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 	ibss.userspace_handles_dfs =
 		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
 
-	err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
+	wdev_lock(dev->ieee80211_ptr);
+	err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
 	if (err)
 		kzfree(connkeys);
+	else if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
+		dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
+	wdev_unlock(dev->ieee80211_ptr);
+
 	return err;
 }
 

From 188c1b3c04d69e842122daf201f07a34fcfad039 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:46 -0500
Subject: [PATCH 1414/1678] nl80211: Add SOCKET_OWNER support to JOIN_MESH

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[johannes: fix race with wdev lock/unlock by just acquiring once]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/wireless/core.h          |  4 ----
 net/wireless/mesh.c          | 16 +---------------
 net/wireless/nl80211.c       | 10 ++++++++--
 4 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 266b43c4f6e1..98eb37def37d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1964,6 +1964,8 @@ enum nl80211_commands {
  *	station will deauthenticate when the socket is closed.
  *	If set during %NL80211_CMD_JOIN_IBSS the IBSS will be automatically
  *	torn down when the socket is closed.
+ *	If set during %NL80211_CMD_JOIN_MESH the mesh setup will be
+ *	automatically torn down when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b5cf3ea8d4df..63eb1b5fdd04 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -303,10 +303,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
 			 struct mesh_setup *setup,
 			 const struct mesh_config *conf);
-int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
-		       struct net_device *dev,
-		       struct mesh_setup *setup,
-		       const struct mesh_config *conf);
 int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev);
 int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index b12da6ef3c12..eac5aa1419fc 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -217,21 +217,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 	return err;
 }
 
-int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
-		       struct net_device *dev,
-		       struct mesh_setup *setup,
-		       const struct mesh_config *conf)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	int err;
-
-	wdev_lock(wdev);
-	err = __cfg80211_join_mesh(rdev, dev, setup, conf);
-	wdev_unlock(wdev);
-
-	return err;
-}
-
 int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev,
 			      struct cfg80211_chan_def *chandef)
@@ -286,6 +271,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 
 	err = rdev_leave_mesh(rdev, dev);
 	if (!err) {
+		wdev->conn_owner_nlportid = 0;
 		wdev->mesh_id_len = 0;
 		wdev->beacon_interval = 0;
 		memset(&wdev->chandef, 0, sizeof(wdev->chandef));
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 13f7c002f562..1d6e81e5b2c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10092,7 +10092,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 		if (err)
 			return err;
 	} else {
-		/* cfg80211_join_mesh() will sort it out */
+		/* __cfg80211_join_mesh() will sort it out */
 		setup.chandef.chan = NULL;
 	}
 
@@ -10130,7 +10130,13 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	setup.userspace_handles_dfs =
 		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
 
-	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
+	wdev_lock(dev->ieee80211_ptr);
+	err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg);
+	if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER])
+		dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;
+	wdev_unlock(dev->ieee80211_ptr);
+
+	return err;
 }
 
 static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)

From 466a306142c002b40deaa58da94741af4153d1c4 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:47 -0500
Subject: [PATCH 1415/1678] nl80211: Add SOCKET_OWNER support to START_AP

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 net/wireless/ap.c            | 1 +
 net/wireless/nl80211.c       | 3 +++
 3 files changed, 6 insertions(+)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 98eb37def37d..9ea3d6039eca 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1966,6 +1966,8 @@ enum nl80211_commands {
  *	torn down when the socket is closed.
  *	If set during %NL80211_CMD_JOIN_MESH the mesh setup will be
  *	automatically torn down when the socket is closed.
+ *	If set during %NL80211_CMD_START_AP the AP will be automatically
+ *	disabled when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 63682176c96c..882d97bdc6bf 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -27,6 +27,7 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
 
 	err = rdev_stop_ap(rdev, dev);
 	if (!err) {
+		wdev->conn_owner_nlportid = 0;
 		wdev->beacon_interval = 0;
 		memset(&wdev->chandef, 0, sizeof(wdev->chandef));
 		wdev->ssid_len = 0;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1d6e81e5b2c8..cfaf2aeb9783 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4134,6 +4134,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 		wdev->chandef = params.chandef;
 		wdev->ssid_len = params.ssid_len;
 		memcpy(wdev->ssid, params.ssid, wdev->ssid_len);
+
+		if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
+			wdev->conn_owner_nlportid = info->snd_portid;
 	}
 	wdev_unlock(wdev);
 

From 4415d58c47e5a48dfd202c63fcfd3d6e653e543b Mon Sep 17 00:00:00 2001
From: Timothy Redaelli <tredaelli@redhat.com>
Date: Tue, 27 Mar 2018 11:25:25 +0300
Subject: [PATCH 1416/1678] ath9k: fix DFS detector synchronization

some userspace programs (e.g. hostapd) need to set the regulatory domain
before selecting the operating channel. Synchronize DFS detector regardless of
the value of ah->curchan, to avoid situations where wireless scan can't be done
on some 5GHz sub-bands, because dfs_region is constantly UNSET.

Acked-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Timothy Redaelli <tredaelli@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/init.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index e479fae5aab9..b71b16715f6a 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -257,6 +257,11 @@ static void ath9k_reg_notifier(struct wiphy *wiphy,
 
 	ath_reg_notifier_apply(wiphy, request, reg);
 
+	/* synchronize DFS detector if regulatory domain changed */
+	if (sc->dfs_detector != NULL)
+		sc->dfs_detector->set_dfs_domain(sc->dfs_detector,
+						 request->dfs_region);
+
 	/* Set tx power */
 	if (!ah->curchan)
 		return;
@@ -267,10 +272,6 @@ static void ath9k_reg_notifier(struct wiphy *wiphy,
 	ath9k_cmn_update_txpow(ah, sc->cur_chan->cur_txpower,
 			       sc->cur_chan->txpower,
 			       &sc->cur_chan->cur_txpower);
-	/* synchronize DFS detector if regulatory domain changed */
-	if (sc->dfs_detector != NULL)
-		sc->dfs_detector->set_dfs_domain(sc->dfs_detector,
-						 request->dfs_region);
 	ath9k_ps_restore(sc);
 }
 

From 55cc11da69895a680940c1733caabc37be685f5e Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Tue, 27 Mar 2018 11:25:29 +0300
Subject: [PATCH 1417/1678] Revert "ath10k: send (re)assoc peer command when
 NSS changed"

This reverts commit 55884c045d31a29cf69db8332d1064a1b61dd159.

When Ath10k is in AP mode and an unassociated STA sends a VHT action frame
(Operating Mode Notification for the NSS change) periodically to AP this causes
ath10k to call ath10k_station_assoc() which sends WMI_PEER_ASSOC_CMDID during
NSS update. Over the time (with a certain client it can happen within 15 mins
when there are over 500 of these VHT action frames) continuous calls of
WMI_PEER_ASSOC_CMDID cause firmware to assert due to resource exhaust.

To my knowledge setting WMI_PEER_NSS peer param itself enough to handle NSS
updates and no need to call ath10k_station_assoc(). So revert the original
commit from 2014 as it's unclear why the change was really needed.
Now the firmware assert doesn't happen anymore.

Issue observed in QCA9984 platform with firmware version:10.4-3.5.3-00053.
This Change tested in QCA9984 with firmware version: 10.4-3.5.3-00053 and
QCA988x platform with firmware version: 10.2.4-1.0-00036.

Firmware Assert log:

ath10k_pci 0002:01:00.0: firmware crashed! (guid e61f1274-9acd-4c5b-bcca-e032ea6e723c)
ath10k_pci 0002:01:00.0: qca9984/qca9994 hw1.0 target 0x01000000 chip_id 0x00000000 sub 168c:cafe
ath10k_pci 0002:01:00.0: kconfig debug 1 debugfs 1 tracing 0 dfs 1 testmode 1
ath10k_pci 0002:01:00.0: firmware ver 10.4-3.5.3-00053 api 5 features no-p2p,mfp,peer-flow-ctrl,btcoex-param,allows-mesh-bcast crc32 4c56a386
ath10k_pci 0002:01:00.0: board_file api 2 bmi_id 0:4 crc32 c2271344
ath10k_pci 0002:01:00.0: htt-ver 2.2 wmi-op 6 htt-op 4 cal otp max-sta 512 raw 0 hwcrypto 1
ath10k_pci 0002:01:00.0: firmware register dump:
ath10k_pci 0002:01:00.0: [00]: 0x0000000A 0x000015B3 0x00981E5F 0x00975B31
ath10k_pci 0002:01:00.0: [04]: 0x00981E5F 0x00060530 0x00000011 0x00446C60
ath10k_pci 0002:01:00.0: [08]: 0x0042F1FC 0x00458080 0x00000017 0x00000000
ath10k_pci 0002:01:00.0: [12]: 0x00000009 0x00000000 0x00973ABC 0x00973AD2
ath10k_pci 0002:01:00.0: [16]: 0x00973AB0 0x00960E62 0x009606CA 0x00000000
ath10k_pci 0002:01:00.0: [20]: 0x40981E5F 0x004066DC 0x00400000 0x00981E34
ath10k_pci 0002:01:00.0: [24]: 0x80983B48 0x0040673C 0x000000C0 0xC0981E5F
ath10k_pci 0002:01:00.0: [28]: 0x80993DEB 0x0040676C 0x00431AB8 0x0045D0C4
ath10k_pci 0002:01:00.0: [32]: 0x80993E5C 0x004067AC 0x004303C0 0x0045D0C4
ath10k_pci 0002:01:00.0: [36]: 0x80994AAB 0x004067DC 0x00000000 0x0045D0C4
ath10k_pci 0002:01:00.0: [40]: 0x809971A0 0x0040681C 0x004303C0 0x00441B00
ath10k_pci 0002:01:00.0: [44]: 0x80991904 0x0040688C 0x004303C0 0x0045D0C4
ath10k_pci 0002:01:00.0: [48]: 0x80963AD3 0x00406A7C 0x004303C0 0x009918FC
ath10k_pci 0002:01:00.0: [52]: 0x80960E80 0x00406A9C 0x0000001F 0x00400000
ath10k_pci 0002:01:00.0: [56]: 0x80960E51 0x00406ACC 0x00400000 0x00000000
ath10k_pci 0002:01:00.0: Copy Engine register dump:
ath10k_pci 0002:01:00.0: index: addr: sr_wr_idx: sr_r_idx: dst_wr_idx: dst_r_idx:
ath10k_pci 0002:01:00.0: [00]: 0x0004a000 15 15 3 3
ath10k_pci 0002:01:00.0: [01]: 0x0004a400 17 17 212 213
ath10k_pci 0002:01:00.0: [02]: 0x0004a800 21 21 20 21
ath10k_pci 0002:01:00.0: [03]: 0x0004ac00 25 25 27 25
ath10k_pci 0002:01:00.0: [04]: 0x0004b000 515 515 144 104
ath10k_pci 0002:01:00.0: [05]: 0x0004b400 28 28 155 156
ath10k_pci 0002:01:00.0: [06]: 0x0004b800 12 12 12 12
ath10k_pci 0002:01:00.0: [07]: 0x0004bc00 1 1 1 1
ath10k_pci 0002:01:00.0: [08]: 0x0004c000 0 0 127 0
ath10k_pci 0002:01:00.0: [09]: 0x0004c400 1 1 1 1
ath10k_pci 0002:01:00.0: [10]: 0x0004c800 0 0 0 0
ath10k_pci 0002:01:00.0: [11]: 0x0004cc00 0 0 0 0
ath10k_pci 0002:01:00.0: CE[1] write_index 212 sw_index 213 hw_index 0 nentries_mask 0x000001ff
ath10k_pci 0002:01:00.0: CE[2] write_index 20 sw_index 21 hw_index 0 nentries_mask 0x0000007f
ath10k_pci 0002:01:00.0: CE[5] write_index 155 sw_index 156 hw_index 0 nentries_mask 0x000001ff
ath10k_pci 0002:01:00.0: DMA addr: nbytes: meta data: byte swap: gather:
ath10k_pci 0002:01:00.0: [455]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [456]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [457]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [458]: 0x594a0038 0 0 0 1
ath10k_pci 0002:01:00.0: [459]: 0x580c0a42 0 0 0 0
ath10k_pci 0002:01:00.0: [460]: 0x594a0060 0 0 0 1
ath10k_pci 0002:01:00.0: [461]: 0x580c0c42 0 0 0 0
ath10k_pci 0002:01:00.0: [462]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [463]: 0x580c0c42 0 0 0 0
ath10k_pci 0002:01:00.0: [464]: 0x594a0038 0 0 0 1
ath10k_pci 0002:01:00.0: [465]: 0x580c0a42 0 0 0 0
ath10k_pci 0002:01:00.0: [466]: 0x594a0060 0 0 0 1
ath10k_pci 0002:01:00.0: [467]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [468]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [469]: 0x580c1c42 0 0 0 0
ath10k_pci 0002:01:00.0: [470]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [471]: 0x580c1c42 0 0 0 0
ath10k_pci 0002:01:00.0: [472]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [473]: 0x580c1c42 0 0 0 0
ath10k_pci 0002:01:00.0: [474]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [475]: 0x580c0642 0 0 0 0
ath10k_pci 0002:01:00.0: [476]: 0x594a0038 0 0 0 1
ath10k_pci 0002:01:00.0: [477]: 0x580c0842 0 0 0 0
ath10k_pci 0002:01:00.0: [478]: 0x594a0060 0 0 0 1
ath10k_pci 0002:01:00.0: [479]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [480]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [481]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [482]: 0x594a0038 0 0 0 1
ath10k_pci 0002:01:00.0: [483]: 0x580c0842 0 0 0 0
ath10k_pci 0002:01:00.0: [484]: 0x594a0060 0 0 0 1
ath10k_pci 0002:01:00.0: [485]: 0x580c0642 0 0 0 0
ath10k_pci 0002:01:00.0: [486]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [487]: 0x580c0642 0 0 0 0
ath10k_pci 0002:01:00.0: [488]: 0x594a0038 0 0 0 1
ath10k_pci 0002:01:00.0: [489]: 0x580c0842 0 0 0 0
ath10k_pci 0002:01:00.0: [490]: 0x594a0060 0 0 0 1
ath10k_pci 0002:01:00.0: [491]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [492]: 0x58174040 0 1 0 0
ath10k_pci 0002:01:00.0: [493]: 0x5a946040 0 1 0 0
ath10k_pci 0002:01:00.0: [494]: 0x59909040 0 1 0 0
ath10k_pci 0002:01:00.0: [495]: 0x5ae5a040 0 1 0 0
ath10k_pci 0002:01:00.0: [496]: 0x58096040 0 1 0 0
ath10k_pci 0002:01:00.0: [497]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [498]: 0x580c0642 0 0 0 0
ath10k_pci 0002:01:00.0: [499]: 0x5c1e0040 0 1 0 0
ath10k_pci 0002:01:00.0: [500]: 0x58153040 0 1 0 0
ath10k_pci 0002:01:00.0: [501]: 0x58129040 0 1 0 0
ath10k_pci 0002:01:00.0: [502]: 0x5952f040 0 1 0 0
ath10k_pci 0002:01:00.0: [503]: 0x59535040 0 1 0 0
ath10k_pci 0002:01:00.0: [504]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [505]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [506]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [507]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [508]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [509]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [510]: 0x594a0010 0 0 0 1
ath10k_pci 0002:01:00.0: [511]: 0x580c0042 0 0 0 0
ath10k_pci 0002:01:00.0: [512]: 0x5adcc040 0 1 0 0
ath10k_pci 0002:01:00.0: [513]: 0x5cf3d040 0 1 0 0
ath10k_pci 0002:01:00.0: [514]: 0x5c1e9040 64 1 0 0
ath10k_pci 0002:01:00.0: [515]: 0x00000000 0 0 0 0

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 02674ee04435..1cc87cb15620 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -6046,9 +6046,8 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk)
 				    sta->addr, smps, err);
 	}
 
-	if (changed & IEEE80211_RC_SUPP_RATES_CHANGED ||
-	    changed & IEEE80211_RC_NSS_CHANGED) {
-		ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM supp rates/nss\n",
+	if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) {
+		ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM supp rates\n",
 			   sta->addr);
 
 		err = ath10k_station_assoc(ar, arvif->vif, sta, true);

From e98199a8c27f6ac8900307bc611ca4b15467f324 Mon Sep 17 00:00:00 2001
From: Ryan Hsu <ryanhsu@codeaurora.org>
Date: Tue, 27 Mar 2018 11:25:36 +0300
Subject: [PATCH 1418/1678] ath10k: enable QCA6174/QCA9377 to read the chip
 temperature

The firmware of QCA6174/QCA9377 already support the feature, just enable
it to be able to handle the get_temperature command and process the event.

You can read the temperature by using the hwmon interface,

cat /sys/class/ieee80211/phy*/device/hwmon/hwmon2/temp1_input

Verified with the following hardware and software combination,
QCA6174, only firmware-4.bin doesn't support this, otherwise all support.
QCA9377, all the firmwares upstreamed support this command

Signed-off-by: Ryan Hsu <ryanhsu@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 39 +++++++++++++++++++++--
 drivers/net/wireless/ath/ath10k/wmi-tlv.h | 11 +++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 523af3f8bb62..57bd8a70d242 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -413,6 +413,19 @@ static int ath10k_wmi_tlv_event_tx_pause(struct ath10k *ar,
 	return 0;
 }
 
+static int ath10k_wmi_tlv_event_temperature(struct ath10k *ar,
+					    struct sk_buff *skb)
+{
+	const struct wmi_tlv_pdev_temperature_event *ev;
+
+	ev = (struct wmi_tlv_pdev_temperature_event *)skb->data;
+	if (WARN_ON(skb->len < sizeof(*ev)))
+		return -EPROTO;
+
+	ath10k_thermal_event_temperature(ar, __le32_to_cpu(ev->temperature));
+	return 0;
+}
+
 /***********/
 /* TLV ops */
 /***********/
@@ -553,6 +566,9 @@ static void ath10k_wmi_tlv_op_rx(struct ath10k *ar, struct sk_buff *skb)
 	case WMI_TLV_TX_PAUSE_EVENTID:
 		ath10k_wmi_tlv_event_tx_pause(ar, skb);
 		break;
+	case WMI_TLV_PDEV_TEMPERATURE_EVENTID:
+		ath10k_wmi_tlv_event_temperature(ar, skb);
+		break;
 	default:
 		ath10k_warn(ar, "Unknown eventid: %d\n", id);
 		break;
@@ -2657,6 +2673,25 @@ ath10k_wmi_tlv_op_gen_pktlog_enable(struct ath10k *ar, u32 filter)
 	return skb;
 }
 
+static struct sk_buff *
+ath10k_wmi_tlv_op_gen_pdev_get_temperature(struct ath10k *ar)
+{
+	struct wmi_tlv_pdev_get_temp_cmd *cmd;
+	struct wmi_tlv *tlv;
+	struct sk_buff *skb;
+
+	skb = ath10k_wmi_alloc_skb(ar, sizeof(*tlv) + sizeof(*cmd));
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	tlv = (void *)skb->data;
+	tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_PDEV_GET_TEMPERATURE_CMD);
+	tlv->len = __cpu_to_le16(sizeof(*cmd));
+	cmd = (void *)tlv->value;
+	ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi pdev get temperature tlv\n");
+	return skb;
+}
+
 static struct sk_buff *
 ath10k_wmi_tlv_op_gen_pktlog_disable(struct ath10k *ar)
 {
@@ -3439,7 +3474,7 @@ static struct wmi_cmd_map wmi_tlv_cmd_map = {
 	.force_fw_hang_cmdid = WMI_TLV_FORCE_FW_HANG_CMDID,
 	.gpio_config_cmdid = WMI_TLV_GPIO_CONFIG_CMDID,
 	.gpio_output_cmdid = WMI_TLV_GPIO_OUTPUT_CMDID,
-	.pdev_get_temperature_cmdid = WMI_TLV_CMD_UNSUPPORTED,
+	.pdev_get_temperature_cmdid = WMI_TLV_PDEV_GET_TEMPERATURE_CMDID,
 	.vdev_set_wmm_params_cmdid = WMI_TLV_VDEV_SET_WMM_PARAMS_CMDID,
 	.tdls_set_state_cmdid = WMI_TLV_TDLS_SET_STATE_CMDID,
 	.tdls_peer_update_cmdid = WMI_TLV_TDLS_PEER_UPDATE_CMDID,
@@ -3702,7 +3737,7 @@ static const struct wmi_ops wmi_tlv_ops = {
 	.gen_pktlog_enable = ath10k_wmi_tlv_op_gen_pktlog_enable,
 	.gen_pktlog_disable = ath10k_wmi_tlv_op_gen_pktlog_disable,
 	/* .gen_pdev_set_quiet_mode not implemented */
-	/* .gen_pdev_get_temperature not implemented */
+	.gen_pdev_get_temperature = ath10k_wmi_tlv_op_gen_pdev_get_temperature,
 	/* .gen_addba_clear_resp not implemented */
 	/* .gen_addba_send not implemented */
 	/* .gen_addba_set_resp not implemented */
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index da89128e8dd6..b07b690544e7 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -1340,6 +1340,17 @@ struct wmi_tlv_init_cmd {
 	__le32 num_host_mem_chunks;
 } __packed;
 
+struct wmi_tlv_pdev_get_temp_cmd {
+	__le32 pdev_id; /* not used */
+} __packed;
+
+struct wmi_tlv_pdev_temperature_event {
+	__le32 tlv_hdr;
+	/* temperature value in Celcius degree */
+	__le32 temperature;
+	__le32 pdev_id;
+} __packed;
+
 struct wmi_tlv_pdev_set_param_cmd {
 	__le32 pdev_id; /* not used yet */
 	__le32 param_id;

From e3814bec380f6f848ab619abcdfff320a839cabc Mon Sep 17 00:00:00 2001
From: Ryan Hsu <ryanhsu@codeaurora.org>
Date: Tue, 27 Mar 2018 11:25:38 +0300
Subject: [PATCH 1419/1678] ath10k: add FW API 6 firmware image for QCA9377

Firmware WLAN.TF.2.1-00014-QCARMSWP-1 now supports reading the board ID
information and also required 9 IRAM bank, which older ath10k version
don't have the support will fail to be enabled, so in order to maintain
the backward compatibility, we need to update the FW API to 6.

Signed-off-by: Ryan Hsu <ryanhsu@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 808f3d67ba90..561c722979ec 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -3718,5 +3718,6 @@ MODULE_FIRMWARE(QCA6174_HW_3_0_FW_DIR "/" QCA6174_HW_3_0_BOARD_DATA_FILE);
 MODULE_FIRMWARE(QCA6174_HW_3_0_FW_DIR "/" ATH10K_BOARD_API2_FILE);
 
 /* QCA9377 1.0 firmware files */
+MODULE_FIRMWARE(QCA9377_HW_1_0_FW_DIR "/" ATH10K_FW_API6_FILE);
 MODULE_FIRMWARE(QCA9377_HW_1_0_FW_DIR "/" ATH10K_FW_API5_FILE);
 MODULE_FIRMWARE(QCA9377_HW_1_0_FW_DIR "/" QCA9377_HW_1_0_BOARD_DATA_FILE);

From 606204bb863fa3b0bb54929d79b4dc46338f9180 Mon Sep 17 00:00:00 2001
From: Sathishkumar Muruganandam <murugana@codeaurora.org>
Date: Tue, 27 Mar 2018 11:26:46 +0300
Subject: [PATCH 1420/1678] ath10k: suppress "Unknown eventid: 36925" warnings

FW has Smart Logging feature enabled by default for detecting failures
and processing FATAL_CONDITION_EVENTID (36925 - 0x903D) back to host.

Since ath10k doesn't implement the Smart Logging and FATAL CONDITION
EVENT processing yet, suppressing the unknown event ID warning by moving
this under ATH10K_DBG_WMI.

Simulated the same issue by having associated STA powered off when
ping flood was running from AP backbone. This triggerd STA KICKOUT
in AP followed by FATAL CONDITION event 36925.

Issue was reproduced and verified in below DUT
------------------------------------------------
AP mode of OpenWRT QCA9984 running 6.0.8 with FW ver 10.4-3.5.3-00053

Signed-off-by: Sathishkumar Muruganandam <murugana@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 9649bb752bbd..5947d33d6810 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -5786,6 +5786,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb)
 	case WMI_10_4_WOW_WAKEUP_HOST_EVENTID:
 	case WMI_10_4_PEER_RATECODE_LIST_EVENTID:
 	case WMI_10_4_WDS_PEER_EVENTID:
+	case WMI_10_4_DEBUG_FATAL_CONDITION_EVENTID:
 		ath10k_dbg(ar, ATH10K_DBG_WMI,
 			   "received event id %d not implemented\n", id);
 		break;

From 10c2288430817981cab70fc4b78e405765be93b1 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Tue, 27 Mar 2018 11:26:50 +0300
Subject: [PATCH 1421/1678] ath10k: refactor ath10k_pci_dump_memory() in
 preparation for QCA9984 support

As QCA9984 needs two region types refactor the code to make it easier add the
new types. No functional changes.

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/pci.c | 55 ++++++++++++++++++---------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 561c722979ec..ad2a14678be7 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -1584,6 +1584,36 @@ static int ath10k_pci_set_ram_config(struct ath10k *ar, u32 config)
 	return 0;
 }
 
+/* if an error happened returns < 0, otherwise the length */
+static int ath10k_pci_dump_memory_generic(struct ath10k *ar,
+					  const struct ath10k_mem_region *current_region,
+					  u8 *buf)
+{
+	int ret;
+
+	if (current_region->section_table.size > 0)
+		/* Copy each section individually. */
+		return ath10k_pci_dump_memory_section(ar,
+						      current_region,
+						      buf,
+						      current_region->len);
+
+	/* No individiual memory sections defined so we can
+	 * copy the entire memory region.
+	 */
+	ret = ath10k_pci_diag_read_mem(ar,
+				       current_region->start,
+				       buf,
+				       current_region->len);
+	if (ret) {
+		ath10k_warn(ar, "failed to copy ramdump region %s: %d\n",
+			    current_region->name, ret);
+		return ret;
+	}
+
+	return current_region->len;
+}
+
 static void ath10k_pci_dump_memory(struct ath10k *ar,
 				   struct ath10k_fw_crash_data *crash_data)
 {
@@ -1642,27 +1672,14 @@ static void ath10k_pci_dump_memory(struct ath10k *ar,
 		buf += sizeof(*hdr);
 		buf_len -= sizeof(*hdr);
 
-		if (current_region->section_table.size > 0) {
-			/* Copy each section individually. */
-			count = ath10k_pci_dump_memory_section(ar,
-							       current_region,
-							       buf,
-							       current_region->len);
-		} else {
-			/* No individiual memory sections defined so we can
-			 * copy the entire memory region.
-			 */
-			ret = ath10k_pci_diag_read_mem(ar,
-						       current_region->start,
-						       buf,
-						       current_region->len);
-			if (ret) {
-				ath10k_warn(ar, "failed to copy ramdump region %s: %d\n",
-					    current_region->name, ret);
+		switch (current_region->type) {
+		default:
+			ret = ath10k_pci_dump_memory_generic(ar, current_region, buf);
+			if (ret < 0)
 				break;
-			}
 
-			count = current_region->len;
+			count = ret;
+			break;
 		}
 
 		hdr->region_type = cpu_to_le32(current_region->type);

From 219cc084c67061ee7eafc47e68874d451fa2fff8 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 27 Mar 2018 11:26:52 +0300
Subject: [PATCH 1422/1678] ath10k: add memory dump support QCA9984

QCA9984/QCA99X0/QCA4019 chipsets have 8 memory regions, dump all of them to the
firmware coredump file. Some of the regions need to be read using ioread() so
add new region types for them.

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
[kvalo: refactoring etc]
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/coredump.c | 90 ++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/coredump.h |  2 +
 drivers/net/wireless/ath/ath10k/pci.c      | 43 +++++++++++
 3 files changed, 135 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/coredump.c b/drivers/net/wireless/ath/ath10k/coredump.c
index 7173b3743b43..f90cec0ebb1c 100644
--- a/drivers/net/wireless/ath/ath10k/coredump.c
+++ b/drivers/net/wireless/ath/ath10k/coredump.c
@@ -701,6 +701,89 @@ static const struct ath10k_mem_region qca988x_hw20_mem_regions[] = {
 	},
 };
 
+static const struct ath10k_mem_region qca9984_hw10_mem_regions[] = {
+	{
+		.type = ATH10K_MEM_REGION_TYPE_DRAM,
+		.start = 0x400000,
+		.len = 0x80000,
+		.name = "DRAM",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_REG,
+		.start = 0x98000,
+		.len = 0x50000,
+		.name = "IRAM",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_IOSRAM,
+		.start = 0xC0000,
+		.len = 0x40000,
+		.name = "SRAM",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_IOREG,
+		.start = 0x30000,
+		.len = 0x7000,
+		.name = "APB REG 1",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_IOREG,
+		.start = 0x3f000,
+		.len = 0x3000,
+		.name = "APB REG 2",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_IOREG,
+		.start = 0x43000,
+		.len = 0x3000,
+		.name = "WIFI REG",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_IOREG,
+		.start = 0x4A000,
+		.len = 0x5000,
+		.name = "CE REG",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+	{
+		.type = ATH10K_MEM_REGION_TYPE_IOREG,
+		.start = 0x80000,
+		.len = 0x6000,
+		.name = "SOC REG",
+		.section_table = {
+			.sections = NULL,
+			.size = 0,
+		},
+	},
+};
+
 static const struct ath10k_hw_mem_layout hw_mem_layouts[] = {
 	{
 		.hw_id = QCA6174_HW_1_0_VERSION,
@@ -758,6 +841,13 @@ static const struct ath10k_hw_mem_layout hw_mem_layouts[] = {
 			.size = ARRAY_SIZE(qca988x_hw20_mem_regions),
 		},
 	},
+	{
+		.hw_id = QCA9984_HW_1_0_DEV_VERSION,
+		.region_table = {
+			.regions = qca9984_hw10_mem_regions,
+			.size = ARRAY_SIZE(qca9984_hw10_mem_regions),
+		},
+	},
 };
 
 static u32 ath10k_coredump_get_ramdump_size(struct ath10k *ar)
diff --git a/drivers/net/wireless/ath/ath10k/coredump.h b/drivers/net/wireless/ath/ath10k/coredump.h
index bfee13038e59..3baaf9d2cbcd 100644
--- a/drivers/net/wireless/ath/ath10k/coredump.h
+++ b/drivers/net/wireless/ath/ath10k/coredump.h
@@ -124,6 +124,8 @@ enum ath10k_mem_region_type {
 	ATH10K_MEM_REGION_TYPE_AXI	= 3,
 	ATH10K_MEM_REGION_TYPE_IRAM1	= 4,
 	ATH10K_MEM_REGION_TYPE_IRAM2	= 5,
+	ATH10K_MEM_REGION_TYPE_IOSRAM	= 6,
+	ATH10K_MEM_REGION_TYPE_IOREG	= 7,
 };
 
 /* Define a section of the region which should be copied. As not all parts
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index ad2a14678be7..fd1566cd7d2b 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -57,6 +57,10 @@ MODULE_PARM_DESC(reset_mode, "0: auto, 1: warm only (default: 0)");
  */
 #define ATH10K_DIAG_TRANSFER_LIMIT	0x5000
 
+#define QCA99X0_PCIE_BAR0_START_REG    0x81030
+#define QCA99X0_CPU_MEM_ADDR_REG       0x4d00c
+#define QCA99X0_CPU_MEM_DATA_REG       0x4d010
+
 static const struct pci_device_id ath10k_pci_id_table[] = {
 	/* PCI-E QCA988X V2 (Ubiquiti branded) */
 	{ PCI_VDEVICE(UBIQUITI, QCA988X_2_0_DEVICE_ID_UBNT) },
@@ -1584,6 +1588,39 @@ static int ath10k_pci_set_ram_config(struct ath10k *ar, u32 config)
 	return 0;
 }
 
+/* if an error happened returns < 0, otherwise the length */
+static int ath10k_pci_dump_memory_sram(struct ath10k *ar,
+				       const struct ath10k_mem_region *region,
+				       u8 *buf)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	u32 base_addr, i;
+
+	base_addr = ioread32(ar_pci->mem + QCA99X0_PCIE_BAR0_START_REG);
+	base_addr += region->start;
+
+	for (i = 0; i < region->len; i += 4) {
+		iowrite32(base_addr + i, ar_pci->mem + QCA99X0_CPU_MEM_ADDR_REG);
+		*(u32 *)(buf + i) = ioread32(ar_pci->mem + QCA99X0_CPU_MEM_DATA_REG);
+	}
+
+	return region->len;
+}
+
+/* if an error happened returns < 0, otherwise the length */
+static int ath10k_pci_dump_memory_reg(struct ath10k *ar,
+				      const struct ath10k_mem_region *region,
+				      u8 *buf)
+{
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	u32 i;
+
+	for (i = 0; i < region->len; i += 4)
+		*(u32 *)(buf + i) = ioread32(ar_pci->mem + region->start + i);
+
+	return region->len;
+}
+
 /* if an error happened returns < 0, otherwise the length */
 static int ath10k_pci_dump_memory_generic(struct ath10k *ar,
 					  const struct ath10k_mem_region *current_region,
@@ -1673,6 +1710,12 @@ static void ath10k_pci_dump_memory(struct ath10k *ar,
 		buf_len -= sizeof(*hdr);
 
 		switch (current_region->type) {
+		case ATH10K_MEM_REGION_TYPE_IOSRAM:
+			count = ath10k_pci_dump_memory_sram(ar, current_region, buf);
+			break;
+		case ATH10K_MEM_REGION_TYPE_IOREG:
+			count = ath10k_pci_dump_memory_reg(ar, current_region, buf);
+			break;
 		default:
 			ret = ath10k_pci_dump_memory_generic(ar, current_region, buf);
 			if (ret < 0)

From ee35eecb08220978f68a6987f71c5132f40d4a10 Mon Sep 17 00:00:00 2001
From: Ramon Fried <rfried@codeaurora.org>
Date: Tue, 27 Mar 2018 11:26:55 +0300
Subject: [PATCH 1423/1678] wcn36xx: turn off probe response offloading

It appears that the WCN36xx firmware doesn't actually respond to
probe requests. Until it's resolved, switch the probe response
responsibility to the 802.11 layer to allow creation of
hidden SSID AP's.

Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 621e72b6ec99..69d6be59d97f 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1152,8 +1152,6 @@ static int wcn36xx_init_ieee80211(struct wcn36xx *wcn)
 	wcn->hw->wiphy->cipher_suites = cipher_suites;
 	wcn->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
 
-	wcn->hw->wiphy->flags |= WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD;
-
 #ifdef CONFIG_PM
 	wcn->hw->wiphy->wowlan = &wowlan_support;
 #endif

From e5f9908155c96d945b6d1053701b6168c4e8decc Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 27 Mar 2018 11:26:57 +0300
Subject: [PATCH 1424/1678] wcn36xx: Fix firmware crash due to corrupted buffer
 address

wcn36xx_start_tx function retrieves the buffer descriptor from the
channel control queue to start filling tx buffer information. However,
nothing prevents this same buffer to be concurrently accessed in a
concurent tx call, leading to potential buffer coruption and firmware
crash (observed during iperf test). The channel control queue should
only be accessed and updated with the channel lock.

Fix this issue by using a local buffer descriptor which will be copied
in the thread-safe wcn36xx_dxe_tx_frame.

Note that buffer descriptor size is few bytes so the introduced copy
overhead is insignificant. Moreover, this allows to keep the locked
section minimal.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Ramon Fried <rfried@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/dxe.c  | 13 ++++------
 drivers/net/wireless/ath/wcn36xx/dxe.h  |  3 ++-
 drivers/net/wireless/ath/wcn36xx/txrx.c | 32 ++++++++-----------------
 3 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c
index 7d5ecaf02288..2c3b899a88fa 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.c
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.c
@@ -27,15 +27,6 @@
 #include "wcn36xx.h"
 #include "txrx.h"
 
-void *wcn36xx_dxe_get_next_bd(struct wcn36xx *wcn, bool is_low)
-{
-	struct wcn36xx_dxe_ch *ch = is_low ?
-		&wcn->dxe_tx_l_ch :
-		&wcn->dxe_tx_h_ch;
-
-	return ch->head_blk_ctl->bd_cpu_addr;
-}
-
 static void wcn36xx_ccu_write_register(struct wcn36xx *wcn, int addr, int data)
 {
 	wcn36xx_dbg(WCN36XX_DBG_DXE,
@@ -648,6 +639,7 @@ void wcn36xx_dxe_free_mem_pools(struct wcn36xx *wcn)
 
 int wcn36xx_dxe_tx_frame(struct wcn36xx *wcn,
 			 struct wcn36xx_vif *vif_priv,
+			 struct wcn36xx_tx_bd *bd,
 			 struct sk_buff *skb,
 			 bool is_low)
 {
@@ -681,6 +673,9 @@ int wcn36xx_dxe_tx_frame(struct wcn36xx *wcn,
 	ctl->skb = NULL;
 	desc = ctl->desc;
 
+	/* write buffer descriptor */
+	memcpy(ctl->bd_cpu_addr, bd, sizeof(*bd));
+
 	/* Set source address of the BD we send */
 	desc->src_addr_l = ctl->bd_phy_addr;
 
diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.h b/drivers/net/wireless/ath/wcn36xx/dxe.h
index 2bc376c5391b..ce580960d109 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.h
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.h
@@ -452,6 +452,7 @@ struct wcn36xx_dxe_mem_pool {
 	dma_addr_t	phy_addr;
 };
 
+struct wcn36xx_tx_bd;
 struct wcn36xx_vif;
 int wcn36xx_dxe_allocate_mem_pools(struct wcn36xx *wcn);
 void wcn36xx_dxe_free_mem_pools(struct wcn36xx *wcn);
@@ -463,8 +464,8 @@ void wcn36xx_dxe_deinit(struct wcn36xx *wcn);
 int wcn36xx_dxe_init_channels(struct wcn36xx *wcn);
 int wcn36xx_dxe_tx_frame(struct wcn36xx *wcn,
 			 struct wcn36xx_vif *vif_priv,
+			 struct wcn36xx_tx_bd *bd,
 			 struct sk_buff *skb,
 			 bool is_low);
 void wcn36xx_dxe_tx_ack_ind(struct wcn36xx *wcn, u32 status);
-void *wcn36xx_dxe_get_next_bd(struct wcn36xx *wcn, bool is_low);
 #endif	/* _DXE_H_ */
diff --git a/drivers/net/wireless/ath/wcn36xx/txrx.c b/drivers/net/wireless/ath/wcn36xx/txrx.c
index 22304edc5948..b1768ed6b0be 100644
--- a/drivers/net/wireless/ath/wcn36xx/txrx.c
+++ b/drivers/net/wireless/ath/wcn36xx/txrx.c
@@ -272,21 +272,9 @@ int wcn36xx_start_tx(struct wcn36xx *wcn,
 	bool is_low = ieee80211_is_data(hdr->frame_control);
 	bool bcast = is_broadcast_ether_addr(hdr->addr1) ||
 		is_multicast_ether_addr(hdr->addr1);
-	struct wcn36xx_tx_bd *bd = wcn36xx_dxe_get_next_bd(wcn, is_low);
+	struct wcn36xx_tx_bd bd;
 
-	if (!bd) {
-		/*
-		 * TX DXE are used in pairs. One for the BD and one for the
-		 * actual frame. The BD DXE's has a preallocated buffer while
-		 * the skb ones does not. If this isn't true something is really
-		 * wierd. TODO: Recover from this situation
-		 */
-
-		wcn36xx_err("bd address may not be NULL for BD DXE\n");
-		return -EINVAL;
-	}
-
-	memset(bd, 0, sizeof(*bd));
+	memset(&bd, 0, sizeof(bd));
 
 	wcn36xx_dbg(WCN36XX_DBG_TX,
 		    "tx skb %p len %d fc %04x sn %d %s %s\n",
@@ -296,10 +284,10 @@ int wcn36xx_start_tx(struct wcn36xx *wcn,
 
 	wcn36xx_dbg_dump(WCN36XX_DBG_TX_DUMP, "", skb->data, skb->len);
 
-	bd->dpu_rf = WCN36XX_BMU_WQ_TX;
+	bd.dpu_rf = WCN36XX_BMU_WQ_TX;
 
-	bd->tx_comp = !!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS);
-	if (bd->tx_comp) {
+	bd.tx_comp = !!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS);
+	if (bd.tx_comp) {
 		wcn36xx_dbg(WCN36XX_DBG_DXE, "TX_ACK status requested\n");
 		spin_lock_irqsave(&wcn->dxe_lock, flags);
 		if (wcn->tx_ack_skb) {
@@ -321,13 +309,13 @@ int wcn36xx_start_tx(struct wcn36xx *wcn,
 
 	/* Data frames served first*/
 	if (is_low)
-		wcn36xx_set_tx_data(bd, wcn, &vif_priv, sta_priv, skb, bcast);
+		wcn36xx_set_tx_data(&bd, wcn, &vif_priv, sta_priv, skb, bcast);
 	else
 		/* MGMT and CTRL frames are handeld here*/
-		wcn36xx_set_tx_mgmt(bd, wcn, &vif_priv, skb, bcast);
+		wcn36xx_set_tx_mgmt(&bd, wcn, &vif_priv, skb, bcast);
 
-	buff_to_be((u32 *)bd, sizeof(*bd)/sizeof(u32));
-	bd->tx_bd_sign = 0xbdbdbdbd;
+	buff_to_be((u32 *)&bd, sizeof(bd)/sizeof(u32));
+	bd.tx_bd_sign = 0xbdbdbdbd;
 
-	return wcn36xx_dxe_tx_frame(wcn, vif_priv, skb, is_low);
+	return wcn36xx_dxe_tx_frame(wcn, vif_priv, &bd, skb, is_low);
 }

From f276ba06e8b2db6d43b78964ec89b827d6b33537 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Tue, 27 Mar 2018 11:26:58 +0300
Subject: [PATCH 1425/1678] wcn36xx: dequeue all pending indicator messages

In case wcn36xx_smd_rsp_process() is called more than once before
hal_ind_work was dispatched, the messages will end up in hal_ind_queue,
but wcn36xx_ind_smd_work() will only look at the first message in that
list.

Fix this by dequeing the messages from the list in a loop, and only stop
when it's empty.

This issue was found during a review of the driver. In my tests, that
race never actually occured.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/wcn36xx/smd.c | 95 ++++++++++++++------------
 1 file changed, 52 insertions(+), 43 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index def6b23b777f..8932af5e4d8d 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -2411,54 +2411,63 @@ static void wcn36xx_ind_smd_work(struct work_struct *work)
 {
 	struct wcn36xx *wcn =
 		container_of(work, struct wcn36xx, hal_ind_work);
-	struct wcn36xx_hal_msg_header *msg_header;
-	struct wcn36xx_hal_ind_msg *hal_ind_msg;
-	unsigned long flags;
 
-	spin_lock_irqsave(&wcn->hal_ind_lock, flags);
+	for (;;) {
+		struct wcn36xx_hal_msg_header *msg_header;
+		struct wcn36xx_hal_ind_msg *hal_ind_msg;
+		unsigned long flags;
 
-	hal_ind_msg = list_first_entry(&wcn->hal_ind_queue,
-				       struct wcn36xx_hal_ind_msg,
-				       list);
-	list_del(wcn->hal_ind_queue.next);
-	spin_unlock_irqrestore(&wcn->hal_ind_lock, flags);
+		spin_lock_irqsave(&wcn->hal_ind_lock, flags);
 
-	msg_header = (struct wcn36xx_hal_msg_header *)hal_ind_msg->msg;
+		if (list_empty(&wcn->hal_ind_queue)) {
+			spin_unlock_irqrestore(&wcn->hal_ind_lock, flags);
+			return;
+		}
 
-	switch (msg_header->msg_type) {
-	case WCN36XX_HAL_COEX_IND:
-	case WCN36XX_HAL_DEL_BA_IND:
-	case WCN36XX_HAL_AVOID_FREQ_RANGE_IND:
-		break;
-	case WCN36XX_HAL_OTA_TX_COMPL_IND:
-		wcn36xx_smd_tx_compl_ind(wcn,
-					 hal_ind_msg->msg,
-					 hal_ind_msg->msg_len);
-		break;
-	case WCN36XX_HAL_MISSED_BEACON_IND:
-		wcn36xx_smd_missed_beacon_ind(wcn,
-					      hal_ind_msg->msg,
-					      hal_ind_msg->msg_len);
-		break;
-	case WCN36XX_HAL_DELETE_STA_CONTEXT_IND:
-		wcn36xx_smd_delete_sta_context_ind(wcn,
-						   hal_ind_msg->msg,
-						   hal_ind_msg->msg_len);
-		break;
-	case WCN36XX_HAL_PRINT_REG_INFO_IND:
-		wcn36xx_smd_print_reg_info_ind(wcn,
-					       hal_ind_msg->msg,
-					       hal_ind_msg->msg_len);
-		break;
-	case WCN36XX_HAL_SCAN_OFFLOAD_IND:
-		wcn36xx_smd_hw_scan_ind(wcn, hal_ind_msg->msg,
-					hal_ind_msg->msg_len);
-		break;
-	default:
-		wcn36xx_err("SMD_EVENT (%d) not supported\n",
-			      msg_header->msg_type);
+		hal_ind_msg = list_first_entry(&wcn->hal_ind_queue,
+					       struct wcn36xx_hal_ind_msg,
+					       list);
+		list_del(&hal_ind_msg->list);
+		spin_unlock_irqrestore(&wcn->hal_ind_lock, flags);
+
+		msg_header = (struct wcn36xx_hal_msg_header *)hal_ind_msg->msg;
+
+		switch (msg_header->msg_type) {
+		case WCN36XX_HAL_COEX_IND:
+		case WCN36XX_HAL_DEL_BA_IND:
+		case WCN36XX_HAL_AVOID_FREQ_RANGE_IND:
+			break;
+		case WCN36XX_HAL_OTA_TX_COMPL_IND:
+			wcn36xx_smd_tx_compl_ind(wcn,
+						 hal_ind_msg->msg,
+						 hal_ind_msg->msg_len);
+			break;
+		case WCN36XX_HAL_MISSED_BEACON_IND:
+			wcn36xx_smd_missed_beacon_ind(wcn,
+						      hal_ind_msg->msg,
+						      hal_ind_msg->msg_len);
+			break;
+		case WCN36XX_HAL_DELETE_STA_CONTEXT_IND:
+			wcn36xx_smd_delete_sta_context_ind(wcn,
+							   hal_ind_msg->msg,
+							   hal_ind_msg->msg_len);
+			break;
+		case WCN36XX_HAL_PRINT_REG_INFO_IND:
+			wcn36xx_smd_print_reg_info_ind(wcn,
+						       hal_ind_msg->msg,
+						       hal_ind_msg->msg_len);
+			break;
+		case WCN36XX_HAL_SCAN_OFFLOAD_IND:
+			wcn36xx_smd_hw_scan_ind(wcn, hal_ind_msg->msg,
+						hal_ind_msg->msg_len);
+			break;
+		default:
+			wcn36xx_err("SMD_EVENT (%d) not supported\n",
+				    msg_header->msg_type);
+		}
+
+		kfree(hal_ind_msg);
 	}
-	kfree(hal_ind_msg);
 }
 int wcn36xx_smd_open(struct wcn36xx *wcn)
 {

From 802ca335496e0c30518434d59f744596eff079b0 Mon Sep 17 00:00:00 2001
From: Yingying Tang <yintang@qti.qualcomm.com>
Date: Wed, 28 Mar 2018 12:12:52 +0300
Subject: [PATCH 1426/1678] ath10k: enable TDLS peer buffer STA feature

Enable TDLS peer buffer STA feature.
QCA6174 firmware(version: WLAN.RM.4.4) support TDLS peer buffer STA,
it reports this capability through wmi service map in wmi service ready
event. Set related parameter in TDLS WMI command to enable this feature.

Signed-off-by: Yingying Tang <yintang@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c     | 3 +++
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 1cc87cb15620..c684d6f803f5 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8325,6 +8325,9 @@ int ath10k_mac_register(struct ath10k *ar)
 			ieee80211_hw_set(ar->hw, TDLS_WIDER_BW);
 	}
 
+	if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map))
+		ieee80211_hw_set(ar->hw, SUPPORTS_TDLS_BUFFER_STA);
+
 	ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
 	ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
 	ar->hw->wiphy->max_remain_on_channel_duration = 5000;
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 57bd8a70d242..cb723a7312dc 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -2886,6 +2886,9 @@ ath10k_wmi_tlv_op_gen_update_fw_tdls_state(struct ath10k *ar, u32 vdev_id,
 	 */
 	u32 options = 0;
 
+	if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map))
+		options |=  WMI_TLV_TDLS_BUFFER_STA_EN;
+
 	len = sizeof(*tlv) + sizeof(*cmd);
 	skb = ath10k_wmi_alloc_skb(ar, len);
 	if (!skb)

From 4c9f8d114660c68b43918f671bd8fc6bdf3bdbd8 Mon Sep 17 00:00:00 2001
From: Yingying Tang <yintang@qti.qualcomm.com>
Date: Wed, 28 Mar 2018 12:13:07 +0300
Subject: [PATCH 1427/1678] ath10k: enable TDLS peer inactivity detection

Enable TDLS peer inactivity detetion feature.
QCA6174 firmware(version: WLAN.RM.4.4) support TDLS link inactivity detecting.
Set related parameters in TDLS WMI command to enable this feature.

Signed-off-by: Yingying Tang <yintang@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 52 +++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.h |  7 +++
 drivers/net/wireless/ath/ath10k/wmi.h     |  1 +
 3 files changed, 60 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index cb723a7312dc..9d1b0a459069 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -426,6 +426,49 @@ static int ath10k_wmi_tlv_event_temperature(struct ath10k *ar,
 	return 0;
 }
 
+static void ath10k_wmi_event_tdls_peer(struct ath10k *ar, struct sk_buff *skb)
+{
+	struct ieee80211_sta *station;
+	const struct wmi_tlv_tdls_peer_event *ev;
+	const void **tb;
+	struct ath10k_vif *arvif;
+
+	tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC);
+	if (IS_ERR(tb)) {
+		ath10k_warn(ar, "tdls peer failed to parse tlv");
+		return;
+	}
+	ev = tb[WMI_TLV_TAG_STRUCT_TDLS_PEER_EVENT];
+	if (!ev) {
+		kfree(tb);
+		ath10k_warn(ar, "tdls peer NULL event");
+		return;
+	}
+
+	switch (__le32_to_cpu(ev->peer_reason)) {
+	case WMI_TDLS_TEARDOWN_REASON_TX:
+	case WMI_TDLS_TEARDOWN_REASON_RSSI:
+	case WMI_TDLS_TEARDOWN_REASON_PTR_TIMEOUT:
+		station = ieee80211_find_sta_by_ifaddr(ar->hw,
+						       ev->peer_macaddr.addr,
+						       NULL);
+		if (!station) {
+			ath10k_warn(ar, "did not find station from tdls peer event");
+			kfree(tb);
+			return;
+		}
+		arvif = ath10k_get_arvif(ar, __le32_to_cpu(ev->vdev_id));
+		ieee80211_tdls_oper_request(
+					arvif->vif, station->addr,
+					NL80211_TDLS_TEARDOWN,
+					WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE,
+					GFP_ATOMIC
+					);
+		break;
+	}
+	kfree(tb);
+}
+
 /***********/
 /* TLV ops */
 /***********/
@@ -569,6 +612,9 @@ static void ath10k_wmi_tlv_op_rx(struct ath10k *ar, struct sk_buff *skb)
 	case WMI_TLV_PDEV_TEMPERATURE_EVENTID:
 		ath10k_wmi_tlv_event_temperature(ar, skb);
 		break;
+	case WMI_TLV_TDLS_PEER_EVENTID:
+		ath10k_wmi_event_tdls_peer(ar, skb);
+		break;
 	default:
 		ath10k_warn(ar, "Unknown eventid: %d\n", id);
 		break;
@@ -2889,6 +2935,12 @@ ath10k_wmi_tlv_op_gen_update_fw_tdls_state(struct ath10k *ar, u32 vdev_id,
 	if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map))
 		options |=  WMI_TLV_TDLS_BUFFER_STA_EN;
 
+	/* WMI_TDLS_ENABLE_ACTIVE_EXTERNAL_CONTROL means firm will handle TDLS
+	 * link inactivity detecting logic.
+	 */
+	if (state == WMI_TDLS_ENABLE_ACTIVE)
+		state = WMI_TDLS_ENABLE_ACTIVE_EXTERNAL_CONTROL;
+
 	len = sizeof(*tlv) + sizeof(*cmd);
 	skb = ath10k_wmi_alloc_skb(ar, len);
 	if (!skb)
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index b07b690544e7..fa3773ec7c68 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -1757,6 +1757,13 @@ struct wmi_tlv_tx_pause_ev {
 	__le32 tid_map;
 } __packed;
 
+struct wmi_tlv_tdls_peer_event {
+	struct wmi_mac_addr    peer_macaddr;
+	__le32 peer_status;
+	__le32 peer_reason;
+	__le32 vdev_id;
+} __packed;
+
 void ath10k_wmi_tlv_attach(struct ath10k *ar);
 
 struct wmi_tlv_mgmt_tx_cmd {
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index c8fc45d8090e..6da2583c341f 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -6792,6 +6792,7 @@ enum wmi_tdls_state {
 	WMI_TDLS_DISABLE,
 	WMI_TDLS_ENABLE_PASSIVE,
 	WMI_TDLS_ENABLE_ACTIVE,
+	WMI_TDLS_ENABLE_ACTIVE_EXTERNAL_CONTROL,
 };
 
 enum wmi_tdls_peer_state {

From c3816c9ee12c235dde8f382619a38a5ce86fb548 Mon Sep 17 00:00:00 2001
From: Yingying Tang <yintang@qti.qualcomm.com>
Date: Wed, 28 Mar 2018 12:15:23 +0300
Subject: [PATCH 1428/1678] ath10k: avoid to set WEP key for TDLS peer

TDLS peer do not need WEP key. Setting WEP key will lead
to TDLS setup failure. Add fix to avoid setting WEP key
for TDLS peer.

Signed-off-by: Yingying Tang <yintang@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index c684d6f803f5..659b23809b47 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2977,7 +2977,7 @@ static int ath10k_station_assoc(struct ath10k *ar,
 		}
 
 		/* Plumb cached keys only for static WEP */
-		if (arvif->def_wep_key_idx != -1) {
+		if ((arvif->def_wep_key_idx != -1) && (!sta->tdls)) {
 			ret = ath10k_install_peer_wep_keys(arvif, sta->addr);
 			if (ret) {
 				ath10k_warn(ar, "failed to install peer wep keys for vdev %i: %d\n",

From 9cdd005750293c0e7e9276546e02b290d0bafeb0 Mon Sep 17 00:00:00 2001
From: Yingying Tang <yintang@qti.qualcomm.com>
Date: Wed, 28 Mar 2018 12:15:35 +0300
Subject: [PATCH 1429/1678] ath10k: fix TDLS peer TX data failure issue on
 encryped AP

For WPA encryption, QCA6174 firmware(version: WLAN.RM.4.4) will unblock
data when M4 was sent successfully. For other encryption which didn't need
4-way handshake firmware will unblock the data when peer authorized. Since
TDLS is 3-way handshake host need send authorize cmd to firmware to unblock
data.

Signed-off-by: Yingying Tang <yintang@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 659b23809b47..0a249cd0662e 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -5932,6 +5932,10 @@ static int ath10k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		ath10k_warn(ar, "Peer %pM disappeared!\n", peer_addr);
 	spin_unlock_bh(&ar->data_lock);
 
+	if (sta && sta->tdls)
+		ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr,
+					  WMI_PEER_AUTHORIZE, 1);
+
 exit:
 	mutex_unlock(&ar->conf_mutex);
 	return ret;

From 8ebee73b574ad3dd1f14d461f65ceaffbd637650 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Wed, 28 Mar 2018 12:19:40 +0300
Subject: [PATCH 1430/1678] ath10k: advertize beacon_int_min_gcd

This patch fixes regression caused by 0c317a02ca98
("cfg80211: support virtual interfaces with different beacon intervals"),
with this change cfg80211 expects the driver to advertize
'beacon_int_min_gcd' to support different beacon intervals in multivap
scenario. This support is added for, QCA988X/QCA99X0/QCA9984/QCA4019.

Verifed AP + mesh bring up on QCA9984 with beacon interval 100msec and
1000msec respectively.
Frimware: firmware-5.bin_10.4-3.5.3-00053

Fixes: 0c317a02ca98 ("cfg80211: support virtual interfaces with different beacon intervals")
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0a249cd0662e..bf05a3689558 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -7905,6 +7905,7 @@ static const struct ieee80211_iface_combination ath10k_10x_if_comb[] = {
 		.max_interfaces = 8,
 		.num_different_channels = 1,
 		.beacon_int_infra_match = true,
+		.beacon_int_min_gcd = 1,
 #ifdef CONFIG_ATH10K_DFS_CERTIFIED
 		.radar_detect_widths =	BIT(NL80211_CHAN_WIDTH_20_NOHT) |
 					BIT(NL80211_CHAN_WIDTH_20) |
@@ -8028,6 +8029,7 @@ static const struct ieee80211_iface_combination ath10k_10_4_if_comb[] = {
 		.max_interfaces = 16,
 		.num_different_channels = 1,
 		.beacon_int_infra_match = true,
+		.beacon_int_min_gcd = 1,
 #ifdef CONFIG_ATH10K_DFS_CERTIFIED
 		.radar_detect_widths =	BIT(NL80211_CHAN_WIDTH_20_NOHT) |
 					BIT(NL80211_CHAN_WIDTH_20) |

From 91493e8e10f0f495b04a5c32096d56ea1f254c93 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Wed, 28 Mar 2018 12:19:55 +0300
Subject: [PATCH 1431/1678] ath10k: fix recent bandwidth conversion bug

The commit "cfg80211: make RATE_INFO_BW_20 the default" changed
the index of RATE_INFO_BW_20, but the updates to ath10k missed
the special bandwidth calculation case in
ath10k_update_per_peer_tx_stats().

This will fix below warning,

 WARNING: CPU: 0 PID: 609 at net/wireless/util.c:1254
 cfg80211_calculate_bitrate+0x174/0x220
 invalid rate bw=1, mcs=9, nss=2

 (unwind_backtrace) from
 (cfg80211_calculate_bitrate+0x174/0x220)
 (cfg80211_calculate_bitrate) from
 (nl80211_put_sta_rate+0x44/0x1dc)from
 (nl80211_put_sta_rate) from
 (nl80211_send_station+0x388/0xaf0)
 (nl80211_get_station+0xa8/0xec)
 [ end trace da8257d6a850e91a ]

Fixes: 842be75c77cb ("cfg80211: make RATE_INFO_BW_20 the default")
Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 42 ++++++++++++++----------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 64996aba1485..5e02e26158f6 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -724,6 +724,28 @@ struct amsdu_subframe_hdr {
 
 #define GROUP_ID_IS_SU_MIMO(x) ((x) == 0 || (x) == 63)
 
+static inline u8 ath10k_bw_to_mac80211_bw(u8 bw)
+{
+	u8 ret = 0;
+
+	switch (bw) {
+	case 0:
+		ret = RATE_INFO_BW_20;
+		break;
+	case 1:
+		ret = RATE_INFO_BW_40;
+		break;
+	case 2:
+		ret = RATE_INFO_BW_80;
+		break;
+	case 3:
+		ret = RATE_INFO_BW_160;
+		break;
+	}
+
+	return ret;
+}
+
 static void ath10k_htt_rx_h_rates(struct ath10k *ar,
 				  struct ieee80211_rx_status *status,
 				  struct htt_rx_desc *rxd)
@@ -826,23 +848,7 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar,
 		if (sgi)
 			status->enc_flags |= RX_ENC_FLAG_SHORT_GI;
 
-		switch (bw) {
-		/* 20MHZ */
-		case 0:
-			break;
-		/* 40MHZ */
-		case 1:
-			status->bw = RATE_INFO_BW_40;
-			break;
-		/* 80MHZ */
-		case 2:
-			status->bw = RATE_INFO_BW_80;
-			break;
-		case 3:
-			status->bw = RATE_INFO_BW_160;
-			break;
-		}
-
+		status->bw = ath10k_bw_to_mac80211_bw(bw);
 		status->encoding = RX_ENC_VHT;
 		break;
 	default:
@@ -2550,7 +2556,7 @@ ath10k_update_per_peer_tx_stats(struct ath10k *ar,
 		arsta->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
 
 	arsta->txrate.nss = txrate.nss;
-	arsta->txrate.bw = txrate.bw + RATE_INFO_BW_20;
+	arsta->txrate.bw = ath10k_bw_to_mac80211_bw(txrate.bw);
 }
 
 static void ath10k_htt_fetch_peer_stats(struct ath10k *ar,

From 1b3fdb50f88195a9814a8301fad6325868881bb6 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Wed, 28 Mar 2018 12:40:24 +0300
Subject: [PATCH 1432/1678] ath10k: fix vdev stats for 10.4 firmware

Currently vdev stats displayed in fw_stats are applicable
only for TLV based firmware and fix it for 10.4 firmware
as of now. The vdev stats in 10.4 firmware is split into two
parts (vdev_stats, vdev_stats_extended). The actual stats
are captured only in extended vdev stats. In order to enable
vdev stats, appropriate feature bit will be set on extended
resource config. As FTM related counters are available only on
newer 10.4 based firmware, these counters will be displayed
only on valid data.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/core.c |   6 +-
 drivers/net/wireless/ath/ath10k/core.h |  21 ++++
 drivers/net/wireless/ath/ath10k/wmi.c  | 151 +++++++++++++++++++++----
 drivers/net/wireless/ath/ath10k/wmi.h  |  27 ++++-
 4 files changed, 184 insertions(+), 21 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 830b7fe466f3..8a3020dbd4cf 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -2041,7 +2041,8 @@ static int ath10k_core_init_firmware_features(struct ath10k *ar)
 		ar->max_num_vdevs = TARGET_10_4_NUM_VDEVS;
 		ar->num_tids = TARGET_10_4_TGT_NUM_TIDS;
 		ar->fw_stats_req_mask = WMI_10_4_STAT_PEER |
-					WMI_10_4_STAT_PEER_EXTD;
+					WMI_10_4_STAT_PEER_EXTD |
+					WMI_10_4_STAT_VDEV_EXTD;
 		ar->max_spatial_stream = ar->hw_params.max_spatial_stream;
 		ar->max_num_tdls_vdevs = TARGET_10_4_NUM_TDLS_VDEVS;
 
@@ -2282,6 +2283,9 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
 		if (ath10k_peer_stats_enabled(ar))
 			val = WMI_10_4_PEER_STATS;
 
+		/* Enable vdev stats by default */
+		val |= WMI_10_4_VDEV_STATS;
+
 		if (test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map))
 			val |= WMI_10_4_BSS_CHANNEL_INFO_64;
 
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 73712c830be7..c17d805d68cc 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -222,6 +222,27 @@ struct ath10k_fw_stats_vdev {
 	u32 beacon_rssi_history[10];
 };
 
+struct ath10k_fw_stats_vdev_extd {
+	struct list_head list;
+
+	u32 vdev_id;
+	u32 ppdu_aggr_cnt;
+	u32 ppdu_noack;
+	u32 mpdu_queued;
+	u32 ppdu_nonaggr_cnt;
+	u32 mpdu_sw_requeued;
+	u32 mpdu_suc_retry;
+	u32 mpdu_suc_multitry;
+	u32 mpdu_fail_retry;
+	u32 tx_ftm_suc;
+	u32 tx_ftm_suc_retry;
+	u32 tx_ftm_fail;
+	u32 rx_ftmr_cnt;
+	u32 rx_ftmr_dup_cnt;
+	u32 rx_iftmr_cnt;
+	u32 rx_iftmr_dup_cnt;
+};
+
 struct ath10k_fw_stats_pdev {
 	struct list_head list;
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 5947d33d6810..c5e1ca5945db 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -2708,6 +2708,28 @@ ath10k_wmi_10_4_pull_peer_stats(const struct wmi_10_4_peer_stats *src,
 	dst->peer_rx_rate = __le32_to_cpu(src->peer_rx_rate);
 }
 
+static void
+ath10k_wmi_10_4_pull_vdev_stats(const struct wmi_vdev_stats_extd *src,
+				struct ath10k_fw_stats_vdev_extd *dst)
+{
+	dst->vdev_id = __le32_to_cpu(src->vdev_id);
+	dst->ppdu_aggr_cnt = __le32_to_cpu(src->ppdu_aggr_cnt);
+	dst->ppdu_noack = __le32_to_cpu(src->ppdu_noack);
+	dst->mpdu_queued = __le32_to_cpu(src->mpdu_queued);
+	dst->ppdu_nonaggr_cnt = __le32_to_cpu(src->ppdu_nonaggr_cnt);
+	dst->mpdu_sw_requeued = __le32_to_cpu(src->mpdu_sw_requeued);
+	dst->mpdu_suc_retry = __le32_to_cpu(src->mpdu_suc_retry);
+	dst->mpdu_suc_multitry = __le32_to_cpu(src->mpdu_suc_multitry);
+	dst->mpdu_fail_retry = __le32_to_cpu(src->mpdu_fail_retry);
+	dst->tx_ftm_suc = __le32_to_cpu(src->tx_ftm_suc);
+	dst->tx_ftm_suc_retry = __le32_to_cpu(src->tx_ftm_suc_retry);
+	dst->tx_ftm_fail = __le32_to_cpu(src->tx_ftm_fail);
+	dst->rx_ftmr_cnt = __le32_to_cpu(src->rx_ftmr_cnt);
+	dst->rx_ftmr_dup_cnt = __le32_to_cpu(src->rx_ftmr_dup_cnt);
+	dst->rx_iftmr_cnt = __le32_to_cpu(src->rx_iftmr_cnt);
+	dst->rx_iftmr_dup_cnt = __le32_to_cpu(src->rx_iftmr_dup_cnt);
+}
+
 static int ath10k_wmi_main_op_pull_fw_stats(struct ath10k *ar,
 					    struct sk_buff *skb,
 					    struct ath10k_fw_stats *stats)
@@ -3047,7 +3069,16 @@ static int ath10k_wmi_10_4_op_pull_fw_stats(struct ath10k *ar,
 		 */
 	}
 
-	/* fw doesn't implement vdev stats */
+	for (i = 0; i < num_vdev_stats; i++) {
+		const struct wmi_vdev_stats *src;
+
+		/* Ignore vdev stats here as it has only vdev id. Actual vdev
+		 * stats will be retrieved from vdev extended stats.
+		 */
+		src = (void *)skb->data;
+		if (!skb_pull(skb, sizeof(*src)))
+			return -EPROTO;
+	}
 
 	for (i = 0; i < num_peer_stats; i++) {
 		const struct wmi_10_4_peer_stats *src;
@@ -3079,26 +3110,43 @@ static int ath10k_wmi_10_4_op_pull_fw_stats(struct ath10k *ar,
 		 */
 	}
 
-	if ((stats_id & WMI_10_4_STAT_PEER_EXTD) == 0)
-		return 0;
+	if (stats_id & WMI_10_4_STAT_PEER_EXTD) {
+		stats->extended = true;
 
-	stats->extended = true;
+		for (i = 0; i < num_peer_stats; i++) {
+			const struct wmi_10_4_peer_extd_stats *src;
+			struct ath10k_fw_extd_stats_peer *dst;
 
-	for (i = 0; i < num_peer_stats; i++) {
-		const struct wmi_10_4_peer_extd_stats *src;
-		struct ath10k_fw_extd_stats_peer *dst;
+			src = (void *)skb->data;
+			if (!skb_pull(skb, sizeof(*src)))
+				return -EPROTO;
 
-		src = (void *)skb->data;
-		if (!skb_pull(skb, sizeof(*src)))
-			return -EPROTO;
+			dst = kzalloc(sizeof(*dst), GFP_ATOMIC);
+			if (!dst)
+				continue;
 
-		dst = kzalloc(sizeof(*dst), GFP_ATOMIC);
-		if (!dst)
-			continue;
+			ether_addr_copy(dst->peer_macaddr,
+					src->peer_macaddr.addr);
+			dst->rx_duration = __le32_to_cpu(src->rx_duration);
+			list_add_tail(&dst->list, &stats->peers_extd);
+		}
+	}
 
-		ether_addr_copy(dst->peer_macaddr, src->peer_macaddr.addr);
-		dst->rx_duration = __le32_to_cpu(src->rx_duration);
-		list_add_tail(&dst->list, &stats->peers_extd);
+	if (stats_id & WMI_10_4_STAT_VDEV_EXTD) {
+		for (i = 0; i < num_vdev_stats; i++) {
+			const struct wmi_vdev_stats_extd *src;
+			struct ath10k_fw_stats_vdev_extd *dst;
+
+			src = (void *)skb->data;
+			if (!skb_pull(skb, sizeof(*src)))
+				return -EPROTO;
+
+			dst = kzalloc(sizeof(*dst), GFP_ATOMIC);
+			if (!dst)
+				continue;
+			ath10k_wmi_10_4_pull_vdev_stats(src, dst);
+			list_add_tail(&dst->list, &stats->vdevs);
+		}
 	}
 
 	return 0;
@@ -8004,6 +8052,72 @@ ath10k_wmi_op_gen_pdev_enable_adaptive_cca(struct ath10k *ar, u8 enable,
 	return skb;
 }
 
+static void
+ath10k_wmi_fw_vdev_stats_extd_fill(const struct ath10k_fw_stats_vdev_extd *vdev,
+				   char *buf, u32 *length)
+{
+	u32 len = *length;
+	u32 buf_len = ATH10K_FW_STATS_BUF_SIZE;
+	u32 val;
+
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "vdev id", vdev->vdev_id);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "ppdu aggr count", vdev->ppdu_aggr_cnt);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "ppdu noack", vdev->ppdu_noack);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "mpdu queued", vdev->mpdu_queued);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "ppdu nonaggr count", vdev->ppdu_nonaggr_cnt);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "mpdu sw requeued", vdev->mpdu_sw_requeued);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "mpdu success retry", vdev->mpdu_suc_retry);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "mpdu success multitry", vdev->mpdu_suc_multitry);
+	len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+			 "mpdu fail retry", vdev->mpdu_fail_retry);
+	val = vdev->tx_ftm_suc;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "tx ftm success",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	val = vdev->tx_ftm_suc_retry;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "tx ftm success retry",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	val = vdev->tx_ftm_fail;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "tx ftm fail",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	val = vdev->rx_ftmr_cnt;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "rx ftm request count",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	val = vdev->rx_ftmr_dup_cnt;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "rx ftm request dup count",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	val = vdev->rx_iftmr_cnt;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "rx initial ftm req count",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	val = vdev->rx_iftmr_dup_cnt;
+	if (val & WMI_VDEV_STATS_FTM_COUNT_VALID)
+		len += scnprintf(buf + len, buf_len - len, "%30s %u\n",
+				 "rx initial ftm req dup cnt",
+				 MS(val, WMI_VDEV_STATS_FTM_COUNT));
+	len += scnprintf(buf + len, buf_len - len, "\n");
+
+	*length = len;
+}
+
 void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar,
 				      struct ath10k_fw_stats *fw_stats,
 				      char *buf)
@@ -8011,7 +8125,7 @@ void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar,
 	u32 len = 0;
 	u32 buf_len = ATH10K_FW_STATS_BUF_SIZE;
 	const struct ath10k_fw_stats_pdev *pdev;
-	const struct ath10k_fw_stats_vdev *vdev;
+	const struct ath10k_fw_stats_vdev_extd *vdev;
 	const struct ath10k_fw_stats_peer *peer;
 	size_t num_peers;
 	size_t num_vdevs;
@@ -8064,9 +8178,8 @@ void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar,
 			"ath10k VDEV stats", num_vdevs);
 	len += scnprintf(buf + len, buf_len - len, "%30s\n\n",
 				"=================");
-
 	list_for_each_entry(vdev, &fw_stats->vdevs, list) {
-		ath10k_wmi_fw_vdev_stats_fill(vdev, buf, &len);
+		ath10k_wmi_fw_vdev_stats_extd_fill(vdev, buf, &len);
 	}
 
 	len += scnprintf(buf + len, buf_len - len, "\n");
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 6da2583c341f..6fbc84c29521 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -4413,6 +4413,7 @@ enum wmi_10_4_stats_id {
 	WMI_10_4_STAT_AP		= BIT(1),
 	WMI_10_4_STAT_INST		= BIT(2),
 	WMI_10_4_STAT_PEER_EXTD		= BIT(3),
+	WMI_10_4_STAT_VDEV_EXTD		= BIT(4),
 };
 
 struct wlan_inst_rssi_args {
@@ -4552,12 +4553,36 @@ struct wmi_10_4_pdev_stats {
 
 /*
  * VDEV statistics
- * TODO: add all VDEV stats here
  */
+
+#define WMI_VDEV_STATS_FTM_COUNT_VALID	BIT(31)
+#define WMI_VDEV_STATS_FTM_COUNT_LSB	0
+#define WMI_VDEV_STATS_FTM_COUNT_MASK	0x7fffffff
+
 struct wmi_vdev_stats {
 	__le32 vdev_id;
 } __packed;
 
+struct wmi_vdev_stats_extd {
+	__le32 vdev_id;
+	__le32 ppdu_aggr_cnt;
+	__le32 ppdu_noack;
+	__le32 mpdu_queued;
+	__le32 ppdu_nonaggr_cnt;
+	__le32 mpdu_sw_requeued;
+	__le32 mpdu_suc_retry;
+	__le32 mpdu_suc_multitry;
+	__le32 mpdu_fail_retry;
+	__le32 tx_ftm_suc;
+	__le32 tx_ftm_suc_retry;
+	__le32 tx_ftm_fail;
+	__le32 rx_ftmr_cnt;
+	__le32 rx_ftmr_dup_cnt;
+	__le32 rx_iftmr_cnt;
+	__le32 rx_iftmr_dup_cnt;
+	__le32 reserved[6];
+} __packed;
+
 /*
  * peer statistics.
  * TODO: add more stats

From a72c92629108bb8ad756f31b74791c51e1de2af4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 21 Mar 2018 03:32:52 -0700
Subject: [PATCH 1433/1678] ath: Remove unnecessary ath_bcast_mac and use
 eth_broadcast_addr

Remove the static array and use the generic routine to set the
Ethernet broadcast address.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath.h                | 2 --
 drivers/net/wireless/ath/ath5k/attach.c       | 2 +-
 drivers/net/wireless/ath/ath9k/htc_drv_init.c | 2 +-
 drivers/net/wireless/ath/ath9k/init.c         | 2 +-
 4 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h
index f3f2784f6ebd..7a364eca46d6 100644
--- a/drivers/net/wireless/ath/ath.h
+++ b/drivers/net/wireless/ath/ath.h
@@ -33,8 +33,6 @@
  */
 #define	ATH_KEYMAX	        128     /* max key cache size we handle */
 
-static const u8 ath_bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-
 struct ath_ani {
 	bool caldone;
 	unsigned int longcal_timer;
diff --git a/drivers/net/wireless/ath/ath5k/attach.c b/drivers/net/wireless/ath/ath5k/attach.c
index 233054bd6b52..12d3a6c92ba4 100644
--- a/drivers/net/wireless/ath/ath5k/attach.c
+++ b/drivers/net/wireless/ath/ath5k/attach.c
@@ -327,7 +327,7 @@ int ath5k_hw_init(struct ath5k_hw *ah)
 	ath5k_hw_set_lladdr(ah, zero_mac);
 
 	/* Set BSSID to bcast address: ff:ff:ff:ff:ff:ff for now */
-	memcpy(common->curbssid, ath_bcast_mac, ETH_ALEN);
+	eth_broadcast_addr(common->curbssid);
 	ath5k_hw_set_bssid(ah);
 	ath5k_hw_set_opmode(ah, ah->opmode);
 
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index f246e9ed4a81..214c68269a69 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -591,7 +591,7 @@ static void ath9k_init_misc(struct ath9k_htc_priv *priv)
 {
 	struct ath_common *common = ath9k_hw_common(priv->ah);
 
-	memcpy(common->bssidmask, ath_bcast_mac, ETH_ALEN);
+	eth_broadcast_addr(common->bssidmask);
 
 	common->last_rssi = ATH_RSSI_DUMMY_MARKER;
 	priv->ah->opmode = NL80211_IFTYPE_STATION;
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index b71b16715f6a..c070a9e51ebf 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -428,7 +428,7 @@ static void ath9k_init_misc(struct ath_softc *sc)
 	timer_setup(&common->ani.timer, ath_ani_calibrate, 0);
 
 	common->last_rssi = ATH_RSSI_DUMMY_MARKER;
-	memcpy(common->bssidmask, ath_bcast_mac, ETH_ALEN);
+	eth_broadcast_addr(common->bssidmask);
 	sc->beacon.slottime = 9;
 
 	for (i = 0; i < ARRAY_SIZE(sc->beacon.bslot); i++)

From 230ebaa189af44d50dccb4a1846e39ca594e347b Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Wed, 28 Mar 2018 13:24:09 +0300
Subject: [PATCH 1434/1678] cfg80211: read wmm rules from regulatory database

ETSI EN 301 893 v2.1.1 (2017-05) standard defines a new channel access
mechanism that all devices (WLAN and LAA) need to comply with.
The regulatory database can now be loaded into the kernel and also
has the option to load optional data.
In order to be able to comply with ETSI standard, we add wmm_rule into
regulatory rule and add the option to read its value from the regulatory
database.

Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
[johannes: fix memory leak in error path]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/regulatory.h |  28 ++++++++
 net/wireless/reg.c       | 148 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 169 insertions(+), 7 deletions(-)

diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index f83cacce3308..60f8cc86a447 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -4,6 +4,7 @@
  * regulatory support structures
  *
  * Copyright 2008-2009	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
+ * Copyright (C) 2018 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -188,9 +189,35 @@ struct ieee80211_power_rule {
 	u32 max_eirp;
 };
 
+/**
+ * struct ieee80211_wmm_ac - used to store per ac wmm regulatory limitation
+ *
+ * The information provided in this structure is required for QoS
+ * transmit queue configuration. Cf. IEEE 802.11 7.3.2.29.
+ *
+ * @cw_min: minimum contention window [a value of the form
+ *      2^n-1 in the range 1..32767]
+ * @cw_max: maximum contention window [like @cw_min]
+ * @cot: maximum burst time in units of 32 usecs, 0 meaning disabled
+ * @aifsn: arbitration interframe space [0..255]
+ *
+ */
+struct ieee80211_wmm_ac {
+	u16 cw_min;
+	u16 cw_max;
+	u16 cot;
+	u8 aifsn;
+};
+
+struct ieee80211_wmm_rule {
+	struct ieee80211_wmm_ac client[IEEE80211_NUM_ACS];
+	struct ieee80211_wmm_ac ap[IEEE80211_NUM_ACS];
+};
+
 struct ieee80211_reg_rule {
 	struct ieee80211_freq_range freq_range;
 	struct ieee80211_power_rule power_rule;
+	struct ieee80211_wmm_rule *wmm_rule;
 	u32 flags;
 	u32 dfs_cac_ms;
 };
@@ -198,6 +225,7 @@ struct ieee80211_reg_rule {
 struct ieee80211_regdomain {
 	struct rcu_head rcu_head;
 	u32 n_reg_rules;
+	u32 n_wmm_rules;
 	char alpha2[3];
 	enum nl80211_dfs_regions dfs_region;
 	struct ieee80211_reg_rule reg_rules[];
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 7b42f0bacfd8..eddc834f6358 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,6 +5,7 @@
  * Copyright 2008-2011	Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright      2017  Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -424,23 +425,36 @@ static const struct ieee80211_regdomain *
 reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 {
 	struct ieee80211_regdomain *regd;
-	int size_of_regd;
+	int size_of_regd, size_of_wmms;
 	unsigned int i;
+	struct ieee80211_wmm_rule *d_wmm, *s_wmm;
 
 	size_of_regd =
 		sizeof(struct ieee80211_regdomain) +
 		src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule);
+	size_of_wmms = src_regd->n_wmm_rules *
+		sizeof(struct ieee80211_wmm_rule);
 
-	regd = kzalloc(size_of_regd, GFP_KERNEL);
+	regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
 	if (!regd)
 		return ERR_PTR(-ENOMEM);
 
 	memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain));
 
-	for (i = 0; i < src_regd->n_reg_rules; i++)
+	d_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
+	s_wmm = (struct ieee80211_wmm_rule *)((u8 *)src_regd + size_of_regd);
+	memcpy(d_wmm, s_wmm, size_of_wmms);
+
+	for (i = 0; i < src_regd->n_reg_rules; i++) {
 		memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i],
 		       sizeof(struct ieee80211_reg_rule));
+		if (!src_regd->reg_rules[i].wmm_rule)
+			continue;
 
+		regd->reg_rules[i].wmm_rule = d_wmm +
+			(src_regd->reg_rules[i].wmm_rule - s_wmm) /
+			sizeof(struct ieee80211_wmm_rule);
+	}
 	return regd;
 }
 
@@ -595,6 +609,17 @@ enum fwdb_flags {
 	FWDB_FLAG_AUTO_BW	= BIT(4),
 };
 
+struct fwdb_wmm_ac {
+	u8 ecw;
+	u8 aifsn;
+	__be16 cot;
+} __packed;
+
+struct fwdb_wmm_rule {
+	struct fwdb_wmm_ac client[IEEE80211_NUM_ACS];
+	struct fwdb_wmm_ac ap[IEEE80211_NUM_ACS];
+} __packed;
+
 struct fwdb_rule {
 	u8 len;
 	u8 flags;
@@ -602,6 +627,7 @@ struct fwdb_rule {
 	__be32 start, end, max_bw;
 	/* start of optional data */
 	__be16 cac_timeout;
+	__be16 wmm_ptr;
 } __packed __aligned(4);
 
 #define FWDB_MAGIC 0x52474442
@@ -613,6 +639,31 @@ struct fwdb_header {
 	struct fwdb_country country[];
 } __packed __aligned(4);
 
+static int ecw2cw(int ecw)
+{
+	return (1 << ecw) - 1;
+}
+
+static bool valid_wmm(struct fwdb_wmm_rule *rule)
+{
+	struct fwdb_wmm_ac *ac = (struct fwdb_wmm_ac *)rule;
+	int i;
+
+	for (i = 0; i < IEEE80211_NUM_ACS * 2; i++) {
+		u16 cw_min = ecw2cw((ac[i].ecw & 0xf0) >> 4);
+		u16 cw_max = ecw2cw(ac[i].ecw & 0x0f);
+		u8 aifsn = ac[i].aifsn;
+
+		if (cw_min >= cw_max)
+			return false;
+
+		if (aifsn < 1)
+			return false;
+	}
+
+	return true;
+}
+
 static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
 {
 	struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
@@ -623,7 +674,18 @@ static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
 	/* mandatory fields */
 	if (rule->len < offsetofend(struct fwdb_rule, max_bw))
 		return false;
+	if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
+		u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
+		struct fwdb_wmm_rule *wmm;
 
+		if (wmm_ptr + sizeof(struct fwdb_wmm_rule) > size)
+			return false;
+
+		wmm = (void *)(data + wmm_ptr);
+
+		if (!valid_wmm(wmm))
+			return false;
+	}
 	return true;
 }
 
@@ -798,23 +860,64 @@ static bool valid_regdb(const u8 *data, unsigned int size)
 	return true;
 }
 
+static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
+			 struct fwdb_wmm_rule *wmm)
+{
+	unsigned int i;
+
+	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+		rule->client[i].cw_min =
+			ecw2cw((wmm->client[i].ecw & 0xf0) >> 4);
+		rule->client[i].cw_max = ecw2cw(wmm->client[i].ecw & 0x0f);
+		rule->client[i].aifsn =  wmm->client[i].aifsn;
+		rule->client[i].cot = 1000 * be16_to_cpu(wmm->client[i].cot);
+		rule->ap[i].cw_min = ecw2cw((wmm->ap[i].ecw & 0xf0) >> 4);
+		rule->ap[i].cw_max = ecw2cw(wmm->ap[i].ecw & 0x0f);
+		rule->ap[i].aifsn = wmm->ap[i].aifsn;
+		rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
+	}
+}
+
+struct wmm_ptrs {
+	struct ieee80211_wmm_rule *rule;
+	u32 ptr;
+};
+
+static struct ieee80211_wmm_rule *find_wmm_ptr(struct wmm_ptrs *wmm_ptrs,
+					       u32 wmm_ptr, int n_wmms)
+{
+	int i;
+
+	for (i = 0; i < n_wmms; i++) {
+		if (wmm_ptrs[i].ptr == wmm_ptr)
+			return wmm_ptrs[i].rule;
+	}
+	return NULL;
+}
+
 static int regdb_query_country(const struct fwdb_header *db,
 			       const struct fwdb_country *country)
 {
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
 	struct ieee80211_regdomain *regdom;
-	unsigned int size_of_regd;
-	unsigned int i;
+	struct ieee80211_regdomain *tmp_rd;
+	unsigned int size_of_regd, i, n_wmms = 0;
+	struct wmm_ptrs *wmm_ptrs;
 
-	size_of_regd =
-		sizeof(struct ieee80211_regdomain) +
+	size_of_regd = sizeof(struct ieee80211_regdomain) +
 		coll->n_rules * sizeof(struct ieee80211_reg_rule);
 
 	regdom = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!regdom)
 		return -ENOMEM;
 
+	wmm_ptrs = kcalloc(coll->n_rules, sizeof(*wmm_ptrs), GFP_KERNEL);
+	if (!wmm_ptrs) {
+		kfree(regdom);
+		return -ENOMEM;
+	}
+
 	regdom->n_reg_rules = coll->n_rules;
 	regdom->alpha2[0] = country->alpha2[0];
 	regdom->alpha2[1] = country->alpha2[1];
@@ -851,7 +954,38 @@ static int regdb_query_country(const struct fwdb_header *db,
 		if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
 			rrule->dfs_cac_ms =
 				1000 * be16_to_cpu(rule->cac_timeout);
+		if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
+			u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
+			struct ieee80211_wmm_rule *wmm_pos =
+				find_wmm_ptr(wmm_ptrs, wmm_ptr, n_wmms);
+			struct fwdb_wmm_rule *wmm;
+			struct ieee80211_wmm_rule *wmm_rule;
+
+			if (wmm_pos) {
+				rrule->wmm_rule = wmm_pos;
+				continue;
+			}
+			wmm = (void *)((u8 *)db + wmm_ptr);
+			tmp_rd = krealloc(regdom, size_of_regd + (n_wmms + 1) *
+					  sizeof(struct ieee80211_wmm_rule),
+					  GFP_KERNEL);
+
+			if (!tmp_rd) {
+				kfree(regdom);
+				return -ENOMEM;
+			}
+			regdom = tmp_rd;
+
+			wmm_rule = (struct ieee80211_wmm_rule *)
+				((u8 *)regdom + size_of_regd + n_wmms *
+				sizeof(struct ieee80211_wmm_rule));
+
+			set_wmm_rule(wmm_rule, wmm);
+			wmm_ptrs[n_wmms].ptr = wmm_ptr;
+			wmm_ptrs[n_wmms++].rule = wmm_rule;
+		}
 	}
+	kfree(wmm_ptrs);
 
 	return reg_schedule_apply(regdom);
 }

From 5bf16a11ba2940c67d0b3a2154813bb42e8c6281 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 27 Feb 2018 11:22:15 +0100
Subject: [PATCH 1435/1678] cfg80211: don't require RTNL held for regdomain
 reads

The whole code is set up to allow RCU reads of this data, but
then uses rtnl_dereference() which requires the RTNL. Convert
it to rcu_dereference_rtnl() which makes it require only RCU
or the RTNL, to allow RCU-protected reading of the data.

Reviewed-by: Coelho, Luciano <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index eddc834f6358..e352a0d1c438 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -135,12 +135,12 @@ static void restore_regulatory_settings(bool reset_user);
 
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
-	return rtnl_dereference(cfg80211_regdomain);
+	return rcu_dereference_rtnl(cfg80211_regdomain);
 }
 
 const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy)
 {
-	return rtnl_dereference(wiphy->regd);
+	return rcu_dereference_rtnl(wiphy->regd);
 }
 
 static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region)

From e552af058148498c8a0874edf6b022caea9cb2b7 Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Wed, 28 Mar 2018 13:24:10 +0300
Subject: [PATCH 1436/1678] mac80211: limit wmm params to comply with ETSI
 requirements

ETSI has recently added new requirements that restrict the WMM
parameter values for 5GHz frequencies.  We need to take care of the
following scenarios in order to comply with these new requirements:

1. When using mac80211 default values;
2. When the userspace tries to configure its own values;
3. When associating to an AP which advertises WWM IE.

When associating to an AP, the client uses the values in the
advertised WMM IE.  But the AP may not comply with the new ETSI
requirements, so the client needs to check the current regulatory
rules and use those limits accordingly.

Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c         |  3 +++
 net/mac80211/ieee80211_i.h |  4 ++++
 net/mac80211/mlme.c        |  1 +
 net/mac80211/util.c        | 44 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5c4b105ca398..36d128ebbac8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -4,6 +4,7 @@
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This file is GPLv2 as found in COPYING.
  */
@@ -2156,6 +2157,8 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 	 */
 	p.uapsd = false;
 
+	ieee80211_regulatory_limit_wmm_params(sdata, &p, params->ac);
+
 	sdata->tx_conf[params->ac] = p;
 	if (drv_conf_tx(local, sdata, params->ac, &p)) {
 		wiphy_debug(local->hw.wiphy,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6c341d89448c..5b3d78362fb5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -4,6 +4,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1869,6 +1870,9 @@ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
 int ieee80211_frame_duration(enum nl80211_band band, size_t len,
 			     int rate, int erp, int short_preamble,
 			     int shift);
+void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
+					   struct ieee80211_tx_queue_params *qparam,
+					   int ac);
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
 			       bool bss_notify, bool enable_qos);
 void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ea3ba03fb952..a3bcf953d423 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1792,6 +1792,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				   params[ac].cw_min, params[ac].cw_max, aci);
 			return false;
 		}
+		ieee80211_regulatory_limit_wmm_params(sdata, &params[ac], ac);
 	}
 
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 55cd2922627a..11f9cfc016d9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -5,6 +5,7 @@
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017	Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1113,6 +1114,48 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 	return crc;
 }
 
+void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
+					   struct ieee80211_tx_queue_params
+					   *qparam, int ac)
+{
+	struct ieee80211_chanctx_conf *chanctx_conf;
+	const struct ieee80211_reg_rule *rrule;
+	struct ieee80211_wmm_ac *wmm_ac;
+	u16 center_freq = 0;
+
+	if (sdata->vif.type != NL80211_IFTYPE_AP &&
+	    sdata->vif.type != NL80211_IFTYPE_STATION)
+		return;
+
+	rcu_read_lock();
+	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+	if (chanctx_conf)
+		center_freq = chanctx_conf->def.chan->center_freq;
+
+	if (!center_freq) {
+		rcu_read_unlock();
+		return;
+	}
+
+	rrule = freq_reg_info(sdata->wdev.wiphy, MHZ_TO_KHZ(center_freq));
+
+	if (IS_ERR_OR_NULL(rrule) || !rrule->wmm_rule) {
+		rcu_read_unlock();
+		return;
+	}
+
+	if (sdata->vif.type == NL80211_IFTYPE_AP)
+		wmm_ac = &rrule->wmm_rule->ap[ac];
+	else
+		wmm_ac = &rrule->wmm_rule->client[ac];
+	qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min);
+	qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max);
+	qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn);
+	qparam->txop = !qparam->txop ? wmm_ac->cot / 32 :
+		min_t(u16, qparam->txop, wmm_ac->cot / 32);
+	rcu_read_unlock();
+}
+
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
 			       bool bss_notify, bool enable_qos)
 {
@@ -1206,6 +1249,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
 				break;
 			}
 		}
+		ieee80211_regulatory_limit_wmm_params(sdata, &qparam, ac);
 
 		qparam.uapsd = false;
 

From 19d3577e35e0cbb42694811b096e749a0f89a824 Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Wed, 28 Mar 2018 13:24:11 +0300
Subject: [PATCH 1437/1678] cfg80211: Add API to allow querying regdb for
 wmm_rule

In general regulatory self managed devices maintain their own
regulatory profiles thus it doesn't have to query the regulatory database
on country change.

ETSI has recently introduced a new channel access mechanism for 5GHz
that all wlan devices need to comply with.
These values are stored in the regulatory database.
There are self managed devices which can't maintain these
values on their own. Add API to allow self managed regulatory devices
to query the regulatory database for high band wmm rule.

Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
[johannes: fix documentation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 28 ++++++++++++++++++++++
 net/wireless/reg.c     | 54 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4341508bc6a4..bfe174896fcf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6,6 +6,7 @@
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014 Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -4657,6 +4658,33 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
  */
 const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
 
+/**
+ * DOC: Internal regulatory db functions
+ *
+ */
+
+/**
+ * reg_query_regdb_wmm -  Query internal regulatory db for wmm rule
+ * Regulatory self-managed driver can use it to proactively
+ *
+ * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried.
+ * @freq: the freqency(in MHz) to be queried.
+ * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if
+ *	irrelevant). This can be used later for deduplication.
+ * @rule: pointer to store the wmm rule from the regulatory db.
+ *
+ * Self-managed wireless drivers can use this function to  query
+ * the internal regulatory database to check whether the given
+ * ISO/IEC 3166 alpha2 country and freq have wmm rule limitations.
+ *
+ * Drivers should check the return value, its possible you can get
+ * an -ENODATA.
+ *
+ * Return: 0 on success. -ENODATA.
+ */
+int reg_query_regdb_wmm(char *alpha2, int freq, u32 *ptr,
+			struct ieee80211_wmm_rule *rule);
+
 /*
  * callbacks for asynchronous cfg80211 methods, notification
  * functions and BSS handling helpers
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index e352a0d1c438..16c7e4ef5820 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -878,6 +878,60 @@ static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
 	}
 }
 
+static int __regdb_query_wmm(const struct fwdb_header *db,
+			     const struct fwdb_country *country, int freq,
+			     u32 *dbptr, struct ieee80211_wmm_rule *rule)
+{
+	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+	int i;
+
+	for (i = 0; i < coll->n_rules; i++) {
+		__be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+		unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+		struct fwdb_rule *rrule = (void *)((u8 *)db + rule_ptr);
+		struct fwdb_wmm_rule *wmm;
+		unsigned int wmm_ptr;
+
+		if (rrule->len < offsetofend(struct fwdb_rule, wmm_ptr))
+			continue;
+
+		if (freq >= KHZ_TO_MHZ(be32_to_cpu(rrule->start)) &&
+		    freq <= KHZ_TO_MHZ(be32_to_cpu(rrule->end))) {
+			wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2;
+			wmm = (void *)((u8 *)db + wmm_ptr);
+			set_wmm_rule(rule, wmm);
+			if (dbptr)
+				*dbptr = wmm_ptr;
+			return 0;
+		}
+	}
+
+	return -ENODATA;
+}
+
+int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
+			struct ieee80211_wmm_rule *rule)
+{
+	const struct fwdb_header *hdr = regdb;
+	const struct fwdb_country *country;
+
+	if (IS_ERR(regdb))
+		return PTR_ERR(regdb);
+
+	country = &hdr->country[0];
+	while (country->coll_ptr) {
+		if (alpha2_equal(alpha2, country->alpha2))
+			return __regdb_query_wmm(regdb, country, freq, dbptr,
+						 rule);
+
+		country++;
+	}
+
+	return -ENODATA;
+}
+EXPORT_SYMBOL(reg_query_regdb_wmm);
+
 struct wmm_ptrs {
 	struct ieee80211_wmm_rule *rule;
 	u32 ptr;

From db3bdcb9c3ffc628c5284d7ed03a704295ba1214 Mon Sep 17 00:00:00 2001
From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Date: Wed, 28 Mar 2018 18:34:19 +0530
Subject: [PATCH 1438/1678] mac80211: allow AP_VLAN operation on crypto
 controlled devices

In the current implementation, mac80211 advertises the support of
AP_VLANs based on the driver's support for AP mode; it also
blocks encrypted AP_VLAN operation on devices advertising
SW_CRYPTO_CONTROL.

The implementation seems weird in it's current form and could be
often confusing, this is because there can be drivers advertising
both SW_CRYPTO_CONTROL and AP mode support (ex: ath10k) in which case
AP_VLAN will still be supported but only in open BSS and not in
secured BSS.

When SW_CRYPTO_CONTROL is enabled, it makes more sense if the decision
to support AP_VLANs is left to the driver. Mac80211 can then allow
AP_VLAN operations depending on the driver support.

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/key.c  | 8 +++++---
 net/mac80211/main.c | 8 ++++++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index aee05ec3f7ea..ee0d0cc8dc3b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -126,7 +126,7 @@ static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
 
 static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 {
-	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_sub_if_data *sdata = key->sdata;
 	struct sta_info *sta;
 	int ret = -EOPNOTSUPP;
 
@@ -162,7 +162,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 	if (sta && !sta->uploaded)
 		goto out_unsupported;
 
-	sdata = key->sdata;
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
 		/*
 		 * The driver doesn't know anything about VLAN interfaces.
@@ -214,8 +213,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 		/* all of these we can do in software - if driver can */
 		if (ret == 1)
 			return 0;
-		if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
+		if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) {
+			if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+				return 0;
 			return -EINVAL;
+		}
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0785d04a80bc..8d0333b5355b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -930,8 +930,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 			             IEEE80211_HT_CAP_SM_PS_SHIFT;
 	}
 
-	/* if low-level driver supports AP, we also support VLAN */
-	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) {
+	/* if low-level driver supports AP, we also support VLAN.
+	 * drivers advertising SW_CRYPTO_CONTROL should enable AP_VLAN
+	 * based on their support to transmit SW encrypted packets.
+	 */
+	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP) &&
+	    !ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL)) {
 		hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
 		hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_AP_VLAN);
 	}

From 4d191c75365a0067a9d5b8c8746b1bd9310c5a70 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 29 Mar 2018 11:14:30 +0200
Subject: [PATCH 1439/1678] mac80211: remove shadowing duplicated variable

We already have 'ifmgd' here, and it's already assigned
to the same value, so remove the duplicate.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a3bcf953d423..d2bc52046729 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2011,8 +2011,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	/* deauthenticate/disassociate now */
 	if (tx || frame_buf) {
-		struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
 		/*
 		 * In multi channel scenarios guarantee that the virtual
 		 * interface is granted immediate airtime to transmit the

From 6a671a50f8199b3e1fe49fa8afff0fc8335da79c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:41 -0500
Subject: [PATCH 1440/1678] nl80211: Add CMD_CONTROL_PORT_FRAME API

This commit also adds cfg80211_rx_control_port function.  This is used
to generate a CMD_CONTROL_PORT_FRAME event out to userspace.  The
conn_owner_nlportid is used as the unicast destination.  This means that
userspace must specify NL80211_ATTR_SOCKET_OWNER flag if control port
over nl80211 routing is requested in NL80211_CMD_CONNECT,
NL80211_CMD_ASSOCIATE, NL80211_CMD_START_AP or IBSS/mesh join.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[johannes: fix return value of cfg80211_rx_control_port()]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 22 ++++++++++++++
 include/uapi/linux/nl80211.h | 13 ++++++++
 net/wireless/nl80211.c       | 58 ++++++++++++++++++++++++++++++++++++
 net/wireless/trace.h         | 21 +++++++++++++
 4 files changed, 114 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bfe174896fcf..df145f76adad 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5721,6 +5721,28 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 			     const u8 *buf, size_t len, bool ack, gfp_t gfp);
 
 
+/**
+ * cfg80211_rx_control_port - notification about a received control port frame
+ * @dev: The device the frame matched to
+ * @buf: control port frame
+ * @len: length of the frame data
+ * @addr: The peer from which the frame was received
+ * @proto: frame protocol, typically PAE or Pre-authentication
+ * @unencrypted: Whether the frame was received unencrypted
+ *
+ * This function is used to inform userspace about a received control port
+ * frame.  It should only be used if userspace indicated it wants to receive
+ * control port frames over nl80211.
+ *
+ * The frame is the data portion of the 802.3 or 802.11 data frame with all
+ * network layer headers removed (e.g. the raw EAPoL frame).
+ *
+ * Return: %true if the frame was passed to userspace
+ */
+bool cfg80211_rx_control_port(struct net_device *dev,
+			      const u8 *buf, size_t len,
+			      const u8 *addr, u16 proto, bool unencrypted);
+
 /**
  * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
  * @dev: network device
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9ea3d6039eca..6a3cc7a635b5 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -990,6 +990,17 @@
  *	&NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed
  *	&NL80211_CMD_DISCONNECT should be indicated instead.
  *
+ * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request
+ *	and RX notification.  This command is used both as a request to transmit
+ *	a control port frame and as a notification that a control port frame
+ *	has been received. %NL80211_ATTR_FRAME is used to specify the
+ *	frame contents.  The frame is the raw EAPoL data, without ethernet or
+ *	802.11 headers.
+ *	When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added
+ *	indicating the protocol type of the received frame; whether the frame
+ *	was received unencrypted and the MAC address of the peer respectively.
+ *
  * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
  *
  * @NL80211_CMD_EXTERNAL_AUTH: This interface is exclusively defined for host
@@ -1228,6 +1239,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_STA_OPMODE_CHANGED,
 
+	NL80211_CMD_CONTROL_PORT_FRAME,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cfaf2aeb9783..0870447fbd55 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14553,6 +14553,64 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 }
 EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
 
+static int __nl80211_rx_control_port(struct net_device *dev,
+				     const u8 *buf, size_t len,
+				     const u8 *addr, u16 proto,
+				     bool unencrypted, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);
+
+	if (!nlportid)
+		return -ENOENT;
+
+	msg = nlmsg_new(100 + len, gfp);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONTROL_PORT_FRAME);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return -ENOBUFS;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD) ||
+	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
+	    nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
+	    (unencrypted && nla_put_flag(msg,
+					 NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
+
+ nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
+bool cfg80211_rx_control_port(struct net_device *dev,
+			      const u8 *buf, size_t len,
+			      const u8 *addr, u16 proto, bool unencrypted)
+{
+	int ret;
+
+	trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted);
+	ret = __nl80211_rx_control_port(dev, buf, len, addr, proto,
+					unencrypted, GFP_ATOMIC);
+	trace_cfg80211_return_bool(ret == 0);
+	return ret == 0;
+}
+EXPORT_SYMBOL(cfg80211_rx_control_port);
+
 static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev,
 					    const char *mac, gfp_t gfp)
 {
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5152938b358d..42fd338f879e 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2600,6 +2600,27 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
 		  WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
 );
 
+TRACE_EVENT(cfg80211_rx_control_port,
+	TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len,
+		 const u8 *addr, u16 proto, bool unencrypted),
+	TP_ARGS(netdev, buf, len, addr, proto, unencrypted),
+	TP_STRUCT__entry(
+		NETDEV_ENTRY
+		MAC_ENTRY(addr)
+		__field(u16, proto)
+		__field(bool, unencrypted)
+	),
+	TP_fast_assign(
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(addr, addr);
+		__entry->proto = proto;
+		__entry->unencrypted = unencrypted;
+	),
+	TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s",
+		  NETDEV_PR_ARG, MAC_PR_ARG(addr),
+		  __entry->proto, BOOL_TO_STR(__entry->unencrypted))
+);
+
 TRACE_EVENT(cfg80211_cqm_rssi_notify,
 	TP_PROTO(struct net_device *netdev,
 		 enum nl80211_cqm_rssi_threshold_event rssi_event,

From 2576a9ace47eba28a682d249d1d6402f891808c9 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:42 -0500
Subject: [PATCH 1441/1678] nl80211: Implement TX of control port frames

This commit implements the TX side of NL80211_CMD_CONTROL_PORT_FRAME.
Userspace provides the raw EAPoL frame using NL80211_ATTR_FRAME.
Userspace should also provide the destination address and the protocol
type to use when sending the frame.  This is used to implement TX of
Pre-authentication frames.  If CONTROL_PORT_ETHERTYPE_NO_ENCRYPT is
specified, then the driver will be asked not to encrypt the outgoing
frame.

A new EXT_FEATURE flag is introduced so that nl80211 code can check
whether a given wiphy has capability to pass EAPoL frames over nl80211.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  9 +++++
 include/uapi/linux/nl80211.h |  3 ++
 net/wireless/nl80211.c       | 71 +++++++++++++++++++++++++++++++++++-
 net/wireless/rdev-ops.h      | 15 ++++++++
 net/wireless/trace.h         | 26 +++++++++++++
 5 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index df145f76adad..de2894a4ad10 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2961,6 +2961,9 @@ struct cfg80211_external_auth_params {
  *
  * @external_auth: indicates result of offloaded authentication processing from
  *     user space
+ *
+ * @tx_control_port: TX a control port frame (EAPoL).  The noencrypt parameter
+ *	tells the driver that the frame should not be encrypted.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3256,6 +3259,12 @@ struct cfg80211_ops {
 			   const u8 *aa);
 	int     (*external_auth)(struct wiphy *wiphy, struct net_device *dev,
 				 struct cfg80211_external_auth_params *params);
+
+	int	(*tx_control_port)(struct wiphy *wiphy,
+				   struct net_device *dev,
+				   const u8 *buf, size_t len,
+				   const u8 *dest, const __be16 proto,
+				   const bool noencrypt);
 };
 
 /*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 6a3cc7a635b5..3167d6f7fc68 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5024,6 +5024,8 @@ enum nl80211_feature_flags {
  *	channel change triggered by radar detection event.
  *	No need to start CAC from user-space, no need to react to
  *	"radar detected" event.
+ * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211: Driver supports sending and
+ *	receiving control port frames over nl80211 instead of the netdevice.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5055,6 +5057,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_LOW_POWER_SCAN,
 	NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN,
 	NL80211_EXT_FEATURE_DFS_OFFLOAD,
+	NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0870447fbd55..6eb286784924 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12535,6 +12535,68 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
 	return rdev_external_auth(rdev, dev, &params);
 }
 
+static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *buf;
+	size_t len;
+	u8 *dest;
+	u16 proto;
+	bool noencrypt;
+	int err;
+
+	if (!wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211))
+		return -EOPNOTSUPP;
+
+	if (!rdev->ops->tx_control_port)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_FRAME] ||
+	    !info->attrs[NL80211_ATTR_MAC] ||
+	    !info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
+		GENL_SET_ERR_MSG(info, "Frame, MAC or ethertype missing");
+		return -EINVAL;
+	}
+
+	wdev_lock(wdev);
+
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_MESH_POINT:
+		break;
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+		if (wdev->current_bss)
+			break;
+		err = -ENOTCONN;
+		goto out;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	wdev_unlock(wdev);
+
+	buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+	len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+	dest = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	proto = nla_get_u16(info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
+	noencrypt =
+		nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]);
+
+	return rdev_tx_control_port(rdev, dev, buf, len,
+				    dest, cpu_to_be16(proto), noencrypt);
+
+ out:
+	wdev_unlock(wdev);
+	return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -13438,7 +13500,14 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
-
+	{
+		.cmd = NL80211_CMD_CONTROL_PORT_FRAME,
+		.doit = nl80211_tx_control_port,
+		.policy = nl80211_policy,
+		.flags = GENL_UNS_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_family nl80211_fam __ro_after_init = {
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 84f23ae015fc..87479a53411b 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -714,6 +714,21 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev,
+				       struct net_device *dev,
+				       const void *buf, size_t len,
+				       const u8 *dest, __be16 proto,
+				       const bool noencrypt)
+{
+	int ret;
+	trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len,
+				   dest, proto, noencrypt);
+	ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len,
+					 dest, proto, noencrypt);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 static inline int
 rdev_mgmt_tx_cancel_wait(struct cfg80211_registered_device *rdev,
 			 struct wireless_dev *wdev, u64 cookie)
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 42fd338f879e..a64291ae52a6 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1882,6 +1882,32 @@ TRACE_EVENT(rdev_mgmt_tx,
 		  BOOL_TO_STR(__entry->dont_wait_for_ack))
 );
 
+TRACE_EVENT(rdev_tx_control_port,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 const u8 *buf, size_t len, const u8 *dest, __be16 proto,
+		 bool unencrypted),
+	TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(dest)
+		__field(__be16, proto)
+		__field(bool, unencrypted)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(dest, dest);
+		__entry->proto = proto;
+		__entry->unencrypted = unencrypted;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ","
+		  " proto: 0x%x, unencrypted: %s",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest),
+		  be16_to_cpu(__entry->proto),
+		  BOOL_TO_STR(__entry->unencrypted))
+);
+
 TRACE_EVENT(rdev_set_noack_map,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
 		 u16 noack_map),

From 64bf3d4bc2b0725b3c5ffadd982a9746bfc738b7 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:43 -0500
Subject: [PATCH 1442/1678] nl80211: Add CONTROL_PORT_OVER_NL80211 attribute

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  3 +++
 include/uapi/linux/nl80211.h | 14 +++++++++++++-
 net/wireless/nl80211.c       | 26 ++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index de2894a4ad10..0bd957b37208 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -647,6 +647,8 @@ struct survey_info {
  *	allowed through even on unauthorized ports
  * @control_port_no_encrypt: TRUE to prevent encryption of control port
  *	protocol frames.
+ * @control_port_over_nl80211: TRUE if userspace expects to exchange control
+ *	port frames over NL80211 instead of the network interface.
  * @wep_keys: static WEP keys, if not NULL points to an array of
  *	CFG80211_MAX_WEP_KEYS WEP keys
  * @wep_tx_key: key index (0..3) of the default TX static WEP key
@@ -662,6 +664,7 @@ struct cfg80211_crypto_settings {
 	bool control_port;
 	__be16 control_port_ethertype;
 	bool control_port_no_encrypt;
+	bool control_port_over_nl80211;
 	struct key_params *wep_keys;
 	int wep_tx_key;
 	const u8 *psk;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 3167d6f7fc68..15daf5e2638d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -542,7 +542,8 @@
  *	IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP,
  *	%NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT,
  *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
- *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, %NL80211_ATTR_MAC_HINT, and
+ *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
+ *	%NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and
  *	%NL80211_ATTR_WIPHY_FREQ_HINT.
  *	If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are
  *	restrictions on BSS selection, i.e., they effectively prevent roaming
@@ -1488,6 +1489,15 @@ enum nl80211_commands {
  * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with
  *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom
  *	ethertype frames used for key negotiation must not be encrypted.
+ * @NL80211_ATTR_CONTROL_PORT_OVER_NL80211: A flag indicating whether control
+ *	port frames (e.g. of type given in %NL80211_ATTR_CONTROL_PORT_ETHERTYPE)
+ *	will be sent directly to the network interface or sent via the NL80211
+ *	socket.  If this attribute is missing, then legacy behavior of sending
+ *	control port frames directly to the network interface is used.  If the
+ *	flag is included, then control port frames are sent over NL80211 instead
+ *	using %CMD_CONTROL_PORT_FRAME.  If control port routing over NL80211 is
+ *	to be used then userspace must also use the %NL80211_ATTR_SOCKET_OWNER
+ *	flag.
  *
  * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver.
  *	We recommend using nested, driver-specific attributes within this.
@@ -2647,6 +2657,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_NSS,
 	NL80211_ATTR_ACK_SIGNAL,
 
+	NL80211_ATTR_CONTROL_PORT_OVER_NL80211,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6eb286784924..d3b14d9d002a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -287,6 +287,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
 	[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG },
 	[NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
 	[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -8211,6 +8212,22 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int validate_pae_over_nl80211(struct cfg80211_registered_device *rdev,
+				     struct genl_info *info)
+{
+	if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+		GENL_SET_ERR_MSG(info, "SOCKET_OWNER not set");
+		return -EINVAL;
+	}
+
+	if (!rdev->ops->tx_control_port ||
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 				   struct genl_info *info,
 				   struct cfg80211_crypto_settings *settings,
@@ -8234,6 +8251,15 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 	} else
 		settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
 
+	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
+		int r = validate_pae_over_nl80211(rdev, info);
+
+		if (r < 0)
+			return r;
+
+		settings->control_port_over_nl80211 = true;
+	}
+
 	if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
 		void *data;
 		int len, i;

From c3bfe1f6fc98e7185ff5ee9279ba259fe484597c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:48 -0500
Subject: [PATCH 1443/1678] nl80211: Add control_port_over_nl80211 for ibss

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 +++
 net/wireless/nl80211.c | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0bd957b37208..ed2773f8558e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2034,6 +2034,8 @@ struct cfg80211_disassoc_request {
  *	sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is
  *	required to assume that the port is unauthorized until authorized by
  *	user space. Otherwise, port is marked authorized by default.
+ * @control_port_over_nl80211: TRUE if userspace expects to exchange control
+ *	port frames over NL80211 instead of the network interface.
  * @userspace_handles_dfs: whether user space controls DFS operation, i.e.
  *	changes the channel when a radar is detected. This is required
  *	to operate on DFS channels.
@@ -2057,6 +2059,7 @@ struct cfg80211_ibss_params {
 	bool channel_fixed;
 	bool privacy;
 	bool control_port;
+	bool control_port_over_nl80211;
 	bool userspace_handles_dfs;
 	int mcast_rate[NUM_NL80211_BANDS];
 	struct ieee80211_ht_cap ht_capa;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d3b14d9d002a..f8e10408f2b3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8705,6 +8705,15 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 	ibss.control_port =
 		nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT]);
 
+	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
+		int r = validate_pae_over_nl80211(rdev, info);
+
+		if (r < 0)
+			return r;
+
+		ibss.control_port_over_nl80211 = true;
+	}
+
 	ibss.userspace_handles_dfs =
 		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
 

From 1224f5831a22977f30c1842874be12c58608cee7 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:49 -0500
Subject: [PATCH 1444/1678] nl80211: Add control_port_over_nl80211 to
 mesh_setup

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 +++
 net/wireless/nl80211.c | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ed2773f8558e..250dac390806 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1454,6 +1454,8 @@ struct mesh_config {
  * @userspace_handles_dfs: whether user space controls DFS operation, i.e.
  *	changes the channel when a radar is detected. This is required
  *	to operate on DFS channels.
+ * @control_port_over_nl80211: TRUE if userspace expects to exchange control
+ *	port frames over NL80211 instead of the network interface.
  *
  * These parameters are fixed when the mesh is created.
  */
@@ -1476,6 +1478,7 @@ struct mesh_setup {
 	u32 basic_rates;
 	struct cfg80211_bitrate_mask beacon_rate;
 	bool userspace_handles_dfs;
+	bool control_port_over_nl80211;
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f8e10408f2b3..ff28f8feeb09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10168,6 +10168,15 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	setup.userspace_handles_dfs =
 		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);
 
+	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
+		int r = validate_pae_over_nl80211(rdev, info);
+
+		if (r < 0)
+			return r;
+
+		setup.control_port_over_nl80211 = true;
+	}
+
 	wdev_lock(dev->ieee80211_ptr);
 	err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 	if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER])

From 911806491425d79107cadddbde11b42bbdfe38c8 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:50 -0500
Subject: [PATCH 1445/1678] mac80211: Add support for tx_control_port

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c         |  1 +
 net/mac80211/ieee80211_i.h |  3 +++
 net/mac80211/tx.c          | 46 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 36d128ebbac8..f6b8d59a7ee8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3791,4 +3791,5 @@ const struct cfg80211_ops mac80211_config_ops = {
 	.add_nan_func = ieee80211_add_nan_func,
 	.del_nan_func = ieee80211_del_nan_func,
 	.set_multicast_to_unicast = ieee80211_set_multicast_to_unicast,
+	.tx_control_port = ieee80211_tx_control_port,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5b3d78362fb5..275d6269b4a7 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1735,6 +1735,9 @@ void ieee80211_check_fast_xmit(struct sta_info *sta);
 void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
 void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
 void ieee80211_clear_fast_xmit(struct sta_info *sta);
+int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
+			      const u8 *buf, size_t len,
+			      const u8 *dest, __be16 proto, bool unencrypted);
 
 /* HT */
 void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 933c67b5f845..535de3161a78 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4757,3 +4757,49 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
 	ieee80211_xmit(sdata, NULL, skb);
 	local_bh_enable();
 }
+
+int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
+			      const u8 *buf, size_t len,
+			      const u8 *dest, __be16 proto, bool unencrypted)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ethhdr *ehdr;
+	u32 flags;
+
+	/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
+	 * or Pre-Authentication
+	 */
+	if (proto != sdata->control_port_protocol &&
+	    proto != cpu_to_be16(ETH_P_PREAUTH))
+		return -EINVAL;
+
+	if (unencrypted)
+		flags = IEEE80211_TX_INTFL_DONT_ENCRYPT;
+	else
+		flags = 0;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+			    sizeof(struct ethhdr) + len);
+	if (!skb)
+		return -ENOMEM;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom + sizeof(struct ethhdr));
+
+	skb_put_data(skb, buf, len);
+
+	ehdr = skb_push(skb, sizeof(struct ethhdr));
+	memcpy(ehdr->h_dest, dest, ETH_ALEN);
+	memcpy(ehdr->h_source, sdata->vif.addr, ETH_ALEN);
+	ehdr->h_proto = proto;
+
+	skb->dev = dev;
+	skb->protocol = htons(ETH_P_802_3);
+	skb_reset_network_header(skb);
+	skb_reset_mac_header(skb);
+
+	__ieee80211_subif_start_xmit(skb, skb->dev, flags);
+
+	return 0;
+}

From 018f6fbf540d7bd7223b7d0b29651c1dd5e1c606 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 26 Mar 2018 12:52:51 -0500
Subject: [PATCH 1446/1678] mac80211: Send control port frames over nl80211

If userspace requested control port frames to go over 80211, then do so.
The control packets are intercepted just prior to delivery of the packet
to the underlying network device.

Pre-authentication type frames (protocol: 0x88c7) are also forwarded
over nl80211.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c         |  6 ++++++
 net/mac80211/ibss.c        |  1 +
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/iface.c       |  2 ++
 net/mac80211/main.c        |  2 ++
 net/mac80211/mlme.c        |  2 ++
 net/mac80211/rx.c          | 33 ++++++++++++++++++++++++++++-----
 7 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f6b8d59a7ee8..85dbaa891059 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -926,6 +926,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 	 */
 	sdata->control_port_protocol = params->crypto.control_port_ethertype;
 	sdata->control_port_no_encrypt = params->crypto.control_port_no_encrypt;
+	sdata->control_port_over_nl80211 =
+				params->crypto.control_port_over_nl80211;
 	sdata->encrypt_headroom = ieee80211_cs_headroom(sdata->local,
 							&params->crypto,
 							sdata->vif.type);
@@ -935,6 +937,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 			params->crypto.control_port_ethertype;
 		vlan->control_port_no_encrypt =
 			params->crypto.control_port_no_encrypt;
+		vlan->control_port_over_nl80211 =
+			params->crypto.control_port_over_nl80211;
 		vlan->encrypt_headroom =
 			ieee80211_cs_headroom(sdata->local,
 					      &params->crypto,
@@ -2020,6 +2024,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
 	if (err)
 		return err;
 
+	sdata->control_port_over_nl80211 = setup->control_port_over_nl80211;
+
 	/* can mesh use other SMPS modes? */
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
 	sdata->needed_rx_chains = sdata->local->rx_chains;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index dc582aa35c89..6449a1c2283b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1844,6 +1844,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
 	sdata->needed_rx_chains = local->rx_chains;
+	sdata->control_port_over_nl80211 = params->control_port_over_nl80211;
 
 	ieee80211_queue_work(&local->hw, &sdata->work);
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 275d6269b4a7..6372dbdadf53 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -900,6 +900,7 @@ struct ieee80211_sub_if_data {
 	u16 sequence_number;
 	__be16 control_port_protocol;
 	bool control_port_no_encrypt;
+	bool control_port_over_nl80211;
 	int encrypt_headroom;
 
 	atomic_t num_tx_queued;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d13ba064951f..555e389b7dfa 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -519,6 +519,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 			master->control_port_protocol;
 		sdata->control_port_no_encrypt =
 			master->control_port_no_encrypt;
+		sdata->control_port_over_nl80211 =
+			master->control_port_over_nl80211;
 		sdata->vif.cab_queue = master->vif.cab_queue;
 		memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
 		       sizeof(sdata->vif.hw_queue));
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 8d0333b5355b..9ea17afaa237 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -554,6 +554,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 			   NL80211_FEATURE_USERSPACE_MPM |
 			   NL80211_FEATURE_FULL_AP_CLIENT_STATE;
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA);
+	wiphy_ext_feature_set(wiphy,
+			      NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
 
 	if (!ops->hw_scan)
 		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d2bc52046729..20d2b186d740 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4855,6 +4855,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 
 	sdata->control_port_protocol = req->crypto.control_port_ethertype;
 	sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
+	sdata->control_port_over_nl80211 =
+					req->crypto.control_port_over_nl80211;
 	sdata->encrypt_headroom = ieee80211_cs_headroom(local, &req->crypto,
 							sdata->vif.type);
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3a9f0c0a2de8..03102aff0953 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2245,6 +2245,32 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
 	return true;
 }
 
+static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
+						 struct ieee80211_rx_data *rx)
+{
+	struct ieee80211_sub_if_data *sdata = rx->sdata;
+	struct net_device *dev = sdata->dev;
+
+	if (unlikely((skb->protocol == sdata->control_port_protocol ||
+		      skb->protocol == cpu_to_be16(ETH_P_PREAUTH)) &&
+		     sdata->control_port_over_nl80211)) {
+		struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+		bool noencrypt = status->flag & RX_FLAG_DECRYPTED;
+		struct ethhdr *ehdr = eth_hdr(skb);
+
+		cfg80211_rx_control_port(dev, skb->data, skb->len,
+					 ehdr->h_source,
+					 be16_to_cpu(skb->protocol), noencrypt);
+		dev_kfree_skb(skb);
+	} else {
+		/* deliver to local stack */
+		if (rx->napi)
+			napi_gro_receive(rx->napi, skb);
+		else
+			netif_receive_skb(skb);
+	}
+}
+
 /*
  * requires that rx->skb is a frame with ethernet header
  */
@@ -2329,13 +2355,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 #endif
 
 	if (skb) {
-		/* deliver to local stack */
 		skb->protocol = eth_type_trans(skb, dev);
 		memset(skb->cb, 0, sizeof(skb->cb));
-		if (rx->napi)
-			napi_gro_receive(rx->napi, skb);
-		else
-			netif_receive_skb(skb);
+
+		ieee80211_deliver_skb_to_local_stack(skb, rx);
 	}
 
 	if (xmit_skb) {

From c470bdc1aaf36669e04ba65faf1092b2d1c6cabe Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 26 Mar 2018 16:21:04 +0300
Subject: [PATCH 1447/1678] mac80211: don't WARN on bad WMM parameters from
 buggy APs

Apparently, some APs are buggy enough to send a zeroed
WMM IE. Don't WARN on this since this is not caused by a bug
on the client's system.

This aligns the condition of the WARNING in drv_conf_tx
with the validity check in ieee80211_sta_wmm_params.
We will now pick the default values whenever we get
a zeroed WMM IE.

This has been reported here:
https://bugzilla.kernel.org/show_bug.cgi?id=199161

Fixes: f409079bb678 ("mac80211: sanity check CW_min/CW_max towards driver")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 20d2b186d740..07b58d20e89c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1786,7 +1786,8 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 		params[ac].acm = acm;
 		params[ac].uapsd = uapsd;
 
-		if (params[ac].cw_min > params[ac].cw_max) {
+		if (params->cw_min == 0 ||
+		    params[ac].cw_min > params[ac].cw_max) {
 			sdata_info(sdata,
 				   "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n",
 				   params[ac].cw_min, params[ac].cw_max, aci);

From 86674a97f5055f4c7f406563408096e8cf9364ff Mon Sep 17 00:00:00 2001
From: Harry Morris <h.morris@cascoda.com>
Date: Wed, 28 Mar 2018 11:54:27 +0100
Subject: [PATCH 1448/1678] ieee802154: ca8210: fix uninitialised data read

In ca8210_test_int_user_write() a user can request the transfer of a
frame with a length field (command.length) that is longer than the
actual buffer provided (len). In this scenario the driver will copy
the buffer contents into the uninitialised command[] buffer, then
transfer <data.length> bytes over the SPI even though only <len> bytes
had been populated, potentially leaking sensitive kernel memory.

Also the first 6 bytes of the command buffer must be initialised in case
a malformed, short packet is written and the uninitialised bytes are
read in ca8210_test_check_upstream.

Reported-by: Domen Puncer Kugler <domen.puncer@samsung.com>
Signed-off-by: Harry Morris <h.morris@cascoda.com>
Tested-by: Harry Morris <h.morris@cascoda.com>
Signed-off-by: Stefan Schmidt <stefan@osg.samsung.com>
---
 drivers/net/ieee802154/ca8210.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 377af43b81b3..58299fb666ed 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -2493,13 +2493,14 @@ static ssize_t ca8210_test_int_user_write(
 	struct ca8210_priv *priv = filp->private_data;
 	u8 command[CA8210_SPI_BUF_SIZE];
 
-	if (len > CA8210_SPI_BUF_SIZE) {
+	memset(command, SPI_IDLE, 6);
+	if (len > CA8210_SPI_BUF_SIZE || len < 2) {
 		dev_warn(
 			&priv->spi->dev,
-			"userspace requested erroneously long write (%zu)\n",
+			"userspace requested erroneous write length (%zu)\n",
 			len
 		);
-		return -EMSGSIZE;
+		return -EBADE;
 	}
 
 	ret = copy_from_user(command, in_buf, len);
@@ -2511,6 +2512,13 @@ static ssize_t ca8210_test_int_user_write(
 		);
 		return -EIO;
 	}
+	if (len != command[1] + 2) {
+		dev_err(
+			&priv->spi->dev,
+			"write len does not match packet length field\n"
+		);
+		return -EBADE;
+	}
 
 	ret = ca8210_test_check_upstream(command, priv->spi);
 	if (ret == 0) {

From 75530a78de21267f0dc76fbc67d2253e89233d54 Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Tue, 27 Mar 2018 14:43:14 -0700
Subject: [PATCH 1449/1678] dt-bindings: net: Add bindings for National
 Instruments XGE netdev

This adds bindings for the NI XGE 1G/10G network device.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/nixge.txt         | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/nixge.txt

diff --git a/Documentation/devicetree/bindings/net/nixge.txt b/Documentation/devicetree/bindings/net/nixge.txt
new file mode 100644
index 000000000000..e55af7f0881a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nixge.txt
@@ -0,0 +1,32 @@
+* NI XGE Ethernet controller
+
+Required properties:
+- compatible: Should be "ni,xge-enet-2.00"
+- reg: Address and length of the register set for the device
+- interrupts: Should contain tx and rx interrupt
+- interrupt-names: Should be "rx" and "tx"
+- phy-mode: See ethernet.txt file in the same directory.
+- phy-handle: See ethernet.txt file in the same directory.
+- nvmem-cells: Phandle of nvmem cell containing the MAC address
+- nvmem-cell-names: Should be "address"
+
+Examples (10G generic PHY):
+	nixge0: ethernet@40000000 {
+		compatible = "ni,xge-enet-2.00";
+		reg = <0x40000000 0x6000>;
+
+		nvmem-cells = <&eth1_addr>;
+		nvmem-cell-names = "address";
+
+		interrupts = <0 29 IRQ_TYPE_LEVEL_HIGH>, <0 30 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "rx", "tx";
+		interrupt-parent = <&intc>;
+
+		phy-mode = "xgmii";
+		phy-handle = <&ethernet_phy1>;
+
+		ethernet_phy1: ethernet-phy@4 {
+			compatible = "ethernet-phy-ieee802.3-c45";
+			reg = <4>;
+		};
+	};

From 492caffa8a1a405f661c111acabfe6b8b9645db8 Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Tue, 27 Mar 2018 14:43:15 -0700
Subject: [PATCH 1450/1678] net: ethernet: nixge: Add support for National
 Instruments XGE netdev

Add support for the National Instruments XGE 1/10G network device.

It uses the EEPROM on the board via NVMEM.

Signed-off-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/Kconfig     |    1 +
 drivers/net/ethernet/Makefile    |    1 +
 drivers/net/ethernet/ni/Kconfig  |   27 +
 drivers/net/ethernet/ni/Makefile |    1 +
 drivers/net/ethernet/ni/nixge.c  | 1310 ++++++++++++++++++++++++++++++
 5 files changed, 1340 insertions(+)
 create mode 100644 drivers/net/ethernet/ni/Kconfig
 create mode 100644 drivers/net/ethernet/ni/Makefile
 create mode 100644 drivers/net/ethernet/ni/nixge.c

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index b6cf4b6962f5..908218561fdd 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -129,6 +129,7 @@ config FEALNX
 
 source "drivers/net/ethernet/natsemi/Kconfig"
 source "drivers/net/ethernet/netronome/Kconfig"
+source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
 
 config NET_NETX
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 3cdf01e96e0b..d732e9522b76 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/
 obj-$(CONFIG_FEALNX) += fealnx.o
 obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
+obj-$(CONFIG_NET_VENDOR_NI) += ni/
 obj-$(CONFIG_NET_NETX) += netx-eth.o
 obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/
 obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
diff --git a/drivers/net/ethernet/ni/Kconfig b/drivers/net/ethernet/ni/Kconfig
new file mode 100644
index 000000000000..aa41e5f6e437
--- /dev/null
+++ b/drivers/net/ethernet/ni/Kconfig
@@ -0,0 +1,27 @@
+#
+# National Instuments network device configuration
+#
+
+config NET_VENDOR_NI
+	bool "National Instruments Devices"
+	default y
+	help
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about National Instrument devices.
+	  If you say Y, you will be asked for your specific device in the
+	  following questions.
+
+if NET_VENDOR_NI
+
+config NI_XGE_MANAGEMENT_ENET
+	tristate "National Instruments XGE management enet support"
+	depends on ARCH_ZYNQ
+	select PHYLIB
+	help
+	  Simple LAN device for debug or management purposes. Can
+	  support either 10G or 1G PHYs via SFP+ ports.
+
+endif
diff --git a/drivers/net/ethernet/ni/Makefile b/drivers/net/ethernet/ni/Makefile
new file mode 100644
index 000000000000..99c664651c51
--- /dev/null
+++ b/drivers/net/ethernet/ni/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NI_XGE_MANAGEMENT_ENET) += nixge.o
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
new file mode 100644
index 000000000000..27364b7572fc
--- /dev/null
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -0,0 +1,1310 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016-2017, National Instruments Corp.
+ *
+ * Author: Moritz Fischer <mdf@kernel.org>
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/skbuff.h>
+#include <linux/phy.h>
+#include <linux/mii.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/ethtool.h>
+#include <linux/iopoll.h>
+
+#define TX_BD_NUM		64
+#define RX_BD_NUM		128
+
+/* Axi DMA Register definitions */
+#define XAXIDMA_TX_CR_OFFSET	0x00 /* Channel control */
+#define XAXIDMA_TX_SR_OFFSET	0x04 /* Status */
+#define XAXIDMA_TX_CDESC_OFFSET	0x08 /* Current descriptor pointer */
+#define XAXIDMA_TX_TDESC_OFFSET	0x10 /* Tail descriptor pointer */
+
+#define XAXIDMA_RX_CR_OFFSET	0x30 /* Channel control */
+#define XAXIDMA_RX_SR_OFFSET	0x34 /* Status */
+#define XAXIDMA_RX_CDESC_OFFSET	0x38 /* Current descriptor pointer */
+#define XAXIDMA_RX_TDESC_OFFSET	0x40 /* Tail descriptor pointer */
+
+#define XAXIDMA_CR_RUNSTOP_MASK	0x1 /* Start/stop DMA channel */
+#define XAXIDMA_CR_RESET_MASK	0x4 /* Reset DMA engine */
+
+#define XAXIDMA_BD_CTRL_LENGTH_MASK	0x007FFFFF /* Requested len */
+#define XAXIDMA_BD_CTRL_TXSOF_MASK	0x08000000 /* First tx packet */
+#define XAXIDMA_BD_CTRL_TXEOF_MASK	0x04000000 /* Last tx packet */
+#define XAXIDMA_BD_CTRL_ALL_MASK	0x0C000000 /* All control bits */
+
+#define XAXIDMA_DELAY_MASK		0xFF000000 /* Delay timeout counter */
+#define XAXIDMA_COALESCE_MASK		0x00FF0000 /* Coalesce counter */
+
+#define XAXIDMA_DELAY_SHIFT		24
+#define XAXIDMA_COALESCE_SHIFT		16
+
+#define XAXIDMA_IRQ_IOC_MASK		0x00001000 /* Completion intr */
+#define XAXIDMA_IRQ_DELAY_MASK		0x00002000 /* Delay interrupt */
+#define XAXIDMA_IRQ_ERROR_MASK		0x00004000 /* Error interrupt */
+#define XAXIDMA_IRQ_ALL_MASK		0x00007000 /* All interrupts */
+
+/* Default TX/RX Threshold and waitbound values for SGDMA mode */
+#define XAXIDMA_DFT_TX_THRESHOLD	24
+#define XAXIDMA_DFT_TX_WAITBOUND	254
+#define XAXIDMA_DFT_RX_THRESHOLD	24
+#define XAXIDMA_DFT_RX_WAITBOUND	254
+
+#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK	0x007FFFFF /* Actual len */
+#define XAXIDMA_BD_STS_COMPLETE_MASK	0x80000000 /* Completed */
+#define XAXIDMA_BD_STS_DEC_ERR_MASK	0x40000000 /* Decode error */
+#define XAXIDMA_BD_STS_SLV_ERR_MASK	0x20000000 /* Slave error */
+#define XAXIDMA_BD_STS_INT_ERR_MASK	0x10000000 /* Internal err */
+#define XAXIDMA_BD_STS_ALL_ERR_MASK	0x70000000 /* All errors */
+#define XAXIDMA_BD_STS_RXSOF_MASK	0x08000000 /* First rx pkt */
+#define XAXIDMA_BD_STS_RXEOF_MASK	0x04000000 /* Last rx pkt */
+#define XAXIDMA_BD_STS_ALL_MASK		0xFC000000 /* All status bits */
+
+#define NIXGE_REG_CTRL_OFFSET	0x4000
+#define NIXGE_REG_INFO		0x00
+#define NIXGE_REG_MAC_CTL	0x04
+#define NIXGE_REG_PHY_CTL	0x08
+#define NIXGE_REG_LED_CTL	0x0c
+#define NIXGE_REG_MDIO_DATA	0x10
+#define NIXGE_REG_MDIO_ADDR	0x14
+#define NIXGE_REG_MDIO_OP	0x18
+#define NIXGE_REG_MDIO_CTRL	0x1c
+
+#define NIXGE_ID_LED_CTL_EN	BIT(0)
+#define NIXGE_ID_LED_CTL_VAL	BIT(1)
+
+#define NIXGE_MDIO_CLAUSE45	BIT(12)
+#define NIXGE_MDIO_CLAUSE22	0
+#define NIXGE_MDIO_OP(n)     (((n) & 0x3) << 10)
+#define NIXGE_MDIO_OP_ADDRESS	0
+#define NIXGE_MDIO_C45_WRITE	BIT(0)
+#define NIXGE_MDIO_C45_READ	(BIT(1) | BIT(0))
+#define NIXGE_MDIO_C22_WRITE	BIT(0)
+#define NIXGE_MDIO_C22_READ	BIT(1)
+#define NIXGE_MDIO_ADDR(n)   (((n) & 0x1f) << 5)
+#define NIXGE_MDIO_MMD(n)    (((n) & 0x1f) << 0)
+
+#define NIXGE_REG_MAC_LSB	0x1000
+#define NIXGE_REG_MAC_MSB	0x1004
+
+/* Packet size info */
+#define NIXGE_HDR_SIZE		14 /* Size of Ethernet header */
+#define NIXGE_TRL_SIZE		4 /* Size of Ethernet trailer (FCS) */
+#define NIXGE_MTU		1500 /* Max MTU of an Ethernet frame */
+#define NIXGE_JUMBO_MTU		9000 /* Max MTU of a jumbo Eth. frame */
+
+#define NIXGE_MAX_FRAME_SIZE	 (NIXGE_MTU + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE)
+#define NIXGE_MAX_JUMBO_FRAME_SIZE \
+	(NIXGE_JUMBO_MTU + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE)
+
+struct nixge_hw_dma_bd {
+	u32 next;
+	u32 reserved1;
+	u32 phys;
+	u32 reserved2;
+	u32 reserved3;
+	u32 reserved4;
+	u32 cntrl;
+	u32 status;
+	u32 app0;
+	u32 app1;
+	u32 app2;
+	u32 app3;
+	u32 app4;
+	u32 sw_id_offset;
+	u32 reserved5;
+	u32 reserved6;
+};
+
+struct nixge_tx_skb {
+	struct sk_buff *skb;
+	dma_addr_t mapping;
+	size_t size;
+	bool mapped_as_page;
+};
+
+struct nixge_priv {
+	struct net_device *ndev;
+	struct napi_struct napi;
+	struct device *dev;
+
+	/* Connection to PHY device */
+	struct device_node *phy_node;
+	phy_interface_t		phy_mode;
+
+	int link;
+	unsigned int speed;
+	unsigned int duplex;
+
+	/* MDIO bus data */
+	struct mii_bus *mii_bus;	/* MII bus reference */
+
+	/* IO registers, dma functions and IRQs */
+	void __iomem *ctrl_regs;
+	void __iomem *dma_regs;
+
+	struct tasklet_struct dma_err_tasklet;
+
+	int tx_irq;
+	int rx_irq;
+	u32 last_link;
+
+	/* Buffer descriptors */
+	struct nixge_hw_dma_bd *tx_bd_v;
+	struct nixge_tx_skb *tx_skb;
+	dma_addr_t tx_bd_p;
+
+	struct nixge_hw_dma_bd *rx_bd_v;
+	dma_addr_t rx_bd_p;
+	u32 tx_bd_ci;
+	u32 tx_bd_tail;
+	u32 rx_bd_ci;
+
+	u32 coalesce_count_rx;
+	u32 coalesce_count_tx;
+};
+
+static void nixge_dma_write_reg(struct nixge_priv *priv, off_t offset, u32 val)
+{
+	writel(val, priv->dma_regs + offset);
+}
+
+static u32 nixge_dma_read_reg(const struct nixge_priv *priv, off_t offset)
+{
+	return readl(priv->dma_regs + offset);
+}
+
+static void nixge_ctrl_write_reg(struct nixge_priv *priv, off_t offset, u32 val)
+{
+	writel(val, priv->ctrl_regs + offset);
+}
+
+static u32 nixge_ctrl_read_reg(struct nixge_priv *priv, off_t offset)
+{
+	return readl(priv->ctrl_regs + offset);
+}
+
+#define nixge_ctrl_poll_timeout(priv, addr, val, cond, sleep_us, timeout_us) \
+	readl_poll_timeout((priv)->ctrl_regs + (addr), (val), (cond), \
+			   (sleep_us), (timeout_us))
+
+#define nixge_dma_poll_timeout(priv, addr, val, cond, sleep_us, timeout_us) \
+	readl_poll_timeout((priv)->dma_regs + (addr), (val), (cond), \
+			   (sleep_us), (timeout_us))
+
+static void nixge_hw_dma_bd_release(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	int i;
+
+	for (i = 0; i < RX_BD_NUM; i++) {
+		dma_unmap_single(ndev->dev.parent, priv->rx_bd_v[i].phys,
+				 NIXGE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
+		dev_kfree_skb((struct sk_buff *)
+			      (priv->rx_bd_v[i].sw_id_offset));
+	}
+
+	if (priv->rx_bd_v)
+		dma_free_coherent(ndev->dev.parent,
+				  sizeof(*priv->rx_bd_v) * RX_BD_NUM,
+				  priv->rx_bd_v,
+				  priv->rx_bd_p);
+
+	if (priv->tx_skb)
+		devm_kfree(ndev->dev.parent, priv->tx_skb);
+
+	if (priv->tx_bd_v)
+		dma_free_coherent(ndev->dev.parent,
+				  sizeof(*priv->tx_bd_v) * TX_BD_NUM,
+				  priv->tx_bd_v,
+				  priv->tx_bd_p);
+}
+
+static int nixge_hw_dma_bd_init(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	struct sk_buff *skb;
+	u32 cr;
+	int i;
+
+	/* Reset the indexes which are used for accessing the BDs */
+	priv->tx_bd_ci = 0;
+	priv->tx_bd_tail = 0;
+	priv->rx_bd_ci = 0;
+
+	/* Allocate the Tx and Rx buffer descriptors. */
+	priv->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
+					    sizeof(*priv->tx_bd_v) * TX_BD_NUM,
+					    &priv->tx_bd_p, GFP_KERNEL);
+	if (!priv->tx_bd_v)
+		goto out;
+
+	priv->tx_skb = devm_kzalloc(ndev->dev.parent,
+				    sizeof(*priv->tx_skb) *
+				    TX_BD_NUM,
+				    GFP_KERNEL);
+	if (!priv->tx_skb)
+		goto out;
+
+	priv->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent,
+					    sizeof(*priv->rx_bd_v) * RX_BD_NUM,
+					    &priv->rx_bd_p, GFP_KERNEL);
+	if (!priv->rx_bd_v)
+		goto out;
+
+	for (i = 0; i < TX_BD_NUM; i++) {
+		priv->tx_bd_v[i].next = priv->tx_bd_p +
+				      sizeof(*priv->tx_bd_v) *
+				      ((i + 1) % TX_BD_NUM);
+	}
+
+	for (i = 0; i < RX_BD_NUM; i++) {
+		priv->rx_bd_v[i].next = priv->rx_bd_p +
+				      sizeof(*priv->rx_bd_v) *
+				      ((i + 1) % RX_BD_NUM);
+
+		skb = netdev_alloc_skb_ip_align(ndev,
+						NIXGE_MAX_JUMBO_FRAME_SIZE);
+		if (!skb)
+			goto out;
+
+		priv->rx_bd_v[i].sw_id_offset = (u32)skb;
+		priv->rx_bd_v[i].phys =
+			dma_map_single(ndev->dev.parent,
+				       skb->data,
+				       NIXGE_MAX_JUMBO_FRAME_SIZE,
+				       DMA_FROM_DEVICE);
+		priv->rx_bd_v[i].cntrl = NIXGE_MAX_JUMBO_FRAME_SIZE;
+	}
+
+	/* Start updating the Rx channel control register */
+	cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+	/* Update the interrupt coalesce count */
+	cr = ((cr & ~XAXIDMA_COALESCE_MASK) |
+	      ((priv->coalesce_count_rx) << XAXIDMA_COALESCE_SHIFT));
+	/* Update the delay timer count */
+	cr = ((cr & ~XAXIDMA_DELAY_MASK) |
+	      (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
+	/* Enable coalesce, delay timer and error interrupts */
+	cr |= XAXIDMA_IRQ_ALL_MASK;
+	/* Write to the Rx channel control register */
+	nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr);
+
+	/* Start updating the Tx channel control register */
+	cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
+	/* Update the interrupt coalesce count */
+	cr = (((cr & ~XAXIDMA_COALESCE_MASK)) |
+	      ((priv->coalesce_count_tx) << XAXIDMA_COALESCE_SHIFT));
+	/* Update the delay timer count */
+	cr = (((cr & ~XAXIDMA_DELAY_MASK)) |
+	      (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
+	/* Enable coalesce, delay timer and error interrupts */
+	cr |= XAXIDMA_IRQ_ALL_MASK;
+	/* Write to the Tx channel control register */
+	nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, cr);
+
+	/* Populate the tail pointer and bring the Rx Axi DMA engine out of
+	 * halted state. This will make the Rx side ready for reception.
+	 */
+	nixge_dma_write_reg(priv, XAXIDMA_RX_CDESC_OFFSET, priv->rx_bd_p);
+	cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+	nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET,
+			    cr | XAXIDMA_CR_RUNSTOP_MASK);
+	nixge_dma_write_reg(priv, XAXIDMA_RX_TDESC_OFFSET, priv->rx_bd_p +
+			    (sizeof(*priv->rx_bd_v) * (RX_BD_NUM - 1)));
+
+	/* Write to the RS (Run-stop) bit in the Tx channel control register.
+	 * Tx channel is now ready to run. But only after we write to the
+	 * tail pointer register that the Tx channel will start transmitting.
+	 */
+	nixge_dma_write_reg(priv, XAXIDMA_TX_CDESC_OFFSET, priv->tx_bd_p);
+	cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
+	nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET,
+			    cr | XAXIDMA_CR_RUNSTOP_MASK);
+
+	return 0;
+out:
+	nixge_hw_dma_bd_release(ndev);
+	return -ENOMEM;
+}
+
+static void __nixge_device_reset(struct nixge_priv *priv, off_t offset)
+{
+	u32 status;
+	int err;
+
+	/* Reset Axi DMA. This would reset NIXGE Ethernet core as well.
+	 * The reset process of Axi DMA takes a while to complete as all
+	 * pending commands/transfers will be flushed or completed during
+	 * this reset process.
+	 */
+	nixge_dma_write_reg(priv, offset, XAXIDMA_CR_RESET_MASK);
+	err = nixge_dma_poll_timeout(priv, offset, status,
+				     !(status & XAXIDMA_CR_RESET_MASK), 10,
+				     1000);
+	if (err)
+		netdev_err(priv->ndev, "%s: DMA reset timeout!\n", __func__);
+}
+
+static void nixge_device_reset(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+
+	__nixge_device_reset(priv, XAXIDMA_TX_CR_OFFSET);
+	__nixge_device_reset(priv, XAXIDMA_RX_CR_OFFSET);
+
+	if (nixge_hw_dma_bd_init(ndev))
+		netdev_err(ndev, "%s: descriptor allocation failed\n",
+			   __func__);
+
+	netif_trans_update(ndev);
+}
+
+static void nixge_handle_link_change(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	struct phy_device *phydev = ndev->phydev;
+
+	if (phydev->link != priv->link || phydev->speed != priv->speed ||
+	    phydev->duplex != priv->duplex) {
+		priv->link = phydev->link;
+		priv->speed = phydev->speed;
+		priv->duplex = phydev->duplex;
+		phy_print_status(phydev);
+	}
+}
+
+static void nixge_tx_skb_unmap(struct nixge_priv *priv,
+			       struct nixge_tx_skb *tx_skb)
+{
+	if (tx_skb->mapping) {
+		if (tx_skb->mapped_as_page)
+			dma_unmap_page(priv->ndev->dev.parent, tx_skb->mapping,
+				       tx_skb->size, DMA_TO_DEVICE);
+		else
+			dma_unmap_single(priv->ndev->dev.parent,
+					 tx_skb->mapping,
+					 tx_skb->size, DMA_TO_DEVICE);
+		tx_skb->mapping = 0;
+	}
+
+	if (tx_skb->skb) {
+		dev_kfree_skb_any(tx_skb->skb);
+		tx_skb->skb = NULL;
+	}
+}
+
+static void nixge_start_xmit_done(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	struct nixge_hw_dma_bd *cur_p;
+	struct nixge_tx_skb *tx_skb;
+	unsigned int status = 0;
+	u32 packets = 0;
+	u32 size = 0;
+
+	cur_p = &priv->tx_bd_v[priv->tx_bd_ci];
+	tx_skb = &priv->tx_skb[priv->tx_bd_ci];
+
+	status = cur_p->status;
+
+	while (status & XAXIDMA_BD_STS_COMPLETE_MASK) {
+		nixge_tx_skb_unmap(priv, tx_skb);
+		cur_p->status = 0;
+
+		size += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
+		packets++;
+
+		++priv->tx_bd_ci;
+		priv->tx_bd_ci %= TX_BD_NUM;
+		cur_p = &priv->tx_bd_v[priv->tx_bd_ci];
+		tx_skb = &priv->tx_skb[priv->tx_bd_ci];
+		status = cur_p->status;
+	}
+
+	ndev->stats.tx_packets += packets;
+	ndev->stats.tx_bytes += size;
+
+	if (packets)
+		netif_wake_queue(ndev);
+}
+
+static int nixge_check_tx_bd_space(struct nixge_priv *priv,
+				   int num_frag)
+{
+	struct nixge_hw_dma_bd *cur_p;
+
+	cur_p = &priv->tx_bd_v[(priv->tx_bd_tail + num_frag) % TX_BD_NUM];
+	if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
+		return NETDEV_TX_BUSY;
+	return 0;
+}
+
+static int nixge_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	struct nixge_hw_dma_bd *cur_p;
+	struct nixge_tx_skb *tx_skb;
+	dma_addr_t tail_p;
+	skb_frag_t *frag;
+	u32 num_frag;
+	u32 ii;
+
+	num_frag = skb_shinfo(skb)->nr_frags;
+	cur_p = &priv->tx_bd_v[priv->tx_bd_tail];
+	tx_skb = &priv->tx_skb[priv->tx_bd_tail];
+
+	if (nixge_check_tx_bd_space(priv, num_frag)) {
+		if (!netif_queue_stopped(ndev))
+			netif_stop_queue(ndev);
+		return NETDEV_TX_OK;
+	}
+
+	cur_p->phys = dma_map_single(ndev->dev.parent, skb->data,
+				     skb_headlen(skb), DMA_TO_DEVICE);
+	if (dma_mapping_error(ndev->dev.parent, cur_p->phys))
+		goto drop;
+
+	cur_p->cntrl = skb_headlen(skb) | XAXIDMA_BD_CTRL_TXSOF_MASK;
+
+	tx_skb->skb = NULL;
+	tx_skb->mapping = cur_p->phys;
+	tx_skb->size = skb_headlen(skb);
+	tx_skb->mapped_as_page = false;
+
+	for (ii = 0; ii < num_frag; ii++) {
+		++priv->tx_bd_tail;
+		priv->tx_bd_tail %= TX_BD_NUM;
+		cur_p = &priv->tx_bd_v[priv->tx_bd_tail];
+		tx_skb = &priv->tx_skb[priv->tx_bd_tail];
+		frag = &skb_shinfo(skb)->frags[ii];
+
+		cur_p->phys = skb_frag_dma_map(ndev->dev.parent, frag, 0,
+					       skb_frag_size(frag),
+					       DMA_TO_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, cur_p->phys))
+			goto frag_err;
+
+		cur_p->cntrl = skb_frag_size(frag);
+
+		tx_skb->skb = NULL;
+		tx_skb->mapping = cur_p->phys;
+		tx_skb->size = skb_frag_size(frag);
+		tx_skb->mapped_as_page = true;
+	}
+
+	/* last buffer of the frame */
+	tx_skb->skb = skb;
+
+	cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK;
+	cur_p->app4 = (unsigned long)skb;
+
+	tail_p = priv->tx_bd_p + sizeof(*priv->tx_bd_v) * priv->tx_bd_tail;
+	/* Start the transfer */
+	nixge_dma_write_reg(priv, XAXIDMA_TX_TDESC_OFFSET, tail_p);
+	++priv->tx_bd_tail;
+	priv->tx_bd_tail %= TX_BD_NUM;
+
+	return NETDEV_TX_OK;
+frag_err:
+	for (; ii > 0; ii--) {
+		if (priv->tx_bd_tail)
+			priv->tx_bd_tail--;
+		else
+			priv->tx_bd_tail = TX_BD_NUM - 1;
+
+		tx_skb = &priv->tx_skb[priv->tx_bd_tail];
+		nixge_tx_skb_unmap(priv, tx_skb);
+
+		cur_p = &priv->tx_bd_v[priv->tx_bd_tail];
+		cur_p->status = 0;
+	}
+	dma_unmap_single(priv->ndev->dev.parent,
+			 tx_skb->mapping,
+			 tx_skb->size, DMA_TO_DEVICE);
+drop:
+	ndev->stats.tx_dropped++;
+	return NETDEV_TX_OK;
+}
+
+static int nixge_recv(struct net_device *ndev, int budget)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	struct sk_buff *skb, *new_skb;
+	struct nixge_hw_dma_bd *cur_p;
+	dma_addr_t tail_p = 0;
+	u32 packets = 0;
+	u32 length = 0;
+	u32 size = 0;
+
+	cur_p = &priv->rx_bd_v[priv->rx_bd_ci];
+
+	while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK &&
+		budget > packets)) {
+		tail_p = priv->rx_bd_p + sizeof(*priv->rx_bd_v) *
+			 priv->rx_bd_ci;
+
+		skb = (struct sk_buff *)(cur_p->sw_id_offset);
+
+		length = cur_p->status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
+		if (length > NIXGE_MAX_JUMBO_FRAME_SIZE)
+			length = NIXGE_MAX_JUMBO_FRAME_SIZE;
+
+		dma_unmap_single(ndev->dev.parent, cur_p->phys,
+				 NIXGE_MAX_JUMBO_FRAME_SIZE,
+				 DMA_FROM_DEVICE);
+
+		skb_put(skb, length);
+
+		skb->protocol = eth_type_trans(skb, ndev);
+		skb_checksum_none_assert(skb);
+
+		/* For now mark them as CHECKSUM_NONE since
+		 * we don't have offload capabilities
+		 */
+		skb->ip_summed = CHECKSUM_NONE;
+
+		napi_gro_receive(&priv->napi, skb);
+
+		size += length;
+		packets++;
+
+		new_skb = netdev_alloc_skb_ip_align(ndev,
+						    NIXGE_MAX_JUMBO_FRAME_SIZE);
+		if (!new_skb)
+			return packets;
+
+		cur_p->phys = dma_map_single(ndev->dev.parent, new_skb->data,
+					     NIXGE_MAX_JUMBO_FRAME_SIZE,
+					     DMA_FROM_DEVICE);
+		if (dma_mapping_error(ndev->dev.parent, cur_p->phys)) {
+			/* FIXME: bail out and clean up */
+			netdev_err(ndev, "Failed to map ...\n");
+		}
+		cur_p->cntrl = NIXGE_MAX_JUMBO_FRAME_SIZE;
+		cur_p->status = 0;
+		cur_p->sw_id_offset = (u32)new_skb;
+
+		++priv->rx_bd_ci;
+		priv->rx_bd_ci %= RX_BD_NUM;
+		cur_p = &priv->rx_bd_v[priv->rx_bd_ci];
+	}
+
+	ndev->stats.rx_packets += packets;
+	ndev->stats.rx_bytes += size;
+
+	if (tail_p)
+		nixge_dma_write_reg(priv, XAXIDMA_RX_TDESC_OFFSET, tail_p);
+
+	return packets;
+}
+
+static int nixge_poll(struct napi_struct *napi, int budget)
+{
+	struct nixge_priv *priv = container_of(napi, struct nixge_priv, napi);
+	int work_done;
+	u32 status, cr;
+
+	work_done = 0;
+
+	work_done = nixge_recv(priv->ndev, budget);
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		status = nixge_dma_read_reg(priv, XAXIDMA_RX_SR_OFFSET);
+
+		if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) {
+			/* If there's more, reschedule, but clear */
+			nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status);
+			napi_reschedule(napi);
+		} else {
+			/* if not, turn on RX IRQs again ... */
+			cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+			cr |= (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
+			nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr);
+		}
+	}
+
+	return work_done;
+}
+
+static irqreturn_t nixge_tx_irq(int irq, void *_ndev)
+{
+	struct nixge_priv *priv = netdev_priv(_ndev);
+	struct net_device *ndev = _ndev;
+	unsigned int status;
+	u32 cr;
+
+	status = nixge_dma_read_reg(priv, XAXIDMA_TX_SR_OFFSET);
+	if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) {
+		nixge_dma_write_reg(priv, XAXIDMA_TX_SR_OFFSET, status);
+		nixge_start_xmit_done(priv->ndev);
+		goto out;
+	}
+	if (!(status & XAXIDMA_IRQ_ALL_MASK)) {
+		netdev_err(ndev, "No interrupts asserted in Tx path\n");
+		return IRQ_NONE;
+	}
+	if (status & XAXIDMA_IRQ_ERROR_MASK) {
+		netdev_err(ndev, "DMA Tx error 0x%x\n", status);
+		netdev_err(ndev, "Current BD is at: 0x%x\n",
+			   (priv->tx_bd_v[priv->tx_bd_ci]).phys);
+
+		cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
+		/* Disable coalesce, delay timer and error interrupts */
+		cr &= (~XAXIDMA_IRQ_ALL_MASK);
+		/* Write to the Tx channel control register */
+		nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, cr);
+
+		cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+		/* Disable coalesce, delay timer and error interrupts */
+		cr &= (~XAXIDMA_IRQ_ALL_MASK);
+		/* Write to the Rx channel control register */
+		nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr);
+
+		tasklet_schedule(&priv->dma_err_tasklet);
+		nixge_dma_write_reg(priv, XAXIDMA_TX_SR_OFFSET, status);
+	}
+out:
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t nixge_rx_irq(int irq, void *_ndev)
+{
+	struct nixge_priv *priv = netdev_priv(_ndev);
+	struct net_device *ndev = _ndev;
+	unsigned int status;
+	u32 cr;
+
+	status = nixge_dma_read_reg(priv, XAXIDMA_RX_SR_OFFSET);
+	if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) {
+		/* Turn of IRQs because NAPI */
+		nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status);
+		cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+		cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
+		nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr);
+
+		if (napi_schedule_prep(&priv->napi))
+			__napi_schedule(&priv->napi);
+		goto out;
+	}
+	if (!(status & XAXIDMA_IRQ_ALL_MASK)) {
+		netdev_err(ndev, "No interrupts asserted in Rx path\n");
+		return IRQ_NONE;
+	}
+	if (status & XAXIDMA_IRQ_ERROR_MASK) {
+		netdev_err(ndev, "DMA Rx error 0x%x\n", status);
+		netdev_err(ndev, "Current BD is at: 0x%x\n",
+			   (priv->rx_bd_v[priv->rx_bd_ci]).phys);
+
+		cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
+		/* Disable coalesce, delay timer and error interrupts */
+		cr &= (~XAXIDMA_IRQ_ALL_MASK);
+		/* Finally write to the Tx channel control register */
+		nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, cr);
+
+		cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+		/* Disable coalesce, delay timer and error interrupts */
+		cr &= (~XAXIDMA_IRQ_ALL_MASK);
+		/* write to the Rx channel control register */
+		nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr);
+
+		tasklet_schedule(&priv->dma_err_tasklet);
+		nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status);
+	}
+out:
+	return IRQ_HANDLED;
+}
+
+static void nixge_dma_err_handler(unsigned long data)
+{
+	struct nixge_priv *lp = (struct nixge_priv *)data;
+	struct nixge_hw_dma_bd *cur_p;
+	struct nixge_tx_skb *tx_skb;
+	u32 cr, i;
+
+	__nixge_device_reset(lp, XAXIDMA_TX_CR_OFFSET);
+	__nixge_device_reset(lp, XAXIDMA_RX_CR_OFFSET);
+
+	for (i = 0; i < TX_BD_NUM; i++) {
+		cur_p = &lp->tx_bd_v[i];
+		tx_skb = &lp->tx_skb[i];
+		nixge_tx_skb_unmap(lp, tx_skb);
+
+		cur_p->phys = 0;
+		cur_p->cntrl = 0;
+		cur_p->status = 0;
+		cur_p->app0 = 0;
+		cur_p->app1 = 0;
+		cur_p->app2 = 0;
+		cur_p->app3 = 0;
+		cur_p->app4 = 0;
+		cur_p->sw_id_offset = 0;
+	}
+
+	for (i = 0; i < RX_BD_NUM; i++) {
+		cur_p = &lp->rx_bd_v[i];
+		cur_p->status = 0;
+		cur_p->app0 = 0;
+		cur_p->app1 = 0;
+		cur_p->app2 = 0;
+		cur_p->app3 = 0;
+		cur_p->app4 = 0;
+	}
+
+	lp->tx_bd_ci = 0;
+	lp->tx_bd_tail = 0;
+	lp->rx_bd_ci = 0;
+
+	/* Start updating the Rx channel control register */
+	cr = nixge_dma_read_reg(lp, XAXIDMA_RX_CR_OFFSET);
+	/* Update the interrupt coalesce count */
+	cr = ((cr & ~XAXIDMA_COALESCE_MASK) |
+	      (XAXIDMA_DFT_RX_THRESHOLD << XAXIDMA_COALESCE_SHIFT));
+	/* Update the delay timer count */
+	cr = ((cr & ~XAXIDMA_DELAY_MASK) |
+	      (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
+	/* Enable coalesce, delay timer and error interrupts */
+	cr |= XAXIDMA_IRQ_ALL_MASK;
+	/* Finally write to the Rx channel control register */
+	nixge_dma_write_reg(lp, XAXIDMA_RX_CR_OFFSET, cr);
+
+	/* Start updating the Tx channel control register */
+	cr = nixge_dma_read_reg(lp, XAXIDMA_TX_CR_OFFSET);
+	/* Update the interrupt coalesce count */
+	cr = (((cr & ~XAXIDMA_COALESCE_MASK)) |
+	      (XAXIDMA_DFT_TX_THRESHOLD << XAXIDMA_COALESCE_SHIFT));
+	/* Update the delay timer count */
+	cr = (((cr & ~XAXIDMA_DELAY_MASK)) |
+	      (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
+	/* Enable coalesce, delay timer and error interrupts */
+	cr |= XAXIDMA_IRQ_ALL_MASK;
+	/* Finally write to the Tx channel control register */
+	nixge_dma_write_reg(lp, XAXIDMA_TX_CR_OFFSET, cr);
+
+	/* Populate the tail pointer and bring the Rx Axi DMA engine out of
+	 * halted state. This will make the Rx side ready for reception.
+	 */
+	nixge_dma_write_reg(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p);
+	cr = nixge_dma_read_reg(lp, XAXIDMA_RX_CR_OFFSET);
+	nixge_dma_write_reg(lp, XAXIDMA_RX_CR_OFFSET,
+			    cr | XAXIDMA_CR_RUNSTOP_MASK);
+	nixge_dma_write_reg(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
+			    (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+
+	/* Write to the RS (Run-stop) bit in the Tx channel control register.
+	 * Tx channel is now ready to run. But only after we write to the
+	 * tail pointer register that the Tx channel will start transmitting
+	 */
+	nixge_dma_write_reg(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
+	cr = nixge_dma_read_reg(lp, XAXIDMA_TX_CR_OFFSET);
+	nixge_dma_write_reg(lp, XAXIDMA_TX_CR_OFFSET,
+			    cr | XAXIDMA_CR_RUNSTOP_MASK);
+}
+
+static int nixge_open(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	struct phy_device *phy;
+	int ret;
+
+	nixge_device_reset(ndev);
+
+	phy = of_phy_connect(ndev, priv->phy_node,
+			     &nixge_handle_link_change, 0, priv->phy_mode);
+	if (!phy)
+		return -ENODEV;
+
+	phy_start(phy);
+
+	/* Enable tasklets for Axi DMA error handling */
+	tasklet_init(&priv->dma_err_tasklet, nixge_dma_err_handler,
+		     (unsigned long)priv);
+
+	napi_enable(&priv->napi);
+
+	/* Enable interrupts for Axi DMA Tx */
+	ret = request_irq(priv->tx_irq, nixge_tx_irq, 0, ndev->name, ndev);
+	if (ret)
+		goto err_tx_irq;
+	/* Enable interrupts for Axi DMA Rx */
+	ret = request_irq(priv->rx_irq, nixge_rx_irq, 0, ndev->name, ndev);
+	if (ret)
+		goto err_rx_irq;
+
+	netif_start_queue(ndev);
+
+	return 0;
+
+err_rx_irq:
+	free_irq(priv->tx_irq, ndev);
+err_tx_irq:
+	phy_stop(phy);
+	phy_disconnect(phy);
+	tasklet_kill(&priv->dma_err_tasklet);
+	netdev_err(ndev, "request_irq() failed\n");
+	return ret;
+}
+
+static int nixge_stop(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	u32 cr;
+
+	netif_stop_queue(ndev);
+	napi_disable(&priv->napi);
+
+	if (ndev->phydev) {
+		phy_stop(ndev->phydev);
+		phy_disconnect(ndev->phydev);
+	}
+
+	cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+	nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET,
+			    cr & (~XAXIDMA_CR_RUNSTOP_MASK));
+	cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
+	nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET,
+			    cr & (~XAXIDMA_CR_RUNSTOP_MASK));
+
+	tasklet_kill(&priv->dma_err_tasklet);
+
+	free_irq(priv->tx_irq, ndev);
+	free_irq(priv->rx_irq, ndev);
+
+	nixge_hw_dma_bd_release(ndev);
+
+	return 0;
+}
+
+static int nixge_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	if (netif_running(ndev))
+		return -EBUSY;
+
+	if ((new_mtu + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE) >
+	     NIXGE_MAX_JUMBO_FRAME_SIZE)
+		return -EINVAL;
+
+	ndev->mtu = new_mtu;
+
+	return 0;
+}
+
+static s32 __nixge_hw_set_mac_address(struct net_device *ndev)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+
+	nixge_ctrl_write_reg(priv, NIXGE_REG_MAC_LSB,
+			     (ndev->dev_addr[2]) << 24 |
+			     (ndev->dev_addr[3] << 16) |
+			     (ndev->dev_addr[4] << 8) |
+			     (ndev->dev_addr[5] << 0));
+
+	nixge_ctrl_write_reg(priv, NIXGE_REG_MAC_MSB,
+			     (ndev->dev_addr[1] | (ndev->dev_addr[0] << 8)));
+
+	return 0;
+}
+
+static int nixge_net_set_mac_address(struct net_device *ndev, void *p)
+{
+	int err;
+
+	err = eth_mac_addr(ndev, p);
+	if (!err)
+		__nixge_hw_set_mac_address(ndev);
+
+	return err;
+}
+
+static const struct net_device_ops nixge_netdev_ops = {
+	.ndo_open = nixge_open,
+	.ndo_stop = nixge_stop,
+	.ndo_start_xmit = nixge_start_xmit,
+	.ndo_change_mtu	= nixge_change_mtu,
+	.ndo_set_mac_address = nixge_net_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+};
+
+static void nixge_ethtools_get_drvinfo(struct net_device *ndev,
+				       struct ethtool_drvinfo *ed)
+{
+	strlcpy(ed->driver, "nixge", sizeof(ed->driver));
+	strlcpy(ed->bus_info, "platform", sizeof(ed->driver));
+}
+
+static int nixge_ethtools_get_coalesce(struct net_device *ndev,
+				       struct ethtool_coalesce *ecoalesce)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	u32 regval = 0;
+
+	regval = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET);
+	ecoalesce->rx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK)
+					     >> XAXIDMA_COALESCE_SHIFT;
+	regval = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET);
+	ecoalesce->tx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK)
+					     >> XAXIDMA_COALESCE_SHIFT;
+	return 0;
+}
+
+static int nixge_ethtools_set_coalesce(struct net_device *ndev,
+				       struct ethtool_coalesce *ecoalesce)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+
+	if (netif_running(ndev)) {
+		netdev_err(ndev,
+			   "Please stop netif before applying configuration\n");
+		return -EBUSY;
+	}
+
+	if (ecoalesce->rx_coalesce_usecs ||
+	    ecoalesce->rx_coalesce_usecs_irq ||
+	    ecoalesce->rx_max_coalesced_frames_irq ||
+	    ecoalesce->tx_coalesce_usecs ||
+	    ecoalesce->tx_coalesce_usecs_irq ||
+	    ecoalesce->tx_max_coalesced_frames_irq ||
+	    ecoalesce->stats_block_coalesce_usecs ||
+	    ecoalesce->use_adaptive_rx_coalesce ||
+	    ecoalesce->use_adaptive_tx_coalesce ||
+	    ecoalesce->pkt_rate_low ||
+	    ecoalesce->rx_coalesce_usecs_low ||
+	    ecoalesce->rx_max_coalesced_frames_low ||
+	    ecoalesce->tx_coalesce_usecs_low ||
+	    ecoalesce->tx_max_coalesced_frames_low ||
+	    ecoalesce->pkt_rate_high ||
+	    ecoalesce->rx_coalesce_usecs_high ||
+	    ecoalesce->rx_max_coalesced_frames_high ||
+	    ecoalesce->tx_coalesce_usecs_high ||
+	    ecoalesce->tx_max_coalesced_frames_high ||
+	    ecoalesce->rate_sample_interval)
+		return -EOPNOTSUPP;
+	if (ecoalesce->rx_max_coalesced_frames)
+		priv->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames;
+	if (ecoalesce->tx_max_coalesced_frames)
+		priv->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames;
+
+	return 0;
+}
+
+static int nixge_ethtools_set_phys_id(struct net_device *ndev,
+				      enum ethtool_phys_id_state state)
+{
+	struct nixge_priv *priv = netdev_priv(ndev);
+	u32 ctrl;
+
+	ctrl = nixge_ctrl_read_reg(priv, NIXGE_REG_LED_CTL);
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		ctrl |= NIXGE_ID_LED_CTL_EN;
+		/* Enable identification LED override*/
+		nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl);
+		return 2;
+
+	case ETHTOOL_ID_ON:
+		ctrl |= NIXGE_ID_LED_CTL_VAL;
+		nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl);
+		break;
+
+	case ETHTOOL_ID_OFF:
+		ctrl &= ~NIXGE_ID_LED_CTL_VAL;
+		nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl);
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		/* Restore LED settings */
+		ctrl &= ~NIXGE_ID_LED_CTL_EN;
+		nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct ethtool_ops nixge_ethtool_ops = {
+	.get_drvinfo    = nixge_ethtools_get_drvinfo,
+	.get_coalesce   = nixge_ethtools_get_coalesce,
+	.set_coalesce   = nixge_ethtools_set_coalesce,
+	.set_phys_id    = nixge_ethtools_set_phys_id,
+	.get_link_ksettings     = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings     = phy_ethtool_set_link_ksettings,
+	.get_link		= ethtool_op_get_link,
+};
+
+static int nixge_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+	struct nixge_priv *priv = bus->priv;
+	u32 status, tmp;
+	int err;
+	u16 device;
+
+	if (reg & MII_ADDR_C45) {
+		device = (reg >> 16) & 0x1f;
+
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_ADDR, reg & 0xffff);
+
+		tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_OP_ADDRESS)
+			| NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device);
+
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp);
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1);
+
+		err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status,
+					      !status, 10, 1000);
+		if (err) {
+			dev_err(priv->dev, "timeout setting address");
+			return err;
+		}
+
+		tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_C45_READ) |
+			NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device);
+	} else {
+		device = reg & 0x1f;
+
+		tmp = NIXGE_MDIO_CLAUSE22 | NIXGE_MDIO_OP(NIXGE_MDIO_C22_READ) |
+			NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device);
+	}
+
+	nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp);
+	nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1);
+
+	err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status,
+				      !status, 10, 1000);
+	if (err) {
+		dev_err(priv->dev, "timeout setting read command");
+		return err;
+	}
+
+	status = nixge_ctrl_read_reg(priv, NIXGE_REG_MDIO_DATA);
+
+	return status;
+}
+
+static int nixge_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
+{
+	struct nixge_priv *priv = bus->priv;
+	u32 status, tmp;
+	u16 device;
+	int err;
+
+	if (reg & MII_ADDR_C45) {
+		device = (reg >> 16) & 0x1f;
+
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_ADDR, reg & 0xffff);
+
+		tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_OP_ADDRESS)
+			| NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device);
+
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp);
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1);
+
+		err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status,
+					      !status, 10, 1000);
+		if (err) {
+			dev_err(priv->dev, "timeout setting address");
+			return err;
+		}
+
+		tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_C45_WRITE)
+			| NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device);
+
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_DATA, val);
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp);
+		err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status,
+					      !status, 10, 1000);
+		if (err)
+			dev_err(priv->dev, "timeout setting write command");
+	} else {
+		device = reg & 0x1f;
+
+		tmp = NIXGE_MDIO_CLAUSE22 |
+			NIXGE_MDIO_OP(NIXGE_MDIO_C22_WRITE) |
+			NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device);
+
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_DATA, val);
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp);
+		nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1);
+
+		err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status,
+					      !status, 10, 1000);
+		if (err)
+			dev_err(priv->dev, "timeout setting write command");
+	}
+
+	return err;
+}
+
+static int nixge_mdio_setup(struct nixge_priv *priv, struct device_node *np)
+{
+	struct mii_bus *bus;
+
+	bus = devm_mdiobus_alloc(priv->dev);
+	if (!bus)
+		return -ENOMEM;
+
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(priv->dev));
+	bus->priv = priv;
+	bus->name = "nixge_mii_bus";
+	bus->read = nixge_mdio_read;
+	bus->write = nixge_mdio_write;
+	bus->parent = priv->dev;
+
+	priv->mii_bus = bus;
+
+	return of_mdiobus_register(bus, np);
+}
+
+static void *nixge_get_nvmem_address(struct device *dev)
+{
+	struct nvmem_cell *cell;
+	size_t cell_size;
+	char *mac;
+
+	cell = nvmem_cell_get(dev, "address");
+	if (IS_ERR(cell))
+		return cell;
+
+	mac = nvmem_cell_read(cell, &cell_size);
+	nvmem_cell_put(cell);
+
+	return mac;
+}
+
+static int nixge_probe(struct platform_device *pdev)
+{
+	struct nixge_priv *priv;
+	struct net_device *ndev;
+	struct resource *dmares;
+	const char *mac_addr;
+	int err;
+
+	ndev = alloc_etherdev(sizeof(*priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	ndev->features = NETIF_F_SG;
+	ndev->netdev_ops = &nixge_netdev_ops;
+	ndev->ethtool_ops = &nixge_ethtool_ops;
+
+	/* MTU range: 64 - 9000 */
+	ndev->min_mtu = 64;
+	ndev->max_mtu = NIXGE_JUMBO_MTU;
+
+	mac_addr = nixge_get_nvmem_address(&pdev->dev);
+	if (mac_addr && is_valid_ether_addr(mac_addr))
+		ether_addr_copy(ndev->dev_addr, mac_addr);
+	else
+		eth_hw_addr_random(ndev);
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	priv->dev = &pdev->dev;
+
+	netif_napi_add(ndev, &priv->napi, nixge_poll, NAPI_POLL_WEIGHT);
+
+	dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->dma_regs = devm_ioremap_resource(&pdev->dev, dmares);
+	if (IS_ERR(priv->dma_regs)) {
+		netdev_err(ndev, "failed to map dma regs\n");
+		return PTR_ERR(priv->dma_regs);
+	}
+	priv->ctrl_regs = priv->dma_regs + NIXGE_REG_CTRL_OFFSET;
+	__nixge_hw_set_mac_address(ndev);
+
+	priv->tx_irq = platform_get_irq_byname(pdev, "tx");
+	if (priv->tx_irq < 0) {
+		netdev_err(ndev, "could not find 'tx' irq");
+		return priv->tx_irq;
+	}
+
+	priv->rx_irq = platform_get_irq_byname(pdev, "rx");
+	if (priv->rx_irq < 0) {
+		netdev_err(ndev, "could not find 'rx' irq");
+		return priv->rx_irq;
+	}
+
+	priv->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
+	priv->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
+
+	err = nixge_mdio_setup(priv, pdev->dev.of_node);
+	if (err) {
+		netdev_err(ndev, "error registering mdio bus");
+		goto free_netdev;
+	}
+
+	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+	if (priv->phy_mode < 0) {
+		netdev_err(ndev, "not find \"phy-mode\" property\n");
+		err = -EINVAL;
+		goto unregister_mdio;
+	}
+
+	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (!priv->phy_node) {
+		netdev_err(ndev, "not find \"phy-handle\" property\n");
+		err = -EINVAL;
+		goto unregister_mdio;
+	}
+
+	err = register_netdev(priv->ndev);
+	if (err) {
+		netdev_err(ndev, "register_netdev() error (%i)\n", err);
+		goto unregister_mdio;
+	}
+
+	return 0;
+
+unregister_mdio:
+	mdiobus_unregister(priv->mii_bus);
+
+free_netdev:
+	free_netdev(ndev);
+
+	return err;
+}
+
+static int nixge_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct nixge_priv *priv = netdev_priv(ndev);
+
+	unregister_netdev(ndev);
+
+	mdiobus_unregister(priv->mii_bus);
+
+	free_netdev(ndev);
+
+	return 0;
+}
+
+/* Match table for of_platform binding */
+static const struct of_device_id nixge_dt_ids[] = {
+	{ .compatible = "ni,xge-enet-2.00", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, nixge_dt_ids);
+
+static struct platform_driver nixge_driver = {
+	.probe		= nixge_probe,
+	.remove		= nixge_remove,
+	.driver		= {
+		.name		= "nixge",
+		.of_match_table	= of_match_ptr(nixge_dt_ids),
+	},
+};
+module_platform_driver(nixge_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("National Instruments XGE Management MAC");
+MODULE_AUTHOR("Moritz Fischer <mdf@kernel.org>");

From d72e7c21b54bd7a6caa08a8eb62257d4e4196ab3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 27 Mar 2018 16:20:01 -0700
Subject: [PATCH 1451/1678] net: bgmac: Use interface name to request interrupt

When the system contains several BGMAC adapters, it is nice to be able
to tell which one is which by looking at /proc/interrupts. Use the
network device name as a name to request_irq() with.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 8eef9fb6b1fe..46d7b8068425 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1190,7 +1190,7 @@ static int bgmac_open(struct net_device *net_dev)
 	bgmac_chip_init(bgmac);
 
 	err = request_irq(bgmac->irq, bgmac_interrupt, IRQF_SHARED,
-			  KBUILD_MODNAME, net_dev);
+			  net_dev->name, net_dev);
 	if (err < 0) {
 		dev_err(bgmac->dev, "IRQ request error: %d!\n", err);
 		bgmac_dma_cleanup(bgmac);

From 34322615cbaa0f54305f7818bb74c70bb50f47a8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 27 Mar 2018 16:20:02 -0700
Subject: [PATCH 1452/1678] net: bgmac: Mask interrupts during probe

We can have interrupts left enabled form e.g: the bootloader which used
the network device for network boot. Make sure we have those disabled as
early as possible to avoid spurious interrupts.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 46d7b8068425..2326cc219c46 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1492,6 +1492,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 	struct net_device *net_dev = bgmac->net_dev;
 	int err;
 
+	bgmac_chip_intrs_off(bgmac);
+
 	net_dev->irq = bgmac->irq;
 	SET_NETDEV_DEV(net_dev, bgmac->dev);
 	dev_set_drvdata(bgmac->dev, bgmac);

From f0b07bb151b098d291fd1fd71ef7a2df56fb124a Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 19:20:32 +0300
Subject: [PATCH 1453/1678] net: Introduce net_rwsem to protect
 net_namespace_list

rtnl_lock() is used everywhere, and contention is very high.
When someone wants to iterate over alive net namespaces,
he/she has no a possibility to do that without exclusive lock.
But the exclusive rtnl_lock() in such places is overkill,
and it just increases the contention. Yes, there is already
for_each_net_rcu() in kernel, but it requires rcu_read_lock(),
and this can't be sleepable. Also, sometimes it may be need
really prevent net_namespace_list growth, so for_each_net_rcu()
is not fit there.

This patch introduces new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows not-exclusive iterations over net
namespaces list. It allows to stop using rtnl_lock()
in several places (what is made in next patches) and makes
less the time, we keep rtnl_mutex. Here we just add new lock,
while the explanation of we can remove rtnl_lock() there are
in next patches.

Fine grained locks generally are better, then one big lock,
so let's do that with net_namespace_list, while the situation
allows that.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/roce_gid_mgmt.c |  2 ++
 include/linux/rtnetlink.h               |  1 +
 include/net/net_namespace.h             |  1 +
 net/core/dev.c                          |  5 +++++
 net/core/fib_notifier.c                 |  2 ++
 net/core/net_namespace.c                | 18 +++++++++++++-----
 net/core/rtnetlink.c                    |  5 +++++
 net/netfilter/nf_conntrack_core.c       |  2 ++
 net/openvswitch/datapath.c              |  2 ++
 net/wireless/wext-core.c                |  2 ++
 security/selinux/include/xfrm.h         |  2 ++
 11 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 5a52ec77940a..cc2966380c0c 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -403,10 +403,12 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
 	 * our feet
 	 */
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net)
 		for_each_netdev(net, ndev)
 			if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
 				add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
+	up_read(&net_rwsem);
 	rtnl_unlock();
 }
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c7d1e4689325..5225832bd6ff 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct rw_semaphore pernet_ops_rwsem;
+extern struct rw_semaphore net_rwsem;
 
 #ifdef CONFIG_PROVE_LOCKING
 extern bool lockdep_rtnl_is_held(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1ab4f920f109..47e35cce3b64 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
 #endif
 }
 
+/* Protected by net_rwsem */
 #define for_each_net(VAR)				\
 	list_for_each_entry(VAR, &net_namespace_list, list)
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e13807b5c84d..eca5458b2753 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 		goto unlock;
 	if (dev_boot_phase)
 		goto unlock;
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
@@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 			call_netdevice_notifier(nb, NETDEV_UP, dev);
 		}
 	}
+	up_read(&net_rwsem);
 
 unlock:
 	rtnl_unlock();
@@ -1664,6 +1666,7 @@ rollback:
 	}
 
 outroll:
+	up_read(&net_rwsem);
 	raw_notifier_chain_unregister(&netdev_chain, nb);
 	goto unlock;
 }
@@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	if (err)
 		goto unlock;
 
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			if (dev->flags & IFF_UP) {
@@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 		}
 	}
+	up_read(&net_rwsem);
 unlock:
 	rtnl_unlock();
 	return err;
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 0c048bdeb016..614b985c92a4 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void)
 	struct net *net;
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		rcu_read_lock();
 		list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
@@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void)
 		}
 		rcu_read_unlock();
 	}
+	up_read(&net_rwsem);
 	rtnl_unlock();
 
 	return fib_seq;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b5796d17a302..7fdf321d4997 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list;
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
 
+/* Protects net_namespace_list. Nests iside rtnl_lock() */
+DECLARE_RWSEM(net_rwsem);
+EXPORT_SYMBOL_GPL(net_rwsem);
+
 struct net init_net = {
 	.count		= REFCOUNT_INIT(1),
 	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
@@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 		if (error < 0)
 			goto out_undo;
 	}
-	rtnl_lock();
+	down_write(&net_rwsem);
 	list_add_tail_rcu(&net->list, &net_namespace_list);
-	rtnl_unlock();
+	up_write(&net_rwsem);
 out:
 	return error;
 
@@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last)
 	 * and this work is the only process, that may delete
 	 * a net from net_namespace_list. So, when the below
 	 * is executing, the list may only grow. Thus, we do not
-	 * use for_each_net_rcu() or rtnl_lock().
+	 * use for_each_net_rcu() or net_rwsem.
 	 */
 	for_each_net(tmp) {
 		int id;
@@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work)
 	down_read(&pernet_ops_rwsem);
 
 	/* Don't let anyone else find us. */
-	rtnl_lock();
+	down_write(&net_rwsem);
 	llist_for_each_entry(net, net_kill_list, cleanup_list)
 		list_del_rcu(&net->list);
 	/* Cache last net. After we unlock rtnl, no one new net
@@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work)
 	 * useless anyway, as netns_ids are destroyed there.
 	 */
 	last = list_last_entry(&net_namespace_list, struct net, list);
-	rtnl_unlock();
+	up_write(&net_rwsem);
 
 	llist_for_each_entry(net, net_kill_list, cleanup_list) {
 		unhash_nsid(net, last);
@@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list,
 
 	list_add_tail(&ops->list, list);
 	if (ops->init || (ops->id && ops->size)) {
+		/* We held write locked pernet_ops_rwsem, and parallel
+		 * setup_net() and cleanup_net() are not possible.
+		 */
 		for_each_net(net) {
 			error = ops_init(ops, net);
 			if (error)
@@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
 	LIST_HEAD(net_exit_list);
 
 	list_del(&ops->list);
+	/* See comment in __register_pernet_operations() */
 	for_each_net(net)
 		list_add_tail(&net->exit_list, &net_exit_list);
 	ops_exit_list(ops, &net_exit_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d3949789cef..e86b28482ca7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	struct net *net;
 
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		__rtnl_kill_links(net, ops);
 	}
+	up_read(&net_rwsem);
 	list_del(&ops->list);
 }
 EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
@@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void)
 	for (;;) {
 		unregistering = false;
 		rtnl_lock();
+		/* We held write locked pernet_ops_rwsem, and parallel
+		 * setup_net() and cleanup_net() are not possible.
+		 */
 		for_each_net(net) {
 			if (net->dev_unreg_count > 0) {
 				unregistering = true;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 705198de671d..370f9b7f051b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1764,12 +1764,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 	struct net *net;
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		if (atomic_read(&net->ct.count) == 0)
 			continue;
 		__nf_ct_unconfirmed_destroy(net);
 		nf_queue_nf_hook_drop(net);
 	}
+	up_read(&net_rwsem);
 	rtnl_unlock();
 
 	/* Need to wait for netns cleanup worker to finish, if its
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ef38e5aecd28..9746ee30a99b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2364,8 +2364,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
 		__dp_destroy(dp);
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net)
 		list_vports_from_net(net, dnet, &head);
+	up_read(&net_rwsem);
 	rtnl_unlock();
 
 	/* Detach all vports from given namespace. */
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 9efbfc753347..544d7b62d7ca 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -349,11 +349,13 @@ void wireless_nlevent_flush(void)
 
 	ASSERT_RTNL();
 
+	down_read(&net_rwsem);
 	for_each_net(net) {
 		while ((skb = skb_dequeue(&net->wext_nlevents)))
 			rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
 				    GFP_KERNEL);
 	}
+	up_read(&net_rwsem);
 }
 EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
 
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1f173a7a4daa..31d66431be1e 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -48,8 +48,10 @@ static inline void selinux_xfrm_notify_policyload(void)
 	struct net *net;
 
 	rtnl_lock();
+	down_read(&net_rwsem);
 	for_each_net(net)
 		rt_genid_bump_all(net);
+	up_read(&net_rwsem);
 	rtnl_unlock();
 }
 #else

From 10256debb918aea083d0ddada64d29014c642a7b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 19:20:44 +0300
Subject: [PATCH 1454/1678] net: Don't take rtnl_lock() in
 wireless_nlevent_flush()

This function iterates over net_namespace_list and flushes
the queue for every of them. What does this rtnl_lock()
protects?! Since we may add skbs to net::wext_nlevents
without rtnl_lock(), it does not protects us about queuers.

It guarantees, two threads can't flush the queue in parallel,
that can change the order, but since skb can be queued
in any order, it doesn't matter, how many threads do this
in parallel. In case of several threads, this will be even
faster.

So, we can remove rtnl_lock() here, as it was used for
iteration over net_namespace_list only.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext-core.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 544d7b62d7ca..5e677dac2a0c 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -347,8 +347,6 @@ void wireless_nlevent_flush(void)
 	struct sk_buff *skb;
 	struct net *net;
 
-	ASSERT_RTNL();
-
 	down_read(&net_rwsem);
 	for_each_net(net) {
 		while ((skb = skb_dequeue(&net->wext_nlevents)))
@@ -412,9 +410,7 @@ subsys_initcall(wireless_nlevent_init);
 /* Process events generated by the wireless layer or the driver. */
 static void wireless_nlevent_process(struct work_struct *work)
 {
-	rtnl_lock();
 	wireless_nlevent_flush();
-	rtnl_unlock();
 }
 
 static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process);

From 350311aab4c0b2477f9cf3fb03cef2e4cd6c3b18 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 19:20:56 +0300
Subject: [PATCH 1455/1678] security: Remove rtnl_lock() in
 selinux_xfrm_notify_policyload()

rt_genid_bump_all() consists of ipv4 and ipv6 part.
ipv4 part is incrementing of net::ipv4::rt_genid,
and I see many places, where it's read without rtnl_lock().

ipv6 part calls __fib6_clean_all(), and it's also
called without rtnl_lock() in other places.

So, rtnl_lock() here was used to iterate net_namespace_list only,
and we can remove it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 security/selinux/include/xfrm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 31d66431be1e..a0b465316292 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -47,12 +47,10 @@ static inline void selinux_xfrm_notify_policyload(void)
 {
 	struct net *net;
 
-	rtnl_lock();
 	down_read(&net_rwsem);
 	for_each_net(net)
 		rt_genid_bump_all(net);
 	up_read(&net_rwsem);
-	rtnl_unlock();
 }
 #else
 static inline int selinux_xfrm_enabled(void)

From ec9c780925c57588637e1dbd8650d294107311c0 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 19:21:09 +0300
Subject: [PATCH 1456/1678] ovs: Remove rtnl_lock() from ovs_exit_net()

Here we iterate for_each_net() and removes
vport from alive net to the exiting net.

ovs_net::dps are protected by ovs_mutex(),
and the others, who change it (ovs_dp_cmd_new(),
__dp_destroy()) also take it.
The same with datapath::ports list.

So, we remove rtnl_lock() here.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9746ee30a99b..015e24e08909 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2363,12 +2363,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
 
-	rtnl_lock();
 	down_read(&net_rwsem);
 	for_each_net(net)
 		list_vports_from_net(net, dnet, &head);
 	up_read(&net_rwsem);
-	rtnl_unlock();
 
 	/* Detach all vports from given namespace. */
 	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {

From 152f253152cc08f5d26ba76659723d2d83b363f8 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 19:21:20 +0300
Subject: [PATCH 1457/1678] net: Remove rtnl_lock() in nf_ct_iterate_destroy()

rtnl_lock() doesn't protect net::ct::count,
and it's not needed for__nf_ct_unconfirmed_destroy()
and for nf_queue_nf_hook_drop().

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 370f9b7f051b..41ff04ee2554 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1763,7 +1763,6 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 {
 	struct net *net;
 
-	rtnl_lock();
 	down_read(&net_rwsem);
 	for_each_net(net) {
 		if (atomic_read(&net->ct.count) == 0)
@@ -1772,7 +1771,6 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 		nf_queue_nf_hook_drop(net);
 	}
 	up_read(&net_rwsem);
-	rtnl_unlock();
 
 	/* Need to wait for netns cleanup worker to finish, if its
 	 * running -- it might have deleted a net namespace from

From c30d9356e9e8ed0735c1215e187b03d3ae8b4966 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 27 Mar 2018 18:21:55 -0700
Subject: [PATCH 1458/1678] net: Fix fib notifer to return errno

Notifier handlers use notifier_from_errno to convert any potential error
to an encoded format. As a consequence the other side, call_fib_notifier{s}
in this case, needs to use notifier_to_errno to return the error from
the handler back to its caller.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_notifier.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 614b985c92a4..13a40b831d6d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net,
 		      enum fib_event_type event_type,
 		      struct fib_notifier_info *info)
 {
+	int err;
+
 	info->net = net;
-	return nb->notifier_call(nb, event_type, info);
+	err = nb->notifier_call(nb, event_type, info);
+	return notifier_to_errno(err);
 }
 EXPORT_SYMBOL(call_fib_notifier);
 
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
 		       struct fib_notifier_info *info)
 {
+	int err;
+
 	info->net = net;
-	return atomic_notifier_call_chain(&fib_chain, event_type, info);
+	err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+	return notifier_to_errno(err);
 }
 EXPORT_SYMBOL(call_fib_notifiers);
 

From 9776d32537d2bbc251fd1de651e2bb2439474bde Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 27 Mar 2018 18:21:56 -0700
Subject: [PATCH 1459/1678] net: Move call_fib_rule_notifiers up in
 fib_nl_newrule

Move call_fib_rule_notifiers up in fib_nl_newrule to the point right
before the rule is inserted into the list. At this point there are no
more failure paths within the core rule code, so if the notifier
does not fail then the rule will be inserted into the list.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9d87ce868402..33958f84c173 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -631,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		goto errout_free;
 
+	err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
+				      extack);
+	if (err < 0)
+		goto errout_free;
+
 	list_for_each_entry(r, &ops->rules_list, list) {
 		if (r->pref > rule->pref)
 			break;
@@ -667,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (rule->tun_id)
 		ip_tunnel_need_metadata();
 
-	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);

From 6635f311eab40b6d97eb884f371be41d0f5a3ed6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 27 Mar 2018 18:21:57 -0700
Subject: [PATCH 1460/1678] net/ipv4: Move call_fib_entry_notifiers up for new
 routes

Move call to call_fib_entry_notifiers for new IPv4 routes to right
before the call to fib_insert_alias. At this point the only remaining
failure path is memory allocations in fib_insert_node. Handle that
very unlikely failure with a call to call_fib_entry_notifiers to
tell drivers about it.

At this point notifier handlers can decide the fate of the new route
with a clean path to delete the potential new entry if the notifier
returns non-0.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fac0b73e24d1..67116233e2bc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1065,6 +1065,9 @@ noleaf:
 	return -ENOMEM;
 }
 
+/* fib notifier for ADD is sent before calling fib_insert_alias with
+ * the expectation that the only possible failure ENOMEM
+ */
 static int fib_insert_alias(struct trie *t, struct key_vector *tp,
 			    struct key_vector *l, struct fib_alias *new,
 			    struct fib_alias *fa, t_key key)
@@ -1263,21 +1266,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 	new_fa->tb_id = tb->tb_id;
 	new_fa->fa_default = -1;
 
+	err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
+	if (err)
+		goto out_free_new_fa;
+
 	/* Insert new entry to the list. */
 	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
 	if (err)
-		goto out_free_new_fa;
+		goto out_fib_notif;
 
 	if (!plen)
 		tb->tb_num_default++;
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
-	call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
 		  &cfg->fc_nlinfo, nlflags);
 succeeded:
 	return 0;
 
+out_fib_notif:
+	/* notifier was sent that entry would be added to trie, but
+	 * the add failed and need to recover. Only failure for
+	 * fib_insert_alias is ENOMEM.
+	 */
+	NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
+	call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
+				 plen, new_fa, NULL);
 out_free_new_fa:
 	kmem_cache_free(fn_alias_kmem, new_fa);
 out:

From c1d7ee67acb54b7dc1408929ff70dfe46993e517 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 27 Mar 2018 18:21:58 -0700
Subject: [PATCH 1461/1678] net/ipv4: Allow notifier to fail route replace

Add checking to call to call_fib_entry_notifiers for IPv4 route replace.
Allows a notifier handler to fail the replace.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 67116233e2bc..3dcffd3ce98c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1219,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
 			new_fa->tb_id = tb->tb_id;
 			new_fa->fa_default = -1;
 
-			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
-						 key, plen, new_fa, extack);
+			err = call_fib_entry_notifiers(net,
+						       FIB_EVENT_ENTRY_REPLACE,
+						       key, plen, new_fa,
+						       extack);
+			if (err)
+				goto out_free_new_fa;
+
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				  tb->tb_id, &cfg->fc_nlinfo, nlflags);
 

From 2233000cba40ee0784a2d5b5e2b2c38c1159a7ef Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 27 Mar 2018 18:21:59 -0700
Subject: [PATCH 1462/1678] net/ipv6: Move call_fib6_entry_notifiers up for
 route adds

Move call to call_fib6_entry_notifiers for new IPv6 routes to right
before the insertion into the FIB. At this point notifier handlers can
decide the fate of the new route with a clean path to delete the
potential new entry if the notifier returns non-0.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 908b8e5b615a..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1007,12 +1007,16 @@ add:
 		if (err)
 			return err;
 
+		err = call_fib6_entry_notifiers(info->nl_net,
+						FIB_EVENT_ENTRY_ADD,
+						rt, extack);
+		if (err)
+			return err;
+
 		rcu_assign_pointer(rt->rt6_next, iter);
 		atomic_inc(&rt->rt6i_ref);
 		rcu_assign_pointer(rt->rt6i_node, fn);
 		rcu_assign_pointer(*ins, rt);
-		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
-					  rt, extack);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1036,12 +1040,16 @@ add:
 		if (err)
 			return err;
 
+		err = call_fib6_entry_notifiers(info->nl_net,
+						FIB_EVENT_ENTRY_REPLACE,
+						rt, extack);
+		if (err)
+			return err;
+
 		atomic_inc(&rt->rt6i_ref);
 		rcu_assign_pointer(rt->rt6i_node, fn);
 		rt->rt6_next = iter->rt6_next;
 		rcu_assign_pointer(*ins, rt);
-		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
-					  rt, extack);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {

From 37923ed6b8cea94d7d76038e2f72c57a0b45daab Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 27 Mar 2018 18:22:00 -0700
Subject: [PATCH 1463/1678] netdevsim: Add simple FIB resource controller via
 devlink

Add devlink support to netdevsim and use it to implement a simple,
profile based resource controller. Only one controller is needed
per namespace, so the first netdevsim netdevice in a namespace
registers with devlink. If that device is deleted, the resource
settings are deleted.

The resource controller allows a user to limit the number of IPv4 and
IPv6 FIB entries and FIB rules. The resource paths are:
    /IPv4
    /IPv4/fib
    /IPv4/fib-rules
    /IPv6
    /IPv6/fib
    /IPv6/fib-rules

The IPv4 and IPv6 top level resources are unlimited in size and can not
be changed. From there, the number of FIB entries and FIB rule entries
are unlimited by default. A user can specify a limit for the fib and
fib-rules resources:

    $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib size 96
    $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16
    $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64
    $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16
    $ devlink dev reload netdevsim/netdevsim0

such that the number of rules or routes is limited (96 ipv4 routes in the
example above):
    $ for n in $(seq 1 32); do ip ro add 10.99.$n.0/24 dev eth1; done
    Error: netdevsim: Exceeded number of supported fib entries.

    $ devlink resource show netdevsim/netdevsim0
    netdevsim/netdevsim0:
      name IPv4 size unlimited unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables non
        resources:
          name fib size 96 occ 96 unit entry size_min 0 size_max unlimited size_gran 1 dpipe_tables
    ...

With this template in place for resource management, it is fairly trivial
to extend and shows one way to implement a simple counter based resource
controller typical of network profiles.

Currently, devlink only supports initial namespace. Code is in place to
adapt netdevsim to a per namespace controller once the network namespace
issues are resolved.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig               |   1 +
 drivers/net/netdevsim/Makefile    |   4 +
 drivers/net/netdevsim/devlink.c   | 294 ++++++++++++++++++++++++++++++
 drivers/net/netdevsim/fib.c       | 263 ++++++++++++++++++++++++++
 drivers/net/netdevsim/netdev.c    |  12 +-
 drivers/net/netdevsim/netdevsim.h |  43 +++++
 6 files changed, 616 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/netdevsim/devlink.c
 create mode 100644 drivers/net/netdevsim/fib.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 08b85215c2be..891846655000 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -500,6 +500,7 @@ source "drivers/net/hyperv/Kconfig"
 config NETDEVSIM
 	tristate "Simulated networking device"
 	depends on DEBUG_FS
+	depends on MAY_USE_DEVLINK
 	help
 	  This driver is a developer testing tool and software model that can
 	  be used to test various control path networking APIs, especially
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index 09388c06171d..449b2a1a1800 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -9,3 +9,7 @@ ifeq ($(CONFIG_BPF_SYSCALL),y)
 netdevsim-objs += \
 	bpf.o
 endif
+
+ifneq ($(CONFIG_NET_DEVLINK),)
+netdevsim-objs += devlink.o fib.o
+endif
diff --git a/drivers/net/netdevsim/devlink.c b/drivers/net/netdevsim/devlink.c
new file mode 100644
index 000000000000..bbdcf064ba10
--- /dev/null
+++ b/drivers/net/netdevsim/devlink.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2018 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
+#include <linux/device.h>
+#include <net/devlink.h>
+#include <net/netns/generic.h>
+
+#include "netdevsim.h"
+
+static unsigned int nsim_devlink_id;
+
+/* place holder until devlink and namespaces is sorted out */
+static struct net *nsim_devlink_net(struct devlink *devlink)
+{
+	return &init_net;
+}
+
+/* IPv4
+ */
+static u64 nsim_ipv4_fib_resource_occ_get(struct devlink *devlink)
+{
+	struct net *net = nsim_devlink_net(devlink);
+
+	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
+}
+
+static struct devlink_resource_ops nsim_ipv4_fib_res_ops = {
+	.occ_get = nsim_ipv4_fib_resource_occ_get,
+};
+
+static u64 nsim_ipv4_fib_rules_res_occ_get(struct devlink *devlink)
+{
+	struct net *net = nsim_devlink_net(devlink);
+
+	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
+}
+
+static struct devlink_resource_ops nsim_ipv4_fib_rules_res_ops = {
+	.occ_get = nsim_ipv4_fib_rules_res_occ_get,
+};
+
+/* IPv6
+ */
+static u64 nsim_ipv6_fib_resource_occ_get(struct devlink *devlink)
+{
+	struct net *net = nsim_devlink_net(devlink);
+
+	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
+}
+
+static struct devlink_resource_ops nsim_ipv6_fib_res_ops = {
+	.occ_get = nsim_ipv6_fib_resource_occ_get,
+};
+
+static u64 nsim_ipv6_fib_rules_res_occ_get(struct devlink *devlink)
+{
+	struct net *net = nsim_devlink_net(devlink);
+
+	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
+}
+
+static struct devlink_resource_ops nsim_ipv6_fib_rules_res_ops = {
+	.occ_get = nsim_ipv6_fib_rules_res_occ_get,
+};
+
+static int devlink_resources_register(struct devlink *devlink)
+{
+	struct devlink_resource_size_params params = {
+		.size_max = (u64)-1,
+		.size_granularity = 1,
+		.unit = DEVLINK_RESOURCE_UNIT_ENTRY
+	};
+	struct net *net = nsim_devlink_net(devlink);
+	int err;
+	u64 n;
+
+	/* Resources for IPv4 */
+	err = devlink_resource_register(devlink, "IPv4", (u64)-1,
+					NSIM_RESOURCE_IPV4,
+					DEVLINK_RESOURCE_ID_PARENT_TOP,
+					&params, NULL);
+	if (err) {
+		pr_err("Failed to register IPv4 top resource\n");
+		goto out;
+	}
+
+	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
+	err = devlink_resource_register(devlink, "fib", n,
+					NSIM_RESOURCE_IPV4_FIB,
+					NSIM_RESOURCE_IPV4,
+					&params, &nsim_ipv4_fib_res_ops);
+	if (err) {
+		pr_err("Failed to register IPv4 FIB resource\n");
+		return err;
+	}
+
+	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
+	err = devlink_resource_register(devlink, "fib-rules", n,
+					NSIM_RESOURCE_IPV4_FIB_RULES,
+					NSIM_RESOURCE_IPV4,
+					&params, &nsim_ipv4_fib_rules_res_ops);
+	if (err) {
+		pr_err("Failed to register IPv4 FIB rules resource\n");
+		return err;
+	}
+
+	/* Resources for IPv6 */
+	err = devlink_resource_register(devlink, "IPv6", (u64)-1,
+					NSIM_RESOURCE_IPV6,
+					DEVLINK_RESOURCE_ID_PARENT_TOP,
+					&params, NULL);
+	if (err) {
+		pr_err("Failed to register IPv6 top resource\n");
+		goto out;
+	}
+
+	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
+	err = devlink_resource_register(devlink, "fib", n,
+					NSIM_RESOURCE_IPV6_FIB,
+					NSIM_RESOURCE_IPV6,
+					&params, &nsim_ipv6_fib_res_ops);
+	if (err) {
+		pr_err("Failed to register IPv6 FIB resource\n");
+		return err;
+	}
+
+	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
+	err = devlink_resource_register(devlink, "fib-rules", n,
+					NSIM_RESOURCE_IPV6_FIB_RULES,
+					NSIM_RESOURCE_IPV6,
+					&params, &nsim_ipv6_fib_rules_res_ops);
+	if (err) {
+		pr_err("Failed to register IPv6 FIB rules resource\n");
+		return err;
+	}
+out:
+	return err;
+}
+
+static int nsim_devlink_reload(struct devlink *devlink)
+{
+	enum nsim_resource_id res_ids[] = {
+		NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
+		NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
+	};
+	struct net *net = nsim_devlink_net(devlink);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
+		int err;
+		u64 val;
+
+		err = devlink_resource_size_get(devlink, res_ids[i], &val);
+		if (!err) {
+			err = nsim_fib_set_max(net, res_ids[i], val);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static void nsim_devlink_net_reset(struct net *net)
+{
+	enum nsim_resource_id res_ids[] = {
+		NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
+		NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
+		if (nsim_fib_set_max(net, res_ids[i], (u64)-1)) {
+			pr_err("Failed to reset limit for resource %u\n",
+			       res_ids[i]);
+		}
+	}
+}
+
+static const struct devlink_ops nsim_devlink_ops = {
+	.reload = nsim_devlink_reload,
+};
+
+/* once devlink / namespace issues are sorted out
+ * this needs to be net in which a devlink instance
+ * is to be created. e.g., dev_net(ns->netdev)
+ */
+static struct net *nsim_to_net(struct netdevsim *ns)
+{
+	return &init_net;
+}
+
+void nsim_devlink_teardown(struct netdevsim *ns)
+{
+	if (ns->devlink) {
+		struct net *net = nsim_to_net(ns);
+		bool *reg_devlink = net_generic(net, nsim_devlink_id);
+
+		devlink_unregister(ns->devlink);
+		devlink_free(ns->devlink);
+		ns->devlink = NULL;
+
+		nsim_devlink_net_reset(net);
+		*reg_devlink = true;
+	}
+}
+
+void nsim_devlink_setup(struct netdevsim *ns)
+{
+	struct net *net = nsim_to_net(ns);
+	bool *reg_devlink = net_generic(net, nsim_devlink_id);
+	struct devlink *devlink;
+	int err = -ENOMEM;
+
+	/* only one device per namespace controls devlink */
+	if (!*reg_devlink) {
+		ns->devlink = NULL;
+		return;
+	}
+
+	devlink = devlink_alloc(&nsim_devlink_ops, 0);
+	if (!devlink)
+		return;
+
+	err = devlink_register(devlink, &ns->dev);
+	if (err)
+		goto err_devlink_free;
+
+	err = devlink_resources_register(devlink);
+	if (err)
+		goto err_dl_unregister;
+
+	ns->devlink = devlink;
+
+	*reg_devlink = false;
+
+	return;
+
+err_dl_unregister:
+	devlink_unregister(devlink);
+err_devlink_free:
+	devlink_free(devlink);
+}
+
+/* Initialize per network namespace state */
+static int __net_init nsim_devlink_netns_init(struct net *net)
+{
+	bool *reg_devlink = net_generic(net, nsim_devlink_id);
+
+	*reg_devlink = true;
+
+	return 0;
+}
+
+static struct pernet_operations nsim_devlink_net_ops __net_initdata = {
+	.init = nsim_devlink_netns_init,
+	.id   = &nsim_devlink_id,
+	.size = sizeof(bool),
+};
+
+void nsim_devlink_exit(void)
+{
+	unregister_pernet_subsys(&nsim_devlink_net_ops);
+	nsim_fib_exit();
+}
+
+int nsim_devlink_init(void)
+{
+	int err;
+
+	err = nsim_fib_init();
+	if (err)
+		goto err_out;
+
+	err = register_pernet_subsys(&nsim_devlink_net_ops);
+	if (err)
+		nsim_fib_exit();
+
+err_out:
+	return err;
+}
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
new file mode 100644
index 000000000000..0d105bafa261
--- /dev/null
+++ b/drivers/net/netdevsim/fib.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2018 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This software is licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree.
+ *
+ * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+ * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+ * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+ */
+
+#include <net/fib_notifier.h>
+#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
+#include <net/fib_rules.h>
+#include <net/netns/generic.h>
+
+#include "netdevsim.h"
+
+struct nsim_fib_entry {
+	u64 max;
+	u64 num;
+};
+
+struct nsim_per_fib_data {
+	struct nsim_fib_entry fib;
+	struct nsim_fib_entry rules;
+};
+
+struct nsim_fib_data {
+	struct nsim_per_fib_data ipv4;
+	struct nsim_per_fib_data ipv6;
+};
+
+static unsigned int nsim_fib_net_id;
+
+u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max)
+{
+	struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id);
+	struct nsim_fib_entry *entry;
+
+	switch (res_id) {
+	case NSIM_RESOURCE_IPV4_FIB:
+		entry = &fib_data->ipv4.fib;
+		break;
+	case NSIM_RESOURCE_IPV4_FIB_RULES:
+		entry = &fib_data->ipv4.rules;
+		break;
+	case NSIM_RESOURCE_IPV6_FIB:
+		entry = &fib_data->ipv6.fib;
+		break;
+	case NSIM_RESOURCE_IPV6_FIB_RULES:
+		entry = &fib_data->ipv6.rules;
+		break;
+	default:
+		return 0;
+	}
+
+	return max ? entry->max : entry->num;
+}
+
+int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val)
+{
+	struct nsim_fib_data *fib_data = net_generic(net, nsim_fib_net_id);
+	struct nsim_fib_entry *entry;
+	int err = 0;
+
+	switch (res_id) {
+	case NSIM_RESOURCE_IPV4_FIB:
+		entry = &fib_data->ipv4.fib;
+		break;
+	case NSIM_RESOURCE_IPV4_FIB_RULES:
+		entry = &fib_data->ipv4.rules;
+		break;
+	case NSIM_RESOURCE_IPV6_FIB:
+		entry = &fib_data->ipv6.fib;
+		break;
+	case NSIM_RESOURCE_IPV6_FIB_RULES:
+		entry = &fib_data->ipv6.rules;
+		break;
+	default:
+		return 0;
+	}
+
+	/* not allowing a new max to be less than curren occupancy
+	 * --> no means of evicting entries
+	 */
+	if (val < entry->num)
+		err = -EINVAL;
+	else
+		entry->max = val;
+
+	return err;
+}
+
+static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add,
+				 struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	if (add) {
+		if (entry->num < entry->max) {
+			entry->num++;
+		} else {
+			err = -ENOSPC;
+			NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib rule entries");
+		}
+	} else {
+		entry->num--;
+	}
+
+	return err;
+}
+
+static int nsim_fib_rule_event(struct fib_notifier_info *info, bool add)
+{
+	struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id);
+	struct netlink_ext_ack *extack = info->extack;
+	int err = 0;
+
+	switch (info->family) {
+	case AF_INET:
+		err = nsim_fib_rule_account(&data->ipv4.rules, add, extack);
+		break;
+	case AF_INET6:
+		err = nsim_fib_rule_account(&data->ipv6.rules, add, extack);
+		break;
+	}
+
+	return err;
+}
+
+static int nsim_fib_account(struct nsim_fib_entry *entry, bool add,
+			    struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	if (add) {
+		if (entry->num < entry->max) {
+			entry->num++;
+		} else {
+			err = -ENOSPC;
+			NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries");
+		}
+	} else {
+		entry->num--;
+	}
+
+	return err;
+}
+
+static int nsim_fib_event(struct fib_notifier_info *info, bool add)
+{
+	struct nsim_fib_data *data = net_generic(info->net, nsim_fib_net_id);
+	struct netlink_ext_ack *extack = info->extack;
+	int err = 0;
+
+	switch (info->family) {
+	case AF_INET:
+		err = nsim_fib_account(&data->ipv4.fib, add, extack);
+		break;
+	case AF_INET6:
+		err = nsim_fib_account(&data->ipv6.fib, add, extack);
+		break;
+	}
+
+	return err;
+}
+
+static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
+			     void *ptr)
+{
+	struct fib_notifier_info *info = ptr;
+	int err = 0;
+
+	switch (event) {
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		err = nsim_fib_rule_event(info, event == FIB_EVENT_RULE_ADD);
+		break;
+
+	case FIB_EVENT_ENTRY_ADD:  /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		err = nsim_fib_event(info, event == FIB_EVENT_ENTRY_ADD);
+		break;
+	}
+
+	return notifier_from_errno(err);
+}
+
+/* inconsistent dump, trying again */
+static void nsim_fib_dump_inconsistent(struct notifier_block *nb)
+{
+	struct nsim_fib_data *data;
+	struct net *net;
+
+	rcu_read_lock();
+	for_each_net_rcu(net) {
+		data = net_generic(net, nsim_fib_net_id);
+
+		data->ipv4.fib.num = 0ULL;
+		data->ipv4.rules.num = 0ULL;
+
+		data->ipv6.fib.num = 0ULL;
+		data->ipv6.rules.num = 0ULL;
+	}
+	rcu_read_unlock();
+}
+
+static struct notifier_block nsim_fib_nb = {
+	.notifier_call = nsim_fib_event_nb,
+};
+
+/* Initialize per network namespace state */
+static int __net_init nsim_fib_netns_init(struct net *net)
+{
+	struct nsim_fib_data *data = net_generic(net, nsim_fib_net_id);
+
+	data->ipv4.fib.max = (u64)-1;
+	data->ipv4.rules.max = (u64)-1;
+
+	data->ipv6.fib.max = (u64)-1;
+	data->ipv6.rules.max = (u64)-1;
+
+	return 0;
+}
+
+static struct pernet_operations nsim_fib_net_ops __net_initdata = {
+	.init = nsim_fib_netns_init,
+	.id   = &nsim_fib_net_id,
+	.size = sizeof(struct nsim_fib_data),
+};
+
+void nsim_fib_exit(void)
+{
+	unregister_pernet_subsys(&nsim_fib_net_ops);
+	unregister_fib_notifier(&nsim_fib_nb);
+}
+
+int nsim_fib_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&nsim_fib_net_ops);
+	if (err < 0) {
+		pr_err("Failed to register pernet subsystem\n");
+		goto err_out;
+	}
+
+	err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent);
+	if (err < 0) {
+		pr_err("Failed to register fib notifier\n");
+		goto err_out;
+	}
+
+err_out:
+	return err;
+}
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 3fd567928f3d..8b30ab3ea2c2 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -167,6 +167,8 @@ static int nsim_init(struct net_device *dev)
 
 	SET_NETDEV_DEV(dev, &ns->dev);
 
+	nsim_devlink_setup(ns);
+
 	return 0;
 
 err_bpf_uninit:
@@ -180,6 +182,7 @@ static void nsim_uninit(struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
 
+	nsim_devlink_teardown(ns);
 	debugfs_remove_recursive(ns->ddir);
 	nsim_bpf_uninit(ns);
 }
@@ -478,12 +481,18 @@ static int __init nsim_module_init(void)
 	if (err)
 		goto err_debugfs_destroy;
 
-	err = rtnl_link_register(&nsim_link_ops);
+	err = nsim_devlink_init();
 	if (err)
 		goto err_unreg_bus;
 
+	err = rtnl_link_register(&nsim_link_ops);
+	if (err)
+		goto err_dl_fini;
+
 	return 0;
 
+err_dl_fini:
+	nsim_devlink_exit();
 err_unreg_bus:
 	bus_unregister(&nsim_bus);
 err_debugfs_destroy:
@@ -494,6 +503,7 @@ err_debugfs_destroy:
 static void __exit nsim_module_exit(void)
 {
 	rtnl_link_unregister(&nsim_link_ops);
+	nsim_devlink_exit();
 	bus_unregister(&nsim_bus);
 	debugfs_remove_recursive(nsim_ddir);
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index ea081c10efb8..afb8cf90c0fd 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -64,6 +64,9 @@ struct netdevsim {
 
 	bool bpf_map_accept;
 	struct list_head bpf_bound_maps;
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+	struct devlink *devlink;
+#endif
 };
 
 extern struct dentry *nsim_ddir;
@@ -103,6 +106,46 @@ nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 }
 #endif
 
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+enum nsim_resource_id {
+	NSIM_RESOURCE_NONE,   /* DEVLINK_RESOURCE_ID_PARENT_TOP */
+	NSIM_RESOURCE_IPV4,
+	NSIM_RESOURCE_IPV4_FIB,
+	NSIM_RESOURCE_IPV4_FIB_RULES,
+	NSIM_RESOURCE_IPV6,
+	NSIM_RESOURCE_IPV6_FIB,
+	NSIM_RESOURCE_IPV6_FIB_RULES,
+};
+
+void nsim_devlink_setup(struct netdevsim *ns);
+void nsim_devlink_teardown(struct netdevsim *ns);
+
+int nsim_devlink_init(void);
+void nsim_devlink_exit(void);
+
+int nsim_fib_init(void);
+void nsim_fib_exit(void);
+u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
+int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val);
+#else
+static inline void nsim_devlink_setup(struct netdevsim *ns)
+{
+}
+
+static inline void nsim_devlink_teardown(struct netdevsim *ns)
+{
+}
+
+static inline int nsim_devlink_init(void)
+{
+	return 0;
+}
+
+static inline void nsim_devlink_exit(void)
+{
+}
+#endif
+
 static inline struct netdevsim *to_nsim(struct device *ptr)
 {
 	return container_of(ptr, struct netdevsim, dev);

From 697fefc7c1531f75ecd9623a4a4cbd47fab32ca8 Mon Sep 17 00:00:00 2001
From: Intiyaz Basha <intiyaz.basha@cavium.com>
Date: Tue, 27 Mar 2018 19:25:18 -0700
Subject: [PATCH 1464/1678] liquidio: Prioritize control messages

During heavy tx traffic, control messages (sent by liquidio driver to NIC
firmware) sometimes do not get processed in a timely manner.  Reason is:
the low-level metadata of control messages and that of egress network
packets indicate that they have the same priority.

Fix it by setting a higher priority for control messages through the new
ctrl_qpg field in the oct_txpciq struct.  It is the NIC firmware that does
the actual setting of priority by writing to the new ctrl_qpg field; the
host driver treats that value as opaque and just assigns it to pki_ih3->qpg

Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/liquidio_common.h | 8 ++++++--
 drivers/net/ethernet/cavium/liquidio/request_manager.c | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 82a783db5baf..75eea83c7cc6 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -712,9 +712,13 @@ union oct_txpciq {
 		u64 pkind:6;
 		u64 use_qpg:1;
 		u64 qpg:11;
-		u64 reserved:30;
+		u64 reserved0:10;
+		u64 ctrl_qpg:11;
+		u64 reserved:9;
 #else
-		u64 reserved:30;
+		u64 reserved:9;
+		u64 ctrl_qpg:11;
+		u64 reserved0:10;
 		u64 qpg:11;
 		u64 use_qpg:1;
 		u64 pkind:6;
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 2766af05b89e..b1270355b0b1 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -628,7 +628,8 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 		pki_ih3->tag     = LIO_CONTROL;
 		pki_ih3->tagtype = ATOMIC_TAG;
 		pki_ih3->qpg         =
-			oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
+			oct->instr_queue[sc->iq_no]->txpciq.s.ctrl_qpg;
+
 		pki_ih3->pm          = 0x7;
 		pki_ih3->sl          = 8;
 

From 18dcbe12fe9fca0ab825f7eff993060525ac2503 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 27 Mar 2018 19:49:42 -0700
Subject: [PATCH 1465/1678] ipv6: export ip6 fragments sysctl to unprivileged
 users

IPv4 was changed in commit 52a773d645e9 ("net: Export ip fragment
sysctl to unprivileged users")

The only sysctl that is not per-netns is not used :
ip6frag_secret_interval

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Nikolay Borisov <kernel@kyup.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..08a139f14d0f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -650,10 +650,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 		table[1].data = &net->ipv6.frags.low_thresh;
 		table[1].extra2 = &net->ipv6.frags.high_thresh;
 		table[2].data = &net->ipv6.frags.timeout;
-
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			table[0].procname = NULL;
 	}
 
 	hdr = register_net_sysctl(net, "net/ipv6", table);

From 50bc60cb155c813157fdca5b3b05194cd325d3e9 Mon Sep 17 00:00:00 2001
From: Michal Kalderon <Michal.Kalderon@cavium.com>
Date: Wed, 28 Mar 2018 11:42:16 +0300
Subject: [PATCH 1466/1678] qed*: Utilize FW 8.33.11.0

This FW contains several fixes and features

RDMA Features
- SRQ support
- XRC support
- Memory window support
- RDMA low latency queue support
- RDMA bonding support

RDMA bug fixes
- RDMA remote invalidate during retransmit fix
- iWARP MPA connect interop issue with RTR fix
- iWARP Legacy DPM support
- Fix MPA reject flow
- iWARP error handling
- RQ WQE validation checks

MISC
- Fix some HSI types endianity
- New Restriction: vlan insertion in core_tx_bd_data can't be set
  for LB packets

ETH
- HW QoS offload support
- Fix vlan, dcb and sriov flow of VF sending a packet with
  inband VLAN tag instead of default VLAN
- Allow GRE version 1 offloads in RX flow
- Allow VXLAN steering

iSCSI / FcoE
- Fix bd availability checking flow
- Support 256th sge proerly in iscsi/fcoe retransmit
- Performance improvement
- Fix handle iSCSI command arrival with AHS and with immediate
- Fix ipv6 traffic class configuration

DEBUG
- Update debug utilities

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Ariel Elior <Ariel.Elior@cavium.com>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/qedr_hsi_rdma.h    |    4 +-
 drivers/infiniband/hw/qedr/verbs.c            |    4 +-
 drivers/net/ethernet/qlogic/qed/qed_debug.c   |  415 +++--
 drivers/net/ethernet/qlogic/qed/qed_dev.c     |    4 +-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h     | 1610 +++++++++--------
 .../ethernet/qlogic/qed/qed_init_fw_funcs.c   |  103 +-
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c   |    7 -
 drivers/net/ethernet/qlogic/qed/qed_l2.c      |    2 +-
 drivers/net/ethernet/qlogic/qed/qed_ll2.c     |   13 -
 include/linux/qed/common_hsi.h                |    2 +-
 include/linux/qed/eth_common.h                |    2 +-
 include/linux/qed/iscsi_common.h              |    4 +-
 include/linux/qed/rdma_common.h               |    2 +
 include/linux/qed/roce_common.h               |    3 +
 14 files changed, 1169 insertions(+), 1006 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 78b49002fbd2..b816c80df50b 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -45,7 +45,7 @@ struct rdma_cqe_responder {
 	__le32 imm_data_or_inv_r_Key;
 	__le32 length;
 	__le32 imm_data_hi;
-	__le16 rq_cons;
+	__le16 rq_cons_or_srq_id;
 	u8 flags;
 #define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK  0x1
 #define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0
@@ -115,6 +115,7 @@ enum rdma_cqe_requester_status_enum {
 	RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR,
 	RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR,
 	RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR,
+	RDMA_CQE_REQ_STS_XRC_VOILATION_ERR,
 	MAX_RDMA_CQE_REQUESTER_STATUS_ENUM
 };
 
@@ -136,6 +137,7 @@ enum rdma_cqe_type {
 	RDMA_CQE_TYPE_REQUESTER,
 	RDMA_CQE_TYPE_RESPONDER_RQ,
 	RDMA_CQE_TYPE_RESPONDER_SRQ,
+	RDMA_CQE_TYPE_RESPONDER_XRC_SRQ,
 	RDMA_CQE_TYPE_INVALID,
 	MAX_RDMA_CQE_TYPE
 };
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 875b17272d65..7d51ef47667f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -3695,7 +3695,7 @@ static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
 				 struct rdma_cqe_responder *resp, int *update)
 {
-	if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
+	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
 		consume_cqe(cq);
 		*update |= 1;
 	}
@@ -3710,7 +3710,7 @@ static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
 
 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
 		cnt = process_resp_flush(qp, cq, num_entries, wc,
-					 resp->rq_cons);
+					 resp->rq_cons_or_srq_id);
 		try_consume_resp_cqe(cq, qp, resp, update);
 	} else {
 		cnt = process_resp_one(dev, qp, cq, wc, resp);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index fdf37abee3d3..4926c5532fba 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -265,6 +265,7 @@ struct grc_param_defs {
 	u32 min;
 	u32 max;
 	bool is_preset;
+	bool is_persistent;
 	u32 exclude_all_preset_val;
 	u32 crash_preset_val;
 };
@@ -1520,129 +1521,129 @@ static struct platform_defs s_platform_defs[] = {
 
 static struct grc_param_defs s_grc_param_defs[] = {
 	/* DBG_GRC_PARAM_DUMP_TSTORM */
-	{{1, 1, 1}, 0, 1, false, 1, 1},
+	{{1, 1, 1}, 0, 1, false, false, 1, 1},
 
 	/* DBG_GRC_PARAM_DUMP_MSTORM */
-	{{1, 1, 1}, 0, 1, false, 1, 1},
+	{{1, 1, 1}, 0, 1, false, false, 1, 1},
 
 	/* DBG_GRC_PARAM_DUMP_USTORM */
-	{{1, 1, 1}, 0, 1, false, 1, 1},
+	{{1, 1, 1}, 0, 1, false, false, 1, 1},
 
 	/* DBG_GRC_PARAM_DUMP_XSTORM */
-	{{1, 1, 1}, 0, 1, false, 1, 1},
+	{{1, 1, 1}, 0, 1, false, false, 1, 1},
 
 	/* DBG_GRC_PARAM_DUMP_YSTORM */
-	{{1, 1, 1}, 0, 1, false, 1, 1},
+	{{1, 1, 1}, 0, 1, false, false, 1, 1},
 
 	/* DBG_GRC_PARAM_DUMP_PSTORM */
-	{{1, 1, 1}, 0, 1, false, 1, 1},
+	{{1, 1, 1}, 0, 1, false, false, 1, 1},
 
 	/* DBG_GRC_PARAM_DUMP_REGS */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_RAM */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_PBUF */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_IOR */
-	{{0, 0, 0}, 0, 1, false, 0, 1},
+	{{0, 0, 0}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_VFC */
-	{{0, 0, 0}, 0, 1, false, 0, 1},
+	{{0, 0, 0}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_CM_CTX */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_ILT */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_RSS */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_CAU */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_QM */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_MCP */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
-	/* DBG_GRC_PARAM_RESERVED */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	/* DBG_GRC_PARAM_MCP_TRACE_META_SIZE */
+	{{1, 1, 1}, 1, 0xffffffff, false, true, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_CFC */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_IGU */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_BRB */
-	{{0, 0, 0}, 0, 1, false, 0, 1},
+	{{0, 0, 0}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_BTB */
-	{{0, 0, 0}, 0, 1, false, 0, 1},
+	{{0, 0, 0}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_BMB */
-	{{0, 0, 0}, 0, 1, false, 0, 1},
+	{{0, 0, 0}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_NIG */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_MULD */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_PRS */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_DMAE */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_TM */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_SDM */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_DIF */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_STATIC */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_UNSTALL */
-	{{0, 0, 0}, 0, 1, false, 0, 0},
+	{{0, 0, 0}, 0, 1, false, false, 0, 0},
 
 	/* DBG_GRC_PARAM_NUM_LCIDS */
-	{{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS,
-	 MAX_LCIDS},
+	{{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, false,
+	 MAX_LCIDS, MAX_LCIDS},
 
 	/* DBG_GRC_PARAM_NUM_LTIDS */
-	{{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS,
-	 MAX_LTIDS},
+	{{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, false,
+	 MAX_LTIDS, MAX_LTIDS},
 
 	/* DBG_GRC_PARAM_EXCLUDE_ALL */
-	{{0, 0, 0}, 0, 1, true, 0, 0},
+	{{0, 0, 0}, 0, 1, true, false, 0, 0},
 
 	/* DBG_GRC_PARAM_CRASH */
-	{{0, 0, 0}, 0, 1, true, 0, 0},
+	{{0, 0, 0}, 0, 1, true, false, 0, 0},
 
 	/* DBG_GRC_PARAM_PARITY_SAFE */
-	{{0, 0, 0}, 0, 1, false, 1, 0},
+	{{0, 0, 0}, 0, 1, false, false, 1, 0},
 
 	/* DBG_GRC_PARAM_DUMP_CM */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_DUMP_PHY */
-	{{1, 1, 1}, 0, 1, false, 0, 1},
+	{{1, 1, 1}, 0, 1, false, false, 0, 1},
 
 	/* DBG_GRC_PARAM_NO_MCP */
-	{{0, 0, 0}, 0, 1, false, 0, 0},
+	{{0, 0, 0}, 0, 1, false, false, 0, 0},
 
 	/* DBG_GRC_PARAM_NO_FW_VER */
-	{{0, 0, 0}, 0, 1, false, 0, 0}
+	{{0, 0, 0}, 0, 1, false, false, 0, 0}
 };
 
 static struct rss_mem_defs s_rss_mem_defs[] = {
@@ -4731,8 +4732,13 @@ static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "mcp_trace_meta", 1);
 
-	/* Read trace meta info (trace_meta_size_bytes is dword-aligned) */
-	if (mcp_access) {
+	/* If MCP Trace meta size parameter was set, use it.
+	 * Otherwise, read trace meta.
+	 * trace_meta_size_bytes is dword-aligned.
+	 */
+	trace_meta_size_bytes =
+		qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_MCP_TRACE_META_SIZE);
+	if ((!trace_meta_size_bytes || dump) && mcp_access) {
 		status = qed_mcp_trace_get_meta_info(p_hwfn,
 						     p_ptt,
 						     trace_data_size_bytes,
@@ -5063,8 +5069,9 @@ void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
 	u32 i;
 
 	for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
-		dev_data->grc.param_val[i] =
-		    s_grc_param_defs[i].default_val[dev_data->chip_id];
+		if (!s_grc_param_defs[i].is_persistent)
+			dev_data->grc.param_val[i] =
+			    s_grc_param_defs[i].default_val[dev_data->chip_id];
 }
 
 enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
@@ -6071,10 +6078,14 @@ static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
 
 /******************************** Variables **********************************/
 
-/* MCP Trace meta data - used in case the dump doesn't contain the meta data
- * (e.g. due to no NVRAM access).
+/* MCP Trace meta data array - used in case the dump doesn't contain the
+ * meta data (e.g. due to no NVRAM access).
  */
-static struct user_dbg_array s_mcp_trace_meta = { NULL, 0 };
+static struct user_dbg_array s_mcp_trace_meta_arr = { NULL, 0 };
+
+/* Parsed MCP Trace meta data info, based on MCP trace meta array */
+static struct mcp_trace_meta s_mcp_trace_meta;
+static bool s_mcp_trace_meta_valid;
 
 /* Temporary buffer, used for print size calculations */
 static char s_temp_buf[MAX_MSG_LEN];
@@ -6104,6 +6115,9 @@ static u32 qed_read_from_cyclic_buf(void *buf,
 
 	val_ptr = (u8 *)&val;
 
+	/* Assume running on a LITTLE ENDIAN and the buffer is network order
+	 * (BIG ENDIAN), as high order bytes are placed in lower memory address.
+	 */
 	for (i = 0; i < num_bytes_to_read; i++) {
 		val_ptr[i] = bytes_buf[*offset];
 		*offset = qed_cyclic_add(*offset, 1, buf_size);
@@ -6185,7 +6199,7 @@ static u32 qed_read_param(u32 *dump_buf,
 		offset += 4;
 	}
 
-	return offset / 4;
+	return (u32)offset / 4;
 }
 
 /* Reads a section header from the specified buffer.
@@ -6503,6 +6517,8 @@ static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
 {
 	u32 i;
 
+	s_mcp_trace_meta_valid = false;
+
 	/* Release modules */
 	if (meta->modules) {
 		for (i = 0; i < meta->modules_num; i++)
@@ -6529,6 +6545,10 @@ static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
 	u8 *meta_buf_bytes = (u8 *)meta_buf;
 	u32 offset = 0, signature, i;
 
+	/* Free the previous meta before loading a new one. */
+	if (s_mcp_trace_meta_valid)
+		qed_mcp_trace_free_meta(p_hwfn, meta);
+
 	memset(meta, 0, sizeof(*meta));
 
 	/* Read first signature */
@@ -6594,31 +6614,153 @@ static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
 				      format_len, format_ptr->format_str);
 	}
 
+	s_mcp_trace_meta_valid = true;
 	return DBG_STATUS_OK;
 }
 
+/* Parses an MCP trace buffer. If result_buf is not NULL, the MCP Trace results
+ * are printed to it. The parsing status is returned.
+ * Arguments:
+ * trace_buf - MCP trace cyclic buffer
+ * trace_buf_size - MCP trace cyclic buffer size in bytes
+ * data_offset - offset in bytes of the data to parse in the MCP trace cyclic
+ *               buffer.
+ * data_size - size in bytes of data to parse.
+ * parsed_buf - destination buffer for parsed data.
+ * parsed_bytes - size of parsed data in bytes.
+ */
+static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
+					       u32 trace_buf_size,
+					       u32 data_offset,
+					       u32 data_size,
+					       char *parsed_buf,
+					       u32 *parsed_bytes)
+{
+	u32 param_mask, param_shift;
+	enum dbg_status status;
+
+	*parsed_bytes = 0;
+
+	if (!s_mcp_trace_meta_valid)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+	status = DBG_STATUS_OK;
+
+	while (data_size) {
+		struct mcp_trace_format *format_ptr;
+		u8 format_level, format_module;
+		u32 params[3] = { 0, 0, 0 };
+		u32 header, format_idx, i;
+
+		if (data_size < MFW_TRACE_ENTRY_SIZE)
+			return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+		header = qed_read_from_cyclic_buf(trace_buf,
+						  &data_offset,
+						  trace_buf_size,
+						  MFW_TRACE_ENTRY_SIZE);
+		data_size -= MFW_TRACE_ENTRY_SIZE;
+		format_idx = header & MFW_TRACE_EVENTID_MASK;
+
+		/* Skip message if its index doesn't exist in the meta data */
+		if (format_idx > s_mcp_trace_meta.formats_num) {
+			u8 format_size =
+				(u8)((header & MFW_TRACE_PRM_SIZE_MASK) >>
+				     MFW_TRACE_PRM_SIZE_SHIFT);
+
+			if (data_size < format_size)
+				return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+			data_offset = qed_cyclic_add(data_offset,
+						     format_size,
+						     trace_buf_size);
+			data_size -= format_size;
+			continue;
+		}
+
+		format_ptr = &s_mcp_trace_meta.formats[format_idx];
+
+		for (i = 0,
+		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK,
+		     param_shift = MCP_TRACE_FORMAT_P1_SIZE_SHIFT;
+		     i < MCP_TRACE_FORMAT_MAX_PARAMS;
+		     i++,
+		     param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH,
+		     param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) {
+			/* Extract param size (0..3) */
+			u8 param_size = (u8)((format_ptr->data & param_mask) >>
+					     param_shift);
+
+			/* If the param size is zero, there are no other
+			 * parameters.
+			 */
+			if (!param_size)
+				break;
+
+			/* Size is encoded using 2 bits, where 3 is used to
+			 * encode 4.
+			 */
+			if (param_size == 3)
+				param_size = 4;
+
+			if (data_size < param_size)
+				return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+			params[i] = qed_read_from_cyclic_buf(trace_buf,
+							     &data_offset,
+							     trace_buf_size,
+							     param_size);
+			data_size -= param_size;
+		}
+
+		format_level = (u8)((format_ptr->data &
+				     MCP_TRACE_FORMAT_LEVEL_MASK) >>
+				    MCP_TRACE_FORMAT_LEVEL_SHIFT);
+		format_module = (u8)((format_ptr->data &
+				      MCP_TRACE_FORMAT_MODULE_MASK) >>
+				     MCP_TRACE_FORMAT_MODULE_SHIFT);
+		if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str))
+			return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+		/* Print current message to results buffer */
+		*parsed_bytes +=
+			sprintf(qed_get_buf_ptr(parsed_buf, *parsed_bytes),
+				"%s %-8s: ",
+				s_mcp_trace_level_str[format_level],
+				s_mcp_trace_meta.modules[format_module]);
+		*parsed_bytes +=
+		    sprintf(qed_get_buf_ptr(parsed_buf, *parsed_bytes),
+			    format_ptr->format_str,
+			    params[0], params[1], params[2]);
+	}
+
+	/* Add string NULL terminator */
+	(*parsed_bytes)++;
+
+	return status;
+}
+
 /* Parses an MCP Trace dump buffer.
  * If result_buf is not NULL, the MCP Trace results are printed to it.
  * In any case, the required results buffer size is assigned to
- * parsed_results_bytes.
+ * parsed_bytes.
  * The parsing status is returned.
  */
 static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 						u32 *dump_buf,
-						char *results_buf,
-						u32 *parsed_results_bytes)
+						char *parsed_buf,
+						u32 *parsed_bytes)
 {
-	u32 end_offset, bytes_left, trace_data_dwords, trace_meta_dwords;
-	u32 param_mask, param_shift, param_num_val, num_section_params;
 	const char *section_name, *param_name, *param_str_val;
-	u32 offset, results_offset = 0;
-	struct mcp_trace_meta meta;
+	u32 data_size, trace_data_dwords, trace_meta_dwords;
+	u32 offset, results_offset, parsed_buf_bytes;
+	u32 param_num_val, num_section_params;
 	struct mcp_trace *trace;
 	enum dbg_status status;
 	const u32 *meta_buf;
 	u8 *trace_buf;
 
-	*parsed_results_bytes = 0;
+	*parsed_bytes = 0;
 
 	/* Read global_params section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6629,7 +6771,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	/* Print global params */
 	dump_buf += qed_print_section_params(dump_buf,
 					     num_section_params,
-					     results_buf, &results_offset);
+					     parsed_buf, &results_offset);
 
 	/* Read trace_data section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6646,8 +6788,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	trace = (struct mcp_trace *)dump_buf;
 	trace_buf = (u8 *)dump_buf + sizeof(*trace);
 	offset = trace->trace_oldest;
-	end_offset = trace->trace_prod;
-	bytes_left = qed_cyclic_sub(end_offset, offset, trace->size);
+	data_size = qed_cyclic_sub(trace->trace_prod, offset, trace->size);
 	dump_buf += trace_data_dwords;
 
 	/* Read meta_data section */
@@ -6664,126 +6805,33 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	/* Choose meta data buffer */
 	if (!trace_meta_dwords) {
 		/* Dump doesn't include meta data */
-		if (!s_mcp_trace_meta.ptr)
+		if (!s_mcp_trace_meta_arr.ptr)
 			return DBG_STATUS_MCP_TRACE_NO_META;
-		meta_buf = s_mcp_trace_meta.ptr;
+		meta_buf = s_mcp_trace_meta_arr.ptr;
 	} else {
 		/* Dump includes meta data */
 		meta_buf = dump_buf;
 	}
 
 	/* Allocate meta data memory */
-	status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &meta);
+	status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &s_mcp_trace_meta);
 	if (status != DBG_STATUS_OK)
-		goto free_mem;
+		return status;
 
-	/* Ignore the level and modules masks - just print everything that is
-	 * already in the buffer.
-	 */
-	while (bytes_left) {
-		struct mcp_trace_format *format_ptr;
-		u8 format_level, format_module;
-		u32 params[3] = { 0, 0, 0 };
-		u32 header, format_idx, i;
+	status = qed_parse_mcp_trace_buf(trace_buf,
+					 trace->size,
+					 offset,
+					 data_size,
+					 parsed_buf ?
+					 parsed_buf + results_offset :
+					 NULL,
+					 &parsed_buf_bytes);
+	if (status != DBG_STATUS_OK)
+		return status;
 
-		if (bytes_left < MFW_TRACE_ENTRY_SIZE) {
-			status = DBG_STATUS_MCP_TRACE_BAD_DATA;
-			goto free_mem;
-		}
+	*parsed_bytes = results_offset + parsed_buf_bytes;
 
-		header = qed_read_from_cyclic_buf(trace_buf,
-						  &offset,
-						  trace->size,
-						  MFW_TRACE_ENTRY_SIZE);
-		bytes_left -= MFW_TRACE_ENTRY_SIZE;
-		format_idx = header & MFW_TRACE_EVENTID_MASK;
-
-		/* Skip message if its  index doesn't exist in the meta data */
-		if (format_idx > meta.formats_num) {
-			u8 format_size =
-			    (u8)((header &
-				  MFW_TRACE_PRM_SIZE_MASK) >>
-				 MFW_TRACE_PRM_SIZE_SHIFT);
-
-			if (bytes_left < format_size) {
-				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
-				goto free_mem;
-			}
-
-			offset = qed_cyclic_add(offset,
-						format_size, trace->size);
-			bytes_left -= format_size;
-			continue;
-		}
-
-		format_ptr = &meta.formats[format_idx];
-
-		for (i = 0,
-		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift =
-		     MCP_TRACE_FORMAT_P1_SIZE_SHIFT;
-		     i < MCP_TRACE_FORMAT_MAX_PARAMS;
-		     i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH,
-		     param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) {
-			/* Extract param size (0..3) */
-			u8 param_size =
-			    (u8)((format_ptr->data &
-				  param_mask) >> param_shift);
-
-			/* If the param size is zero, there are no other
-			 * parameters.
-			 */
-			if (!param_size)
-				break;
-
-			/* Size is encoded using 2 bits, where 3 is used to
-			 * encode 4.
-			 */
-			if (param_size == 3)
-				param_size = 4;
-
-			if (bytes_left < param_size) {
-				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
-				goto free_mem;
-			}
-
-			params[i] = qed_read_from_cyclic_buf(trace_buf,
-							     &offset,
-							     trace->size,
-							     param_size);
-
-			bytes_left -= param_size;
-		}
-
-		format_level =
-		    (u8)((format_ptr->data &
-			  MCP_TRACE_FORMAT_LEVEL_MASK) >>
-			 MCP_TRACE_FORMAT_LEVEL_SHIFT);
-		format_module =
-		    (u8)((format_ptr->data &
-			  MCP_TRACE_FORMAT_MODULE_MASK) >>
-			 MCP_TRACE_FORMAT_MODULE_SHIFT);
-		if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str)) {
-			status = DBG_STATUS_MCP_TRACE_BAD_DATA;
-			goto free_mem;
-		}
-
-		/* Print current message to results buffer */
-		results_offset +=
-		    sprintf(qed_get_buf_ptr(results_buf,
-					    results_offset), "%s %-8s: ",
-			    s_mcp_trace_level_str[format_level],
-			    meta.modules[format_module]);
-		results_offset +=
-		    sprintf(qed_get_buf_ptr(results_buf,
-					    results_offset),
-			    format_ptr->format_str, params[0], params[1],
-			    params[2]);
-	}
-
-free_mem:
-	*parsed_results_bytes = results_offset + 1;
-	qed_mcp_trace_free_meta(p_hwfn, &meta);
-	return status;
+	return DBG_STATUS_OK;
 }
 
 /* Parses a Reg FIFO dump buffer.
@@ -7291,8 +7339,8 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
 
 void qed_dbg_mcp_trace_set_meta_data(u32 *data, u32 size)
 {
-	s_mcp_trace_meta.ptr = data;
-	s_mcp_trace_meta.size_in_dwords = size;
+	s_mcp_trace_meta_arr.ptr = data;
+	s_mcp_trace_meta_arr.size_in_dwords = size;
 }
 
 enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
@@ -7316,6 +7364,19 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 					results_buf, &parsed_buf_size);
 }
 
+enum dbg_status qed_print_mcp_trace_line(u8 *dump_buf,
+					 u32 num_dumped_bytes,
+					 char *results_buf)
+{
+	u32 parsed_bytes;
+
+	return qed_parse_mcp_trace_buf(dump_buf,
+				       num_dumped_bytes,
+				       0,
+				       num_dumped_bytes,
+				       results_buf, &parsed_bytes);
+}
+
 enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *dump_buf,
 						  u32 num_dumped_dwords,
@@ -7891,6 +7952,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 		}
 	}
 
+	qed_set_debug_engine(cdev, org_engine);
 	/* mcp_trace */
 	rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset +
 			       REGDUMP_HEADER_SIZE, &feature_size);
@@ -7903,8 +7965,6 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 		DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc);
 	}
 
-	qed_set_debug_engine(cdev, org_engine);
-
 	return 0;
 }
 
@@ -7929,9 +7989,10 @@ int qed_dbg_all_data_size(struct qed_dev *cdev)
 			    REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev);
 	}
 
+	qed_set_debug_engine(cdev, org_engine);
+
 	/* Engine common */
 	regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev);
-	qed_set_debug_engine(cdev, org_engine);
 
 	return regs_len;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index cdb3eec0f68c..de5527c0c1c7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -407,6 +407,7 @@ static void qed_init_qm_pq(struct qed_hwfn *p_hwfn,
 		       "pq overflow! pq %d, max pq %d\n", pq_idx, max_pq);
 
 	/* init pq params */
+	qm_info->qm_pq_params[pq_idx].port_id = p_hwfn->port_id;
 	qm_info->qm_pq_params[pq_idx].vport_id = qm_info->start_vport +
 	    qm_info->num_vports;
 	qm_info->qm_pq_params[pq_idx].tc_id = tc;
@@ -727,8 +728,9 @@ static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn)
 		pq = &(qm_info->qm_pq_params[i]);
 		DP_VERBOSE(p_hwfn,
 			   NETIF_MSG_HW,
-			   "pq idx %d, vport_id %d, tc %d, wrr_grp %d, rl_valid %d\n",
+			   "pq idx %d, port %d, vport_id %d, tc %d, wrr_grp %d, rl_valid %d\n",
 			   qm_info->start_pq + i,
+			   pq->port_id,
 			   pq->vport_id,
 			   pq->tc_id, pq->wrr_group, pq->rl_valid);
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index de873d770575..2c6a679166e1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -612,7 +612,7 @@ struct e4_xstorm_core_conn_ag_ctx {
 	__le16 reserved16;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_or_spq_prod;
-	__le16 word5;
+	__le16 updated_qm_pq_id;
 	__le16 conn_dpi;
 	u8 byte3;
 	u8 byte4;
@@ -1005,7 +1005,9 @@ enum fw_flow_ctrl_mode {
 enum gft_profile_type {
 	GFT_PROFILE_TYPE_4_TUPLE,
 	GFT_PROFILE_TYPE_L4_DST_PORT,
-	GFT_PROFILE_TYPE_IP_DST_PORT,
+	GFT_PROFILE_TYPE_IP_DST_ADDR,
+	GFT_PROFILE_TYPE_IP_SRC_ADDR,
+	GFT_PROFILE_TYPE_TUNNEL_TYPE,
 	MAX_GFT_PROFILE_TYPE
 };
 
@@ -1133,7 +1135,7 @@ struct protocol_dcb_data {
 	u8 dcb_priority;
 	u8 dcb_tc;
 	u8 dscp_val;
-	u8 reserved0;
+	u8 dcb_dont_add_vlan0;
 };
 
 /* Update tunnel configuration */
@@ -1932,7 +1934,7 @@ enum bin_dbg_buffer_type {
 
 /* Attention bit mapping */
 struct dbg_attn_bit_mapping {
-	__le16 data;
+	u16 data;
 #define DBG_ATTN_BIT_MAPPING_VAL_MASK			0x7FFF
 #define DBG_ATTN_BIT_MAPPING_VAL_SHIFT			0
 #define DBG_ATTN_BIT_MAPPING_IS_UNUSED_BIT_CNT_MASK	0x1
@@ -1941,11 +1943,12 @@ struct dbg_attn_bit_mapping {
 
 /* Attention block per-type data */
 struct dbg_attn_block_type_data {
-	__le16 names_offset;
-	__le16 reserved1;
+	u16 names_offset;
+	u16 reserved1;
 	u8 num_regs;
 	u8 reserved2;
-	__le16 regs_offset;
+	u16 regs_offset;
+
 };
 
 /* Block attentions */
@@ -1955,15 +1958,15 @@ struct dbg_attn_block {
 
 /* Attention register result */
 struct dbg_attn_reg_result {
-	__le32 data;
+	u32 data;
 #define DBG_ATTN_REG_RESULT_STS_ADDRESS_MASK	0xFFFFFF
 #define DBG_ATTN_REG_RESULT_STS_ADDRESS_SHIFT	0
 #define DBG_ATTN_REG_RESULT_NUM_REG_ATTN_MASK	0xFF
 #define DBG_ATTN_REG_RESULT_NUM_REG_ATTN_SHIFT	24
-	__le16 block_attn_offset;
-	__le16 reserved;
-	__le32 sts_val;
-	__le32 mask_val;
+	u16 block_attn_offset;
+	u16 reserved;
+	u32 sts_val;
+	u32 mask_val;
 };
 
 /* Attention block result */
@@ -1974,13 +1977,13 @@ struct dbg_attn_block_result {
 #define DBG_ATTN_BLOCK_RESULT_ATTN_TYPE_SHIFT	0
 #define DBG_ATTN_BLOCK_RESULT_NUM_REGS_MASK	0x3F
 #define DBG_ATTN_BLOCK_RESULT_NUM_REGS_SHIFT	2
-	__le16 names_offset;
+	u16 names_offset;
 	struct dbg_attn_reg_result reg_results[15];
 };
 
 /* Mode header */
 struct dbg_mode_hdr {
-	__le16 data;
+	u16 data;
 #define DBG_MODE_HDR_EVAL_MODE_MASK		0x1
 #define DBG_MODE_HDR_EVAL_MODE_SHIFT		0
 #define DBG_MODE_HDR_MODES_BUF_OFFSET_MASK	0x7FFF
@@ -1990,14 +1993,14 @@ struct dbg_mode_hdr {
 /* Attention register */
 struct dbg_attn_reg {
 	struct dbg_mode_hdr mode;
-	__le16 block_attn_offset;
-	__le32 data;
+	u16 block_attn_offset;
+	u32 data;
 #define DBG_ATTN_REG_STS_ADDRESS_MASK	0xFFFFFF
 #define DBG_ATTN_REG_STS_ADDRESS_SHIFT	0
 #define DBG_ATTN_REG_NUM_REG_ATTN_MASK	0xFF
 #define DBG_ATTN_REG_NUM_REG_ATTN_SHIFT 24
-	__le32 sts_clr_address;
-	__le32 mask_address;
+	u32 sts_clr_address;
+	u32 mask_address;
 };
 
 /* Attention types */
@@ -2011,14 +2014,14 @@ enum dbg_attn_type {
 struct dbg_bus_block {
 	u8 num_of_lines;
 	u8 has_latency_events;
-	__le16 lines_offset;
+	u16 lines_offset;
 };
 
 /* Debug Bus block user data */
 struct dbg_bus_block_user_data {
 	u8 num_of_lines;
 	u8 has_latency_events;
-	__le16 names_offset;
+	u16 names_offset;
 };
 
 /* Block Debug line data */
@@ -2042,12 +2045,12 @@ struct dbg_dump_cond_hdr {
 
 /* Memory data for registers dump */
 struct dbg_dump_mem {
-	__le32 dword0;
+	u32 dword0;
 #define DBG_DUMP_MEM_ADDRESS_MASK	0xFFFFFF
 #define DBG_DUMP_MEM_ADDRESS_SHIFT	0
 #define DBG_DUMP_MEM_MEM_GROUP_ID_MASK	0xFF
 #define DBG_DUMP_MEM_MEM_GROUP_ID_SHIFT	24
-	__le32 dword1;
+	u32 dword1;
 #define DBG_DUMP_MEM_LENGTH_MASK	0xFFFFFF
 #define DBG_DUMP_MEM_LENGTH_SHIFT	0
 #define DBG_DUMP_MEM_WIDE_BUS_MASK	0x1
@@ -2058,7 +2061,7 @@ struct dbg_dump_mem {
 
 /* Register data for registers dump */
 struct dbg_dump_reg {
-	__le32 data;
+	u32 data;
 #define DBG_DUMP_REG_ADDRESS_MASK	0x7FFFFF
 #define DBG_DUMP_REG_ADDRESS_SHIFT	0
 #define DBG_DUMP_REG_WIDE_BUS_MASK	0x1
@@ -2069,7 +2072,7 @@ struct dbg_dump_reg {
 
 /* Split header for registers dump */
 struct dbg_dump_split_hdr {
-	__le32 hdr;
+	u32 hdr;
 #define DBG_DUMP_SPLIT_HDR_DATA_SIZE_MASK	0xFFFFFF
 #define DBG_DUMP_SPLIT_HDR_DATA_SIZE_SHIFT	0
 #define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_MASK	0xFF
@@ -2079,33 +2082,33 @@ struct dbg_dump_split_hdr {
 /* Condition header for idle check */
 struct dbg_idle_chk_cond_hdr {
 	struct dbg_mode_hdr mode; /* Mode header */
-	__le16 data_size; /* size in dwords of the data following this header */
+	u16 data_size; /* size in dwords of the data following this header */
 };
 
 /* Idle Check condition register */
 struct dbg_idle_chk_cond_reg {
-	__le32 data;
+	u32 data;
 #define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK	0x7FFFFF
 #define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT	0
 #define DBG_IDLE_CHK_COND_REG_WIDE_BUS_MASK	0x1
 #define DBG_IDLE_CHK_COND_REG_WIDE_BUS_SHIFT	23
 #define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK	0xFF
 #define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT	24
-	__le16 num_entries;
+	u16 num_entries;
 	u8 entry_size;
 	u8 start_entry;
 };
 
 /* Idle Check info register */
 struct dbg_idle_chk_info_reg {
-	__le32 data;
+	u32 data;
 #define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK	0x7FFFFF
 #define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT	0
 #define DBG_IDLE_CHK_INFO_REG_WIDE_BUS_MASK	0x1
 #define DBG_IDLE_CHK_INFO_REG_WIDE_BUS_SHIFT	23
 #define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK	0xFF
 #define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT	24
-	__le16 size; /* register size in dwords */
+	u16 size; /* register size in dwords */
 	struct dbg_mode_hdr mode; /* Mode header */
 };
 
@@ -2117,8 +2120,8 @@ union dbg_idle_chk_reg {
 
 /* Idle Check result header */
 struct dbg_idle_chk_result_hdr {
-	__le16 rule_id; /* Failing rule index */
-	__le16 mem_entry_id; /* Failing memory entry index */
+	u16 rule_id; /* Failing rule index */
+	u16 mem_entry_id; /* Failing memory entry index */
 	u8 num_dumped_cond_regs; /* number of dumped condition registers */
 	u8 num_dumped_info_regs; /* number of dumped condition registers */
 	u8 severity; /* from dbg_idle_chk_severity_types enum */
@@ -2133,29 +2136,29 @@ struct dbg_idle_chk_result_reg_hdr {
 #define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_MASK  0x7F
 #define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_SHIFT 1
 	u8 start_entry; /* index of the first checked entry */
-	__le16 size; /* register size in dwords */
+	u16 size; /* register size in dwords */
 };
 
 /* Idle Check rule */
 struct dbg_idle_chk_rule {
-	__le16 rule_id; /* Idle Check rule ID */
+	u16 rule_id; /* Idle Check rule ID */
 	u8 severity; /* value from dbg_idle_chk_severity_types enum */
 	u8 cond_id; /* Condition ID */
 	u8 num_cond_regs; /* number of condition registers */
 	u8 num_info_regs; /* number of info registers */
 	u8 num_imms; /* number of immediates in the condition */
 	u8 reserved1;
-	__le16 reg_offset; /* offset of this rules registers in the idle check
-			    * register array (in dbg_idle_chk_reg units).
-			    */
-	__le16 imm_offset; /* offset of this rules immediate values in the
-			    * immediate values array (in dwords).
-			    */
+	u16 reg_offset; /* offset of this rules registers in the idle check
+			 * register array (in dbg_idle_chk_reg units).
+			 */
+	u16 imm_offset; /* offset of this rules immediate values in the
+			 * immediate values array (in dwords).
+			 */
 };
 
 /* Idle Check rule parsing data */
 struct dbg_idle_chk_rule_parsing_data {
-	__le32 data;
+	u32 data;
 #define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_MASK	0x1
 #define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_SHIFT	0
 #define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_MASK	0x7FFFFFFF
@@ -2175,7 +2178,7 @@ enum dbg_idle_chk_severity_types {
 
 /* Debug Bus block data */
 struct dbg_bus_block_data {
-	__le16 data;
+	u16 data;
 #define DBG_BUS_BLOCK_DATA_ENABLE_MASK_MASK		0xF
 #define DBG_BUS_BLOCK_DATA_ENABLE_MASK_SHIFT		0
 #define DBG_BUS_BLOCK_DATA_RIGHT_SHIFT_MASK		0xF
@@ -2238,15 +2241,15 @@ struct dbg_bus_trigger_state_data {
 
 /* Debug Bus memory address */
 struct dbg_bus_mem_addr {
-	__le32 lo;
-	__le32 hi;
+	u32 lo;
+	u32 hi;
 };
 
 /* Debug Bus PCI buffer data */
 struct dbg_bus_pci_buf_data {
 	struct dbg_bus_mem_addr phys_addr; /* PCI buffer physical address */
 	struct dbg_bus_mem_addr virt_addr; /* PCI buffer virtual address */
-	__le32 size; /* PCI buffer size in bytes */
+	u32 size; /* PCI buffer size in bytes */
 };
 
 /* Debug Bus Storm EID range filter params */
@@ -2276,15 +2279,15 @@ struct dbg_bus_storm_data {
 	u8 eid_range_not_mask;
 	u8 cid_filter_en;
 	union dbg_bus_storm_eid_params eid_filter_params;
-	__le32 cid;
+	u32 cid;
 };
 
 /* Debug Bus data */
 struct dbg_bus_data {
-	__le32 app_version;
+	u32 app_version;
 	u8 state;
 	u8 hw_dwords;
-	__le16 hw_id_mask;
+	u16 hw_id_mask;
 	u8 num_enabled_blocks;
 	u8 num_enabled_storms;
 	u8 target;
@@ -2295,7 +2298,7 @@ struct dbg_bus_data {
 	u8 adding_filter;
 	u8 filter_pre_trigger;
 	u8 filter_post_trigger;
-	__le16 reserved;
+	u16 reserved;
 	u8 trigger_en;
 	struct dbg_bus_trigger_state_data trigger_states[3];
 	u8 next_trigger_state;
@@ -2391,8 +2394,8 @@ enum dbg_bus_targets {
 struct dbg_grc_data {
 	u8 params_initialized;
 	u8 reserved1;
-	__le16 reserved2;
-	__le32 param_val[48];
+	u16 reserved2;
+	u32 param_val[48];
 };
 
 /* Debug GRC params */
@@ -2414,7 +2417,7 @@ enum dbg_grc_params {
 	DBG_GRC_PARAM_DUMP_CAU,
 	DBG_GRC_PARAM_DUMP_QM,
 	DBG_GRC_PARAM_DUMP_MCP,
-	DBG_GRC_PARAM_RESERVED,
+	DBG_GRC_PARAM_MCP_TRACE_META_SIZE,
 	DBG_GRC_PARAM_DUMP_CFC,
 	DBG_GRC_PARAM_DUMP_IGU,
 	DBG_GRC_PARAM_DUMP_BRB,
@@ -2526,10 +2529,10 @@ enum dbg_storms {
 
 /* Idle Check data */
 struct idle_chk_data {
-	__le32 buf_size;
+	u32 buf_size;
 	u8 buf_size_set;
 	u8 reserved1;
-	__le16 reserved2;
+	u16 reserved2;
 };
 
 /* Debug Tools data (per HW function) */
@@ -2543,7 +2546,7 @@ struct dbg_tools_data {
 	u8 platform_id;
 	u8 initialized;
 	u8 use_dmae;
-	__le32 num_regs_read;
+	u32 num_regs_read;
 };
 
 /********************************/
@@ -2555,10 +2558,10 @@ struct dbg_tools_data {
 
 /* BRB RAM init requirements */
 struct init_brb_ram_req {
-	__le32 guranteed_per_tc;
-	__le32 headroom_per_tc;
-	__le32 min_pkt_size;
-	__le32 max_ports_per_engine;
+	u32 guranteed_per_tc;
+	u32 headroom_per_tc;
+	u32 min_pkt_size;
+	u32 max_ports_per_engine;
 	u8 num_active_tcs[MAX_NUM_PORTS];
 };
 
@@ -2566,21 +2569,21 @@ struct init_brb_ram_req {
 struct init_ets_tc_req {
 	u8 use_sp;
 	u8 use_wfq;
-	__le16 weight;
+	u16 weight;
 };
 
 /* ETS init requirements */
 struct init_ets_req {
-	__le32 mtu;
+	u32 mtu;
 	struct init_ets_tc_req tc_req[NUM_OF_TCS];
 };
 
 /* NIG LB RL init requirements */
 struct init_nig_lb_rl_req {
-	__le16 lb_mac_rate;
-	__le16 lb_rate;
-	__le32 mtu;
-	__le16 tc_rate[NUM_OF_PHYS_TCS];
+	u16 lb_mac_rate;
+	u16 lb_rate;
+	u32 mtu;
+	u16 tc_rate[NUM_OF_PHYS_TCS];
 };
 
 /* NIG TC mapping for each priority */
@@ -2598,9 +2601,9 @@ struct init_nig_pri_tc_map_req {
 struct init_qm_port_params {
 	u8 active;
 	u8 active_phys_tcs;
-	__le16 num_pbf_cmd_lines;
-	__le16 num_btb_blocks;
-	__le16 reserved;
+	u16 num_pbf_cmd_lines;
+	u16 num_btb_blocks;
+	u16 reserved;
 };
 
 /* QM per-PQ init parameters */
@@ -2609,13 +2612,16 @@ struct init_qm_pq_params {
 	u8 tc_id;
 	u8 wrr_group;
 	u8 rl_valid;
+	u8 port_id;
+	u8 reserved0;
+	u16 reserved1;
 };
 
 /* QM per-vport init parameters */
 struct init_qm_vport_params {
-	__le32 vport_rl;
-	__le16 vport_wfq;
-	__le16 first_tx_pq_id[NUM_OF_TCS];
+	u32 vport_rl;
+	u16 vport_wfq;
+	u16 first_tx_pq_id[NUM_OF_TCS];
 };
 
 /**************************************/
@@ -2639,8 +2645,8 @@ enum chip_ids {
 };
 
 struct fw_asserts_ram_section {
-	__le16 section_ram_line_offset;
-	__le16 section_ram_line_size;
+	u16 section_ram_line_offset;
+	u16 section_ram_line_size;
 	u8 list_dword_offset;
 	u8 list_element_dword_size;
 	u8 list_num_elements;
@@ -2713,8 +2719,8 @@ enum init_split_types {
 
 /* Binary buffer header */
 struct bin_buffer_hdr {
-	__le32 offset;
-	__le32 length;
+	u32 offset;
+	u32 length;
 };
 
 /* Binary init buffer types */
@@ -2729,7 +2735,7 @@ enum bin_init_buffer_type {
 
 /* init array header: raw */
 struct init_array_raw_hdr {
-	__le32 data;
+	u32 data;
 #define INIT_ARRAY_RAW_HDR_TYPE_MASK	0xF
 #define INIT_ARRAY_RAW_HDR_TYPE_SHIFT	0
 #define INIT_ARRAY_RAW_HDR_PARAMS_MASK	0xFFFFFFF
@@ -2738,7 +2744,7 @@ struct init_array_raw_hdr {
 
 /* init array header: standard */
 struct init_array_standard_hdr {
-	__le32 data;
+	u32 data;
 #define INIT_ARRAY_STANDARD_HDR_TYPE_MASK	0xF
 #define INIT_ARRAY_STANDARD_HDR_TYPE_SHIFT	0
 #define INIT_ARRAY_STANDARD_HDR_SIZE_MASK	0xFFFFFFF
@@ -2747,7 +2753,7 @@ struct init_array_standard_hdr {
 
 /* init array header: zipped */
 struct init_array_zipped_hdr {
-	__le32 data;
+	u32 data;
 #define INIT_ARRAY_ZIPPED_HDR_TYPE_MASK		0xF
 #define INIT_ARRAY_ZIPPED_HDR_TYPE_SHIFT	0
 #define INIT_ARRAY_ZIPPED_HDR_ZIPPED_SIZE_MASK	0xFFFFFFF
@@ -2756,7 +2762,7 @@ struct init_array_zipped_hdr {
 
 /* init array header: pattern */
 struct init_array_pattern_hdr {
-	__le32 data;
+	u32 data;
 #define INIT_ARRAY_PATTERN_HDR_TYPE_MASK		0xF
 #define INIT_ARRAY_PATTERN_HDR_TYPE_SHIFT		0
 #define INIT_ARRAY_PATTERN_HDR_PATTERN_SIZE_MASK	0xF
@@ -2783,41 +2789,41 @@ enum init_array_types {
 
 /* init operation: callback */
 struct init_callback_op {
-	__le32 op_data;
+	u32 op_data;
 #define INIT_CALLBACK_OP_OP_MASK	0xF
 #define INIT_CALLBACK_OP_OP_SHIFT	0
 #define INIT_CALLBACK_OP_RESERVED_MASK	0xFFFFFFF
 #define INIT_CALLBACK_OP_RESERVED_SHIFT	4
-	__le16 callback_id;
-	__le16 block_id;
+	u16 callback_id;
+	u16 block_id;
 };
 
 /* init operation: delay */
 struct init_delay_op {
-	__le32 op_data;
+	u32 op_data;
 #define INIT_DELAY_OP_OP_MASK		0xF
 #define INIT_DELAY_OP_OP_SHIFT		0
 #define INIT_DELAY_OP_RESERVED_MASK	0xFFFFFFF
 #define INIT_DELAY_OP_RESERVED_SHIFT	4
-	__le32 delay;
+	u32 delay;
 };
 
 /* init operation: if_mode */
 struct init_if_mode_op {
-	__le32 op_data;
+	u32 op_data;
 #define INIT_IF_MODE_OP_OP_MASK			0xF
 #define INIT_IF_MODE_OP_OP_SHIFT		0
 #define INIT_IF_MODE_OP_RESERVED1_MASK		0xFFF
 #define INIT_IF_MODE_OP_RESERVED1_SHIFT		4
 #define INIT_IF_MODE_OP_CMD_OFFSET_MASK		0xFFFF
 #define INIT_IF_MODE_OP_CMD_OFFSET_SHIFT	16
-	__le16 reserved2;
-	__le16 modes_buf_offset;
+	u16 reserved2;
+	u16 modes_buf_offset;
 };
 
 /* init operation: if_phase */
 struct init_if_phase_op {
-	__le32 op_data;
+	u32 op_data;
 #define INIT_IF_PHASE_OP_OP_MASK		0xF
 #define INIT_IF_PHASE_OP_OP_SHIFT		0
 #define INIT_IF_PHASE_OP_DMAE_ENABLE_MASK	0x1
@@ -2826,7 +2832,7 @@ struct init_if_phase_op {
 #define INIT_IF_PHASE_OP_RESERVED1_SHIFT	5
 #define INIT_IF_PHASE_OP_CMD_OFFSET_MASK	0xFFFF
 #define INIT_IF_PHASE_OP_CMD_OFFSET_SHIFT	16
-	__le32 phase_data;
+	u32 phase_data;
 #define INIT_IF_PHASE_OP_PHASE_MASK		0xFF
 #define INIT_IF_PHASE_OP_PHASE_SHIFT		0
 #define INIT_IF_PHASE_OP_RESERVED2_MASK		0xFF
@@ -2845,31 +2851,31 @@ enum init_mode_ops {
 
 /* init operation: raw */
 struct init_raw_op {
-	__le32 op_data;
+	u32 op_data;
 #define INIT_RAW_OP_OP_MASK		0xF
 #define INIT_RAW_OP_OP_SHIFT		0
 #define INIT_RAW_OP_PARAM1_MASK		0xFFFFFFF
 #define INIT_RAW_OP_PARAM1_SHIFT	4
-	__le32 param2;
+	u32 param2;
 };
 
 /* init array params */
 struct init_op_array_params {
-	__le16 size;
-	__le16 offset;
+	u16 size;
+	u16 offset;
 };
 
 /* Write init operation arguments */
 union init_write_args {
-	__le32 inline_val;
-	__le32 zeros_count;
-	__le32 array_offset;
+	u32 inline_val;
+	u32 zeros_count;
+	u32 array_offset;
 	struct init_op_array_params runtime;
 };
 
 /* init operation: write */
 struct init_write_op {
-	__le32 data;
+	u32 data;
 #define INIT_WRITE_OP_OP_MASK		0xF
 #define INIT_WRITE_OP_OP_SHIFT		0
 #define INIT_WRITE_OP_SOURCE_MASK	0x7
@@ -2885,7 +2891,7 @@ struct init_write_op {
 
 /* init operation: read */
 struct init_read_op {
-	__le32 op_data;
+	u32 op_data;
 #define INIT_READ_OP_OP_MASK		0xF
 #define INIT_READ_OP_OP_SHIFT		0
 #define INIT_READ_OP_POLL_TYPE_MASK	0xF
@@ -2894,7 +2900,7 @@ struct init_read_op {
 #define INIT_READ_OP_RESERVED_SHIFT	8
 #define INIT_READ_OP_ADDRESS_MASK	0x7FFFFF
 #define INIT_READ_OP_ADDRESS_SHIFT	9
-	__le32 expected_val;
+	u32 expected_val;
 };
 
 /* Init operations union */
@@ -2939,11 +2945,11 @@ enum init_source_types {
 
 /* Internal RAM Offsets macro data */
 struct iro {
-	__le32 base;
-	__le16 m1;
-	__le16 m2;
-	__le16 m3;
-	__le16 size;
+	u32 base;
+	u16 m1;
+	u16 m2;
+	u16 m3;
+	u16 size;
 };
 
 /***************************** Public Functions *******************************/
@@ -3383,6 +3389,19 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 					    u32 num_dumped_dwords,
 					    char *results_buf);
 
+/**
+ * @brief print_mcp_trace_line - Prints MCP Trace results for a single line
+ *
+ * @param dump_buf -	      mcp trace dump buffer, starting from the header.
+ * @param num_dumped_bytes -  number of bytes that were dumped.
+ * @param results_buf -	      buffer for printing the mcp trace results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_mcp_trace_line(u8 *dump_buf,
+					 u32 num_dumped_bytes,
+					 char *results_buf);
+
 /**
  * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
  *	for reg_fifo results (in bytes).
@@ -4005,6 +4024,9 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt,
 			   bool eth_geneve_enable, bool ip_geneve_enable);
 
+void qed_set_vxlan_no_l2_enable(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt, bool enable);
+
 /**
  * @brief qed_gft_disable - Disable GFT
  *
@@ -4348,8 +4370,8 @@ static const struct iro iro_arr[51] = {
 	{0x80, 0x8, 0x0, 0x0, 0x4},
 	{0x84, 0x8, 0x0, 0x0, 0x2},
 	{0x4c48, 0x0, 0x0, 0x0, 0x78},
-	{0x3e18, 0x0, 0x0, 0x0, 0x78},
-	{0x2b58, 0x0, 0x0, 0x0, 0x78},
+	{0x3e38, 0x0, 0x0, 0x0, 0x78},
+	{0x2b78, 0x0, 0x0, 0x0, 0x78},
 	{0x4c40, 0x0, 0x0, 0x0, 0x78},
 	{0x4998, 0x0, 0x0, 0x0, 0x78},
 	{0x7f50, 0x0, 0x0, 0x0, 0x78},
@@ -4364,7 +4386,7 @@ static const struct iro iro_arr[51] = {
 	{0x4ba8, 0x80, 0x0, 0x0, 0x20},
 	{0x8158, 0x40, 0x0, 0x0, 0x30},
 	{0xe770, 0x60, 0x0, 0x0, 0x60},
-	{0x2cf0, 0x80, 0x0, 0x0, 0x38},
+	{0x2d10, 0x80, 0x0, 0x0, 0x38},
 	{0xf2b8, 0x78, 0x0, 0x0, 0x78},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0xaf20, 0x0, 0x0, 0x0, 0xf0},
@@ -4384,10 +4406,10 @@ static const struct iro iro_arr[51] = {
 	{0x10300, 0x18, 0x0, 0x0, 0x10},
 	{0xde48, 0x48, 0x0, 0x0, 0x38},
 	{0x10768, 0x20, 0x0, 0x0, 0x20},
-	{0x2d28, 0x80, 0x0, 0x0, 0x10},
+	{0x2d48, 0x80, 0x0, 0x0, 0x10},
 	{0x5048, 0x10, 0x0, 0x0, 0x10},
 	{0xc9b8, 0x30, 0x0, 0x0, 0x10},
-	{0xeee0, 0x10, 0x0, 0x0, 0x10},
+	{0xed90, 0x10, 0x0, 0x0, 0x10},
 	{0xa3a0, 0x10, 0x0, 0x0, 0x10},
 	{0x13108, 0x8, 0x0, 0x0, 0x8},
 };
@@ -5151,7 +5173,7 @@ struct e4_xstorm_eth_conn_ag_ctx {
 	__le16 edpm_num_bds;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_prod;
-	__le16 tx_class;
+	__le16 updated_qm_pq_id;
 	__le16 conn_dpi;
 	u8 byte3;
 	u8 byte4;
@@ -5674,7 +5696,6 @@ struct eth_vport_rx_mode {
 #define ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL_SHIFT	5
 #define ETH_VPORT_RX_MODE_RESERVED1_MASK		0x3FF
 #define ETH_VPORT_RX_MODE_RESERVED1_SHIFT		6
-	__le16 reserved2[3];
 };
 
 /* Command for setting tpa parameters */
@@ -5712,7 +5733,6 @@ struct eth_vport_tx_mode {
 #define ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL_SHIFT	4
 #define ETH_VPORT_TX_MODE_RESERVED1_MASK		0x7FF
 #define ETH_VPORT_TX_MODE_RESERVED1_SHIFT		5
-	__le16 reserved2[3];
 };
 
 /* GFT filter update action type */
@@ -5805,7 +5825,8 @@ struct rx_queue_update_ramrod_data {
 	u8 complete_cqe_flg;
 	u8 complete_event_flg;
 	u8 vport_id;
-	u8 reserved[4];
+	u8 set_default_rss_queue;
+	u8 reserved[3];
 	u8 reserved1;
 	u8 reserved2;
 	u8 reserved3;
@@ -5843,7 +5864,7 @@ struct rx_update_gft_filter_data {
 	u8 flow_id_valid;
 	u8 filter_action;
 	u8 assert_on_error;
-	u8 reserved;
+	u8 inner_vlan_removal_en;
 };
 
 /* Ramrod data for rx queue start ramrod */
@@ -5927,7 +5948,7 @@ struct vport_start_ramrod_data {
 	u8 zero_placement_offset;
 	u8 ctl_frame_mac_check_en;
 	u8 ctl_frame_ethtype_check_en;
-	u8 reserved[5];
+	u8 reserved[1];
 };
 
 /* Ramrod data for vport stop ramrod */
@@ -5992,6 +6013,7 @@ struct vport_update_ramrod_data {
 
 	struct eth_vport_rx_mode rx_mode;
 	struct eth_vport_tx_mode tx_mode;
+	__le32 reserved[3];
 	struct eth_vport_tpa_param tpa_param;
 	struct vport_update_ramrod_mcast approx_mcast;
 	struct eth_vport_rss_config rss_config;
@@ -6213,7 +6235,7 @@ struct e4_xstorm_eth_conn_ag_ctx_dq_ext_ldpart {
 	__le16 edpm_num_bds;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_prod;
-	__le16 tx_class;
+	__le16 updated_qm_pq_id;
 	__le16 conn_dpi;
 	u8 byte3;
 	u8 byte4;
@@ -6479,7 +6501,7 @@ struct e4_xstorm_eth_hw_conn_ag_ctx {
 	__le16 edpm_num_bds;
 	__le16 tx_bd_cons;
 	__le16 tx_bd_prod;
-	__le16 tx_class;
+	__le16 updated_qm_pq_id;
 	__le16 conn_dpi;
 };
 
@@ -6703,8 +6725,8 @@ struct e4_ystorm_rdma_task_ag_ctx {
 #define E4_YSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
 #define E4_YSTORM_RDMA_TASK_AG_CTX_VALID_MASK			0x1
 #define E4_YSTORM_RDMA_TASK_AG_CTX_VALID_SHIFT			6
-#define E4_YSTORM_RDMA_TASK_AG_CTX_BIT3_MASK			0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_BIT3_SHIFT			7
+#define E4_YSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_MASK		0x1
+#define E4_YSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_SHIFT		7
 	u8 flags1;
 #define E4_YSTORM_RDMA_TASK_AG_CTX_CF0_MASK		0x3
 #define E4_YSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT		0
@@ -6759,8 +6781,8 @@ struct e4_mstorm_rdma_task_ag_ctx {
 #define E4_MSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
 #define E4_MSTORM_RDMA_TASK_AG_CTX_BIT2_MASK			0x1
 #define E4_MSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT			6
-#define E4_MSTORM_RDMA_TASK_AG_CTX_BIT3_MASK			0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_BIT3_SHIFT			7
+#define E4_MSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_MASK		0x1
+#define E4_MSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_SHIFT		7
 	u8 flags1;
 #define E4_MSTORM_RDMA_TASK_AG_CTX_CF0_MASK	0x3
 #define E4_MSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT	0
@@ -6814,7 +6836,7 @@ struct ustorm_rdma_task_st_ctx {
 
 struct e4_ustorm_rdma_task_ag_ctx {
 	u8 reserved;
-	u8 byte1;
+	u8 state;
 	__le16 icid;
 	u8 flags0;
 #define E4_USTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
@@ -6830,8 +6852,8 @@ struct e4_ustorm_rdma_task_ag_ctx {
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RESULT_TOGGLE_BIT_SHIFT	0
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_TX_IO_FLG_MASK		0x3
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_TX_IO_FLG_SHIFT		2
-#define E4_USTORM_RDMA_TASK_AG_CTX_CF3_MASK			0x3
-#define E4_USTORM_RDMA_TASK_AG_CTX_CF3_SHIFT			4
+#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_BLOCK_SIZE_MASK          0x3
+#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_BLOCK_SIZE_SHIFT         4
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_MASK		0x3
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_SHIFT		6
 	u8 flags2;
@@ -6841,8 +6863,8 @@ struct e4_ustorm_rdma_task_ag_ctx {
 #define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED2_SHIFT		1
 #define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED3_MASK		0x1
 #define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED3_SHIFT		2
-#define E4_USTORM_RDMA_TASK_AG_CTX_CF3EN_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_CF3EN_SHIFT			3
+#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED4_MASK               0x1
+#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED4_SHIFT              3
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK		0x1
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
 #define E4_USTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK			0x1
@@ -6864,10 +6886,17 @@ struct e4_ustorm_rdma_task_ag_ctx {
 #define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT	4
 	__le32 dif_err_intervals;
 	__le32 dif_error_1st_interval;
-	__le32 reg2;
+	__le32 sq_cons;
 	__le32 dif_runt_value;
-	__le32 reg4;
+	__le32 sge_index;
 	__le32 reg5;
+	u8 byte2;
+	u8 byte3;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le32 reg6;
+	__le32 reg7;
 };
 
 /* RDMA task context */
@@ -6970,7 +6999,9 @@ struct rdma_init_func_hdr {
 	u8 vf_id;
 	u8 vf_valid;
 	u8 relaxed_ordering;
-	u8 reserved[2];
+	__le16 first_reg_srq_id;
+	__le32 reg_srq_base_addr;
+	__le32 reserved;
 };
 
 /* rdma function init ramrod data */
@@ -7077,13 +7108,23 @@ struct rdma_srq_context {
 
 /* rdma create qp requester ramrod data */
 struct rdma_srq_create_ramrod_data {
+	u8 flags;
+#define RDMA_SRQ_CREATE_RAMROD_DATA_XRC_FLAG_MASK         0x1
+#define RDMA_SRQ_CREATE_RAMROD_DATA_XRC_FLAG_SHIFT        0
+#define RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED_KEY_EN_MASK  0x1
+#define RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED_KEY_EN_SHIFT 1
+#define RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED1_MASK        0x3F
+#define RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED1_SHIFT       2
+	u8 reserved2;
+	__le16 xrc_domain;
+	__le32 xrc_srq_cq_cid;
 	struct regpair pbl_base_addr;
 	__le16 pages_in_srq_pbl;
 	__le16 pd_id;
 	struct rdma_srq_id srq_id;
 	__le16 page_size;
-	__le16 reserved1;
-	__le32 reserved2;
+	__le16 reserved3;
+	__le32 reserved4;
 	struct regpair producers_addr;
 };
 
@@ -7108,372 +7149,8 @@ enum rdma_tid_type {
 	MAX_RDMA_TID_TYPE
 };
 
-struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
-	u8 reserved0;
-	u8 state;
-	u8 flags0;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM0_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM0_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT1_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT1_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT2_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT2_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM3_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM3_SHIFT	3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT4_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT4_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT5_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT5_SHIFT		5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT6_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT6_SHIFT		6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT7_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT7_SHIFT		7
-	u8 flags1;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT8_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT8_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT9_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT9_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT10_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT10_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_SHIFT		3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSTORM_FLUSH_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSTORM_FLUSH_SHIFT	5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT14_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT14_SHIFT		6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_SHIFT	7
-	u8 flags2;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3_SHIFT	6
-	u8 flags3;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_SHIFT	6
-	u8 flags4;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11_SHIFT	6
-	u8 flags5;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15_SHIFT	6
-	u8 flags6;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19_SHIFT	6
-	u8 flags7;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0EN_SHIFT		6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1EN_SHIFT		7
-	u8 flags8;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2EN_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3EN_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4EN_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5EN_SHIFT		3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6EN_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_EN_SHIFT	5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8EN_SHIFT		6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9EN_SHIFT		7
-	u8 flags9;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10EN_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11EN_SHIFT	1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12EN_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13EN_SHIFT	3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14EN_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15EN_SHIFT	5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16EN_SHIFT	6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17EN_SHIFT	7
-	u8 flags10;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18EN_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19EN_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20EN_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21EN_SHIFT		3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_EN_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_EN_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23EN_SHIFT		5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE0EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE0EN_SHIFT		6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE1EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE1EN_SHIFT		7
-	u8 flags11;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE2EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE2EN_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE3EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE3EN_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE4EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE4EN_SHIFT		2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE5EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE5EN_SHIFT		3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE6EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE6EN_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE7EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE7EN_SHIFT		5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED1_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED1_SHIFT	6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE9EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE9EN_SHIFT		7
-	u8 flags12;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE10EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE10EN_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE11EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE11EN_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED2_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED2_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED3_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED3_SHIFT	3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE14EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE14EN_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE15EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE15EN_SHIFT		5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE16EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE16EN_SHIFT		6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE17EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE17EN_SHIFT		7
-	u8 flags13;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE18EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE18EN_SHIFT		0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE19EN_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE19EN_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED4_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED4_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED5_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED5_SHIFT	3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED6_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED6_SHIFT	4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED7_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED7_SHIFT	5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED8_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED8_SHIFT	6
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED9_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED9_SHIFT	7
-	u8 flags14;
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MIGRATION_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MIGRATION_SHIFT	0
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT17_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT17_SHIFT		1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_DPM_PORT_NUM_MASK	0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_DPM_PORT_NUM_SHIFT	2
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RESERVED_MASK		0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RESERVED_SHIFT		4
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_ROCE_EDPM_ENABLE_MASK	0x1
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_ROCE_EDPM_ENABLE_SHIFT	5
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23_MASK		0x3
-#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23_SHIFT		6
-	u8 byte2;
-	__le16 physical_q0;
-	__le16 word1;
-	__le16 word2;
-	__le16 word3;
-	__le16 word4;
-	__le16 word5;
-	__le16 conn_dpi;
-	u8 byte3;
-	u8 byte4;
-	u8 byte5;
-	u8 byte6;
-	__le32 reg0;
-	__le32 reg1;
-	__le32 reg2;
-	__le32 snd_nxt_psn;
-	__le32 reg4;
-};
-
-struct e4_mstorm_rdma_conn_ag_ctx {
-	u8 byte0;
-	u8 byte1;
-	u8 flags0;
-#define E4_MSTORM_RDMA_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_MSTORM_RDMA_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT	6
-	u8 flags1;
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_MSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT	7
-	__le16 word0;
-	__le16 word1;
-	__le32 reg0;
-	__le32 reg1;
-};
-
-struct e4_tstorm_rdma_conn_ag_ctx {
-	u8 reserved0;
-	u8 byte1;
-	u8 flags0;
-#define E4_TSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT2_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT2_SHIFT		2
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT3_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT3_SHIFT		3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT5_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_BIT5_SHIFT		5
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT		6
-	u8 flags1;
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF1_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT			0
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT			2
-#define E4_TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK	0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT	4
-#define E4_TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK		0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT		6
-	u8 flags2;
-#define E4_TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK		0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT	0
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF6_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF6_SHIFT			2
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF7_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF7_SHIFT			4
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF8_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF8_SHIFT			6
-	u8 flags3;
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF9_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF9_SHIFT			0
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF10_MASK			0x3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF10_SHIFT			2
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT			4
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT			5
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT			6
-#define E4_TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK	0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT	7
-	u8 flags4;
-#define E4_TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
-#define E4_TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK	0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT	1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF6EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT			2
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF7EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF7EN_SHIFT			3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF8EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF8EN_SHIFT			4
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF9EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF9EN_SHIFT			5
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF10EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_CF10EN_SHIFT			6
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT		7
-	u8 flags5;
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT	7
-	__le32 reg0;
-	__le32 reg1;
-	__le32 reg2;
-	__le32 reg3;
-	__le32 reg4;
-	__le32 reg5;
-	__le32 reg6;
-	__le32 reg7;
-	__le32 reg8;
-	u8 byte2;
-	u8 byte3;
-	__le16 word0;
-	u8 byte4;
-	u8 byte5;
-	__le16 word1;
-	__le16 word2;
-	__le16 word3;
-	__le32 reg9;
-	__le32 reg10;
+struct rdma_xrc_srq_context {
+	struct regpair temp[9];
 };
 
 struct e4_tstorm_rdma_task_ag_ctx {
@@ -7561,8 +7238,8 @@ struct e4_ustorm_rdma_conn_ag_ctx {
 	u8 flags0;
 #define E4_USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
 #define E4_USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_USTORM_RDMA_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT		1
+#define E4_USTORM_RDMA_CONN_AG_CTX_DIF_ERROR_REPORTED_MASK  0x1
+#define E4_USTORM_RDMA_CONN_AG_CTX_DIF_ERROR_REPORTED_SHIFT 1
 #define E4_USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
 #define E4_USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	2
 #define E4_USTORM_RDMA_CONN_AG_CTX_CF1_MASK		0x3
@@ -7624,214 +7301,214 @@ struct e4_ustorm_rdma_conn_ag_ctx {
 	__le16 word3;
 };
 
-struct e4_xstorm_rdma_conn_ag_ctx {
+struct e4_xstorm_roce_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT2_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT2_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT5_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT5_SHIFT		5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT6_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT6_SHIFT		6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT7_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT7_SHIFT		7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT     0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT1_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT             1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT2_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT2_SHIFT             2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM3_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT     3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT4_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT4_SHIFT             4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT5_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT5_SHIFT             5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT6_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT6_SHIFT             6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT7_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT7_SHIFT             7
 	u8 flags1;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT8_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT8_SHIFT		0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT9_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT9_SHIFT		1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT10_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT10_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT12_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT12_SHIFT		4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_MSTORM_FLUSH_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT14_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT14_SHIFT		6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_YSTORM_FLUSH_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_YSTORM_FLUSH_SHIFT	7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT8_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT8_SHIFT             0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT9_MASK              0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT9_SHIFT             1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT10_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT10_SHIFT            2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT11_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT11_SHIFT            3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT12_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT12_SHIFT            4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_MASK        0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_SHIFT       5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_MASK        0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_SHIFT       6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_SHIFT     7
 	u8 flags2;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF3_SHIFT	6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT              0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT              2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT              4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3_SHIFT              6
 	u8 flags3;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF4_MASK		0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF4_SHIFT		0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF5_MASK		0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF5_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF6_MASK		0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF6_SHIFT		4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4_SHIFT              0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5_SHIFT              2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6_SHIFT              4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_MASK       0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT      6
 	u8 flags4;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF11_SHIFT	6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8_SHIFT              0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9_MASK               0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9_SHIFT              2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10_SHIFT             4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11_SHIFT             6
 	u8 flags5;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF12_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF12_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF13_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF13_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF14_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF14_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF15_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF15_SHIFT	6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12_SHIFT             0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13_SHIFT             2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14_SHIFT             4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15_SHIFT             6
 	u8 flags6;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF16_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF16_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF17_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF17_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF18_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF18_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF19_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF19_SHIFT	6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16_SHIFT             0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17_SHIFT             2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18_SHIFT             4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19_SHIFT             6
 	u8 flags7;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF20_MASK		0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF20_SHIFT		0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF21_MASK		0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF21_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT		6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT		7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20_SHIFT             0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21_SHIFT             2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_MASK         0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_SHIFT        4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT            6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT            7
 	u8 flags8;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT		0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF3EN_SHIFT		1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF4EN_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF5EN_SHIFT		3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT		4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF8EN_SHIFT		6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF9EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF9EN_SHIFT		7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT            0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3EN_SHIFT            1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4EN_SHIFT            2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5EN_SHIFT            3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6EN_SHIFT            4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK    0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT   5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8EN_SHIFT            6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9EN_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9EN_SHIFT            7
 	u8 flags9;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF10EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF10EN_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF11EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF11EN_SHIFT	1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF12EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF12EN_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF13EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF13EN_SHIFT	3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF14EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF14EN_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF15EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF15EN_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF16EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF16EN_SHIFT	6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF17EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF17EN_SHIFT	7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10EN_SHIFT           0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11EN_SHIFT           1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12EN_SHIFT           2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13EN_SHIFT           3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14EN_SHIFT           4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15EN_SHIFT           5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16EN_SHIFT           6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17EN_SHIFT           7
 	u8 flags10;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF18EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF18EN_SHIFT		0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF19EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF19EN_SHIFT		1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF20EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF20EN_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF21EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF21EN_SHIFT		3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF23EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF23EN_SHIFT		5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT	6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT	7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18EN_SHIFT           0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19EN_SHIFT           1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20EN_SHIFT           2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21EN_SHIFT           3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_EN_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT     4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23EN_MASK            0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23EN_SHIFT           5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT          6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT          7
 	u8 flags11;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT	1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT	3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE9EN_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE9EN_SHIFT	7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT          0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT          1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT          2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE5EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE5EN_SHIFT          3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE6EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE6EN_SHIFT          4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE7EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE7EN_SHIFT          5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED1_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED1_SHIFT     6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE9EN_MASK           0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE9EN_SHIFT          7
 	u8 flags12;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE10EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE10EN_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE11EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE11EN_SHIFT	1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE14EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE14EN_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE15EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE15EN_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE16EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE16EN_SHIFT	6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE17EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE17EN_SHIFT	7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE10EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE10EN_SHIFT         0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE11EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE11EN_SHIFT         1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED2_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED2_SHIFT     2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED3_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED3_SHIFT     3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE14EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE14EN_SHIFT         4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE15EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE15EN_SHIFT         5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE16EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE16EN_SHIFT         6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE17EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE17EN_SHIFT         7
 	u8 flags13;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE18EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE18EN_SHIFT	0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE19EN_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RULE19EN_SHIFT	1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE18EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE18EN_SHIFT         0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE19EN_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE19EN_SHIFT         1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED4_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED4_SHIFT     2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED5_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED5_SHIFT     3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED6_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED6_SHIFT     4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED7_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED7_SHIFT     5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED8_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED8_SHIFT     6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED9_MASK      0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED9_SHIFT     7
 	u8 flags14;
-#define E4_XSTORM_RDMA_CONN_AG_CTX_MIGRATION_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_MIGRATION_SHIFT		0
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT17_MASK			0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_BIT17_SHIFT			1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_DPM_PORT_NUM_MASK		0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_DPM_PORT_NUM_SHIFT		2
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RESERVED_MASK		0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_RESERVED_SHIFT		4
-#define E4_XSTORM_RDMA_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK	0x1
-#define E4_XSTORM_RDMA_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF23_MASK			0x3
-#define E4_XSTORM_RDMA_CONN_AG_CTX_CF23_SHIFT			6
+#define E4_XSTORM_ROCE_CONN_AG_CTX_MIGRATION_MASK         0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_MIGRATION_SHIFT        0
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT17_MASK             0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT17_SHIFT            1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_DPM_PORT_NUM_MASK      0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_DPM_PORT_NUM_SHIFT     2
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RESERVED_MASK          0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_RESERVED_SHIFT         4
+#define E4_XSTORM_ROCE_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK  0x1
+#define E4_XSTORM_ROCE_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23_MASK              0x3
+#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23_SHIFT             6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 word1;
@@ -7853,48 +7530,108 @@ struct e4_xstorm_rdma_conn_ag_ctx {
 	__le32 reg6;
 };
 
-struct e4_ystorm_rdma_conn_ag_ctx {
-	u8 byte0;
+struct e4_tstorm_roce_conn_ag_ctx {
+	u8 reserved0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_RDMA_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_RDMA_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF2_SHIFT	6
+#define E4_TSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_MASK          0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT         0
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT1_MASK                  0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT                 1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT2_MASK                  0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT2_SHIFT                 2
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT3_MASK                  0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT3_SHIFT                 3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT4_MASK                  0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT4_SHIFT                 4
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT5_MASK                  0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT5_SHIFT                 5
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT                  6
 	u8 flags1;
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_YSTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK       0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT      0
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT                  2
+#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK     0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT    4
+#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_MASK           0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT          6
+	u8 flags2;
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5_SHIFT                  0
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6_SHIFT                  2
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7_SHIFT                  4
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8_SHIFT                  6
+	u8 flags3;
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9_MASK                   0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9_SHIFT                  0
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10_MASK                  0x3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10_SHIFT                 2
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT                4
+#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK    0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT   5
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT                6
+#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK  0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT 7
+	u8 flags4;
+#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK        0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT       0
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5EN_SHIFT                1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6EN_SHIFT                2
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7EN_SHIFT                3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8EN_SHIFT                4
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9EN_MASK                 0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9EN_SHIFT                5
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10EN_MASK                0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10EN_SHIFT               6
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT              7
+	u8 flags5;
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT              0
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT              1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT              2
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT              3
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE5EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE5EN_SHIFT              4
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE6EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE6EN_SHIFT              5
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE7EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE7EN_SHIFT              6
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE8EN_MASK               0x1
+#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE8EN_SHIFT              7
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 reg3;
+	__le32 reg4;
+	__le32 reg5;
+	__le32 reg6;
+	__le32 reg7;
+	__le32 reg8;
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
-	__le32 reg0;
-	__le32 reg1;
+	u8 byte4;
+	u8 byte5;
 	__le16 word1;
 	__le16 word2;
 	__le16 word3;
-	__le16 word4;
-	__le32 reg2;
-	__le32 reg3;
+	__le32 reg9;
+	__le32 reg10;
 };
 
 /* The roce storm context of Ystorm */
@@ -7933,15 +7670,15 @@ struct e4_roce_conn_context {
 	struct regpair ystorm_st_padding[2];
 	struct pstorm_roce_conn_st_ctx pstorm_st_context;
 	struct xstorm_roce_conn_st_ctx xstorm_st_context;
-	struct regpair xstorm_st_padding[2];
-	struct e4_xstorm_rdma_conn_ag_ctx xstorm_ag_context;
-	struct e4_tstorm_rdma_conn_ag_ctx tstorm_ag_context;
+	struct e4_xstorm_roce_conn_ag_ctx xstorm_ag_context;
+	struct e4_tstorm_roce_conn_ag_ctx tstorm_ag_context;
 	struct timers_context timer_context;
 	struct e4_ustorm_rdma_conn_ag_ctx ustorm_ag_context;
 	struct tstorm_roce_conn_st_ctx tstorm_st_context;
+	struct regpair tstorm_st_padding[2];
 	struct mstorm_roce_conn_st_ctx mstorm_st_context;
+	struct regpair mstorm_st_padding[2];
 	struct ustorm_roce_conn_st_ctx ustorm_st_context;
-	struct regpair ustorm_st_padding[2];
 };
 
 /* roce create qp requester ramrod data */
@@ -7955,8 +7692,8 @@ struct roce_create_qp_req_ramrod_data {
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP_SHIFT		3
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_PRI_MASK				0x7
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_PRI_SHIFT			4
-#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK			0x1
-#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT			7
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG_MASK			0x1
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG_SHIFT			7
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_MASK		0xF
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_SHIFT		8
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_MASK			0xF
@@ -7982,18 +7719,18 @@ struct roce_create_qp_req_ramrod_data {
 	__le16 udp_src_port;
 	__le32 src_gid[4];
 	__le32 dst_gid[4];
+	__le32 cq_cid;
 	struct regpair qp_handle_for_cqe;
 	struct regpair qp_handle_for_async;
 	u8 stats_counter_id;
 	u8 reserved3[7];
-	__le32 cq_cid;
 	__le16 regular_latency_phy_queue;
 	__le16 dpi;
 };
 
 /* roce create qp responder ramrod data */
 struct roce_create_qp_resp_ramrod_data {
-	__le16 flags;
+	__le32 flags;
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR_MASK		0x3
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR_SHIFT		0
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN_MASK			0x1
@@ -8012,6 +7749,11 @@ struct roce_create_qp_resp_ramrod_data {
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT			8
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_MASK		0x1F
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_SHIFT		11
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG_MASK             0x1
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG_SHIFT            16
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_MASK             0x7FFF
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_SHIFT            17
+	__le16 xrc_domain;
 	u8 max_ird;
 	u8 traffic_class;
 	u8 hop_limit;
@@ -8037,7 +7779,7 @@ struct roce_create_qp_resp_ramrod_data {
 	struct regpair qp_handle_for_cqe;
 	struct regpair qp_handle_for_async;
 	__le16 low_latency_phy_queue;
-	u8 reserved2[6];
+	u8 reserved2[2];
 	__le32 cq_cid;
 	__le16 regular_latency_phy_queue;
 	__le16 dpi;
@@ -8248,6 +7990,270 @@ enum roce_ramrod_cmd_id {
 	MAX_ROCE_RAMROD_CMD_ID
 };
 
+struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
+	u8 reserved0;
+	u8 state;
+	u8 flags0;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM0_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM0_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT1_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT1_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT2_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT2_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM3_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_EXIST_IN_QM3_SHIFT	3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT4_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT4_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT5_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT5_SHIFT		5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT6_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT6_SHIFT		6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT7_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT7_SHIFT		7
+	u8 flags1;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT8_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT8_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT9_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT9_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT10_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT10_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT11_SHIFT		3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT12_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSEM_FLUSH_MASK        0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSEM_FLUSH_SHIFT       5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSDM_FLUSH_MASK        0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MSDM_FLUSH_SHIFT       6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_YSTORM_FLUSH_SHIFT	7
+	u8 flags2;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3_SHIFT	6
+	u8 flags3;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_SHIFT	6
+	u8 flags4;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11_SHIFT	6
+	u8 flags5;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15_SHIFT	6
+	u8 flags6;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19_SHIFT	6
+	u8 flags7;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF0EN_SHIFT		6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF1EN_SHIFT		7
+	u8 flags8;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF2EN_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF3EN_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF4EN_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF5EN_SHIFT		3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF6EN_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_FLUSH_Q0_CF_EN_SHIFT	5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF8EN_SHIFT		6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF9EN_SHIFT		7
+	u8 flags9;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF10EN_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF11EN_SHIFT	1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF12EN_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF13EN_SHIFT	3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF14EN_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF15EN_SHIFT	5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF16EN_SHIFT	6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF17EN_SHIFT	7
+	u8 flags10;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF18EN_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF19EN_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF20EN_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF21EN_SHIFT		3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_EN_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_SLOW_PATH_EN_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23EN_SHIFT		5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE0EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE0EN_SHIFT		6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE1EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE1EN_SHIFT		7
+	u8 flags11;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE2EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE2EN_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE3EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE3EN_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE4EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE4EN_SHIFT		2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE5EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE5EN_SHIFT		3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE6EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE6EN_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE7EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE7EN_SHIFT		5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED1_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED1_SHIFT	6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE9EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE9EN_SHIFT		7
+	u8 flags12;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE10EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE10EN_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE11EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE11EN_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED2_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED2_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED3_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED3_SHIFT	3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE14EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE14EN_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE15EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE15EN_SHIFT		5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE16EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE16EN_SHIFT		6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE17EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE17EN_SHIFT		7
+	u8 flags13;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE18EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE18EN_SHIFT		0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE19EN_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RULE19EN_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED4_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED4_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED5_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED5_SHIFT	3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED6_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED6_SHIFT	4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED7_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED7_SHIFT	5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED8_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED8_SHIFT	6
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED9_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_A0_RESERVED9_SHIFT	7
+	u8 flags14;
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MIGRATION_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_MIGRATION_SHIFT	0
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT17_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_BIT17_SHIFT		1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_DPM_PORT_NUM_MASK	0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_DPM_PORT_NUM_SHIFT	2
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RESERVED_MASK		0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_RESERVED_SHIFT		4
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_ROCE_EDPM_ENABLE_MASK	0x1
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_ROCE_EDPM_ENABLE_SHIFT	5
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23_MASK		0x3
+#define E4XSTORMROCECONNAGCTXDQEXTLDPART_CF23_SHIFT		6
+	u8 byte2;
+	__le16 physical_q0;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le16 word5;
+	__le16 conn_dpi;
+	u8 byte3;
+	u8 byte4;
+	u8 byte5;
+	u8 byte6;
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 snd_nxt_psn;
+	__le32 reg4;
+};
+
+struct e4_mstorm_roce_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT0_MASK     0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT0_SHIFT    0
+#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT1_MASK     0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT    1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0_MASK      0x3
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT     2
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1_MASK      0x3
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT     4
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2_MASK      0x3
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT     6
+	u8 flags1;
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK    0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT   0
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK    0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT   1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK    0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT   2
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK  0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT 3
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK  0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT 4
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK  0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT 5
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK  0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT 6
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK  0x1
+#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT 7
+	__le16 word0;
+	__le16 word1;
+	__le32 reg0;
+	__le32 reg1;
+};
+
 struct e4_mstorm_roce_req_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
@@ -8341,8 +8347,8 @@ struct e4_tstorm_roce_req_conn_ag_ctx {
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_MASK			0x3
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_SHIFT			6
 	u8 flags1;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK				0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT			0
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK             0x3
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT            0
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_MASK			0x3
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_SHIFT		2
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK		0x3
@@ -8350,8 +8356,8 @@ struct e4_tstorm_roce_req_conn_ag_ctx {
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_MASK			0x3
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT		6
 	u8 flags2;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK	0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT	0
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_MASK               0x3
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_SHIFT              0
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_MASK	0x3
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_SHIFT	2
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_MASK	0x3
@@ -8365,8 +8371,8 @@ struct e4_tstorm_roce_req_conn_ag_ctx {
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_SHIFT	2
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_EN_MASK			0x1
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_EN_SHIFT		4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK			0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT			5
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK          0x1
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT         5
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_EN_MASK		0x1
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_EN_SHIFT		6
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK	0x1
@@ -8374,8 +8380,8 @@ struct e4_tstorm_roce_req_conn_ag_ctx {
 	u8 flags4;
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT		1
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_EN_MASK            0x1
+#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_EN_SHIFT           1
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_EN_MASK		0x1
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_EN_SHIFT		2
 #define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_EN_MASK	0x1
@@ -8421,7 +8427,7 @@ struct e4_tstorm_roce_req_conn_ag_ctx {
 	u8 byte5;
 	__le16 snd_sq_cons;
 	__le16 conn_dpi;
-	__le16 word3;
+	__le16 force_comp_cons;
 	__le32 reg9;
 	__le32 reg10;
 };
@@ -8445,8 +8451,8 @@ struct e4_tstorm_roce_resp_conn_ag_ctx {
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK			0x3
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT			6
 	u8 flags1;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_MASK	0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_SHIFT	0
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK            0x3
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT           0
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_MASK	0x3
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_SHIFT	2
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK		0x3
@@ -8454,8 +8460,8 @@ struct e4_tstorm_roce_resp_conn_ag_ctx {
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
 	u8 flags2;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK	0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT	0
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_MASK                0x3
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_SHIFT               0
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6_MASK		0x3
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6_SHIFT		2
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF7_MASK		0x3
@@ -8469,8 +8475,8 @@ struct e4_tstorm_roce_resp_conn_ag_ctx {
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF10_SHIFT		2
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK		0x1
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT		4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_MASK	0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT	5
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK         0x1
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT        5
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_EN_MASK	0x1
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_EN_SHIFT	6
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK		0x1
@@ -8478,8 +8484,8 @@ struct e4_tstorm_roce_resp_conn_ag_ctx {
 	u8 flags4;
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT	1
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_MASK             0x1
+#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT            1
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_MASK			0x1
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_SHIFT			2
 #define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF7EN_MASK			0x1
@@ -8724,10 +8730,10 @@ struct e4_xstorm_roce_req_conn_ag_ctx {
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_MASK		0x3
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF8_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF9_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF9_SHIFT	2
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_MASK        0x3
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_SHIFT       0
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_MASK     0x3
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_SHIFT    2
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10_MASK	0x3
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10_SHIFT	4
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF11_MASK	0x3
@@ -8774,10 +8780,10 @@ struct e4_xstorm_roce_req_conn_ag_ctx {
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_EN_SHIFT	4
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK	0x1
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT	5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF8EN_SHIFT		6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF9EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF9EN_SHIFT		7
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_EN_MASK     0x1
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_EN_SHIFT    6
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_EN_MASK  0x1
+#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_EN_SHIFT 7
 	u8 flags9;
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10EN_MASK		0x1
 #define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10EN_SHIFT		0
@@ -8882,9 +8888,9 @@ struct e4_xstorm_roce_req_conn_ag_ctx {
 	__le16 sq_cmp_cons;
 	__le16 sq_cons;
 	__le16 sq_prod;
-	__le16 word5;
+	__le16 dif_error_first_sq_cons;
 	__le16 conn_dpi;
-	u8 byte3;
+	u8 dif_error_sge_index;
 	u8 byte4;
 	u8 byte5;
 	u8 byte6;
@@ -8892,7 +8898,7 @@ struct e4_xstorm_roce_req_conn_ag_ctx {
 	__le32 ssn;
 	__le32 snd_una_psn;
 	__le32 snd_nxt_psn;
-	__le32 reg4;
+	__le32 dif_error_offset;
 	__le32 orq_cons_th;
 	__le32 orq_cons;
 };
@@ -9128,6 +9134,50 @@ struct e4_xstorm_roce_resp_conn_ag_ctx {
 	__le32 msn_and_syndrome;
 };
 
+struct e4_ystorm_roce_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT0_MASK     0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT0_SHIFT    0
+#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT1_MASK     0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT    1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0_MASK      0x3
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT     2
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1_MASK      0x3
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT     4
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2_MASK      0x3
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT     6
+	u8 flags1;
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK    0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT   0
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK    0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT   1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK    0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT   2
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK  0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT 3
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK  0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT 4
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK  0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT 5
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK  0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT 6
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK  0x1
+#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT 7
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+	__le32 reg0;
+	__le32 reg1;
+	__le16 word1;
+	__le16 word2;
+	__le16 word3;
+	__le16 word4;
+	__le32 reg2;
+	__le32 reg3;
+};
+
 struct e4_ystorm_roce_req_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
@@ -9236,7 +9286,7 @@ struct pstorm_iwarp_conn_st_ctx {
 
 /* The iwarp storm context of Xstorm */
 struct xstorm_iwarp_conn_st_ctx {
-	__le32 reserved[44];
+	__le32 reserved[48];
 };
 
 struct e4_xstorm_iwarp_conn_ag_ctx {
@@ -9377,8 +9427,8 @@ struct e4_xstorm_iwarp_conn_ag_ctx {
 #define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_SHIFT		3
 #define E4_XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
 #define E4_XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF23EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF23EN_SHIFT		5
+#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_EN_MASK               0x1
+#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_EN_SHIFT              5
 #define E4_XSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK		0x1
 #define E4_XSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT		6
 #define E4_XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_MASK	0x1
@@ -9447,8 +9497,8 @@ struct e4_xstorm_iwarp_conn_ag_ctx {
 #define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_SHIFT	4
 #define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_MASK	0x1
 #define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_SHIFT	5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF23_MASK		0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF23_SHIFT		6
+#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_MASK	0x3
+#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_SHIFT	6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 physical_q1;
@@ -9466,7 +9516,7 @@ struct e4_xstorm_iwarp_conn_ag_ctx {
 	__le32 reg2;
 	__le32 more_to_send_seq;
 	__le32 reg4;
-	__le32 rewinded_snd_max;
+	__le32 rewinded_snd_max_or_term_opcode;
 	__le32 rd_msn;
 	__le16 irq_prod_via_msdm;
 	__le16 irq_cons;
@@ -9476,8 +9526,8 @@ struct e4_xstorm_iwarp_conn_ag_ctx {
 	__le32 orq_cons;
 	__le32 orq_cons_th;
 	u8 byte7;
-	u8 max_ord;
 	u8 wqe_data_pad_bytes;
+	u8 max_ord;
 	u8 former_hq_prod;
 	u8 irq_prod_via_msem;
 	u8 byte12;
@@ -9506,8 +9556,8 @@ struct e4_tstorm_iwarp_conn_ag_ctx {
 #define E4_TSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT		1
 #define E4_TSTORM_IWARP_CONN_AG_CTX_BIT2_MASK		0x1
 #define E4_TSTORM_IWARP_CONN_AG_CTX_BIT2_SHIFT		2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_MASK	0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_SHIFT	3
+#define E4_TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_OR_TERMINATE_SENT_MASK  0x1
+#define E4_TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_OR_TERMINATE_SENT_SHIFT 3
 #define E4_TSTORM_IWARP_CONN_AG_CTX_BIT4_MASK		0x1
 #define E4_TSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT		4
 #define E4_TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_MASK	0x1
@@ -9622,7 +9672,6 @@ struct e4_iwarp_conn_context {
 	struct pstorm_iwarp_conn_st_ctx pstorm_st_context;
 	struct regpair pstorm_st_padding[2];
 	struct xstorm_iwarp_conn_st_ctx xstorm_st_context;
-	struct regpair xstorm_st_padding[2];
 	struct e4_xstorm_iwarp_conn_ag_ctx xstorm_ag_context;
 	struct e4_tstorm_iwarp_conn_ag_ctx tstorm_ag_context;
 	struct timers_context timer_context;
@@ -9648,8 +9697,10 @@ struct iwarp_create_qp_ramrod_data {
 #define IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN_SHIFT		4
 #define IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG_MASK		0x1
 #define IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG_SHIFT		5
-#define IWARP_CREATE_QP_RAMROD_DATA_RESERVED0_MASK		0x3
-#define IWARP_CREATE_QP_RAMROD_DATA_RESERVED0_SHIFT		6
+#define IWARP_CREATE_QP_RAMROD_DATA_LOW_LATENCY_QUEUE_EN_MASK	0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_LOW_LATENCY_QUEUE_EN_SHIFT	6
+#define IWARP_CREATE_QP_RAMROD_DATA_RESERVED0_MASK		0x1
+#define IWARP_CREATE_QP_RAMROD_DATA_RESERVED0_SHIFT		7
 	u8 reserved1;
 	__le16 pd;
 	__le16 sq_num_pages;
@@ -9698,6 +9749,7 @@ enum iwarp_eqe_sync_opcode {
 	IWARP_EVENT_TYPE_QUERY_QP,
 	IWARP_EVENT_TYPE_MODIFY_QP,
 	IWARP_EVENT_TYPE_DESTROY_QP,
+	IWARP_EVENT_TYPE_ABORT_TCP_OFFLOAD,
 	MAX_IWARP_EQE_SYNC_OPCODE
 };
 
@@ -9722,6 +9774,8 @@ enum iwarp_fw_return_code {
 	IWARP_EXCEPTION_DETECTED_LLP_RESET,
 	IWARP_EXCEPTION_DETECTED_IRQ_FULL,
 	IWARP_EXCEPTION_DETECTED_RQ_EMPTY,
+	IWARP_EXCEPTION_DETECTED_SRQ_EMPTY,
+	IWARP_EXCEPTION_DETECTED_SRQ_LIMIT,
 	IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT,
 	IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR,
 	IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW,
@@ -9766,10 +9820,13 @@ struct iwarp_modify_qp_ramrod_data {
 #define IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN_SHIFT	3
 #define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_OPS_EN_FLG_MASK	0x1
 #define IWARP_MODIFY_QP_RAMROD_DATA_RDMA_OPS_EN_FLG_SHIFT	4
-#define IWARP_MODIFY_QP_RAMROD_DATA_RESERVED_MASK		0x7FF
-#define IWARP_MODIFY_QP_RAMROD_DATA_RESERVED_SHIFT		5
-	__le32 reserved3[3];
-	__le32 reserved4[8];
+#define IWARP_MODIFY_QP_RAMROD_DATA_PHYSICAL_QUEUE_FLG_MASK	0x1
+#define IWARP_MODIFY_QP_RAMROD_DATA_PHYSICAL_QUEUE_FLG_SHIFT	5
+#define IWARP_MODIFY_QP_RAMROD_DATA_RESERVED_MASK		0x3FF
+#define IWARP_MODIFY_QP_RAMROD_DATA_RESERVED_SHIFT		6
+	__le16 physical_q0;
+	__le16 physical_q1;
+	__le32 reserved1[10];
 };
 
 /* MPA params for Enhanced mode */
@@ -9853,6 +9910,7 @@ enum iwarp_ramrod_cmd_id {
 	IWARP_RAMROD_CMD_ID_QUERY_QP,
 	IWARP_RAMROD_CMD_ID_MODIFY_QP,
 	IWARP_RAMROD_CMD_ID_DESTROY_QP,
+	IWARP_RAMROD_CMD_ID_ABORT_TCP_OFFLOAD,
 	MAX_IWARP_RAMROD_CMD_ID
 };
 
@@ -11205,7 +11263,7 @@ struct e4_tstorm_iscsi_conn_ag_ctx {
 #define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE8EN_SHIFT	7
 	__le32 reg0;
 	__le32 reg1;
-	__le32 reg2;
+	__le32 rx_tcp_checksum_err_cnt;
 	__le32 reg3;
 	__le32 reg4;
 	__le32 reg5;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index 18fb5062a83d..1365da7c8900 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -467,12 +467,11 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 		u16 *p_first_tx_pq_id;
 
 		ext_voq = qed_get_ext_voq(p_hwfn,
-					  p_params->port_id,
+					  pq_params[i].port_id,
 					  tc_id,
 					  p_params->max_phys_tcs_per_port);
 		is_vf_pq = (i >= p_params->num_pf_pqs);
-		rl_valid = pq_params[i].rl_valid &&
-			   pq_params[i].vport_id < max_qm_global_rls;
+		rl_valid = pq_params[i].rl_valid > 0;
 
 		/* Update first Tx PQ of VPORT/TC */
 		vport_id_in_pf = pq_params[i].vport_id - p_params->start_vport;
@@ -494,10 +493,11 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 		}
 
 		/* Check RL ID */
-		if (pq_params[i].rl_valid && pq_params[i].vport_id >=
-		    max_qm_global_rls)
+		if (rl_valid && pq_params[i].vport_id >= max_qm_global_rls) {
 			DP_NOTICE(p_hwfn,
 				  "Invalid VPORT ID for rate limiter configuration\n");
+			rl_valid = false;
+		}
 
 		/* Prepare PQ map entry */
 		QM_INIT_TX_PQ_MAP(p_hwfn,
@@ -528,7 +528,7 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 			pq_info = PQ_INFO_ELEMENT(*p_first_tx_pq_id,
 						  p_params->pf_id,
 						  tc_id,
-						  p_params->port_id,
+						  pq_params[i].port_id,
 						  rl_valid ? 1 : 0,
 						  rl_valid ?
 						  pq_params[i].vport_id : 0);
@@ -603,6 +603,7 @@ static void qed_other_pq_map_rt_init(struct qed_hwfn *p_hwfn,
  * Return -1 on error.
  */
 static int qed_pf_wfq_rt_init(struct qed_hwfn *p_hwfn,
+
 			      struct qed_qm_pf_rt_init_params *p_params)
 {
 	u16 num_tx_pqs = p_params->num_pf_pqs + p_params->num_vf_pqs;
@@ -619,7 +620,7 @@ static int qed_pf_wfq_rt_init(struct qed_hwfn *p_hwfn,
 
 	for (i = 0; i < num_tx_pqs; i++) {
 		ext_voq = qed_get_ext_voq(p_hwfn,
-					  p_params->port_id,
+					  pq_params[i].port_id,
 					  pq_params[i].tc_id,
 					  p_params->max_phys_tcs_per_port);
 		crd_reg_offset =
@@ -1020,7 +1021,8 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
 		*__p_var = (*__p_var & ~BIT(__offset)) | \
 			   ((enable) ? BIT(__offset) : 0); \
 	} while (0)
-#define PRS_ETH_TUNN_FIC_FORMAT	-188897008
+#define PRS_ETH_TUNN_OUTPUT_FORMAT        -188897008
+#define PRS_ETH_OUTPUT_FORMAT             -46832
 
 void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u16 dest_port)
@@ -1046,11 +1048,15 @@ void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn,
 	shift = PRS_REG_ENCAPSULATION_TYPE_EN_VXLAN_ENABLE_SHIFT;
 	SET_TUNNEL_TYPE_ENABLE_BIT(reg_val, shift, vxlan_enable);
 	qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val);
-	if (reg_val)
-		qed_wr(p_hwfn,
-		       p_ptt,
-		       PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
-		       (u32)PRS_ETH_TUNN_FIC_FORMAT);
+	if (reg_val) {
+		reg_val =
+		    qed_rd(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0_BB_K2);
+
+		/* Update output  only if tunnel blocks not included. */
+		if (reg_val == (u32)PRS_ETH_OUTPUT_FORMAT)
+			qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
+			       (u32)PRS_ETH_TUNN_OUTPUT_FORMAT);
+	}
 
 	/* Update NIG register */
 	reg_val = qed_rd(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE);
@@ -1077,11 +1083,15 @@ void qed_set_gre_enable(struct qed_hwfn *p_hwfn,
 	shift = PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GRE_ENABLE_SHIFT;
 	SET_TUNNEL_TYPE_ENABLE_BIT(reg_val, shift, ip_gre_enable);
 	qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val);
-	if (reg_val)
-		qed_wr(p_hwfn,
-		       p_ptt,
-		       PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
-		       (u32)PRS_ETH_TUNN_FIC_FORMAT);
+	if (reg_val) {
+		reg_val =
+		    qed_rd(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0_BB_K2);
+
+		/* Update output  only if tunnel blocks not included. */
+		if (reg_val == (u32)PRS_ETH_OUTPUT_FORMAT)
+			qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
+			       (u32)PRS_ETH_TUNN_OUTPUT_FORMAT);
+	}
 
 	/* Update NIG register */
 	reg_val = qed_rd(p_hwfn, p_ptt, NIG_REG_ENC_TYPE_ENABLE);
@@ -1126,11 +1136,15 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 	shift = PRS_REG_ENCAPSULATION_TYPE_EN_IP_OVER_GENEVE_ENABLE_SHIFT;
 	SET_TUNNEL_TYPE_ENABLE_BIT(reg_val, shift, ip_geneve_enable);
 	qed_wr(p_hwfn, p_ptt, PRS_REG_ENCAPSULATION_TYPE_EN, reg_val);
-	if (reg_val)
-		qed_wr(p_hwfn,
-		       p_ptt,
-		       PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
-		       (u32)PRS_ETH_TUNN_FIC_FORMAT);
+	if (reg_val) {
+		reg_val =
+		    qed_rd(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0_BB_K2);
+
+		/* Update output  only if tunnel blocks not included. */
+		if (reg_val == (u32)PRS_ETH_OUTPUT_FORMAT)
+			qed_wr(p_hwfn, p_ptt, PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
+			       (u32)PRS_ETH_TUNN_OUTPUT_FORMAT);
+	}
 
 	/* Update NIG register */
 	qed_wr(p_hwfn, p_ptt, NIG_REG_NGE_ETH_ENABLE,
@@ -1152,6 +1166,38 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 	       ip_geneve_enable ? 1 : 0);
 }
 
+#define PRS_ETH_VXLAN_NO_L2_ENABLE_OFFSET   4
+#define PRS_ETH_VXLAN_NO_L2_OUTPUT_FORMAT      -927094512
+
+void qed_set_vxlan_no_l2_enable(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt, bool enable)
+{
+	u32 reg_val, cfg_mask;
+
+	/* read PRS config register */
+	reg_val = qed_rd(p_hwfn, p_ptt, PRS_REG_MSG_INFO);
+
+	/* set VXLAN_NO_L2_ENABLE mask */
+	cfg_mask = BIT(PRS_ETH_VXLAN_NO_L2_ENABLE_OFFSET);
+
+	if (enable) {
+		/* set VXLAN_NO_L2_ENABLE flag */
+		reg_val |= cfg_mask;
+
+		/* update PRS FIC  register */
+		qed_wr(p_hwfn,
+		       p_ptt,
+		       PRS_REG_OUTPUT_FORMAT_4_0_BB_K2,
+		       (u32)PRS_ETH_VXLAN_NO_L2_OUTPUT_FORMAT);
+	} else {
+		/* clear VXLAN_NO_L2_ENABLE flag */
+		reg_val &= ~cfg_mask;
+	}
+
+	/* write PRS config register */
+	qed_wr(p_hwfn, p_ptt, PRS_REG_MSG_INFO, reg_val);
+}
+
 #define T_ETH_PACKET_ACTION_GFT_EVENTID  23
 #define PARSER_ETH_CONN_GFT_ACTION_CM_HDR  272
 #define T_ETH_PACKET_MATCH_RFS_EVENTID 25
@@ -1268,6 +1314,10 @@ void qed_gft_config(struct qed_hwfn *p_hwfn,
 	ram_line_lo = 0;
 	ram_line_hi = 0;
 
+	/* Tunnel type */
+	SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_DST_PORT, 1);
+	SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL, 1);
+
 	if (profile_type == GFT_PROFILE_TYPE_4_TUPLE) {
 		SET_FIELD(ram_line_hi, GFT_RAM_LINE_DST_IP, 1);
 		SET_FIELD(ram_line_hi, GFT_RAM_LINE_SRC_IP, 1);
@@ -1279,9 +1329,14 @@ void qed_gft_config(struct qed_hwfn *p_hwfn,
 		SET_FIELD(ram_line_hi, GFT_RAM_LINE_OVER_IP_PROTOCOL, 1);
 		SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
 		SET_FIELD(ram_line_lo, GFT_RAM_LINE_DST_PORT, 1);
-	} else if (profile_type == GFT_PROFILE_TYPE_IP_DST_PORT) {
+	} else if (profile_type == GFT_PROFILE_TYPE_IP_DST_ADDR) {
 		SET_FIELD(ram_line_hi, GFT_RAM_LINE_DST_IP, 1);
 		SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
+	} else if (profile_type == GFT_PROFILE_TYPE_IP_SRC_ADDR) {
+		SET_FIELD(ram_line_hi, GFT_RAM_LINE_SRC_IP, 1);
+		SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
+	} else if (profile_type == GFT_PROFILE_TYPE_TUNNEL_TYPE) {
+		SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_ETHERTYPE, 1);
 	}
 
 	qed_wr(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 69051e98aff9..2a2b1018ed1d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -2375,13 +2375,6 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
 
 		memset(&tx_pkt, 0, sizeof(tx_pkt));
 		tx_pkt.num_of_bds = 1;
-		tx_pkt.vlan = data->vlan;
-
-		if (GET_FIELD(data->parse_flags,
-			      PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
-			SET_FIELD(tx_pkt.bd_flags,
-				  CORE_TX_BD_DATA_VLAN_INSERTION, 1);
-
 		tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2;
 		tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
 		tx_pkt.first_frag = buf->data_phys_addr +
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 893ef08a4b39..e874504e8b28 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1974,7 +1974,7 @@ qed_arfs_mode_to_hsi(enum qed_filter_config_mode mode)
 	if (mode == QED_FILTER_CONFIG_MODE_5_TUPLE)
 		return GFT_PROFILE_TYPE_4_TUPLE;
 	if (mode == QED_FILTER_CONFIG_MODE_IP_DEST)
-		return GFT_PROFILE_TYPE_IP_DST_PORT;
+		return GFT_PROFILE_TYPE_IP_DST_ADDR;
 	return GFT_PROFILE_TYPE_L4_DST_PORT;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index c4f14fdc4e77..74fc626b1ec1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -591,16 +591,6 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 	}
 }
 
-static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags)
-{
-	u8 bd_flags = 0;
-
-	if (GET_FIELD(parse_flags, PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
-		SET_FIELD(bd_flags, CORE_TX_BD_DATA_VLAN_INSERTION, 1);
-
-	return bd_flags;
-}
-
 static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
 				  struct qed_ll2_info *p_ll2_conn)
 {
@@ -744,7 +734,6 @@ qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 	struct qed_ooo_buffer *p_buffer;
 	u16 l4_hdr_offset_w;
 	dma_addr_t first_frag;
-	u16 parse_flags;
 	u8 bd_flags;
 	int rc;
 
@@ -756,8 +745,6 @@ qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 
 		first_frag = p_buffer->rx_buffer_phys_addr +
 			     p_buffer->placement_offset;
-		parse_flags = p_buffer->parse_flags;
-		bd_flags = qed_ll2_convert_rx_parse_to_tx_flags(parse_flags);
 		SET_FIELD(bd_flags, CORE_TX_BD_DATA_FORCE_VLAN_MODE, 1);
 		SET_FIELD(bd_flags, CORE_TX_BD_DATA_L4_PROTOCOL, 1);
 
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 2b3b350e07b7..13c8ab171437 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -110,7 +110,7 @@
 
 #define FW_MAJOR_VERSION	8
 #define FW_MINOR_VERSION	33
-#define FW_REVISION_VERSION	1
+#define FW_REVISION_VERSION     11
 #define FW_ENGINEERING_VERSION	0
 
 /***********************/
diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h
index 9db02856623b..d9416ad5ef59 100644
--- a/include/linux/qed/eth_common.h
+++ b/include/linux/qed/eth_common.h
@@ -105,7 +105,7 @@
 #define ETH_CTL_FRAME_ETH_TYPE_NUM	4
 
 /* GFS constants */
-#define ETH_GFT_TRASH_CAN_VPORT		0x1FF
+#define ETH_GFT_TRASHCAN_VPORT         0x1FF	/* GFT drop flow vport number */
 
 /* Destination port mode */
 enum dest_port_mode {
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index 4cc9b37b8d95..938df614cb6a 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -753,8 +753,8 @@ struct e4_ystorm_iscsi_task_ag_ctx {
 #define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT		5
 #define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK		0x1
 #define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT		6
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT		7
+#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_MASK   0x1	/* bit3 */
+#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_SHIFT  7
 	u8 flags1;
 #define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK		0x3
 #define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT		0
diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h
index c1a446ebe362..480a57eb36cc 100644
--- a/include/linux/qed/rdma_common.h
+++ b/include/linux/qed/rdma_common.h
@@ -51,6 +51,8 @@
 #define RDMA_MAX_CQS			(64 * 1024)
 #define RDMA_MAX_TIDS			(128 * 1024 - 1)
 #define RDMA_MAX_PDS			(64 * 1024)
+#define RDMA_MAX_XRC_SRQS                       (1024)
+#define RDMA_MAX_SRQS                           (32 * 1024)
 
 #define RDMA_NUM_STATISTIC_COUNTERS	MAX_NUM_VPORTS
 #define RDMA_NUM_STATISTIC_COUNTERS_K2	MAX_NUM_VPORTS_K2
diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h
index e15e0da71240..193bcef302e1 100644
--- a/include/linux/qed/roce_common.h
+++ b/include/linux/qed/roce_common.h
@@ -59,6 +59,9 @@ enum roce_async_events_type {
 	ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
 	ROCE_ASYNC_EVENT_SRQ_EMPTY,
 	ROCE_ASYNC_EVENT_DESTROY_QP_DONE,
+	ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR,
+	ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR,
+	ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR,
 	MAX_ROCE_ASYNC_EVENTS_TYPE
 };
 

From 43645ce03e0063d7c4a5001215ca815188778881 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 28 Mar 2018 05:14:19 -0700
Subject: [PATCH 1467/1678] qed: Populate nvm image attribute shadow.

This patch adds support for populating the flash image attributes.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h         |  8 ++
 drivers/net/ethernet/qlogic/qed/qed_dev.c     | 24 ++++-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c     | 98 ++++++++++++++-----
 drivers/net/ethernet/qlogic/qed/qed_mcp.h     | 22 +++--
 .../net/ethernet/qlogic/qed/qed_selftest.c    |  6 +-
 5 files changed, 120 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 69488554f4b9..b4094992299d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -437,6 +437,11 @@ enum BAR_ID {
 	BAR_ID_1		/* Used for doorbells */
 };
 
+struct qed_nvm_image_info {
+	u32 num_images;
+	struct bist_nvm_image_att *image_att;
+};
+
 #define DRV_MODULE_VERSION		      \
 	__stringify(QED_MAJOR_VERSION) "."    \
 	__stringify(QED_MINOR_VERSION) "."    \
@@ -561,6 +566,9 @@ struct qed_hwfn {
 	/* L2-related */
 	struct qed_l2_info *p_l2_info;
 
+	/* Nvm images number and attributes */
+	struct qed_nvm_image_info nvm_info;
+
 	struct qed_ptt *p_arfs_ptt;
 
 	struct qed_simd_fp_handler	simd_proto_handler[64];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index de5527c0c1c7..d2ad5e92c74f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2932,6 +2932,12 @@ static int qed_get_dev_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return 0;
 }
 
+static void qed_nvm_info_free(struct qed_hwfn *p_hwfn)
+{
+	kfree(p_hwfn->nvm_info.image_att);
+	p_hwfn->nvm_info.image_att = NULL;
+}
+
 static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 				 void __iomem *p_regview,
 				 void __iomem *p_doorbells,
@@ -2995,12 +3001,25 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 			DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
 	}
 
+	/* NVRAM info initialization and population */
+	if (IS_LEAD_HWFN(p_hwfn)) {
+		rc = qed_mcp_nvm_info_populate(p_hwfn);
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Failed to populate nvm info shadow\n");
+			goto err2;
+		}
+	}
+
 	/* Allocate the init RT array and initialize the init-ops engine */
 	rc = qed_init_alloc(p_hwfn);
 	if (rc)
-		goto err2;
+		goto err3;
 
 	return rc;
+err3:
+	if (IS_LEAD_HWFN(p_hwfn))
+		qed_nvm_info_free(p_hwfn);
 err2:
 	if (IS_LEAD_HWFN(p_hwfn))
 		qed_iov_free_hw_info(p_hwfn->cdev);
@@ -3056,6 +3075,7 @@ int qed_hw_prepare(struct qed_dev *cdev,
 		if (rc) {
 			if (IS_PF(cdev)) {
 				qed_init_free(p_hwfn);
+				qed_nvm_info_free(p_hwfn);
 				qed_mcp_free(p_hwfn);
 				qed_hw_hwfn_free(p_hwfn);
 			}
@@ -3088,6 +3108,8 @@ void qed_hw_remove(struct qed_dev *cdev)
 	}
 
 	qed_iov_free_hw_info(cdev);
+
+	qed_nvm_info_free(p_hwfn);
 }
 
 static void qed_chain_free_next_ptr(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 6f46cb11f349..2519e71a3a2e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2303,9 +2303,9 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return rc;
 }
 
-int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
-					 struct qed_ptt *p_ptt,
-					 u32 *num_images)
+int qed_mcp_bist_nvm_get_num_images(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    u32 *num_images)
 {
 	u32 drv_mb_param = 0, rsp;
 	int rc = 0;
@@ -2324,10 +2324,10 @@ int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
-int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
-					struct qed_ptt *p_ptt,
-					struct bist_nvm_image_att *p_image_att,
-					u32 image_index)
+int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   struct bist_nvm_image_att *p_image_att,
+				   u32 image_index)
 {
 	u32 buf_size = 0, param, resp = 0, resp_param = 0;
 	int rc;
@@ -2351,16 +2351,71 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn)
+{
+	struct qed_nvm_image_info *nvm_info = &p_hwfn->nvm_info;
+	struct qed_ptt *p_ptt;
+	int rc;
+	u32 i;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt) {
+		DP_ERR(p_hwfn, "failed to acquire ptt\n");
+		return -EBUSY;
+	}
+
+	/* Acquire from MFW the amount of available images */
+	nvm_info->num_images = 0;
+	rc = qed_mcp_bist_nvm_get_num_images(p_hwfn,
+					     p_ptt, &nvm_info->num_images);
+	if (rc == -EOPNOTSUPP) {
+		DP_INFO(p_hwfn, "DRV_MSG_CODE_BIST_TEST is not supported\n");
+		goto out;
+	} else if (rc || !nvm_info->num_images) {
+		DP_ERR(p_hwfn, "Failed getting number of images\n");
+		goto err0;
+	}
+
+	nvm_info->image_att = kmalloc(nvm_info->num_images *
+				      sizeof(struct bist_nvm_image_att),
+				      GFP_KERNEL);
+	if (!nvm_info->image_att) {
+		rc = -ENOMEM;
+		goto err0;
+	}
+
+	/* Iterate over images and get their attributes */
+	for (i = 0; i < nvm_info->num_images; i++) {
+		rc = qed_mcp_bist_nvm_get_image_att(p_hwfn, p_ptt,
+						    &nvm_info->image_att[i], i);
+		if (rc) {
+			DP_ERR(p_hwfn,
+			       "Failed getting image index %d attributes\n", i);
+			goto err1;
+		}
+
+		DP_VERBOSE(p_hwfn, QED_MSG_SP, "image index %d, size %x\n", i,
+			   nvm_info->image_att[i].len);
+	}
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+	return 0;
+
+err1:
+	kfree(nvm_info->image_att);
+err0:
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
+
 static int
 qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
 			  enum qed_nvm_images image_id,
 			  struct qed_nvm_image_att *p_image_att)
 {
-	struct bist_nvm_image_att mfw_image_att;
 	enum nvm_image_type type;
-	u32 num_images, i;
-	int rc;
+	u32 i;
 
 	/* Translate image_id into MFW definitions */
 	switch (image_id) {
@@ -2376,29 +2431,18 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
 		return -EINVAL;
 	}
 
-	/* Learn number of images, then traverse and see if one fits */
-	rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images);
-	if (rc || !num_images)
-		return -EINVAL;
-
-	for (i = 0; i < num_images; i++) {
-		rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt,
-							 &mfw_image_att, i);
-		if (rc)
-			return rc;
-
-		if (type == mfw_image_att.image_type)
+	for (i = 0; i < p_hwfn->nvm_info.num_images; i++)
+		if (type == p_hwfn->nvm_info.image_att[i].image_type)
 			break;
-	}
-	if (i == num_images) {
+	if (i == p_hwfn->nvm_info.num_images) {
 		DP_VERBOSE(p_hwfn, QED_MSG_STORAGE,
 			   "Failed to find nvram image of type %08x\n",
 			   image_id);
-		return -EINVAL;
+		return -ENOENT;
 	}
 
-	p_image_att->start_addr = mfw_image_att.nvm_start_addr;
-	p_image_att->length = mfw_image_att.len;
+	p_image_att->start_addr = p_hwfn->nvm_info.image_att[i].nvm_start_addr;
+	p_image_att->length = p_hwfn->nvm_info.image_att[i].len;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index c7ec2395d1ce..7d33354b11c6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -496,9 +496,9 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn,
  *
  * @return int - 0 - operation was successful.
  */
-int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
-					 struct qed_ptt *p_ptt,
-					 u32 *num_images);
+int qed_mcp_bist_nvm_get_num_images(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    u32 *num_images);
 
 /**
  * @brief Bist nvm test - get image attributes by index
@@ -510,10 +510,10 @@ int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
  *
  * @return int - 0 - operation was successful.
  */
-int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
-					struct qed_ptt *p_ptt,
-					struct bist_nvm_image_att *p_image_att,
-					u32 image_index);
+int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   struct bist_nvm_image_att *p_image_att,
+				   u32 image_index);
 
 /* Using hwfn number (and not pf_num) is required since in CMT mode,
  * same pf_num may be used by two different hwfn
@@ -957,4 +957,12 @@ int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
  * @param p_ptt
  */
 int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Populate the nvm info shadow in the given hardware function
+ *
+ * @param p_hwfn
+ */
+int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
index 1bafc05db2b8..b88082f0b41a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
@@ -125,7 +125,7 @@ int qed_selftest_nvram(struct qed_dev *cdev)
 	}
 
 	/* Acquire from MFW the amount of available images */
-	rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images);
+	rc = qed_mcp_bist_nvm_get_num_images(p_hwfn, p_ptt, &num_images);
 	if (rc || !num_images) {
 		DP_ERR(p_hwfn, "Failed getting number of images\n");
 		return -EINVAL;
@@ -136,8 +136,8 @@ int qed_selftest_nvram(struct qed_dev *cdev)
 		/* This mailbox returns information about the image required for
 		 * reading it.
 		 */
-		rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt,
-							 &image_att, i);
+		rc = qed_mcp_bist_nvm_get_image_att(p_hwfn, p_ptt,
+						    &image_att, i);
 		if (rc) {
 			DP_ERR(p_hwfn,
 			       "Failed getting image index %d attributes\n",

From d8bf47af24a28d90bbcd5d58d6dc6c5a40f4f91a Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 28 Mar 2018 05:14:20 -0700
Subject: [PATCH 1468/1678] qed: Fix PTT entry leak in the selftest error flow.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_selftest.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
index b88082f0b41a..cf1d4476f9d8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
@@ -128,7 +128,8 @@ int qed_selftest_nvram(struct qed_dev *cdev)
 	rc = qed_mcp_bist_nvm_get_num_images(p_hwfn, p_ptt, &num_images);
 	if (rc || !num_images) {
 		DP_ERR(p_hwfn, "Failed getting number of images\n");
-		return -EINVAL;
+		rc = -EINVAL;
+		goto err0;
 	}
 
 	/* Iterate over images and validate CRC */

From 62e4d4386a954eb071ecd4a72105ea222157f11d Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 28 Mar 2018 05:14:21 -0700
Subject: [PATCH 1469/1678] qed: Add APIs for flash access.

This patch adds APIs for flash access.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h     |   7 ++
 drivers/net/ethernet/qlogic/qed/qed_hsi.h |   7 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 121 ++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h |  34 ++++++
 4 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b4094992299d..e07460a68d30 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -81,6 +81,13 @@ enum qed_coalescing_mode {
 	QED_COAL_MODE_ENABLE
 };
 
+enum qed_nvm_cmd {
+	QED_PUT_FILE_BEGIN = DRV_MSG_CODE_NVM_PUT_FILE_BEGIN,
+	QED_PUT_FILE_DATA = DRV_MSG_CODE_NVM_PUT_FILE_DATA,
+	QED_NVM_WRITE_NVRAM = DRV_MSG_CODE_NVM_WRITE_NVRAM,
+	QED_GET_MCP_NVM_RESP = 0xFFFFFF00
+};
+
 struct qed_eth_cb_ops;
 struct qed_dev_info;
 union qed_mcp_protocol_stats;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 2c6a679166e1..7f5ec42dde48 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12268,8 +12268,11 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_VF_DISABLED_DONE		0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX		0xc0010000
 #define DRV_MSG_CODE_CFG_PF_VFS_MSIX		0xc0020000
+#define DRV_MSG_CODE_NVM_PUT_FILE_BEGIN		0x00010000
+#define DRV_MSG_CODE_NVM_PUT_FILE_DATA		0x00020000
 #define DRV_MSG_CODE_NVM_GET_FILE_ATT		0x00030000
 #define DRV_MSG_CODE_NVM_READ_NVRAM		0x00050000
+#define DRV_MSG_CODE_NVM_WRITE_NVRAM		0x00060000
 #define DRV_MSG_CODE_MCP_RESET			0x00090000
 #define DRV_MSG_CODE_SET_VERSION		0x000f0000
 #define DRV_MSG_CODE_MCP_HALT                   0x00100000
@@ -12323,7 +12326,6 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_FEATURE_SUPPORT		0x00300000
 #define DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT	0x00310000
-
 #define DRV_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
 	u32 drv_mb_param;
@@ -12435,7 +12437,10 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE	0xb0010000
 
 #define FW_MSG_CODE_NVM_OK			0x00010000
+#define FW_MSG_CODE_NVM_PUT_FILE_FINISH_OK	0x00400000
+#define FW_MSG_CODE_PHY_OK			0x00110000
 #define FW_MSG_CODE_OK				0x00160000
+#define FW_MSG_CODE_ERROR			0x00170000
 
 #define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
 #define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 2519e71a3a2e..ec0d425766a7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -569,6 +569,31 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int qed_mcp_nvm_wr_cmd(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       u32 cmd,
+		       u32 param,
+		       u32 *o_mcp_resp,
+		       u32 *o_mcp_param, u32 i_txn_size, u32 *i_buf)
+{
+	struct qed_mcp_mb_params mb_params;
+	int rc;
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = cmd;
+	mb_params.param = param;
+	mb_params.p_data_src = i_buf;
+	mb_params.data_src_size = (u8)i_txn_size;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	*o_mcp_resp = mb_params.mcp_resp;
+	*o_mcp_param = mb_params.mcp_param;
+
+	return 0;
+}
+
 int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
 		       u32 cmd,
@@ -2261,6 +2286,102 @@ int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len)
 	return rc;
 }
 
+int qed_mcp_nvm_resp(struct qed_dev *cdev, u8 *p_buf)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+
+	memcpy(p_buf, &cdev->mcp_nvm_resp, sizeof(cdev->mcp_nvm_resp));
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return 0;
+}
+
+int qed_mcp_nvm_put_file_begin(struct qed_dev *cdev, u32 addr)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	u32 resp, param;
+	int rc;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_PUT_FILE_BEGIN, addr,
+			 &resp, &param);
+	cdev->mcp_nvm_resp = resp;
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
+int qed_mcp_nvm_write(struct qed_dev *cdev,
+		      u32 cmd, u32 addr, u8 *p_buf, u32 len)
+{
+	u32 buf_idx = 0, buf_size, nvm_cmd, nvm_offset, resp = 0, param;
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc = -EINVAL;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+
+	switch (cmd) {
+	case QED_PUT_FILE_DATA:
+		nvm_cmd = DRV_MSG_CODE_NVM_PUT_FILE_DATA;
+		break;
+	case QED_NVM_WRITE_NVRAM:
+		nvm_cmd = DRV_MSG_CODE_NVM_WRITE_NVRAM;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid nvm write command 0x%x\n", cmd);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	while (buf_idx < len) {
+		buf_size = min_t(u32, (len - buf_idx), MCP_DRV_NVM_BUF_LEN);
+		nvm_offset = ((buf_size << DRV_MB_PARAM_NVM_LEN_OFFSET) |
+			      addr) + buf_idx;
+		rc = qed_mcp_nvm_wr_cmd(p_hwfn, p_ptt, nvm_cmd, nvm_offset,
+					&resp, &param, buf_size,
+					(u32 *)&p_buf[buf_idx]);
+		if (rc) {
+			DP_NOTICE(cdev, "nvm write failed, rc = %d\n", rc);
+			resp = FW_MSG_CODE_ERROR;
+			break;
+		}
+
+		if (resp != FW_MSG_CODE_OK &&
+		    resp != FW_MSG_CODE_NVM_OK &&
+		    resp != FW_MSG_CODE_NVM_PUT_FILE_FINISH_OK) {
+			DP_NOTICE(cdev,
+				  "nvm write failed, resp = 0x%08x\n", resp);
+			rc = -EINVAL;
+			break;
+		}
+
+		/* This can be a lengthy process, and it's possible scheduler
+		 * isn't pre-emptable. Sleep a bit to prevent CPU hogging.
+		 */
+		if (buf_idx % 0x1000 > (buf_idx + buf_size) % 0x1000)
+			usleep_range(1000, 2000);
+
+		buf_idx += buf_size;
+	}
+
+	cdev->mcp_nvm_resp = resp;
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 drv_mb_param = 0, rsp, param;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 7d33354b11c6..8a5c988d0c3c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -443,6 +443,40 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
  */
 int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
 
+/**
+ * @brief Write to nvm
+ *
+ *  @param cdev
+ *  @param addr - nvm offset
+ *  @param cmd - nvm command
+ *  @param p_buf - nvm write buffer
+ *  @param len - buffer len
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_nvm_write(struct qed_dev *cdev,
+		      u32 cmd, u32 addr, u8 *p_buf, u32 len);
+
+/**
+ * @brief Put file begin
+ *
+ *  @param cdev
+ *  @param addr - nvm offset
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_nvm_put_file_begin(struct qed_dev *cdev, u32 addr);
+
+/**
+ * @brief Check latest response
+ *
+ *  @param cdev
+ *  @param p_buf - nvm write buffer
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_nvm_resp(struct qed_dev *cdev, u8 *p_buf);
+
 struct qed_nvm_image_att {
 	u32 start_addr;
 	u32 length;

From 3a69cae80cdd1b5c8b23137cba2a80ecfec4cef5 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 28 Mar 2018 05:14:22 -0700
Subject: [PATCH 1470/1678] qed: Adapter flash update support.

This patch adds the required driver support for updating the flash or
non volatile memory of the adapter. At highlevel, flash upgrade comprises
of reading the flash images from the input file, validating the images and
writing them to the respective paritions.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 338 +++++++++++++++++++++
 include/linux/qed/qed_if.h                 |  19 ++
 2 files changed, 357 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 27832885a87f..9854aa9139af 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -45,6 +45,7 @@
 #include <linux/etherdevice.h>
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
+#include <linux/crc32.h>
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
 
@@ -1553,6 +1554,342 @@ static int qed_drain(struct qed_dev *cdev)
 	return 0;
 }
 
+static u32 qed_nvm_flash_image_access_crc(struct qed_dev *cdev,
+					  struct qed_nvm_image_att *nvm_image,
+					  u32 *crc)
+{
+	u8 *buf = NULL;
+	int rc, j;
+	u32 val;
+
+	/* Allocate a buffer for holding the nvram image */
+	buf = kzalloc(nvm_image->length, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Read image into buffer */
+	rc = qed_mcp_nvm_read(cdev, nvm_image->start_addr,
+			      buf, nvm_image->length);
+	if (rc) {
+		DP_ERR(cdev, "Failed reading image from nvm\n");
+		goto out;
+	}
+
+	/* Convert the buffer into big-endian format (excluding the
+	 * closing 4 bytes of CRC).
+	 */
+	for (j = 0; j < nvm_image->length - 4; j += 4) {
+		val = cpu_to_be32(*(u32 *)&buf[j]);
+		*(u32 *)&buf[j] = val;
+	}
+
+	/* Calc CRC for the "actual" image buffer, i.e. not including
+	 * the last 4 CRC bytes.
+	 */
+	*crc = (~cpu_to_be32(crc32(0xffffffff, buf, nvm_image->length - 4)));
+
+out:
+	kfree(buf);
+
+	return rc;
+}
+
+/* Binary file format -
+ *     /----------------------------------------------------------------------\
+ * 0B  |                       0x4 [command index]                            |
+ * 4B  | image_type     | Options        |  Number of register settings       |
+ * 8B  |                       Value                                          |
+ * 12B |                       Mask                                           |
+ * 16B |                       Offset                                         |
+ *     \----------------------------------------------------------------------/
+ * There can be several Value-Mask-Offset sets as specified by 'Number of...'.
+ * Options - 0'b - Calculate & Update CRC for image
+ */
+static int qed_nvm_flash_image_access(struct qed_dev *cdev, const u8 **data,
+				      bool *check_resp)
+{
+	struct qed_nvm_image_att nvm_image;
+	struct qed_hwfn *p_hwfn;
+	bool is_crc = false;
+	u32 image_type;
+	int rc = 0, i;
+	u16 len;
+
+	*data += 4;
+	image_type = **data;
+	p_hwfn = QED_LEADING_HWFN(cdev);
+	for (i = 0; i < p_hwfn->nvm_info.num_images; i++)
+		if (image_type == p_hwfn->nvm_info.image_att[i].image_type)
+			break;
+	if (i == p_hwfn->nvm_info.num_images) {
+		DP_ERR(cdev, "Failed to find nvram image of type %08x\n",
+		       image_type);
+		return -ENOENT;
+	}
+
+	nvm_image.start_addr = p_hwfn->nvm_info.image_att[i].nvm_start_addr;
+	nvm_image.length = p_hwfn->nvm_info.image_att[i].len;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "Read image %02x; type = %08x; NVM [%08x,...,%08x]\n",
+		   **data, image_type, nvm_image.start_addr,
+		   nvm_image.start_addr + nvm_image.length - 1);
+	(*data)++;
+	is_crc = !!(**data & BIT(0));
+	(*data)++;
+	len = *((u16 *)*data);
+	*data += 2;
+	if (is_crc) {
+		u32 crc = 0;
+
+		rc = qed_nvm_flash_image_access_crc(cdev, &nvm_image, &crc);
+		if (rc) {
+			DP_ERR(cdev, "Failed calculating CRC, rc = %d\n", rc);
+			goto exit;
+		}
+
+		rc = qed_mcp_nvm_write(cdev, QED_NVM_WRITE_NVRAM,
+				       (nvm_image.start_addr +
+					nvm_image.length - 4), (u8 *)&crc, 4);
+		if (rc)
+			DP_ERR(cdev, "Failed writing to %08x, rc = %d\n",
+			       nvm_image.start_addr + nvm_image.length - 4, rc);
+		goto exit;
+	}
+
+	/* Iterate over the values for setting */
+	while (len) {
+		u32 offset, mask, value, cur_value;
+		u8 buf[4];
+
+		value = *((u32 *)*data);
+		*data += 4;
+		mask = *((u32 *)*data);
+		*data += 4;
+		offset = *((u32 *)*data);
+		*data += 4;
+
+		rc = qed_mcp_nvm_read(cdev, nvm_image.start_addr + offset, buf,
+				      4);
+		if (rc) {
+			DP_ERR(cdev, "Failed reading from %08x\n",
+			       nvm_image.start_addr + offset);
+			goto exit;
+		}
+
+		cur_value = le32_to_cpu(*((__le32 *)buf));
+		DP_VERBOSE(cdev, NETIF_MSG_DRV,
+			   "NVM %08x: %08x -> %08x [Value %08x Mask %08x]\n",
+			   nvm_image.start_addr + offset, cur_value,
+			   (cur_value & ~mask) | (value & mask), value, mask);
+		value = (value & mask) | (cur_value & ~mask);
+		rc = qed_mcp_nvm_write(cdev, QED_NVM_WRITE_NVRAM,
+				       nvm_image.start_addr + offset,
+				       (u8 *)&value, 4);
+		if (rc) {
+			DP_ERR(cdev, "Failed writing to %08x\n",
+			       nvm_image.start_addr + offset);
+			goto exit;
+		}
+
+		len--;
+	}
+exit:
+	return rc;
+}
+
+/* Binary file format -
+ *     /----------------------------------------------------------------------\
+ * 0B  |                       0x3 [command index]                            |
+ * 4B  | b'0: check_response?   | b'1-31  reserved                            |
+ * 8B  | File-type |                   reserved                               |
+ *     \----------------------------------------------------------------------/
+ *     Start a new file of the provided type
+ */
+static int qed_nvm_flash_image_file_start(struct qed_dev *cdev,
+					  const u8 **data, bool *check_resp)
+{
+	int rc;
+
+	*data += 4;
+	*check_resp = !!(**data & BIT(0));
+	*data += 4;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "About to start a new file of type %02x\n", **data);
+	rc = qed_mcp_nvm_put_file_begin(cdev, **data);
+	*data += 4;
+
+	return rc;
+}
+
+/* Binary file format -
+ *     /----------------------------------------------------------------------\
+ * 0B  |                       0x2 [command index]                            |
+ * 4B  |                       Length in bytes                                |
+ * 8B  | b'0: check_response?   | b'1-31  reserved                            |
+ * 12B |                       Offset in bytes                                |
+ * 16B |                       Data ...                                       |
+ *     \----------------------------------------------------------------------/
+ *     Write data as part of a file that was previously started. Data should be
+ *     of length equal to that provided in the message
+ */
+static int qed_nvm_flash_image_file_data(struct qed_dev *cdev,
+					 const u8 **data, bool *check_resp)
+{
+	u32 offset, len;
+	int rc;
+
+	*data += 4;
+	len = *((u32 *)(*data));
+	*data += 4;
+	*check_resp = !!(**data & BIT(0));
+	*data += 4;
+	offset = *((u32 *)(*data));
+	*data += 4;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "About to write File-data: %08x bytes to offset %08x\n",
+		   len, offset);
+
+	rc = qed_mcp_nvm_write(cdev, QED_PUT_FILE_DATA, offset,
+			       (char *)(*data), len);
+	*data += len;
+
+	return rc;
+}
+
+/* Binary file format [General header] -
+ *     /----------------------------------------------------------------------\
+ * 0B  |                       QED_NVM_SIGNATURE                              |
+ * 4B  |                       Length in bytes                                |
+ * 8B  | Highest command in this batchfile |          Reserved                |
+ *     \----------------------------------------------------------------------/
+ */
+static int qed_nvm_flash_image_validate(struct qed_dev *cdev,
+					const struct firmware *image,
+					const u8 **data)
+{
+	u32 signature, len;
+
+	/* Check minimum size */
+	if (image->size < 12) {
+		DP_ERR(cdev, "Image is too short [%08x]\n", (u32)image->size);
+		return -EINVAL;
+	}
+
+	/* Check signature */
+	signature = *((u32 *)(*data));
+	if (signature != QED_NVM_SIGNATURE) {
+		DP_ERR(cdev, "Wrong signature '%08x'\n", signature);
+		return -EINVAL;
+	}
+
+	*data += 4;
+	/* Validate internal size equals the image-size */
+	len = *((u32 *)(*data));
+	if (len != image->size) {
+		DP_ERR(cdev, "Size mismatch: internal = %08x image = %08x\n",
+		       len, (u32)image->size);
+		return -EINVAL;
+	}
+
+	*data += 4;
+	/* Make sure driver familiar with all commands necessary for this */
+	if (*((u16 *)(*data)) >= QED_NVM_FLASH_CMD_NVM_MAX) {
+		DP_ERR(cdev, "File contains unsupported commands [Need %04x]\n",
+		       *((u16 *)(*data)));
+		return -EINVAL;
+	}
+
+	*data += 4;
+
+	return 0;
+}
+
+static int qed_nvm_flash(struct qed_dev *cdev, const char *name)
+{
+	const struct firmware *image;
+	const u8 *data, *data_end;
+	u32 cmd_type;
+	int rc;
+
+	rc = request_firmware(&image, name, &cdev->pdev->dev);
+	if (rc) {
+		DP_ERR(cdev, "Failed to find '%s'\n", name);
+		return rc;
+	}
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "Flashing '%s' - firmware's data at %p, size is %08x\n",
+		   name, image->data, (u32)image->size);
+	data = image->data;
+	data_end = data + image->size;
+
+	rc = qed_nvm_flash_image_validate(cdev, image, &data);
+	if (rc)
+		goto exit;
+
+	while (data < data_end) {
+		bool check_resp = false;
+
+		/* Parse the actual command */
+		cmd_type = *((u32 *)data);
+		switch (cmd_type) {
+		case QED_NVM_FLASH_CMD_FILE_DATA:
+			rc = qed_nvm_flash_image_file_data(cdev, &data,
+							   &check_resp);
+			break;
+		case QED_NVM_FLASH_CMD_FILE_START:
+			rc = qed_nvm_flash_image_file_start(cdev, &data,
+							    &check_resp);
+			break;
+		case QED_NVM_FLASH_CMD_NVM_CHANGE:
+			rc = qed_nvm_flash_image_access(cdev, &data,
+							&check_resp);
+			break;
+		default:
+			DP_ERR(cdev, "Unknown command %08x\n", cmd_type);
+			rc = -EINVAL;
+			goto exit;
+		}
+
+		if (rc) {
+			DP_ERR(cdev, "Command %08x failed\n", cmd_type);
+			goto exit;
+		}
+
+		/* Check response if needed */
+		if (check_resp) {
+			u32 mcp_response = 0;
+
+			if (qed_mcp_nvm_resp(cdev, (u8 *)&mcp_response)) {
+				DP_ERR(cdev, "Failed getting MCP response\n");
+				rc = -EINVAL;
+				goto exit;
+			}
+
+			switch (mcp_response & FW_MSG_CODE_MASK) {
+			case FW_MSG_CODE_OK:
+			case FW_MSG_CODE_NVM_OK:
+			case FW_MSG_CODE_NVM_PUT_FILE_FINISH_OK:
+			case FW_MSG_CODE_PHY_OK:
+				break;
+			default:
+				DP_ERR(cdev, "MFW returns error: %08x\n",
+				       mcp_response);
+				rc = -EINVAL;
+				goto exit;
+			}
+		}
+	}
+
+exit:
+	release_firmware(image);
+
+	return rc;
+}
+
 static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
 			     u8 *buf, u16 len)
 {
@@ -1719,6 +2056,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.dbg_all_data_size = &qed_dbg_all_data_size,
 	.chain_alloc = &qed_chain_alloc,
 	.chain_free = &qed_chain_free,
+	.nvm_flash = &qed_nvm_flash,
 	.nvm_get_image = &qed_nvm_get_image,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 15e398c7230e..b5b2bc9eacca 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -483,6 +483,15 @@ struct qed_int_info {
 	u8			used_cnt;
 };
 
+#define QED_NVM_SIGNATURE 0x12435687
+
+enum qed_nvm_flash_cmd {
+	QED_NVM_FLASH_CMD_FILE_DATA = 0x2,
+	QED_NVM_FLASH_CMD_FILE_START = 0x3,
+	QED_NVM_FLASH_CMD_NVM_CHANGE = 0x4,
+	QED_NVM_FLASH_CMD_NVM_MAX,
+};
+
 struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
 	void	(*link_update)(void			*dev,
@@ -657,6 +666,16 @@ struct qed_common_ops {
 	void		(*chain_free)(struct qed_dev *cdev,
 				      struct qed_chain *p_chain);
 
+/**
+ * @brief nvm_flash - Flash nvm data.
+ *
+ * @param cdev
+ * @param name - file containing the data
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*nvm_flash)(struct qed_dev *cdev, const char *name);
+
 /**
  * @brief nvm_get_image - reads an entire image from nvram
  *

From ccfa110cfecd1a200dc302aedbbda859fb237a09 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 28 Mar 2018 05:14:23 -0700
Subject: [PATCH 1471/1678] qede: Ethtool flash update support.

The patch adds ethtool callback implementation for flash update.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 4ca3847fffd4..ecbf1ded7a39 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -699,6 +699,14 @@ static u32 qede_get_link(struct net_device *dev)
 	return current_link.link_up;
 }
 
+static int qede_flash_device(struct net_device *dev,
+			     struct ethtool_flash *flash)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+
+	return edev->ops->common->nvm_flash(edev->cdev, flash->data);
+}
+
 static int qede_get_coalesce(struct net_device *dev,
 			     struct ethtool_coalesce *coal)
 {
@@ -1806,6 +1814,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
 
 	.get_tunable = qede_get_tunable,
 	.set_tunable = qede_set_tunable,
+	.flash_device = qede_flash_device,
 };
 
 static const struct ethtool_ops qede_vf_ethtool_ops = {

From 981f1f803516666d3e02f1d707a7f4b0ebf41967 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 28 Mar 2018 11:18:25 +0100
Subject: [PATCH 1472/1678] sfp: allow cotsworks modules

Cotsworks modules fail the checksums - it appears that Cotsworks
reprograms the EEPROM at the end of production with the final product
information (serial, date code, and exact part number for module
options) and fails to update the checksum.

Work around this by detecting the Cotsworks name in the manufacturer
field, and reducing the checksum failures to warnings rather than a
hard error.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 83bf4959b043..4ab6e9a50bbe 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -560,6 +560,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 {
 	/* SFP module inserted - read I2C data */
 	struct sfp_eeprom_id id;
+	bool cotsworks;
 	u8 check;
 	int ret;
 
@@ -574,23 +575,43 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 		return -EAGAIN;
 	}
 
+	/* Cotsworks do not seem to update the checksums when they
+	 * do the final programming with the final module part number,
+	 * serial number and date code.
+	 */
+	cotsworks = !memcmp(id.base.vendor_name, "COTSWORKS       ", 16);
+
 	/* Validate the checksum over the base structure */
 	check = sfp_check(&id.base, sizeof(id.base) - 1);
 	if (check != id.base.cc_base) {
-		dev_err(sfp->dev,
-			"EEPROM base structure checksum failure: 0x%02x\n",
-			check);
-		print_hex_dump(KERN_ERR, "sfp EE: ", DUMP_PREFIX_OFFSET,
-			       16, 1, &id, sizeof(id.base) - 1, true);
-		return -EINVAL;
+		if (cotsworks) {
+			dev_warn(sfp->dev,
+				 "EEPROM base structure checksum failure (0x%02x != 0x%02x)\n",
+				 check, id.base.cc_base);
+		} else {
+			dev_err(sfp->dev,
+				"EEPROM base structure checksum failure: 0x%02x != 0x%02x\n",
+				check, id.base.cc_base);
+			print_hex_dump(KERN_ERR, "sfp EE: ", DUMP_PREFIX_OFFSET,
+				       16, 1, &id, sizeof(id), true);
+			return -EINVAL;
+		}
 	}
 
 	check = sfp_check(&id.ext, sizeof(id.ext) - 1);
 	if (check != id.ext.cc_ext) {
-		dev_err(sfp->dev,
-			"EEPROM extended structure checksum failure: 0x%02x\n",
-			check);
-		memset(&id.ext, 0, sizeof(id.ext));
+		if (cotsworks) {
+			dev_warn(sfp->dev,
+				 "EEPROM extended structure checksum failure (0x%02x != 0x%02x)\n",
+				 check, id.ext.cc_ext);
+		} else {
+			dev_err(sfp->dev,
+				"EEPROM extended structure checksum failure: 0x%02x != 0x%02x\n",
+				check, id.ext.cc_ext);
+			print_hex_dump(KERN_ERR, "sfp EE: ", DUMP_PREFIX_OFFSET,
+				       16, 1, &id, sizeof(id), true);
+			memset(&id.ext, 0, sizeof(id.ext));
+		}
 	}
 
 	sfp->id = id;

From 99fe29d3a25f813146816b322367b4c6a0fdec84 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 28 Mar 2018 14:48:36 +0300
Subject: [PATCH 1473/1678] test_bpf: Fix NULL vs IS_ERR() check in
 test_skb_segment()

The skb_segment() function returns error pointers on error.  It never
returns NULL.

Fixes: 76db8087c4c9 ("net: bpf: add a test for skb_segment in test_bpf module")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index b2badf6b23cd..8e157806df7a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6649,7 +6649,7 @@ static __init int test_skb_segment(void)
 	}
 
 	segs = skb_segment(skb, features);
-	if (segs) {
+	if (!IS_ERR(segs)) {
 		kfree_skb_list(segs);
 		ret = 0;
 		pr_info("%s: success in skb_segment!", __func__);

From 335ab8ba617ec5bdc0c1595aae875304277e2495 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 28 Mar 2018 12:51:19 +0000
Subject: [PATCH 1474/1678] net: bcmgenet: return NULL instead of plain integer

Fixes the following sparse warning:

drivers/net/ethernet/broadcom/genet/bcmgenet.c:1351:16: warning:
 Using plain integer as NULL pointer

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 7db8edc643ec..2f4cb5c11e18 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1348,7 +1348,7 @@ static struct sk_buff *bcmgenet_free_tx_cb(struct device *dev,
 		dma_unmap_addr_set(cb, dma_addr, 0);
 	}
 
-	return 0;
+	return NULL;
 }
 
 /* Simple helper to free a receive control block's resources */

From 75498aa1aff5dfed0124557dfa0b6b2906b14fa6 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 28 Mar 2018 12:51:55 +0000
Subject: [PATCH 1475/1678] net: cavium: use module_pci_driver to simplify the
 code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/common/cavium_ptp.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.c b/drivers/net/ethernet/cavium/common/cavium_ptp.c
index d59497a7bdce..6aeb1045c302 100644
--- a/drivers/net/ethernet/cavium/common/cavium_ptp.c
+++ b/drivers/net/ethernet/cavium/common/cavium_ptp.c
@@ -336,18 +336,7 @@ static struct pci_driver cavium_ptp_driver = {
 	.remove = cavium_ptp_remove,
 };
 
-static int __init cavium_ptp_init_module(void)
-{
-	return pci_register_driver(&cavium_ptp_driver);
-}
-
-static void __exit cavium_ptp_cleanup_module(void)
-{
-	pci_unregister_driver(&cavium_ptp_driver);
-}
-
-module_init(cavium_ptp_init_module);
-module_exit(cavium_ptp_cleanup_module);
+module_pci_driver(cavium_ptp_driver);
 
 MODULE_DESCRIPTION(DRV_NAME);
 MODULE_AUTHOR("Cavium Networks <support@cavium.com>");

From 7ae665f132a62e67ccef1ef0994acba51abc2400 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 28 Mar 2018 16:14:56 +0200
Subject: [PATCH 1476/1678] sctp: fix unused lable warning

The proc file cleanup left a label possibly unused:

net/sctp/protocol.c: In function 'sctp_defaults_init':
net/sctp/protocol.c:1304:1: error: label 'err_init_proc' defined but not used [-Werror=unused-label]

This adds an #ifdef around it to match the respective 'goto'.

Fixes: d47d08c8ca05 ("sctp: use proc_remove_subtree()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 84a09f599131..a24cde236330 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1258,8 +1258,10 @@ static int __net_init sctp_defaults_init(struct net *net)
 
 	return 0;
 
+#ifdef CONFIG_PROC_FS
 err_init_proc:
 	cleanup_sctp_mibs(net);
+#endif
 err_init_mibs:
 	sctp_sysctl_net_unregister(net);
 err_sysctl_register:

From 65f60e4582bd321f4df1433a22f717f18e60f721 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 28 Mar 2018 23:50:28 +0200
Subject: [PATCH 1477/1678] net: dsa: mv88e6xxx: Keep ATU/VTU violation
 statistics

Count the numbers of various ATU and VTU violation statistics and
return them as part of the ethtool -S statistics.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c        | 50 +++++++++++++++++++++----
 drivers/net/dsa/mv88e6xxx/chip.h        | 13 +++++--
 drivers/net/dsa/mv88e6xxx/global1_atu.c | 12 ++++--
 drivers/net/dsa/mv88e6xxx/global1_vtu.c |  8 ++--
 drivers/net/dsa/mv88e6xxx/serdes.c      | 15 +++++---
 drivers/net/dsa/mv88e6xxx/serdes.h      |  8 ++--
 6 files changed, 78 insertions(+), 28 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9a5d786b4885..3d2091099f7f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -723,6 +723,24 @@ static int mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
 					   STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
 }
 
+static const uint8_t *mv88e6xxx_atu_vtu_stats_strings[] = {
+	"atu_member_violation",
+	"atu_miss_violation",
+	"atu_full_violation",
+	"vtu_member_violation",
+	"vtu_miss_violation",
+};
+
+static void mv88e6xxx_atu_vtu_get_strings(uint8_t *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(mv88e6xxx_atu_vtu_stats_strings); i++)
+		strlcpy(data + i * ETH_GSTRING_LEN,
+			mv88e6xxx_atu_vtu_stats_strings[i],
+			ETH_GSTRING_LEN);
+}
+
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
 				  uint8_t *data)
 {
@@ -736,9 +754,12 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
 
 	if (chip->info->ops->serdes_get_strings) {
 		data += count * ETH_GSTRING_LEN;
-		chip->info->ops->serdes_get_strings(chip, port, data);
+		count = chip->info->ops->serdes_get_strings(chip, port, data);
 	}
 
+	data += count * ETH_GSTRING_LEN;
+	mv88e6xxx_atu_vtu_get_strings(data);
+
 	mutex_unlock(&chip->reg_lock);
 }
 
@@ -783,10 +804,13 @@ static int mv88e6xxx_get_sset_count(struct dsa_switch *ds, int port)
 	if (chip->info->ops->serdes_get_sset_count)
 		serdes_count = chip->info->ops->serdes_get_sset_count(chip,
 								      port);
-	if (serdes_count < 0)
+	if (serdes_count < 0) {
 		count = serdes_count;
-	else
-		count += serdes_count;
+		goto out;
+	}
+	count += serdes_count;
+	count += ARRAY_SIZE(mv88e6xxx_atu_vtu_stats_strings);
+
 out:
 	mutex_unlock(&chip->reg_lock);
 
@@ -841,6 +865,16 @@ static int mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
 					 0);
 }
 
+static void mv88e6xxx_atu_vtu_get_stats(struct mv88e6xxx_chip *chip, int port,
+					uint64_t *data)
+{
+	*data++ = chip->ports[port].atu_member_violation;
+	*data++ = chip->ports[port].atu_miss_violation;
+	*data++ = chip->ports[port].atu_full_violation;
+	*data++ = chip->ports[port].vtu_member_violation;
+	*data++ = chip->ports[port].vtu_miss_violation;
+}
+
 static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
 				uint64_t *data)
 {
@@ -849,12 +883,14 @@ static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
 	if (chip->info->ops->stats_get_stats)
 		count = chip->info->ops->stats_get_stats(chip, port, data);
 
+	mutex_lock(&chip->reg_lock);
 	if (chip->info->ops->serdes_get_stats) {
 		data += count;
-		mutex_lock(&chip->reg_lock);
-		chip->info->ops->serdes_get_stats(chip, port, data);
-		mutex_unlock(&chip->reg_lock);
+		count = chip->info->ops->serdes_get_stats(chip, port, data);
 	}
+	data += count;
+	mv88e6xxx_atu_vtu_get_stats(chip, port, data);
+	mutex_unlock(&chip->reg_lock);
 }
 
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index bad211014e91..80490f66bc06 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -194,6 +194,11 @@ struct mv88e6xxx_port_hwtstamp {
 
 struct mv88e6xxx_port {
 	u64 serdes_stats[2];
+	u64 atu_member_violation;
+	u64 atu_miss_violation;
+	u64 atu_full_violation;
+	u64 vtu_member_violation;
+	u64 vtu_miss_violation;
 };
 
 struct mv88e6xxx_chip {
@@ -409,10 +414,10 @@ struct mv88e6xxx_ops {
 
 	/* Statistics from the SERDES interface */
 	int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
-	void (*serdes_get_strings)(struct mv88e6xxx_chip *chip,  int port,
-				   uint8_t *data);
-	void (*serdes_get_stats)(struct mv88e6xxx_chip *chip,  int port,
-				 uint64_t *data);
+	int (*serdes_get_strings)(struct mv88e6xxx_chip *chip,  int port,
+				  uint8_t *data);
+	int (*serdes_get_stats)(struct mv88e6xxx_chip *chip,  int port,
+				uint64_t *data);
 
 	/* VLAN Translation Unit operations */
 	int (*vtu_getnext)(struct mv88e6xxx_chip *chip,
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
index 20d941f4273b..307410898fc9 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
@@ -336,8 +336,6 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 	if (err)
 		goto out;
 
-	mutex_unlock(&chip->reg_lock);
-
 	if (val & MV88E6XXX_G1_ATU_OP_AGE_OUT_VIOLATION) {
 		dev_err_ratelimited(chip->dev,
 				    "ATU age out violation for %pM\n",
@@ -348,17 +346,23 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id)
 		dev_err_ratelimited(chip->dev,
 				    "ATU member violation for %pM portvec %x\n",
 				    entry.mac, entry.portvec);
+		chip->ports[entry.portvec].atu_member_violation++;
 	}
 
-	if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION)
+	if (val & MV88E6XXX_G1_ATU_OP_MEMBER_VIOLATION) {
 		dev_err_ratelimited(chip->dev,
 				    "ATU miss violation for %pM portvec %x\n",
 				    entry.mac, entry.portvec);
+		chip->ports[entry.portvec].atu_miss_violation++;
+	}
 
-	if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION)
+	if (val & MV88E6XXX_G1_ATU_OP_FULL_VIOLATION) {
 		dev_err_ratelimited(chip->dev,
 				    "ATU full violation for %pM portvec %x\n",
 				    entry.mac, entry.portvec);
+		chip->ports[entry.portvec].atu_full_violation++;
+	}
+	mutex_unlock(&chip->reg_lock);
 
 	return IRQ_HANDLED;
 
diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 7997961647de..2cbaf946e7ed 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -539,18 +539,20 @@ static irqreturn_t mv88e6xxx_g1_vtu_prob_irq_thread_fn(int irq, void *dev_id)
 	if (err)
 		goto out;
 
-	mutex_unlock(&chip->reg_lock);
-
 	spid = val & MV88E6XXX_G1_VTU_OP_SPID_MASK;
 
 	if (val & MV88E6XXX_G1_VTU_OP_MEMBER_VIOLATION) {
 		dev_err_ratelimited(chip->dev, "VTU member violation for vid %d, source port %d\n",
 				    entry.vid, spid);
+		chip->ports[spid].vtu_member_violation++;
 	}
 
-	if (val & MV88E6XXX_G1_VTU_OP_MISS_VIOLATION)
+	if (val & MV88E6XXX_G1_VTU_OP_MISS_VIOLATION) {
 		dev_err_ratelimited(chip->dev, "VTU miss violation for vid %d, source port %d\n",
 				    entry.vid, spid);
+		chip->ports[spid].vtu_miss_violation++;
+	}
+	mutex_unlock(&chip->reg_lock);
 
 	return IRQ_HANDLED;
 
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index b6166424216a..fb058fd35c0d 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -106,20 +106,21 @@ int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
 	return 0;
 }
 
-void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
-				  int port, uint8_t *data)
+int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+				 int port, uint8_t *data)
 {
 	struct mv88e6352_serdes_hw_stat *stat;
 	int i;
 
 	if (!mv88e6352_port_has_serdes(chip, port))
-		return;
+		return 0;
 
 	for (i = 0; i < ARRAY_SIZE(mv88e6352_serdes_hw_stats); i++) {
 		stat = &mv88e6352_serdes_hw_stats[i];
 		memcpy(data + i * ETH_GSTRING_LEN, stat->string,
 		       ETH_GSTRING_LEN);
 	}
+	return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
 }
 
 static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
@@ -149,8 +150,8 @@ static uint64_t mv88e6352_serdes_get_stat(struct mv88e6xxx_chip *chip,
 	return val;
 }
 
-void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
-				uint64_t *data)
+int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+			       uint64_t *data)
 {
 	struct mv88e6xxx_port *mv88e6xxx_port = &chip->ports[port];
 	struct mv88e6352_serdes_hw_stat *stat;
@@ -158,7 +159,7 @@ void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 	int i;
 
 	if (!mv88e6352_port_has_serdes(chip, port))
-		return;
+		return 0;
 
 	BUILD_BUG_ON(ARRAY_SIZE(mv88e6352_serdes_hw_stats) >
 		     ARRAY_SIZE(mv88e6xxx_port->serdes_stats));
@@ -169,6 +170,8 @@ void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 		mv88e6xxx_port->serdes_stats[i] += value;
 		data[i] = mv88e6xxx_port->serdes_stats[i];
 	}
+
+	return ARRAY_SIZE(mv88e6352_serdes_hw_stats);
 }
 
 /* Set the power on/off for 10GBASE-R and 10GBASE-X4/X2 */
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 641baa75f910..1897c01c6e19 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -45,8 +45,8 @@
 int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, bool on);
 int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
-void mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
-				  int port, uint8_t *data);
-void mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
-				uint64_t *data);
+int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
+				 int port, uint8_t *data);
+int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+			       uint64_t *data);
 #endif

From 7f20d834ea14346c0abef63525ee40a7d2ba0de7 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 28 Mar 2018 23:50:29 +0200
Subject: [PATCH 1478/1678] net: dsa: mv88e6xxx: Make VTU miss violations less
 spammy

VTU miss violations can happen under normal conditions. Don't spam the
kernel log, downgrade the output to debug level only. The statistics
counter will indicate it is happening, if anybody not debugging is
interested.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global1_vtu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 2cbaf946e7ed..058326924f3e 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -548,10 +548,11 @@ static irqreturn_t mv88e6xxx_g1_vtu_prob_irq_thread_fn(int irq, void *dev_id)
 	}
 
 	if (val & MV88E6XXX_G1_VTU_OP_MISS_VIOLATION) {
-		dev_err_ratelimited(chip->dev, "VTU miss violation for vid %d, source port %d\n",
+		dev_dbg_ratelimited(chip->dev, "VTU miss violation for vid %d, source port %d\n",
 				    entry.vid, spid);
 		chip->ports[spid].vtu_miss_violation++;
 	}
+
 	mutex_unlock(&chip->reg_lock);
 
 	return IRQ_HANDLED;

From 8934ce2fd08171e8605f7fada91ee7619fe17ab8 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 28 Mar 2018 12:49:15 -0700
Subject: [PATCH 1479/1678] bpf: sockmap redirect ingress support

Add support for the BPF_F_INGRESS flag in sk_msg redirect helper.
To do this add a scatterlist ring for receiving socks to check
before calling into regular recvmsg call path. Additionally, because
the poll wakeup logic only checked the skb recv queue we need to
add a hook in TCP stack (similar to write side) so that we have
a way to wake up polling socks when a scatterlist is redirected
to that sock.

After this all that is needed is for the redirect helper to
push the scatterlist into the psock receive queue.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h |   1 +
 include/net/sock.h     |   1 +
 kernel/bpf/sockmap.c   | 198 ++++++++++++++++++++++++++++++++++++++++-
 net/core/filter.c      |   2 +-
 net/ipv4/tcp.c         |  10 ++-
 5 files changed, 207 insertions(+), 5 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c2f167db8bd5..961cc5d53956 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -521,6 +521,7 @@ struct sk_msg_buff {
 	__u32 key;
 	__u32 flags;
 	struct bpf_map *map;
+	struct list_head list;
 };
 
 /* Compute the linear packet data range [data, data_end) which
diff --git a/include/net/sock.h b/include/net/sock.h
index 709311132d4c..b8ff435fa96e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1085,6 +1085,7 @@ struct proto {
 #endif
 
 	bool			(*stream_memory_free)(const struct sock *sk);
+	bool			(*stream_memory_read)(const struct sock *sk);
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
 	void			(*leave_memory_pressure)(struct sock *sk);
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 69c5bccabd22..402e15466e9f 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,8 @@
 #include <linux/mm.h>
 #include <net/strparser.h>
 #include <net/tcp.h>
+#include <linux/ptr_ring.h>
+#include <net/inet_common.h>
 
 #define SOCK_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -82,6 +84,7 @@ struct smap_psock {
 	int sg_size;
 	int eval;
 	struct sk_msg_buff *cork;
+	struct list_head ingress;
 
 	struct strparser strp;
 	struct bpf_prog *bpf_tx_msg;
@@ -103,6 +106,8 @@ struct smap_psock {
 };
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			   int nonblock, int flags, int *addr_len);
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
@@ -112,6 +117,21 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+static bool bpf_tcp_stream_read(const struct sock *sk)
+{
+	struct smap_psock *psock;
+	bool empty = true;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto out;
+	empty = list_empty(&psock->ingress);
+out:
+	rcu_read_unlock();
+	return !empty;
+}
+
 static struct proto tcp_bpf_proto;
 static int bpf_tcp_init(struct sock *sk)
 {
@@ -135,6 +155,8 @@ static int bpf_tcp_init(struct sock *sk)
 	if (psock->bpf_tx_msg) {
 		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
 		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
+		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
 	}
 
 	sk->sk_prot = &tcp_bpf_proto;
@@ -170,6 +192,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 {
 	void (*close_fun)(struct sock *sk, long timeout);
 	struct smap_psock_map_entry *e, *tmp;
+	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 	struct sock *osk;
 
@@ -188,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	close_fun = psock->save_close;
 
 	write_lock_bh(&sk->sk_callback_lock);
+	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
+		list_del(&md->list);
+		free_start_sg(psock->sock, md);
+		kfree(md);
+	}
+
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
 		osk = cmpxchg(e->entry, sk, NULL);
 		if (osk == sk) {
@@ -468,6 +497,72 @@ verdict:
 	return _rc;
 }
 
+static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
+			   struct smap_psock *psock,
+			   struct sk_msg_buff *md, int flags)
+{
+	bool apply = apply_bytes;
+	size_t size, copied = 0;
+	struct sk_msg_buff *r;
+	int err = 0, i;
+
+	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
+	if (unlikely(!r))
+		return -ENOMEM;
+
+	lock_sock(sk);
+	r->sg_start = md->sg_start;
+	i = md->sg_start;
+
+	do {
+		r->sg_data[i] = md->sg_data[i];
+
+		size = (apply && apply_bytes < md->sg_data[i].length) ?
+			apply_bytes : md->sg_data[i].length;
+
+		if (!sk_wmem_schedule(sk, size)) {
+			if (!copied)
+				err = -ENOMEM;
+			break;
+		}
+
+		sk_mem_charge(sk, size);
+		r->sg_data[i].length = size;
+		md->sg_data[i].length -= size;
+		md->sg_data[i].offset += size;
+		copied += size;
+
+		if (md->sg_data[i].length) {
+			get_page(sg_page(&r->sg_data[i]));
+			r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
+		} else {
+			i++;
+			if (i == MAX_SKB_FRAGS)
+				i = 0;
+			r->sg_end = i;
+		}
+
+		if (apply) {
+			apply_bytes -= size;
+			if (!apply_bytes)
+				break;
+		}
+	} while (i != md->sg_end);
+
+	md->sg_start = i;
+
+	if (!err) {
+		list_add_tail(&r->list, &psock->ingress);
+		sk->sk_data_ready(sk);
+	} else {
+		free_start_sg(sk, r);
+		kfree(r);
+	}
+
+	release_sock(sk);
+	return err;
+}
+
 static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 				       struct sk_msg_buff *md,
 				       int flags)
@@ -475,6 +570,7 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 	struct smap_psock *psock;
 	struct scatterlist *sg;
 	int i, err, free = 0;
+	bool ingress = !!(md->flags & BPF_F_INGRESS);
 
 	sg = md->sg_data;
 
@@ -487,9 +583,14 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 		goto out_rcu;
 
 	rcu_read_unlock();
-	lock_sock(sk);
-	err = bpf_tcp_push(sk, send, md, flags, false);
-	release_sock(sk);
+
+	if (ingress) {
+		err = bpf_tcp_ingress(sk, send, psock, md, flags);
+	} else {
+		lock_sock(sk);
+		err = bpf_tcp_push(sk, send, md, flags, false);
+		release_sock(sk);
+	}
 	smap_release_sock(psock, sk);
 	if (unlikely(err))
 		goto out;
@@ -623,6 +724,89 @@ out_err:
 	return err;
 }
 
+static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			   int nonblock, int flags, int *addr_len)
+{
+	struct iov_iter *iter = &msg->msg_iter;
+	struct smap_psock *psock;
+	int copied = 0;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto out;
+
+	if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
+		goto out;
+	rcu_read_unlock();
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	lock_sock(sk);
+	while (copied != len) {
+		struct scatterlist *sg;
+		struct sk_msg_buff *md;
+		int i;
+
+		md = list_first_entry_or_null(&psock->ingress,
+					      struct sk_msg_buff, list);
+		if (unlikely(!md))
+			break;
+		i = md->sg_start;
+		do {
+			struct page *page;
+			int n, copy;
+
+			sg = &md->sg_data[i];
+			copy = sg->length;
+			page = sg_page(sg);
+
+			if (copied + copy > len)
+				copy = len - copied;
+
+			n = copy_page_to_iter(page, sg->offset, copy, iter);
+			if (n != copy) {
+				md->sg_start = i;
+				release_sock(sk);
+				smap_release_sock(psock, sk);
+				return -EFAULT;
+			}
+
+			copied += copy;
+			sg->offset += copy;
+			sg->length -= copy;
+			sk_mem_uncharge(sk, copy);
+
+			if (!sg->length) {
+				i++;
+				if (i == MAX_SKB_FRAGS)
+					i = 0;
+				put_page(page);
+			}
+			if (copied == len)
+				break;
+		} while (i != md->sg_end);
+		md->sg_start = i;
+
+		if (!sg->length && md->sg_start == md->sg_end) {
+			list_del(&md->list);
+			kfree(md);
+		}
+	}
+
+	release_sock(sk);
+	smap_release_sock(psock, sk);
+	return copied;
+out:
+	rcu_read_unlock();
+	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+}
+
+
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
@@ -1107,6 +1291,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab)
 static void smap_gc_work(struct work_struct *w)
 {
 	struct smap_psock_map_entry *e, *tmp;
+	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 
 	psock = container_of(w, struct smap_psock, gc_work);
@@ -1131,6 +1316,12 @@ static void smap_gc_work(struct work_struct *w)
 		kfree(psock->cork);
 	}
 
+	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
+		list_del(&md->list);
+		free_start_sg(psock->sock, md);
+		kfree(md);
+	}
+
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
 		list_del(&e->list);
 		kfree(e);
@@ -1160,6 +1351,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 	INIT_WORK(&psock->tx_work, smap_tx_work);
 	INIT_WORK(&psock->gc_work, smap_gc_work);
 	INIT_LIST_HEAD(&psock->maps);
+	INIT_LIST_HEAD(&psock->ingress);
 	refcount_set(&psock->refcnt, 1);
 
 	rcu_assign_sk_user_data(sock, psock);
diff --git a/net/core/filter.c b/net/core/filter.c
index afd825534ac4..a5a995e5b380 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1894,7 +1894,7 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
 	   struct bpf_map *, map, u32, key, u64, flags)
 {
 	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags))
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return SK_DROP;
 
 	msg->key = key;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c31be306572..bccc4c270087 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 	}
 }
 
+static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+					  int target, struct sock *sk)
+{
+	return (tp->rcv_nxt - tp->copied_seq >= target) ||
+		(sk->sk_prot->stream_memory_read ?
+		sk->sk_prot->stream_memory_read(sk) : false);
+}
+
 /*
  *	Wait for a TCP event.
  *
@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		if (tp->rcv_nxt - tp->copied_seq >= target)
+		if (tcp_stream_is_readable(tp, target, sk))
 			mask |= EPOLLIN | EPOLLRDNORM;
 
 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {

From 2596f64cb2ff5767166bee4e812734747c7a0474 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 28 Mar 2018 12:49:20 -0700
Subject: [PATCH 1480/1678] bpf: sockmap, add BPF_F_INGRESS tests

Add a set of tests to verify ingress flag in redirect helpers
works correctly with various msg sizes.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_kern.c  | 41 +++++++++++++++++++++++++--------
 samples/sockmap/sockmap_test.sh | 22 +++++++++++++++++-
 samples/sockmap/sockmap_user.c  | 35 ++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 9ad5ba79c85a..ca2872260e39 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
 	.type = BPF_MAP_TYPE_SOCKMAP,
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.max_entries = 1,
+	.max_entries = 20,
 };
 
 struct bpf_map_def SEC("maps") sock_apply_bytes = {
@@ -78,6 +78,13 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = {
 	.max_entries = 2
 };
 
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
 
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
@@ -197,8 +204,9 @@ int bpf_prog5(struct sk_msg_md *msg)
 SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0, one = 1;
-	int *start, *end;
+	int *bytes, zero = 0, one = 1, key = 0;
+	int *start, *end, *f;
+	__u64 flags = 0;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -210,15 +218,22 @@ int bpf_prog6(struct sk_msg_md *msg)
 	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
 	if (start && end)
 		bpf_msg_pull_data(msg, *start, *end, 0);
-	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
 }
 
 SEC("sk_msg4")
 int bpf_prog7(struct sk_msg_md *msg)
 {
-	int err1 = 0, err2 = 0, zero = 0, one = 1;
-	int *bytes, *start, *end, len1, len2;
+	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+	int *f, *bytes, *start, *end, len1, len2;
+	__u64 flags = 0;
 
+		int err;
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
 		err1 = bpf_msg_apply_bytes(msg, *bytes);
@@ -229,7 +244,6 @@ int bpf_prog7(struct sk_msg_md *msg)
 	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
 	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
 	if (start && end) {
-		int err;
 
 		bpf_printk("sk_msg2: pull(%i:%i)\n",
 			   start ? *start : 0, end ? *end : 0);
@@ -241,9 +255,16 @@ int bpf_prog7(struct sk_msg_md *msg)
 		bpf_printk("sk_msg2: length update %i->%i\n",
 			   len1, len2);
 	}
-	bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
-		   len1, err1, err2);
-	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+	bpf_printk("sk_msg3: err %i\n", err);
+	return err;
 }
 
 SEC("sk_msg5")
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
index 6d8cc40cca22..13b205fdb0f0 100755
--- a/samples/sockmap/sockmap_test.sh
+++ b/samples/sockmap/sockmap_test.sh
@@ -1,5 +1,5 @@
 #Test a bunch of positive cases to verify basic functionality
-for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for prog in "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
 for t in "sendmsg" "sendpage"; do
 for r in 1 10 100; do
 	for i in 1 10 100; do
@@ -100,6 +100,16 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
 # Test apply and redirect with larger value than send
 r=1
 i=8
@@ -113,6 +123,16 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
 # Test apply and redirect with apply that never reaches limit
 r=1024
 i=1
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 07aa237221d1..f7503f44b209 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -64,6 +64,7 @@ int txmsg_apply;
 int txmsg_cork;
 int txmsg_start;
 int txmsg_end;
+int txmsg_ingress;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -83,6 +84,7 @@ static const struct option long_options[] = {
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
 	{"txmsg_start", required_argument,	NULL, 's'},
 	{"txmsg_end",	required_argument,	NULL, 'e'},
+	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
 	{0, 0, NULL, 0 }
 };
 
@@ -793,6 +795,39 @@ run:
 				return err;
 			}
 		}
+
+		if (txmsg_ingress) {
+			int in = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+			i = 1;
+			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 2;
+			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
 	}
 
 	if (txmsg_drop)

From fa246693a111fab32bd51d20f07a347e42773ee9 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 28 Mar 2018 12:49:25 -0700
Subject: [PATCH 1481/1678] bpf: sockmap, BPF_F_INGRESS flag for
 BPF_SK_SKB_STREAM_VERDICT:

Add support for the BPF_F_INGRESS flag in skb redirect helper. To
do this convert skb into a scatterlist and push into ingress queue.
This is the same logic that is used in the sk_msg redirect helper
so it should feel familiar.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h |  1 +
 kernel/bpf/sockmap.c   | 92 ++++++++++++++++++++++++++++++++++--------
 net/core/filter.c      |  2 +-
 3 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 961cc5d53956..897ff3d95968 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -521,6 +521,7 @@ struct sk_msg_buff {
 	__u32 key;
 	__u32 flags;
 	struct bpf_map *map;
+	struct sk_buff *skb;
 	struct list_head list;
 };
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 402e15466e9f..9192fdbcbccc 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -785,7 +785,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 				i++;
 				if (i == MAX_SKB_FRAGS)
 					i = 0;
-				put_page(page);
+				if (!md->skb)
+					put_page(page);
 			}
 			if (copied == len)
 				break;
@@ -794,6 +795,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		if (!sg->length && md->sg_start == md->sg_end) {
 			list_del(&md->list);
+			if (md->skb)
+				consume_skb(md->skb);
 			kfree(md);
 		}
 	}
@@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		__SK_DROP;
 }
 
+static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sock;
+	int copied = 0, num_sg;
+	struct sk_msg_buff *r;
+
+	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
+	if (unlikely(!r))
+		return -EAGAIN;
+
+	if (!sk_rmem_schedule(sk, skb, skb->len)) {
+		kfree(r);
+		return -EAGAIN;
+	}
+
+	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
+	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
+	if (unlikely(num_sg < 0)) {
+		kfree(r);
+		return num_sg;
+	}
+	sk_mem_charge(sk, skb->len);
+	copied = skb->len;
+	r->sg_start = 0;
+	r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
+	r->skb = skb;
+	list_add_tail(&r->list, &psock->ingress);
+	sk->sk_data_ready(sk);
+	return copied;
+}
+
 static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 {
+	struct smap_psock *peer;
 	struct sock *sk;
+	__u32 in;
 	int rc;
 
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case __SK_REDIRECT:
 		sk = do_sk_redirect_map(skb);
-		if (likely(sk)) {
-			struct smap_psock *peer = smap_psock_sk(sk);
+		if (!sk) {
+			kfree_skb(skb);
+			break;
+		}
 
-			if (likely(peer &&
-				   test_bit(SMAP_TX_RUNNING, &peer->state) &&
-				   !sock_flag(sk, SOCK_DEAD) &&
-				   sock_writeable(sk))) {
-				skb_set_owner_w(skb, sk);
-				skb_queue_tail(&peer->rxqueue, skb);
-				schedule_work(&peer->tx_work);
-				break;
-			}
+		peer = smap_psock_sk(sk);
+		in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
+
+		if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
+			     !test_bit(SMAP_TX_RUNNING, &peer->state))) {
+			kfree_skb(skb);
+			break;
+		}
+
+		if (!in && sock_writeable(sk)) {
+			skb_set_owner_w(skb, sk);
+			skb_queue_tail(&peer->rxqueue, skb);
+			schedule_work(&peer->tx_work);
+			break;
+		} else if (in &&
+			   atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
+			skb_queue_tail(&peer->rxqueue, skb);
+			schedule_work(&peer->tx_work);
+			break;
 		}
 	/* Fall through and free skb otherwise */
 	case __SK_DROP:
@@ -1127,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w)
 	}
 
 	while ((skb = skb_dequeue(&psock->rxqueue))) {
+		__u32 flags;
+
 		rem = skb->len;
 		off = 0;
 start:
+		flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
 		do {
-			if (likely(psock->sock->sk_socket))
-				n = skb_send_sock_locked(psock->sock,
-							 skb, off, rem);
-			else
+			if (likely(psock->sock->sk_socket)) {
+				if (flags)
+					n = smap_do_ingress(psock, skb);
+				else
+					n = skb_send_sock_locked(psock->sock,
+								 skb, off, rem);
+			} else {
 				n = -EINVAL;
+			}
+
 			if (n <= 0) {
 				if (n == -EAGAIN) {
 					/* Retry when space is available */
@@ -1153,7 +1209,9 @@ start:
 			rem -= n;
 			off += n;
 		} while (rem);
-		kfree_skb(skb);
+
+		if (!flags)
+			kfree_skb(skb);
 	}
 out:
 	release_sock(psock->sock);
diff --git a/net/core/filter.c b/net/core/filter.c
index a5a995e5b380..e989bf313195 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags))
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return SK_DROP;
 
 	tcb->bpf.key = key;

From 2e3f6c5ff253b545b3916300d02871fcc101d4a7 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 28 Mar 2018 12:49:30 -0700
Subject: [PATCH 1482/1678] bpf: sockmap, more BPF_SK_SKB_STREAM_VERDICT tests

Add BPF_SK_SKB_STREAM_VERDICT tests for ingress hook. While
we do this also bring stream tests in-line with MSG based
testing.

A map for skb options is added for userland to push options
at BPF programs.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/sockmap/sockmap_kern.c  | 21 ++++++++++++++++++---
 samples/sockmap/sockmap_test.sh | 20 +++++++++++++++++++-
 samples/sockmap/sockmap_user.c  | 23 +++++++++++++++++++++++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index ca2872260e39..9ff8bc5dc206 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -85,6 +85,12 @@ struct bpf_map_def SEC("maps") sock_redir_flags = {
 	.max_entries = 1
 };
 
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
 
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
@@ -97,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
 {
 	__u32 lport = skb->local_port;
 	__u32 rport = skb->remote_port;
-	int ret = 0;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
 
 	if (lport == 10000)
 		ret = 10;
 	else
 		ret = 1;
 
-	bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
-	return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
 }
 
 SEC("sockops")
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
index 13b205fdb0f0..ace75f070eb8 100755
--- a/samples/sockmap/sockmap_test.sh
+++ b/samples/sockmap/sockmap_test.sh
@@ -1,5 +1,5 @@
 #Test a bunch of positive cases to verify basic functionality
-for prog in "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
+for prog in  "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
 for t in "sendmsg" "sendpage"; do
 for r in 1 10 100; do
 	for i in 1 10 100; do
@@ -109,6 +109,15 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
 
 # Test apply and redirect with larger value than send
 r=1
@@ -132,6 +141,15 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
 
 # Test apply and redirect with apply that never reaches limit
 r=1024
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index f7503f44b209..6f2334912283 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -65,6 +65,7 @@ int txmsg_cork;
 int txmsg_start;
 int txmsg_end;
 int txmsg_ingress;
+int txmsg_skb;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -85,6 +86,7 @@ static const struct option long_options[] = {
 	{"txmsg_start", required_argument,	NULL, 's'},
 	{"txmsg_end",	required_argument,	NULL, 'e'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
+	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
 	{0, 0, NULL, 0 }
 };
 
@@ -828,6 +830,27 @@ run:
 					err, strerror(errno));
 			}
 		}
+
+		if (txmsg_skb) {
+			int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 3;
+			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
 	}
 
 	if (txmsg_drop)

From 39c202d228c3da5a5531be847e9b06cc9b787f31 Mon Sep 17 00:00:00 2001
From: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Date: Wed, 21 Mar 2018 15:42:15 +1300
Subject: [PATCH 1483/1678] netfilter: ebtables: Add support for specifying
 match revision

Currently ebtables assumes that the revision number of all match
modules is 0, which is an issue when trying to use existing
xtables matches with ebtables. The solution is to modify ebtables
to allow extensions to specify a revision number, similar to
iptables. This gets passed down to the kernel, which is then able
to find the match module correctly.

To main binary backwards compatibility, the size of the ebt_entry
structures is not changed, only the size of the name field is
decreased by 1 byte to make room for the revision field.

Signed-off-by: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 .../uapi/linux/netfilter_bridge/ebtables.h    | 16 +++++--
 net/bridge/netfilter/ebtables.c               | 47 ++++++++++++-------
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 9ff57c0a0199..0c7dc8315013 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -20,6 +20,7 @@
 #define EBT_TABLE_MAXNAMELEN 32
 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN
 #define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN
+#define EBT_EXTENSION_MAXNAMELEN 31
 
 /* verdicts >0 are "branches" */
 #define EBT_ACCEPT   -1
@@ -120,7 +121,10 @@ struct ebt_entries {
 
 struct ebt_entry_match {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			uint8_t revision;
+		};
 		struct xt_match *match;
 	} u;
 	/* size of data */
@@ -130,7 +134,10 @@ struct ebt_entry_match {
 
 struct ebt_entry_watcher {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			uint8_t revision;
+		};
 		struct xt_target *watcher;
 	} u;
 	/* size of data */
@@ -140,7 +147,10 @@ struct ebt_entry_watcher {
 
 struct ebt_entry_target {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			uint8_t revision;
+		};
 		struct xt_target *target;
 	} u;
 	/* size of data */
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9a26d2b7420f..a8cb543e3296 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -356,12 +356,12 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 	    left - sizeof(struct ebt_entry_match) < m->match_size)
 		return -EINVAL;
 
-	match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+	match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
 	if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
 		if (!IS_ERR(match))
 			module_put(match->me);
 		request_module("ebt_%s", m->u.name);
-		match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
+		match = xt_find_match(NFPROTO_BRIDGE, m->u.name, m->u.revision);
 	}
 	if (IS_ERR(match))
 		return PTR_ERR(match);
@@ -1350,16 +1350,17 @@ static int update_counters(struct net *net, const void __user *user,
 
 static inline int ebt_obj_to_user(char __user *um, const char *_name,
 				  const char *data, int entrysize,
-				  int usersize, int datasize)
+				  int usersize, int datasize, u8 revision)
 {
-	char name[EBT_FUNCTION_MAXNAMELEN] = {0};
+	char name[EBT_EXTENSION_MAXNAMELEN] = {0};
 
-	/* ebtables expects 32 bytes long names but xt_match names are 29 bytes
+	/* ebtables expects 31 bytes long names but xt_match names are 29 bytes
 	 * long. Copy 29 bytes and fill remaining bytes with zeroes.
 	 */
 	strlcpy(name, _name, sizeof(name));
-	if (copy_to_user(um, name, EBT_FUNCTION_MAXNAMELEN) ||
-	    put_user(datasize, (int __user *)(um + EBT_FUNCTION_MAXNAMELEN)) ||
+	if (copy_to_user(um, name, EBT_EXTENSION_MAXNAMELEN) ||
+	    put_user(revision, (u8 __user *)(um + EBT_EXTENSION_MAXNAMELEN)) ||
+	    put_user(datasize, (int __user *)(um + EBT_EXTENSION_MAXNAMELEN + 1)) ||
 	    xt_data_to_user(um + entrysize, data, usersize, datasize,
 			    XT_ALIGN(datasize)))
 		return -EFAULT;
@@ -1372,7 +1373,8 @@ static inline int ebt_match_to_user(const struct ebt_entry_match *m,
 {
 	return ebt_obj_to_user(ubase + ((char *)m - base),
 			       m->u.match->name, m->data, sizeof(*m),
-			       m->u.match->usersize, m->match_size);
+			       m->u.match->usersize, m->match_size,
+			       m->u.match->revision);
 }
 
 static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
@@ -1380,7 +1382,8 @@ static inline int ebt_watcher_to_user(const struct ebt_entry_watcher *w,
 {
 	return ebt_obj_to_user(ubase + ((char *)w - base),
 			       w->u.watcher->name, w->data, sizeof(*w),
-			       w->u.watcher->usersize, w->watcher_size);
+			       w->u.watcher->usersize, w->watcher_size,
+			       w->u.watcher->revision);
 }
 
 static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
@@ -1411,7 +1414,8 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
 	if (ret != 0)
 		return ret;
 	ret = ebt_obj_to_user(hlp, t->u.target->name, t->data, sizeof(*t),
-			      t->u.target->usersize, t->target_size);
+			      t->u.target->usersize, t->target_size,
+			      t->u.target->revision);
 	if (ret != 0)
 		return ret;
 
@@ -1599,7 +1603,10 @@ struct compat_ebt_replace {
 /* struct ebt_entry_match, _target and _watcher have same layout */
 struct compat_ebt_entry_mwt {
 	union {
-		char name[EBT_FUNCTION_MAXNAMELEN];
+		struct {
+			char name[EBT_EXTENSION_MAXNAMELEN];
+			u8 revision;
+		};
 		compat_uptr_t ptr;
 	} u;
 	compat_uint_t match_size;
@@ -1638,8 +1645,9 @@ static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr,
 
 	BUG_ON(off >= m->match_size);
 
-	if (copy_to_user(cm->u.name, match->name,
-	    strlen(match->name) + 1) || put_user(msize, &cm->match_size))
+	if (copy_to_user(cm->u.name, match->name, strlen(match->name) + 1) ||
+	    put_user(match->revision, &cm->u.revision) ||
+	    put_user(msize, &cm->match_size))
 		return -EFAULT;
 
 	if (match->compat_to_user) {
@@ -1668,8 +1676,9 @@ static int compat_target_to_user(struct ebt_entry_target *t,
 
 	BUG_ON(off >= t->target_size);
 
-	if (copy_to_user(cm->u.name, target->name,
-	    strlen(target->name) + 1) || put_user(tsize, &cm->match_size))
+	if (copy_to_user(cm->u.name, target->name, strlen(target->name) + 1) ||
+	    put_user(target->revision, &cm->u.revision) ||
+	    put_user(tsize, &cm->match_size))
 		return -EFAULT;
 
 	if (target->compat_to_user) {
@@ -1933,7 +1942,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 				struct ebt_entries_buf_state *state,
 				const unsigned char *base)
 {
-	char name[EBT_FUNCTION_MAXNAMELEN];
+	char name[EBT_EXTENSION_MAXNAMELEN];
 	struct xt_match *match;
 	struct xt_target *wt;
 	void *dst = NULL;
@@ -1947,7 +1956,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 
 	switch (compat_mwt) {
 	case EBT_COMPAT_MATCH:
-		match = xt_request_find_match(NFPROTO_BRIDGE, name, 0);
+		match = xt_request_find_match(NFPROTO_BRIDGE, name,
+					      mwt->u.revision);
 		if (IS_ERR(match))
 			return PTR_ERR(match);
 
@@ -1966,7 +1976,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 		break;
 	case EBT_COMPAT_WATCHER: /* fallthrough */
 	case EBT_COMPAT_TARGET:
-		wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0);
+		wt = xt_request_find_target(NFPROTO_BRIDGE, name,
+					    mwt->u.revision);
 		if (IS_ERR(wt))
 			return PTR_ERR(wt);
 		off = xt_compat_target_offset(wt);

From 1be3ac98444066a292de02c2c12e203bdf575e7d Mon Sep 17 00:00:00 2001
From: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Date: Wed, 21 Mar 2018 15:42:16 +1300
Subject: [PATCH 1484/1678] netfilter: ebtables: Add string filter

This patch is part of a proposal to add a string filter to
ebtables, which would be similar to the string filter in
iptables. Like iptables, the ebtables filter uses the xt_string
module.

Signed-off-by: Bernie Harris <bernie.harris@alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_string.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 423293ee57c2..be1feddadcf0 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -21,6 +21,7 @@ MODULE_DESCRIPTION("Xtables: string-based matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
+MODULE_ALIAS("ebt_string");
 
 static bool
 string_mt(const struct sk_buff *skb, struct xt_action_param *par)

From 19b351f16fd940092f2daa75773b0320f0785de9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 28 Mar 2018 15:00:43 +0200
Subject: [PATCH 1485/1678] netfilter: add flowtable documentation

This patch adds initial documentation for the Netfilter flowtable
infrastructure.

Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 Documentation/networking/nf_flowtable.txt | 112 ++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 Documentation/networking/nf_flowtable.txt

diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt
new file mode 100644
index 000000000000..54128c50d508
--- /dev/null
+++ b/Documentation/networking/nf_flowtable.txt
@@ -0,0 +1,112 @@
+Netfilter's flowtable infrastructure
+====================================
+
+This documentation describes the software flowtable infrastructure available in
+Netfilter since Linux kernel 4.16.
+
+Overview
+--------
+
+Initial packets follow the classic forwarding path, once the flow enters the
+established state according to the conntrack semantics (ie. we have seen traffic
+in both directions), then you can decide to offload the flow to the flowtable
+from the forward chain via the 'flow offload' action available in nftables.
+
+Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
+output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
+path (the visible effect is that you do not see these packets from any of the
+netfilter hooks coming after the ingress). In case of flowtable miss, the packet
+follows the classic forward path.
+
+The flowtable uses a resizable hashtable, lookups are based on the following
+7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
+and destination ports and the input interface (useful in case there are several
+conntrack zones in place).
+
+Flowtables are populated via the 'flow offload' nftables action, so the user can
+selectively specify what flows are placed into the flow table. Hence, packets
+follow the classic forwarding path unless the user explicitly instruct packets
+to use this new alternative forwarding path via nftables policy.
+
+This is represented in Fig.1, which describes the classic forwarding path
+including the Netfilter hooks and the flowtable fastpath bypass.
+
+                                         userspace process
+                                          ^              |
+                                          |              |
+                                     _____|____     ____\/___
+                                    /          \   /         \
+                                    |   input   |  |  output  |
+                                    \__________/   \_________/
+                                         ^               |
+                                         |               |
+      _________      __________      ---------     _____\/_____
+     /         \    /          \     |Routing |   /            \
+  -->  ingress  ---> prerouting ---> |decision|   | postrouting |--> neigh_xmit
+     \_________/    \__________/     ----------   \____________/          ^
+       |      ^          |               |               ^                |
+   flowtable  |          |          ____\/___            |                |
+       |      |          |         /         \           |                |
+    __\/___   |          --------->| forward |------------                |
+    |-----|   |                    \_________/                            |
+    |-----|   |                 'flow offload' rule                       |
+    |-----|   |                   adds entry to                           |
+    |_____|   |                     flowtable                             |
+       |      |                                                           |
+      / \     |                                                           |
+     /hit\_no_|                                                           |
+     \ ? /                                                                |
+      \ /                                                                 |
+       |__yes_________________fastpath bypass ____________________________|
+
+               Fig.1 Netfilter hooks and flowtable interactions
+
+The flowtable entry also stores the NAT configuration, so all packets are
+mangled according to the NAT policy that matches the initial packets that went
+through the classic forwarding path. The TTL is decremented before calling
+neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
+path given that the transport selectors are missing, therefore flowtable lookup
+is not possible.
+
+Example configuration
+---------------------
+
+Enabling the flowtable bypass is relatively easy, you only need to create a
+flowtable and add one rule to your forward chain.
+
+        table inet x {
+		flowtable f {
+			hook ingress priority 0 devices = { eth0, eth1 };
+		}
+                chain y {
+                        type filter hook forward priority 0; policy accept;
+                        ip protocol tcp flow offload @f
+                        counter packets 0 bytes 0
+                }
+        }
+
+This example adds the flowtable 'f' to the ingress hook of the eth0 and eth1
+netdevices. You can create as many flowtables as you want in case you need to
+perform resource partitioning. The flowtable priority defines the order in which
+hooks are run in the pipeline, this is convenient in case you already have a
+nftables ingress chain (make sure the flowtable priority is smaller than the
+nftables ingress chain hence the flowtable runs before in the pipeline).
+
+The 'flow offload' action from the forward chain 'y' adds an entry to the
+flowtable for the TCP syn-ack packet coming in the reply direction. Once the
+flow is offloaded, you will observe that the counter rule in the example above
+does not get updated for the packets that are being forwarded through the
+forwarding bypass.
+
+More reading
+------------
+
+This documentation is based on the LWN.net articles [1][2]. Rafal Milecki also
+made a very complete and comprehensive summary called "A state of network
+acceleration" that describes how things were before this infrastructure was
+mailined [3] and it also makes a rough summary of this work [4].
+
+[1] https://lwn.net/Articles/738214/
+[2] https://lwn.net/Articles/742164/
+[3] http://lists.infradead.org/pipermail/lede-dev/2018-January/010830.html
+[4] http://lists.infradead.org/pipermail/lede-dev/2018-January/010829.html

From 9124a20d8794663a396b5d6f91f66903848a042b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 21 Mar 2018 04:03:22 -0700
Subject: [PATCH 1486/1678] netfilter: ebt_stp: Use generic functions for
 comparisons

Instead of unnecessary const declarations, use the generic functions to
save a little object space.

$ size net/bridge/netfilter/ebt_stp.o*
   text	   data	    bss	    dec	    hex	filename
   1250	    144	      0	   1394	    572	net/bridge/netfilter/ebt_stp.o.new
   1344	    144	      0	   1488	    5d0	net/bridge/netfilter/ebt_stp.o.old

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebt_stp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 3140eb912d7e..47ba98db145d 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,8 +153,6 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
-	const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
-	const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	const struct ebt_entry *e = par->entryinfo;
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
@@ -162,8 +160,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	/* Make sure the match only receives stp frames */
 	if (!par->nft_compat &&
-	    (!ether_addr_equal(e->destmac, bridge_ula) ||
-	     !ether_addr_equal(e->destmsk, msk) ||
+	    (!ether_addr_equal(e->destmac, eth_stp_addr) ||
+	     !is_broadcast_ether_addr(e->destmsk) ||
 	     !(e->bitmask & EBT_DESTMAC)))
 		return -EINVAL;
 

From 32537e91847a5686d57d3811c075a46b2d9b6434 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Mar 2018 11:53:05 +0200
Subject: [PATCH 1487/1678] netfilter: nf_tables: rename struct nf_chain_type

Use nft_ prefix. By when I added chain types, I forgot to use the
nftables prefix. Rename enum nft_chain_type to enum nft_chain_types too,
otherwise there is an overlap.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h         | 16 ++++++++--------
 net/bridge/netfilter/nf_tables_bridge.c   |  2 +-
 net/ipv4/netfilter/nf_tables_arp.c        |  2 +-
 net/ipv4/netfilter/nf_tables_ipv4.c       |  2 +-
 net/ipv4/netfilter/nft_chain_nat_ipv4.c   |  2 +-
 net/ipv4/netfilter/nft_chain_route_ipv4.c |  2 +-
 net/ipv6/netfilter/nf_tables_ipv6.c       |  2 +-
 net/ipv6/netfilter/nft_chain_nat_ipv6.c   |  2 +-
 net/ipv6/netfilter/nft_chain_route_ipv6.c |  2 +-
 net/netfilter/nf_tables_api.c             | 18 +++++++++---------
 net/netfilter/nf_tables_inet.c            |  2 +-
 net/netfilter/nf_tables_netdev.c          |  2 +-
 12 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 663b015dace5..4a304997c304 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -868,7 +868,7 @@ struct nft_chain {
 	char				*name;
 };
 
-enum nft_chain_type {
+enum nft_chain_types {
 	NFT_CHAIN_T_DEFAULT = 0,
 	NFT_CHAIN_T_ROUTE,
 	NFT_CHAIN_T_NAT,
@@ -876,7 +876,7 @@ enum nft_chain_type {
 };
 
 /**
- * 	struct nf_chain_type - nf_tables chain type info
+ * 	struct nft_chain_type - nf_tables chain type info
  *
  * 	@name: name of the type
  * 	@type: numeric identifier
@@ -885,9 +885,9 @@ enum nft_chain_type {
  * 	@hook_mask: mask of valid hooks
  * 	@hooks: array of hook functions
  */
-struct nf_chain_type {
+struct nft_chain_type {
 	const char			*name;
-	enum nft_chain_type		type;
+	enum nft_chain_types		type;
 	int				family;
 	struct module			*owner;
 	unsigned int			hook_mask;
@@ -895,7 +895,7 @@ struct nf_chain_type {
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
-				  enum nft_chain_type type);
+				  enum nft_chain_types type);
 int nft_chain_validate_hooks(const struct nft_chain *chain,
                              unsigned int hook_flags);
 
@@ -917,7 +917,7 @@ struct nft_stats {
  */
 struct nft_base_chain {
 	struct nf_hook_ops		ops;
-	const struct nf_chain_type	*type;
+	const struct nft_chain_type	*type;
 	u8				policy;
 	u8				flags;
 	struct nft_stats __percpu	*stats;
@@ -970,8 +970,8 @@ struct nft_table {
 	char				*name;
 };
 
-int nft_register_chain_type(const struct nf_chain_type *);
-void nft_unregister_chain_type(const struct nf_chain_type *);
+int nft_register_chain_type(const struct nft_chain_type *);
+void nft_unregister_chain_type(const struct nft_chain_type *);
 
 int nft_register_expr(struct nft_expr_type *);
 void nft_unregister_expr(struct nft_expr_type *);
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 5160cf614176..73a1ec556a0a 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -42,7 +42,7 @@ nft_do_chain_bridge(void *priv,
 	return nft_do_chain(&pkt, priv);
 }
 
-static const struct nf_chain_type filter_bridge = {
+static const struct nft_chain_type filter_bridge = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_BRIDGE,
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 036c074736b0..5b0be2a10b69 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -27,7 +27,7 @@ nft_do_chain_arp(void *priv,
 	return nft_do_chain(&pkt, priv);
 }
 
-static const struct nf_chain_type filter_arp = {
+static const struct nft_chain_type filter_arp = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_ARP,
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 96f955496d5f..13bae5cfa257 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -30,7 +30,7 @@ static unsigned int nft_do_chain_ipv4(void *priv,
 	return nft_do_chain(&pkt, priv);
 }
 
-static const struct nf_chain_type filter_ipv4 = {
+static const struct nft_chain_type filter_ipv4 = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index f2a490981594..167f377eb1cb 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -67,7 +67,7 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv,
 	return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
 }
 
-static const struct nf_chain_type nft_chain_nat_ipv4 = {
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
 	.name		= "nat",
 	.type		= NFT_CHAIN_T_NAT,
 	.family		= NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index d965c225b9f6..48cf1f892314 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -58,7 +58,7 @@ static unsigned int nf_route_table_hook(void *priv,
 	return ret;
 }
 
-static const struct nf_chain_type nft_chain_route_ipv4 = {
+static const struct nft_chain_type nft_chain_route_ipv4 = {
 	.name		= "route",
 	.type		= NFT_CHAIN_T_ROUTE,
 	.family		= NFPROTO_IPV4,
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 17e03589331c..d99f9ac6f1b6 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -28,7 +28,7 @@ static unsigned int nft_do_chain_ipv6(void *priv,
 	return nft_do_chain(&pkt, priv);
 }
 
-static const struct nf_chain_type filter_ipv6 = {
+static const struct nft_chain_type filter_ipv6 = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 73fe2bd13fcf..c498aaa8056b 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -65,7 +65,7 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv,
 	return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
 }
 
-static const struct nf_chain_type nft_chain_nat_ipv6 = {
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
 	.name		= "nat",
 	.type		= NFT_CHAIN_T_NAT,
 	.family		= NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 11d3c3b9aa18..d5c7fdc34256 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -60,7 +60,7 @@ static unsigned int nf_route_table_hook(void *priv,
 	return ret;
 }
 
-static const struct nf_chain_type nft_chain_route_ipv6 = {
+static const struct nft_chain_type nft_chain_route_ipv6 = {
 	.name		= "route",
 	.type		= NFT_CHAIN_T_ROUTE,
 	.family		= NFPROTO_IPV6,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 92f5606b0dea..bf564f491085 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -384,9 +384,9 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
 	return ++table->hgenerator;
 }
 
-static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
+static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
 
-static const struct nf_chain_type *
+static const struct nft_chain_type *
 __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
 {
 	int i;
@@ -399,10 +399,10 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
 	return NULL;
 }
 
-static const struct nf_chain_type *
+static const struct nft_chain_type *
 nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
 {
-	const struct nf_chain_type *type;
+	const struct nft_chain_type *type;
 
 	type = __nf_tables_chain_type_lookup(nla, family);
 	if (type != NULL)
@@ -859,7 +859,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 	kfree(ctx->table);
 }
 
-int nft_register_chain_type(const struct nf_chain_type *ctype)
+int nft_register_chain_type(const struct nft_chain_type *ctype)
 {
 	int err = 0;
 
@@ -878,7 +878,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(nft_register_chain_type);
 
-void nft_unregister_chain_type(const struct nf_chain_type *ctype)
+void nft_unregister_chain_type(const struct nft_chain_type *ctype)
 {
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 	chain_type[ctype->family][ctype->type] = NULL;
@@ -1239,7 +1239,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 struct nft_chain_hook {
 	u32				num;
 	s32				priority;
-	const struct nf_chain_type	*type;
+	const struct nft_chain_type	*type;
 	struct net_device		*dev;
 };
 
@@ -1249,7 +1249,7 @@ static int nft_chain_parse_hook(struct net *net,
 				bool create)
 {
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
-	const struct nf_chain_type *type;
+	const struct nft_chain_type *type;
 	struct net_device *dev;
 	int err;
 
@@ -6000,7 +6000,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
-				  enum nft_chain_type type)
+				  enum nft_chain_types type)
 {
 	const struct nft_base_chain *basechain;
 
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index e30c7da09d0d..0aefe66ce558 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -38,7 +38,7 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
 	return nft_do_chain(&pkt, priv);
 }
 
-static const struct nf_chain_type filter_inet = {
+static const struct nft_chain_type filter_inet = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_INET,
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 4041fafca934..88ea959211ac 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -38,7 +38,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
 	return nft_do_chain(&pkt, priv);
 }
 
-static const struct nf_chain_type nft_filter_chain_netdev = {
+static const struct nft_chain_type nft_filter_chain_netdev = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_NETDEV,

From cc07eeb0e5ee18895241460bdccf91a4952731f9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Mar 2018 11:53:06 +0200
Subject: [PATCH 1488/1678] netfilter: nf_tables: nft_register_chain_type()
 returns void

Use WARN_ON() instead since it should not happen that neither family
goes over NFPROTO_NUMPROTO nor there is already a chain of this type
already registered.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h         |  2 +-
 net/bridge/netfilter/nf_tables_bridge.c   |  4 +++-
 net/ipv4/netfilter/nf_tables_arp.c        |  4 +++-
 net/ipv4/netfilter/nf_tables_ipv4.c       |  4 +++-
 net/ipv4/netfilter/nft_chain_nat_ipv4.c   |  6 +-----
 net/ipv4/netfilter/nft_chain_route_ipv4.c |  4 +++-
 net/ipv6/netfilter/nf_tables_ipv6.c       |  4 +++-
 net/ipv6/netfilter/nft_chain_nat_ipv6.c   |  6 +-----
 net/ipv6/netfilter/nft_chain_route_ipv6.c |  4 +++-
 net/netfilter/nf_tables_api.c             | 14 +++++---------
 net/netfilter/nf_tables_inet.c            |  4 +++-
 net/netfilter/nf_tables_netdev.c          |  4 +---
 12 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4a304997c304..1f7148fe0504 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -970,7 +970,7 @@ struct nft_table {
 	char				*name;
 };
 
-int nft_register_chain_type(const struct nft_chain_type *);
+void nft_register_chain_type(const struct nft_chain_type *);
 void nft_unregister_chain_type(const struct nft_chain_type *);
 
 int nft_register_expr(struct nft_expr_type *);
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 73a1ec556a0a..ffb8580dfdac 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -63,7 +63,9 @@ static const struct nft_chain_type filter_bridge = {
 
 static int __init nf_tables_bridge_init(void)
 {
-	return nft_register_chain_type(&filter_bridge);
+	nft_register_chain_type(&filter_bridge);
+
+	return 0;
 }
 
 static void __exit nf_tables_bridge_exit(void)
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 5b0be2a10b69..c2ee64208743 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -42,7 +42,9 @@ static const struct nft_chain_type filter_arp = {
 
 static int __init nf_tables_arp_init(void)
 {
-	return nft_register_chain_type(&filter_arp);
+	nft_register_chain_type(&filter_arp);
+
+	return 0;
 }
 
 static void __exit nf_tables_arp_exit(void)
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index 13bae5cfa257..c09667de0d68 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -51,7 +51,9 @@ static const struct nft_chain_type filter_ipv4 = {
 
 static int __init nf_tables_ipv4_init(void)
 {
-	return nft_register_chain_type(&filter_ipv4);
+	nft_register_chain_type(&filter_ipv4);
+
+	return 0;
 }
 
 static void __exit nf_tables_ipv4_exit(void)
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index 167f377eb1cb..9864f5b3279c 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -86,11 +86,7 @@ static const struct nft_chain_type nft_chain_nat_ipv4 = {
 
 static int __init nft_chain_nat_init(void)
 {
-	int err;
-
-	err = nft_register_chain_type(&nft_chain_nat_ipv4);
-	if (err < 0)
-		return err;
+	nft_register_chain_type(&nft_chain_nat_ipv4);
 
 	return 0;
 }
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 48cf1f892314..7d82934c46f4 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -71,7 +71,9 @@ static const struct nft_chain_type nft_chain_route_ipv4 = {
 
 static int __init nft_chain_route_init(void)
 {
-	return nft_register_chain_type(&nft_chain_route_ipv4);
+	nft_register_chain_type(&nft_chain_route_ipv4);
+
+	return 0;
 }
 
 static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index d99f9ac6f1b6..496f69453457 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -49,7 +49,9 @@ static const struct nft_chain_type filter_ipv6 = {
 
 static int __init nf_tables_ipv6_init(void)
 {
-	return nft_register_chain_type(&filter_ipv6);
+	nft_register_chain_type(&filter_ipv6);
+
+	return 0;
 }
 
 static void __exit nf_tables_ipv6_exit(void)
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index c498aaa8056b..c95d9a97d425 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -84,11 +84,7 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = {
 
 static int __init nft_chain_nat_ipv6_init(void)
 {
-	int err;
-
-	err = nft_register_chain_type(&nft_chain_nat_ipv6);
-	if (err < 0)
-		return err;
+	nft_register_chain_type(&nft_chain_nat_ipv6);
 
 	return 0;
 }
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index d5c7fdc34256..da3f1f8cb325 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -73,7 +73,9 @@ static const struct nft_chain_type nft_chain_route_ipv6 = {
 
 static int __init nft_chain_route_init(void)
 {
-	return nft_register_chain_type(&nft_chain_route_ipv6);
+	nft_register_chain_type(&nft_chain_route_ipv6);
+
+	return 0;
 }
 
 static void __exit nft_chain_route_exit(void)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bf564f491085..9e4b1614ee39 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -859,22 +859,18 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 	kfree(ctx->table);
 }
 
-int nft_register_chain_type(const struct nft_chain_type *ctype)
+void nft_register_chain_type(const struct nft_chain_type *ctype)
 {
-	int err = 0;
-
 	if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
-		return -EINVAL;
+		return;
 
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	if (chain_type[ctype->family][ctype->type] != NULL) {
-		err = -EBUSY;
-		goto out;
+	if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		return;
 	}
 	chain_type[ctype->family][ctype->type] = ctype;
-out:
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-	return err;
 }
 EXPORT_SYMBOL_GPL(nft_register_chain_type);
 
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 0aefe66ce558..202c4219969b 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -59,7 +59,9 @@ static const struct nft_chain_type filter_inet = {
 
 static int __init nf_tables_inet_init(void)
 {
-	return nft_register_chain_type(&filter_inet);
+	nft_register_chain_type(&filter_inet);
+
+	return 0;
 }
 
 static void __exit nf_tables_inet_exit(void)
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 88ea959211ac..4c3835bca63e 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -112,9 +112,7 @@ static int __init nf_tables_netdev_init(void)
 {
 	int ret;
 
-	ret = nft_register_chain_type(&nft_filter_chain_netdev);
-	if (ret)
-		return ret;
+	nft_register_chain_type(&nft_filter_chain_netdev);
 
 	ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
 	if (ret)

From 02c7b25e5f54321b9063e18d4f52cce07f8e081d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Mar 2018 11:53:07 +0200
Subject: [PATCH 1489/1678] netfilter: nf_tables: build-in filter chain type

One module per supported filter chain family type takes too much memory
for very little code - too much modularization - place all chain filter
definitions in one single file.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h       |   3 +
 net/bridge/netfilter/Kconfig            |   2 +-
 net/bridge/netfilter/Makefile           |   1 -
 net/bridge/netfilter/nf_tables_bridge.c |  81 -----
 net/ipv4/netfilter/Kconfig              |   4 +-
 net/ipv4/netfilter/Makefile             |   2 -
 net/ipv4/netfilter/nf_tables_arp.c      |  60 ----
 net/ipv4/netfilter/nf_tables_ipv4.c     |  69 ----
 net/ipv6/netfilter/Kconfig              |   2 +-
 net/ipv6/netfilter/Makefile             |   1 -
 net/ipv6/netfilter/nf_tables_ipv6.c     |  67 ----
 net/netfilter/Kconfig                   |   4 +-
 net/netfilter/Makefile                  |   9 +-
 net/netfilter/nf_tables_api.c           |   3 +
 net/netfilter/nf_tables_inet.c          |  77 -----
 net/netfilter/nf_tables_netdev.c        | 140 ---------
 net/netfilter/nft_chain_filter.c        | 398 ++++++++++++++++++++++++
 17 files changed, 414 insertions(+), 509 deletions(-)
 delete mode 100644 net/bridge/netfilter/nf_tables_bridge.c
 delete mode 100644 net/ipv4/netfilter/nf_tables_arp.c
 delete mode 100644 net/ipv4/netfilter/nf_tables_ipv4.c
 delete mode 100644 net/ipv6/netfilter/nf_tables_ipv6.c
 delete mode 100644 net/netfilter/nf_tables_inet.c
 delete mode 100644 net/netfilter/nf_tables_netdev.c
 create mode 100644 net/netfilter/nft_chain_filter.c

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1f7148fe0504..77c3c04c27ac 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1345,4 +1345,7 @@ struct nft_trans_flowtable {
 #define nft_trans_flowtable(trans)	\
 	(((struct nft_trans_flowtable *)trans->data)->flowtable)
 
+int __init nft_chain_filter_init(void);
+void __exit nft_chain_filter_fini(void);
+
 #endif /* _NET_NF_TABLES_H */
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 225d1668dfdd..f212447794bd 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -5,7 +5,7 @@
 menuconfig NF_TABLES_BRIDGE
 	depends on BRIDGE && NETFILTER && NF_TABLES
 	select NETFILTER_FAMILY_BRIDGE
-	tristate "Ethernet Bridge nf_tables support"
+	bool "Ethernet Bridge nf_tables support"
 
 if NF_TABLES_BRIDGE
 
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 2f28e16de6c7..4bc758dd4a8c 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,7 +3,6 @@
 # Makefile for the netfilter modules for Link Layer filtering on a bridge.
 #
 
-obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
 obj-$(CONFIG_NFT_BRIDGE_META)  += nft_meta_bridge.o
 obj-$(CONFIG_NFT_BRIDGE_REJECT)  += nft_reject_bridge.o
 
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
deleted file mode 100644
index ffb8580dfdac..000000000000
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netfilter_bridge.h>
-#include <net/netfilter/nf_tables.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int
-nft_do_chain_bridge(void *priv,
-		    struct sk_buff *skb,
-		    const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-
-	switch (eth_hdr(skb)->h_proto) {
-	case htons(ETH_P_IP):
-		nft_set_pktinfo_ipv4_validate(&pkt, skb);
-		break;
-	case htons(ETH_P_IPV6):
-		nft_set_pktinfo_ipv6_validate(&pkt, skb);
-		break;
-	default:
-		nft_set_pktinfo_unspec(&pkt, skb);
-		break;
-	}
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static const struct nft_chain_type filter_bridge = {
-	.name		= "filter",
-	.type		= NFT_CHAIN_T_DEFAULT,
-	.family		= NFPROTO_BRIDGE,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_BR_PRE_ROUTING) |
-			  (1 << NF_BR_LOCAL_IN) |
-			  (1 << NF_BR_FORWARD) |
-			  (1 << NF_BR_LOCAL_OUT) |
-			  (1 << NF_BR_POST_ROUTING),
-	.hooks		= {
-		[NF_BR_PRE_ROUTING]	= nft_do_chain_bridge,
-		[NF_BR_LOCAL_IN]	= nft_do_chain_bridge,
-		[NF_BR_FORWARD]		= nft_do_chain_bridge,
-		[NF_BR_LOCAL_OUT]	= nft_do_chain_bridge,
-		[NF_BR_POST_ROUTING]	= nft_do_chain_bridge,
-	},
-};
-
-static int __init nf_tables_bridge_init(void)
-{
-	nft_register_chain_type(&filter_bridge);
-
-	return 0;
-}
-
-static void __exit nf_tables_bridge_exit(void)
-{
-	nft_unregister_chain_type(&filter_bridge);
-}
-
-module_init(nf_tables_bridge_init);
-module_exit(nf_tables_bridge_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index dfe6fa4ea554..280048e1e395 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV4
 if NF_TABLES
 
 config NF_TABLES_IPV4
-	tristate "IPv4 nf_tables support"
+	bool "IPv4 nf_tables support"
 	help
 	  This option enables the IPv4 support for nf_tables.
 
@@ -71,7 +71,7 @@ config NFT_FIB_IPV4
 endif # NF_TABLES_IPV4
 
 config NF_TABLES_ARP
-	tristate "ARP nf_tables support"
+	bool "ARP nf_tables support"
 	select NETFILTER_FAMILY_ARP
 	help
 	  This option enables the ARP support for nf_tables.
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 2dad20eefd26..62ede5e3a3de 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -39,7 +39,6 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
 # NAT protocols (nf_nat)
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
 
-obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
@@ -47,7 +46,6 @@ obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
 obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
 obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
-obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # flow table support
 obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
deleted file mode 100644
index c2ee64208743..000000000000
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/netfilter_arp.h>
-#include <net/netfilter/nf_tables.h>
-
-static unsigned int
-nft_do_chain_arp(void *priv,
-		  struct sk_buff *skb,
-		  const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_unspec(&pkt, skb);
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static const struct nft_chain_type filter_arp = {
-	.name		= "filter",
-	.type		= NFT_CHAIN_T_DEFAULT,
-	.family		= NFPROTO_ARP,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_ARP_IN) |
-			  (1 << NF_ARP_OUT),
-	.hooks		= {
-		[NF_ARP_IN]		= nft_do_chain_arp,
-		[NF_ARP_OUT]		= nft_do_chain_arp,
-	},
-};
-
-static int __init nf_tables_arp_init(void)
-{
-	nft_register_chain_type(&filter_arp);
-
-	return 0;
-}
-
-static void __exit nf_tables_arp_exit(void)
-{
-	nft_unregister_chain_type(&filter_arp);
-}
-
-module_init(nf_tables_arp_init);
-module_exit(nf_tables_arp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
deleted file mode 100644
index c09667de0d68..000000000000
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-
-static unsigned int nft_do_chain_ipv4(void *priv,
-				      struct sk_buff *skb,
-				      const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv4(&pkt, skb);
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static const struct nft_chain_type filter_ipv4 = {
-	.name		= "filter",
-	.type		= NFT_CHAIN_T_DEFAULT,
-	.family		= NFPROTO_IPV4,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
-			  (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_FORWARD) |
-			  (1 << NF_INET_PRE_ROUTING) |
-			  (1 << NF_INET_POST_ROUTING),
-	.hooks		= {
-		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv4,
-		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv4,
-		[NF_INET_FORWARD]	= nft_do_chain_ipv4,
-		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv4,
-		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv4,
-	},
-};
-
-static int __init nf_tables_ipv4_init(void)
-{
-	nft_register_chain_type(&filter_ipv4);
-
-	return 0;
-}
-
-static void __exit nf_tables_ipv4_exit(void)
-{
-	nft_unregister_chain_type(&filter_ipv4);
-}
-
-module_init(nf_tables_ipv4_init);
-module_exit(nf_tables_ipv4_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index d395d1590699..ccbfa83e4bb0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV6
 if NF_TABLES
 
 config NF_TABLES_IPV6
-	tristate "IPv6 nf_tables support"
+	bool "IPv6 nf_tables support"
 	help
 	  This option enables the IPv6 support for nf_tables.
 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d984057b8395..44273d6f03a5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
 obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
 
 # nf_tables
-obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
 obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
deleted file mode 100644
index 496f69453457..000000000000
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int nft_do_chain_ipv6(void *priv,
-				      struct sk_buff *skb,
-				      const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-	nft_set_pktinfo_ipv6(&pkt, skb);
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static const struct nft_chain_type filter_ipv6 = {
-	.name		= "filter",
-	.type		= NFT_CHAIN_T_DEFAULT,
-	.family		= NFPROTO_IPV6,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
-			  (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_FORWARD) |
-			  (1 << NF_INET_PRE_ROUTING) |
-			  (1 << NF_INET_POST_ROUTING),
-	.hooks		= {
-		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
-		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv6,
-		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
-		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
-		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
-	},
-};
-
-static int __init nf_tables_ipv6_init(void)
-{
-	nft_register_chain_type(&filter_ipv6);
-
-	return 0;
-}
-
-static void __exit nf_tables_ipv6_exit(void)
-{
-	nft_unregister_chain_type(&filter_ipv6);
-}
-
-module_init(nf_tables_ipv6_init);
-module_exit(nf_tables_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d3220b43c832..704b3832dbad 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -465,12 +465,12 @@ config NF_TABLES_INET
 	depends on IPV6
 	select NF_TABLES_IPV4
 	select NF_TABLES_IPV6
-	tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
+	bool "Netfilter nf_tables mixed IPv4/IPv6 tables support"
 	help
 	  This option enables support for a mixed IPv4/IPv6 "inet" table.
 
 config NF_TABLES_NETDEV
-	tristate "Netfilter nf_tables netdev tables support"
+	bool "Netfilter nf_tables netdev tables support"
 	help
 	  This option enables support for the "netdev" table.
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5d9b8b959e58..fd32bd2c9521 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -73,13 +73,12 @@ obj-$(CONFIG_NETFILTER_CONNCOUNT) += nf_conncount.o
 obj-$(CONFIG_NF_DUP_NETDEV)	+= nf_dup_netdev.o
 
 # nf_tables
-nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \
-		  nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \
-		  nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o
+nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
+		  nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
+		  nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
+		  nft_dynset.o
 
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
-obj-$(CONFIG_NF_TABLES_INET)	+= nf_tables_inet.o
-obj-$(CONFIG_NF_TABLES_NETDEV)	+= nf_tables_netdev.o
 obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
 obj-$(CONFIG_NFT_EXTHDR)	+= nft_exthdr.o
 obj-$(CONFIG_NFT_META)		+= nft_meta.o
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9e4b1614ee39..97ec1c388bfe 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6584,6 +6584,8 @@ static int __init nf_tables_module_init(void)
 {
 	int err;
 
+	nft_chain_filter_init();
+
 	info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
 		       GFP_KERNEL);
 	if (info == NULL) {
@@ -6618,6 +6620,7 @@ static void __exit nf_tables_module_exit(void)
 	rcu_barrier();
 	nf_tables_core_module_exit();
 	kfree(info);
+	nft_chain_filter_fini();
 }
 
 module_init(nf_tables_module_init);
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
deleted file mode 100644
index 202c4219969b..000000000000
--- a/net/netfilter/nf_tables_inet.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2012-2014 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/ip.h>
-
-static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
-				      const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-
-	switch (state->pf) {
-	case NFPROTO_IPV4:
-		nft_set_pktinfo_ipv4(&pkt, skb);
-		break;
-	case NFPROTO_IPV6:
-		nft_set_pktinfo_ipv6(&pkt, skb);
-		break;
-	default:
-		break;
-	}
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static const struct nft_chain_type filter_inet = {
-	.name		= "filter",
-	.type		= NFT_CHAIN_T_DEFAULT,
-	.family		= NFPROTO_INET,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
-			  (1 << NF_INET_LOCAL_OUT) |
-			  (1 << NF_INET_FORWARD) |
-			  (1 << NF_INET_PRE_ROUTING) |
-			  (1 << NF_INET_POST_ROUTING),
-	.hooks		= {
-		[NF_INET_LOCAL_IN]	= nft_do_chain_inet,
-		[NF_INET_LOCAL_OUT]	= nft_do_chain_inet,
-		[NF_INET_FORWARD]	= nft_do_chain_inet,
-		[NF_INET_PRE_ROUTING]	= nft_do_chain_inet,
-		[NF_INET_POST_ROUTING]	= nft_do_chain_inet,
-        },
-};
-
-static int __init nf_tables_inet_init(void)
-{
-	nft_register_chain_type(&filter_inet);
-
-	return 0;
-}
-
-static void __exit nf_tables_inet_exit(void)
-{
-	nft_unregister_chain_type(&filter_inet);
-}
-
-module_init(nf_tables_inet_init);
-module_exit(nf_tables_inet_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(1, "filter");
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
deleted file mode 100644
index 4c3835bca63e..000000000000
--- a/net/netfilter/nf_tables_netdev.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <net/netfilter/nf_tables.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int
-nft_do_chain_netdev(void *priv, struct sk_buff *skb,
-		    const struct nf_hook_state *state)
-{
-	struct nft_pktinfo pkt;
-
-	nft_set_pktinfo(&pkt, skb, state);
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		nft_set_pktinfo_ipv4_validate(&pkt, skb);
-		break;
-	case htons(ETH_P_IPV6):
-		nft_set_pktinfo_ipv6_validate(&pkt, skb);
-		break;
-	default:
-		nft_set_pktinfo_unspec(&pkt, skb);
-		break;
-	}
-
-	return nft_do_chain(&pkt, priv);
-}
-
-static const struct nft_chain_type nft_filter_chain_netdev = {
-	.name		= "filter",
-	.type		= NFT_CHAIN_T_DEFAULT,
-	.family		= NFPROTO_NETDEV,
-	.owner		= THIS_MODULE,
-	.hook_mask	= (1 << NF_NETDEV_INGRESS),
-	.hooks		= {
-		[NF_NETDEV_INGRESS]	= nft_do_chain_netdev,
-	},
-};
-
-static void nft_netdev_event(unsigned long event, struct net_device *dev,
-			     struct nft_ctx *ctx)
-{
-	struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
-
-	switch (event) {
-	case NETDEV_UNREGISTER:
-		if (strcmp(basechain->dev_name, dev->name) != 0)
-			return;
-
-		__nft_release_basechain(ctx);
-		break;
-	case NETDEV_CHANGENAME:
-		if (dev->ifindex != basechain->ops.dev->ifindex)
-			return;
-
-		strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
-		break;
-	}
-}
-
-static int nf_tables_netdev_event(struct notifier_block *this,
-				  unsigned long event, void *ptr)
-{
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct nft_table *table;
-	struct nft_chain *chain, *nr;
-	struct nft_ctx ctx = {
-		.net	= dev_net(dev),
-	};
-
-	if (event != NETDEV_UNREGISTER &&
-	    event != NETDEV_CHANGENAME)
-		return NOTIFY_DONE;
-
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_for_each_entry(table, &ctx.net->nft.tables, list) {
-		if (table->family != NFPROTO_NETDEV)
-			continue;
-
-		ctx.family = table->family;
-		ctx.table = table;
-		list_for_each_entry_safe(chain, nr, &table->chains, list) {
-			if (!nft_is_base_chain(chain))
-				continue;
-
-			ctx.chain = chain;
-			nft_netdev_event(event, dev, &ctx);
-		}
-	}
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block nf_tables_netdev_notifier = {
-	.notifier_call	= nf_tables_netdev_event,
-};
-
-static int __init nf_tables_netdev_init(void)
-{
-	int ret;
-
-	nft_register_chain_type(&nft_filter_chain_netdev);
-
-	ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
-	if (ret)
-		goto err_register_netdevice_notifier;
-
-	return 0;
-
-err_register_netdevice_notifier:
-	nft_unregister_chain_type(&nft_filter_chain_netdev);
-
-	return ret;
-}
-
-static void __exit nf_tables_netdev_exit(void)
-{
-	unregister_netdevice_notifier(&nf_tables_netdev_notifier);
-	nft_unregister_chain_type(&nft_filter_chain_netdev);
-}
-
-module_init(nf_tables_netdev_init);
-module_exit(nf_tables_netdev_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
new file mode 100644
index 000000000000..84c902477a91
--- /dev/null
+++ b/net/netfilter/nft_chain_filter.c
@@ -0,0 +1,398 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <net/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static unsigned int nft_do_chain_ipv4(void *priv,
+				      struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv4(&pkt, skb);
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_ipv4 = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_IPV4,
+	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING),
+	.hooks		= {
+		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv4,
+		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv4,
+		[NF_INET_FORWARD]	= nft_do_chain_ipv4,
+		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv4,
+		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv4,
+	},
+};
+
+static void nft_chain_filter_ipv4_init(void)
+{
+	nft_register_chain_type(&nft_chain_filter_ipv4);
+}
+static void nft_chain_filter_ipv4_fini(void)
+{
+	nft_unregister_chain_type(&nft_chain_filter_ipv4);
+}
+
+#else
+static inline void nft_chain_filter_ipv4_init(void) {}
+static inline void nft_chain_filter_ipv4_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV4 */
+
+#ifdef CONFIG_NF_TABLES_ARP
+static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
+				     const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_unspec(&pkt, skb);
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_arp = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_ARP,
+	.owner		= THIS_MODULE,
+	.hook_mask	= (1 << NF_ARP_IN) |
+			  (1 << NF_ARP_OUT),
+	.hooks		= {
+		[NF_ARP_IN]		= nft_do_chain_arp,
+		[NF_ARP_OUT]		= nft_do_chain_arp,
+	},
+};
+
+static void nft_chain_filter_arp_init(void)
+{
+	nft_register_chain_type(&nft_chain_filter_arp);
+}
+
+static void nft_chain_filter_arp_fini(void)
+{
+	nft_unregister_chain_type(&nft_chain_filter_arp);
+}
+#else
+static inline void nft_chain_filter_arp_init(void) {}
+static inline void nft_chain_filter_arp_fini(void) {}
+#endif /* CONFIG_NF_TABLES_ARP */
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static unsigned int nft_do_chain_ipv6(void *priv,
+				      struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_ipv6(&pkt, skb);
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_ipv6 = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_IPV6,
+	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING),
+	.hooks		= {
+		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6,
+		[NF_INET_LOCAL_OUT]	= nft_do_chain_ipv6,
+		[NF_INET_FORWARD]	= nft_do_chain_ipv6,
+		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6,
+		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6,
+	},
+};
+
+static void nft_chain_filter_ipv6_init(void)
+{
+	nft_register_chain_type(&nft_chain_filter_ipv6);
+}
+
+static void nft_chain_filter_ipv6_fini(void)
+{
+	nft_unregister_chain_type(&nft_chain_filter_ipv6);
+}
+#else
+static inline void nft_chain_filter_ipv6_init(void) {}
+static inline void nft_chain_filter_ipv6_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+#ifdef CONFIG_NF_TABLES_INET
+static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+
+	switch (state->pf) {
+	case NFPROTO_IPV4:
+		nft_set_pktinfo_ipv4(&pkt, skb);
+		break;
+	case NFPROTO_IPV6:
+		nft_set_pktinfo_ipv6(&pkt, skb);
+		break;
+	default:
+		break;
+	}
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_inet = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_INET,
+	.hook_mask	= (1 << NF_INET_LOCAL_IN) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_FORWARD) |
+			  (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING),
+	.hooks		= {
+		[NF_INET_LOCAL_IN]	= nft_do_chain_inet,
+		[NF_INET_LOCAL_OUT]	= nft_do_chain_inet,
+		[NF_INET_FORWARD]	= nft_do_chain_inet,
+		[NF_INET_PRE_ROUTING]	= nft_do_chain_inet,
+		[NF_INET_POST_ROUTING]	= nft_do_chain_inet,
+        },
+};
+
+static void nft_chain_filter_inet_init(void)
+{
+	nft_register_chain_type(&nft_chain_filter_inet);
+}
+
+static void nft_chain_filter_inet_fini(void)
+{
+	nft_unregister_chain_type(&nft_chain_filter_inet);
+}
+#else
+static inline void nft_chain_filter_inet_init(void) {}
+static inline void nft_chain_filter_inet_fini(void) {}
+#endif /* CONFIG_NF_TABLES_IPV6 */
+
+#ifdef CONFIG_NF_TABLES_BRIDGE
+static unsigned int
+nft_do_chain_bridge(void *priv,
+		    struct sk_buff *skb,
+		    const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+
+	switch (eth_hdr(skb)->h_proto) {
+	case htons(ETH_P_IP):
+		nft_set_pktinfo_ipv4_validate(&pkt, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		nft_set_pktinfo_ipv6_validate(&pkt, skb);
+		break;
+	default:
+		nft_set_pktinfo_unspec(&pkt, skb);
+		break;
+	}
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_bridge = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_BRIDGE,
+	.hook_mask	= (1 << NF_BR_PRE_ROUTING) |
+			  (1 << NF_BR_LOCAL_IN) |
+			  (1 << NF_BR_FORWARD) |
+			  (1 << NF_BR_LOCAL_OUT) |
+			  (1 << NF_BR_POST_ROUTING),
+	.hooks		= {
+		[NF_BR_PRE_ROUTING]	= nft_do_chain_bridge,
+		[NF_BR_LOCAL_IN]	= nft_do_chain_bridge,
+		[NF_BR_FORWARD]		= nft_do_chain_bridge,
+		[NF_BR_LOCAL_OUT]	= nft_do_chain_bridge,
+		[NF_BR_POST_ROUTING]	= nft_do_chain_bridge,
+	},
+};
+
+static void nft_chain_filter_bridge_init(void)
+{
+	nft_register_chain_type(&nft_chain_filter_bridge);
+}
+
+static void nft_chain_filter_bridge_fini(void)
+{
+	nft_unregister_chain_type(&nft_chain_filter_bridge);
+}
+#else
+static inline void nft_chain_filter_bridge_init(void) {}
+static inline void nft_chain_filter_bridge_fini(void) {}
+#endif /* CONFIG_NF_TABLES_BRIDGE */
+
+#ifdef CONFIG_NF_TABLES_NETDEV
+static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
+					const struct nf_hook_state *state)
+{
+	struct nft_pktinfo pkt;
+
+	nft_set_pktinfo(&pkt, skb, state);
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		nft_set_pktinfo_ipv4_validate(&pkt, skb);
+		break;
+	case htons(ETH_P_IPV6):
+		nft_set_pktinfo_ipv6_validate(&pkt, skb);
+		break;
+	default:
+		nft_set_pktinfo_unspec(&pkt, skb);
+		break;
+	}
+
+	return nft_do_chain(&pkt, priv);
+}
+
+static const struct nft_chain_type nft_chain_filter_netdev = {
+	.name		= "filter",
+	.type		= NFT_CHAIN_T_DEFAULT,
+	.family		= NFPROTO_NETDEV,
+	.hook_mask	= (1 << NF_NETDEV_INGRESS),
+	.hooks		= {
+		[NF_NETDEV_INGRESS]	= nft_do_chain_netdev,
+	},
+};
+
+static void nft_netdev_event(unsigned long event, struct net_device *dev,
+			     struct nft_ctx *ctx)
+{
+	struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		if (strcmp(basechain->dev_name, dev->name) != 0)
+			return;
+
+		__nft_release_basechain(ctx);
+		break;
+	case NETDEV_CHANGENAME:
+		if (dev->ifindex != basechain->ops.dev->ifindex)
+			return;
+
+		strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+		break;
+	}
+}
+
+static int nf_tables_netdev_event(struct notifier_block *this,
+				  unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct nft_table *table;
+	struct nft_chain *chain, *nr;
+	struct nft_ctx ctx = {
+		.net	= dev_net(dev),
+	};
+
+	if (event != NETDEV_UNREGISTER &&
+	    event != NETDEV_CHANGENAME)
+		return NOTIFY_DONE;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_for_each_entry(table, &ctx.net->nft.tables, list) {
+		if (table->family != NFPROTO_NETDEV)
+			continue;
+
+		ctx.family = table->family;
+		ctx.table = table;
+		list_for_each_entry_safe(chain, nr, &table->chains, list) {
+			if (!nft_is_base_chain(chain))
+				continue;
+
+			ctx.chain = chain;
+			nft_netdev_event(event, dev, &ctx);
+		}
+	}
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_netdev_notifier = {
+	.notifier_call	= nf_tables_netdev_event,
+};
+
+static int nft_chain_filter_netdev_init(void)
+{
+	int err;
+
+	nft_register_chain_type(&nft_chain_filter_netdev);
+
+	err = register_netdevice_notifier(&nf_tables_netdev_notifier);
+	if (err)
+		goto err_register_netdevice_notifier;
+
+	return 0;
+
+err_register_netdevice_notifier:
+	nft_unregister_chain_type(&nft_chain_filter_netdev);
+
+	return err;
+}
+
+static void nft_chain_filter_netdev_fini(void)
+{
+	nft_unregister_chain_type(&nft_chain_filter_netdev);
+	unregister_netdevice_notifier(&nf_tables_netdev_notifier);
+}
+#else
+static inline int nft_chain_filter_netdev_init(void) { return 0; }
+static inline void nft_chain_filter_netdev_fini(void) {}
+#endif /* CONFIG_NF_TABLES_NETDEV */
+
+int __init nft_chain_filter_init(void)
+{
+	int err;
+
+	err = nft_chain_filter_netdev_init();
+	if (err < 0)
+		return err;
+
+	nft_chain_filter_ipv4_init();
+	nft_chain_filter_ipv6_init();
+	nft_chain_filter_arp_init();
+	nft_chain_filter_inet_init();
+	nft_chain_filter_bridge_init();
+
+	return 0;
+}
+
+void __exit nft_chain_filter_fini(void)
+{
+	nft_chain_filter_bridge_fini();
+	nft_chain_filter_inet_fini();
+	nft_chain_filter_arp_fini();
+	nft_chain_filter_ipv6_fini();
+	nft_chain_filter_ipv4_fini();
+	nft_chain_filter_netdev_fini();
+}

From 43a605f2f722b6e08addedae8545b490fca252c4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Mar 2018 11:53:08 +0200
Subject: [PATCH 1490/1678] netfilter: nf_tables: enable conntrack if NAT chain
 is registered

Register conntrack hooks if the user adds NAT chains. Users get confused
with the existing behaviour since they will see no packets hitting this
chain until they add the first rule that refers to conntrack.

This patch adds new ->init() and ->free() indirections to chain types
that can be used by NAT chains to invoke the conntrack dependency.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h       |  4 ++++
 net/ipv4/netfilter/nft_chain_nat_ipv4.c | 12 ++++++++++++
 net/ipv6/netfilter/nft_chain_nat_ipv6.c | 12 ++++++++++++
 net/netfilter/nf_tables_api.c           | 24 +++++++++++++++++-------
 4 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 77c3c04c27ac..e26b94a61a99 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -884,6 +884,8 @@ enum nft_chain_types {
  * 	@owner: module owner
  * 	@hook_mask: mask of valid hooks
  * 	@hooks: array of hook functions
+ *	@init: chain initialization function
+ *	@free: chain release function
  */
 struct nft_chain_type {
 	const char			*name;
@@ -892,6 +894,8 @@ struct nft_chain_type {
 	struct module			*owner;
 	unsigned int			hook_mask;
 	nf_hookfn			*hooks[NF_MAX_HOOKS];
+	int				(*init)(struct nft_ctx *ctx);
+	void				(*free)(struct nft_ctx *ctx);
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index 9864f5b3279c..b5464a3f253b 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -67,6 +67,16 @@ static unsigned int nft_nat_ipv4_local_fn(void *priv,
 	return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
 }
 
+static int nft_nat_ipv4_init(struct nft_ctx *ctx)
+{
+	return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv4_free(struct nft_ctx *ctx)
+{
+	nf_ct_netns_put(ctx->net, ctx->family);
+}
+
 static const struct nft_chain_type nft_chain_nat_ipv4 = {
 	.name		= "nat",
 	.type		= NFT_CHAIN_T_NAT,
@@ -82,6 +92,8 @@ static const struct nft_chain_type nft_chain_nat_ipv4 = {
 		[NF_INET_LOCAL_OUT]	= nft_nat_ipv4_local_fn,
 		[NF_INET_LOCAL_IN]	= nft_nat_ipv4_fn,
 	},
+	.init		= nft_nat_ipv4_init,
+	.free		= nft_nat_ipv4_free,
 };
 
 static int __init nft_chain_nat_init(void)
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index c95d9a97d425..3557b114446c 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -65,6 +65,16 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv,
 	return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
 }
 
+static int nft_nat_ipv6_init(struct nft_ctx *ctx)
+{
+	return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv6_free(struct nft_ctx *ctx)
+{
+	nf_ct_netns_put(ctx->net, ctx->family);
+}
+
 static const struct nft_chain_type nft_chain_nat_ipv6 = {
 	.name		= "nat",
 	.type		= NFT_CHAIN_T_NAT,
@@ -80,6 +90,8 @@ static const struct nft_chain_type nft_chain_nat_ipv6 = {
 		[NF_INET_LOCAL_OUT]	= nft_nat_ipv6_local_fn,
 		[NF_INET_LOCAL_IN]	= nft_nat_ipv6_fn,
 	},
+	.init		= nft_nat_ipv6_init,
+	.free		= nft_nat_ipv6_free,
 };
 
 static int __init nft_chain_nat_ipv6_init(void)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 97ec1c388bfe..af8b6a7488bd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1211,13 +1211,17 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
 		rcu_assign_pointer(chain->stats, newstats);
 }
 
-static void nf_tables_chain_destroy(struct nft_chain *chain)
+static void nf_tables_chain_destroy(struct nft_ctx *ctx)
 {
+	struct nft_chain *chain = ctx->chain;
+
 	BUG_ON(chain->use > 0);
 
 	if (nft_is_base_chain(chain)) {
 		struct nft_base_chain *basechain = nft_base_chain(chain);
 
+		if (basechain->type->free)
+			basechain->type->free(ctx);
 		module_put(basechain->type->owner);
 		free_percpu(basechain->stats);
 		if (basechain->stats)
@@ -1354,6 +1358,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		}
 
 		basechain->type = hook.type;
+		if (basechain->type->init)
+			basechain->type->init(ctx);
+
 		chain = &basechain->chain;
 
 		ops		= &basechain->ops;
@@ -1374,6 +1381,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		if (chain == NULL)
 			return -ENOMEM;
 	}
+	ctx->chain = chain;
+
 	INIT_LIST_HEAD(&chain->rules);
 	chain->handle = nf_tables_alloc_handle(table);
 	chain->table = table;
@@ -1387,7 +1396,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 	if (err < 0)
 		goto err1;
 
-	ctx->chain = chain;
 	err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
 	if (err < 0)
 		goto err2;
@@ -1399,7 +1407,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 err2:
 	nf_tables_unregister_hook(net, table, chain);
 err1:
-	nf_tables_chain_destroy(chain);
+	nf_tables_chain_destroy(ctx);
 
 	return err;
 }
@@ -5678,7 +5686,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
 		nf_tables_table_destroy(&trans->ctx);
 		break;
 	case NFT_MSG_DELCHAIN:
-		nf_tables_chain_destroy(trans->ctx.chain);
+		nf_tables_chain_destroy(&trans->ctx);
 		break;
 	case NFT_MSG_DELRULE:
 		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
@@ -5849,7 +5857,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 		nf_tables_table_destroy(&trans->ctx);
 		break;
 	case NFT_MSG_NEWCHAIN:
-		nf_tables_chain_destroy(trans->ctx.chain);
+		nf_tables_chain_destroy(&trans->ctx);
 		break;
 	case NFT_MSG_NEWRULE:
 		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
@@ -6499,7 +6507,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
 	}
 	list_del(&ctx->chain->list);
 	ctx->table->use--;
-	nf_tables_chain_destroy(ctx->chain);
+	nf_tables_chain_destroy(ctx);
 
 	return 0;
 }
@@ -6515,6 +6523,7 @@ static void __nft_release_tables(struct net *net)
 	struct nft_set *set, *ns;
 	struct nft_ctx ctx = {
 		.net	= net,
+		.family	= NFPROTO_NETDEV,
 	};
 
 	list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
@@ -6551,9 +6560,10 @@ static void __nft_release_tables(struct net *net)
 			nft_obj_destroy(obj);
 		}
 		list_for_each_entry_safe(chain, nc, &table->chains, list) {
+			ctx.chain = chain;
 			list_del(&chain->list);
 			table->use--;
-			nf_tables_chain_destroy(chain);
+			nf_tables_chain_destroy(&ctx);
 		}
 		list_del(&table->list);
 		nf_tables_table_destroy(&ctx);

From 10659cbab72b7bfee1a886018d1915a9549b6378 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 28 Mar 2018 12:06:49 +0200
Subject: [PATCH 1491/1678] netfilter: nf_tables: rename to
 nft_set_lookup_global()

To prepare shorter introduction of shorter function prefix.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 10 +++++-----
 net/netfilter/nf_tables_api.c     | 12 ++++++------
 net/netfilter/nft_dynset.c        |  5 +++--
 net/netfilter/nft_lookup.c        |  4 ++--
 net/netfilter/nft_objref.c        |  5 +++--
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e26b94a61a99..bd2a18d66189 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -434,11 +434,11 @@ static inline struct nft_set *nft_set_container_of(const void *priv)
 	return (void *)priv - offsetof(struct nft_set, data);
 }
 
-struct nft_set *nft_set_lookup(const struct net *net,
-			       const struct nft_table *table,
-			       const struct nlattr *nla_set_name,
-			       const struct nlattr *nla_set_id,
-			       u8 genmask);
+struct nft_set *nft_set_lookup_global(const struct net *net,
+				      const struct nft_table *table,
+				      const struct nlattr *nla_set_name,
+				      const struct nlattr *nla_set_id,
+				      u8 genmask);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index af8b6a7488bd..769d84015073 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2633,11 +2633,11 @@ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 	return ERR_PTR(-ENOENT);
 }
 
-struct nft_set *nft_set_lookup(const struct net *net,
-			       const struct nft_table *table,
-			       const struct nlattr *nla_set_name,
-			       const struct nlattr *nla_set_id,
-			       u8 genmask)
+struct nft_set *nft_set_lookup_global(const struct net *net,
+				      const struct nft_table *table,
+				      const struct nlattr *nla_set_name,
+				      const struct nlattr *nla_set_id,
+				      u8 genmask)
 {
 	struct nft_set *set;
 
@@ -2650,7 +2650,7 @@ struct nft_set *nft_set_lookup(const struct net *net,
 	}
 	return set;
 }
-EXPORT_SYMBOL_GPL(nft_set_lookup);
+EXPORT_SYMBOL_GPL(nft_set_lookup_global);
 
 static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 				    const char *name)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index fc83e29d6634..04863fad05dd 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -132,8 +132,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 			priv->invert = true;
 	}
 
-	set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME],
-			     tb[NFTA_DYNSET_SET_ID], genmask);
+	set = nft_set_lookup_global(ctx->net, ctx->table,
+				    tb[NFTA_DYNSET_SET_NAME],
+				    tb[NFTA_DYNSET_SET_ID], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 475570e89ede..f52da5e2199f 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,8 +71,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 	    tb[NFTA_LOOKUP_SREG] == NULL)
 		return -EINVAL;
 
-	set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
-			     tb[NFTA_LOOKUP_SET_ID], genmask);
+	set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_LOOKUP_SET],
+				    tb[NFTA_LOOKUP_SET_ID], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 7bcdc48f3d73..0b02407773ad 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -117,8 +117,9 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
 	struct nft_set *set;
 	int err;
 
-	set = nft_set_lookup(ctx->net, ctx->table, tb[NFTA_OBJREF_SET_NAME],
-			     tb[NFTA_OBJREF_SET_ID], genmask);
+	set = nft_set_lookup_global(ctx->net, ctx->table,
+				    tb[NFTA_OBJREF_SET_NAME],
+				    tb[NFTA_OBJREF_SET_ID], genmask);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 

From a3073c17dd8cd041d5cf68f28a80a54e310f2f45 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 28 Mar 2018 12:06:50 +0200
Subject: [PATCH 1492/1678] netfilter: nf_tables: use nft_set_lookup_global
 from nf_tables_newsetelem()

Replace opencoded implementation of nft_set_lookup_global() by call to
this function.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 769d84015073..2bd80fa9b070 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4032,17 +4032,10 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		return err;
 
-	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
-				   genmask);
-	if (IS_ERR(set)) {
-		if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
-			set = nf_tables_set_lookup_byid(net,
-					nla[NFTA_SET_ELEM_LIST_SET_ID],
-					genmask);
-		}
-		if (IS_ERR(set))
-			return PTR_ERR(set);
-	}
+	set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+				    nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
 
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
 		return -EBUSY;

From c47d36b3855d804b2e282f9b4eecbbd19b5453f9 Mon Sep 17 00:00:00 2001
From: Arushi Singhal <arushisinghal19971997@gmail.com>
Date: Thu, 29 Mar 2018 00:39:50 +0530
Subject: [PATCH 1493/1678] netfilter: Merge assignment with return

Merge assignment with return statement to directly return the value.

Signed-off-by: Arushi Singhal <arushisinghal19971997@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 5 ++---
 net/netfilter/xt_hashlimit.c         | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 11ef85a57244..b00e84bf4107 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1527,9 +1527,8 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
 	if (ret < 0)
 		return ret;
 
-	ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
-					cda[CTA_NAT_SRC]);
-	return ret;
+	return ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
+					 cda[CTA_NAT_SRC]);
 #else
 	if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
 		return 0;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index db2fe0911740..64fc3721d74c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -534,8 +534,7 @@ static u64 user2rate_bytes(u32 user)
 	u64 r;
 
 	r = user ? U32_MAX / user : U32_MAX;
-	r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
-	return r;
+	return (r - 1) << XT_HASHLIMIT_BYTE_SHIFT;
 }
 
 static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,

From 9ba5c404bf1d6284f0269411b33394362b7ff405 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Thu, 29 Mar 2018 15:12:41 +0100
Subject: [PATCH 1494/1678] netfilter: x_tables: Add note about how to free
 percpu counters

Due to the way percpu counters are allocated and freed in blocks,
it is not safe to free counters individually.  Currently all callers
do the right thing, but let's note this restriction.

Fixes: ae0ac0ed6fcf ("netfilter: x_tables: pack percpu counter allocations")
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bac932f1c582..75cd5196b29b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1854,7 +1854,9 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
  * to fetch the real percpu counter.
  *
  * To speed up allocation and improve data locality, a 4kb block is
- * allocated.
+ * allocated.  Freeing any counter may free an entire block, so all
+ * counters allocated using the same state must be freed at the same
+ * time.
  *
  * xt_percpu_counter_alloc_state contains the base address of the
  * allocated page and the current sub-offset.

From e3b5e1ec75234fb6b27708a316cdf69f9fb176a8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 30 Mar 2018 11:39:12 +0200
Subject: [PATCH 1495/1678] Revert "netfilter: x_tables: ensure last rule in
 base chain matches underflow/policy"

This reverts commit 0d7df906a0e78079a02108b06d32c3ef2238ad25.

Valdis Kletnieks reported that xtables is broken in linux-next since
0d7df906a0e78  ("netfilter: x_tables: ensure last rule in base chain
matches underflow/policy"), as kernel rejects the (well-formed) ruleset:

[   64.402790] ip6_tables: last base chain position 1136 doesn't match underflow 1344 (hook 1)

mark_source_chains is not the correct place for such a check, as it
terminates evaluation of a chain once it sees an unconditional verdict
(following rules are known to be unreachable). It seems preferrable to
fix libiptc instead, so remove this check again.

Fixes: 0d7df906a0e78 ("netfilter: x_tables: ensure last rule in base chain matches underflow/policy")
Reported-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 17 +----------------
 net/ipv4/netfilter/ip_tables.c  | 17 +----------------
 net/ipv6/netfilter/ip6_tables.c | 17 +----------------
 3 files changed, 3 insertions(+), 48 deletions(-)

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f366ff1cfc19..aaafdbd15ad3 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -309,13 +309,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 	for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct arpt_entry *e = entry0 + pos;
-		unsigned int last_pos, depth;
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
 
-		depth = 0;
-		last_pos = pos;
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
 
@@ -346,8 +343,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 					pos = e->counters.pcnt;
 					e->counters.pcnt = 0;
 
-					if (depth)
-						--depth;
 					/* We're at the start. */
 					if (pos == oldpos)
 						goto next;
@@ -372,9 +367,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
-
-					if (entry0 + newpos != arpt_next_entry(e))
-						++depth;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
@@ -385,15 +377,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
-			if (depth == 0)
-				last_pos = pos;
-		}
-next:
-		if (last_pos != newinfo->underflow[hook]) {
-			pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n",
-					   last_pos, newinfo->underflow[hook], hook);
-			return 0;
 		}
+next:		;
 	}
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2362ca2c9e0c..f9063513f9d1 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -378,13 +378,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
 	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ipt_entry *e = entry0 + pos;
-		unsigned int last_pos, depth;
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
 
-		depth = 0;
-		last_pos = pos;
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
 
@@ -413,8 +410,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					pos = e->counters.pcnt;
 					e->counters.pcnt = 0;
 
-					if (depth)
-						--depth;
 					/* We're at the start. */
 					if (pos == oldpos)
 						goto next;
@@ -439,9 +434,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
-
-					if (entry0 + newpos != ipt_next_entry(e))
-						++depth;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
@@ -452,15 +444,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
-			if (depth == 0)
-				last_pos = pos;
-		}
-next:
-		if (last_pos != newinfo->underflow[hook]) {
-			pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n",
-					   last_pos, newinfo->underflow[hook], hook);
-			return 0;
 		}
+next:		;
 	}
 	return 1;
 }
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 004508753abc..3c36a4c77f29 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -396,13 +396,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
 	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
 		unsigned int pos = newinfo->hook_entry[hook];
 		struct ip6t_entry *e = entry0 + pos;
-		unsigned int last_pos, depth;
 
 		if (!(valid_hooks & (1 << hook)))
 			continue;
 
-		depth = 0;
-		last_pos = pos;
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
 
@@ -431,8 +428,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					pos = e->counters.pcnt;
 					e->counters.pcnt = 0;
 
-					if (depth)
-						--depth;
 					/* We're at the start. */
 					if (pos == oldpos)
 						goto next;
@@ -457,9 +452,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					if (!xt_find_jump_offset(offsets, newpos,
 								 newinfo->number))
 						return 0;
-
-					if (entry0 + newpos != ip6t_next_entry(e))
-						++depth;
 				} else {
 					/* ... this is a fallthru */
 					newpos = pos + e->next_offset;
@@ -470,15 +462,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
 				e->counters.pcnt = pos;
 				pos = newpos;
 			}
-			if (depth == 0)
-				last_pos = pos;
-		}
-next:
-		if (last_pos != newinfo->underflow[hook]) {
-			pr_err_ratelimited("last base chain position %u doesn't match underflow %u (hook %u)\n",
-					   last_pos, newinfo->underflow[hook], hook);
-			return 0;
 		}
+next:		;
 	}
 	return 1;
 }

From 26c97c5d8dac6bc56d4360561a286f52543ac07e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 20 Mar 2018 10:35:47 -0700
Subject: [PATCH 1496/1678] netfilter: ipset: Use is_zero_ether_addr instead of
 static and memcmp

To make the test a bit clearer and to reduce object size a little.

Miscellanea:

o remove now unnecessary static const array

$ size ip_set_hash_mac.o*
   text	   data	    bss	    dec	    hex	filename
  22822	   4619	     64	  27505	   6b71	ip_set_hash_mac.o.allyesconfig.new
  22932	   4683	     64	  27679	   6c1f	ip_set_hash_mac.o.allyesconfig.old
  10443	   1040	      0	  11483	   2cdb	ip_set_hash_mac.o.defconfig.new
  10507	   1040	      0	  11547	   2d1b	ip_set_hash_mac.o.defconfig.old

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_mac.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 8f004edad396..f9d5a2a1e3d0 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -72,9 +72,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next,
 #define IP_SET_PROTO_UNDEF
 #include "ip_set_hash_gen.h"
 
-/* Zero valued element is not supported */
-static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
-
 static int
 hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	       const struct xt_action_param *par,
@@ -93,7 +90,7 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		return -EINVAL;
 
 	ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
-	if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+	if (is_zero_ether_addr(e.ether))
 		return -EINVAL;
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
@@ -118,7 +115,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (ret)
 		return ret;
 	ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
-	if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+	if (is_zero_ether_addr(e.ether))
 		return -IPSET_ERR_HASH_ELEM;
 
 	return adtfn(set, &e, &ext, &ext, flags);

From 004c3cf1a18becf5ce56f43e8821835e34c15865 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 28 Mar 2018 12:51:09 +0000
Subject: [PATCH 1497/1678] cxgb4: fix error return code in adap_init0()

Fix to return a negative error code from the hash filter init error
handling case instead of 0, as done elsewhere in this function.

Fixes: 5c31254e35a8 ("cxgb4: initialize hash-filter configuration")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 57d38f8ed455..0072580e2c25 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4449,7 +4449,8 @@ static int adap_init0(struct adapter *adap)
 		adap->params.ofldq_wr_cred = val[5];
 
 		if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
-			if (init_hash_filter(adap) < 0)
+			ret = init_hash_filter(adap);
+			if (ret < 0)
 				goto bye;
 		} else {
 			adap->params.offload = 1;

From 9daae9bd47cff82a2a06aca23c458d6c79d09d52 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 28 Mar 2018 17:46:54 +0300
Subject: [PATCH 1498/1678] net: Call add/kill vid ndo on vlan filter feature
 toggling

NETIF_F_HW_VLAN_[CS]TAG_FILTER features require more than just a bit
flip in dev->features in order to keep the driver in a consistent state.
These features notify the driver of each added/removed vlan, but toggling
of vlan-filter does not notify the driver accordingly for each of the
existing vlans.

This patch implements a similar solution to NETIF_F_RX_UDP_TUNNEL_PORT
behavior (which notifies the driver about UDP ports in the same manner
that vids are reported).

Each toggling of the features propagates to the 8021q module, which
iterates over the vlans and call add/kill ndo accordingly.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h   |  24 +++++++++
 include/linux/netdevice.h |   4 ++
 net/8021q/vlan.c          |  21 ++++++++
 net/8021q/vlan.h          |   3 ++
 net/8021q/vlan_core.c     | 101 ++++++++++++++++++++++++++++----------
 net/core/dev.c            |  20 ++++++++
 6 files changed, 148 insertions(+), 25 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index c4a1cff9c768..24d1976c1e61 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -83,6 +83,30 @@ static inline bool is_vlan_dev(const struct net_device *dev)
 #define skb_vlan_tag_get_id(__skb)	((__skb)->vlan_tci & VLAN_VID_MASK)
 #define skb_vlan_tag_get_prio(__skb)	((__skb)->vlan_tci & VLAN_PRIO_MASK)
 
+static inline int vlan_get_rx_ctag_filter_info(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	return notifier_to_errno(call_netdevice_notifiers(NETDEV_CVLAN_FILTER_PUSH_INFO, dev));
+}
+
+static inline void vlan_drop_rx_ctag_filter_info(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	call_netdevice_notifiers(NETDEV_CVLAN_FILTER_DROP_INFO, dev);
+}
+
+static inline int vlan_get_rx_stag_filter_info(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	return notifier_to_errno(call_netdevice_notifiers(NETDEV_SVLAN_FILTER_PUSH_INFO, dev));
+}
+
+static inline void vlan_drop_rx_stag_filter_info(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	call_netdevice_notifiers(NETDEV_SVLAN_FILTER_DROP_INFO, dev);
+}
+
 /**
  *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
  *	@rx_packets: number of received packets
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2a2d9cf50aa2..da44dab492e3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2349,6 +2349,10 @@ enum netdev_cmd {
 	NETDEV_UDP_TUNNEL_PUSH_INFO,
 	NETDEV_UDP_TUNNEL_DROP_INFO,
 	NETDEV_CHANGE_TX_QUEUE_LEN,
+	NETDEV_CVLAN_FILTER_PUSH_INFO,
+	NETDEV_CVLAN_FILTER_DROP_INFO,
+	NETDEV_SVLAN_FILTER_PUSH_INFO,
+	NETDEV_SVLAN_FILTER_DROP_INFO,
 };
 const char *netdev_cmd_to_name(enum netdev_cmd cmd);
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index bad01b14a4ad..5505ee6ebdbe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -360,6 +360,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	struct vlan_dev_priv *vlan;
 	bool last = false;
 	LIST_HEAD(list);
+	int err;
 
 	if (is_vlan_dev(dev)) {
 		int err = __vlan_device_event(dev, event);
@@ -489,6 +490,26 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		vlan_group_for_each_dev(grp, i, vlandev)
 			call_netdevice_notifiers(event, vlandev);
 		break;
+
+	case NETDEV_CVLAN_FILTER_PUSH_INFO:
+		err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q));
+		if (err)
+			return notifier_from_errno(err);
+		break;
+
+	case NETDEV_CVLAN_FILTER_DROP_INFO:
+		vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q));
+		break;
+
+	case NETDEV_SVLAN_FILTER_PUSH_INFO:
+		err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD));
+		if (err)
+			return notifier_from_errno(err);
+		break;
+
+	case NETDEV_SVLAN_FILTER_DROP_INFO:
+		vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD));
+		break;
 	}
 
 out:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index a8ba51030b75..e23aac3e4d37 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -97,6 +97,9 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
 		if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
 							    (i) % VLAN_N_VID)))
 
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto);
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto);
+
 /* found in vlan_dev.c */
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 45c9bf5ff3a0..c8d7abdc0463 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -165,13 +165,12 @@ struct vlan_vid_info {
 	int refcount;
 };
 
-static bool vlan_hw_filter_capable(const struct net_device *dev,
-				     const struct vlan_vid_info *vid_info)
+bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
 {
-	if (vid_info->proto == htons(ETH_P_8021Q) &&
+	if (proto == htons(ETH_P_8021Q) &&
 	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
 		return true;
-	if (vid_info->proto == htons(ETH_P_8021AD) &&
+	if (proto == htons(ETH_P_8021AD) &&
 	    dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
 		return true;
 	return false;
@@ -202,11 +201,73 @@ static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
 	return vid_info;
 }
 
+static int vlan_add_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+	if (!vlan_hw_filter_capable(dev, proto))
+		return 0;
+
+	if (netif_device_present(dev))
+		return dev->netdev_ops->ndo_vlan_rx_add_vid(dev, proto, vid);
+	else
+		return -ENODEV;
+}
+
+static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vid)
+{
+	if (!vlan_hw_filter_capable(dev, proto))
+		return 0;
+
+	if (netif_device_present(dev))
+		return dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
+	else
+		return -ENODEV;
+}
+
+int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+	struct net_device *real_dev = vlan_info->real_dev;
+	struct vlan_vid_info *vlan_vid_info;
+	int err;
+
+	list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list) {
+		if (vlan_vid_info->proto == proto) {
+			err = vlan_add_rx_filter_info(real_dev, proto,
+						      vlan_vid_info->vid);
+			if (err)
+				goto unwind;
+		}
+	}
+
+	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(vlan_vid_info,
+					     &vlan_info->vid_list, list) {
+		if (vlan_vid_info->proto == proto)
+			vlan_kill_rx_filter_info(real_dev, proto,
+						 vlan_vid_info->vid);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(vlan_filter_push_vids);
+
+void vlan_filter_drop_vids(struct vlan_info *vlan_info, __be16 proto)
+{
+	struct vlan_vid_info *vlan_vid_info;
+
+	list_for_each_entry(vlan_vid_info, &vlan_info->vid_list, list)
+		if (vlan_vid_info->proto == proto)
+			vlan_kill_rx_filter_info(vlan_info->real_dev,
+						 vlan_vid_info->proto,
+						 vlan_vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_filter_drop_vids);
+
 static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 			  struct vlan_vid_info **pvid_info)
 {
 	struct net_device *dev = vlan_info->real_dev;
-	const struct net_device_ops *ops = dev->netdev_ops;
 	struct vlan_vid_info *vid_info;
 	int err;
 
@@ -214,16 +275,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 	if (!vid_info)
 		return -ENOMEM;
 
-	if (vlan_hw_filter_capable(dev, vid_info)) {
-		if (netif_device_present(dev))
-			err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
-		else
-			err = -ENODEV;
-		if (err) {
-			kfree(vid_info);
-			return err;
-		}
+	err = vlan_add_rx_filter_info(dev, proto, vid);
+	if (err) {
+		kfree(vid_info);
+		return err;
 	}
+
 	list_add(&vid_info->list, &vlan_info->vid_list);
 	vlan_info->nr_vids++;
 	*pvid_info = vid_info;
@@ -270,21 +327,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 			   struct vlan_vid_info *vid_info)
 {
 	struct net_device *dev = vlan_info->real_dev;
-	const struct net_device_ops *ops = dev->netdev_ops;
 	__be16 proto = vid_info->proto;
 	u16 vid = vid_info->vid;
 	int err;
 
-	if (vlan_hw_filter_capable(dev, vid_info)) {
-		if (netif_device_present(dev))
-			err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
-		else
-			err = -ENODEV;
-		if (err) {
-			pr_warn("failed to kill vid %04x/%d for device %s\n",
-				proto, vid, dev->name);
-		}
-	}
+	err = vlan_kill_rx_filter_info(dev, proto, vid);
+	if (err)
+		pr_warn("failed to kill vid %04x/%d for device %s\n",
+			proto, vid, dev->name);
+
 	list_del(&vid_info->list);
 	kfree(vid_info);
 	vlan_info->nr_vids--;
diff --git a/net/core/dev.c b/net/core/dev.c
index eca5458b2753..1ddb6b9c58a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1584,6 +1584,8 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
 	N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
 	N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
 	N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
+	N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
+	N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
 	};
 #undef N
 	return "UNKNOWN_NETDEV_EVENT";
@@ -7665,6 +7667,24 @@ sync_lower:
 			}
 		}
 
+		if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
+			if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+				dev->features = features;
+				err |= vlan_get_rx_ctag_filter_info(dev);
+			} else {
+				vlan_drop_rx_ctag_filter_info(dev);
+			}
+		}
+
+		if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
+			if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
+				dev->features = features;
+				err |= vlan_get_rx_stag_filter_info(dev);
+			} else {
+				vlan_drop_rx_stag_filter_info(dev);
+			}
+		}
+
 		dev->features = features;
 	}
 

From fd41f2bfb71ba161309797de1ae1966810c19703 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Mar 2018 15:15:36 -0700
Subject: [PATCH 1499/1678] net: systemport: Remove adaptive TX coalescing

Adaptive TX coalescing is not currently giving us any advantages and
ends up making the CPU spin more frequently until TX completion. Deny
and disable adaptive TX coalescing for now and rely on static
configuration, we can always add it back later.

Reviewed-by: Tal Gilboa <talgi@mellanox.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 61 +++-------------------
 drivers/net/ethernet/broadcom/bcmsysport.h |  1 -
 2 files changed, 8 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 4e26f606a7f2..1e52bb7d822e 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -15,7 +15,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-#include <linux/net_dim.h>
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
@@ -588,7 +587,8 @@ static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
 	rdma_writel(priv, reg, RDMA_MBDONE_INTR);
 }
 
-static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring)
+static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring,
+					struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = ring->priv;
 	u32 reg;
@@ -596,8 +596,8 @@ static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring)
 	reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(ring->index));
 	reg &= ~(RING_INTR_THRESH_MASK |
 		 RING_TIMEOUT_MASK << RING_TIMEOUT_SHIFT);
-	reg |= ring->dim.coal_pkts;
-	reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192) <<
+	reg |= ec->tx_max_coalesced_frames;
+	reg |= DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000, 8192) <<
 			    RING_TIMEOUT_SHIFT;
 	tdma_writel(priv, reg, TDMA_DESC_RING_INTR_CONTROL(ring->index));
 }
@@ -606,18 +606,12 @@ static int bcm_sysport_get_coalesce(struct net_device *dev,
 				    struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
-	struct bcm_sysport_tx_ring *ring;
-	unsigned int i;
 	u32 reg;
 
 	reg = tdma_readl(priv, TDMA_DESC_RING_INTR_CONTROL(0));
 
 	ec->tx_coalesce_usecs = (reg >> RING_TIMEOUT_SHIFT) * 8192 / 1000;
 	ec->tx_max_coalesced_frames = reg & RING_INTR_THRESH_MASK;
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		ring = &priv->tx_rings[i];
-		ec->use_adaptive_tx_coalesce |= ring->dim.use_dim;
-	}
 
 	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
 
@@ -632,7 +626,6 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
 				    struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
-	struct bcm_sysport_tx_ring *ring;
 	unsigned int i;
 
 	/* Base system clock is 125Mhz, DMA timeout is this reference clock
@@ -646,20 +639,12 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
 		return -EINVAL;
 
 	if ((ec->tx_coalesce_usecs == 0 && ec->tx_max_coalesced_frames == 0) ||
-	    (ec->rx_coalesce_usecs == 0 && ec->rx_max_coalesced_frames == 0))
+	    (ec->rx_coalesce_usecs == 0 && ec->rx_max_coalesced_frames == 0) ||
+	    ec->use_adaptive_tx_coalesce)
 		return -EINVAL;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		ring = &priv->tx_rings[i];
-		ring->dim.coal_pkts = ec->tx_max_coalesced_frames;
-		ring->dim.coal_usecs = ec->tx_coalesce_usecs;
-		if (!ec->use_adaptive_tx_coalesce && ring->dim.use_dim) {
-			ring->dim.coal_pkts = 1;
-			ring->dim.coal_usecs = 0;
-		}
-		ring->dim.use_dim = ec->use_adaptive_tx_coalesce;
-		bcm_sysport_set_tx_coalesce(ring);
-	}
+	for (i = 0; i < dev->num_tx_queues; i++)
+		bcm_sysport_set_tx_coalesce(&priv->tx_rings[i], ec);
 
 	priv->dim.coal_usecs = ec->rx_coalesce_usecs;
 	priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
@@ -940,8 +925,6 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
 	ring->packets += pkts_compl;
 	ring->bytes += bytes_compl;
 	u64_stats_update_end(&priv->syncp);
-	ring->dim.packets = pkts_compl;
-	ring->dim.bytes = bytes_compl;
 
 	ring->c_index = c_index;
 
@@ -987,7 +970,6 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
 {
 	struct bcm_sysport_tx_ring *ring =
 		container_of(napi, struct bcm_sysport_tx_ring, napi);
-	struct net_dim_sample dim_sample;
 	unsigned int work_done = 0;
 
 	work_done = bcm_sysport_tx_reclaim(ring->priv, ring);
@@ -1004,12 +986,6 @@ static int bcm_sysport_tx_poll(struct napi_struct *napi, int budget)
 		return 0;
 	}
 
-	if (ring->dim.use_dim) {
-		net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
-			       ring->dim.bytes, &dim_sample);
-		net_dim(&ring->dim.dim, dim_sample);
-	}
-
 	return budget;
 }
 
@@ -1089,23 +1065,6 @@ static void bcm_sysport_dim_work(struct work_struct *work)
 	dim->state = NET_DIM_START_MEASURE;
 }
 
-static void bcm_sysport_dim_tx_work(struct work_struct *work)
-{
-	struct net_dim *dim = container_of(work, struct net_dim, work);
-	struct bcm_sysport_net_dim *ndim =
-			container_of(dim, struct bcm_sysport_net_dim, dim);
-	struct bcm_sysport_tx_ring *ring =
-			container_of(ndim, struct bcm_sysport_tx_ring, dim);
-	struct net_dim_cq_moder cur_profile =
-				net_dim_get_profile(dim->mode, dim->profile_ix);
-
-	ring->dim.coal_usecs = cur_profile.usec;
-	ring->dim.coal_pkts = cur_profile.pkts;
-
-	bcm_sysport_set_tx_coalesce(ring);
-	dim->state = NET_DIM_START_MEASURE;
-}
-
 /* RX and misc interrupt routine */
 static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
 {
@@ -1152,7 +1111,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id)
 			continue;
 
 		txr = &priv->tx_rings[ring];
-		txr->dim.event_ctr++;
 
 		if (likely(napi_schedule_prep(&txr->napi))) {
 			intrl2_0_mask_set(priv, ring_bit);
@@ -1185,7 +1143,6 @@ static irqreturn_t bcm_sysport_tx_isr(int irq, void *dev_id)
 			continue;
 
 		txr = &priv->tx_rings[ring];
-		txr->dim.event_ctr++;
 
 		if (likely(napi_schedule_prep(&txr->napi))) {
 			intrl2_1_mask_set(priv, BIT(ring));
@@ -1551,7 +1508,6 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
 	reg |= (1 << index);
 	tdma_writel(priv, reg, TDMA_TIER1_ARB_0_QUEUE_EN);
 
-	bcm_sysport_init_dim(&ring->dim, bcm_sysport_dim_tx_work);
 	napi_enable(&ring->napi);
 
 	netif_dbg(priv, hw, priv->netdev,
@@ -1582,7 +1538,6 @@ static void bcm_sysport_fini_tx_ring(struct bcm_sysport_priv *priv,
 		return;
 
 	napi_disable(&ring->napi);
-	cancel_work_sync(&ring->dim.dim.work);
 	netif_napi_del(&ring->napi);
 
 	bcm_sysport_tx_clean(priv, ring);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index e1c97d4a82b4..57e18ef8f206 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -723,7 +723,6 @@ struct bcm_sysport_tx_ring {
 	struct bcm_sysport_priv *priv;	/* private context backpointer */
 	unsigned long	packets;	/* packets statistics */
 	unsigned long	bytes;		/* bytes statistics */
-	struct bcm_sysport_net_dim dim;	/* Net DIM context */
 	unsigned int	switch_queue;	/* switch port queue number */
 	unsigned int	switch_port;	/* switch port queue number */
 	bool		inspect;	/* inspect switch port and queue */

From a8cdfbdf885f3d3b4940ea31696a4492afd331f7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Mar 2018 15:15:37 -0700
Subject: [PATCH 1500/1678] net: systemport: Fix coalescing settings handling

There were a number of issues with setting the RX coalescing parameters:

- we would not be preserving values that would have been configured
  across close/open calls, instead we would always reset to no timeout
  and 1 interrupt per packet, this would also prevent DIM from setting its
  default usec/pkts values

- when adaptive RX would be turned on, we woud not be fetching the
  default parameters, we would stay with no timeout/1 packet per
  interrupt until the estimator kicks in and changes that

- finally disabling adaptive RX coalescing while providing parameters
  would not be honored, and we would stay with whatever DIM had
  previously determined instead of the user requested parameters

Fixes: b6e0e875421e ("net: systemport: Implement adaptive interrupt coalescing")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tal Gilboa <talgi@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 62 +++++++++++++++-------
 drivers/net/ethernet/broadcom/bcmsysport.h |  4 +-
 2 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 1e52bb7d822e..4a75b1de22e0 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -574,16 +574,16 @@ static int bcm_sysport_set_wol(struct net_device *dev,
 	return 0;
 }
 
-static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv)
+static void bcm_sysport_set_rx_coalesce(struct bcm_sysport_priv *priv,
+					u32 usecs, u32 pkts)
 {
 	u32 reg;
 
 	reg = rdma_readl(priv, RDMA_MBDONE_INTR);
 	reg &= ~(RDMA_INTR_THRESH_MASK |
 		 RDMA_TIMEOUT_MASK << RDMA_TIMEOUT_SHIFT);
-	reg |= priv->dim.coal_pkts;
-	reg |= DIV_ROUND_UP(priv->dim.coal_usecs * 1000, 8192) <<
-			    RDMA_TIMEOUT_SHIFT;
+	reg |= pkts;
+	reg |= DIV_ROUND_UP(usecs * 1000, 8192) << RDMA_TIMEOUT_SHIFT;
 	rdma_writel(priv, reg, RDMA_MBDONE_INTR);
 }
 
@@ -626,6 +626,8 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
 				    struct ethtool_coalesce *ec)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
+	struct net_dim_cq_moder moder;
+	u32 usecs, pkts;
 	unsigned int i;
 
 	/* Base system clock is 125Mhz, DMA timeout is this reference clock
@@ -646,15 +648,21 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
 	for (i = 0; i < dev->num_tx_queues; i++)
 		bcm_sysport_set_tx_coalesce(&priv->tx_rings[i], ec);
 
-	priv->dim.coal_usecs = ec->rx_coalesce_usecs;
-	priv->dim.coal_pkts = ec->rx_max_coalesced_frames;
+	priv->rx_coalesce_usecs = ec->rx_coalesce_usecs;
+	priv->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
+	usecs = priv->rx_coalesce_usecs;
+	pkts = priv->rx_max_coalesced_frames;
 
-	if (!ec->use_adaptive_rx_coalesce && priv->dim.use_dim) {
-		priv->dim.coal_pkts = 1;
-		priv->dim.coal_usecs = 0;
+	if (ec->use_adaptive_rx_coalesce && !priv->dim.use_dim) {
+		moder = net_dim_get_def_profile(priv->dim.dim.mode);
+		usecs = moder.usec;
+		pkts = moder.pkts;
 	}
+
 	priv->dim.use_dim = ec->use_adaptive_rx_coalesce;
-	bcm_sysport_set_rx_coalesce(priv);
+
+	/* Apply desired coalescing parameters */
+	bcm_sysport_set_rx_coalesce(priv, usecs, pkts);
 
 	return 0;
 }
@@ -1058,10 +1066,7 @@ static void bcm_sysport_dim_work(struct work_struct *work)
 	struct net_dim_cq_moder cur_profile =
 				net_dim_get_profile(dim->mode, dim->profile_ix);
 
-	priv->dim.coal_usecs = cur_profile.usec;
-	priv->dim.coal_pkts = cur_profile.pkts;
-
-	bcm_sysport_set_rx_coalesce(priv);
+	bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts);
 	dim->state = NET_DIM_START_MEASURE;
 }
 
@@ -1408,9 +1413,11 @@ out:
 		phy_print_status(phydev);
 }
 
-static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
+static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv,
 				 void (*cb)(struct work_struct *work))
 {
+	struct bcm_sysport_net_dim *dim = &priv->dim;
+
 	INIT_WORK(&dim->dim.work, cb);
 	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	dim->event_ctr = 0;
@@ -1418,6 +1425,25 @@ static void bcm_sysport_init_dim(struct bcm_sysport_net_dim *dim,
 	dim->bytes = 0;
 }
 
+static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv)
+{
+	struct bcm_sysport_net_dim *dim = &priv->dim;
+	struct net_dim_cq_moder moder;
+	u32 usecs, pkts;
+
+	usecs = priv->rx_coalesce_usecs;
+	pkts = priv->rx_max_coalesced_frames;
+
+	/* If DIM was enabled, re-apply default parameters */
+	if (dim->use_dim) {
+		moder = net_dim_get_def_profile(dim->dim.mode);
+		usecs = moder.usec;
+		pkts = moder.pkts;
+	}
+
+	bcm_sysport_set_rx_coalesce(priv, usecs, pkts);
+}
+
 static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
 				    unsigned int index)
 {
@@ -1658,8 +1684,6 @@ static int bcm_sysport_init_rx_ring(struct bcm_sysport_priv *priv)
 	rdma_writel(priv, 0, RDMA_END_ADDR_HI);
 	rdma_writel(priv, priv->num_rx_desc_words - 1, RDMA_END_ADDR_LO);
 
-	rdma_writel(priv, 1, RDMA_MBDONE_INTR);
-
 	netif_dbg(priv, hw, priv->netdev,
 		  "RDMA cfg, num_rx_bds=%d, rx_bds=%p\n",
 		  priv->num_rx_bds, priv->rx_bds);
@@ -1827,7 +1851,8 @@ static void bcm_sysport_netif_start(struct net_device *dev)
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 
 	/* Enable NAPI */
-	bcm_sysport_init_dim(&priv->dim, bcm_sysport_dim_work);
+	bcm_sysport_init_dim(priv, bcm_sysport_dim_work);
+	bcm_sysport_init_rx_coalesce(priv);
 	napi_enable(&priv->napi);
 
 	/* Enable RX interrupt and TX ring full interrupt */
@@ -2333,6 +2358,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	/* libphy will adjust the link state accordingly */
 	netif_carrier_off(dev);
 
+	priv->rx_max_coalesced_frames = 1;
 	u64_stats_init(&priv->syncp);
 
 	priv->dsa_notifier.notifier_call = bcm_sysport_dsa_notifier;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 57e18ef8f206..d6e5d0cbf3a3 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -701,8 +701,6 @@ struct bcm_sysport_net_dim {
 	u16			event_ctr;
 	unsigned long		packets;
 	unsigned long		bytes;
-	u32			coal_usecs;
-	u32			coal_pkts;
 	struct net_dim		dim;
 };
 
@@ -755,6 +753,8 @@ struct bcm_sysport_priv {
 	unsigned int		rx_c_index;
 
 	struct bcm_sysport_net_dim	dim;
+	u32			rx_max_coalesced_frames;
+	u32			rx_coalesce_usecs;
 
 	/* PHY device */
 	struct device_node	*phy_dn;

From 5e6ce1f1a4ca83878f2966dd67afb39b631c4320 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Mar 2018 15:15:38 -0700
Subject: [PATCH 1501/1678] net: bcmgenet: Fix coalescing settings handling

There were a number of issues with setting the RX coalescing parameters:

- we would not be preserving values that would have been configured
  across close/open calls, instead we would always reset to no timeout
  and 1 interrupt per packet, this would also prevent DIM from setting its
  default usec/pkts values

- when adaptive RX would be turned on, we woud not be fetching the
  default parameters, we would stay with no timeout/1 packet per interrupt
  until the estimator kicks in and changes that

- finally disabling adaptive RX coalescing while providing parameters
  would not be honored, and we would stay with whatever DIM had previously
  determined instead of the user requested parameters

Fixes: 9f4ca05827a2 ("net: bcmgenet: Add support for adaptive RX coalescing")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tal Gilboa <talgi@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/genet/bcmgenet.c    | 92 ++++++++++++-------
 .../net/ethernet/broadcom/genet/bcmgenet.h    |  4 +-
 2 files changed, 61 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 2f4cb5c11e18..264fb37dd341 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -625,26 +625,46 @@ static int bcmgenet_get_coalesce(struct net_device *dev,
 	return 0;
 }
 
-static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring)
+static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring,
+				     u32 usecs, u32 pkts)
 {
 	struct bcmgenet_priv *priv = ring->priv;
 	unsigned int i = ring->index;
 	u32 reg;
 
-	bcmgenet_rdma_ring_writel(priv, i, ring->dim.coal_pkts,
-				  DMA_MBUF_DONE_THRESH);
+	bcmgenet_rdma_ring_writel(priv, i, pkts, DMA_MBUF_DONE_THRESH);
 
 	reg = bcmgenet_rdma_readl(priv, DMA_RING0_TIMEOUT + i);
 	reg &= ~DMA_TIMEOUT_MASK;
-	reg |= DIV_ROUND_UP(ring->dim.coal_usecs * 1000, 8192);
+	reg |= DIV_ROUND_UP(usecs * 1000, 8192);
 	bcmgenet_rdma_writel(priv, reg, DMA_RING0_TIMEOUT + i);
 }
 
+static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
+					  struct ethtool_coalesce *ec)
+{
+	struct net_dim_cq_moder moder;
+	u32 usecs, pkts;
+
+	ring->rx_coalesce_usecs = ec->rx_coalesce_usecs;
+	ring->rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
+	usecs = ring->rx_coalesce_usecs;
+	pkts = ring->rx_max_coalesced_frames;
+
+	if (ec->use_adaptive_rx_coalesce && !ring->dim.use_dim) {
+		moder = net_dim_get_def_profile(ring->dim.dim.mode);
+		usecs = moder.usec;
+		pkts = moder.pkts;
+	}
+
+	ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
+	bcmgenet_set_rx_coalesce(ring, usecs, pkts);
+}
+
 static int bcmgenet_set_coalesce(struct net_device *dev,
 				 struct ethtool_coalesce *ec)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct bcmgenet_rx_ring *ring;
 	unsigned int i;
 
 	/* Base system clock is 125Mhz, DMA timeout is this reference clock
@@ -680,27 +700,9 @@ static int bcmgenet_set_coalesce(struct net_device *dev,
 				  ec->tx_max_coalesced_frames,
 				  DMA_MBUF_DONE_THRESH);
 
-	for (i = 0; i < priv->hw_params->rx_queues; i++) {
-		ring = &priv->rx_rings[i];
-		ring->dim.coal_usecs = ec->rx_coalesce_usecs;
-		ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
-		if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
-			ring->dim.coal_pkts = 1;
-			ring->dim.coal_usecs = 0;
-		}
-		ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
-		bcmgenet_set_rx_coalesce(ring);
-	}
-
-	ring = &priv->rx_rings[DESC_INDEX];
-	ring->dim.coal_usecs = ec->rx_coalesce_usecs;
-	ring->dim.coal_pkts = ec->rx_max_coalesced_frames;
-	if (!ec->use_adaptive_rx_coalesce && ring->dim.use_dim) {
-		ring->dim.coal_pkts = 1;
-		ring->dim.coal_usecs = 0;
-	}
-	ring->dim.use_dim = ec->use_adaptive_rx_coalesce;
-	bcmgenet_set_rx_coalesce(ring);
+	for (i = 0; i < priv->hw_params->rx_queues; i++)
+		bcmgenet_set_ring_rx_coalesce(&priv->rx_rings[i], ec);
+	bcmgenet_set_ring_rx_coalesce(&priv->rx_rings[DESC_INDEX], ec);
 
 	return 0;
 }
@@ -1924,10 +1926,7 @@ static void bcmgenet_dim_work(struct work_struct *work)
 	struct net_dim_cq_moder cur_profile =
 			net_dim_get_profile(dim->mode, dim->profile_ix);
 
-	ring->dim.coal_usecs = cur_profile.usec;
-	ring->dim.coal_pkts = cur_profile.pkts;
-
-	bcmgenet_set_rx_coalesce(ring);
+	bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts);
 	dim->state = NET_DIM_START_MEASURE;
 }
 
@@ -2079,9 +2078,11 @@ static void init_umac(struct bcmgenet_priv *priv)
 	dev_dbg(kdev, "done init umac\n");
 }
 
-static void bcmgenet_init_dim(struct bcmgenet_net_dim *dim,
+static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring,
 			      void (*cb)(struct work_struct *work))
 {
+	struct bcmgenet_net_dim *dim = &ring->dim;
+
 	INIT_WORK(&dim->dim.work, cb);
 	dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	dim->event_ctr = 0;
@@ -2089,6 +2090,25 @@ static void bcmgenet_init_dim(struct bcmgenet_net_dim *dim,
 	dim->bytes = 0;
 }
 
+static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
+{
+	struct bcmgenet_net_dim *dim = &ring->dim;
+	struct net_dim_cq_moder moder;
+	u32 usecs, pkts;
+
+	usecs = ring->rx_coalesce_usecs;
+	pkts = ring->rx_max_coalesced_frames;
+
+	/* If DIM was enabled, re-apply default parameters */
+	if (dim->use_dim) {
+		moder = net_dim_get_def_profile(dim->dim.mode);
+		usecs = moder.usec;
+		pkts = moder.pkts;
+	}
+
+	bcmgenet_set_rx_coalesce(ring, usecs, pkts);
+}
+
 /* Initialize a Tx ring along with corresponding hardware registers */
 static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
 				  unsigned int index, unsigned int size,
@@ -2178,7 +2198,8 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
 	if (ret)
 		return ret;
 
-	bcmgenet_init_dim(&ring->dim, bcmgenet_dim_work);
+	bcmgenet_init_dim(ring, bcmgenet_dim_work);
+	bcmgenet_init_rx_coalesce(ring);
 
 	/* Initialize Rx NAPI */
 	netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll,
@@ -2186,7 +2207,6 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv,
 
 	bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_PROD_INDEX);
 	bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_CONS_INDEX);
-	bcmgenet_rdma_ring_writel(priv, index, 1, DMA_MBUF_DONE_THRESH);
 	bcmgenet_rdma_ring_writel(priv, index,
 				  ((size << DMA_RING_SIZE_SHIFT) |
 				   RX_BUF_LENGTH), DMA_RING_BUF_SIZE);
@@ -3424,6 +3444,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	struct net_device *dev;
 	const void *macaddr;
 	struct resource *r;
+	unsigned int i;
 	int err = -EIO;
 	const char *phy_mode_str;
 
@@ -3552,6 +3573,11 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	netif_set_real_num_tx_queues(priv->dev, priv->hw_params->tx_queues + 1);
 	netif_set_real_num_rx_queues(priv->dev, priv->hw_params->rx_queues + 1);
 
+	/* Set default coalescing parameters */
+	for (i = 0; i < priv->hw_params->rx_queues; i++)
+		priv->rx_rings[i].rx_max_coalesced_frames = 1;
+	priv->rx_rings[DESC_INDEX].rx_max_coalesced_frames = 1;
+
 	/* libphy will determine the link state */
 	netif_carrier_off(dev);
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 22c41e0430fb..b773bc07edf7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -578,8 +578,6 @@ struct bcmgenet_net_dim {
 	u16		event_ctr;
 	unsigned long	packets;
 	unsigned long	bytes;
-	u32		coal_usecs;
-	u32		coal_pkts;
 	struct net_dim	dim;
 };
 
@@ -598,6 +596,8 @@ struct bcmgenet_rx_ring {
 	unsigned int	end_ptr;	/* Rx ring end CB ptr */
 	unsigned int	old_discards;
 	struct bcmgenet_net_dim dim;
+	u32		rx_max_coalesced_frames;
+	u32		rx_coalesce_usecs;
 	void (*int_enable)(struct bcmgenet_rx_ring *);
 	void (*int_disable)(struct bcmgenet_rx_ring *);
 	struct bcmgenet_priv *priv;

From 2166dc95717d982d359b616b1acbb3d28f48c494 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Wed, 28 Mar 2018 15:38:19 -0700
Subject: [PATCH 1502/1678] MAINTAINERS: update vmxnet3 driver maintainer

Shrikrishna Khare would no longer maintain the vmxnet3 driver. Taking
over the role of vmxnet3 maintainer.

Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: Shrikrishna Khare <skhare@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9107d9241564..aace4ae4ad30 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14940,7 +14940,7 @@ F:	drivers/input/mouse/vmmouse.c
 F:	drivers/input/mouse/vmmouse.h
 
 VMWARE VMXNET3 ETHERNET DRIVER
-M:	Shrikrishna Khare <skhare@vmware.com>
+M:	Ronak Doshi <doshir@vmware.com>
 M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	netdev@vger.kernel.org
 S:	Maintained

From c6ab3008b6a6ecda22e92f96a1b9cc6b0d0b0a4e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Mar 2018 15:44:15 -0700
Subject: [PATCH 1503/1678] net: phy: phylink: Provide PHY interface to
 mac_link_{up, down}

In preparation for having DSA transition entirely to PHYLINK, we need to pass a
PHY interface type to the mac_link_{up,down} callbacks because we may have to
make decisions on that (e.g: turn on/off RGMII interfaces etc.). We do not pass
an entire phylink_link_state because not all parameters (pause, duplex etc.) are
defined when the link is down, only link and interface are.

Update mvneta accordingly since it currently implements phylink_mac_ops.

Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c |  4 +++-
 drivers/net/phy/phylink.c             |  4 +++-
 include/linux/phylink.h               | 14 +++++++++++---
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index eaa4bb80f1c9..cd09bde55596 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3396,7 +3396,8 @@ static void mvneta_set_eee(struct mvneta_port *pp, bool enable)
 	mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1);
 }
 
-static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode)
+static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode,
+				 phy_interface_t interface)
 {
 	struct mvneta_port *pp = netdev_priv(ndev);
 	u32 val;
@@ -3415,6 +3416,7 @@ static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode)
 }
 
 static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode,
+			       phy_interface_t interface,
 			       struct phy_device *phy)
 {
 	struct mvneta_port *pp = netdev_priv(ndev);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 51a011a349fe..9b1e4721ea3a 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -470,10 +470,12 @@ static void phylink_resolve(struct work_struct *w)
 	if (link_state.link != netif_carrier_ok(ndev)) {
 		if (!link_state.link) {
 			netif_carrier_off(ndev);
-			pl->ops->mac_link_down(ndev, pl->link_an_mode);
+			pl->ops->mac_link_down(ndev, pl->link_an_mode,
+					       pl->phy_state.interface);
 			netdev_info(ndev, "Link is Down\n");
 		} else {
 			pl->ops->mac_link_up(ndev, pl->link_an_mode,
+					     pl->phy_state.interface,
 					     pl->phydev);
 
 			netif_carrier_on(ndev);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index bd137c273d38..e95cc12030fa 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -73,8 +73,10 @@ struct phylink_mac_ops {
 	void (*mac_config)(struct net_device *ndev, unsigned int mode,
 			   const struct phylink_link_state *state);
 	void (*mac_an_restart)(struct net_device *ndev);
-	void (*mac_link_down)(struct net_device *ndev, unsigned int mode);
+	void (*mac_link_down)(struct net_device *ndev, unsigned int mode,
+			      phy_interface_t interface);
 	void (*mac_link_up)(struct net_device *ndev, unsigned int mode,
+			    phy_interface_t interface,
 			    struct phy_device *phy);
 };
 
@@ -161,25 +163,31 @@ void mac_an_restart(struct net_device *ndev);
  * mac_link_down() - take the link down
  * @ndev: a pointer to a &struct net_device for the MAC.
  * @mode: link autonegotiation mode
+ * @interface: link &typedef phy_interface_t mode
  *
  * If @mode is not an in-band negotiation mode (as defined by
  * phylink_autoneg_inband()), force the link down and disable any
- * Energy Efficient Ethernet MAC configuration.
+ * Energy Efficient Ethernet MAC configuration. Interface type
+ * selection must be done in mac_config().
  */
-void mac_link_down(struct net_device *ndev, unsigned int mode);
+void mac_link_down(struct net_device *ndev, unsigned int mode,
+		   phy_interface_t interface);
 
 /**
  * mac_link_up() - allow the link to come up
  * @ndev: a pointer to a &struct net_device for the MAC.
  * @mode: link autonegotiation mode
+ * @interface: link &typedef phy_interface_t mode
  * @phy: any attached phy
  *
  * If @mode is not an in-band negotiation mode (as defined by
  * phylink_autoneg_inband()), allow the link to come up. If @phy
  * is non-%NULL, configure Energy Efficient Ethernet by calling
  * phy_init_eee() and perform appropriate MAC configuration for EEE.
+ * Interface type selection must be done in mac_config().
  */
 void mac_link_up(struct net_device *ndev, unsigned int mode,
+		 phy_interface_t interface,
 		 struct phy_device *phy);
 #endif
 

From e679c9c1dbfdba07b2a979a076cca74b773be8ce Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 28 Mar 2018 15:44:16 -0700
Subject: [PATCH 1504/1678] sfp/phylink: move module EEPROM ethtool access into
 netdev core ethtool

Provide a pointer to the SFP bus in struct net_device, so that the
ethtool module EEPROM methods can access the SFP directly, rather
than needing every user to provide a hook for it.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 18 -----------------
 drivers/net/phy/phylink.c             | 28 ---------------------------
 drivers/net/phy/sfp-bus.c             |  6 ++----
 include/linux/netdevice.h             |  3 +++
 include/linux/phylink.h               |  3 ---
 net/core/ethtool.c                    |  7 +++++++
 6 files changed, 12 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index cd09bde55596..25ced96750bf 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4075,22 +4075,6 @@ static int mvneta_ethtool_set_wol(struct net_device *dev,
 	return ret;
 }
 
-static int mvneta_ethtool_get_module_info(struct net_device *dev,
-					  struct ethtool_modinfo *modinfo)
-{
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	return phylink_ethtool_get_module_info(pp->phylink, modinfo);
-}
-
-static int mvneta_ethtool_get_module_eeprom(struct net_device *dev,
-					    struct ethtool_eeprom *ee, u8 *buf)
-{
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	return phylink_ethtool_get_module_eeprom(pp->phylink, ee, buf);
-}
-
 static int mvneta_ethtool_get_eee(struct net_device *dev,
 				  struct ethtool_eee *eee)
 {
@@ -4165,8 +4149,6 @@ static const struct ethtool_ops mvneta_eth_tool_ops = {
 	.set_link_ksettings = mvneta_ethtool_set_link_ksettings,
 	.get_wol        = mvneta_ethtool_get_wol,
 	.set_wol        = mvneta_ethtool_set_wol,
-	.get_module_info = mvneta_ethtool_get_module_info,
-	.get_module_eeprom = mvneta_ethtool_get_module_eeprom,
 	.get_eee	= mvneta_ethtool_get_eee,
 	.set_eee	= mvneta_ethtool_set_eee,
 };
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 9b1e4721ea3a..c582b2d7546c 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1250,34 +1250,6 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
 }
 EXPORT_SYMBOL_GPL(phylink_ethtool_set_pauseparam);
 
-int phylink_ethtool_get_module_info(struct phylink *pl,
-				    struct ethtool_modinfo *modinfo)
-{
-	int ret = -EOPNOTSUPP;
-
-	WARN_ON(!lockdep_rtnl_is_held());
-
-	if (pl->sfp_bus)
-		ret = sfp_get_module_info(pl->sfp_bus, modinfo);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_info);
-
-int phylink_ethtool_get_module_eeprom(struct phylink *pl,
-				      struct ethtool_eeprom *ee, u8 *buf)
-{
-	int ret = -EOPNOTSUPP;
-
-	WARN_ON(!lockdep_rtnl_is_held());
-
-	if (pl->sfp_bus)
-		ret = sfp_get_module_eeprom(pl->sfp_bus, ee, buf);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_eeprom);
-
 /**
  * phylink_ethtool_get_eee_err() - read the energy efficient ethernet error
  *   counter
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 3d4ff5d0d2a6..0381da78d228 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -342,6 +342,7 @@ static int sfp_register_bus(struct sfp_bus *bus)
 	}
 	if (bus->started)
 		bus->socket_ops->start(bus->sfp);
+	bus->netdev->sfp_bus = bus;
 	bus->registered = true;
 	return 0;
 }
@@ -356,6 +357,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus)
 		if (bus->phydev && ops && ops->disconnect_phy)
 			ops->disconnect_phy(bus->upstream);
 	}
+	bus->netdev->sfp_bus = NULL;
 	bus->registered = false;
 }
 
@@ -371,8 +373,6 @@ static void sfp_unregister_bus(struct sfp_bus *bus)
  */
 int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo)
 {
-	if (!bus->registered)
-		return -ENOIOCTLCMD;
 	return bus->socket_ops->module_info(bus->sfp, modinfo);
 }
 EXPORT_SYMBOL_GPL(sfp_get_module_info);
@@ -391,8 +391,6 @@ EXPORT_SYMBOL_GPL(sfp_get_module_info);
 int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
 			  u8 *data)
 {
-	if (!bus->registered)
-		return -ENOIOCTLCMD;
 	return bus->socket_ops->module_eeprom(bus->sfp, ee, data);
 }
 EXPORT_SYMBOL_GPL(sfp_get_module_eeprom);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index da44dab492e3..cf44503ea81a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -58,6 +58,7 @@ struct device;
 struct phy_device;
 struct dsa_port;
 
+struct sfp_bus;
 /* 802.11 specific */
 struct wireless_dev;
 /* 802.15.4 specific */
@@ -1662,6 +1663,7 @@ enum netdev_priv_flags {
  *	@priomap:	XXX: need comments on this one
  *	@phydev:	Physical device may attach itself
  *			for hardware timestamping
+ *	@sfp_bus:	attached &struct sfp_bus structure.
  *
  *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
  *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
@@ -1945,6 +1947,7 @@ struct net_device {
 	struct netprio_map __rcu *priomap;
 #endif
 	struct phy_device	*phydev;
+	struct sfp_bus		*sfp_bus;
 	struct lock_class_key	*qdisc_tx_busylock;
 	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index e95cc12030fa..50eeae025f1e 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -219,9 +219,6 @@ void phylink_ethtool_get_pauseparam(struct phylink *,
 				    struct ethtool_pauseparam *);
 int phylink_ethtool_set_pauseparam(struct phylink *,
 				   struct ethtool_pauseparam *);
-int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *);
-int phylink_ethtool_get_module_eeprom(struct phylink *,
-				      struct ethtool_eeprom *, u8 *);
 int phylink_get_eee_err(struct phylink *);
 int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
 int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index bb6e498c6e3d..eb55252ca1fb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -22,6 +22,7 @@
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
+#include <linux/sfp.h>
 #include <linux/slab.h>
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
@@ -2245,6 +2246,9 @@ static int __ethtool_get_module_info(struct net_device *dev,
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	struct phy_device *phydev = dev->phydev;
 
+	if (dev->sfp_bus)
+		return sfp_get_module_info(dev->sfp_bus, modinfo);
+
 	if (phydev && phydev->drv && phydev->drv->module_info)
 		return phydev->drv->module_info(phydev, modinfo);
 
@@ -2279,6 +2283,9 @@ static int __ethtool_get_module_eeprom(struct net_device *dev,
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	struct phy_device *phydev = dev->phydev;
 
+	if (dev->sfp_bus)
+		return sfp_get_module_eeprom(dev->sfp_bus, ee, data);
+
 	if (phydev && phydev->drv && phydev->drv->module_eeprom)
 		return phydev->drv->module_eeprom(phydev, ee, data);
 

From 167cebeffadd45ce1e786889ab9346c15d64389b Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Wed, 28 Mar 2018 18:50:06 -0700
Subject: [PATCH 1505/1678] nfp: modify app MTU setting callbacks

Rename the 'change_mtu' app callback to 'check_mtu'. This is called
whenever an MTU change is requested on a netdev. It can reject the
change but is not responsible for implementing it.

Introduce a new 'repr_change_mtu' app callback that is hit when the MTU
of a repr is to be changed. This is responsible for performing the MTU
change and verifying it.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c |  4 +--
 drivers/net/ethernet/netronome/nfp/nfp_app.h  | 25 +++++++++++++------
 .../ethernet/netronome/nfp/nfp_net_common.c   |  2 +-
 .../net/ethernet/netronome/nfp/nfp_net_repr.c | 13 +++++++++-
 4 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 34e98aa6b956..752c45763ed9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -221,7 +221,7 @@ static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
 }
 
 static int
-nfp_bpf_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
+nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 	unsigned int max_mtu;
@@ -413,7 +413,7 @@ const struct nfp_app_type app_bpf = {
 	.init		= nfp_bpf_init,
 	.clean		= nfp_bpf_clean,
 
-	.change_mtu	= nfp_bpf_change_mtu,
+	.check_mtu	= nfp_bpf_check_mtu,
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 20546ae67909..2d9cb2528fc7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -86,8 +86,8 @@ extern const struct nfp_app_type app_flower;
  * @repr_clean:	representor about to be unregistered
  * @repr_open:	representor netdev open callback
  * @repr_stop:	representor netdev stop callback
- * @change_mtu:	MTU change on a netdev has been requested (veto-only, change
- *		is not guaranteed to be committed)
+ * @check_mtu:	MTU change request on a netdev (verify it is valid)
+ * @repr_change_mtu:	MTU change request on repr (make and verify change)
  * @start:	start application logic
  * @stop:	stop application logic
  * @ctrl_msg_rx:    control message handler
@@ -124,8 +124,10 @@ struct nfp_app_type {
 	int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr);
 	int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr);
 
-	int (*change_mtu)(struct nfp_app *app, struct net_device *netdev,
-			  int new_mtu);
+	int (*check_mtu)(struct nfp_app *app, struct net_device *netdev,
+			 int new_mtu);
+	int (*repr_change_mtu)(struct nfp_app *app, struct net_device *netdev,
+			       int new_mtu);
 
 	int (*start)(struct nfp_app *app);
 	void (*stop)(struct nfp_app *app);
@@ -247,11 +249,20 @@ nfp_app_repr_clean(struct nfp_app *app, struct net_device *netdev)
 }
 
 static inline int
-nfp_app_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
+nfp_app_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu)
 {
-	if (!app || !app->type->change_mtu)
+	if (!app || !app->type->check_mtu)
 		return 0;
-	return app->type->change_mtu(app, netdev, new_mtu);
+	return app->type->check_mtu(app, netdev, new_mtu);
+}
+
+static inline int
+nfp_app_repr_change_mtu(struct nfp_app *app, struct net_device *netdev,
+			int new_mtu)
+{
+	if (!app || !app->type->repr_change_mtu)
+		return 0;
+	return app->type->repr_change_mtu(app, netdev, new_mtu);
 }
 
 static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index a05be0ab2713..43a9c207a049 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3066,7 +3066,7 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
 	struct nfp_net_dp *dp;
 	int err;
 
-	err = nfp_app_change_mtu(nn->app, netdev, new_mtu);
+	err = nfp_app_check_mtu(nn->app, netdev, new_mtu);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 619570524d2a..0cd077addb26 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -196,8 +196,19 @@ nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
 static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct nfp_repr *repr = netdev_priv(netdev);
+	int err;
 
-	return nfp_app_change_mtu(repr->app, netdev, new_mtu);
+	err = nfp_app_check_mtu(repr->app, netdev, new_mtu);
+	if (err)
+		return err;
+
+	err = nfp_app_repr_change_mtu(repr->app, netdev, new_mtu);
+	if (err)
+		return err;
+
+	netdev->mtu = new_mtu;
+
+	return 0;
 }
 
 static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)

From 29a5dcae2790ba7fb26ea7128cbe61ecf906ab0a Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Wed, 28 Mar 2018 18:50:07 -0700
Subject: [PATCH 1506/1678] nfp: flower: offload phys port MTU change

Trigger a port mod message to request an MTU change on the NIC when any
physical port representor is assigned a new MTU value. The driver waits
10 msec for an ack that the FW has set the MTU. If no ack is received the
request is rejected and an appropriate warning flagged.

Rather than maintain an MTU queue per repr, one is maintained per app.
Because the MTU ndo is protected by the rtnl lock, there can never be
contention here. Portmod messages from the NIC are also protected by
rtnl so we first check if the portmod is an ack and, if so, handle outside
rtnl and the cmsg work queue.

Acks are detected by the marking of a bit in a portmod response. They are
then verfied by checking the port number and MTU value expected by the
app. If the expected MTU is 0 then no acks are currently expected.

Also, ensure that the packet headroom reserved by the flower firmware is
considered when accepting an MTU change on any repr.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/flower/cmsg.c  | 41 ++++++++-
 .../net/ethernet/netronome/nfp/flower/cmsg.h  |  4 +-
 .../net/ethernet/netronome/nfp/flower/main.c  | 87 ++++++++++++++++++-
 .../net/ethernet/netronome/nfp/flower/main.h  | 19 ++++
 4 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index baaea6f1a9d8..3735c09d2112 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -104,7 +104,8 @@ nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx,
 	msg->ports[idx].phys_port = phys_port;
 }
 
-int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok,
+			    unsigned int mtu, bool mtu_only)
 {
 	struct nfp_flower_cmsg_portmod *msg;
 	struct sk_buff *skb;
@@ -118,7 +119,11 @@ int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
 	msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id);
 	msg->reserved = 0;
 	msg->info = carrier_ok;
-	msg->mtu = cpu_to_be16(repr->netdev->mtu);
+
+	if (mtu_only)
+		msg->info |= NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY;
+
+	msg->mtu = cpu_to_be16(mtu);
 
 	nfp_ctrl_tx(repr->app->ctrl, skb);
 
@@ -146,6 +151,34 @@ int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists)
 	return 0;
 }
 
+static bool
+nfp_flower_process_mtu_ack(struct nfp_app *app, struct sk_buff *skb)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+	struct nfp_flower_cmsg_portmod *msg;
+
+	msg = nfp_flower_cmsg_get_data(skb);
+
+	if (!(msg->info & NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY))
+		return false;
+
+	spin_lock_bh(&app_priv->mtu_conf.lock);
+	if (!app_priv->mtu_conf.requested_val ||
+	    app_priv->mtu_conf.portnum != be32_to_cpu(msg->portnum) ||
+	    be16_to_cpu(msg->mtu) != app_priv->mtu_conf.requested_val) {
+		/* Not an ack for requested MTU change. */
+		spin_unlock_bh(&app_priv->mtu_conf.lock);
+		return false;
+	}
+
+	app_priv->mtu_conf.ack = true;
+	app_priv->mtu_conf.requested_val = 0;
+	wake_up(&app_priv->mtu_conf.wait_q);
+	spin_unlock_bh(&app_priv->mtu_conf.lock);
+
+	return true;
+}
+
 static void
 nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb)
 {
@@ -269,6 +302,10 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
 		/* We need to deal with stats updates from HW asap */
 		nfp_flower_rx_flow_stats(app, skb);
 		dev_consume_skb_any(skb);
+	} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_MOD &&
+		   nfp_flower_process_mtu_ack(app, skb)) {
+		/* Handle MTU acks outside wq to prevent RTNL conflict. */
+		dev_consume_skb_any(skb);
 	} else {
 		skb_queue_tail(&priv->cmsg_skbs, skb);
 		schedule_work(&priv->cmsg_work);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 3f46d836d1b8..96bc0e33980c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -397,6 +397,7 @@ struct nfp_flower_cmsg_portmod {
 };
 
 #define NFP_FLOWER_CMSG_PORTMOD_INFO_LINK	BIT(0)
+#define NFP_FLOWER_CMSG_PORTMOD_MTU_CHANGE_ONLY	BIT(1)
 
 /* NFP_FLOWER_CMSG_TYPE_PORT_REIFY */
 struct nfp_flower_cmsg_portreify {
@@ -464,7 +465,8 @@ void
 nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx,
 			     unsigned int nbi, unsigned int nbi_port,
 			     unsigned int phys_port);
-int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok);
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok,
+			    unsigned int mtu, bool mtu_only);
 int nfp_flower_cmsg_portreify(struct nfp_repr *repr, bool exists);
 void nfp_flower_cmsg_process_rx(struct work_struct *work);
 void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 742d6f1575b5..6357e0720f43 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -52,6 +52,8 @@
 
 #define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
 
+#define NFP_FLOWER_FRAME_HEADROOM	158
+
 static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
 {
 	return "FLOWER";
@@ -157,7 +159,7 @@ nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr)
 {
 	int err;
 
-	err = nfp_flower_cmsg_portmod(repr, true);
+	err = nfp_flower_cmsg_portmod(repr, true, repr->netdev->mtu, false);
 	if (err)
 		return err;
 
@@ -171,7 +173,7 @@ nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr)
 {
 	netif_tx_disable(repr->netdev);
 
-	return nfp_flower_cmsg_portmod(repr, false);
+	return nfp_flower_cmsg_portmod(repr, false, repr->netdev->mtu, false);
 }
 
 static int
@@ -521,6 +523,9 @@ static int nfp_flower_init(struct nfp_app *app)
 	INIT_WORK(&app_priv->cmsg_work, nfp_flower_cmsg_process_rx);
 	init_waitqueue_head(&app_priv->reify_wait_queue);
 
+	init_waitqueue_head(&app_priv->mtu_conf.wait_q);
+	spin_lock_init(&app_priv->mtu_conf.lock);
+
 	err = nfp_flower_metadata_init(app);
 	if (err)
 		goto err_free_app_priv;
@@ -552,6 +557,81 @@ static void nfp_flower_clean(struct nfp_app *app)
 	app->priv = NULL;
 }
 
+static int
+nfp_flower_check_mtu(struct nfp_app *app, struct net_device *netdev,
+		     int new_mtu)
+{
+	/* The flower fw reserves NFP_FLOWER_FRAME_HEADROOM bytes of the
+	 * supported max MTU to allow for appending tunnel headers. To prevent
+	 * unexpected behaviour this needs to be accounted for.
+	 */
+	if (new_mtu > netdev->max_mtu - NFP_FLOWER_FRAME_HEADROOM) {
+		nfp_err(app->cpp, "New MTU (%d) is not valid\n", new_mtu);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv)
+{
+	bool ret;
+
+	spin_lock_bh(&app_priv->mtu_conf.lock);
+	ret = app_priv->mtu_conf.ack;
+	spin_unlock_bh(&app_priv->mtu_conf.lock);
+
+	return ret;
+}
+
+static int
+nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev,
+			   int new_mtu)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+	struct nfp_repr *repr = netdev_priv(netdev);
+	int err, ack;
+
+	/* Only need to config FW for physical port MTU change. */
+	if (repr->port->type != NFP_PORT_PHYS_PORT)
+		return 0;
+
+	if (!(app_priv->flower_ext_feats & NFP_FL_NBI_MTU_SETTING)) {
+		nfp_err(app->cpp, "Physical port MTU setting not supported\n");
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&app_priv->mtu_conf.lock);
+	app_priv->mtu_conf.ack = false;
+	app_priv->mtu_conf.requested_val = new_mtu;
+	app_priv->mtu_conf.portnum = repr->dst->u.port_info.port_id;
+	spin_unlock_bh(&app_priv->mtu_conf.lock);
+
+	err = nfp_flower_cmsg_portmod(repr, netif_carrier_ok(netdev), new_mtu,
+				      true);
+	if (err) {
+		spin_lock_bh(&app_priv->mtu_conf.lock);
+		app_priv->mtu_conf.requested_val = 0;
+		spin_unlock_bh(&app_priv->mtu_conf.lock);
+		return err;
+	}
+
+	/* Wait for fw to ack the change. */
+	ack = wait_event_timeout(app_priv->mtu_conf.wait_q,
+				 nfp_flower_check_ack(app_priv),
+				 msecs_to_jiffies(10));
+
+	if (!ack) {
+		spin_lock_bh(&app_priv->mtu_conf.lock);
+		app_priv->mtu_conf.requested_val = 0;
+		spin_unlock_bh(&app_priv->mtu_conf.lock);
+		nfp_warn(app->cpp, "MTU change not verified with fw\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static int nfp_flower_start(struct nfp_app *app)
 {
 	return nfp_tunnel_config_start(app);
@@ -574,6 +654,9 @@ const struct nfp_app_type app_flower = {
 	.init		= nfp_flower_init,
 	.clean		= nfp_flower_clean,
 
+	.check_mtu	= nfp_flower_check_mtu,
+	.repr_change_mtu  = nfp_flower_repr_change_mtu,
+
 	.vnic_alloc	= nfp_flower_vnic_alloc,
 	.vnic_init	= nfp_flower_vnic_init,
 	.vnic_clean	= nfp_flower_vnic_clean,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c5cebf6fb1d3..e030b3ce4510 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -65,6 +65,7 @@ struct nfp_app;
 
 /* Extra features bitmap. */
 #define NFP_FL_FEATS_GENEVE		BIT(0)
+#define NFP_FL_NBI_MTU_SETTING		BIT(1)
 
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
@@ -78,6 +79,22 @@ struct nfp_fl_stats_id {
 	u8 repeated_em_count;
 };
 
+/**
+ * struct nfp_mtu_conf - manage MTU setting
+ * @portnum:		NFP port number of repr with requested MTU change
+ * @requested_val:	MTU value requested for repr
+ * @ack:		Received ack that MTU has been correctly set
+ * @wait_q:		Wait queue for MTU acknowledgements
+ * @lock:		Lock for setting/reading MTU variables
+ */
+struct nfp_mtu_conf {
+	u32 portnum;
+	unsigned int requested_val;
+	bool ack;
+	wait_queue_head_t wait_q;
+	spinlock_t lock;
+};
+
 /**
  * struct nfp_flower_priv - Flower APP per-vNIC priv data
  * @app:		Back pointer to app
@@ -106,6 +123,7 @@ struct nfp_fl_stats_id {
  * @reify_replies:	atomically stores the number of replies received
  *			from firmware for repr reify
  * @reify_wait_queue:	wait queue for repr reify response counting
+ * @mtu_conf:		Configuration of repr MTU value
  */
 struct nfp_flower_priv {
 	struct nfp_app *app;
@@ -133,6 +151,7 @@ struct nfp_flower_priv {
 	struct notifier_block nfp_tun_neigh_nb;
 	atomic_t reify_replies;
 	wait_queue_head_t reify_wait_queue;
+	struct nfp_mtu_conf mtu_conf;
 };
 
 struct nfp_fl_key_ls {

From 9217e566bdee4583d0a9ea4879c8f5e004886eac Mon Sep 17 00:00:00 2001
From: Mike Looijmans <mike.looijmans@topic.nl>
Date: Thu, 29 Mar 2018 07:29:48 +0200
Subject: [PATCH 1507/1678] of_net: Implement of_get_nvmem_mac_address helper

It's common practice to store MAC addresses for network interfaces into
nvmem devices. However the code to actually do this in the kernel lacks,
so this patch adds of_get_nvmem_mac_address() for drivers to obtain the
address from an nvmem cell provider.

This is particulary useful on devices where the ethernet interface cannot
be configured by the bootloader, for example because it's in an FPGA.

Signed-off-by: Mike Looijmans <mike.looijmans@topic.nl>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ethernet.txt      |  2 +
 drivers/of/of_net.c                           | 40 +++++++++++++++++++
 include/linux/of_net.h                        |  6 +++
 3 files changed, 48 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index 2974e63ba311..cfc376bc977a 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -10,6 +10,8 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
   the boot program; should be used in cases where the MAC address assigned to
   the device by the boot program is different from the "local-mac-address"
   property;
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address;
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used;
 - max-speed: number, specifies maximum speed in Mbit/s supported by the device;
 - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
   the maximum frame size (there's contradiction in the Devicetree
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index d820f3edd431..53189d4022a6 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -7,6 +7,7 @@
  */
 #include <linux/etherdevice.h>
 #include <linux/kernel.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/of_net.h>
 #include <linux/phy.h>
 #include <linux/export.h>
@@ -80,3 +81,42 @@ const void *of_get_mac_address(struct device_node *np)
 	return of_get_mac_addr(np, "address");
 }
 EXPORT_SYMBOL(of_get_mac_address);
+
+/**
+ * Obtain the MAC address from an nvmem provider named 'mac-address' through
+ * device tree.
+ * On success, copies the new address into memory pointed to by addr and
+ * returns 0. Returns a negative error code otherwise.
+ * @np:		Device tree node containing the nvmem-cells phandle
+ * @addr:	Pointer to receive the MAC address using ether_addr_copy()
+ */
+int of_get_nvmem_mac_address(struct device_node *np, void *addr)
+{
+	struct nvmem_cell *cell;
+	const void *mac;
+	size_t len;
+	int ret;
+
+	cell = of_nvmem_cell_get(np, "mac-address");
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	mac = nvmem_cell_read(cell, &len);
+
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);
+
+	if (len < ETH_ALEN || !is_valid_ether_addr(mac)) {
+		ret = -EINVAL;
+	} else {
+		ether_addr_copy(addr, mac);
+		ret = 0;
+	}
+
+	kfree(mac);
+
+	return ret;
+}
+EXPORT_SYMBOL(of_get_nvmem_mac_address);
diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 9cd72aab76fe..90d81ee9e6a0 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -13,6 +13,7 @@
 struct net_device;
 extern int of_get_phy_mode(struct device_node *np);
 extern const void *of_get_mac_address(struct device_node *np);
+extern int of_get_nvmem_mac_address(struct device_node *np, void *addr);
 extern struct net_device *of_find_net_device_by_node(struct device_node *np);
 #else
 static inline int of_get_phy_mode(struct device_node *np)
@@ -25,6 +26,11 @@ static inline const void *of_get_mac_address(struct device_node *np)
 	return NULL;
 }
 
+static inline int of_get_nvmem_mac_address(struct device_node *np, void *addr)
+{
+	return -ENODEV;
+}
+
 static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
 {
 	return NULL;

From aa076e3d220a7d2e71ef0f5213bdbbcc7e351b86 Mon Sep 17 00:00:00 2001
From: Mike Looijmans <mike.looijmans@topic.nl>
Date: Thu, 29 Mar 2018 07:29:49 +0200
Subject: [PATCH 1508/1678] net: macb: Try to retrieve MAC addess from nvmem
 provider

Call of_get_nvmem_mac_address() to fetch the MAC address from an nvmem
cell, if one is provided in the device tree. This allows the address to
be stored in an I2C EEPROM device for example.

Signed-off-by: Mike Looijmans <mike.looijmans@topic.nl>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d09bd43680b3..b4c9268100bb 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3952,10 +3952,16 @@ static int macb_probe(struct platform_device *pdev)
 		dev->max_mtu = ETH_DATA_LEN;
 
 	mac = of_get_mac_address(np);
-	if (mac)
+	if (mac) {
 		ether_addr_copy(bp->dev->dev_addr, mac);
-	else
-		macb_get_hwaddr(bp);
+	} else {
+		err = of_get_nvmem_mac_address(np, bp->dev->dev_addr);
+		if (err) {
+			if (err == -EPROBE_DEFER)
+				goto err_out_free_netdev;
+			macb_get_hwaddr(bp);
+		}
+	}
 
 	err = of_get_phy_mode(np);
 	if (err < 0) {

From e9a441b6e729e16092fcc18e3962b952a01d1e3c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 17:03:25 +0300
Subject: [PATCH 1509/1678] xfrm: Register xfrm_dev_notifier in appropriate
 place

Currently, driver registers it from pernet_operations::init method,
and this breaks modularity, because initialization of net namespace
and netdevice notifiers are orthogonal actions. We don't have
per-namespace netdevice notifiers; all of them are global for all
devices in all namespaces.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     | 2 +-
 net/xfrm/xfrm_device.c | 2 +-
 net/xfrm/xfrm_policy.c | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index aa027ba1d032..a872379b69da 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1894,7 +1894,7 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
 #endif
 }
 
-void __net_init xfrm_dev_init(void);
+void __init xfrm_dev_init(void);
 
 #ifdef CONFIG_XFRM_OFFLOAD
 void xfrm_dev_resume(struct sk_buff *skb);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index e87d6c4dd5b6..175941e15a6e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -350,7 +350,7 @@ static struct notifier_block xfrm_dev_notifier = {
 	.notifier_call	= xfrm_dev_event,
 };
 
-void __net_init xfrm_dev_init(void)
+void __init xfrm_dev_init(void)
 {
 	register_netdevice_notifier(&xfrm_dev_notifier);
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0e065db6c7c0..40b54cc64243 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2895,8 +2895,6 @@ static int __net_init xfrm_policy_init(struct net *net)
 	INIT_LIST_HEAD(&net->xfrm.policy_all);
 	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
 	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
-	if (net_eq(net, &init_net))
-		xfrm_dev_init();
 	return 0;
 
 out_bydst:
@@ -2999,6 +2997,7 @@ void __init xfrm_init(void)
 		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
 
 	register_pernet_subsys(&xfrm_net_ops);
+	xfrm_dev_init();
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
 }

From 9e2f6c5d78db6647eb9d7bfeb20b9e0f9ff2c56c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 17:03:35 +0300
Subject: [PATCH 1510/1678] netfilter: Rework xt_TEE netdevice notifier

Register netdevice notifier for every iptable entry
is not good, since this breaks modularity, and
the hidden synchronization is based on rtnl_lock().

This patch reworks the synchronization via new lock,
while the rest of logic remains as it was before.
This is required for the next patch.

Tested via:

while :; do
	unshare -n iptables -t mangle -A OUTPUT -j TEE --gateway 1.1.1.2 --oif lo;
done

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_TEE.c | 73 ++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 27 deletions(-)

diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 86b0580b2216..475957cfcf50 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -20,7 +20,7 @@
 #include <linux/netfilter/xt_TEE.h>
 
 struct xt_tee_priv {
-	struct notifier_block	notifier;
+	struct list_head	list;
 	struct xt_tee_tginfo	*tginfo;
 	int			oif;
 };
@@ -51,29 +51,35 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 }
 #endif
 
+static DEFINE_MUTEX(priv_list_mutex);
+static LIST_HEAD(priv_list);
+
 static int tee_netdev_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct xt_tee_priv *priv;
 
-	priv = container_of(this, struct xt_tee_priv, notifier);
-	switch (event) {
-	case NETDEV_REGISTER:
-		if (!strcmp(dev->name, priv->tginfo->oif))
-			priv->oif = dev->ifindex;
-		break;
-	case NETDEV_UNREGISTER:
-		if (dev->ifindex == priv->oif)
-			priv->oif = -1;
-		break;
-	case NETDEV_CHANGENAME:
-		if (!strcmp(dev->name, priv->tginfo->oif))
-			priv->oif = dev->ifindex;
-		else if (dev->ifindex == priv->oif)
-			priv->oif = -1;
-		break;
+	mutex_lock(&priv_list_mutex);
+	list_for_each_entry(priv, &priv_list, list) {
+		switch (event) {
+		case NETDEV_REGISTER:
+			if (!strcmp(dev->name, priv->tginfo->oif))
+				priv->oif = dev->ifindex;
+			break;
+		case NETDEV_UNREGISTER:
+			if (dev->ifindex == priv->oif)
+				priv->oif = -1;
+			break;
+		case NETDEV_CHANGENAME:
+			if (!strcmp(dev->name, priv->tginfo->oif))
+				priv->oif = dev->ifindex;
+			else if (dev->ifindex == priv->oif)
+				priv->oif = -1;
+			break;
+		}
 	}
+	mutex_unlock(&priv_list_mutex);
 
 	return NOTIFY_DONE;
 }
@@ -89,8 +95,6 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 
 	if (info->oif[0]) {
-		int ret;
-
 		if (info->oif[sizeof(info->oif)-1] != '\0')
 			return -EINVAL;
 
@@ -100,14 +104,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
 
 		priv->tginfo  = info;
 		priv->oif     = -1;
-		priv->notifier.notifier_call = tee_netdev_event;
 		info->priv    = priv;
 
-		ret = register_netdevice_notifier(&priv->notifier);
-		if (ret) {
-			kfree(priv);
-			return ret;
-		}
+		mutex_lock(&priv_list_mutex);
+		list_add(&priv->list, &priv_list);
+		mutex_unlock(&priv_list_mutex);
 	} else
 		info->priv = NULL;
 
@@ -120,7 +121,9 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
 	struct xt_tee_tginfo *info = par->targinfo;
 
 	if (info->priv) {
-		unregister_netdevice_notifier(&info->priv->notifier);
+		mutex_lock(&priv_list_mutex);
+		list_del(&info->priv->list);
+		mutex_unlock(&priv_list_mutex);
 		kfree(info->priv);
 	}
 	static_key_slow_dec(&xt_tee_enabled);
@@ -153,13 +156,29 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 #endif
 };
 
+static struct notifier_block tee_netdev_notifier = {
+	.notifier_call = tee_netdev_event,
+};
+
 static int __init tee_tg_init(void)
 {
-	return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+	int ret;
+
+	ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+	if (ret)
+		return ret;
+	ret = register_netdevice_notifier(&tee_netdev_notifier);
+	if (ret) {
+		xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+		return ret;
+	}
+
+	return 0;
 }
 
 static void __exit tee_tg_exit(void)
 {
+	unregister_netdevice_notifier(&tee_netdev_notifier);
 	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
 }
 

From 328fbe747ad4622f0dfa98d2e19e836f03f80c04 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Thu, 29 Mar 2018 17:03:45 +0300
Subject: [PATCH 1511/1678] net: Close race between {un,
 }register_netdevice_notifier() and setup_net()/cleanup_net()

{un,}register_netdevice_notifier() iterate over all net namespaces
hashed to net_namespace_list. But pernet_operations register and
unregister netdevices in unhashed net namespace, and they are not
seen for netdevice notifiers. This results in asymmetry:

1)Race with register_netdevice_notifier()
  pernet_operations::init(net)	...
   register_netdevice()		...
    call_netdevice_notifiers()  ...
      ... nb is not called ...
  ...				register_netdevice_notifier(nb) -> net skipped
  ...				...
  list_add_tail(&net->list, ..) ...

  Then, userspace stops using net, and it's destructed:

  pernet_operations::exit(net)
   unregister_netdevice()
    call_netdevice_notifiers()
      ... nb is called ...

This always happens with net::loopback_dev, but it may be not the only device.

2)Race with unregister_netdevice_notifier()
  pernet_operations::init(net)
   register_netdevice()
    call_netdevice_notifiers()
      ... nb is called ...

  Then, userspace stops using net, and it's destructed:

  list_del_rcu(&net->list)	...
  pernet_operations::exit(net)  unregister_netdevice_notifier(nb) -> net skipped
   dev_change_net_namespace()	...
    call_netdevice_notifiers()
      ... nb is not called ...
   unregister_netdevice()
    call_netdevice_notifiers()
      ... nb is not called ...

This race is more danger, since dev_change_net_namespace() moves real
network devices, which use not trivial netdevice notifiers, and if this
will happen, the system will be left in unpredictable state.

The patch closes the race. During the testing I found two places,
where register_netdevice_notifier() is called from pernet init/exit
methods (which led to deadlock) and fixed them (see previous patches).

The review moved me to one more unusual registration place:
raw_init() (can driver). It may be a reason of problems,
if someone creates in-kernel CAN_RAW sockets, since they
will be destroyed in exit method and raw_release()
will call unregister_netdevice_notifier(). But grep over
kernel tree does not show, someone creates such sockets
from kernel space.

Theoretically, there can be more places like this, and which are
hidden from review, but we found them on the first bumping there
(since there is no a race, it will be 100% reproducible).

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 1ddb6b9c58a8..07da7add4845 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1625,6 +1625,8 @@ int register_netdevice_notifier(struct notifier_block *nb)
 	struct net *net;
 	int err;
 
+	/* Close race with setup_net() and cleanup_net() */
+	down_write(&pernet_ops_rwsem);
 	rtnl_lock();
 	err = raw_notifier_chain_register(&netdev_chain, nb);
 	if (err)
@@ -1649,6 +1651,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
 
 unlock:
 	rtnl_unlock();
+	up_write(&pernet_ops_rwsem);
 	return err;
 
 rollback:
@@ -1694,6 +1697,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	struct net *net;
 	int err;
 
+	/* Close race with setup_net() and cleanup_net() */
+	down_write(&pernet_ops_rwsem);
 	rtnl_lock();
 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
 	if (err)
@@ -1713,6 +1718,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	up_read(&net_rwsem);
 unlock:
 	rtnl_unlock();
+	up_write(&pernet_ops_rwsem);
 	return err;
 }
 EXPORT_SYMBOL(unregister_netdevice_notifier);

From 428604fb118facce1309670779a35baf27ad044c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 29 Mar 2018 11:02:24 +0200
Subject: [PATCH 1512/1678] ipv6: do not set routes if disable_ipv6 has been
 enabled

Do not allow setting ipv6 routes from userspace if disable_ipv6 has been
enabled. The issue can be triggered using the following reproducer:

- sysctl net.ipv6.conf.all.disable_ipv6=1
- ip -6 route add a:b:c:d::/64 dev em1
- ip -6 route show
  a:b:c:d::/64 dev em1 metric 1024 pref medium

Fix it checking disable_ipv6 value in ip6_route_info_create routine

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ba8d5df50ebe..e461ef1158b6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2917,6 +2917,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (!dev)
 		goto out;
 
+	if (idev->cnf.disable_ipv6) {
+		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
+		err = -EACCES;
+		goto out;
+	}
+
 	if (!(dev->flags & IFF_UP)) {
 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
 		err = -ENETDOWN;

From 2f0aaf7fb11c90645bbda447c1d26f5b0b04e984 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 29 Mar 2018 11:02:25 +0200
Subject: [PATCH 1513/1678] Documentation: ip-sysctl.txt: clarify disable_ipv6

Clarify that when disable_ipv6 is enabled even the ipv6 routes
are deleted for the selected interface and from now it will not
be possible to add addresses/routes to that interface

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 1d1120753ae8..33f35f049ad5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1703,7 +1703,9 @@ disable_ipv6 - BOOLEAN
 	interface and start Duplicate Address Detection, if necessary.
 
 	When this value is changed from 0 to 1 (IPv6 is being disabled),
-	it will dynamically delete all address on the given interface.
+	it will dynamically delete all addresses and routes on the given
+	interface. From now on it will not possible to add addresses/routes
+	to the selected interface.
 
 accept_dad - INTEGER
 	Whether to accept DAD (Duplicate Address Detection).

From 02281a3525c9f1c07d792d64b079a2d677ed0ea5 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Thu, 29 Mar 2018 17:29:40 +0800
Subject: [PATCH 1514/1678] net: mvneta: remove duplicate *_coal assignment

The style of the rx/tx queue's *_coal member assignment is:

static void foo_coal_set(...)
{
	set the coal in hw;
	update queue's foo_coal member; [1]
}

In other place, we call foo_coal_set(pp, queue->foo_coal), so the above [1]
is duplicated and could be removed.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 25ced96750bf..a58acdb5eba3 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1555,7 +1555,6 @@ static void mvneta_rx_pkts_coal_set(struct mvneta_port *pp,
 {
 	mvreg_write(pp, MVNETA_RXQ_THRESHOLD_REG(rxq->id),
 		    value | MVNETA_RXQ_NON_OCCUPIED(0));
-	rxq->pkts_coal = value;
 }
 
 /* Set the time delay in usec before RX interrupt will be generated by
@@ -1571,7 +1570,6 @@ static void mvneta_rx_time_coal_set(struct mvneta_port *pp,
 	val = (clk_rate / 1000000) * value;
 
 	mvreg_write(pp, MVNETA_RXQ_TIME_COAL_REG(rxq->id), val);
-	rxq->time_coal = value;
 }
 
 /* Set threshold for TX_DONE pkts coalescing */
@@ -1586,8 +1584,6 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp,
 	val |= MVNETA_TXQ_SENT_THRESH_MASK(value);
 
 	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), val);
-
-	txq->done_pkts_coal = value;
 }
 
 /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */

From 34877a15f787b594649ed375943ecc65f4342e30 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Thu, 29 Mar 2018 10:40:18 +0100
Subject: [PATCH 1515/1678] net: stmmac: Rework and fix TX Timeout code

Currently TX Timeout handler does not behaves as expected and leads to
an unrecoverable state. Rework current implementation of TX Timeout
handling to actually perform a complete reset of the driver state and IP.

We use deferred work to init a task which will be responsible for
resetting the system.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  | 11 +++
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 67 +++++++++++++++++--
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 75161e1b7e55..1485d8fcfaa9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -145,6 +145,17 @@ struct stmmac_priv {
 	struct dentry *dbgfs_rings_status;
 	struct dentry *dbgfs_dma_cap;
 #endif
+
+	unsigned long state;
+	struct workqueue_struct *wq;
+	struct work_struct service_task;
+};
+
+enum stmmac_state {
+	STMMAC_DOWN,
+	STMMAC_RESET_REQUESTED,
+	STMMAC_RESETING,
+	STMMAC_SERVICE_SCHED,
 };
 
 int stmmac_mdio_unregister(struct net_device *ndev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9f983dd069d5..b75ecf3d19fe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -196,6 +196,20 @@ static void stmmac_start_all_queues(struct stmmac_priv *priv)
 		netif_tx_start_queue(netdev_get_tx_queue(priv->dev, queue));
 }
 
+static void stmmac_service_event_schedule(struct stmmac_priv *priv)
+{
+	if (!test_bit(STMMAC_DOWN, &priv->state) &&
+	    !test_and_set_bit(STMMAC_SERVICE_SCHED, &priv->state))
+		queue_work(priv->wq, &priv->service_task);
+}
+
+static void stmmac_global_err(struct stmmac_priv *priv)
+{
+	netif_carrier_off(priv->dev);
+	set_bit(STMMAC_RESET_REQUESTED, &priv->state);
+	stmmac_service_event_schedule(priv);
+}
+
 /**
  * stmmac_clk_csr_set - dynamically set the MDC clock
  * @priv: driver private structure
@@ -3587,12 +3601,8 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 static void stmmac_tx_timeout(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 chan;
 
-	/* Clear Tx resources and restart transmitting again */
-	for (chan = 0; chan < tx_count; chan++)
-		stmmac_tx_err(priv, chan);
+	stmmac_global_err(priv);
 }
 
 /**
@@ -3716,6 +3726,10 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
+	/* Check if adapter is up */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return IRQ_HANDLED;
+
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {
 		int status = priv->hw->mac->host_irq_status(priv->hw,
@@ -4051,6 +4065,37 @@ static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_set_mac_address = stmmac_set_mac_address,
 };
 
+static void stmmac_reset_subtask(struct stmmac_priv *priv)
+{
+	if (!test_and_clear_bit(STMMAC_RESET_REQUESTED, &priv->state))
+		return;
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return;
+
+	netdev_err(priv->dev, "Reset adapter.\n");
+
+	rtnl_lock();
+	netif_trans_update(priv->dev);
+	while (test_and_set_bit(STMMAC_RESETING, &priv->state))
+		usleep_range(1000, 2000);
+
+	set_bit(STMMAC_DOWN, &priv->state);
+	dev_close(priv->dev);
+	dev_open(priv->dev);
+	clear_bit(STMMAC_DOWN, &priv->state);
+	clear_bit(STMMAC_RESETING, &priv->state);
+	rtnl_unlock();
+}
+
+static void stmmac_service_task(struct work_struct *work)
+{
+	struct stmmac_priv *priv = container_of(work, struct stmmac_priv,
+			service_task);
+
+	stmmac_reset_subtask(priv);
+	clear_bit(STMMAC_SERVICE_SCHED, &priv->state);
+}
+
 /**
  *  stmmac_hw_init - Init the MAC device
  *  @priv: driver private structure
@@ -4212,6 +4257,15 @@ int stmmac_dvr_probe(struct device *device,
 	/* Verify driver arguments */
 	stmmac_verify_args();
 
+	/* Allocate workqueue */
+	priv->wq = create_singlethread_workqueue("stmmac_wq");
+	if (!priv->wq) {
+		dev_err(priv->device, "failed to create workqueue\n");
+		goto error_wq;
+	}
+
+	INIT_WORK(&priv->service_task, stmmac_service_task);
+
 	/* Override with kernel parameters if supplied XXX CRS XXX
 	 * this needs to have multiple instances
 	 */
@@ -4342,6 +4396,8 @@ error_mdio_register:
 		netif_napi_del(&rx_q->napi);
 	}
 error_hw_init:
+	destroy_workqueue(priv->wq);
+error_wq:
 	free_netdev(ndev);
 
 	return ret;
@@ -4374,6 +4430,7 @@ int stmmac_dvr_remove(struct device *dev)
 	    priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
 		stmmac_mdio_unregister(ndev);
+	destroy_workqueue(priv->wq);
 	free_netdev(ndev);
 
 	return 0;

From 8bf993a5877e8a0a2f6338085f2dee7c23f524a3 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Thu, 29 Mar 2018 10:40:19 +0100
Subject: [PATCH 1516/1678] net: stmmac: Add support for DWMAC5 and implement
 Safety Features

This adds initial suport for DWMAC5 and implements the Automotive Safety
Package which is available from core version 5.10.

The Automotive Safety Pacakge (also called Safety Features) offers us
with error protection in the core by implementing ECC Protection in
memories, on-chip data path parity protection, FSM parity and timeout
protection and Application/CSR interface timeout protection.

In case of an uncorrectable error we call stmmac_global_err() and
reconfigure the whole core.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Makefile  |   2 +-
 drivers/net/ethernet/stmicro/stmmac/common.h  |  22 ++
 drivers/net/ethernet/stmicro/stmmac/dwmac4.h  |   4 +
 .../net/ethernet/stmicro/stmmac/dwmac4_core.c |  39 ++-
 .../net/ethernet/stmicro/stmmac/dwmac4_dma.c  |   6 +
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c  | 298 ++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h  |  52 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |   1 +
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  |  39 ++-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  34 ++
 10 files changed, 494 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac5.c
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac5.h

diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index ff3f83b86d10..972e4ef6d414 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -4,7 +4,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
 	      chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o	\
 	      dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o	\
 	      mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o	\
-	      dwmac4_dma.o dwmac4_lib.o dwmac4_core.o $(stmmac-y)
+	      dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o $(stmmac-y)
 
 # Ordering matters. Generic driver must be last.
 obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 2ffe76c0ff74..ad2388aee463 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -38,6 +38,8 @@
 #define	DWMAC_CORE_3_40	0x34
 #define	DWMAC_CORE_3_50	0x35
 #define	DWMAC_CORE_4_00	0x40
+#define DWMAC_CORE_5_00 0x50
+#define DWMAC_CORE_5_10 0x51
 #define STMMAC_CHAN0	0	/* Always supported and default for all chips */
 
 /* These need to be power of two, and >= 4 */
@@ -174,6 +176,17 @@ struct stmmac_extra_stats {
 	unsigned long tx_tso_nfrags;
 };
 
+/* Safety Feature statistics exposed by ethtool */
+struct stmmac_safety_stats {
+	unsigned long mac_errors[32];
+	unsigned long mtl_errors[32];
+	unsigned long dma_errors[32];
+};
+
+/* Number of fields in Safety Stats */
+#define STMMAC_SAFETY_FEAT_SIZE	\
+	(sizeof(struct stmmac_safety_stats) / sizeof(unsigned long))
+
 /* CSR Frequency Access Defines*/
 #define CSR_F_35M	35000000
 #define CSR_F_60M	60000000
@@ -336,6 +349,8 @@ struct dma_features {
 	/* TX and RX FIFO sizes */
 	unsigned int tx_fifo_size;
 	unsigned int rx_fifo_size;
+	/* Automotive Safety Package */
+	unsigned int asp;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -532,6 +547,13 @@ struct stmmac_ops {
 			     bool loopback);
 	void (*pcs_rane)(void __iomem *ioaddr, bool restart);
 	void (*pcs_get_adv_lp)(void __iomem *ioaddr, struct rgmii_adv *adv);
+	/* Safety Features */
+	int (*safety_feat_config)(void __iomem *ioaddr, unsigned int asp);
+	bool (*safety_feat_irq_status)(struct net_device *ndev,
+			void __iomem *ioaddr, unsigned int asp,
+			struct stmmac_safety_stats *stats);
+	const char *(*safety_feat_dump)(struct stmmac_safety_stats *stats,
+			int index, unsigned long *count);
 };
 
 /* PTP and HW Timer helpers */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 7761a26ec9c5..c7bff596c665 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -39,6 +39,7 @@
 #define GMAC_HW_FEATURE0		0x0000011c
 #define GMAC_HW_FEATURE1		0x00000120
 #define GMAC_HW_FEATURE2		0x00000124
+#define GMAC_HW_FEATURE3		0x00000128
 #define GMAC_MDIO_ADDR			0x00000200
 #define GMAC_MDIO_DATA			0x00000204
 #define GMAC_ADDR_HIGH(reg)		(0x300 + reg * 8)
@@ -192,6 +193,9 @@ enum power_event {
 #define GMAC_HW_FEAT_TXQCNT		GENMASK(9, 6)
 #define GMAC_HW_FEAT_RXQCNT		GENMASK(3, 0)
 
+/* MAC HW features3 bitmap */
+#define GMAC_HW_FEAT_ASP		GENMASK(29, 28)
+
 /* MAC HW ADDR regs */
 #define GMAC_HI_DCS			GENMASK(18, 16)
 #define GMAC_HI_DCS_SHIFT		16
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 46b9ae20ff6c..a3af92ebbca8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -20,6 +20,7 @@
 #include <net/dsa.h>
 #include "stmmac_pcs.h"
 #include "dwmac4.h"
+#include "dwmac5.h"
 
 static void dwmac4_core_init(struct mac_device_info *hw,
 			     struct net_device *dev)
@@ -768,6 +769,40 @@ static const struct stmmac_ops dwmac410_ops = {
 	.set_filter = dwmac4_set_filter,
 };
 
+static const struct stmmac_ops dwmac510_ops = {
+	.core_init = dwmac4_core_init,
+	.set_mac = stmmac_dwmac4_set_mac,
+	.rx_ipc = dwmac4_rx_ipc_enable,
+	.rx_queue_enable = dwmac4_rx_queue_enable,
+	.rx_queue_prio = dwmac4_rx_queue_priority,
+	.tx_queue_prio = dwmac4_tx_queue_priority,
+	.rx_queue_routing = dwmac4_rx_queue_routing,
+	.prog_mtl_rx_algorithms = dwmac4_prog_mtl_rx_algorithms,
+	.prog_mtl_tx_algorithms = dwmac4_prog_mtl_tx_algorithms,
+	.set_mtl_tx_queue_weight = dwmac4_set_mtl_tx_queue_weight,
+	.map_mtl_to_dma = dwmac4_map_mtl_dma,
+	.config_cbs = dwmac4_config_cbs,
+	.dump_regs = dwmac4_dump_regs,
+	.host_irq_status = dwmac4_irq_status,
+	.host_mtl_irq_status = dwmac4_irq_mtl_status,
+	.flow_ctrl = dwmac4_flow_ctrl,
+	.pmt = dwmac4_pmt,
+	.set_umac_addr = dwmac4_set_umac_addr,
+	.get_umac_addr = dwmac4_get_umac_addr,
+	.set_eee_mode = dwmac4_set_eee_mode,
+	.reset_eee_mode = dwmac4_reset_eee_mode,
+	.set_eee_timer = dwmac4_set_eee_timer,
+	.set_eee_pls = dwmac4_set_eee_pls,
+	.pcs_ctrl_ane = dwmac4_ctrl_ane,
+	.pcs_rane = dwmac4_rane,
+	.pcs_get_adv_lp = dwmac4_get_adv_lp,
+	.debug = dwmac4_debug,
+	.set_filter = dwmac4_set_filter,
+	.safety_feat_config = dwmac5_safety_feat_config,
+	.safety_feat_irq_status = dwmac5_safety_feat_irq_status,
+	.safety_feat_dump = dwmac5_safety_feat_dump,
+};
+
 struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
 				     int perfect_uc_entries, int *synopsys_id)
 {
@@ -808,7 +843,9 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
 	else
 		mac->dma = &dwmac4_dma_ops;
 
-	if (*synopsys_id >= DWMAC_CORE_4_00)
+	if (*synopsys_id >= DWMAC_CORE_5_10)
+		mac->mac = &dwmac510_ops;
+	else if (*synopsys_id >= DWMAC_CORE_4_00)
 		mac->mac = &dwmac410_ops;
 	else
 		mac->mac = &dwmac4_ops;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index c110f6850ffa..d37d457306d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -373,6 +373,12 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
 	/* IEEE 1588-2002 */
 	dma_cap->time_stamp = 0;
+
+	/* MAC HW feature3 */
+	hw_cap = readl(ioaddr + GMAC_HW_FEATURE3);
+
+	/* 5.10 Features */
+	dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28;
 }
 
 /* Enable/disable TSO feature and set MSS */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
new file mode 100644
index 000000000000..860de39999c7
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+// Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
+// stmmac Support for 5.xx Ethernet QoS cores
+
+#include <linux/bitops.h>
+#include <linux/iopoll.h>
+#include "common.h"
+#include "dwmac4.h"
+#include "dwmac5.h"
+
+struct dwmac5_error_desc {
+	bool valid;
+	const char *desc;
+	const char *detailed_desc;
+};
+
+#define STAT_OFF(field)		offsetof(struct stmmac_safety_stats, field)
+
+static void dwmac5_log_error(struct net_device *ndev, u32 value, bool corr,
+		const char *module_name, const struct dwmac5_error_desc *desc,
+		unsigned long field_offset, struct stmmac_safety_stats *stats)
+{
+	unsigned long loc, mask;
+	u8 *bptr = (u8 *)stats;
+	unsigned long *ptr;
+
+	ptr = (unsigned long *)(bptr + field_offset);
+
+	mask = value;
+	for_each_set_bit(loc, &mask, 32) {
+		netdev_err(ndev, "Found %s error in %s: '%s: %s'\n", corr ?
+				"correctable" : "uncorrectable", module_name,
+				desc[loc].desc, desc[loc].detailed_desc);
+
+		/* Update counters */
+		ptr[loc]++;
+	}
+}
+
+static const struct dwmac5_error_desc dwmac5_mac_errors[32]= {
+	{ true, "ATPES", "Application Transmit Interface Parity Check Error" },
+	{ true, "TPES", "TSO Data Path Parity Check Error" },
+	{ true, "RDPES", "Read Descriptor Parity Check Error" },
+	{ true, "MPES", "MTL Data Path Parity Check Error" },
+	{ true, "MTSPES", "MTL TX Status Data Path Parity Check Error" },
+	{ true, "ARPES", "Application Receive Interface Data Path Parity Check Error" },
+	{ true, "CWPES", "CSR Write Data Path Parity Check Error" },
+	{ true, "ASRPES", "AXI Slave Read Data Path Parity Check Error" },
+	{ true, "TTES", "TX FSM Timeout Error" },
+	{ true, "RTES", "RX FSM Timeout Error" },
+	{ true, "CTES", "CSR FSM Timeout Error" },
+	{ true, "ATES", "APP FSM Timeout Error" },
+	{ true, "PTES", "PTP FSM Timeout Error" },
+	{ true, "T125ES", "TX125 FSM Timeout Error" },
+	{ true, "R125ES", "RX125 FSM Timeout Error" },
+	{ true, "RVCTES", "REV MDC FSM Timeout Error" },
+	{ true, "MSTTES", "Master Read/Write Timeout Error" },
+	{ true, "SLVTES", "Slave Read/Write Timeout Error" },
+	{ true, "ATITES", "Application Timeout on ATI Interface Error" },
+	{ true, "ARITES", "Application Timeout on ARI Interface Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
+	{ true, "FSMPES", "FSM State Parity Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 26 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 27 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 28 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 29 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 30 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
+};
+
+static void dwmac5_handle_mac_err(struct net_device *ndev,
+		void __iomem *ioaddr, bool correctable,
+		struct stmmac_safety_stats *stats)
+{
+	u32 value;
+
+	value = readl(ioaddr + MAC_DPP_FSM_INT_STATUS);
+	writel(value, ioaddr + MAC_DPP_FSM_INT_STATUS);
+
+	dwmac5_log_error(ndev, value, correctable, "MAC", dwmac5_mac_errors,
+			STAT_OFF(mac_errors), stats);
+}
+
+static const struct dwmac5_error_desc dwmac5_mtl_errors[32]= {
+	{ true, "TXCES", "MTL TX Memory Error" },
+	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },
+	{ true, "TXUES", "MTL TX Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
+	{ true, "RXCES", "MTL RX Memory Error" },
+	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },
+	{ true, "RXUES", "MTL RX Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
+	{ true, "ECES", "MTL EST Memory Error" },
+	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },
+	{ true, "EUES", "MTL EST Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
+	{ true, "RPCES", "MTL RX Parser Memory Error" },
+	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },
+	{ true, "RPUES", "MTL RX Parser Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 26 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 27 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 28 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 29 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 30 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
+};
+
+static void dwmac5_handle_mtl_err(struct net_device *ndev,
+		void __iomem *ioaddr, bool correctable,
+		struct stmmac_safety_stats *stats)
+{
+	u32 value;
+
+	value = readl(ioaddr + MTL_ECC_INT_STATUS);
+	writel(value, ioaddr + MTL_ECC_INT_STATUS);
+
+	dwmac5_log_error(ndev, value, correctable, "MTL", dwmac5_mtl_errors,
+			STAT_OFF(mtl_errors), stats);
+}
+
+static const struct dwmac5_error_desc dwmac5_dma_errors[32]= {
+	{ true, "TCES", "DMA TSO Memory Error" },
+	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },
+	{ true, "TUES", "DMA TSO Memory Error" },
+	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 4 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 5 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 6 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 10 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 12 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 13 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 14 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 26 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 27 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 28 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 29 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 30 */
+	{ false, "UNKNOWN", "Unknown Error" }, /* 31 */
+};
+
+static void dwmac5_handle_dma_err(struct net_device *ndev,
+		void __iomem *ioaddr, bool correctable,
+		struct stmmac_safety_stats *stats)
+{
+	u32 value;
+
+	value = readl(ioaddr + DMA_ECC_INT_STATUS);
+	writel(value, ioaddr + DMA_ECC_INT_STATUS);
+
+	dwmac5_log_error(ndev, value, correctable, "DMA", dwmac5_dma_errors,
+			STAT_OFF(dma_errors), stats);
+}
+
+int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
+{
+	u32 value;
+
+	if (!asp)
+		return -EINVAL;
+
+	/* 1. Enable Safety Features */
+	value = readl(ioaddr + MTL_ECC_CONTROL);
+	value |= TSOEE; /* TSO ECC */
+	value |= MRXPEE; /* MTL RX Parser ECC */
+	value |= MESTEE; /* MTL EST ECC */
+	value |= MRXEE; /* MTL RX FIFO ECC */
+	value |= MTXEE; /* MTL TX FIFO ECC */
+	writel(value, ioaddr + MTL_ECC_CONTROL);
+
+	/* 2. Enable MTL Safety Interrupts */
+	value = readl(ioaddr + MTL_ECC_INT_ENABLE);
+	value |= RPCEIE; /* RX Parser Memory Correctable Error */
+	value |= ECEIE; /* EST Memory Correctable Error */
+	value |= RXCEIE; /* RX Memory Correctable Error */
+	value |= TXCEIE; /* TX Memory Correctable Error */
+	writel(value, ioaddr + MTL_ECC_INT_ENABLE);
+
+	/* 3. Enable DMA Safety Interrupts */
+	value = readl(ioaddr + DMA_ECC_INT_ENABLE);
+	value |= TCEIE; /* TSO Memory Correctable Error */
+	writel(value, ioaddr + DMA_ECC_INT_ENABLE);
+
+	/* Only ECC Protection for External Memory feature is selected */
+	if (asp <= 0x1)
+		return 0;
+
+	/* 5. Enable Parity and Timeout for FSM */
+	value = readl(ioaddr + MAC_FSM_CONTROL);
+	value |= PRTYEN; /* FSM Parity Feature */
+	value |= TMOUTEN; /* FSM Timeout Feature */
+	writel(value, ioaddr + MAC_FSM_CONTROL);
+
+	/* 4. Enable Data Parity Protection */
+	value = readl(ioaddr + MTL_DPP_CONTROL);
+	value |= EDPP;
+	writel(value, ioaddr + MTL_DPP_CONTROL);
+
+	/*
+	 * All the Automotive Safety features are selected without the "Parity
+	 * Port Enable for external interface" feature.
+	 */
+	if (asp <= 0x2)
+		return 0;
+
+	value |= EPSI;
+	writel(value, ioaddr + MTL_DPP_CONTROL);
+	return 0;
+}
+
+bool dwmac5_safety_feat_irq_status(struct net_device *ndev,
+		void __iomem *ioaddr, unsigned int asp,
+		struct stmmac_safety_stats *stats)
+{
+	bool ret = false, err, corr;
+	u32 mtl, dma;
+
+	if (!asp)
+		return false;
+
+	mtl = readl(ioaddr + MTL_SAFETY_INT_STATUS);
+	dma = readl(ioaddr + DMA_SAFETY_INT_STATUS);
+
+	err = (mtl & MCSIS) || (dma & MCSIS);
+	corr = false;
+	if (err) {
+		dwmac5_handle_mac_err(ndev, ioaddr, corr, stats);
+		ret |= !corr;
+	}
+
+	err = (mtl & (MEUIS | MECIS)) || (dma & (MSUIS | MSCIS));
+	corr = (mtl & MECIS) || (dma & MSCIS);
+	if (err) {
+		dwmac5_handle_mtl_err(ndev, ioaddr, corr, stats);
+		ret |= !corr;
+	}
+
+	err = dma & (DEUIS | DECIS);
+	corr = dma & DECIS;
+	if (err) {
+		dwmac5_handle_dma_err(ndev, ioaddr, corr, stats);
+		ret |= !corr;
+	}
+
+	return ret;
+}
+
+static const struct dwmac5_error {
+	const struct dwmac5_error_desc *desc;
+} dwmac5_all_errors[] = {
+	{ dwmac5_mac_errors },
+	{ dwmac5_mtl_errors },
+	{ dwmac5_dma_errors },
+};
+
+const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
+			int index, unsigned long *count)
+{
+	int module = index / 32, offset = index % 32;
+	unsigned long *ptr = (unsigned long *)stats;
+
+	if (module >= ARRAY_SIZE(dwmac5_all_errors))
+		return NULL;
+	if (!dwmac5_all_errors[module].desc[offset].valid)
+		return NULL;
+	if (count)
+		*count = *(ptr + index);
+	return dwmac5_all_errors[module].desc[offset].desc;
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
new file mode 100644
index 000000000000..a0d2c44711b9
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+// Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
+// stmmac Support for 5.xx Ethernet QoS cores
+
+#ifndef __DWMAC5_H__
+#define __DWMAC5_H__
+
+#define MAC_DPP_FSM_INT_STATUS		0x00000140
+#define MAC_AXI_SLV_DPE_ADDR_STATUS	0x00000144
+#define MAC_FSM_CONTROL			0x00000148
+#define PRTYEN				BIT(1)
+#define TMOUTEN				BIT(0)
+
+#define MTL_ECC_CONTROL			0x00000cc0
+#define TSOEE				BIT(4)
+#define MRXPEE				BIT(3)
+#define MESTEE				BIT(2)
+#define MRXEE				BIT(1)
+#define MTXEE				BIT(0)
+
+#define MTL_SAFETY_INT_STATUS		0x00000cc4
+#define MCSIS				BIT(31)
+#define MEUIS				BIT(1)
+#define MECIS				BIT(0)
+#define MTL_ECC_INT_ENABLE		0x00000cc8
+#define RPCEIE				BIT(12)
+#define ECEIE				BIT(8)
+#define RXCEIE				BIT(4)
+#define TXCEIE				BIT(0)
+#define MTL_ECC_INT_STATUS		0x00000ccc
+#define MTL_DPP_CONTROL			0x00000ce0
+#define EPSI				BIT(2)
+#define OPE				BIT(1)
+#define EDPP				BIT(0)
+
+#define DMA_SAFETY_INT_STATUS		0x00001080
+#define MSUIS				BIT(29)
+#define MSCIS				BIT(28)
+#define DEUIS				BIT(1)
+#define DECIS				BIT(0)
+#define DMA_ECC_INT_ENABLE		0x00001084
+#define TCEIE				BIT(0)
+#define DMA_ECC_INT_STATUS		0x00001088
+
+int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp);
+bool dwmac5_safety_feat_irq_status(struct net_device *ndev,
+		void __iomem *ioaddr, unsigned int asp,
+		struct stmmac_safety_stats *stats);
+const char *dwmac5_safety_feat_dump(struct stmmac_safety_stats *stats,
+			int index, unsigned long *count);
+
+#endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 1485d8fcfaa9..da50451f8999 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -114,6 +114,7 @@ struct stmmac_priv {
 	int mii_irq[PHY_MAX_ADDR];
 
 	struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp;
+	struct stmmac_safety_stats sstats;
 	struct plat_stmmacenet_data *plat;
 	struct dma_features dma_cap;
 	struct stmmac_counters mmc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index af30b4857c3b..2c6ed47704fc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -523,11 +523,23 @@ stmmac_set_pauseparam(struct net_device *netdev,
 static void stmmac_get_ethtool_stats(struct net_device *dev,
 				 struct ethtool_stats *dummy, u64 *data)
 {
+	const char *(*dump)(struct stmmac_safety_stats *stats, int index,
+			unsigned long *count);
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_queues_count = priv->plat->rx_queues_to_use;
 	u32 tx_queues_count = priv->plat->tx_queues_to_use;
+	unsigned long count;
 	int i, j = 0;
 
+	if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
+		dump = priv->hw->mac->safety_feat_dump;
+
+		for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
+			if (dump(&priv->sstats, i, &count))
+				data[j++] = count;
+		}
+	}
+
 	/* Update the DMA HW counters for dwmac10/100 */
 	if (priv->hw->dma->dma_diagnostic_fr)
 		priv->hw->dma->dma_diagnostic_fr(&dev->stats,
@@ -569,7 +581,9 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 {
 	struct stmmac_priv *priv = netdev_priv(netdev);
-	int len;
+	const char *(*dump)(struct stmmac_safety_stats *stats, int index,
+			unsigned long *count);
+	int i, len, safety_len = 0;
 
 	switch (sset) {
 	case ETH_SS_STATS:
@@ -577,6 +591,16 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 
 		if (priv->dma_cap.rmon)
 			len += STMMAC_MMC_STATS_LEN;
+		if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
+			dump = priv->hw->mac->safety_feat_dump;
+
+			for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
+				if (dump(&priv->sstats, i, NULL))
+					safety_len++;
+			}
+
+			len += safety_len;
+		}
 
 		return len;
 	default:
@@ -589,9 +613,22 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	int i;
 	u8 *p = data;
 	struct stmmac_priv *priv = netdev_priv(dev);
+	const char *(*dump)(struct stmmac_safety_stats *stats, int index,
+			unsigned long *count);
 
 	switch (stringset) {
 	case ETH_SS_STATS:
+		if (priv->dma_cap.asp && priv->hw->mac->safety_feat_dump) {
+			dump = priv->hw->mac->safety_feat_dump;
+			for (i = 0; i < STMMAC_SAFETY_FEAT_SIZE; i++) {
+				const char *desc = dump(&priv->sstats, i, NULL);
+
+				if (desc) {
+					memcpy(p, desc, ETH_GSTRING_LEN);
+					p += ETH_GSTRING_LEN;
+				}
+			}
+		}
 		if (priv->dma_cap.rmon)
 			for (i = 0; i < STMMAC_MMC_STATS_LEN; i++) {
 				memcpy(p, stmmac_mmc[i].stat_string,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b75ecf3d19fe..9a16931ce39d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2014,6 +2014,22 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
 	}
 }
 
+static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
+{
+	bool ret = false;
+
+	/* Safety features are only available in cores >= 5.10 */
+	if (priv->synopsys_id < DWMAC_CORE_5_10)
+		return ret;
+	if (priv->hw->mac->safety_feat_irq_status)
+		ret = priv->hw->mac->safety_feat_irq_status(priv->dev,
+				priv->ioaddr, priv->dma_cap.asp, &priv->sstats);
+
+	if (ret)
+		stmmac_global_err(priv);
+	return ret;
+}
+
 /**
  * stmmac_dma_interrupt - DMA ISR
  * @priv: driver private structure
@@ -2503,6 +2519,17 @@ static void stmmac_mtl_configuration(struct stmmac_priv *priv)
 		stmmac_mac_config_rx_queues_routing(priv);
 }
 
+static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
+{
+	if (priv->hw->mac->safety_feat_config && priv->dma_cap.asp) {
+		netdev_info(priv->dev, "Enabling Safety Features\n");
+		priv->hw->mac->safety_feat_config(priv->ioaddr,
+				priv->dma_cap.asp);
+	} else {
+		netdev_info(priv->dev, "No Safety Features support found\n");
+	}
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -2554,6 +2581,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	if (priv->synopsys_id >= DWMAC_CORE_4_00)
 		stmmac_mtl_configuration(priv);
 
+	/* Initialize Safety Features */
+	if (priv->synopsys_id >= DWMAC_CORE_5_10)
+		stmmac_safety_feat_configuration(priv);
+
 	ret = priv->hw->mac->rx_ipc(priv->hw);
 	if (!ret) {
 		netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n");
@@ -3729,6 +3760,9 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 	/* Check if adapter is up */
 	if (test_bit(STMMAC_DOWN, &priv->state))
 		return IRQ_HANDLED;
+	/* Check if a fatal error happened */
+	if (stmmac_safety_feat_interrupt(priv))
+		return IRQ_HANDLED;
 
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || (priv->plat->has_gmac4)) {

From 9b85756341c53fd13b4b5e25104c22849274cd0d Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das@bp.renesas.com>
Date: Thu, 29 Mar 2018 11:02:55 +0100
Subject: [PATCH 1517/1678] dt-bindings: net: renesas-ravb: Add support for
 r8a77470 SoC

Add a new compatible string for the RZ/G1C (R8A77470) SoC.

Signed-off-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index b4dc455eb155..c306f55d335b 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -7,6 +7,7 @@ Required properties:
 - compatible: Must contain one or more of the following:
       - "renesas,etheravb-r8a7743" for the R8A7743 SoC.
       - "renesas,etheravb-r8a7745" for the R8A7745 SoC.
+      - "renesas,etheravb-r8a77470" for the R8A77470 SoC.
       - "renesas,etheravb-r8a7790" for the R8A7790 SoC.
       - "renesas,etheravb-r8a7791" for the R8A7791 SoC.
       - "renesas,etheravb-r8a7792" for the R8A7792 SoC.

From fe3f4e805345be01c979143fe2afaeec8dfbf79e Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Thu, 29 Mar 2018 13:58:48 +0300
Subject: [PATCH 1518/1678] MAINTAINERS: Update my email address from freescale
 to nxp

The freescale.com address will no longer be available.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index aace4ae4ad30..22ef8d64fa59 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5637,7 +5637,7 @@ S:	Maintained
 F:	drivers/dma/fsldma.*
 
 FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
-M:	Claudiu Manoil <claudiu.manoil@freescale.com>
+M:	Claudiu Manoil <claudiu.manoil@nxp.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/freescale/gianfar*

From 1dad0f9ffff735ca09ea1588a9c279a45c8978be Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Thu, 29 Mar 2018 08:52:37 -0400
Subject: [PATCH 1519/1678] tc-testing: add connmark action tests

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/connmark.json | 291 ++++++++++++++++++
 1 file changed, 291 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
new file mode 100644
index 000000000000..70952bd98ff9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -0,0 +1,291 @@
+[
+    {
+        "id": "2002",
+        "name": "Add valid connmark action with defaults",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "56a5",
+        "name": "Add valid connmark action with control pass",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 1",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "7c66",
+        "name": "Add valid connmark action with control drop",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark drop index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 100",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 drop.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "a913",
+        "name": "Add valid connmark action with control pipe",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark pipe index 455",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 455",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 pipe.*index 455 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "bdd8",
+        "name": "Add valid connmark action with control reclassify",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark reclassify index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 reclassify.*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "b8be",
+        "name": "Add valid connmark action with control continue",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark continue index 17",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 continue.*index 17 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "d8a6",
+        "name": "Add valid connmark action with control jump",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark jump 10 index 17",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+:  connmark zone 0 jump 10.*index 17 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "aae8",
+        "name": "Add valid connmark action with zone argument",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 100 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 1",
+        "matchPattern": "action order [0-9]+:  connmark zone 100 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "2f0b",
+        "name": "Add valid connmark action with invalid zone argument",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 65536 reclassify index 21",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action connmark index 1",
+        "matchPattern": "action order [0-9]+:  connmark zone 65536 reclassify.*index 21 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "9305",
+        "name": "Add connmark action with unsupported argument",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 655 unsupp_arg pass index 2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action connmark index 2",
+        "matchPattern": "action order [0-9]+:  connmark zone 655 unsupp_arg pass.*index 2 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "71ca",
+        "name": "Add valid connmark action and replace it",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action connmark zone 777 pass index 555"
+        ],
+        "cmdUnderTest": "$TC actions replace action connmark zone 555 reclassify index 555",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 555",
+        "matchPattern": "action order [0-9]+:  connmark zone 555 reclassify.*index 555 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "5f8f",
+        "name": "Add valid connmark action with cookie",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 555 pipe index 5 cookie aabbccddeeff112233445566778800a1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 5",
+        "matchPattern": "action order [0-9]+:  connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    }
+]

From ccdd0b4c35f2adb7434ed69a89bc68bb6e9089ea Mon Sep 17 00:00:00 2001
From: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Date: Thu, 29 Mar 2018 11:13:22 -0700
Subject: [PATCH 1520/1678] liquidio: prevent rx queues from getting stalled

This commit has fix for RX traffic issues when we stress test the driver
with continuous ifconfig up/down under very high traffic conditions.

Reason for the issue is that, in existing liquidio_stop function NAPI is
disabled even before actual FW/HW interface is brought down via
send_rx_ctrl_cmd(lio, 0). Between time frame of NAPI disable and actual
interface down in firmware, firmware continuously enqueues rx traffic to
host. When interrupt happens for new packets, host irq handler fails in
scheduling NAPI as the NAPI is already disabled.

After "ifconfig <iface> up", Host re-enables NAPI but cannot schedule it
until it receives another Rx interrupt. Host never receives Rx interrupt as
it never cleared the Rx interrupt it received during interface down
operation. NIC Rx interrupt gets cleared only when Host processes queue and
clears the queue counts. Above anomaly leads to other issues like packet
overflow in FW/HW queues, backpressure.

Fix:
This commit fixes this issue by disabling NAPI only after informing
firmware to stop queueing packets to host via send_rx_ctrl_cmd(lio, 0).
send_rx_ctrl_cmd is not visible in the patch as it is already there in the
code. The DOWN command also waits for any pending packets to be processed
by NAPI so that the deadlock will not occur.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/liquidio/lio_core.c   | 23 +++++++++++++++++
 .../net/ethernet/cavium/liquidio/lio_main.c   | 25 +++++++++++--------
 .../ethernet/cavium/liquidio/lio_vf_main.c    | 23 ++++++++++-------
 .../ethernet/cavium/liquidio/octeon_network.h |  1 +
 4 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 73e70e076e61..2a94eee943b2 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -1146,3 +1146,26 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
 	octeon_free_soft_command(oct, sc);
 	return 0;
 }
+
+int lio_wait_for_clean_oq(struct octeon_device *oct)
+{
+	int retry = 100, pending_pkts = 0;
+	int idx;
+
+	do {
+		pending_pkts = 0;
+
+		for (idx = 0; idx < MAX_OCTEON_OUTPUT_QUEUES(oct); idx++) {
+			if (!(oct->io_qmask.oq & BIT_ULL(idx)))
+				continue;
+			pending_pkts +=
+				atomic_read(&oct->droq[idx]->pkts_pending);
+		}
+
+		if (pending_pkts > 0)
+			schedule_timeout_uninterruptible(1);
+
+	} while (retry-- && pending_pkts);
+
+	return pending_pkts;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 43c5ba0af12b..603a144d3d9c 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2084,16 +2084,6 @@ static int liquidio_stop(struct net_device *netdev)
 	struct octeon_device *oct = lio->oct_dev;
 	struct napi_struct *napi, *n;
 
-	if (oct->props[lio->ifidx].napi_enabled) {
-		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
-			napi_disable(napi);
-
-		oct->props[lio->ifidx].napi_enabled = 0;
-
-		if (OCTEON_CN23XX_PF(oct))
-			oct->droq[0]->ops.poll_mode = 0;
-	}
-
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
 	netif_tx_disable(netdev);
@@ -2119,6 +2109,21 @@ static int liquidio_stop(struct net_device *netdev)
 		lio->ptp_clock = NULL;
 	}
 
+	/* Wait for any pending Rx descriptors */
+	if (lio_wait_for_clean_oq(oct))
+		netif_info(lio, rx_err, lio->netdev,
+			   "Proceeding with stop interface after partial RX desc processing\n");
+
+	if (oct->props[lio->ifidx].napi_enabled == 1) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_disable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 0;
+
+		if (OCTEON_CN23XX_PF(oct))
+			oct->droq[0]->ops.poll_mode = 0;
+	}
+
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
 	return 0;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index dc62698bdaf7..f92dfa411de6 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1138,15 +1138,6 @@ static int liquidio_stop(struct net_device *netdev)
 	/* tell Octeon to stop forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 0);
 
-	if (oct->props[lio->ifidx].napi_enabled) {
-		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
-			napi_disable(napi);
-
-		oct->props[lio->ifidx].napi_enabled = 0;
-
-		oct->droq[0]->ops.poll_mode = 0;
-	}
-
 	netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
 	/* Inform that netif carrier is down */
 	lio->intf_open = 0;
@@ -1159,6 +1150,20 @@ static int liquidio_stop(struct net_device *netdev)
 
 	stop_txqs(netdev);
 
+	/* Wait for any pending Rx descriptors */
+	if (lio_wait_for_clean_oq(oct))
+		netif_info(lio, rx_err, lio->netdev,
+			   "Proceeding with stop interface after partial RX desc processing\n");
+
+	if (oct->props[lio->ifidx].napi_enabled == 1) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_disable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 0;
+
+		oct->droq[0]->ops.poll_mode = 0;
+	}
+
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
 	return 0;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 8782206271b6..4069710796a8 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -190,6 +190,7 @@ irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)),
 
 int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
 
+int lio_wait_for_clean_oq(struct octeon_device *oct);
 /**
  * \brief Register ethtool operations
  * @param netdev    pointer to network device

From c0b6edef0bf0e33c12eaf80c676ff09def011518 Mon Sep 17 00:00:00 2001
From: Lucas Bates <lucasb@mojatatu.com>
Date: Thu, 29 Mar 2018 15:58:10 -0400
Subject: [PATCH 1521/1678] tc-testing: Add newline when writing test case
 files

When using the -i feature to generate random ID numbers for test
cases in tdc, the function that writes the JSON to file doesn't
add a newline character to the end of the file, so we have to
add our own.

Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 44de4a272a11..87a04a8a5945 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -490,6 +490,7 @@ def generate_case_ids(alltests):
                     testlist.append(t)
         outfile = open(f, "w")
         json.dump(testlist, outfile, indent=4)
+        outfile.write("\n")
         outfile.close()
 
 def filter_tests_by_id(args, testlist):

From ace45bec6d77bc061c3c3d8ad99e298ea9800c2b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:04:43 +0100
Subject: [PATCH 1522/1678] rxrpc: Fix firewall route keepalive

Fix the firewall route keepalive part of AF_RXRPC which is currently
function incorrectly by replying to VERSION REPLY packets from the server
with VERSION REQUEST packets.

Instead, send VERSION REPLY packets to the peers of service connections to
act as keep-alives 20s after the latest packet was transmitted to that
peer.

Also, just discard VERSION REPLY packets rather than replying to them.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/af_rxrpc.c    |  4 ++
 net/rxrpc/ar-internal.h | 14 +++++-
 net/rxrpc/conn_event.c  |  3 ++
 net/rxrpc/input.c       |  2 +
 net/rxrpc/net_ns.c      | 21 ++++++++-
 net/rxrpc/output.c      | 59 ++++++++++++++++++++++++-
 net/rxrpc/peer_event.c  | 96 +++++++++++++++++++++++++++++++++++++++++
 net/rxrpc/peer_object.c |  7 ++-
 net/rxrpc/rxkad.c       |  2 +
 9 files changed, 204 insertions(+), 4 deletions(-)

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index ec5ec68be1aa..0b3026b8fa40 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -762,6 +762,7 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
 static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
 			int kern)
 {
+	struct rxrpc_net *rxnet;
 	struct rxrpc_sock *rx;
 	struct sock *sk;
 
@@ -801,6 +802,9 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
 	rwlock_init(&rx->call_lock);
 	memset(&rx->srx, 0, sizeof(rx->srx));
 
+	rxnet = rxrpc_net(sock_net(&rx->sk));
+	timer_reduce(&rxnet->peer_keepalive_timer, jiffies + 1);
+
 	_leave(" = 0 [%p]", rx);
 	return 0;
 }
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 21cf164b6d85..8a348e0a9d95 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -97,8 +97,16 @@ struct rxrpc_net {
 	struct list_head	local_endpoints;
 	struct mutex		local_mutex;	/* Lock for ->local_endpoints */
 
-	spinlock_t		peer_hash_lock;	/* Lock for ->peer_hash */
 	DECLARE_HASHTABLE	(peer_hash, 10);
+	spinlock_t		peer_hash_lock;	/* Lock for ->peer_hash */
+
+#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
+	u8			peer_keepalive_cursor;
+	ktime_t			peer_keepalive_base;
+	struct hlist_head	peer_keepalive[RXRPC_KEEPALIVE_TIME + 1];
+	struct hlist_head	peer_keepalive_new;
+	struct timer_list	peer_keepalive_timer;
+	struct work_struct	peer_keepalive_work;
 };
 
 /*
@@ -285,6 +293,8 @@ struct rxrpc_peer {
 	struct hlist_head	error_targets;	/* targets for net error distribution */
 	struct work_struct	error_distributor;
 	struct rb_root		service_conns;	/* Service connections */
+	struct hlist_node	keepalive_link;	/* Link in net->peer_keepalive[] */
+	time64_t		last_tx_at;	/* Last time packet sent here */
 	seqlock_t		service_conn_lock;
 	spinlock_t		lock;		/* access lock */
 	unsigned int		if_mtu;		/* interface MTU for this peer */
@@ -1026,6 +1036,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *);
 int rxrpc_send_abort_packet(struct rxrpc_call *);
 int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
 void rxrpc_reject_packets(struct rxrpc_local *);
+void rxrpc_send_keepalive(struct rxrpc_peer *);
 
 /*
  * peer_event.c
@@ -1034,6 +1045,7 @@ void rxrpc_error_report(struct sock *);
 void rxrpc_peer_error_distributor(struct work_struct *);
 void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
 			rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
+void rxrpc_peer_keepalive_worker(struct work_struct *);
 
 /*
  * peer_object.c
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index d2ec3fd593e8..c717152070df 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -136,6 +136,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	}
 
 	kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
+	conn->params.peer->last_tx_at = ktime_get_real();
 	_leave("");
 	return;
 }
@@ -239,6 +240,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 		return -EAGAIN;
 	}
 
+	conn->params.peer->last_tx_at = ktime_get_real();
+
 	_leave(" = 0");
 	return 0;
 }
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 2a868fdab0ae..d4f2509e018b 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1183,6 +1183,8 @@ void rxrpc_data_ready(struct sock *udp_sk)
 
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_VERSION:
+		if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED))
+			goto discard;
 		rxrpc_post_packet_to_local(local, skb);
 		goto out;
 
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index f18c9248e0d4..66baf2b80b6c 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -32,13 +32,22 @@ static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
 		rxrpc_queue_work(&rxnet->service_conn_reaper);
 }
 
+static void rxrpc_peer_keepalive_timeout(struct timer_list *timer)
+{
+	struct rxrpc_net *rxnet =
+		container_of(timer, struct rxrpc_net, peer_keepalive_timer);
+
+	if (rxnet->live)
+		rxrpc_queue_work(&rxnet->peer_keepalive_work);
+}
+
 /*
  * Initialise a per-network namespace record.
  */
 static __net_init int rxrpc_init_net(struct net *net)
 {
 	struct rxrpc_net *rxnet = rxrpc_net(net);
-	int ret;
+	int ret, i;
 
 	rxnet->live = true;
 	get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
@@ -70,8 +79,16 @@ static __net_init int rxrpc_init_net(struct net *net)
 
 	INIT_LIST_HEAD(&rxnet->local_endpoints);
 	mutex_init(&rxnet->local_mutex);
+
 	hash_init(rxnet->peer_hash);
 	spin_lock_init(&rxnet->peer_hash_lock);
+	for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++)
+		INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]);
+	INIT_HLIST_HEAD(&rxnet->peer_keepalive_new);
+	timer_setup(&rxnet->peer_keepalive_timer,
+		    rxrpc_peer_keepalive_timeout, 0);
+	INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
+	rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC);
 
 	ret = -ENOMEM;
 	rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
@@ -95,6 +112,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
 	struct rxrpc_net *rxnet = rxrpc_net(net);
 
 	rxnet->live = false;
+	del_timer_sync(&rxnet->peer_keepalive_timer);
+	cancel_work_sync(&rxnet->peer_keepalive_work);
 	rxrpc_destroy_all_calls(rxnet);
 	rxrpc_destroy_all_connections(rxnet);
 	rxrpc_destroy_all_locals(rxnet);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index cf73dc006c3b..7f1fc04775b3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -32,6 +32,8 @@ struct rxrpc_abort_buffer {
 	__be32 abort_code;
 };
 
+static const char rxrpc_keepalive_string[] = "";
+
 /*
  * Arrange for a keepalive ping a certain time after we last transmitted.  This
  * lets the far side know we're still interested in this call and helps keep
@@ -122,6 +124,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 	struct kvec iov[2];
 	rxrpc_serial_t serial;
 	rxrpc_seq_t hard_ack, top;
+	ktime_t now;
 	size_t len, n;
 	int ret;
 	u8 reason;
@@ -203,8 +206,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 	}
 
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+	now = ktime_get_real();
 	if (ping)
-		call->ping_time = ktime_get_real();
+		call->ping_time = now;
+	conn->params.peer->last_tx_at = ktime_get_real();
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		if (ret < 0) {
@@ -288,6 +293,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 
 	ret = kernel_sendmsg(conn->params.local->socket,
 			     &msg, iov, 1, sizeof(pkt));
+	conn->params.peer->last_tx_at = ktime_get_real();
 
 	rxrpc_put_connection(conn);
 	return ret;
@@ -378,6 +384,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	 *     message and update the peer record
 	 */
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+	conn->params.peer->last_tx_at = ktime_get_real();
 
 	up_read(&conn->params.local->defrag_sem);
 	if (ret == -EMSGSIZE)
@@ -429,6 +436,7 @@ send_fragmentable:
 		if (ret == 0) {
 			ret = kernel_sendmsg(conn->params.local->socket, &msg,
 					     iov, 2, len);
+			conn->params.peer->last_tx_at = ktime_get_real();
 
 			opt = IP_PMTUDISC_DO;
 			kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -446,6 +454,7 @@ send_fragmentable:
 		if (ret == 0) {
 			ret = kernel_sendmsg(conn->params.local->socket, &msg,
 					     iov, 2, len);
+			conn->params.peer->last_tx_at = ktime_get_real();
 
 			opt = IPV6_PMTUDISC_DO;
 			kernel_setsockopt(conn->params.local->socket,
@@ -515,3 +524,51 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 
 	_leave("");
 }
+
+/*
+ * Send a VERSION reply to a peer as a keepalive.
+ */
+void rxrpc_send_keepalive(struct rxrpc_peer *peer)
+{
+	struct rxrpc_wire_header whdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t len;
+	int ret;
+
+	_enter("");
+
+	msg.msg_name	= &peer->srx.transport;
+	msg.msg_namelen	= peer->srx.transport_len;
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	whdr.epoch	= htonl(peer->local->rxnet->epoch);
+	whdr.cid	= 0;
+	whdr.callNumber	= 0;
+	whdr.seq	= 0;
+	whdr.serial	= 0;
+	whdr.type	= RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
+	whdr.flags	= RXRPC_LAST_PACKET;
+	whdr.userStatus	= 0;
+	whdr.securityIndex = 0;
+	whdr._rsvd	= 0;
+	whdr.serviceId	= 0;
+
+	iov[0].iov_base	= &whdr;
+	iov[0].iov_len	= sizeof(whdr);
+	iov[1].iov_base	= (char *)rxrpc_keepalive_string;
+	iov[1].iov_len	= sizeof(rxrpc_keepalive_string);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	_proto("Tx VERSION (keepalive)");
+
+	ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
+	if (ret < 0)
+		_debug("sendmsg failed: %d", ret);
+
+	peer->last_tx_at = ktime_get_real();
+	_leave("");
+}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 7f749505e699..d01eb9a06448 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -348,3 +348,99 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
 	trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
 			   usage, avg);
 }
+
+/*
+ * Perform keep-alive pings with VERSION packets to keep any NAT alive.
+ */
+void rxrpc_peer_keepalive_worker(struct work_struct *work)
+{
+	struct rxrpc_net *rxnet =
+		container_of(work, struct rxrpc_net, peer_keepalive_work);
+	struct rxrpc_peer *peer;
+	unsigned long delay;
+	ktime_t base, now = ktime_get_real();
+	s64 diff;
+	u8 cursor, slot;
+
+	base = rxnet->peer_keepalive_base;
+	cursor = rxnet->peer_keepalive_cursor;
+
+	_enter("%u,%lld", cursor, ktime_sub(now, base));
+
+next_bucket:
+	diff = ktime_to_ns(ktime_sub(now, base));
+	if (diff < 0)
+		goto resched;
+
+	_debug("at %u", cursor);
+	spin_lock_bh(&rxnet->peer_hash_lock);
+next_peer:
+	if (!rxnet->live) {
+		spin_unlock_bh(&rxnet->peer_hash_lock);
+		goto out;
+	}
+
+	/* Everything in the bucket at the cursor is processed this second; the
+	 * bucket at cursor + 1 goes now + 1s and so on...
+	 */
+	if (hlist_empty(&rxnet->peer_keepalive[cursor])) {
+		if (hlist_empty(&rxnet->peer_keepalive_new)) {
+			spin_unlock_bh(&rxnet->peer_hash_lock);
+			goto emptied_bucket;
+		}
+
+		hlist_move_list(&rxnet->peer_keepalive_new,
+				&rxnet->peer_keepalive[cursor]);
+	}
+
+	peer = hlist_entry(rxnet->peer_keepalive[cursor].first,
+			   struct rxrpc_peer, keepalive_link);
+	hlist_del_init(&peer->keepalive_link);
+	if (!rxrpc_get_peer_maybe(peer))
+		goto next_peer;
+
+	spin_unlock_bh(&rxnet->peer_hash_lock);
+
+	_debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport);
+
+recalc:
+	diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC);
+	if (diff < -30 || diff > 30)
+		goto send; /* LSW of 64-bit time probably wrapped on 32-bit */
+	diff += RXRPC_KEEPALIVE_TIME - 1;
+	if (diff < 0)
+		goto send;
+
+	slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff;
+	if (slot == 0)
+		goto send;
+
+	/* A transmission to this peer occurred since last we examined it so
+	 * put it into the appropriate future bucket.
+	 */
+	slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive);
+	spin_lock_bh(&rxnet->peer_hash_lock);
+	hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]);
+	rxrpc_put_peer(peer);
+	goto next_peer;
+
+send:
+	rxrpc_send_keepalive(peer);
+	now = ktime_get_real();
+	goto recalc;
+
+emptied_bucket:
+	cursor++;
+	if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive))
+		cursor = 0;
+	base = ktime_add_ns(base, NSEC_PER_SEC);
+	goto next_bucket;
+
+resched:
+	rxnet->peer_keepalive_base = base;
+	rxnet->peer_keepalive_cursor = cursor;
+	delay = nsecs_to_jiffies(-diff) + 1;
+	timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
+out:
+	_leave("");
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index d02a99f37f5f..94a6dbfcf129 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -322,6 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
 	if (!peer) {
 		peer = prealloc;
 		hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
+		hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new);
 	}
 
 	spin_unlock(&rxnet->peer_hash_lock);
@@ -363,9 +364,12 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
 		peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
 		if (peer && !rxrpc_get_peer_maybe(peer))
 			peer = NULL;
-		if (!peer)
+		if (!peer) {
 			hash_add_rcu(rxnet->peer_hash,
 				     &candidate->hash_link, hash_key);
+			hlist_add_head(&candidate->keepalive_link,
+				       &rxnet->peer_keepalive_new);
+		}
 
 		spin_unlock_bh(&rxnet->peer_hash_lock);
 
@@ -392,6 +396,7 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer)
 
 	spin_lock_bh(&rxnet->peer_hash_lock);
 	hash_del_rcu(&peer->hash_link);
+	hlist_del_init(&peer->keepalive_link);
 	spin_unlock_bh(&rxnet->peer_hash_lock);
 
 	kfree_rcu(peer, rcu);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 77cb23c7bd0a..588fea0dd362 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -668,6 +668,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 		return -EAGAIN;
 	}
 
+	conn->params.peer->last_tx_at = ktime_get_real();
 	_leave(" = 0");
 	return 0;
 }
@@ -722,6 +723,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
 		return -EAGAIN;
 	}
 
+	conn->params.peer->last_tx_at = ktime_get_real();
 	_leave(" = 0");
 	return 0;
 }

From f82eb88b0fa6943f58760fd7c3d1b12c1f9cf492 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:04:43 +0100
Subject: [PATCH 1523/1678] rxrpc: Fix a bit of time confusion

The rxrpc_reduce_call_timer() function should be passed the 'current time'
in jiffies, not the current ktime time.  It's confusing in rxrpc_resend
because that has to deal with both.  Pass the correct current time in.

Note that this only affects the trace produced and not the functioning of
the code.

Fixes: a158bdd3247b ("rxrpc: Fix call timeouts")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/call_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 6a62e42e1d8d..3dee89c7a06e 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -238,7 +238,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 	 * retransmitting data.
 	 */
 	if (!retrans) {
-		rxrpc_reduce_call_timer(call, resend_at, now,
+		rxrpc_reduce_call_timer(call, resend_at, now_j,
 					rxrpc_timer_set_for_resend);
 		spin_unlock_bh(&call->lock);
 		ack_ts = ktime_sub(now, call->acks_latest_ts);

From 03877bf6a30cca7d4bc3ffabd3c3e9464a7a1a19 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:04:43 +0100
Subject: [PATCH 1524/1678] rxrpc: Fix Tx ring annotation after initial Tx
 failure

rxrpc calls have a ring of packets that are awaiting ACK or retransmission
and a parallel ring of annotations that tracks the state of those packets.
If the initial transmission of a packet on the underlying UDP socket fails
then the packet annotation is marked for resend - but the setting of this
mark accidentally erases the last-packet mark also stored in the same
annotation slot.  If this happens, a call won't switch out of the Tx phase
when all the packets have been transmitted.

Fix this by retaining the last-packet mark and only altering the packet
state.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/sendmsg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 8503f279b467..783c777fc6e7 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -130,7 +130,9 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
 	spin_lock_bh(&call->lock);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
-		call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+		call->rxtx_annotations[ix] =
+			(call->rxtx_annotations[ix] & RXRPC_TX_ANNO_LAST) |
+			RXRPC_TX_ANNO_RETRANS;
 		if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
 			rxrpc_queue_call(call);
 	}

From 57b0c9d49b94bbeb53649b7fbd264603c1ebd585 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:04:44 +0100
Subject: [PATCH 1525/1678] rxrpc: Don't treat call aborts as conn aborts

If a call-level abort is received for the previous call to complete on a
connection channel, then that abort is queued for the connection processor
to handle.  Unfortunately, the connection processor then assumes without
checking that the abort is connection-level (ie. callNumber is 0) and
distributes it over all active calls on that connection, thereby
incorrectly aborting them.

Fix this by discarding aborts aimed at a completed call.

Further, discard all packets aimed at a call that's complete if there's
currently an active call on a channel, since the DATA packets associated
with the new call automatically terminate the old call.

Fixes: 18bfeba50dfd ("rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/input.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d4f2509e018b..21800e6f5019 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1242,16 +1242,19 @@ void rxrpc_data_ready(struct sock *udp_sk)
 			goto discard_unlock;
 
 		if (sp->hdr.callNumber == chan->last_call) {
-			/* For the previous service call, if completed successfully, we
-			 * discard all further packets.
-			 */
-			if (rxrpc_conn_is_service(conn) &&
-			    (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
-			     sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+			if (chan->call ||
+			    sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
 				goto discard_unlock;
 
-			/* But otherwise we need to retransmit the final packet from
-			 * data cached in the connection record.
+			/* For the previous service call, if completed
+			 * successfully, we discard all further packets.
+			 */
+			if (rxrpc_conn_is_service(conn) &&
+			    chan->last_type == RXRPC_PACKET_TYPE_ACK)
+				goto discard_unlock;
+
+			/* But otherwise we need to retransmit the final packet
+			 * from data cached in the connection record.
 			 */
 			rxrpc_post_packet_to_conn(conn, skb);
 			goto out_unlock;

From 59299aa1028fce051adbd25aaff7c387b865cd6d Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Fri, 30 Mar 2018 21:04:44 +0100
Subject: [PATCH 1526/1678] rxrpc: Fix resend event time calculation

Commit a158bdd3 ("rxrpc: Fix call timeouts") reworked the time calculation
for the next resend event.  For this calculation, "oldest" will be before
"now", so ktime_sub(oldest, now) will yield a negative value.  When passed
to nsecs_to_jiffies which expects an unsigned value, the end result will be
a very large value, and a resend event scheduled far into the future.  This
could cause calls to stall if some packets were lost.

Fix by ordering the arguments to ktime_sub correctly.

Fixes: a158bdd3247b ("rxrpc: Fix call timeouts")
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/call_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 3dee89c7a06e..6e0d788b4dc4 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -226,7 +226,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 				       ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
 	}
 
-	resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(oldest, now)));
+	resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
 	resend_at += jiffies + rxrpc_resend_timeout;
 	WRITE_ONCE(call->resend_at, resend_at);
 

From edb63e2b271752a9424a3d33cfcd4f434a020f9b Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 30 Mar 2018 21:04:44 +0100
Subject: [PATCH 1527/1678] rxrpc: remove unused static variables

The rxrpc_security_methods and rxrpc_security_sem user has been removed
in 648af7fca159 ("rxrpc: Absorb the rxkad security module"). This was
noticed by kbuild test robot for the -RT tree but is also true for !RT.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/security.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index e9f428351293..c4479afe8ae7 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -19,9 +19,6 @@
 #include <keys/rxrpc-type.h>
 #include "ar-internal.h"
 
-static LIST_HEAD(rxrpc_security_methods);
-static DECLARE_RWSEM(rxrpc_security_sem);
-
 static const struct rxrpc_security *rxrpc_security_types[] = {
 	[RXRPC_SECURITY_NONE]	= &rxrpc_no_security,
 #ifdef CONFIG_RXKAD

From 88f2a8257c9aa7df957b1a79a104f348d60d8027 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:05:17 +0100
Subject: [PATCH 1528/1678] rxrpc: Fix checker warnings and errors

Fix various issues detected by checker.

Errors:

 (*) rxrpc_discard_prealloc() should be using rcu_assign_pointer to set
     call->socket.

Warnings:

 (*) rxrpc_service_connection_reaper() should be passing NULL rather than 0 to
     trace_rxrpc_conn() as the where argument.

 (*) rxrpc_disconnect_client_call() should get its net pointer via the
     call->conn rather than call->sock to avoid a warning about accessing
     an RCU pointer without protection.

 (*) Proc seq start/stop functions need annotation as they pass locks
     between the functions.

False positives:

 (*) Checker doesn't correctly handle of seq-retry lock context balance in
     rxrpc_find_service_conn_rcu().

 (*) Checker thinks execution may proceed past the BUG() in
     rxrpc_publish_service_conn().

 (*) Variable length array warnings from SKCIPHER_REQUEST_ON_STACK() in
     rxkad.c.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/call_accept.c | 3 ++-
 net/rxrpc/call_object.c | 1 +
 net/rxrpc/conn_client.c | 2 +-
 net/rxrpc/conn_object.c | 2 +-
 net/rxrpc/proc.c        | 6 ++++++
 net/rxrpc/sendmsg.c     | 2 ++
 6 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 92ebd1d7e0bb..4ce24c000653 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -225,7 +225,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 	tail = b->call_backlog_tail;
 	while (CIRC_CNT(head, tail, size) > 0) {
 		struct rxrpc_call *call = b->call_backlog[tail];
-		call->socket = rx;
+		rcu_assign_pointer(call->socket, rx);
 		if (rx->discard_new_call) {
 			_debug("discard %lx", call->user_call_ID);
 			rx->discard_new_call(call, call->user_call_ID);
@@ -456,6 +456,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
 				     unsigned long user_call_ID,
 				     rxrpc_notify_rx_t notify_rx)
 	__releases(&rx->sk.sk_lock.slock)
+	__acquires(call->user_mutex)
 {
 	struct rxrpc_call *call;
 	struct rb_node *parent, **pp;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 147657dfe757..85b12c472522 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -219,6 +219,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 					 gfp_t gfp,
 					 unsigned int debug_id)
 	__releases(&rx->sk.sk_lock.slock)
+	__acquires(&call->user_mutex)
 {
 	struct rxrpc_call *call, *xcall;
 	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 064175068059..041da40dbf93 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -776,7 +776,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
 	struct rxrpc_channel *chan = &conn->channels[channel];
-	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk));
+	struct rxrpc_net *rxnet = conn->params.local->rxnet;
 
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
 	call->conn = NULL;
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index ccbac190add1..bfc46fd69a62 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -418,7 +418,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 		 */
 		if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
 			continue;
-		trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, 0);
+		trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL);
 
 		if (rxrpc_conn_is_client(conn))
 			BUG();
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index f79f260c6ddc..7e45db058823 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -29,6 +29,8 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
  * generate a list of extant and dead calls in /proc/net/rxrpc_calls
  */
 static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+	__acquires(rcu)
+	__acquires(rxnet->call_lock)
 {
 	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
 
@@ -45,6 +47,8 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+	__releases(rxnet->call_lock)
+	__releases(rcu)
 {
 	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
 
@@ -135,6 +139,7 @@ const struct file_operations rxrpc_call_seq_fops = {
  * generate a list of extant virtual connections in /proc/net/rxrpc_conns
  */
 static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+	__acquires(rxnet->conn_lock)
 {
 	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
 
@@ -151,6 +156,7 @@ static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
 }
 
 static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+	__releases(rxnet->conn_lock)
 {
 	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
 
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 783c777fc6e7..a62980a80151 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -556,6 +556,7 @@ static struct rxrpc_call *
 rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 				  struct rxrpc_send_params *p)
 	__releases(&rx->sk.sk_lock.slock)
+	__acquires(&call->user_mutex)
 {
 	struct rxrpc_conn_parameters cp;
 	struct rxrpc_call *call;
@@ -596,6 +597,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
  */
 int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 	__releases(&rx->sk.sk_lock.slock)
+	__releases(&call->user_mutex)
 {
 	enum rxrpc_call_state state;
 	struct rxrpc_call *call;

From d3be4d244330f7ef53242d8dc1b7f77d105e767f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:05:23 +0100
Subject: [PATCH 1529/1678] rxrpc: Fix potential call vs socket/net destruction
 race

rxrpc_call structs don't pin sockets or network namespaces, but may attempt
to access both after their refcount reaches 0 so that they can detach
themselves from the network namespace.  However, there's no guarantee that
the socket still exists at this point (so sock_net(&call->socket->sk) may
be invalid) and the namespace may have gone away if the call isn't pinning
a peer.

Fix this by (a) carrying a net pointer in the rxrpc_call struct and (b)
waiting for all calls to be destroyed when the network namespace goes away.

This was detected by checker:

net/rxrpc/call_object.c:634:57: warning: incorrect type in argument 1 (different address spaces)
net/rxrpc/call_object.c:634:57:    expected struct sock const *sk
net/rxrpc/call_object.c:634:57:    got struct sock [noderef] <asn:4>*<noident>

Fixes: 2baec2c3f854 ("rxrpc: Support network namespacing")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/ar-internal.h |  2 ++
 net/rxrpc/call_accept.c |  1 +
 net/rxrpc/call_object.c | 16 +++++++++++++---
 net/rxrpc/net_ns.c      |  1 +
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 8a348e0a9d95..2a2b0fdfb157 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -75,6 +75,7 @@ struct rxrpc_net {
 	u32			epoch;		/* Local epoch for detecting local-end reset */
 	struct list_head	calls;		/* List of calls active in this namespace */
 	rwlock_t		call_lock;	/* Lock for ->calls */
+	atomic_t		nr_calls;	/* Count of allocated calls */
 
 	struct list_head	conn_proc_list;	/* List of conns in this namespace for proc */
 	struct list_head	service_conns;	/* Service conns in this namespace */
@@ -528,6 +529,7 @@ struct rxrpc_call {
 	struct rxrpc_connection	*conn;		/* connection carrying call */
 	struct rxrpc_peer	*peer;		/* Peer record for remote address */
 	struct rxrpc_sock __rcu	*socket;	/* socket responsible */
+	struct rxrpc_net	*rxnet;		/* Network namespace to which call belongs */
 	struct mutex		user_mutex;	/* User access mutex */
 	unsigned long		ack_at;		/* When deferred ACK needs to happen */
 	unsigned long		ack_lost_at;	/* When ACK is figured as lost */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 4ce24c000653..493545033e42 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -138,6 +138,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 
 	write_unlock(&rx->call_lock);
 
+	rxnet = call->rxnet;
 	write_lock(&rxnet->call_lock);
 	list_add_tail(&call->link, &rxnet->calls);
 	write_unlock(&rxnet->call_lock);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 85b12c472522..f721c2b7e234 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -103,6 +103,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
 				    unsigned int debug_id)
 {
 	struct rxrpc_call *call;
+	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 
 	call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
 	if (!call)
@@ -153,6 +154,9 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
 
 	call->cong_cwnd = 2;
 	call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
+
+	call->rxnet = rxnet;
+	atomic_inc(&rxnet->nr_calls);
 	return call;
 
 nomem_2:
@@ -222,7 +226,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	__acquires(&call->user_mutex)
 {
 	struct rxrpc_call *call, *xcall;
-	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
+	struct rxrpc_net *rxnet;
 	struct rb_node *parent, **pp;
 	const void *here = __builtin_return_address(0);
 	int ret;
@@ -272,6 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 
 	write_unlock(&rx->call_lock);
 
+	rxnet = call->rxnet;
 	write_lock(&rxnet->call_lock);
 	list_add_tail(&call->link, &rxnet->calls);
 	write_unlock(&rxnet->call_lock);
@@ -617,7 +622,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
  */
 void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 {
-	struct rxrpc_net *rxnet;
+	struct rxrpc_net *rxnet = call->rxnet;
 	const void *here = __builtin_return_address(0);
 	int n;
 
@@ -631,7 +636,6 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 		ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 
 		if (!list_empty(&call->link)) {
-			rxnet = rxrpc_net(sock_net(&call->socket->sk));
 			write_lock(&rxnet->call_lock);
 			list_del_init(&call->link);
 			write_unlock(&rxnet->call_lock);
@@ -647,11 +651,14 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
 {
 	struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+	struct rxrpc_net *rxnet = call->rxnet;
 
 	rxrpc_put_peer(call->peer);
 	kfree(call->rxtx_buffer);
 	kfree(call->rxtx_annotations);
 	kmem_cache_free(rxrpc_call_jar, call);
+	if (atomic_dec_and_test(&rxnet->nr_calls))
+		wake_up_atomic_t(&rxnet->nr_calls);
 }
 
 /*
@@ -716,4 +723,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
 	}
 
 	write_unlock(&rxnet->call_lock);
+
+	atomic_dec(&rxnet->nr_calls);
+	wait_on_atomic_t(&rxnet->nr_calls, atomic_t_wait, TASK_UNINTERRUPTIBLE);
 }
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 66baf2b80b6c..101019b0be34 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -55,6 +55,7 @@ static __net_init int rxrpc_init_net(struct net *net)
 
 	INIT_LIST_HEAD(&rxnet->calls);
 	rwlock_init(&rxnet->call_lock);
+	atomic_set(&rxnet->nr_calls, 1);
 
 	INIT_LIST_HEAD(&rxnet->conn_proc_list);
 	INIT_LIST_HEAD(&rxnet->service_conns);

From 09d2bf595db4b4075ea721acd61e180d6bb18f88 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:05:28 +0100
Subject: [PATCH 1530/1678] rxrpc: Add a tracepoint to track rxrpc_local
 refcounting

Add a tracepoint to track reference counting on the rxrpc_local struct.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 43 ++++++++++++++++++++++++
 net/rxrpc/ar-internal.h      | 27 +++------------
 net/rxrpc/call_accept.c      |  3 +-
 net/rxrpc/local_object.c     | 65 ++++++++++++++++++++++++++++++++++--
 4 files changed, 111 insertions(+), 27 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 2ea788f6f95d..0410dfeb79c6 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -42,6 +42,14 @@ enum rxrpc_skb_trace {
 	rxrpc_skb_tx_seen,
 };
 
+enum rxrpc_local_trace {
+	rxrpc_local_got,
+	rxrpc_local_new,
+	rxrpc_local_processing,
+	rxrpc_local_put,
+	rxrpc_local_queued,
+};
+
 enum rxrpc_conn_trace {
 	rxrpc_conn_got,
 	rxrpc_conn_new_client,
@@ -215,6 +223,13 @@ enum rxrpc_congest_change {
 	EM(rxrpc_skb_tx_rotated,		"Tx ROT") \
 	E_(rxrpc_skb_tx_seen,			"Tx SEE")
 
+#define rxrpc_local_traces \
+	EM(rxrpc_local_got,			"GOT") \
+	EM(rxrpc_local_new,			"NEW") \
+	EM(rxrpc_local_processing,		"PRO") \
+	EM(rxrpc_local_put,			"PUT") \
+	E_(rxrpc_local_queued,			"QUE")
+
 #define rxrpc_conn_traces \
 	EM(rxrpc_conn_got,			"GOT") \
 	EM(rxrpc_conn_new_client,		"NWc") \
@@ -416,6 +431,7 @@ enum rxrpc_congest_change {
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
 rxrpc_skb_traces;
+rxrpc_local_traces;
 rxrpc_conn_traces;
 rxrpc_client_traces;
 rxrpc_call_traces;
@@ -439,6 +455,33 @@ rxrpc_congest_changes;
 #define EM(a, b)	{ a, b },
 #define E_(a, b)	{ a, b }
 
+TRACE_EVENT(rxrpc_local,
+	    TP_PROTO(struct rxrpc_local *local, enum rxrpc_local_trace op,
+		     int usage, const void *where),
+
+	    TP_ARGS(local, op, usage, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,	local		)
+		    __field(int,		op		)
+		    __field(int,		usage		)
+		    __field(const void *,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->local = local->debug_id;
+		    __entry->op = op;
+		    __entry->usage = usage;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("L=%08x %s u=%d sp=%pSR",
+		      __entry->local,
+		      __print_symbolic(__entry->op, rxrpc_local_traces),
+		      __entry->usage,
+		      __entry->where)
+	    );
+
 TRACE_EVENT(rxrpc_conn,
 	    TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op,
 		     int usage, const void *where),
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2a2b0fdfb157..cc51d3eb0548 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -981,31 +981,12 @@ extern void rxrpc_process_local_events(struct rxrpc_local *);
  * local_object.c
  */
 struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
-void __rxrpc_put_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *);
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *);
+void rxrpc_put_local(struct rxrpc_local *);
+void rxrpc_queue_local(struct rxrpc_local *);
 void rxrpc_destroy_all_locals(struct rxrpc_net *);
 
-static inline void rxrpc_get_local(struct rxrpc_local *local)
-{
-	atomic_inc(&local->usage);
-}
-
-static inline
-struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
-{
-	return atomic_inc_not_zero(&local->usage) ? local : NULL;
-}
-
-static inline void rxrpc_put_local(struct rxrpc_local *local)
-{
-	if (local && atomic_dec_and_test(&local->usage))
-		__rxrpc_put_local(local);
-}
-
-static inline void rxrpc_queue_local(struct rxrpc_local *local)
-{
-	rxrpc_queue_work(&local->processor);
-}
-
 /*
  * misc.c
  */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 493545033e42..5a9b1d916124 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -296,8 +296,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
 		b->conn_backlog[conn_tail] = NULL;
 		smp_store_release(&b->conn_backlog_tail,
 				  (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
-		rxrpc_get_local(local);
-		conn->params.local = local;
+		conn->params.local = rxrpc_get_local(local);
 		conn->params.peer = peer;
 		rxrpc_see_connection(conn);
 		rxrpc_new_incoming_connection(rx, conn, skb);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 38b99db30e54..8b54e9531d52 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -95,6 +95,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
 		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
 		memcpy(&local->srx, srx, sizeof(*srx));
 		local->srx.srx_service = 0;
+		trace_rxrpc_local(local, rxrpc_local_new, 1, NULL);
 	}
 
 	_leave(" = %p", local);
@@ -256,15 +257,74 @@ addr_in_use:
 	return ERR_PTR(-EADDRINUSE);
 }
 
+/*
+ * Get a ref on a local endpoint.
+ */
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	n = atomic_inc_return(&local->usage);
+	trace_rxrpc_local(local, rxrpc_local_got, n, here);
+	return local;
+}
+
+/*
+ * Get a ref on a local endpoint unless its usage has already reached 0.
+ */
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
+{
+	const void *here = __builtin_return_address(0);
+
+	if (local) {
+		int n = __atomic_add_unless(&local->usage, 1, 0);
+		if (n > 0)
+			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
+		else
+			local = NULL;
+	}
+	return local;
+}
+
+/*
+ * Queue a local endpoint.
+ */
+void rxrpc_queue_local(struct rxrpc_local *local)
+{
+	const void *here = __builtin_return_address(0);
+
+	if (rxrpc_queue_work(&local->processor))
+		trace_rxrpc_local(local, rxrpc_local_queued,
+				  atomic_read(&local->usage), here);
+}
+
 /*
  * A local endpoint reached its end of life.
  */
-void __rxrpc_put_local(struct rxrpc_local *local)
+static void __rxrpc_put_local(struct rxrpc_local *local)
 {
 	_enter("%d", local->debug_id);
 	rxrpc_queue_work(&local->processor);
 }
 
+/*
+ * Drop a ref on a local endpoint.
+ */
+void rxrpc_put_local(struct rxrpc_local *local)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	if (local) {
+		n = atomic_dec_return(&local->usage);
+		trace_rxrpc_local(local, rxrpc_local_put, n, here);
+
+		if (n == 0)
+			__rxrpc_put_local(local);
+	}
+}
+
 /*
  * Destroy a local endpoint's socket and then hand the record to RCU to dispose
  * of.
@@ -322,7 +382,8 @@ static void rxrpc_local_processor(struct work_struct *work)
 		container_of(work, struct rxrpc_local, processor);
 	bool again;
 
-	_enter("%d", local->debug_id);
+	trace_rxrpc_local(local, rxrpc_local_processing,
+			  atomic_read(&local->usage), NULL);
 
 	do {
 		again = false;

From 31f5f9a1691ebef2113c8bdb3edcb8859f30f702 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:05:33 +0100
Subject: [PATCH 1531/1678] rxrpc: Fix apparent leak of rxrpc_local objects

rxrpc_local objects cannot be disposed of until all the connections that
point to them have been RCU'd as a connection object holds refcount on the
local endpoint it is communicating through.  Currently, this can cause an
assertion failure to occur when a network namespace is destroyed as there's
no check that the RCU destructors for the connections have been run before
we start trying to destroy local endpoints.

The kernel reports:

	rxrpc: AF_RXRPC: Leaked local 0000000036a41bc1 {5}
	------------[ cut here ]------------
	kernel BUG at ../net/rxrpc/local_object.c:439!

Fix this by keeping a count of the live connections and waiting for it to
go to zero at the end of rxrpc_destroy_all_connections().

Fixes: dee46364ce6f ("rxrpc: Add RCU destruction for connections and calls")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/ar-internal.h  | 1 +
 net/rxrpc/call_accept.c  | 2 ++
 net/rxrpc/conn_client.c  | 1 +
 net/rxrpc/conn_object.c  | 8 ++++++++
 net/rxrpc/conn_service.c | 1 +
 net/rxrpc/net_ns.c       | 1 +
 6 files changed, 14 insertions(+)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index cc51d3eb0548..d40d54b78567 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -77,6 +77,7 @@ struct rxrpc_net {
 	rwlock_t		call_lock;	/* Lock for ->calls */
 	atomic_t		nr_calls;	/* Count of allocated calls */
 
+	atomic_t		nr_conns;
 	struct list_head	conn_proc_list;	/* List of conns in this namespace for proc */
 	struct list_head	service_conns;	/* Service conns in this namespace */
 	rwlock_t		conn_lock;	/* Lock for ->conn_proc_list, ->service_conns */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 5a9b1d916124..f67017dcb25e 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -219,6 +219,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 		list_del(&conn->proc_link);
 		write_unlock(&rxnet->conn_lock);
 		kfree(conn);
+		if (atomic_dec_and_test(&rxnet->nr_conns))
+			wake_up_atomic_t(&rxnet->nr_conns);
 		tail = (tail + 1) & (size - 1);
 	}
 
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 041da40dbf93..5736f643c516 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -207,6 +207,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
 	if (ret < 0)
 		goto error_2;
 
+	atomic_inc(&rxnet->nr_conns);
 	write_lock(&rxnet->conn_lock);
 	list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
 	write_unlock(&rxnet->conn_lock);
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index bfc46fd69a62..0950ee3d26f5 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -365,6 +365,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
 	key_put(conn->params.key);
 	key_put(conn->server_key);
 	rxrpc_put_peer(conn->params.peer);
+
+	if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns))
+		wake_up_atomic_t(&conn->params.local->rxnet->nr_conns);
 	rxrpc_put_local(conn->params.local);
 
 	kfree(conn);
@@ -458,6 +461,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
 
 	_enter("");
 
+	atomic_dec(&rxnet->nr_conns);
 	rxrpc_destroy_all_client_connections(rxnet);
 
 	del_timer_sync(&rxnet->service_conn_reap_timer);
@@ -475,5 +479,9 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
 
 	ASSERT(list_empty(&rxnet->conn_proc_list));
 
+	/* We need to wait for the connections to be destroyed by RCU as they
+	 * pin things that we still need to get rid of.
+	 */
+	wait_on_atomic_t(&rxnet->nr_conns, atomic_t_wait, TASK_UNINTERRUPTIBLE);
 	_leave("");
 }
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index f6fcdb3130a1..80773a50c755 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -132,6 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
 		conn->state = RXRPC_CONN_SERVICE_PREALLOC;
 		atomic_set(&conn->usage, 2);
 
+		atomic_inc(&rxnet->nr_conns);
 		write_lock(&rxnet->conn_lock);
 		list_add_tail(&conn->link, &rxnet->service_conns);
 		list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 101019b0be34..fa9ce60e7bfa 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -57,6 +57,7 @@ static __net_init int rxrpc_init_net(struct net *net)
 	rwlock_init(&rxnet->call_lock);
 	atomic_set(&rxnet->nr_calls, 1);
 
+	atomic_set(&rxnet->nr_conns, 1);
 	INIT_LIST_HEAD(&rxnet->conn_proc_list);
 	INIT_LIST_HEAD(&rxnet->service_conns);
 	rwlock_init(&rxnet->conn_lock);

From 1159d4b496f57d5b8ee27c8b90b9d01c332e2e11 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:05:38 +0100
Subject: [PATCH 1532/1678] rxrpc: Add a tracepoint to track rxrpc_peer
 refcounting

Add a tracepoint to track reference counting on the rxrpc_peer struct.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 42 +++++++++++++++++++++++
 net/rxrpc/ar-internal.h      | 23 +++----------
 net/rxrpc/peer_event.c       |  2 +-
 net/rxrpc/peer_object.c      | 65 ++++++++++++++++++++++++++++++++++--
 4 files changed, 110 insertions(+), 22 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 0410dfeb79c6..9e96c2fe2793 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -50,6 +50,14 @@ enum rxrpc_local_trace {
 	rxrpc_local_queued,
 };
 
+enum rxrpc_peer_trace {
+	rxrpc_peer_got,
+	rxrpc_peer_new,
+	rxrpc_peer_processing,
+	rxrpc_peer_put,
+	rxrpc_peer_queued_error,
+};
+
 enum rxrpc_conn_trace {
 	rxrpc_conn_got,
 	rxrpc_conn_new_client,
@@ -230,6 +238,13 @@ enum rxrpc_congest_change {
 	EM(rxrpc_local_put,			"PUT") \
 	E_(rxrpc_local_queued,			"QUE")
 
+#define rxrpc_peer_traces \
+	EM(rxrpc_peer_got,			"GOT") \
+	EM(rxrpc_peer_new,			"NEW") \
+	EM(rxrpc_peer_processing,		"PRO") \
+	EM(rxrpc_peer_put,			"PUT") \
+	E_(rxrpc_peer_queued_error,		"QER")
+
 #define rxrpc_conn_traces \
 	EM(rxrpc_conn_got,			"GOT") \
 	EM(rxrpc_conn_new_client,		"NWc") \
@@ -482,6 +497,33 @@ TRACE_EVENT(rxrpc_local,
 		      __entry->where)
 	    );
 
+TRACE_EVENT(rxrpc_peer,
+	    TP_PROTO(struct rxrpc_peer *peer, enum rxrpc_peer_trace op,
+		     int usage, const void *where),
+
+	    TP_ARGS(peer, op, usage, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,	peer		)
+		    __field(int,		op		)
+		    __field(int,		usage		)
+		    __field(const void *,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->peer = peer->debug_id;
+		    __entry->op = op;
+		    __entry->usage = usage;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("P=%08x %s u=%d sp=%pSR",
+		      __entry->peer,
+		      __print_symbolic(__entry->op, rxrpc_peer_traces),
+		      __entry->usage,
+		      __entry->where)
+	    );
+
 TRACE_EVENT(rxrpc_conn,
 	    TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op,
 		     int usage, const void *where),
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index d40d54b78567..c46583bc255d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1041,25 +1041,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
 struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
 struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *,
 					      struct rxrpc_peer *);
-
-static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
-{
-	atomic_inc(&peer->usage);
-	return peer;
-}
-
-static inline
-struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
-{
-	return atomic_inc_not_zero(&peer->usage) ? peer : NULL;
-}
-
-extern void __rxrpc_put_peer(struct rxrpc_peer *peer);
-static inline void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
-	if (peer && atomic_dec_and_test(&peer->usage))
-		__rxrpc_put_peer(peer);
-}
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
+void rxrpc_put_peer(struct rxrpc_peer *);
+void __rxrpc_queue_peer_error(struct rxrpc_peer *);
 
 /*
  * proc.c
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index d01eb9a06448..78c2f95d1f22 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -192,7 +192,7 @@ void rxrpc_error_report(struct sock *sk)
 	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 
 	/* The ref we obtained is passed off to the work item */
-	rxrpc_queue_work(&peer->error_distributor);
+	__rxrpc_queue_peer_error(peer);
 	_leave("");
 }
 
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 94a6dbfcf129..a4a750aea1e5 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -386,9 +386,54 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
 }
 
 /*
- * Discard a ref on a remote peer record.
+ * Get a ref on a peer record.
  */
-void __rxrpc_put_peer(struct rxrpc_peer *peer)
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	n = atomic_inc_return(&peer->usage);
+	trace_rxrpc_peer(peer, rxrpc_peer_got, n, here);
+	return peer;
+}
+
+/*
+ * Get a ref on a peer record unless its usage has already reached 0.
+ */
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
+{
+	const void *here = __builtin_return_address(0);
+
+	if (peer) {
+		int n = __atomic_add_unless(&peer->usage, 1, 0);
+		if (n > 0)
+			trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
+		else
+			peer = NULL;
+	}
+	return peer;
+}
+
+/*
+ * Queue a peer record.  This passes the caller's ref to the workqueue.
+ */
+void __rxrpc_queue_peer_error(struct rxrpc_peer *peer)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	n = atomic_read(&peer->usage);
+	if (rxrpc_queue_work(&peer->error_distributor))
+		trace_rxrpc_peer(peer, rxrpc_peer_queued_error, n, here);
+	else
+		rxrpc_put_peer(peer);
+}
+
+/*
+ * Discard a peer record.
+ */
+static void __rxrpc_put_peer(struct rxrpc_peer *peer)
 {
 	struct rxrpc_net *rxnet = peer->local->rxnet;
 
@@ -402,6 +447,22 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer)
 	kfree_rcu(peer, rcu);
 }
 
+/*
+ * Drop a ref on a peer record.
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	if (peer) {
+		n = atomic_dec_return(&peer->usage);
+		trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+		if (n == 0)
+			__rxrpc_put_peer(peer);
+	}
+}
+
 /**
  * rxrpc_kernel_get_peer - Get the peer address of a call
  * @sock: The socket on which the call is in progress.

From 17226f1240381812c3a4927dc9da2814fb71c8ac Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 30 Mar 2018 21:05:44 +0100
Subject: [PATCH 1533/1678] rxrpc: Fix leak of rxrpc_peer objects

When a new client call is requested, an rxrpc_conn_parameters struct object
is passed in with a bunch of parameters set, such as the local endpoint to
use.  A pointer to the target peer record is also placed in there by
rxrpc_get_client_conn() - and this is removed if and only if a new
connection object is allocated.  Thus it leaks if a new connection object
isn't allocated.

Fix this by putting any peer object attached to the rxrpc_conn_parameters
object in the function that allocated it.

Fixes: 19ffa01c9c45 ("rxrpc: Use structs to hold connection params and protocol info")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 net/rxrpc/af_rxrpc.c    |  2 ++
 net/rxrpc/ar-internal.h |  1 +
 net/rxrpc/net_ns.c      |  1 +
 net/rxrpc/peer_object.c | 21 +++++++++++++++++++++
 net/rxrpc/sendmsg.c     |  1 +
 5 files changed, 26 insertions(+)

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0b3026b8fa40..9a2c8e7c000e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -324,6 +324,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 		mutex_unlock(&call->user_mutex);
 	}
 
+	rxrpc_put_peer(cp.peer);
 	_leave(" = %p", call);
 	return call;
 }
@@ -447,6 +448,7 @@ int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
 		ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
 
 	mutex_unlock(&call->user_mutex);
+	rxrpc_put_peer(cp.peer);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index c46583bc255d..90d7079e0aa9 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1041,6 +1041,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
 struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
 struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *,
 					      struct rxrpc_peer *);
+void rxrpc_destroy_all_peers(struct rxrpc_net *);
 struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *);
 struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *);
 void rxrpc_put_peer(struct rxrpc_peer *);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index fa9ce60e7bfa..c7a023fb22d0 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -118,6 +118,7 @@ static __net_exit void rxrpc_exit_net(struct net *net)
 	cancel_work_sync(&rxnet->peer_keepalive_work);
 	rxrpc_destroy_all_calls(rxnet);
 	rxrpc_destroy_all_connections(rxnet);
+	rxrpc_destroy_all_peers(rxnet);
 	rxrpc_destroy_all_locals(rxnet);
 	proc_remove(rxnet->proc_net);
 }
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index a4a750aea1e5..1b7e8107b3ae 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -463,6 +463,27 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
 	}
 }
 
+/*
+ * Make sure all peer records have been discarded.
+ */
+void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
+{
+	struct rxrpc_peer *peer;
+	int i;
+
+	for (i = 0; i < HASH_SIZE(rxnet->peer_hash); i++) {
+		if (hlist_empty(&rxnet->peer_hash[i]))
+			continue;
+
+		hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
+			pr_err("Leaked peer %u {%u} %pISp\n",
+			       peer->debug_id,
+			       atomic_read(&peer->usage),
+			       &peer->srx.transport);
+		}
+	}
+}
+
 /**
  * rxrpc_kernel_get_peer - Get the peer address of a call
  * @sock: The socket on which the call is in progress.
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index a62980a80151..206e802ccbdc 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -586,6 +586,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 				     atomic_inc_return(&rxrpc_debug_id));
 	/* The socket is now unlocked */
 
+	rxrpc_put_peer(cp.peer);
 	_leave(" = %p\n", call);
 	return call;
 }

From f385178679b6561d2e717567d12e07c7f927ee59 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Fri, 30 Mar 2018 09:20:59 +0900
Subject: [PATCH 1534/1678] lib/scatterlist: add sg_init_marker() helper

sg_init_marker initializes sg_magic in the sg table and calls
sg_mark_end() on the last entry of the table. This can be useful to
avoid memset in sg_init_table() when scatterlist is already zeroed out

For example: when scatterlist is embedded inside other struct and that
container struct is zeroed out

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/scatterlist.h | 18 ++++++++++++++++++
 lib/scatterlist.c           |  9 +--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 22b2131bcdcd..aa5d4eb725f5 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -248,6 +248,24 @@ static inline void *sg_virt(struct scatterlist *sg)
 	return page_address(sg_page(sg)) + sg->offset;
 }
 
+/**
+ * sg_init_marker - Initialize markers in sg table
+ * @sgl:	   The SG table
+ * @nents:	   Number of entries in table
+ *
+ **/
+static inline void sg_init_marker(struct scatterlist *sgl,
+				  unsigned int nents)
+{
+#ifdef CONFIG_DEBUG_SG
+	unsigned int i;
+
+	for (i = 0; i < nents; i++)
+		sgl[i].sg_magic = SG_MAGIC;
+#endif
+	sg_mark_end(&sgl[nents - 1]);
+}
+
 int sg_nents(struct scatterlist *sg);
 int sg_nents_for_len(struct scatterlist *sg, u64 len);
 struct scatterlist *sg_next(struct scatterlist *);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 53728d391d3a..06dad7a072fd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -132,14 +132,7 @@ EXPORT_SYMBOL(sg_last);
 void sg_init_table(struct scatterlist *sgl, unsigned int nents)
 {
 	memset(sgl, 0, sizeof(*sgl) * nents);
-#ifdef CONFIG_DEBUG_SG
-	{
-		unsigned int i;
-		for (i = 0; i < nents; i++)
-			sgl[i].sg_magic = SG_MAGIC;
-	}
-#endif
-	sg_mark_end(&sgl[nents - 1]);
+	sg_init_marker(sgl, nents);
 }
 EXPORT_SYMBOL(sg_init_table);
 

From 6ef6d84ceee2262a8c7a4247616c7eb71268b3f9 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Fri, 30 Mar 2018 09:21:00 +0900
Subject: [PATCH 1535/1678] bpf: sockmap: initialize sg table entries properly

When CONFIG_DEBUG_SG is set, sg->sg_magic is initialized in
sg_init_table() and it is verified in sg api while navigating. We hit
BUG_ON when magic check is failed.

In functions sg_tcp_sendpage and sg_tcp_sendmsg, the struct containing
the scatterlist is already zeroed out. So to avoid extra memset, we
use sg_init_marker() to initialize sg_magic.

Fixed following things:
- In bpf_tcp_sendpage: initialize sg using sg_init_marker
- In bpf_tcp_sendmsg: Replace sg_init_table with sg_init_marker
- In bpf_tcp_push: Replace memset with sg_init_table where consumed
  sg entry needs to be re-initialized.

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 9192fdbcbccc..d2bda5aa25d7 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -341,7 +341,7 @@ retry:
 			md->sg_start++;
 			if (md->sg_start == MAX_SKB_FRAGS)
 				md->sg_start = 0;
-			memset(sg, 0, sizeof(*sg));
+			sg_init_table(sg, 1);
 
 			if (md->sg_start == md->sg_end)
 				break;
@@ -843,7 +843,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	}
 
 	sg = md.sg_data;
-	sg_init_table(sg, MAX_SKB_FRAGS);
+	sg_init_marker(sg, MAX_SKB_FRAGS);
 	rcu_read_unlock();
 
 	lock_sock(sk);
@@ -950,10 +950,14 @@ static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 
 	lock_sock(sk);
 
-	if (psock->cork_bytes)
+	if (psock->cork_bytes) {
 		m = psock->cork;
-	else
+		sg = &m->sg_data[m->sg_end];
+	} else {
 		m = &md;
+		sg = m->sg_data;
+		sg_init_marker(sg, MAX_SKB_FRAGS);
+	}
 
 	/* Catch case where ring is full and sendpage is stalled. */
 	if (unlikely(m->sg_end == m->sg_start &&
@@ -961,7 +965,6 @@ static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 		goto out_err;
 
 	psock->sg_size += size;
-	sg = &m->sg_data[m->sg_end];
 	sg_set_page(sg, page, size, offset);
 	get_page(page);
 	m->sg_copy[m->sg_end] = true;

From 7b2117bb8ff98b7b82543f77768dfe7aca7e3746 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 1 Feb 2018 05:37:44 -0800
Subject: [PATCH 1536/1678] net/mlx5e: Use eq ptr from cq

Instead of looking for the EQ of the CQ, remove that redundant code and
use the eq pointer stored in the cq struct.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1b48dec67abf..2aff4db9bdaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3728,21 +3728,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
 					struct mlx5e_txqsq *sq)
 {
-	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5_core_dev *mdev = priv->mdev;
-	int irqn_not_used, eqn;
-	struct mlx5_eq *eq;
+	struct mlx5_eq *eq = sq->cq.mcq.eq;
 	u32 eqe_count;
 
-	if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, &irqn_not_used))
-		return false;
-
-	eq = mlx5_eqn2eq(mdev, eqn);
-	if (IS_ERR(eq))
-		return false;
-
 	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-		   eqn, eq->cons_index, eq->irqn);
+		   eq->eqn, eq->cons_index, eq->irqn);
 
 	eqe_count = mlx5_eq_poll_irq_disabled(eq);
 	if (!eqe_count)

From b2d3907c234618c20239127f2c234b4e92adf5ef Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Tue, 13 Feb 2018 17:07:02 -0800
Subject: [PATCH 1537/1678] net/mlx5: Eliminate query xsrq dead code

1. This function is not used anywhere in mlx5 driver
2. It has a memcpy statement that makes no sense and produces build
warning with gcc8

drivers/net/ethernet/mellanox/mlx5/core/transobj.c: In function 'mlx5_core_query_xsrq':
drivers/net/ethernet/mellanox/mlx5/core/transobj.c:347:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict]

Fixes: 01949d0109ee ("net/mlx5_core: Enable XRCs and SRQs when using ISSI > 0")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/transobj.c    | 21 -------------------
 include/linux/mlx5/transobj.h                 |  1 -
 2 files changed, 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index c64957b5ef47..dae1c5c5d27c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -354,27 +354,6 @@ int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out)
-{
-	u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0};
-	void *srqc;
-	void *xrc_srqc;
-	int err;
-
-	MLX5_SET(query_xrc_srq_in, in, opcode,   MLX5_CMD_OP_QUERY_XRC_SRQ);
-	MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out,
-			    MLX5_ST_SZ_BYTES(query_xrc_srq_out));
-	if (!err) {
-		xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out,
-					xrc_srq_context_entry);
-		srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry);
-		memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc));
-	}
-
-	return err;
-}
-
 int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
 {
 	u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index 80d7aa8b2831..83a33a1873a6 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -67,7 +67,6 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
 int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			  u32 *rmpn);
 int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
 int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
 
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,

From 6c75062823d8aa340615962bf261187bafa8d795 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@mellanox.com>
Date: Thu, 1 Feb 2018 15:34:35 +0200
Subject: [PATCH 1538/1678] net/mlx5: Change teardown with force mode failure
 message to warning

With ConnectX-4, we expect the force teardown to fail in case that
DC was enabled, therefore change the message from error to warning.

Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index d7bb10ab2173..70066975f1b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -245,7 +245,7 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
 
 	force_state = MLX5_GET(teardown_hca_out, out, force_state);
 	if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
-		mlx5_core_err(dev, "teardown with force mode failed\n");
+		mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
 		return -EIO;
 	}
 

From 533788988cb62cf8c9a065612860fb89e50662d3 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Tue, 6 Mar 2018 14:58:46 +0200
Subject: [PATCH 1539/1678] net/mlx5e: IPoIB, Fix spelling mistake

Fix spelling mistake in debug message text.
"dettaching" -> "detaching"

Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index a35608faf8d2..4899de74e252 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -540,7 +540,7 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
 
 	err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
 	if (err)
-		mlx5_core_dbg(mdev, "failed dettaching QPN 0x%x, MGID %pI6\n",
+		mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
 			      ipriv->qp.qpn, gid->raw);
 
 	return err;

From 472a1e44b3495df01c83e048667ef93dd2ea1ca0 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 12 Mar 2018 14:24:41 +0200
Subject: [PATCH 1540/1678] net/mlx5e: Save MTU in channels params

Knowing the MTU is required for RQ creation flow.
By our design, channels creation flow is totally isolated
from priv/netdev, and can be completed with access to
channels params and mdev.
Adding the MTU to the channels params helps preserving that.
In addition, we save it in RQ to make its access faster in
datapath checks.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 10 ++--
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 58 ++++++++++---------
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  3 +-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  3 +-
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 27 +++++----
 5 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 353ac6daa3dc..823876bfd6ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -57,8 +57,8 @@
 
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
 
-#define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu))
-#define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu))
+#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
+#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
 
 #define MLX5E_MAX_DSCP          64
 #define MLX5E_MAX_NUM_TC	8
@@ -251,6 +251,8 @@ struct mlx5e_params {
 	u32 lro_timeout;
 	u32 pflags;
 	struct bpf_prog *xdp_prog;
+	unsigned int sw_mtu;
+	int hard_mtu;
 };
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -534,6 +536,7 @@ struct mlx5e_rq {
 
 	/* XDP */
 	struct bpf_prog       *xdp_prog;
+	unsigned int           hw_mtu;
 	struct mlx5e_xdpsq     xdpsq;
 
 	/* control */
@@ -767,7 +770,6 @@ struct mlx5e_priv {
 	struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
 	struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
 	u32                        tx_rates[MLX5E_MAX_NUM_SQS];
-	int                        hard_mtu;
 
 	struct mlx5e_flow_steering fs;
 	struct mlx5e_vxlan_db      vxlan;
@@ -1111,7 +1113,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 			    struct mlx5e_params *params,
-			    u16 max_channels);
+			    u16 max_channels, u16 mtu);
 u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
 void mlx5e_rx_dim_work(struct work_struct *work);
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2aff4db9bdaa..af345323b2ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -419,6 +419,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->mdev    = mdev;
+	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 
 	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
 	if (IS_ERR(rq->xdp_prog)) {
@@ -494,7 +495,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 		byte_count = params->lro_en  ?
 				params->lro_wqe_sz :
-				MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
+				MLX5E_SW2HW_MTU(params, params->sw_mtu);
 #ifdef CONFIG_MLX5_EN_IPSEC
 		if (MLX5_IPSEC_DEV(mdev))
 			byte_count += MLX5E_METADATA_ETHER_LEN;
@@ -2498,10 +2499,10 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
 	mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
 }
 
-static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu)
+static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
+			 struct mlx5e_params *params, u16 mtu)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
-	u16 hw_mtu = MLX5E_SW2HW_MTU(priv, mtu);
+	u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
 	int err;
 
 	err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
@@ -2513,9 +2514,9 @@ static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu)
 	return 0;
 }
 
-static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu)
+static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
+			    struct mlx5e_params *params, u16 *mtu)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
 	u16 hw_mtu = 0;
 	int err;
 
@@ -2523,25 +2524,27 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu)
 	if (err || !hw_mtu) /* fallback to port oper mtu */
 		mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
 
-	*mtu = MLX5E_HW2SW_MTU(priv, hw_mtu);
+	*mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
 }
 
 static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
 {
+	struct mlx5e_params *params = &priv->channels.params;
 	struct net_device *netdev = priv->netdev;
+	struct mlx5_core_dev *mdev = priv->mdev;
 	u16 mtu;
 	int err;
 
-	err = mlx5e_set_mtu(priv, netdev->mtu);
+	err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
 	if (err)
 		return err;
 
-	mlx5e_query_mtu(priv, &mtu);
-	if (mtu != netdev->mtu)
+	mlx5e_query_mtu(mdev, params, &mtu);
+	if (mtu != params->sw_mtu)
 		netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
-			    __func__, mtu, netdev->mtu);
+			    __func__, mtu, params->sw_mtu);
 
-	netdev->mtu = mtu;
+	params->sw_mtu = mtu;
 	return 0;
 }
 
@@ -3411,34 +3414,33 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_channels new_channels = {};
-	int curr_mtu;
+	struct mlx5e_params *params;
 	int err = 0;
 	bool reset;
 
 	mutex_lock(&priv->state_lock);
 
-	reset = !priv->channels.params.lro_en &&
-		(priv->channels.params.rq_wq_type !=
-		 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+	params = &priv->channels.params;
+	reset = !params->lro_en &&
+		(params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 
 	reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
 
-	curr_mtu    = netdev->mtu;
-	netdev->mtu = new_mtu;
-
 	if (!reset) {
+		params->sw_mtu = new_mtu;
 		mlx5e_set_dev_port_mtu(priv);
+		netdev->mtu = params->sw_mtu;
 		goto out;
 	}
 
-	new_channels.params = priv->channels.params;
+	new_channels.params = *params;
+	new_channels.params.sw_mtu = new_mtu;
 	err = mlx5e_open_channels(priv, &new_channels);
-	if (err) {
-		netdev->mtu = curr_mtu;
+	if (err)
 		goto out;
-	}
 
 	mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_set_dev_port_mtu);
+	netdev->mtu = new_channels.params.sw_mtu;
 
 out:
 	mutex_unlock(&priv->state_lock);
@@ -4111,10 +4113,12 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
 
 void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 			    struct mlx5e_params *params,
-			    u16 max_channels)
+			    u16 max_channels, u16 mtu)
 {
 	u8 cq_period_mode = 0;
 
+	params->sw_mtu = mtu;
+	params->hard_mtu = MLX5E_ETH_HARD_MTU;
 	params->num_channels = max_channels;
 	params->num_tc       = 1;
 
@@ -4175,9 +4179,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 	priv->profile     = profile;
 	priv->ppriv       = ppriv;
 	priv->msglevel    = MLX5E_MSG_LEVEL;
-	priv->hard_mtu = MLX5E_ETH_HARD_MTU;
 
-	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
+	mlx5e_build_nic_params(mdev, &priv->channels.params,
+			       profile->max_nch(mdev), netdev->mtu);
 
 	mutex_init(&priv->state_lock);
 
@@ -4454,7 +4458,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 	/* MTU range: 68 - hw-specific max */
 	netdev->min_mtu = ETH_MIN_MTU;
 	mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
-	netdev->max_mtu = MLX5E_HW2SW_MTU(priv, max_mtu);
+	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
 	mlx5e_set_dev_port_mtu(priv);
 
 	mlx5_lag_add(mdev, netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index dd32f3e390ff..cd884829cb30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -877,6 +877,7 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
+	params->hard_mtu    = MLX5E_ETH_HARD_MTU;
 	params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 	params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
 	params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
@@ -926,8 +927,6 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
 
 	priv->channels.params.num_channels = profile->max_nch(mdev);
 
-	priv->hard_mtu = MLX5E_ETH_HARD_MTU;
-
 	mlx5e_build_rep_params(mdev, &priv->channels.params);
 	mlx5e_build_rep_netdev(netdev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 781b8f21d6d1..c0d528f2131b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -766,8 +766,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 
 	prefetchw(wqe);
 
-	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE ||
-		     MLX5E_SW2HW_MTU(rq->channel->priv, rq->netdev->mtu) < dma_len)) {
+	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || rq->hw_mtu < dma_len)) {
 		rq->stats.xdp_drop++;
 		return false;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 4899de74e252..dc5e9e706362 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -66,6 +66,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 		MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
 
 	params->lro_en = false;
+	params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
 }
 
 /* Called directly after IPoIB netdevice was created to initialize SW structs */
@@ -81,10 +82,10 @@ void mlx5i_init(struct mlx5_core_dev *mdev,
 	priv->netdev      = netdev;
 	priv->profile     = profile;
 	priv->ppriv       = ppriv;
-	priv->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
 	mutex_init(&priv->state_lock);
 
-	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
+	mlx5e_build_nic_params(mdev, &priv->channels.params,
+			       profile->max_nch(mdev), netdev->mtu);
 	mlx5i_build_nic_params(mdev, &priv->channels.params);
 
 	mlx5e_timestamp_init(priv);
@@ -368,25 +369,27 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 	struct mlx5e_channels new_channels = {};
-	int curr_mtu;
+	struct mlx5e_params *params;
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
 
-	curr_mtu    = netdev->mtu;
-	netdev->mtu = new_mtu;
+	params = &priv->channels.params;
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-		goto out;
-
-	new_channels.params = priv->channels.params;
-	err = mlx5e_open_channels(priv, &new_channels);
-	if (err) {
-		netdev->mtu = curr_mtu;
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		params->sw_mtu = new_mtu;
+		netdev->mtu = params->sw_mtu;
 		goto out;
 	}
 
+	new_channels.params = *params;
+	new_channels.params.sw_mtu = new_mtu;
+	err = mlx5e_open_channels(priv, &new_channels);
+	if (err)
+		goto out;
+
 	mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+	netdev->mtu = new_channels.params.sw_mtu;
 
 out:
 	mutex_unlock(&priv->state_lock);

From 73281b78a37a1a3f392fd5b6116d04e597484529 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 11 Feb 2018 15:21:33 +0200
Subject: [PATCH 1541/1678] net/mlx5e: Derive Striding RQ size from MTU

In Striding RQ, each WQE serves multiple packets
(hence called Multi-Packet WQE, MPWQE).
The size of a MPWQE is constant (currently 256KB).

Upon a ringparam set operation, we calculate the number of
MPWQEs per RQ. For this, first it is needed to determine the
number of packets that can reside within a single MPWQE.
In this patch we use the actual MTU size instead of ETH_DATA_LEN
for this calculation.

This implies that a change in MTU might require a change
in Striding RQ ring size.

In addition, this obsoletes some WQEs-to-packets translation
functions and helps delete ~60 LOC.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 63 ++++++---------
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 79 ++-----------------
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 71 +++++++++++------
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  2 +-
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c |  2 +-
 .../mellanox/mlx5/core/ipoib/ipoib_vlan.c     |  2 +-
 6 files changed, 80 insertions(+), 139 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 823876bfd6ab..1f89e2194b61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -63,18 +63,6 @@
 #define MLX5E_MAX_DSCP          64
 #define MLX5E_MAX_NUM_TC	8
 
-#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE                0x6
-#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE                0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE                0xd
-
-#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE                0x1
-#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE                0xa
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE                0xd
-
-#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
-#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x3
-#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
-
 #define MLX5_RX_HEADROOM NET_SKB_PAD
 #define MLX5_SKB_FRAG_SZ(len)	(SKB_DATA_ALIGN(len) +	\
 				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
@@ -95,9 +83,27 @@
 #define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_MTTS(wqes)		\
-	(wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
-#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
+#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_REQUIRED_MTTS(wqes)	(wqes * MLX5E_REQUIRED_WQE_MTTS)
+#define MLX5E_MAX_RQ_NUM_MTTS	\
+	((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
+#define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024))
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW	\
+		(ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW \
+	(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW + \
+	 (MLX5_MPWRQ_LOG_WQE_SZ - MLX5E_ORDER2_MAX_PACKET_MTU))
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE                0x6
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE                0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE                0xd
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE                0x1
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE                0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE min_t(u8, 0xd,	\
+					       MLX5E_LOG_MAX_RQ_NUM_PACKETS_MPW)
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
 
 #define MLX5_UMR_ALIGN				(2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(256)
@@ -155,26 +161,6 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
 	}
 }
 
-static inline int mlx5_min_log_rq_size(int wq_type)
-{
-	switch (wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
-	default:
-		return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
-	}
-}
-
-static inline int mlx5_max_log_rq_size(int wq_type)
-{
-	switch (wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW;
-	default:
-		return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
-	}
-}
-
 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 {
 	return is_kdump_kernel() ?
@@ -233,7 +219,7 @@ enum mlx5e_priv_flag {
 struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
-	u8  log_rq_size;
+	u8  log_rq_mtu_frames;
 	u16 num_channels;
 	u8  num_tc;
 	bool rx_cqe_compress_def;
@@ -849,11 +835,6 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 
-u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params);
-u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params);
-
 void mlx5e_update_stats(struct mlx5e_priv *priv);
 
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c57c929d7973..a87d46bc2299 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -220,60 +220,12 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
 	mlx5e_ethtool_get_ethtool_stats(priv, stats, data);
 }
 
-static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
-				    int num_wqe)
-{
-	int packets_per_wqe;
-	int stride_size;
-	int num_strides;
-	int wqe_size;
-
-	if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-		return num_wqe;
-
-	stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
-	num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
-	wqe_size = stride_size * num_strides;
-
-	packets_per_wqe = wqe_size /
-			  ALIGN(ETH_DATA_LEN, stride_size);
-	return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1));
-}
-
-static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
-				    int num_packets)
-{
-	int packets_per_wqe;
-	int stride_size;
-	int num_strides;
-	int wqe_size;
-	int num_wqes;
-
-	if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-		return num_packets;
-
-	stride_size = 1 << mlx5e_mpwqe_get_log_stride_size(priv->mdev, &priv->channels.params);
-	num_strides = 1 << mlx5e_mpwqe_get_log_num_strides(priv->mdev, &priv->channels.params);
-	wqe_size = stride_size * num_strides;
-
-	num_packets = (1 << order_base_2(num_packets));
-
-	packets_per_wqe = wqe_size /
-			  ALIGN(ETH_DATA_LEN, stride_size);
-	num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe);
-	return 1 << (order_base_2(num_wqes));
-}
-
 void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
 				 struct ethtool_ringparam *param)
 {
-	int rq_wq_type = priv->channels.params.rq_wq_type;
-
-	param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-							 1 << mlx5_max_log_rq_size(rq_wq_type));
+	param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
 	param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
-	param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-						     1 << priv->channels.params.log_rq_size);
+	param->rx_pending     = 1 << priv->channels.params.log_rq_mtu_frames;
 	param->tx_pending     = 1 << priv->channels.params.log_sq_size;
 }
 
@@ -288,13 +240,9 @@ static void mlx5e_get_ringparam(struct net_device *dev,
 int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 				struct ethtool_ringparam *param)
 {
-	int rq_wq_type = priv->channels.params.rq_wq_type;
 	struct mlx5e_channels new_channels = {};
-	u32 rx_pending_wqes;
-	u32 min_rq_size;
 	u8 log_rq_size;
 	u8 log_sq_size;
-	u32 num_mtts;
 	int err = 0;
 
 	if (param->rx_jumbo_pending) {
@@ -308,23 +256,10 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 		return -EINVAL;
 	}
 
-	min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
-					       1 << mlx5_min_log_rq_size(rq_wq_type));
-	rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type,
-						   param->rx_pending);
-
-	if (param->rx_pending < min_rq_size) {
+	if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
 		netdev_info(priv->netdev, "%s: rx_pending (%d) < min (%d)\n",
 			    __func__, param->rx_pending,
-			    min_rq_size);
-		return -EINVAL;
-	}
-
-	num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes);
-	if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
-	    !MLX5E_VALID_NUM_MTTS(num_mtts)) {
-		netdev_info(priv->netdev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
-			    __func__, param->rx_pending);
+			    1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
 		return -EINVAL;
 	}
 
@@ -335,17 +270,17 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 		return -EINVAL;
 	}
 
-	log_rq_size = order_base_2(rx_pending_wqes);
+	log_rq_size = order_base_2(param->rx_pending);
 	log_sq_size = order_base_2(param->tx_pending);
 
-	if (log_rq_size == priv->channels.params.log_rq_size &&
+	if (log_rq_size == priv->channels.params.log_rq_mtu_frames &&
 	    log_sq_size == priv->channels.params.log_sq_size)
 		return 0;
 
 	mutex_lock(&priv->state_lock);
 
 	new_channels.params = priv->channels.params;
-	new_channels.params.log_rq_size = log_rq_size;
+	new_channels.params.log_rq_mtu_frames = log_rq_size;
 	new_channels.params.log_sq_size = log_sq_size;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index af345323b2ce..e627b81cebe9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -78,15 +78,38 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 		MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params)
+static u32 mlx5e_mpwqe_get_linear_frag_sz(struct mlx5e_params *params)
+{
+	u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+
+	return hw_mtu;
+}
+
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+{
+	u32 linear_frag_sz = mlx5e_mpwqe_get_linear_frag_sz(params);
+
+	return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+}
+
+static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+{
+	if (params->log_rq_mtu_frames <
+	    mlx5e_mpwqe_log_pkts_per_wqe(params) + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
+		return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+
+	return params->log_rq_mtu_frames - mlx5e_mpwqe_log_pkts_per_wqe(params);
+}
+
+static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
+					  struct mlx5e_params *params)
 {
 	return MLX5E_MPWQE_STRIDE_SZ(mdev,
 		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params)
+static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
+					  struct mlx5e_params *params)
 {
 	return MLX5_MPWRQ_LOG_WQE_SZ -
 		mlx5e_mpwqe_get_log_stride_size(mdev, params);
@@ -109,17 +132,13 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params)
 {
 	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+	params->log_rq_mtu_frames = is_kdump_kernel() ?
+		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+		MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		params->log_rq_size = is_kdump_kernel() ?
-			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
-			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		params->log_rq_size = is_kdump_kernel() ?
-			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
-			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
-
 		/* Extra room needed for build_skb */
 		params->lro_wqe_sz -= mlx5e_get_rq_headroom(params) +
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -127,7 +146,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 
 	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
 		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
-		       BIT(params->log_rq_size),
+		       BIT(params->log_rq_mtu_frames),
 		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
@@ -351,9 +370,6 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 	u32 *in;
 	int err;
 
-	if (!MLX5E_VALID_NUM_MTTS(npages))
-		return -EINVAL;
-
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -1872,14 +1888,15 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 		MLX5_SET(wq, wq, log_wqe_stride_size,
 			 mlx5e_mpwqe_get_log_stride_size(mdev, params) - 6);
 		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
 	}
 
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 	MLX5_SET(wq, wq, log_wq_stride,    ilog2(sizeof(struct mlx5e_rx_wqe)));
-	MLX5_SET(wq, wq, log_wq_sz,        params->log_rq_size);
 	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
 	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
@@ -1939,16 +1956,17 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 				    struct mlx5e_params *params,
 				    struct mlx5e_cq_param *param)
 {
+	struct mlx5_core_dev *mdev = priv->mdev;
 	void *cqc = param->cqc;
 	u8 log_cq_size;
 
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		log_cq_size = params->log_rq_size +
-			mlx5e_mpwqe_get_log_num_strides(priv->mdev, params);
+		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) +
+			mlx5e_mpwqe_get_log_num_strides(mdev, params);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		log_cq_size = params->log_rq_size;
+		log_cq_size = params->log_rq_mtu_frames;
 	}
 
 	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
@@ -3421,11 +3439,20 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 	mutex_lock(&priv->state_lock);
 
 	params = &priv->channels.params;
-	reset = !params->lro_en &&
-		(params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 
+	reset = !params->lro_en;
 	reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
 
+	new_channels.params = *params;
+	new_channels.params.sw_mtu = new_mtu;
+
+	if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) {
+		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
+		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
+
+		reset = reset && (ppw_old != ppw_new);
+	}
+
 	if (!reset) {
 		params->sw_mtu = new_mtu;
 		mlx5e_set_dev_port_mtu(priv);
@@ -3433,8 +3460,6 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 		goto out;
 	}
 
-	new_channels.params = *params;
-	new_channels.params.sw_mtu = new_mtu;
 	err = mlx5e_open_channels(priv, &new_channels);
 	if (err)
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index cd884829cb30..8e70fa9ef39a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -880,7 +880,7 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
 	params->hard_mtu    = MLX5E_ETH_HARD_MTU;
 	params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 	params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
-	params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+	params->log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
 
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index dc5e9e706362..af3bb2f7a504 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -61,7 +61,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 	mlx5e_init_rq_type_params(mdev, params);
 
 	/* RQ size in ipoib by default is 512 */
-	params->log_rq_size = is_kdump_kernel() ?
+	params->log_rq_mtu_frames = is_kdump_kernel() ?
 		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
 		MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index b69e9d847a6b..54a188f41f90 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -290,7 +290,7 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
 	netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;
 
 	/* Use dummy rqs */
-	priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+	priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
 }
 
 /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */

From 18187fb2c3c138630f6b7c7b7ba7ab41ccd95129 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 18 Jul 2017 12:07:06 +0300
Subject: [PATCH 1542/1678] net/mlx5e: Code movements in RX UMR WQE post

Gets the process of a UMR WQE post in one function,
in preparation for a downstream patch that inlines
the WQE data.
No functional change here.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 109 ++++++++----------
 1 file changed, 46 insertions(+), 63 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index c0d528f2131b..8aa94d3cff59 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -347,64 +347,6 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
 	}
 }
 
-static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
-{
-	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-	struct mlx5e_icosq *sq = &rq->channel->icosq;
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	struct mlx5e_umr_wqe *wqe;
-	u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
-	u16 pi;
-
-	/* fill sq edge with nops to avoid wqe wrap around */
-	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
-		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-
-	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
-	wqe->ctrl.opmod_idx_opcode =
-		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
-			    MLX5_OPCODE_UMR);
-
-	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
-	sq->pc += num_wqebbs;
-	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
-}
-
-static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
-				    u16 ix)
-{
-	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
-	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
-	int err;
-	int i;
-
-	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
-		err = mlx5e_page_alloc_mapped(rq, dma_info);
-		if (unlikely(err))
-			goto err_unmap;
-		wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
-		page_ref_add(dma_info->page, pg_strides);
-	}
-
-	memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE);
-	wi->consumed_strides = 0;
-
-	return 0;
-
-err_unmap:
-	while (--i >= 0) {
-		dma_info--;
-		page_ref_sub(dma_info->page, pg_strides);
-		mlx5e_page_release(rq, dma_info, true);
-	}
-
-	return err;
-}
-
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
 	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
@@ -434,16 +376,57 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
+	struct mlx5e_icosq *sq = &rq->channel->icosq;
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5e_umr_wqe *wqe;
+	u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
 	int err;
+	u16 pi;
+	int i;
 
-	err = mlx5e_alloc_rx_umr_mpwqe(rq, ix);
-	if (unlikely(err)) {
-		rq->stats.buff_alloc_err++;
-		return err;
+	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
+		err = mlx5e_page_alloc_mapped(rq, dma_info);
+		if (unlikely(err))
+			goto err_unmap;
+		wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+		page_ref_add(dma_info->page, pg_strides);
 	}
+
+	memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE);
+	wi->consumed_strides = 0;
+
 	rq->mpwqe.umr_in_progress = true;
-	mlx5e_post_umr_wqe(rq, ix);
+
+	/* fill sq edge with nops to avoid wqe wrap around */
+	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
+		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+	}
+
+	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+	memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
+	wqe->ctrl.opmod_idx_opcode =
+		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+			    MLX5_OPCODE_UMR);
+
+	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+	sq->pc += num_wqebbs;
+	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+
 	return 0;
+
+err_unmap:
+	while (--i >= 0) {
+		dma_info--;
+		page_ref_sub(dma_info->page, pg_strides);
+		mlx5e_page_release(rq, dma_info, true);
+	}
+	rq->stats.buff_alloc_err++;
+
+	return err;
 }
 
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)

From e4d86a4a58b436fca0ce084ea453cc0ec8c39bb4 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 7 Jan 2018 14:15:02 +0200
Subject: [PATCH 1543/1678] net/mlx5e: Do not busy-wait for UMR completion in
 Striding RQ

Do not busy-wait a pending UMR completion. Under high HW load,
busy-waiting a delayed completion would fully utilize the CPU core
and mistakenly indicate a SW bottleneck.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 8aa94d3cff59..8eb9e7e89b09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -527,7 +527,7 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 	if (!rq->mpwqe.umr_in_progress)
 		mlx5e_alloc_rx_mpwqe(rq, wq->head);
 
-	return true;
+	return false;
 }
 
 static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)

From ea3886cab76f1026a5db988fa6fad997e98f3a32 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 10 Jul 2017 12:52:36 +0300
Subject: [PATCH 1544/1678] net/mlx5e: Use inline MTTs in UMR WQEs

When modifying the page mapping of a HW memory region
(via a UMR post), post the new values inlined in WQE,
instead of using a data pointer.

This is a micro-optimization, inline UMR WQEs of different
rings scale better in HW.

In addition, this obsoletes a few control flows and helps
delete ~50 LOC.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 16 ++--
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 82 ++++---------------
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 28 +++----
 3 files changed, 38 insertions(+), 88 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1f89e2194b61..c1d3a29388bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -105,7 +105,6 @@
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
 
-#define MLX5_UMR_ALIGN				(2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(256)
 
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
@@ -130,8 +129,13 @@
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
 #define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
-#define MLX5E_ICOSQ_MAX_WQEBBS \
-	(DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
+#define MLX5E_UMR_WQE_INLINE_SZ \
+	(sizeof(struct mlx5e_umr_wqe) + \
+	 ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(struct mlx5_mtt), \
+	       MLX5_UMR_MTT_ALIGNMENT))
+#define MLX5E_UMR_WQEBBS \
+	(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS
 
 #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
 #define MLX5E_XDP_TX_DS_COUNT \
@@ -183,7 +187,7 @@ struct mlx5e_umr_wqe {
 	struct mlx5_wqe_ctrl_seg       ctrl;
 	struct mlx5_wqe_umr_ctrl_seg   uctrl;
 	struct mlx5_mkey_seg           mkc;
-	struct mlx5_wqe_data_seg       data;
+	struct mlx5_mtt                inline_mtts[0];
 };
 
 extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
@@ -421,7 +425,6 @@ struct mlx5e_icosq {
 	void __iomem              *uar_map;
 	u32                        sqn;
 	u16                        edge;
-	__be32                     mkey_be;
 	unsigned long              state;
 
 	/* control path */
@@ -446,8 +449,6 @@ struct mlx5e_wqe_frag_info {
 };
 
 struct mlx5e_umr_dma_info {
-	__be64                *mtt;
-	dma_addr_t             mtt_addr;
 	struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
 	struct mlx5e_umr_wqe   wqe;
 };
@@ -490,7 +491,6 @@ struct mlx5e_rq {
 		} wqe;
 		struct {
 			struct mlx5e_mpw_info *info;
-			void                  *mtt_no_align;
 			u16                    num_strides;
 			u8                     log_stride_sz;
 			bool                   umr_in_progress;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e627b81cebe9..42dc350c5ab1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -73,9 +73,20 @@ struct mlx5e_channel_param {
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
-	return MLX5_CAP_GEN(mdev, striding_rq) &&
+	bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
 		MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
 		MLX5_CAP_ETH(mdev, reg_umr_sq);
+	u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
+	bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
+
+	if (!striding_rq_umr)
+		return false;
+	if (!inline_umr) {
+		mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
+			       (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
+		return false;
+	}
+	return true;
 }
 
 static u32 mlx5e_mpwqe_get_linear_frag_sz(struct mlx5e_params *params)
@@ -258,16 +269,6 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 	synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC));
 }
 
-static inline int mlx5e_get_wqe_mtt_sz(void)
-{
-	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
-	 * To avoid copying garbage after the mtt array, we allocate
-	 * a little more.
-	 */
-	return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
-		     MLX5_UMR_MTT_ALIGNMENT);
-}
-
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 				       struct mlx5e_icosq *sq,
 				       struct mlx5e_umr_wqe *wqe,
@@ -275,9 +276,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 {
 	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
 	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
-	struct mlx5_wqe_data_seg      *dseg = &wqe->data;
-	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-	u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
+	u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
 	u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
 	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
@@ -285,80 +284,32 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 	cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
 	cseg->imm       = rq->mkey_be;
 
-	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
+	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
 	ucseg->xlt_octowords =
 		cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
 	ucseg->bsf_octowords =
 		cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
 	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
-
-	dseg->lkey = sq->mkey_be;
-	dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
 }
 
 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 				     struct mlx5e_channel *c)
 {
 	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-	int mtt_sz = mlx5e_get_wqe_mtt_sz();
-	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
 	int i;
 
 	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
 				      GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
-		goto err_out;
-
-	/* We allocate more than mtt_sz as we will align the pointer */
-	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
-					cpu_to_node(c->cpu));
-	if (unlikely(!rq->mpwqe.mtt_no_align))
-		goto err_free_wqe_info;
+		return -ENOMEM;
 
 	for (i = 0; i < wq_sz; i++) {
 		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
 
-		wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc,
-					MLX5_UMR_ALIGN);
-		wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
-						  PCI_DMA_TODEVICE);
-		if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr)))
-			goto err_unmap_mtts;
-
 		mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i);
 	}
 
 	return 0;
-
-err_unmap_mtts:
-	while (--i >= 0) {
-		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
-
-		dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
-				 PCI_DMA_TODEVICE);
-	}
-	kfree(rq->mpwqe.mtt_no_align);
-err_free_wqe_info:
-	kfree(rq->mpwqe.info);
-
-err_out:
-	return -ENOMEM;
-}
-
-static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
-{
-	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-	int mtt_sz = mlx5e_get_wqe_mtt_sz();
-	int i;
-
-	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
-
-		dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz,
-				 PCI_DMA_TODEVICE);
-	}
-	kfree(rq->mpwqe.mtt_no_align);
-	kfree(rq->mpwqe.info);
 }
 
 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
@@ -579,7 +530,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		mlx5e_rq_free_mpwqe_info(rq);
+		kfree(rq->mpwqe.info);
 		mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -918,7 +869,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 	struct mlx5_core_dev *mdev = c->mdev;
 	int err;
 
-	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 8eb9e7e89b09..539dbe9382ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -381,17 +381,25 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
 	struct mlx5e_icosq *sq = &rq->channel->icosq;
 	struct mlx5_wq_cyc *wq = &sq->wq;
-	struct mlx5e_umr_wqe *wqe;
-	u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
+	struct mlx5e_umr_wqe *umr_wqe;
+	int cpy = offsetof(struct mlx5e_umr_wqe, inline_mtts);
 	int err;
 	u16 pi;
 	int i;
 
+	/* fill sq edge with nops to avoid wqe wrap around */
+	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
+		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+	}
+
+	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+	memcpy(umr_wqe, &wi->umr.wqe, cpy);
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
 		err = mlx5e_page_alloc_mapped(rq, dma_info);
 		if (unlikely(err))
 			goto err_unmap;
-		wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+		umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
 		page_ref_add(dma_info->page, pg_strides);
 	}
 
@@ -400,21 +408,13 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 
 	rq->mpwqe.umr_in_progress = true;
 
-	/* fill sq edge with nops to avoid wqe wrap around */
-	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
-		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-
-	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
-	wqe->ctrl.opmod_idx_opcode =
+	umr_wqe->ctrl.opmod_idx_opcode =
 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
 			    MLX5_OPCODE_UMR);
 
 	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
-	sq->pc += num_wqebbs;
-	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+	sq->pc += MLX5E_UMR_WQEBBS;
+	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &umr_wqe->ctrl);
 
 	return 0;
 

From 619a8f2a42f1031cdbd74435b6a9191eb4913139 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 7 Feb 2018 14:41:25 +0200
Subject: [PATCH 1545/1678] net/mlx5e: Use linear SKB in Striding RQ

Current Striding RQ HW feature utilizes the RX buffers so that
there is no wasted room between the strides. This maximises
the memory utilization.
This prevents the use of build_skb() (which requires headroom
and tailroom), and demands to memcpy the packets headers into
the skb linear part.

In this patch, whenever a set of conditions holds, we apply
an RQ configuration that allows combining the use of linear SKB
on top of a Striding RQ.

To use build_skb() with Striding RQ, the following must hold:
1. packet does not cross a page boundary.
2. there is enough headroom and tailroom surrounding the packet.

We can satisfy 1 and 2 by configuring:
	stride size = MTU + headroom + tailoom.

This is possible only when:
a. (MTU - headroom - tailoom) does not exceed PAGE_SIZE.
b. HW LRO is turned off.

Using linear SKB has many advantages:
- Saves a memcpy of the headers.
- No page-boundary checks in datapath.
- No filler CQEs.
- Significantly smaller CQ.
- SKB data continuously resides in linear part, and not split to
  small amount (linear part) and large amount (fragment).
  This saves datapath cycles in driver and improves utilization
  of SKB fragments in GRO.
- The fragments of a resulting GRO SKB follow the IP forwarding
  assumption of equal-size fragments.

Some implementation details:
HW writes the packets to the beginning of a stride,
i.e. does not keep headroom. To overcome this we make sure we can
extend backwards and use the last bytes of stride i-1.
Extra care is needed for stride 0 as it has no preceding stride.
We make sure headroom bytes are available by shifting the buffer
pointer passed to HW by headroom bytes.

This configuration now becomes default, whenever capable.
Of course, this implies turning LRO off.

Performance testing:
ConnectX-5, single core, single RX ring, default MTU.

UDP packet rate, early drop in TC layer:

--------------------------------------------
| pkt size | before    | after     | ratio |
--------------------------------------------
| 1500byte | 4.65 Mpps | 5.96 Mpps | 1.28x |
|  500byte | 5.23 Mpps | 5.97 Mpps | 1.14x |
|   64byte | 5.94 Mpps | 5.96 Mpps | 1.00x |
--------------------------------------------

TCP streams: ~20% gain

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  10 ++
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  76 ++++++++++---
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 102 +++++++++++++-----
 include/linux/mlx5/device.h                   |   3 +
 include/linux/mlx5/mlx5_ifc.h                 |   7 +-
 5 files changed, 153 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c1d3a29388bd..d26dd4bc89f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -473,6 +473,9 @@ struct mlx5e_page_cache {
 
 struct mlx5e_rq;
 typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
+typedef struct sk_buff *
+(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+			       u16 cqe_bcnt, u32 head_offset, u32 page_idx);
 typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
 typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
 
@@ -491,6 +494,7 @@ struct mlx5e_rq {
 		} wqe;
 		struct {
 			struct mlx5e_mpw_info *info;
+			mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
 			u16                    num_strides;
 			u8                     log_stride_sz;
 			bool                   umr_in_progress;
@@ -834,6 +838,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+				u16 cqe_bcnt, u32 head_offset, u32 page_idx);
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+				   u16 cqe_bcnt, u32 head_offset, u32 page_idx);
 
 void mlx5e_update_stats(struct mlx5e_priv *priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 42dc350c5ab1..bba2fa0aa15f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -91,9 +91,14 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 
 static u32 mlx5e_mpwqe_get_linear_frag_sz(struct mlx5e_params *params)
 {
-	u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	if (!params->xdp_prog) {
+		u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+		u16 rq_headroom = MLX5_RX_HEADROOM + NET_IP_ALIGN;
 
-	return hw_mtu;
+		return MLX5_SKB_FRAG_SZ(rq_headroom + hw_mtu);
+	}
+
+	return PAGE_SIZE;
 }
 
 static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
@@ -103,6 +108,26 @@ static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
 	return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
 }
 
+static bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+					 struct mlx5e_params *params)
+{
+	u32 frag_sz = mlx5e_mpwqe_get_linear_frag_sz(params);
+	s8 signed_log_num_strides_param;
+	u8 log_num_strides;
+
+	if (params->lro_en || frag_sz > PAGE_SIZE)
+		return false;
+
+	if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
+		return true;
+
+	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+	signed_log_num_strides_param =
+		(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+
+	return signed_log_num_strides_param >= 0;
+}
+
 static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
 {
 	if (params->log_rq_mtu_frames <
@@ -115,6 +140,9 @@ static u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
 static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
 					  struct mlx5e_params *params)
 {
+	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+		return order_base_2(mlx5e_mpwqe_get_linear_frag_sz(params));
+
 	return MLX5E_MPWQE_STRIDE_SZ(mdev,
 		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
@@ -126,7 +154,8 @@ static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
 		mlx5e_mpwqe_get_log_stride_size(mdev, params);
 }
 
-static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params)
+static u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
+				 struct mlx5e_params *params)
 {
 	u16 linear_rq_headroom = params->xdp_prog ?
 		XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
@@ -136,6 +165,9 @@ static u16 mlx5e_get_rq_headroom(struct mlx5e_params *params)
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)
 		return linear_rq_headroom;
 
+	if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+		return linear_rq_headroom;
+
 	return 0;
 }
 
@@ -151,12 +183,14 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		/* Extra room needed for build_skb */
-		params->lro_wqe_sz -= mlx5e_get_rq_headroom(params) +
+		params->lro_wqe_sz -= mlx5e_get_rq_headroom(mdev, params) +
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	}
 
 	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
 		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+		       BIT(mlx5e_mpwqe_get_log_rq_size(params)) :
 		       BIT(params->log_rq_mtu_frames),
 		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)),
 		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
@@ -400,11 +434,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		goto err_rq_wq_destroy;
 
 	rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-	rq->buff.headroom = mlx5e_get_rq_headroom(params);
+	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-
 		rq->post_wqes = mlx5e_post_rx_mpwqes;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
@@ -422,6 +455,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			goto err_rq_wq_destroy;
 		}
 
+		rq->mpwqe.skb_from_cqe_mpwrq =
+			mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ?
+			mlx5e_skb_from_cqe_mpwrq_linear :
+			mlx5e_skb_from_cqe_mpwrq_nonlinear;
 		rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);
 		rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params));
 
@@ -484,7 +521,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 			u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT;
 
-			wqe->data.addr = cpu_to_be64(dma_offset);
+			wqe->data.addr = cpu_to_be64(dma_offset + rq->buff.headroom);
 		}
 
 		wqe->data.byte_count = cpu_to_be32(byte_count);
@@ -1834,9 +1871,11 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	switch (params->rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		MLX5_SET(wq, wq, log_wqe_num_of_strides,
-			 mlx5e_mpwqe_get_log_num_strides(mdev, params) - 9);
+			 mlx5e_mpwqe_get_log_num_strides(mdev, params) -
+			 MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
 		MLX5_SET(wq, wq, log_wqe_stride_size,
-			 mlx5e_mpwqe_get_log_stride_size(mdev, params) - 6);
+			 mlx5e_mpwqe_get_log_stride_size(mdev, params) -
+			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
 		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params));
 		break;
@@ -3193,20 +3232,28 @@ typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
 static int set_feature_lro(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params *old_params;
 	int err = 0;
 	bool reset;
 
 	mutex_lock(&priv->state_lock);
 
-	reset = (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST);
-	reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
+	old_params = &priv->channels.params;
+	reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
 
-	new_channels.params = priv->channels.params;
+	new_channels.params = *old_params;
 	new_channels.params.lro_en = enable;
 
+	if (old_params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST) {
+		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) ==
+		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params))
+			reset = false;
+	}
+
 	if (!reset) {
-		priv->channels.params = new_channels.params;
+		*old_params = new_channels.params;
 		err = mlx5e_modify_tirs_lro(priv);
 		goto out;
 	}
@@ -4121,7 +4168,8 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 
 	/* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
-		params->lro_en = !slow_pci_heuristic(mdev);
+		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
+			params->lro_en = !slow_pci_heuristic(mdev);
 	params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
 	/* CQ moderation params */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 539dbe9382ee..07db8a58d0a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -836,6 +836,24 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
 	}
 }
 
+static inline
+struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
+				       u32 frag_size, u16 headroom,
+				       u32 cqe_bcnt)
+{
+	struct sk_buff *skb = build_skb(va, frag_size);
+
+	if (unlikely(!skb)) {
+		rq->stats.buff_alloc_err++;
+		return NULL;
+	}
+
+	skb_reserve(skb, headroom);
+	skb_put(skb, cqe_bcnt);
+
+	return skb;
+}
+
 static inline
 struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 			     struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
@@ -867,18 +885,13 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	if (consumed)
 		return NULL; /* page/packet was consumed by XDP */
 
-	skb = build_skb(va, frag_size);
-	if (unlikely(!skb)) {
-		rq->stats.buff_alloc_err++;
+	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+	if (unlikely(!skb))
 		return NULL;
-	}
 
 	/* queue up for recycling/reuse */
 	page_ref_inc(di->page);
 
-	skb_reserve(skb, rx_headroom);
-	skb_put(skb, cqe_bcnt);
-
 	return skb;
 }
 
@@ -967,20 +980,24 @@ wq_ll_pop:
 }
 #endif
 
-static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
-					   struct mlx5_cqe64 *cqe,
-					   struct mlx5e_mpw_info *wi,
-					   u32 cqe_bcnt,
-					   struct sk_buff *skb)
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+				   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
 {
-	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
-	u32 wqe_offset     = stride_ix << rq->mpwqe.log_stride_sz;
-	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
-	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
-	u32 head_page_idx  = page_idx;
 	u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt);
 	u32 frag_offset    = head_offset + headlen;
 	u16 byte_cnt       = cqe_bcnt - headlen;
+	u32 head_page_idx  = page_idx;
+	struct sk_buff *skb;
+
+	skb = napi_alloc_skb(rq->cq.napi,
+			     ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, sizeof(long)));
+	if (unlikely(!skb)) {
+		rq->stats.buff_alloc_err++;
+		return NULL;
+	}
+
+	prefetchw(skb->data);
 
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
 		page_idx++;
@@ -1003,6 +1020,35 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
 	skb->len  += headlen;
+
+	return skb;
+}
+
+struct sk_buff *
+mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+				u16 cqe_bcnt, u32 head_offset, u32 page_idx)
+{
+	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+	u16 rx_headroom = rq->buff.headroom;
+	struct sk_buff *skb;
+	void *va, *data;
+	u32 frag_size;
+
+	va             = page_address(di->page) + head_offset;
+	data           = va + rx_headroom;
+	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+
+	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
+				      frag_size, DMA_FROM_DEVICE);
+	prefetch(data);
+	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* queue up for recycling/reuse */
+	wi->skbs_frags[page_idx]++;
+
+	return skb;
 }
 
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
@@ -1010,7 +1056,11 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
 	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
-	struct mlx5e_rx_wqe  *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
+	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
+	u32 wqe_offset     = stride_ix << rq->mpwqe.log_stride_sz;
+	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
+	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
+	struct mlx5e_rx_wqe *wqe;
 	struct sk_buff *skb;
 	u16 cqe_bcnt;
 
@@ -1026,18 +1076,13 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 		goto mpwrq_cqe_out;
 	}
 
-	skb = napi_alloc_skb(rq->cq.napi,
-			     ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD,
-				   sizeof(long)));
-	if (unlikely(!skb)) {
-		rq->stats.buff_alloc_err++;
-		goto mpwrq_cqe_out;
-	}
-
-	prefetchw(skb->data);
 	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
 
-	mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
+	skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
+					   page_idx);
+	if (unlikely(!skb))
+		goto mpwrq_cqe_out;
+
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	napi_gro_receive(rq->cq.napi, skb);
 
@@ -1045,6 +1090,7 @@ mpwrq_cqe_out:
 	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
 		return;
 
+	wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
 	mlx5e_free_rx_mpwqe(rq, wi);
 	mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 4b5939c78cdd..12758595459b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -782,6 +782,9 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
 	return (u64)lo | ((u64)hi << 32);
 }
 
+#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE	(9)
+#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE	(6)
+
 struct mpwrq_cqe_bc {
 	__be16	filler_consumed_strides;
 	__be16	byte_cnt;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c19e611d2782..d25011f84815 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1038,7 +1038,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_398[0x3];
 	u8         log_max_tis_per_sq[0x5];
 
-	u8         reserved_at_3a0[0x3];
+	u8         ext_stride_num_range[0x1];
+	u8         reserved_at_3a1[0x2];
 	u8         log_max_stride_sz_rq[0x5];
 	u8         reserved_at_3a8[0x3];
 	u8         log_min_stride_sz_rq[0x5];
@@ -1205,9 +1206,9 @@ struct mlx5_ifc_wq_bits {
 	u8         log_hairpin_num_packets[0x5];
 	u8         reserved_at_128[0x3];
 	u8         log_hairpin_data_sz[0x5];
-	u8         reserved_at_130[0x5];
 
-	u8         log_wqe_num_of_strides[0x3];
+	u8         reserved_at_130[0x4];
+	u8         log_wqe_num_of_strides[0x4];
 	u8         two_byte_shift_en[0x1];
 	u8         reserved_at_139[0x4];
 	u8         log_wqe_stride_size[0x3];

From 121e89275471dccbd5b252e40ad6a823fa0527f7 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 12 Dec 2017 15:46:49 +0200
Subject: [PATCH 1546/1678] net/mlx5e: Refactor RQ XDP_TX indication

Make the xdp_xmit indication available for Striding RQ
by taking it out of the type-specific union.
This refactor is a preparation for a downstream patch that
adds XDP support over Striding RQ.
In addition, use a bitmap instead of a boolean for possible
future flags.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h    | 6 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 +++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d26dd4bc89f4..a6ca54393bb6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -479,6 +479,10 @@ typedef struct sk_buff *
 typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
 typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
 
+enum mlx5e_rq_flag {
+	MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
+};
+
 struct mlx5e_rq {
 	/* data path */
 	struct mlx5_wq_ll      wq;
@@ -489,7 +493,6 @@ struct mlx5e_rq {
 			u32 frag_sz;	/* max possible skb frag_sz */
 			union {
 				bool page_reuse;
-				bool xdp_xmit;
 			};
 		} wqe;
 		struct {
@@ -528,6 +531,7 @@ struct mlx5e_rq {
 	struct bpf_prog       *xdp_prog;
 	unsigned int           hw_mtu;
 	struct mlx5e_xdpsq     xdpsq;
+	DECLARE_BITMAP(flags, 8);
 
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 07db8a58d0a2..a827571deb85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -788,7 +788,7 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
 	/* move page to reference to sq responsibility,
 	 * and mark so it's not put back in page-cache.
 	 */
-	rq->wqe.xdp_xmit = true;
+	__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
 	sq->db.di[pi] = *di;
 	sq->pc++;
 
@@ -913,9 +913,8 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (!skb) {
 		/* probably for XDP */
-		if (rq->wqe.xdp_xmit) {
+		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 			wi->di.page = NULL;
-			rq->wqe.xdp_xmit = false;
 			/* do not return page to cache, it will be returned on XDP_TX completion */
 			goto wq_ll_pop;
 		}
@@ -955,9 +954,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	skb = skb_from_cqe(rq, cqe, wi, cqe_bcnt);
 	if (!skb) {
-		if (rq->wqe.xdp_xmit) {
+		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
 			wi->di.page = NULL;
-			rq->wqe.xdp_xmit = false;
 			/* do not return page to cache, it will be returned on XDP_TX completion */
 			goto wq_ll_pop;
 		}

From 22f4539881944a8acc9c6f5afa7aa7f028898002 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 7 Feb 2018 14:46:36 +0200
Subject: [PATCH 1547/1678] net/mlx5e: Support XDP over Striding RQ

Add XDP support over Striding RQ.
Now that linear SKB is supported over Striding RQ,
we can support XDP by setting stride size to PAGE_SIZE
and headroom to XDP_PACKET_HEADROOM.

Upon a MPWQE free, do not release pages that are being
XDP xmit, they will be released upon completions.

Striding RQ is capable of a higher packet-rate than
conventional RQ.
A performance gain is expected for all cases that had
a HW packet-rate bottleneck. This is the case whenever
using many flows that distribute to many cores.

Performance testing:
ConnectX-5, 24 rings, default MTU.
CQE compression ON (to reduce completions BW in PCI).

XDP_DROP packet rate:
--------------------------------------------------
| pkt size | XDP rate   | 100GbE linerate | pct% |
--------------------------------------------------
|   64byte | 126.2 Mpps |      148.0 Mpps |  85% |
|  128byte |  80.0 Mpps |       84.8 Mpps |  94% |
|  256byte |  42.7 Mpps |       42.7 Mpps | 100% |
|  512byte |  23.4 Mpps |       23.4 Mpps | 100% |
--------------------------------------------------

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  3 +-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 30 ++++++++++++++-----
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a6ca54393bb6..7997d7c159db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -457,6 +457,7 @@ struct mlx5e_mpw_info {
 	struct mlx5e_umr_dma_info umr;
 	u16 consumed_strides;
 	u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
+	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 };
 
 /* a single cache unit is capable to serve one napi call (for non-striding rq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bba2fa0aa15f..b03a2327356a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -200,7 +200,8 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params)
 {
 	return mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-		!params->xdp_prog && !MLX5_IPSEC_DEV(mdev);
+		!MLX5_IPSEC_DEV(mdev) &&
+		!(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params));
 }
 
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index a827571deb85..1da79cab1838 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -349,13 +349,16 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev,
 
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
+	const bool no_xdp_xmit =
+		bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
-	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
+	struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
 	int i;
 
-	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
-		page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
-		mlx5e_page_release(rq, dma_info, true);
+	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
+		page_ref_sub(dma_info[i].page, pg_strides - wi->skbs_frags[i]);
+		if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
+			mlx5e_page_release(rq, &dma_info[i], true);
 	}
 }
 
@@ -404,6 +407,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	}
 
 	memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE);
+	bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 	wi->consumed_strides = 0;
 
 	rq->mpwqe.umr_in_progress = true;
@@ -1028,18 +1032,30 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 {
 	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
 	u16 rx_headroom = rq->buff.headroom;
+	u32 cqe_bcnt32 = cqe_bcnt;
 	struct sk_buff *skb;
 	void *va, *data;
 	u32 frag_size;
+	bool consumed;
 
 	va             = page_address(di->page) + head_offset;
 	data           = va + rx_headroom;
-	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
+	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
 
 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
 				      frag_size, DMA_FROM_DEVICE);
 	prefetch(data);
-	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
+
+	rcu_read_lock();
+	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32);
+	rcu_read_unlock();
+	if (consumed) {
+		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+		return NULL; /* page/packet was consumed by XDP */
+	}
+
+	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
 	if (unlikely(!skb))
 		return NULL;
 
@@ -1078,7 +1094,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
 	skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset,
 					   page_idx);
-	if (unlikely(!skb))
+	if (!skb)
 		goto mpwrq_cqe_out;
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

From 9f9e9cd50eac6ad09cb053509f2e764bddc05f18 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 28 Nov 2017 11:03:37 +0200
Subject: [PATCH 1548/1678] net/mlx5e: Remove page_ref bulking in Striding RQ

When many packets reside on the same page, the bulking of
page_ref modifications reduces the total number of atomic
operations executed.

Besides the necessary 2 operations on page alloc/free, we
have the following extra ops per page:
- one on WQE allocation (bump refcnt to maximum possible),
- zero ops for SKBs,
- one on WQE free,
a constant of two operations in total, no matter how many
packets/SKBs actually populate the page.

Without this bulking, we have:
- no ops on WQE allocation or free,
- one op per SKB,

Comparing the two methods when PAGE_SIZE is 4K:
- As mentioned above, bulking method always executes 2 operations,
  not more, but not less.
- In the default MTU configuration (1500, stride size is 2K),
  the non-bulking method execute 2 ops as well.
- For larger MTUs with stride size of 4K, non-bulking method
  executes only a single op.
- For XDP (stride size of 4K, no SKBs), non-bulking method
  executes no ops at all!

Hence, to optimize the flows with linear SKB and XDP over Striding RQ,
we here remove the page_ref bulking method.

Performance testing:
ConnectX-5, Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz.

Single core packet rate (64 bytes).

Early drop in TC: no degradation.

XDP_DROP:
before: 14,270,188 pps
after:  20,503,603 pps, 43% improvement.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 -
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 47 +++++++------------
 2 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7997d7c159db..5853de4e4fc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -456,7 +456,6 @@ struct mlx5e_umr_dma_info {
 struct mlx5e_mpw_info {
 	struct mlx5e_umr_dma_info umr;
 	u16 consumed_strides;
-	u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
 	DECLARE_BITMAP(xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1da79cab1838..9bb47a6d40f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -296,37 +296,28 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 		mlx5e_free_rx_wqe(rq, wi);
 }
 
-static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
-{
-	return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
-}
-
 static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
 					    struct sk_buff *skb,
-					    struct mlx5e_mpw_info *wi,
-					    u32 page_idx, u32 frag_offset,
-					    u32 len)
+					    struct mlx5e_dma_info *di,
+					    u32 frag_offset, u32 len)
 {
 	unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz));
 
 	dma_sync_single_for_cpu(rq->pdev,
-				wi->umr.dma_info[page_idx].addr + frag_offset,
+				di->addr + frag_offset,
 				len, DMA_FROM_DEVICE);
-	wi->skbs_frags[page_idx]++;
+	page_ref_inc(di->page);
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-			wi->umr.dma_info[page_idx].page, frag_offset,
-			len, truesize);
+			di->page, frag_offset, len, truesize);
 }
 
 static inline void
 mlx5e_copy_skb_header_mpwqe(struct device *pdev,
 			    struct sk_buff *skb,
-			    struct mlx5e_mpw_info *wi,
-			    u32 page_idx, u32 offset,
-			    u32 headlen)
+			    struct mlx5e_dma_info *dma_info,
+			    u32 offset, u32 headlen)
 {
 	u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
-	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx];
 	unsigned int len;
 
 	 /* Aligning len to sizeof(long) optimizes memcpy performance */
@@ -351,15 +342,12 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
 	const bool no_xdp_xmit =
 		bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
-	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
 	struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
 	int i;
 
-	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		page_ref_sub(dma_info[i].page, pg_strides - wi->skbs_frags[i]);
+	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
 		if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
 			mlx5e_page_release(rq, &dma_info[i], true);
-	}
 }
 
 static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
@@ -380,7 +368,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
-	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
 	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
 	struct mlx5e_icosq *sq = &rq->channel->icosq;
 	struct mlx5_wq_cyc *wq = &sq->wq;
@@ -403,10 +390,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 		if (unlikely(err))
 			goto err_unmap;
 		umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
-		page_ref_add(dma_info->page, pg_strides);
 	}
 
-	memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE);
 	bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
 	wi->consumed_strides = 0;
 
@@ -425,7 +410,6 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 err_unmap:
 	while (--i >= 0) {
 		dma_info--;
-		page_ref_sub(dma_info->page, pg_strides);
 		mlx5e_page_release(rq, dma_info, true);
 	}
 	rq->stats.buff_alloc_err++;
@@ -987,9 +971,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 				   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
 {
 	u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt);
+	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
 	u32 frag_offset    = head_offset + headlen;
-	u16 byte_cnt       = cqe_bcnt - headlen;
-	u32 head_page_idx  = page_idx;
+	u32 byte_cnt       = cqe_bcnt - headlen;
+	struct mlx5e_dma_info *head_di = di;
 	struct sk_buff *skb;
 
 	skb = napi_alloc_skb(rq->cq.napi,
@@ -1002,7 +987,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 	prefetchw(skb->data);
 
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
-		page_idx++;
+		di++;
 		frag_offset -= PAGE_SIZE;
 	}
 
@@ -1010,14 +995,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		u32 pg_consumed_bytes =
 			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
-		mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset,
+		mlx5e_add_skb_frag_mpwqe(rq, skb, di, frag_offset,
 					 pg_consumed_bytes);
 		byte_cnt -= pg_consumed_bytes;
 		frag_offset = 0;
-		page_idx++;
+		di++;
 	}
 	/* copy header */
-	mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx,
+	mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di,
 				    head_offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
@@ -1060,7 +1045,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		return NULL;
 
 	/* queue up for recycling/reuse */
-	wi->skbs_frags[page_idx]++;
+	page_ref_inc(di->page);
 
 	return skb;
 }

From b8a98a4cf3221d8140969e3f5bde09206a6cb623 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 20 Dec 2017 11:56:35 +0200
Subject: [PATCH 1549/1678] net/mlx5e: Keep single pre-initialized UMR WQE per
 RQ

All UMR WQEs of an RQ share many common fields. We use
pre-initialized structures to save calculations in datapath.
One field (xlt_offset) was the only reason we saved a pre-initialized
copy per WQE index.
Here we remove its initialization (move its calculation to datapath),
and reduce the number of copies to one-per-RQ.

A very small datapath calculation is added, it occurs once per a MPWQE
(i.e. once every 256KB), but reduces memory consumption and gives
better cache utilization.

Performance testing:
Tested packet rate, no degradation sensed.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  8 ++------
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 20 ++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  4 +++-
 3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5853de4e4fc7..30cad07be2b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -84,6 +84,7 @@
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
 #define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_LOG_ALIGNED_MPWQE_PPW	(ilog2(MLX5E_REQUIRED_WQE_MTTS))
 #define MLX5E_REQUIRED_MTTS(wqes)	(wqes * MLX5E_REQUIRED_WQE_MTTS)
 #define MLX5E_MAX_RQ_NUM_MTTS	\
 	((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
@@ -450,7 +451,6 @@ struct mlx5e_wqe_frag_info {
 
 struct mlx5e_umr_dma_info {
 	struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
-	struct mlx5e_umr_wqe   wqe;
 };
 
 struct mlx5e_mpw_info {
@@ -496,6 +496,7 @@ struct mlx5e_rq {
 			};
 		} wqe;
 		struct {
+			struct mlx5e_umr_wqe   umr_wqe;
 			struct mlx5e_mpw_info *info;
 			mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
 			u16                    num_strides;
@@ -978,11 +979,6 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
 	mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
 }
 
-static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
-{
-	return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
-}
-
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b03a2327356a..0339609cfa56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -306,13 +306,11 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 
 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 				       struct mlx5e_icosq *sq,
-				       struct mlx5e_umr_wqe *wqe,
-				       u16 ix)
+				       struct mlx5e_umr_wqe *wqe)
 {
 	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
 	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
 	u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
-	u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
 
 	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
 				      ds_cnt);
@@ -322,8 +320,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
 	ucseg->xlt_octowords =
 		cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
-	ucseg->bsf_octowords =
-		cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
 	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
@@ -331,18 +327,13 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 				     struct mlx5e_channel *c)
 {
 	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
-	int i;
 
 	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
 				      GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
 		return -ENOMEM;
 
-	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
-
-		mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i);
-	}
+	mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
 
 	return 0;
 }
@@ -388,6 +379,11 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
 	return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
 }
 
+static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+{
+	return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
 			  struct mlx5e_rq_param *rqp,
@@ -520,7 +516,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
 
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-			u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT;
+			u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
 
 			wqe->data.addr = cpu_to_be64(dma_offset + rq->buff.headroom);
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 9bb47a6d40f1..682f9ff9da34 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -373,6 +373,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	int cpy = offsetof(struct mlx5e_umr_wqe, inline_mtts);
+	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
 	int err;
 	u16 pi;
 	int i;
@@ -384,7 +385,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	}
 
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	memcpy(umr_wqe, &wi->umr.wqe, cpy);
+	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, cpy);
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
 		err = mlx5e_page_alloc_mapped(rq, dma_info);
 		if (unlikely(err))
@@ -400,6 +401,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	umr_wqe->ctrl.opmod_idx_opcode =
 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
 			    MLX5_OPCODE_UMR);
+	umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
 
 	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
 	sq->pc += MLX5E_UMR_WQEBBS;

From ab966d7e4ff988a48b3ad72e7abf903aa840afd1 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 4 Jan 2018 13:09:15 +0200
Subject: [PATCH 1550/1678] net/mlx5e: RX, Recycle buffer of UMR WQEs

Upon a new UMR post, check if the WQE buffer contains
a previous UMR WQE. If so, modify the dynamic fields
instead of a whole WQE overwrite. This saves a memcpy.

In current setting, after 2 WQ cycles (12 UMR posts),
this will always be the case.

No degradation sensed.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 682f9ff9da34..176645762e49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -365,6 +365,11 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 	mlx5_wq_ll_update_db_record(wq);
 }
 
+static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
+{
+	return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -372,7 +377,6 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5e_icosq *sq = &rq->channel->icosq;
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
-	int cpy = offsetof(struct mlx5e_umr_wqe, inline_mtts);
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
 	int err;
 	u16 pi;
@@ -385,7 +389,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	}
 
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, cpy);
+	if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2))
+		memcpy(umr_wqe, &rq->mpwqe.umr_wqe,
+		       offsetof(struct mlx5e_umr_wqe, inline_mtts));
+
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
 		err = mlx5e_page_alloc_mapped(rq, dma_info);
 		if (unlikely(err))

From 5e43f899b03a3492ce5fc44e8900becb04dae9c0 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:00 -0700
Subject: [PATCH 1551/1678] bpf: Check attach type at prog load time

== The problem ==

There are use-cases when a program of some type can be attached to
multiple attach points and those attach points must have different
permissions to access context or to call helpers.

E.g. context structure may have fields for both IPv4 and IPv6 but it
doesn't make sense to read from / write to IPv6 field when attach point
is somewhere in IPv4 stack.

Same applies to BPF-helpers: it may make sense to call some helper from
some attach point, but not from other for same prog type.

== The solution ==

Introduce `expected_attach_type` field in in `struct bpf_attr` for
`BPF_PROG_LOAD` command. If scenario described in "The problem" section
is the case for some prog type, the field will be checked twice:

1) At load time prog type is checked to see if attach type for it must
   be known to validate program permissions correctly. Prog will be
   rejected with EINVAL if it's the case and `expected_attach_type` is
   not specified or has invalid value.

2) At attach time `attach_type` is compared with `expected_attach_type`,
   if prog type requires to have one, and, if they differ, attach will
   be rejected with EINVAL.

The `expected_attach_type` is now available as part of `struct bpf_prog`
in both `bpf_verifier_ops->is_valid_access()` and
`bpf_verifier_ops->get_func_proto()` () and can be used to check context
accesses and calls to helpers correspondingly.

Initially the idea was discussed by Alexei Starovoitov <ast@fb.com> and
Daniel Borkmann <daniel@iogearbox.net> here:
https://marc.info/?l=linux-netdev&m=152107378717201&w=2

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h      |  5 ++++-
 include/linux/filter.h   |  1 +
 include/uapi/linux/bpf.h |  5 +++++
 kernel/bpf/cgroup.c      |  3 ++-
 kernel/bpf/syscall.c     | 31 ++++++++++++++++++++++++++++++-
 kernel/bpf/verifier.c    |  6 +++---
 kernel/trace/bpf_trace.c | 27 ++++++++++++++++++---------
 net/core/filter.c        | 39 +++++++++++++++++++++++++--------------
 8 files changed, 88 insertions(+), 29 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..95a7abd0ee92 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -208,12 +208,15 @@ struct bpf_prog_ops {
 
 struct bpf_verifier_ops {
 	/* return eBPF function prototype for verification */
-	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+	const struct bpf_func_proto *
+	(*get_func_proto)(enum bpf_func_id func_id,
+			  const struct bpf_prog *prog);
 
 	/* return true if 'size' wide access at offset 'off' within bpf_context
 	 * with 'type' (read or write) is allowed
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
+				const struct bpf_prog *prog,
 				struct bpf_insn_access_aux *info);
 	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
 			    const struct bpf_prog *prog);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 897ff3d95968..13c044e4832d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -469,6 +469,7 @@ struct bpf_prog {
 				is_func:1,	/* program is a bpf function */
 				kprobe_override:1; /* Do we override a kprobe? */
 	enum bpf_prog_type	type;		/* Type of BPF program */
+	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
 	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1878201c2d77..102718624d1e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -296,6 +296,11 @@ union bpf_attr {
 		__u32		prog_flags;
 		char		prog_name[BPF_OBJ_NAME_LEN];
 		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
+		/* For some prog types expected attach type must be known at
+		 * load time to verify attach type specific parts of prog
+		 * (context accesses, allowed helpers, etc).
+		 */
+		__u32		expected_attach_type;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c1c0b60d3f2f..8730b24ed540 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -545,7 +545,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
 
 static const struct bpf_func_proto *
-cgroup_dev_func_proto(enum bpf_func_id func_id)
+cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_map_lookup_elem:
@@ -566,6 +566,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
 
 static bool cgroup_dev_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
+				       const struct bpf_prog *prog,
 				       struct bpf_insn_access_aux *info)
 {
 	const int size_default = sizeof(__u32);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 95ca2523fa6e..9d3b572d4dec 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1171,8 +1171,27 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
 
+static int
+bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
+				enum bpf_attach_type expected_attach_type)
+{
+	/* There are currently no prog types that require specifying
+	 * attach_type at load time.
+	 */
+	return 0;
+}
+
+static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
+					     enum bpf_attach_type attach_type)
+{
+	/* There are currently no prog types that require specifying
+	 * attach_type at load time.
+	 */
+	return 0;
+}
+
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD prog_ifindex
+#define	BPF_PROG_LOAD_LAST_FIELD expected_attach_type
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -1209,11 +1228,16 @@ static int bpf_prog_load(union bpf_attr *attr)
 	    !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
+		return -EINVAL;
+
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 	if (!prog)
 		return -ENOMEM;
 
+	prog->expected_attach_type = attr->expected_attach_type;
+
 	prog->aux->offload_requested = !!attr->prog_ifindex;
 
 	err = security_bpf_prog_alloc(prog->aux);
@@ -1474,6 +1498,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
+	if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
 	cgrp = cgroup_get_from_fd(attr->target_fd);
 	if (IS_ERR(cgrp)) {
 		bpf_prog_put(prog);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8acd2207e412..10024323031d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1323,7 +1323,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	};
 
 	if (env->ops->is_valid_access &&
-	    env->ops->is_valid_access(off, size, t, &info)) {
+	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
 		/* A non zero info.ctx_field_size indicates that this field is a
 		 * candidate for later verifier transformation to load the whole
 		 * field and then apply a mask when accessed with a narrower
@@ -2349,7 +2349,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	}
 
 	if (env->ops->get_func_proto)
-		fn = env->ops->get_func_proto(func_id);
+		fn = env->ops->get_func_proto(func_id, env->prog);
 	if (!fn) {
 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
 			func_id);
@@ -5572,7 +5572,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			insn      = new_prog->insnsi + i + delta;
 		}
 patch_call_imm:
-		fn = env->ops->get_func_proto(insn->imm);
+		fn = env->ops->get_func_proto(insn->imm, env->prog);
 		/* all functions that have prototype and verifier allowed
 		 * programs to call them, must be real in-kernel functions
 		 */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 463e72d18c4c..d88e96d4e12c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -524,7 +524,8 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_map_lookup_elem:
@@ -568,7 +569,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_perf_event_output:
@@ -582,12 +584,13 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_override_return_proto;
 #endif
 	default:
-		return tracing_func_proto(func_id);
+		return tracing_func_proto(func_id, prog);
 	}
 }
 
 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
 	if (off < 0 || off >= sizeof(struct pt_regs))
@@ -661,7 +664,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_perf_event_output:
@@ -669,11 +673,12 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
 	default:
-		return tracing_func_proto(func_id);
+		return tracing_func_proto(func_id, prog);
 	}
 }
 
 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
 {
 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
@@ -721,7 +726,8 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
          .arg3_type      = ARG_CONST_SIZE,
 };
 
-static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_perf_event_output:
@@ -731,7 +737,7 @@ static const struct bpf_func_proto *pe_prog_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_perf_prog_read_value:
 		return &bpf_perf_prog_read_value_proto;
 	default:
-		return tracing_func_proto(func_id);
+		return tracing_func_proto(func_id, prog);
 	}
 }
 
@@ -781,7 +787,8 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_perf_event_output:
@@ -789,12 +796,13 @@ static const struct bpf_func_proto *raw_tp_prog_func_proto(enum bpf_func_id func
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_raw_tp;
 	default:
-		return tracing_func_proto(func_id);
+		return tracing_func_proto(func_id, prog);
 	}
 }
 
 static bool raw_tp_prog_is_valid_access(int off, int size,
 					enum bpf_access_type type,
+					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
 	/* largest tracepoint in the kernel has 12 args */
@@ -816,6 +824,7 @@ const struct bpf_prog_ops raw_tracepoint_prog_ops = {
 };
 
 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
 {
 	const int size_u64 = sizeof(u64);
diff --git a/net/core/filter.c b/net/core/filter.c
index e989bf313195..7790fd128614 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3685,7 +3685,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-sock_filter_func_proto(enum bpf_func_id func_id)
+sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	/* inet and inet6 sockets are created in a process
@@ -3699,7 +3699,7 @@ sock_filter_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-sk_filter_func_proto(enum bpf_func_id func_id)
+sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_skb_load_bytes:
@@ -3714,7 +3714,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-tc_cls_act_func_proto(enum bpf_func_id func_id)
+tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_skb_store_bytes:
@@ -3781,7 +3781,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-xdp_func_proto(enum bpf_func_id func_id)
+xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_perf_event_output:
@@ -3804,7 +3804,7 @@ xdp_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-lwt_inout_func_proto(enum bpf_func_id func_id)
+lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_skb_load_bytes:
@@ -3831,7 +3831,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-	sock_ops_func_proto(enum bpf_func_id func_id)
+sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_setsockopt:
@@ -3847,7 +3847,8 @@ static const struct bpf_func_proto *
 	}
 }
 
-static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_msg_redirect_map:
@@ -3863,7 +3864,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_skb_store_bytes:
@@ -3888,7 +3890,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
 }
 
 static const struct bpf_func_proto *
-lwt_xmit_func_proto(enum bpf_func_id func_id)
+lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_skb_get_tunnel_key:
@@ -3918,11 +3920,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_set_hash_invalid:
 		return &bpf_set_hash_invalid_proto;
 	default:
-		return lwt_inout_func_proto(func_id);
+		return lwt_inout_func_proto(func_id, prog);
 	}
 }
 
 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+				    const struct bpf_prog *prog,
 				    struct bpf_insn_access_aux *info)
 {
 	const int size_default = sizeof(__u32);
@@ -3966,6 +3969,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type,
+				      const struct bpf_prog *prog,
 				      struct bpf_insn_access_aux *info)
 {
 	switch (off) {
@@ -3986,11 +3990,12 @@ static bool sk_filter_is_valid_access(int off, int size,
 		}
 	}
 
-	return bpf_skb_is_valid_access(off, size, type, info);
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
+				const struct bpf_prog *prog,
 				struct bpf_insn_access_aux *info)
 {
 	switch (off) {
@@ -4020,11 +4025,12 @@ static bool lwt_is_valid_access(int off, int size,
 		break;
 	}
 
-	return bpf_skb_is_valid_access(off, size, type, info);
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
 					enum bpf_access_type type,
+					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE) {
@@ -4096,6 +4102,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
 
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
+				       const struct bpf_prog *prog,
 				       struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE) {
@@ -4125,7 +4132,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		return false;
 	}
 
-	return bpf_skb_is_valid_access(off, size, type, info);
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
 static bool __is_valid_xdp_access(int off, int size)
@@ -4142,6 +4149,7 @@ static bool __is_valid_xdp_access(int off, int size)
 
 static bool xdp_is_valid_access(int off, int size,
 				enum bpf_access_type type,
+				const struct bpf_prog *prog,
 				struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE)
@@ -4174,6 +4182,7 @@ EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
 static bool sock_ops_is_valid_access(int off, int size,
 				     enum bpf_access_type type,
+				     const struct bpf_prog *prog,
 				     struct bpf_insn_access_aux *info)
 {
 	const int size_default = sizeof(__u32);
@@ -4220,6 +4229,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
 
 static bool sk_skb_is_valid_access(int off, int size,
 				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
 				   struct bpf_insn_access_aux *info)
 {
 	switch (off) {
@@ -4249,11 +4259,12 @@ static bool sk_skb_is_valid_access(int off, int size,
 		break;
 	}
 
-	return bpf_skb_is_valid_access(off, size, type, info);
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
 static bool sk_msg_is_valid_access(int off, int size,
 				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
 				   struct bpf_insn_access_aux *info)
 {
 	if (type == BPF_WRITE)

From d7be143b67c2cf99bf93279217b1cf93a1e8a6b1 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:01 -0700
Subject: [PATCH 1552/1678] libbpf: Support expected_attach_type at prog load

Support setting `expected_attach_type` at prog load time in both
`bpf/bpf.h` and `bpf/libbpf.h`.

Since both headers already have API to load programs, new functions are
added not to break backward compatibility for existing ones:
* `bpf_load_program_xattr()` is added to `bpf/bpf.h`;
* `bpf_prog_load_xattr()` is added to `bpf/libbpf.h`.

Both new functions accept structures, `struct bpf_load_program_attr` and
`struct bpf_prog_load_attr` correspondingly, where new fields can be
added in the future w/o changing the API.

Standard `_xattr` suffix is used to name the new API functions.

Since `bpf_load_program_name()` is not used as heavily as
`bpf_load_program()`, it was removed in favor of more generic
`bpf_load_program_xattr()`.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h |   5 ++
 tools/lib/bpf/bpf.c            |  44 +++++++++-----
 tools/lib/bpf/bpf.h            |  17 ++++--
 tools/lib/bpf/libbpf.c         | 107 ++++++++++++++++++++++++---------
 tools/lib/bpf/libbpf.h         |   8 +++
 5 files changed, 134 insertions(+), 47 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 58060bec999d..e1c1fed63396 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -296,6 +296,11 @@ union bpf_attr {
 		__u32		prog_flags;
 		char		prog_name[BPF_OBJ_NAME_LEN];
 		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
+		/* For some prog types expected attach type must be known at
+		 * load time to verify attach type specific parts of prog
+		 * (context accesses, allowed helpers, etc).
+		 */
+		__u32		expected_attach_type;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e0500055f1a6..acbb3f8b3bec 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -146,26 +146,30 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
 					  -1);
 }
 
-int bpf_load_program_name(enum bpf_prog_type type, const char *name,
-			  const struct bpf_insn *insns,
-			  size_t insns_cnt, const char *license,
-			  __u32 kern_version, char *log_buf,
-			  size_t log_buf_sz)
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+			   char *log_buf, size_t log_buf_sz)
 {
-	int fd;
 	union bpf_attr attr;
-	__u32 name_len = name ? strlen(name) : 0;
+	__u32 name_len;
+	int fd;
+
+	if (!load_attr)
+		return -EINVAL;
+
+	name_len = load_attr->name ? strlen(load_attr->name) : 0;
 
 	bzero(&attr, sizeof(attr));
-	attr.prog_type = type;
-	attr.insn_cnt = (__u32)insns_cnt;
-	attr.insns = ptr_to_u64(insns);
-	attr.license = ptr_to_u64(license);
+	attr.prog_type = load_attr->prog_type;
+	attr.expected_attach_type = load_attr->expected_attach_type;
+	attr.insn_cnt = (__u32)load_attr->insns_cnt;
+	attr.insns = ptr_to_u64(load_attr->insns);
+	attr.license = ptr_to_u64(load_attr->license);
 	attr.log_buf = ptr_to_u64(NULL);
 	attr.log_size = 0;
 	attr.log_level = 0;
-	attr.kern_version = kern_version;
-	memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+	attr.kern_version = load_attr->kern_version;
+	memcpy(attr.prog_name, load_attr->name,
+	       min(name_len, BPF_OBJ_NAME_LEN - 1));
 
 	fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 	if (fd >= 0 || !log_buf || !log_buf_sz)
@@ -184,8 +188,18 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     __u32 kern_version, char *log_buf,
 		     size_t log_buf_sz)
 {
-	return bpf_load_program_name(type, NULL, insns, insns_cnt, license,
-				     kern_version, log_buf, log_buf_sz);
+	struct bpf_load_program_attr load_attr;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = type;
+	load_attr.expected_attach_type = 0;
+	load_attr.name = NULL;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	load_attr.kern_version = kern_version;
+
+	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
 }
 
 int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ee59342c6f42..39f6a0d64a3b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -41,13 +41,20 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
 			  int key_size, int inner_map_fd, int max_entries,
 			  __u32 map_flags);
 
+struct bpf_load_program_attr {
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	const char *name;
+	const struct bpf_insn *insns;
+	size_t insns_cnt;
+	const char *license;
+	__u32 kern_version;
+};
+
 /* Recommend log buffer size */
 #define BPF_LOG_BUF_SIZE (256 * 1024)
-int bpf_load_program_name(enum bpf_prog_type type, const char *name,
-			  const struct bpf_insn *insns,
-			  size_t insns_cnt, const char *license,
-			  __u32 kern_version, char *log_buf,
-			  size_t log_buf_sz);
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+			   char *log_buf, size_t log_buf_sz);
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 64a8fc384186..48e3e743ebf7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -203,6 +203,8 @@ struct bpf_program {
 	struct bpf_object *obj;
 	void *priv;
 	bpf_program_clear_priv_t clear_priv;
+
+	enum bpf_attach_type expected_attach_type;
 };
 
 struct bpf_map {
@@ -1162,21 +1164,31 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 }
 
 static int
-load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
-	     int insns_cnt, char *license, u32 kern_version, int *pfd)
+load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
+	     const char *name, struct bpf_insn *insns, int insns_cnt,
+	     char *license, u32 kern_version, int *pfd)
 {
-	int ret;
+	struct bpf_load_program_attr load_attr;
 	char *log_buf;
+	int ret;
 
-	if (!insns || !insns_cnt)
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = type;
+	load_attr.expected_attach_type = expected_attach_type;
+	load_attr.name = name;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	load_attr.kern_version = kern_version;
+
+	if (!load_attr.insns || !load_attr.insns_cnt)
 		return -EINVAL;
 
 	log_buf = malloc(BPF_LOG_BUF_SIZE);
 	if (!log_buf)
 		pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
 
-	ret = bpf_load_program_name(type, name, insns, insns_cnt, license,
-				    kern_version, log_buf, BPF_LOG_BUF_SIZE);
+	ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
 
 	if (ret >= 0) {
 		*pfd = ret;
@@ -1192,18 +1204,18 @@ load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
 		pr_warning("-- BEGIN DUMP LOG ---\n");
 		pr_warning("\n%s\n", log_buf);
 		pr_warning("-- END LOG --\n");
-	} else if (insns_cnt >= BPF_MAXINSNS) {
-		pr_warning("Program too large (%d insns), at most %d insns\n",
-			   insns_cnt, BPF_MAXINSNS);
+	} else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+		pr_warning("Program too large (%zu insns), at most %d insns\n",
+			   load_attr.insns_cnt, BPF_MAXINSNS);
 		ret = -LIBBPF_ERRNO__PROG2BIG;
 	} else {
 		/* Wrong program type? */
-		if (type != BPF_PROG_TYPE_KPROBE) {
+		if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
 			int fd;
 
-			fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name,
-						   insns, insns_cnt, license,
-						   kern_version, NULL, 0);
+			load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+			load_attr.expected_attach_type = 0;
+			fd = bpf_load_program_xattr(&load_attr, NULL, 0);
 			if (fd >= 0) {
 				close(fd);
 				ret = -LIBBPF_ERRNO__PROGTYPE;
@@ -1247,8 +1259,9 @@ bpf_program__load(struct bpf_program *prog,
 			pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
 				   prog->section_name, prog->instances.nr);
 		}
-		err = load_program(prog->type, prog->name, prog->insns,
-				   prog->insns_cnt, license, kern_version, &fd);
+		err = load_program(prog->type, prog->expected_attach_type,
+				   prog->name, prog->insns, prog->insns_cnt,
+				   license, kern_version, &fd);
 		if (!err)
 			prog->instances.fds[0] = fd;
 		goto out;
@@ -1276,8 +1289,8 @@ bpf_program__load(struct bpf_program *prog,
 			continue;
 		}
 
-		err = load_program(prog->type, prog->name,
-				   result.new_insn_ptr,
+		err = load_program(prog->type, prog->expected_attach_type,
+				   prog->name, result.new_insn_ptr,
 				   result.new_insn_cnt,
 				   license, kern_version, &fd);
 
@@ -1835,11 +1848,22 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type }
+static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+						 enum bpf_attach_type type)
+{
+	prog->expected_attach_type = type;
+}
+
+#define BPF_PROG_SEC_FULL(string, ptype, atype) \
+	{ string, sizeof(string) - 1, ptype, atype }
+
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
+
 static const struct {
 	const char *sec;
 	size_t len;
 	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
 } section_names[] = {
 	BPF_PROG_SEC("socket",		BPF_PROG_TYPE_SOCKET_FILTER),
 	BPF_PROG_SEC("kprobe/",		BPF_PROG_TYPE_KPROBE),
@@ -1859,9 +1883,11 @@ static const struct {
 	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
 	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
 };
-#undef BPF_PROG_SEC
 
-static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
+#undef BPF_PROG_SEC
+#undef BPF_PROG_SEC_FULL
+
+static int bpf_program__identify_section(struct bpf_program *prog)
 {
 	int i;
 
@@ -1871,13 +1897,13 @@ static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog)
 	for (i = 0; i < ARRAY_SIZE(section_names); i++)
 		if (strncmp(prog->section_name, section_names[i].sec,
 			    section_names[i].len) == 0)
-			return section_names[i].prog_type;
+			return i;
 
 err:
 	pr_warning("failed to guess program type based on section name %s\n",
 		   prog->section_name);
 
-	return BPF_PROG_TYPE_UNSPEC;
+	return -1;
 }
 
 int bpf_map__fd(struct bpf_map *map)
@@ -1976,12 +2002,31 @@ long libbpf_get_error(const void *ptr)
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
 		  struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_prog_load_attr attr;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = file;
+	attr.prog_type = type;
+	attr.expected_attach_type = 0;
+
+	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+			struct bpf_object **pobj, int *prog_fd)
 {
 	struct bpf_program *prog, *first_prog = NULL;
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_prog_type prog_type;
 	struct bpf_object *obj;
+	int section_idx;
 	int err;
 
-	obj = bpf_object__open(file);
+	if (!attr)
+		return -EINVAL;
+
+	obj = bpf_object__open(attr->file);
 	if (IS_ERR(obj))
 		return -ENOENT;
 
@@ -1990,15 +2035,23 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 		 * If type is not specified, try to guess it based on
 		 * section name.
 		 */
-		if (type == BPF_PROG_TYPE_UNSPEC) {
-			type = bpf_program__guess_type(prog);
-			if (type == BPF_PROG_TYPE_UNSPEC) {
+		prog_type = attr->prog_type;
+		expected_attach_type = attr->expected_attach_type;
+		if (prog_type == BPF_PROG_TYPE_UNSPEC) {
+			section_idx = bpf_program__identify_section(prog);
+			if (section_idx < 0) {
 				bpf_object__close(obj);
 				return -EINVAL;
 			}
+			prog_type = section_names[section_idx].prog_type;
+			expected_attach_type =
+				section_names[section_idx].expected_attach_type;
 		}
 
-		bpf_program__set_type(prog, type);
+		bpf_program__set_type(prog, prog_type);
+		bpf_program__set_expected_attach_type(prog,
+						      expected_attach_type);
+
 		if (prog->idx != obj->efile.text_shndx && !first_prog)
 			first_prog = prog;
 	}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f85906533cdd..a3a62a583f27 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -248,6 +248,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
 
 long libbpf_get_error(const void *ptr);
 
+struct bpf_prog_load_attr {
+	const char *file;
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+};
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+			struct bpf_object **pobj, int *prog_fd);
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
 		  struct bpf_object **pobj, int *prog_fd);
 

From 4fbac77d2d092b475dda9eea66da674369665427 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:02 -0700
Subject: [PATCH 1553/1678] bpf: Hooks for sys_bind

== The problem ==

There is a use-case when all processes inside a cgroup should use one
single IP address on a host that has multiple IP configured.  Those
processes should use the IP for both ingress and egress, for TCP and UDP
traffic. So TCP/UDP servers should be bound to that IP to accept
incoming connections on it, and TCP/UDP clients should make outgoing
connections from that IP. It should not require changing application
code since it's often not possible.

Currently it's solved by intercepting glibc wrappers around syscalls
such as `bind(2)` and `connect(2)`. It's done by a shared library that
is preloaded for every process in a cgroup so that whenever TCP/UDP
server calls `bind(2)`, the library replaces IP in sockaddr before
passing arguments to syscall. When application calls `connect(2)` the
library transparently binds the local end of connection to that IP
(`bind(2)` with `IP_BIND_ADDRESS_NO_PORT` to avoid performance penalty).

Shared library approach is fragile though, e.g.:
* some applications clear env vars (incl. `LD_PRELOAD`);
* `/etc/ld.so.preload` doesn't help since some applications are linked
  with option `-z nodefaultlib`;
* other applications don't use glibc and there is nothing to intercept.

== The solution ==

The patch provides much more reliable in-kernel solution for the 1st
part of the problem: binding TCP/UDP servers on desired IP. It does not
depend on application environment and implementation details (whether
glibc is used or not).

It adds new eBPF program type `BPF_PROG_TYPE_CGROUP_SOCK_ADDR` and
attach types `BPF_CGROUP_INET4_BIND` and `BPF_CGROUP_INET6_BIND`
(similar to already existing `BPF_CGROUP_INET_SOCK_CREATE`).

The new program type is intended to be used with sockets (`struct sock`)
in a cgroup and provided by user `struct sockaddr`. Pointers to both of
them are parts of the context passed to programs of newly added types.

The new attach types provides hooks in `bind(2)` system call for both
IPv4 and IPv6 so that one can write a program to override IP addresses
and ports user program tries to bind to and apply such a program for
whole cgroup.

== Implementation notes ==

[1]
Separate attach types for `AF_INET` and `AF_INET6` are added
intentionally to prevent reading/writing to offsets that don't make
sense for corresponding socket family. E.g. if user passes `sockaddr_in`
it doesn't make sense to read from / write to `user_ip6[]` context
fields.

[2]
The write access to `struct bpf_sock_addr_kern` is implemented using
special field as an additional "register".

There are just two registers in `sock_addr_convert_ctx_access`: `src`
with value to write and `dst` with pointer to context that can't be
changed not to break later instructions. But the fields, allowed to
write to, are not available directly and to access them address of
corresponding pointer has to be loaded first. To get additional register
the 1st not used by `src` and `dst` one is taken, its content is saved
to `bpf_sock_addr_kern.tmp_reg`, then the register is used to load
address of pointer field, and finally the register's content is restored
from the temporary field after writing `src` value.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  21 ++++
 include/linux/bpf_types.h  |   1 +
 include/linux/filter.h     |  10 ++
 include/uapi/linux/bpf.h   |  23 ++++
 kernel/bpf/cgroup.c        |  36 ++++++
 kernel/bpf/syscall.c       |  36 ++++--
 kernel/bpf/verifier.c      |   1 +
 net/core/filter.c          | 232 +++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c         |   7 ++
 net/ipv6/af_inet6.c        |   7 ++
 10 files changed, 366 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 8a4566691c8f..67dc4a6471ad 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -6,6 +6,7 @@
 #include <uapi/linux/bpf.h>
 
 struct sock;
+struct sockaddr;
 struct cgroup;
 struct sk_buff;
 struct bpf_sock_ops_kern;
@@ -63,6 +64,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 int __cgroup_bpf_run_filter_sk(struct sock *sk,
 			       enum bpf_attach_type type);
 
+int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
+				      struct sockaddr *uaddr,
+				      enum bpf_attach_type type);
+
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 				     struct bpf_sock_ops_kern *sock_ops,
 				     enum bpf_attach_type type);
@@ -103,6 +108,20 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)				       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled)						       \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type);    \
+	__ret;								       \
+})
+
+#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
+
+#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
+
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				       \
 ({									       \
 	int __ret = 0;							       \
@@ -135,6 +154,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 6d7243bfb0ff..2b28fcf6f6ae 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -8,6 +8,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
 BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 13c044e4832d..fc4e8f91b03d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1021,6 +1021,16 @@ static inline int bpf_tell_extensions(void)
 	return SKF_AD_MAX;
 }
 
+struct bpf_sock_addr_kern {
+	struct sock *sk;
+	struct sockaddr *uaddr;
+	/* Temporary "register" to make indirect stores to nested structures
+	 * defined above. We need three registers to make such a store, but
+	 * only two (src and dst) are available at convert_ctx_access time
+	 */
+	u64 tmp_reg;
+};
+
 struct bpf_sock_ops_kern {
 	struct	sock *sk;
 	u32	op;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 102718624d1e..ce3e69e3c793 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
+	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 };
 
 enum bpf_attach_type {
@@ -147,6 +148,8 @@ enum bpf_attach_type {
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
 	BPF_SK_MSG_VERDICT,
+	BPF_CGROUP_INET4_BIND,
+	BPF_CGROUP_INET6_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1010,6 +1013,26 @@ struct bpf_map_info {
 	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+	__u32 user_family;	/* Allows 4-byte read, but no write. */
+	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 user_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 user_port;	/* Allows 4-byte read and write.
+				 * Stored in network byte order
+				 */
+	__u32 family;		/* Allows 4-byte read, but no write */
+	__u32 type;		/* Allows 4-byte read, but no write */
+	__u32 protocol;		/* Allows 4-byte read, but no write */
+};
+
 /* User bpf_sock_ops struct to access socket values and specify request ops
  * and their replies.
  * Some of this fields are in network (bigendian) byte order and may need
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 8730b24ed540..43171a0bb02b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -494,6 +494,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
+/**
+ * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
+ *                                       provided by user sockaddr
+ * @sk: sock struct that will use sockaddr
+ * @uaddr: sockaddr struct provided by user
+ * @type: The type of program to be exectuted
+ *
+ * socket is expected to be of type INET or INET6.
+ *
+ * This function will return %-EPERM if an attached program is found and
+ * returned value != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
+				      struct sockaddr *uaddr,
+				      enum bpf_attach_type type)
+{
+	struct bpf_sock_addr_kern ctx = {
+		.sk = sk,
+		.uaddr = uaddr,
+	};
+	struct cgroup *cgrp;
+	int ret;
+
+	/* Check socket family since not all sockets represent network
+	 * endpoint (e.g. AF_UNIX).
+	 */
+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+
+	return ret == 1 ? 0 : -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
+
 /**
  * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
  * @sk: socket to get cgroup from
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9d3b572d4dec..2cad66a4cacb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1175,19 +1175,29 @@ static int
 bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 				enum bpf_attach_type expected_attach_type)
 {
-	/* There are currently no prog types that require specifying
-	 * attach_type at load time.
-	 */
-	return 0;
+	switch (prog_type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+		switch (expected_attach_type) {
+		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET6_BIND:
+			return 0;
+		default:
+			return -EINVAL;
+		}
+	default:
+		return 0;
+	}
 }
 
 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 					     enum bpf_attach_type attach_type)
 {
-	/* There are currently no prog types that require specifying
-	 * attach_type at load time.
-	 */
-	return 0;
+	switch (prog->type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+	default:
+		return 0;
+	}
 }
 
 /* last field in 'union bpf_attr' used by this command */
@@ -1479,6 +1489,10 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_SOCK_CREATE:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 		break;
+	case BPF_CGROUP_INET4_BIND:
+	case BPF_CGROUP_INET6_BIND:
+		ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+		break;
 	case BPF_CGROUP_SOCK_OPS:
 		ptype = BPF_PROG_TYPE_SOCK_OPS;
 		break;
@@ -1541,6 +1555,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_INET_SOCK_CREATE:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 		break;
+	case BPF_CGROUP_INET4_BIND:
+	case BPF_CGROUP_INET6_BIND:
+		ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+		break;
 	case BPF_CGROUP_SOCK_OPS:
 		ptype = BPF_PROG_TYPE_SOCK_OPS;
 		break;
@@ -1590,6 +1608,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET4_BIND:
+	case BPF_CGROUP_INET6_BIND:
 	case BPF_CGROUP_SOCK_OPS:
 	case BPF_CGROUP_DEVICE:
 		break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 10024323031d..5dd1dcb902bf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3887,6 +3887,7 @@ static int check_return_code(struct bpf_verifier_env *env)
 	switch (env->prog->type) {
 	case BPF_PROG_TYPE_CGROUP_SKB:
 	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 	case BPF_PROG_TYPE_SOCK_OPS:
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 		break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 7790fd128614..c08e5b121558 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3698,6 +3698,20 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	/* inet and inet6 sockets are created in a process
+	 * context so there is always a valid uid/gid
+	 */
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4180,6 +4194,69 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+static bool sock_addr_is_valid_access(int off, int size,
+				      enum bpf_access_type type,
+				      const struct bpf_prog *prog,
+				      struct bpf_insn_access_aux *info)
+{
+	const int size_default = sizeof(__u32);
+
+	if (off < 0 || off >= sizeof(struct bpf_sock_addr))
+		return false;
+	if (off % size != 0)
+		return false;
+
+	/* Disallow access to IPv6 fields from IPv4 contex and vise
+	 * versa.
+	 */
+	switch (off) {
+	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_BIND:
+			break;
+		default:
+			return false;
+		}
+		break;
+	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET6_BIND:
+			break;
+		default:
+			return false;
+		}
+		break;
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
+	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+		/* Only narrow read access allowed for now. */
+		if (type == BPF_READ) {
+			bpf_ctx_record_field_size(info, size_default);
+			if (!bpf_ctx_narrow_access_ok(off, size, size_default))
+				return false;
+		} else {
+			if (size != size_default)
+				return false;
+		}
+		break;
+	case bpf_ctx_range(struct bpf_sock_addr, user_port):
+		if (size != size_default)
+			return false;
+		break;
+	default:
+		if (type == BPF_READ) {
+			if (size != size_default)
+				return false;
+		} else {
+			return false;
+		}
+	}
+
+	return true;
+}
+
 static bool sock_ops_is_valid_access(int off, int size,
 				     enum bpf_access_type type,
 				     const struct bpf_prog *prog,
@@ -4724,6 +4801,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
+ * context Structure, F is Field in context structure that contains a pointer
+ * to Nested Structure of type NS that has the field NF.
+ *
+ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make
+ * sure that SIZE is not greater than actual size of S.F.NF.
+ *
+ * If offset OFF is provided, the load happens from that offset relative to
+ * offset of NF.
+ */
+#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)	       \
+	do {								       \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,     \
+				      si->src_reg, offsetof(S, F));	       \
+		*insn++ = BPF_LDX_MEM(					       \
+			SIZE, si->dst_reg, si->dst_reg,			       \
+			bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),	       \
+				       target_size)			       \
+				+ OFF);					       \
+	} while (0)
+
+#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)			       \
+	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,		       \
+					     BPF_FIELD_SIZEOF(NS, NF), 0)
+
+/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
+ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
+ *
+ * It doesn't support SIZE argument though since narrow stores are not
+ * supported for now.
+ *
+ * In addition it uses Temporary Field TF (member of struct S) as the 3rd
+ * "register" since two registers available in convert_ctx_access are not
+ * enough: we can't override neither SRC, since it contains value to store, nor
+ * DST since it contains pointer to context that may be used by later
+ * instructions. But we need a temporary place to save pointer to nested
+ * structure whose field we want to store to.
+ */
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF)		       \
+	do {								       \
+		int tmp_reg = BPF_REG_9;				       \
+		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
+			--tmp_reg;					       \
+		if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)	       \
+			--tmp_reg;					       \
+		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,	       \
+				      offsetof(S, TF));			       \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,	       \
+				      si->dst_reg, offsetof(S, F));	       \
+		*insn++ = BPF_STX_MEM(					       \
+			BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg,	       \
+			bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),	       \
+				       target_size)			       \
+				+ OFF);					       \
+		*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,	       \
+				      offsetof(S, TF));			       \
+	} while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
+						      TF)		       \
+	do {								       \
+		if (type == BPF_WRITE) {				       \
+			SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF,    \
+							 TF);		       \
+		} else {						       \
+			SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(		       \
+				S, NS, F, NF, SIZE, OFF);  \
+		}							       \
+	} while (0)
+
+#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF)		       \
+	SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(			       \
+		S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
+
+static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
+					const struct bpf_insn *si,
+					struct bpf_insn *insn_buf,
+					struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+	int off;
+
+	switch (si->off) {
+	case offsetof(struct bpf_sock_addr, user_family):
+		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+					    struct sockaddr, uaddr, sa_family);
+		break;
+
+	case offsetof(struct bpf_sock_addr, user_ip4):
+		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+			struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
+			sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
+		break;
+
+	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
+		off = si->off;
+		off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
+		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+			sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
+			tmp_reg);
+		break;
+
+	case offsetof(struct bpf_sock_addr, user_port):
+		/* To get port we need to know sa_family first and then treat
+		 * sockaddr as either sockaddr_in or sockaddr_in6.
+		 * Though we can simplify since port field has same offset and
+		 * size in both structures.
+		 * Here we check this invariant and use just one of the
+		 * structures if it's true.
+		 */
+		BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
+			     offsetof(struct sockaddr_in6, sin6_port));
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
+			     FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
+						     struct sockaddr_in6, uaddr,
+						     sin6_port, tmp_reg);
+		break;
+
+	case offsetof(struct bpf_sock_addr, family):
+		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+					    struct sock, sk, sk_family);
+		break;
+
+	case offsetof(struct bpf_sock_addr, type):
+		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+			struct bpf_sock_addr_kern, struct sock, sk,
+			__sk_flags_offset, BPF_W, 0);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		break;
+
+	case offsetof(struct bpf_sock_addr, protocol):
+		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
+			struct bpf_sock_addr_kern, struct sock, sk,
+			__sk_flags_offset, BPF_W, 0);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+					SK_FL_PROTO_SHIFT);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 				       const struct bpf_insn *si,
 				       struct bpf_insn *insn_buf,
@@ -5181,6 +5404,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = {
 const struct bpf_prog_ops cg_sock_prog_ops = {
 };
 
+const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
+	.get_func_proto		= sock_addr_func_proto,
+	.is_valid_access	= sock_addr_is_valid_access,
+	.convert_ctx_access	= sock_addr_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_sock_addr_prog_ops = {
+};
+
 const struct bpf_verifier_ops sock_ops_verifier_ops = {
 	.get_func_proto		= sock_ops_func_proto,
 	.is_valid_access	= sock_ops_is_valid_access,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e8c7fad8c329..2dec266507dc 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -450,6 +450,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < sizeof(struct sockaddr_in))
 		goto out;
 
+	/* BPF prog is run before any checks are done so that if the prog
+	 * changes context in a wrong way it will be caught.
+	 */
+	err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+	if (err)
+		goto out;
+
 	if (addr->sin_family != AF_INET) {
 		/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
 		 * only if s_addr is INADDR_ANY.
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dbbe04018813..fa24e3f06ac6 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -295,6 +295,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (addr_len < SIN6_LEN_RFC2133)
 		return -EINVAL;
 
+	/* BPF prog is run before any checks are done so that if the prog
+	 * changes context in a wrong way it will be caught.
+	 */
+	err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+	if (err)
+		return err;
+
 	if (addr->sin6_family != AF_INET6)
 		return -EAFNOSUPPORT;
 

From e50b0a6f089308bec6b2d0198abed231dee4d277 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:03 -0700
Subject: [PATCH 1554/1678] selftests/bpf: Selftest for sys_bind hooks

Add selftest to work with bpf_sock_addr context from
`BPF_PROG_TYPE_CGROUP_SOCK_ADDR` programs.

Try to bind(2) on IP:port and apply:
* loads to make sure context can be read correctly, including narrow
  loads (byte, half) for IP and full-size loads (word) for all fields;
* stores to those fields allowed by verifier.

All combination from IPv4/IPv6 and TCP/UDP are tested.

Both scenarios are tested:
* valid programs can be loaded and attached;
* invalid programs can be neither loaded nor attached.

Test passes when expected data can be read from context in the
BPF-program, and after the call to bind(2) socket is bound to IP:port
pair that was written by BPF-program to the context.

Example:
  # ./test_sock_addr
  Attached bind4 program.
  Test case #1 (IPv4/TCP):
          Requested: bind(192.168.1.254, 4040) ..
             Actual: bind(127.0.0.1, 4444)
  Test case #2 (IPv4/UDP):
          Requested: bind(192.168.1.254, 4040) ..
             Actual: bind(127.0.0.1, 4444)
  Attached bind6 program.
  Test case #3 (IPv6/TCP):
          Requested: bind(face:b00c:1234:5678::abcd, 6060) ..
             Actual: bind(::1, 6666)
  Test case #4 (IPv6/UDP):
          Requested: bind(face:b00c:1234:5678::abcd, 6060) ..
             Actual: bind(::1, 6666)
  ### SUCCESS

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h               |  23 +
 tools/lib/bpf/libbpf.c                       |   6 +
 tools/testing/selftests/bpf/Makefile         |   3 +-
 tools/testing/selftests/bpf/test_sock_addr.c | 486 +++++++++++++++++++
 4 files changed, 517 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_sock_addr.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e1c1fed63396..f2120c5c0578 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -136,6 +136,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_DEVICE,
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
+	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 };
 
 enum bpf_attach_type {
@@ -147,6 +148,8 @@ enum bpf_attach_type {
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
 	BPF_SK_MSG_VERDICT,
+	BPF_CGROUP_INET4_BIND,
+	BPF_CGROUP_INET6_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1009,6 +1012,26 @@ struct bpf_map_info {
 	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+	__u32 user_family;	/* Allows 4-byte read, but no write. */
+	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 user_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 user_port;	/* Allows 4-byte read and write.
+				 * Stored in network byte order
+				 */
+	__u32 family;		/* Allows 4-byte read, but no write */
+	__u32 type;		/* Allows 4-byte read, but no write */
+	__u32 protocol;		/* Allows 4-byte read, but no write */
+};
+
 /* User bpf_sock_ops struct to access socket values and specify request ops
  * and their replies.
  * Some of this fields are in network (bigendian) byte order and may need
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 48e3e743ebf7..d7ce8818982c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1859,6 +1859,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 
 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
 
+#define BPF_SA_PROG_SEC(string, ptype) \
+	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
+
 static const struct {
 	const char *sec;
 	size_t len;
@@ -1882,10 +1885,13 @@ static const struct {
 	BPF_PROG_SEC("sockops",		BPF_PROG_TYPE_SOCK_OPS),
 	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
 	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
+	BPF_SA_PROG_SEC("cgroup/bind4",	BPF_CGROUP_INET4_BIND),
+	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
 };
 
 #undef BPF_PROG_SEC
 #undef BPF_PROG_SEC_FULL
+#undef BPF_SA_PROG_SEC
 
 static int bpf_program__identify_section(struct bpf_program *prog)
 {
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f35fb02bdf56..f4717c910874 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ urandom_read: urandom_read.c
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
+	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user test_sock_addr
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
@@ -51,6 +51,7 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_sock_addr: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000000..a57e13a65e37
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+
+#define CG_PATH	"/foo"
+
+#define SERV4_IP		"192.168.1.254"
+#define SERV4_REWRITE_IP	"127.0.0.1"
+#define SERV4_PORT		4040
+#define SERV4_REWRITE_PORT	4444
+
+#define SERV6_IP		"face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP	"::1"
+#define SERV6_PORT		6060
+#define SERV6_REWRITE_PORT	6666
+
+#define INET_NTOP_BUF	40
+
+typedef int (*load_fn)(enum bpf_attach_type, const char *comment);
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+struct program {
+	enum bpf_attach_type type;
+	load_fn	loadfn;
+	int fd;
+	const char *name;
+	enum bpf_attach_type invalid_type;
+};
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int mk_sockaddr(int domain, const char *ip, unsigned short port,
+		       struct sockaddr *addr, socklen_t addr_len)
+{
+	struct sockaddr_in6 *addr6;
+	struct sockaddr_in *addr4;
+
+	if (domain != AF_INET && domain != AF_INET6) {
+		log_err("Unsupported address family");
+		return -1;
+	}
+
+	memset(addr, 0, addr_len);
+
+	if (domain == AF_INET) {
+		if (addr_len < sizeof(struct sockaddr_in))
+			return -1;
+		addr4 = (struct sockaddr_in *)addr;
+		addr4->sin_family = domain;
+		addr4->sin_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
+			log_err("Invalid IPv4: %s", ip);
+			return -1;
+		}
+	} else if (domain == AF_INET6) {
+		if (addr_len < sizeof(struct sockaddr_in6))
+			return -1;
+		addr6 = (struct sockaddr_in6 *)addr;
+		addr6->sin6_family = domain;
+		addr6->sin6_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
+			log_err("Invalid IPv6: %s", ip);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int load_insns(enum bpf_attach_type attach_type,
+		      const struct bpf_insn *insns, size_t insns_cnt,
+		      const char *comment)
+{
+	struct bpf_load_program_attr load_attr;
+	int ret;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+	load_attr.expected_attach_type = attach_type;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+	if (ret < 0 && comment) {
+		log_err(">>> Loading %s program error.\n"
+			">>> Output from verifier:\n%s\n-------\n",
+			comment, bpf_log_buf);
+	}
+
+	return ret;
+}
+
+/* [1] These testing programs try to read different context fields, including
+ * narrow loads of different sizes from user_ip4 and user_ip6, and write to
+ * those allowed to be overridden.
+ *
+ * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
+ * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
+ * in such cases since it accepts only _signed_ 32bit integer as IMM
+ * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
+ * to count jumps properly.
+ */
+
+static int bind4_prog_load(enum bpf_attach_type attach_type,
+			   const char *comment)
+{
+	union {
+		uint8_t u4_addr8[4];
+		uint16_t u4_addr16[2];
+		uint32_t u4_addr32;
+	} ip4;
+	struct sockaddr_in addr4_rw;
+
+	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
+		log_err("Invalid IPv4: %s", SERV4_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
+		return -1;
+
+	/* See [1]. */
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
+
+		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, type)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
+		BPF_JMP_A(1),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
+
+		/*     1st_byte_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
+
+		/*     1st_half_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
+
+		/*     whole_user_ip4 == expected) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
+
+		/*      user_ip4 = addr4_rw.sin_addr */
+		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+
+		/*      user_port = addr4_rw.sin_port */
+		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(attach_type, insns,
+			  sizeof(insns) / sizeof(struct bpf_insn), comment);
+}
+
+static int bind6_prog_load(enum bpf_attach_type attach_type,
+			   const char *comment)
+{
+	struct sockaddr_in6 addr6_rw;
+	struct in6_addr ip6;
+
+	if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
+		log_err("Invalid IPv6: %s", SERV6_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
+			(struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
+		return -1;
+
+	/* See [1]. */
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET6 && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+		/*            5th_byte_of_user_ip6 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip6[1])),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
+
+		/*            3rd_half_of_user_ip6 == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip6[1])),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
+
+		/*            last_word_of_user_ip6 == expected) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip6[3])),
+		BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]),  /* See [2]. */
+		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
+
+
+#define STORE_IPV6_WORD(N)						       \
+		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]),     \
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
+			    offsetof(struct bpf_sock_addr, user_ip6[N]))
+
+		/*      user_ip6 = addr6_rw.sin6_addr */
+		STORE_IPV6_WORD(0),
+		STORE_IPV6_WORD(1),
+		STORE_IPV6_WORD(2),
+		STORE_IPV6_WORD(3),
+
+		/*      user_port = addr6_rw.sin6_port */
+		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(attach_type, insns,
+			  sizeof(insns) / sizeof(struct bpf_insn), comment);
+}
+
+static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
+{
+	char addr_buf[INET_NTOP_BUF];
+	struct sockaddr_storage addr;
+	struct sockaddr_in6 *addr6;
+	struct sockaddr_in *addr4;
+	socklen_t addr_len;
+	unsigned short port;
+	void *nip;
+
+	addr_len = sizeof(struct sockaddr_storage);
+	memset(&addr, 0, addr_len);
+
+	if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) {
+		if (addr.ss_family == AF_INET) {
+			addr4 = (struct sockaddr_in *)&addr;
+			nip = (void *)&addr4->sin_addr;
+			port = ntohs(addr4->sin_port);
+		} else if (addr.ss_family == AF_INET6) {
+			addr6 = (struct sockaddr_in6 *)&addr;
+			nip = (void *)&addr6->sin6_addr;
+			port = ntohs(addr6->sin6_port);
+		} else {
+			return;
+		}
+		const char *addr_str =
+			inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF);
+		printf(fmt, addr_str ? addr_str : "??", port);
+	}
+}
+
+static void print_local_ip_port(int sockfd, const char *fmt)
+{
+	print_ip_port(sockfd, getsockname, fmt);
+}
+
+static int start_server(int type, const struct sockaddr_storage *addr,
+			socklen_t addr_len)
+{
+
+	int fd;
+
+	fd = socket(addr->ss_family, type, 0);
+	if (fd == -1) {
+		log_err("Failed to create server socket");
+		goto out;
+	}
+
+	if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+		log_err("Failed to bind server socket");
+		goto close_out;
+	}
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 128) == -1) {
+			log_err("Failed to listen on server socket");
+			goto close_out;
+		}
+	}
+
+	print_local_ip_port(fd, "\t   Actual: bind(%s, %d)\n");
+
+	goto out;
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static void print_test_case_num(int domain, int type)
+{
+	static int test_num;
+
+	printf("Test case #%d (%s/%s):\n", ++test_num,
+	       (domain == AF_INET ? "IPv4" :
+		domain == AF_INET6 ? "IPv6" :
+		"unknown_domain"),
+	       (type == SOCK_STREAM ? "TCP" :
+		type == SOCK_DGRAM ? "UDP" :
+		"unknown_type"));
+}
+
+static int run_test_case(int domain, int type, const char *ip,
+			 unsigned short port)
+{
+	struct sockaddr_storage addr;
+	socklen_t addr_len = sizeof(addr);
+	int servfd = -1;
+	int err = 0;
+
+	print_test_case_num(domain, type);
+
+	if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr,
+			addr_len) == -1)
+		return -1;
+
+	printf("\tRequested: bind(%s, %d) ..\n", ip, port);
+	servfd = start_server(type, &addr, addr_len);
+	if (servfd == -1)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(servfd);
+	return err;
+}
+
+static void close_progs_fds(struct program *progs, size_t prog_cnt)
+{
+	size_t i;
+
+	for (i = 0; i < prog_cnt; ++i) {
+		close(progs[i].fd);
+		progs[i].fd = -1;
+	}
+}
+
+static int load_and_attach_progs(int cgfd, struct program *progs,
+				 size_t prog_cnt)
+{
+	size_t i;
+
+	for (i = 0; i < prog_cnt; ++i) {
+		progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
+		if (progs[i].fd != -1) {
+			log_err("Load with invalid type accepted for %s",
+				progs[i].name);
+			goto err;
+		}
+		progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
+		if (progs[i].fd == -1) {
+			log_err("Failed to load program %s", progs[i].name);
+			goto err;
+		}
+		if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
+				    BPF_F_ALLOW_OVERRIDE) != -1) {
+			log_err("Attach with invalid type accepted for %s",
+				progs[i].name);
+			goto err;
+		}
+		if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
+				    BPF_F_ALLOW_OVERRIDE) == -1) {
+			log_err("Failed to attach program %s", progs[i].name);
+			goto err;
+		}
+		printf("Attached %s program.\n", progs[i].name);
+	}
+
+	return 0;
+err:
+	close_progs_fds(progs, prog_cnt);
+	return -1;
+}
+
+static int run_domain_test(int domain, int cgfd, struct program *progs,
+			   size_t prog_cnt, const char *ip, unsigned short port)
+{
+	int err = 0;
+
+	if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1)
+		goto err;
+
+	if (run_test_case(domain, SOCK_STREAM, ip, port) == -1)
+		goto err;
+
+	if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close_progs_fds(progs, prog_cnt);
+	return err;
+}
+
+static int run_test(void)
+{
+	size_t inet6_prog_cnt;
+	size_t inet_prog_cnt;
+	int cgfd = -1;
+	int err = 0;
+
+	struct program inet6_progs[] = {
+		{BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
+		 BPF_CGROUP_INET4_BIND},
+	};
+	inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
+
+	struct program inet_progs[] = {
+		{BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
+		 BPF_CGROUP_INET6_BIND},
+	};
+	inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP,
+			    SERV4_PORT) == -1)
+		goto err;
+
+	if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt,
+			    SERV6_IP, SERV6_PORT) == -1)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	printf(err ? "### FAIL\n" : "### SUCCESS\n");
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	return run_test();
+}

From 3679d585bbc07a1ac4448d5b478b492cad3587ce Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:04 -0700
Subject: [PATCH 1555/1678] net: Introduce __inet_bind() and __inet6_bind

Refactor `bind()` code to make it ready to be called from BPF helper
function `bpf_bind()` (will be added soon). Implementation of
`inet_bind()` and `inet6_bind()` is separated into `__inet_bind()` and
`__inet6_bind()` correspondingly. These function can be used from both
`sk_prot->bind` and `bpf_bind()` contexts.

New functions have two additional arguments.

`force_bind_address_no_port` forces binding to IP only w/o checking
`inet_sock.bind_address_no_port` field. It'll allow to bind local end of
a connection to desired IP in `bpf_bind()` w/o changing
`bind_address_no_port` field of a socket. It's useful since `bpf_bind()`
can return an error and we'd need to restore original value of
`bind_address_no_port` in that case if we changed this before calling to
the helper.

`with_lock` specifies whether to lock socket when working with `struct
sk` or not. The argument is set to `true` for `sk_prot->bind`, i.e. old
behavior is preserved. But it will be set to `false` for `bpf_bind()`
use-case. The reason is all call-sites, where `bpf_bind()` will be
called, already hold that socket lock.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/inet_common.h |  2 ++
 include/net/ipv6.h        |  2 ++
 net/ipv4/af_inet.c        | 39 ++++++++++++++++++++++++---------------
 net/ipv6/af_inet6.c       | 37 ++++++++++++++++++++++++-------------
 4 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 500f81375200..384b90c62c0b 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -32,6 +32,8 @@ int inet_shutdown(struct socket *sock, int how);
 int inet_listen(struct socket *sock, int backlog);
 void inet_sock_destruct(struct sock *sk);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+		bool force_bind_address_no_port, bool with_lock);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		 int peer);
 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 50a6f0ddb878..2e5fedc56e59 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1066,6 +1066,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info);
 void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu);
 
 int inet6_release(struct socket *sock);
+int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len,
+		 bool force_bind_address_no_port, bool with_lock);
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		  int peer);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2dec266507dc..e203a39d6988 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -432,30 +432,37 @@ EXPORT_SYMBOL(inet_release);
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
-	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 	struct sock *sk = sock->sk;
-	struct inet_sock *inet = inet_sk(sk);
-	struct net *net = sock_net(sk);
-	unsigned short snum;
-	int chk_addr_ret;
-	u32 tb_id = RT_TABLE_LOCAL;
 	int err;
 
 	/* If the socket has its own bind function then use it. (RAW) */
 	if (sk->sk_prot->bind) {
-		err = sk->sk_prot->bind(sk, uaddr, addr_len);
-		goto out;
+		return sk->sk_prot->bind(sk, uaddr, addr_len);
 	}
-	err = -EINVAL;
 	if (addr_len < sizeof(struct sockaddr_in))
-		goto out;
+		return -EINVAL;
 
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
 	err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
 	if (err)
-		goto out;
+		return err;
+
+	return __inet_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet_bind);
+
+int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+		bool force_bind_address_no_port, bool with_lock)
+{
+	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
+	unsigned short snum;
+	int chk_addr_ret;
+	u32 tb_id = RT_TABLE_LOCAL;
+	int err;
 
 	if (addr->sin_family != AF_INET) {
 		/* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
@@ -499,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	 *      would be illegal to use them (multicast/broadcast) in
 	 *      which case the sending device address is used.
 	 */
-	lock_sock(sk);
+	if (with_lock)
+		lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
 	err = -EINVAL;
@@ -511,7 +519,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		inet->inet_saddr = 0;  /* Use device */
 
 	/* Make sure we are allowed to bind here. */
-	if ((snum || !inet->bind_address_no_port) &&
+	if ((snum || !(inet->bind_address_no_port ||
+		       force_bind_address_no_port)) &&
 	    sk->sk_prot->get_port(sk, snum)) {
 		inet->inet_saddr = inet->inet_rcv_saddr = 0;
 		err = -EADDRINUSE;
@@ -528,11 +537,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	sk_dst_reset(sk);
 	err = 0;
 out_release_sock:
-	release_sock(sk);
+	if (with_lock)
+		release_sock(sk);
 out:
 	return err;
 }
-EXPORT_SYMBOL(inet_bind);
 
 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		       int addr_len, int flags)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fa24e3f06ac6..13110bee5c14 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
 /* bind for INET6 API */
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
-	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct net *net = sock_net(sk);
-	__be32 v4addr = 0;
-	unsigned short snum;
-	bool saved_ipv6only;
-	int addr_type = 0;
 	int err = 0;
 
 	/* If the socket has its own bind function then use it. */
@@ -302,11 +294,28 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		return err;
 
+	return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
+int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+		 bool force_bind_address_no_port, bool with_lock)
+{
+	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
+	__be32 v4addr = 0;
+	unsigned short snum;
+	bool saved_ipv6only;
+	int addr_type = 0;
+	int err = 0;
+
 	if (addr->sin6_family != AF_INET6)
 		return -EAFNOSUPPORT;
 
 	addr_type = ipv6_addr_type(&addr->sin6_addr);
-	if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+	if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
 		return -EINVAL;
 
 	snum = ntohs(addr->sin6_port);
@@ -314,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
-	lock_sock(sk);
+	if (with_lock)
+		lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
 	if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -402,7 +412,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		sk->sk_ipv6only = 1;
 
 	/* Make sure we are allowed to bind here. */
-	if ((snum || !inet->bind_address_no_port) &&
+	if ((snum || !(inet->bind_address_no_port ||
+		       force_bind_address_no_port)) &&
 	    sk->sk_prot->get_port(sk, snum)) {
 		sk->sk_ipv6only = saved_ipv6only;
 		inet_reset_saddr(sk);
@@ -418,13 +429,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	inet->inet_dport = 0;
 	inet->inet_daddr = 0;
 out:
-	release_sock(sk);
+	if (with_lock)
+		release_sock(sk);
 	return err;
 out_unlock:
 	rcu_read_unlock();
 	goto out;
 }
-EXPORT_SYMBOL(inet6_bind);
 
 int inet6_release(struct socket *sock)
 {

From d74bad4e74ee373787a9ae24197c17b7cdc428d5 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:05 -0700
Subject: [PATCH 1556/1678] bpf: Hooks for sys_connect

== The problem ==

See description of the problem in the initial patch of this patch set.

== The solution ==

The patch provides much more reliable in-kernel solution for the 2nd
part of the problem: making outgoing connecttion from desired IP.

It adds new attach types `BPF_CGROUP_INET4_CONNECT` and
`BPF_CGROUP_INET6_CONNECT` for program type
`BPF_PROG_TYPE_CGROUP_SOCK_ADDR` that can be used to override both
source and destination of a connection at connect(2) time.

Local end of connection can be bound to desired IP using newly
introduced BPF-helper `bpf_bind()`. It allows to bind to only IP though,
and doesn't support binding to port, i.e. leverages
`IP_BIND_ADDRESS_NO_PORT` socket option. There are two reasons for this:
* looking for a free port is expensive and can affect performance
  significantly;
* there is no use-case for port.

As for remote end (`struct sockaddr *` passed by user), both parts of it
can be overridden, remote IP and remote port. It's useful if an
application inside cgroup wants to connect to another application inside
same cgroup or to itself, but knows nothing about IP assigned to the
cgroup.

Support is added for IPv4 and IPv6, for TCP and UDP.

IPv4 and IPv6 have separate attach types for same reason as sys_bind
hooks, i.e. to prevent reading from / writing to e.g. user_ip6 fields
when user passes sockaddr_in since it'd be out-of-bound.

== Implementation notes ==

The patch introduces new field in `struct proto`: `pre_connect` that is
a pointer to a function with same signature as `connect` but is called
before it. The reason is in some cases BPF hooks should be called way
before control is passed to `sk->sk_prot->connect`. Specifically
`inet_dgram_connect` autobinds socket before calling
`sk->sk_prot->connect` and there is no way to call `bpf_bind()` from
hooks from e.g. `ip4_datagram_connect` or `ip6_datagram_connect` since
it'd cause double-bind. On the other hand `proto.pre_connect` provides a
flexible way to add BPF hooks for connect only for necessary `proto` and
call them at desired time before `connect`. Since `bpf_bind()` is
allowed to bind only to IP and autobind in `inet_dgram_connect` binds
only port there is no chance of double-bind.

bpf_bind() sets `force_bind_address_no_port` to bind to only IP despite
of value of `bind_address_no_port` socket field.

bpf_bind() sets `with_lock` to `false` when calling to __inet_bind()
and __inet6_bind() since all call-sites, where bpf_bind() is called,
already hold socket lock.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 31 +++++++++++++++++++++
 include/net/addrconf.h     |  7 +++++
 include/net/sock.h         |  3 ++
 include/net/udp.h          |  1 +
 include/uapi/linux/bpf.h   | 12 +++++++-
 kernel/bpf/syscall.c       |  8 ++++++
 net/core/filter.c          | 57 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c         | 13 +++++++++
 net/ipv4/tcp_ipv4.c        | 16 +++++++++++
 net/ipv4/udp.c             | 14 ++++++++++
 net/ipv6/af_inet6.c        |  5 ++++
 net/ipv6/tcp_ipv6.c        | 16 +++++++++++
 net/ipv6/udp.c             | 20 +++++++++++++
 13 files changed, 202 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 67dc4a6471ad..c6ab295e6dcb 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -116,12 +116,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type)			       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled)	{					       \
+		lock_sock(sk);						       \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type);    \
+		release_sock(sk);					       \
+	}								       \
+	__ret;								       \
+})
+
 #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)			       \
 	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
 
 #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)			       \
 	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
 
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
+					    sk->sk_prot->pre_connect)
+
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr)		       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr)		       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)				       \
 ({									       \
 	int __ret = 0;							       \
@@ -151,11 +177,16 @@ struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 132e5b95167a..378d601258be 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -231,6 +231,13 @@ struct ipv6_stub {
 };
 extern const struct ipv6_stub *ipv6_stub __read_mostly;
 
+/* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
+struct ipv6_bpf_stub {
+	int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+			  bool force_bind_address_no_port, bool with_lock);
+};
+extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+
 /*
  * identify MLD packets for MLD filter exceptions
  */
diff --git a/include/net/sock.h b/include/net/sock.h
index b8ff435fa96e..49bd2c1796b0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,6 +1026,9 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
 struct proto {
 	void			(*close)(struct sock *sk,
 					long timeout);
+	int			(*pre_connect)(struct sock *sk,
+					struct sockaddr *uaddr,
+					int addr_len);
 	int			(*connect)(struct sock *sk,
 					struct sockaddr *uaddr,
 					int addr_len);
diff --git a/include/net/udp.h b/include/net/udp.h
index 850a8e581cce..0676b272f6ac 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -273,6 +273,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
 int udp_rcv(struct sk_buff *skb);
 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int udp_init_sock(struct sock *sk);
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int __udp_disconnect(struct sock *sk, int flags);
 int udp_disconnect(struct sock *sk, int flags);
 __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ce3e69e3c793..77afaf1ba556 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -150,6 +150,8 @@ enum bpf_attach_type {
 	BPF_SK_MSG_VERDICT,
 	BPF_CGROUP_INET4_BIND,
 	BPF_CGROUP_INET6_BIND,
+	BPF_CGROUP_INET4_CONNECT,
+	BPF_CGROUP_INET6_CONNECT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -744,6 +746,13 @@ union bpf_attr {
  *     @flags: reserved for future use
  *     Return: SK_PASS
  *
+ * int bpf_bind(ctx, addr, addr_len)
+ *     Bind socket to address. Only binding to IP is supported, no port can be
+ *     set in addr.
+ *     @ctx: pointer to context of type bpf_sock_addr
+ *     @addr: pointer to struct sockaddr to bind socket to
+ *     @addr_len: length of sockaddr structure
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -809,7 +818,8 @@ union bpf_attr {
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
-	FN(msg_pull_data),
+	FN(msg_pull_data),		\
+	FN(bind),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2cad66a4cacb..cf1b29bc0ab8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1180,6 +1180,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 		switch (expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET6_BIND:
+		case BPF_CGROUP_INET4_CONNECT:
+		case BPF_CGROUP_INET6_CONNECT:
 			return 0;
 		default:
 			return -EINVAL;
@@ -1491,6 +1493,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		break;
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_INET4_CONNECT:
+	case BPF_CGROUP_INET6_CONNECT:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 		break;
 	case BPF_CGROUP_SOCK_OPS:
@@ -1557,6 +1561,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		break;
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_INET4_CONNECT:
+	case BPF_CGROUP_INET6_CONNECT:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 		break;
 	case BPF_CGROUP_SOCK_OPS:
@@ -1610,6 +1616,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_SOCK_CREATE:
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_INET4_CONNECT:
+	case BPF_CGROUP_INET6_CONNECT:
 	case BPF_CGROUP_SOCK_OPS:
 	case BPF_CGROUP_DEVICE:
 		break;
diff --git a/net/core/filter.c b/net/core/filter.c
index c08e5b121558..bdb9cadd4d27 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -33,6 +33,7 @@
 #include <linux/if_packet.h>
 #include <linux/if_arp.h>
 #include <linux/gfp.h>
+#include <net/inet_common.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/netlink.h>
@@ -3656,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
+
+BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
+	   int, addr_len)
+{
+#ifdef CONFIG_INET
+	struct sock *sk = ctx->sk;
+	int err;
+
+	/* Binding to port can be expensive so it's prohibited in the helper.
+	 * Only binding to IP is supported.
+	 */
+	err = -EINVAL;
+	if (addr->sa_family == AF_INET) {
+		if (addr_len < sizeof(struct sockaddr_in))
+			return err;
+		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+			return err;
+		return __inet_bind(sk, addr, addr_len, true, false);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (addr->sa_family == AF_INET6) {
+		if (addr_len < SIN6_LEN_RFC2133)
+			return err;
+		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+			return err;
+		/* ipv6_bpf_stub cannot be NULL, since it's called from
+		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
+		 */
+		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+#endif /* CONFIG_IPV6 */
+	}
+#endif /* CONFIG_INET */
+
+	return -EAFNOSUPPORT;
+}
+
+static const struct bpf_func_proto bpf_bind_proto = {
+	.func		= bpf_bind,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
 static const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -3707,6 +3754,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	 */
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_bind:
+		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_CONNECT:
+		case BPF_CGROUP_INET6_CONNECT:
+			return &bpf_bind_proto;
+		default:
+			return NULL;
+		}
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4213,6 +4268,7 @@ static bool sock_addr_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET4_CONNECT:
 			break;
 		default:
 			return false;
@@ -4221,6 +4277,7 @@ static bool sock_addr_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET6_BIND:
+		case BPF_CGROUP_INET6_CONNECT:
 			break;
 		default:
 			return false;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e203a39d6988..488fe26ac8e5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -547,12 +547,19 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		       int addr_len, int flags)
 {
 	struct sock *sk = sock->sk;
+	int err;
 
 	if (addr_len < sizeof(uaddr->sa_family))
 		return -EINVAL;
 	if (uaddr->sa_family == AF_UNSPEC)
 		return sk->sk_prot->disconnect(sk, flags);
 
+	if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+		err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+		if (err)
+			return err;
+	}
+
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
 	return sk->sk_prot->connect(sk, uaddr, addr_len);
@@ -633,6 +640,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		if (sk->sk_state != TCP_CLOSE)
 			goto out;
 
+		if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+			err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
+			if (err)
+				goto out;
+		}
+
 		err = sk->sk_prot->connect(sk, uaddr, addr_len);
 		if (err < 0)
 			goto out;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2c6aec2643e8..3c11d992d784 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
+static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+			      int addr_len)
+{
+	/* This check is replicated from tcp_v4_connect() and intended to
+	 * prevent BPF program called below from accessing bytes that are out
+	 * of the bound specified by user in addr_len.
+	 */
+	if (addr_len < sizeof(struct sockaddr_in))
+		return -EINVAL;
+
+	sock_owned_by_me(sk);
+
+	return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr);
+}
+
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -2409,6 +2424,7 @@ struct proto tcp_prot = {
 	.name			= "TCP",
 	.owner			= THIS_MODULE,
 	.close			= tcp_close,
+	.pre_connect		= tcp_v4_pre_connect,
 	.connect		= tcp_v4_connect,
 	.disconnect		= tcp_disconnect,
 	.accept			= inet_csk_accept,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 908fc02fb4f8..9c6c77fec963 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1658,6 +1658,19 @@ csum_copy_err:
 	goto try_again;
 }
 
+int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	/* This check is replicated from __ip4_datagram_connect() and
+	 * intended to prevent BPF program called below from accessing bytes
+	 * that are out of the bound specified by user in addr_len.
+	 */
+	if (addr_len < sizeof(struct sockaddr_in))
+		return -EINVAL;
+
+	return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr);
+}
+EXPORT_SYMBOL(udp_pre_connect);
+
 int __udp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -2530,6 +2543,7 @@ struct proto udp_prot = {
 	.name			= "UDP",
 	.owner			= THIS_MODULE,
 	.close			= udp_lib_close,
+	.pre_connect		= udp_pre_connect,
 	.connect		= ip4_datagram_connect,
 	.disconnect		= udp_disconnect,
 	.ioctl			= udp_ioctl,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 13110bee5c14..566cec0e0a44 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -887,6 +887,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.nd_tbl	= &nd_tbl,
 };
 
+static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+	.inet6_bind = __inet6_bind,
+};
+
 static int __init inet6_init(void)
 {
 	struct list_head *r;
@@ -1043,6 +1047,7 @@ static int __init inet6_init(void)
 	/* ensure that ipv6 stubs are visible only after ipv6 is ready */
 	wmb();
 	ipv6_stub = &ipv6_stub_impl;
+	ipv6_bpf_stub = &ipv6_bpf_stub_impl;
 out:
 	return err;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5425d7b100ee..6469b741cf5a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
 				   ipv6_hdr(skb)->saddr.s6_addr32);
 }
 
+static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+			      int addr_len)
+{
+	/* This check is replicated from tcp_v6_connect() and intended to
+	 * prevent BPF program called below from accessing bytes that are out
+	 * of the bound specified by user in addr_len.
+	 */
+	if (addr_len < SIN6_LEN_RFC2133)
+		return -EINVAL;
+
+	sock_owned_by_me(sk);
+
+	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+}
+
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			  int addr_len)
 {
@@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = {
 	.name			= "TCPv6",
 	.owner			= THIS_MODULE,
 	.close			= tcp_close,
+	.pre_connect		= tcp_v6_pre_connect,
 	.connect		= tcp_v6_connect,
 	.disconnect		= tcp_disconnect,
 	.accept			= inet_csk_accept,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ad30f5e31969..6861ed479469 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
 	}
 }
 
+static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+			     int addr_len)
+{
+	/* The following checks are replicated from __ip6_datagram_connect()
+	 * and intended to prevent BPF program called below from accessing
+	 * bytes that are out of the bound specified by user in addr_len.
+	 */
+	if (uaddr->sa_family == AF_INET) {
+		if (__ipv6_only_sock(sk))
+			return -EAFNOSUPPORT;
+		return udp_pre_connect(sk, uaddr, addr_len);
+	}
+
+	if (addr_len < SIN6_LEN_RFC2133)
+		return -EINVAL;
+
+	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+}
+
 /**
  *	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
  *	@sk:	socket we are sending on
@@ -1512,6 +1531,7 @@ struct proto udpv6_prot = {
 	.name			= "UDPv6",
 	.owner			= THIS_MODULE,
 	.close			= udp_lib_close,
+	.pre_connect		= udpv6_pre_connect,
 	.connect		= ip6_datagram_connect,
 	.disconnect		= udp_disconnect,
 	.ioctl			= udp_ioctl,

From 622adafb2a12cac6042d4d0d7eb735b7621bf28c Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:06 -0700
Subject: [PATCH 1557/1678] selftests/bpf: Selftest for sys_connect hooks

Add selftest for BPF_CGROUP_INET4_CONNECT and BPF_CGROUP_INET6_CONNECT
attach types.

Try to connect(2) to specified IP:port and test that:
* remote IP:port pair is overridden;
* local end of connection is bound to specified IP.

All combinations of IPv4/IPv6 and TCP/UDP are tested.

Example:
  # tcpdump -pn -i lo -w connect.pcap 2>/dev/null &
  [1] 478
  # strace -qqf -e connect -o connect.trace ./test_sock_addr.sh
  Wait for testing IPv4/IPv6 to become available ... OK
  Load bind4 with invalid type (can pollute stderr) ... REJECTED
  Load bind4 with valid type ... OK
  Attach bind4 with invalid type ... REJECTED
  Attach bind4 with valid type ... OK
  Load connect4 with invalid type (can pollute stderr) libbpf: load bpf \
    program failed: Permission denied
  libbpf: -- BEGIN DUMP LOG ---
  libbpf:
  0: (b7) r2 = 23569
  1: (63) *(u32 *)(r1 +24) = r2
  2: (b7) r2 = 16777343
  3: (63) *(u32 *)(r1 +4) = r2
  invalid bpf_context access off=4 size=4
  [ 1518.404609] random: crng init done

  libbpf: -- END LOG --
  libbpf: failed to load program 'cgroup/connect4'
  libbpf: failed to load object './connect4_prog.o'
  ... REJECTED
  Load connect4 with valid type ... OK
  Attach connect4 with invalid type ... REJECTED
  Attach connect4 with valid type ... OK
  Test case #1 (IPv4/TCP):
          Requested: bind(192.168.1.254, 4040) ..
             Actual: bind(127.0.0.1, 4444)
          Requested: connect(192.168.1.254, 4040) from (*, *) ..
             Actual: connect(127.0.0.1, 4444) from (127.0.0.4, 56068)
  Test case #2 (IPv4/UDP):
          Requested: bind(192.168.1.254, 4040) ..
             Actual: bind(127.0.0.1, 4444)
          Requested: connect(192.168.1.254, 4040) from (*, *) ..
             Actual: connect(127.0.0.1, 4444) from (127.0.0.4, 56447)
  Load bind6 with invalid type (can pollute stderr) ... REJECTED
  Load bind6 with valid type ... OK
  Attach bind6 with invalid type ... REJECTED
  Attach bind6 with valid type ... OK
  Load connect6 with invalid type (can pollute stderr) libbpf: load bpf \
    program failed: Permission denied
  libbpf: -- BEGIN DUMP LOG ---
  libbpf:
  0: (b7) r6 = 0
  1: (63) *(u32 *)(r1 +12) = r6
  invalid bpf_context access off=12 size=4

  libbpf: -- END LOG --
  libbpf: failed to load program 'cgroup/connect6'
  libbpf: failed to load object './connect6_prog.o'
  ... REJECTED
  Load connect6 with valid type ... OK
  Attach connect6 with invalid type ... REJECTED
  Attach connect6 with valid type ... OK
  Test case #3 (IPv6/TCP):
          Requested: bind(face:b00c:1234:5678::abcd, 6060) ..
             Actual: bind(::1, 6666)
          Requested: connect(face:b00c:1234:5678::abcd, 6060) from (*, *)
             Actual: connect(::1, 6666) from (::6, 37458)
  Test case #4 (IPv6/UDP):
          Requested: bind(face:b00c:1234:5678::abcd, 6060) ..
             Actual: bind(::1, 6666)
          Requested: connect(face:b00c:1234:5678::abcd, 6060) from (*, *)
             Actual: connect(::1, 6666) from (::6, 39315)
  ### SUCCESS
  # egrep 'connect\(.*AF_INET' connect.trace | \
  > egrep -vw 'htons\(1025\)' | fold -b -s -w 72
  502   connect(7, {sa_family=AF_INET, sin_port=htons(4040),
  sin_addr=inet_addr("192.168.1.254")}, 128) = 0
  502   connect(8, {sa_family=AF_INET, sin_port=htons(4040),
  sin_addr=inet_addr("192.168.1.254")}, 128) = 0
  502   connect(9, {sa_family=AF_INET6, sin6_port=htons(6060),
  inet_pton(AF_INET6, "face:b00c:1234:5678::abcd", &sin6_addr),
  sin6_flowinfo=0, sin6_scope_id=0}, 128) = 0
  502   connect(10, {sa_family=AF_INET6, sin6_port=htons(6060),
  inet_pton(AF_INET6, "face:b00c:1234:5678::abcd", &sin6_addr),
  sin6_flowinfo=0, sin6_scope_id=0}, 128) = 0
  # fg
  tcpdump -pn -i lo -w connect.pcap 2> /dev/null
  # tcpdump -r connect.pcap -n tcp | cut -c 1-72
  reading from file connect.pcap, link-type EN10MB (Ethernet)
  17:57:40.383533 IP 127.0.0.4.56068 > 127.0.0.1.4444: Flags [S], seq 1333
  17:57:40.383566 IP 127.0.0.1.4444 > 127.0.0.4.56068: Flags [S.], seq 112
  17:57:40.383589 IP 127.0.0.4.56068 > 127.0.0.1.4444: Flags [.], ack 1, w
  17:57:40.384578 IP 127.0.0.1.4444 > 127.0.0.4.56068: Flags [R.], seq 1,
  17:57:40.403327 IP6 ::6.37458 > ::1.6666: Flags [S], seq 406513443, win
  17:57:40.403357 IP6 ::1.6666 > ::6.37458: Flags [S.], seq 2448389240, ac
  17:57:40.403376 IP6 ::6.37458 > ::1.6666: Flags [.], ack 1, win 342, opt
  17:57:40.404263 IP6 ::1.6666 > ::6.37458: Flags [R.], seq 1, ack 1, win

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h                |  12 +-
 tools/lib/bpf/libbpf.c                        |   2 +
 tools/testing/selftests/bpf/Makefile          |   5 +-
 tools/testing/selftests/bpf/bpf_helpers.h     |   2 +
 tools/testing/selftests/bpf/connect4_prog.c   |  45 ++++++++
 tools/testing/selftests/bpf/connect6_prog.c   |  61 ++++++++++
 tools/testing/selftests/bpf/test_sock_addr.c  | 104 +++++++++++++++++-
 tools/testing/selftests/bpf/test_sock_addr.sh |  57 ++++++++++
 8 files changed, 284 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/connect4_prog.c
 create mode 100644 tools/testing/selftests/bpf/connect6_prog.c
 create mode 100755 tools/testing/selftests/bpf/test_sock_addr.sh

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f2120c5c0578..71051d01e8dd 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -150,6 +150,8 @@ enum bpf_attach_type {
 	BPF_SK_MSG_VERDICT,
 	BPF_CGROUP_INET4_BIND,
 	BPF_CGROUP_INET6_BIND,
+	BPF_CGROUP_INET4_CONNECT,
+	BPF_CGROUP_INET6_CONNECT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -744,6 +746,13 @@ union bpf_attr {
  *     @flags: reserved for future use
  *     Return: SK_PASS
  *
+ * int bpf_bind(ctx, addr, addr_len)
+ *     Bind socket to address. Only binding to IP is supported, no port can be
+ *     set in addr.
+ *     @ctx: pointer to context of type bpf_sock_addr
+ *     @addr: pointer to struct sockaddr to bind socket to
+ *     @addr_len: length of sockaddr structure
+ *     Return: 0 on success or negative error code
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -809,7 +818,8 @@ union bpf_attr {
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
-	FN(msg_pull_data),
+	FN(msg_pull_data),		\
+	FN(bind),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d7ce8818982c..5922443063f0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1887,6 +1887,8 @@ static const struct {
 	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
 	BPF_SA_PROG_SEC("cgroup/bind4",	BPF_CGROUP_INET4_BIND),
 	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
+	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
+	BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
 };
 
 #undef BPF_PROG_SEC
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f4717c910874..c64d4ebc77ff 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -30,14 +30,15 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
-	sockmap_tcp_msg_prog.o
+	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
 	test_libbpf.sh \
 	test_xdp_redirect.sh \
 	test_xdp_meta.sh \
-	test_offload.py
+	test_offload.py \
+	test_sock_addr.sh
 
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_libbpf_open
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 7cae376d8d0c..d8223d99f96d 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -94,6 +94,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
 	(void *) BPF_FUNC_msg_cork_bytes;
 static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
+	(void *) BPF_FUNC_bind;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000000..5a88a681d2ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP4		0x7f000004U
+#define DST_REWRITE_IP4		0x7f000001U
+#define DST_REWRITE_PORT4	4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in sa;
+
+	/* Rewrite destination. */
+	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+	if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+		///* Rewrite source. */
+		memset(&sa, 0, sizeof(sa));
+
+		sa.sin_family = AF_INET;
+		sa.sin_port = bpf_htons(0);
+		sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000000..8ea3f7d12dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0	0
+#define SRC_REWRITE_IP6_1	0
+#define SRC_REWRITE_IP6_2	0
+#define SRC_REWRITE_IP6_3	6
+
+#define DST_REWRITE_IP6_0	0
+#define DST_REWRITE_IP6_1	0
+#define DST_REWRITE_IP6_2	0
+#define DST_REWRITE_IP6_3	1
+
+#define DST_REWRITE_PORT6	6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in6 sa;
+
+	/* Rewrite destination. */
+	ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+	ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+	ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+	ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+	ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+	if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+		/* Rewrite source. */
+		memset(&sa, 0, sizeof(sa));
+
+		sa.sin6_family = AF_INET6;
+		sa.sin6_port = bpf_htons(0);
+
+		sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+		sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+		sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+		sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index a57e13a65e37..d488f20926e8 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -12,10 +12,13 @@
 #include <linux/filter.h>
 
 #include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
 
 #define CG_PATH	"/foo"
+#define CONNECT4_PROG_PATH	"./connect4_prog.o"
+#define CONNECT6_PROG_PATH	"./connect6_prog.o"
 
 #define SERV4_IP		"192.168.1.254"
 #define SERV4_REWRITE_IP	"127.0.0.1"
@@ -254,6 +257,41 @@ static int bind6_prog_load(enum bpf_attach_type attach_type,
 			  sizeof(insns) / sizeof(struct bpf_insn), comment);
 }
 
+static int connect_prog_load_path(const char *path,
+				  enum bpf_attach_type attach_type,
+				  const char *comment)
+{
+	struct bpf_prog_load_attr attr;
+	struct bpf_object *obj;
+	int prog_fd;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = path;
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+	attr.expected_attach_type = attach_type;
+
+	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+		if (comment)
+			log_err(">>> Loading %s program at %s error.\n",
+				comment, path);
+		return -1;
+	}
+
+	return prog_fd;
+}
+
+static int connect4_prog_load(enum bpf_attach_type attach_type,
+			      const char *comment)
+{
+	return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment);
+}
+
+static int connect6_prog_load(enum bpf_attach_type attach_type,
+			      const char *comment)
+{
+	return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment);
+}
+
 static void print_ip_port(int sockfd, info_fn fn, const char *fmt)
 {
 	char addr_buf[INET_NTOP_BUF];
@@ -290,6 +328,11 @@ static void print_local_ip_port(int sockfd, const char *fmt)
 	print_ip_port(sockfd, getsockname, fmt);
 }
 
+static void print_remote_ip_port(int sockfd, const char *fmt)
+{
+	print_ip_port(sockfd, getpeername, fmt);
+}
+
 static int start_server(int type, const struct sockaddr_storage *addr,
 			socklen_t addr_len)
 {
@@ -324,6 +367,39 @@ out:
 	return fd;
 }
 
+static int connect_to_server(int type, const struct sockaddr_storage *addr,
+			     socklen_t addr_len)
+{
+	int domain;
+	int fd;
+
+	domain = addr->ss_family;
+
+	if (domain != AF_INET && domain != AF_INET6) {
+		log_err("Unsupported address family");
+		return -1;
+	}
+
+	fd = socket(domain, type, 0);
+	if (fd == -1) {
+		log_err("Failed to creating client socket");
+		return -1;
+	}
+
+	if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+		log_err("Fail to connect to server");
+		goto err;
+	}
+
+	print_remote_ip_port(fd, "\t   Actual: connect(%s, %d)");
+	print_local_ip_port(fd, " from (%s, %d)\n");
+
+	return 0;
+err:
+	close(fd);
+	return -1;
+}
+
 static void print_test_case_num(int domain, int type)
 {
 	static int test_num;
@@ -356,6 +432,10 @@ static int run_test_case(int domain, int type, const char *ip,
 	if (servfd == -1)
 		goto err;
 
+	printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port);
+	if (connect_to_server(type, &addr, addr_len))
+		goto err;
+
 	goto out;
 err:
 	err = -1;
@@ -380,29 +460,41 @@ static int load_and_attach_progs(int cgfd, struct program *progs,
 	size_t i;
 
 	for (i = 0; i < prog_cnt; ++i) {
+		printf("Load %s with invalid type (can pollute stderr) ",
+		       progs[i].name);
+		fflush(stdout);
 		progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL);
 		if (progs[i].fd != -1) {
 			log_err("Load with invalid type accepted for %s",
 				progs[i].name);
 			goto err;
 		}
+		printf("... REJECTED\n");
+
+		printf("Load %s with valid type", progs[i].name);
 		progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name);
 		if (progs[i].fd == -1) {
 			log_err("Failed to load program %s", progs[i].name);
 			goto err;
 		}
+		printf(" ... OK\n");
+
+		printf("Attach %s with invalid type", progs[i].name);
 		if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type,
 				    BPF_F_ALLOW_OVERRIDE) != -1) {
 			log_err("Attach with invalid type accepted for %s",
 				progs[i].name);
 			goto err;
 		}
+		printf(" ... REJECTED\n");
+
+		printf("Attach %s with valid type", progs[i].name);
 		if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type,
 				    BPF_F_ALLOW_OVERRIDE) == -1) {
 			log_err("Failed to attach program %s", progs[i].name);
 			goto err;
 		}
-		printf("Attached %s program.\n", progs[i].name);
+		printf(" ... OK\n");
 	}
 
 	return 0;
@@ -443,12 +535,16 @@ static int run_test(void)
 	struct program inet6_progs[] = {
 		{BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6",
 		 BPF_CGROUP_INET4_BIND},
+		{BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6",
+		 BPF_CGROUP_INET4_CONNECT},
 	};
 	inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program);
 
 	struct program inet_progs[] = {
 		{BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4",
 		 BPF_CGROUP_INET6_BIND},
+		{BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4",
+		 BPF_CGROUP_INET6_CONNECT},
 	};
 	inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program);
 
@@ -482,5 +578,11 @@ out:
 
 int main(int argc, char **argv)
 {
+	if (argc < 2) {
+		fprintf(stderr,
+			"%s has to be run via %s.sh. Skip direct run.\n",
+			argv[0], argv[0]);
+		exit(0);
+	}
 	return run_test();
 }
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000000..c6e1dcf992c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+set -eu
+
+ping_once()
+{
+	ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
+}
+
+wait_for_ip()
+{
+	local _i
+	echo -n "Wait for testing IPv4/IPv6 to become available "
+	for _i in $(seq ${MAX_PING_TRIES}); do
+		echo -n "."
+		if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
+			echo " OK"
+			return
+		fi
+	done
+	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+	exit 1
+}
+
+setup()
+{
+	# Create testing interfaces not to interfere with current environment.
+	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+	ip link set ${TEST_IF} up
+	ip link set ${TEST_IF_PEER} up
+
+	ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
+	ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
+	wait_for_ip
+}
+
+cleanup()
+{
+	ip link del ${TEST_IF} 2>/dev/null || :
+	ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+	trap cleanup EXIT 2 3 6 15
+	setup
+	./test_sock_addr setup_done
+}
+
+BASENAME=$(basename $0 .sh)
+TEST_IF="${BASENAME}1"
+TEST_IF_PEER="${BASENAME}2"
+TEST_IPv4="127.0.0.4/8"
+TEST_IPv6="::6/128"
+MAX_PING_TRIES=5
+
+main

From aac3fc320d9404f2665a8b1249dc3170d5fa3caf Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:07 -0700
Subject: [PATCH 1558/1678] bpf: Post-hooks for sys_bind

"Post-hooks" are hooks that are called right before returning from
sys_bind. At this time IP and port are already allocated and no further
changes to `struct sock` can happen before returning from sys_bind but
BPF program has a chance to inspect the socket and change sys_bind
result.

Specifically it can e.g. inspect what port was allocated and if it
doesn't satisfy some policy, BPF program can force sys_bind to fail and
return EPERM to user.

Another example of usage is recording the IP:port pair to some map to
use it in later calls to sys_connect. E.g. if some TCP server inside
cgroup was bound to some IP:port_n, it can be recorded to a map. And
later when some TCP client inside same cgroup is trying to connect to
127.0.0.1:port_n, BPF hook for sys_connect can override the destination
and connect application to IP:port_n instead of 127.0.0.1:port_n. That
helps forcing all applications inside a cgroup to use desired IP and not
break those applications if they e.g. use localhost to communicate
between each other.

== Implementation details ==

Post-hooks are implemented as two new attach types
`BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND` for
existing prog type `BPF_PROG_TYPE_CGROUP_SOCK`.

Separate attach types for IPv4 and IPv6 are introduced to avoid access
to IPv6 field in `struct sock` from `inet_bind()` and to IPv4 field from
`inet6_bind()` since those fields might not make sense in such cases.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  16 ++++-
 include/uapi/linux/bpf.h   |  11 ++++
 kernel/bpf/syscall.c       |  43 ++++++++++++++
 net/core/filter.c          | 118 ++++++++++++++++++++++++++++++++-----
 net/ipv4/af_inet.c         |  18 ++++--
 net/ipv6/af_inet6.c        |  21 ++++---
 6 files changed, 196 insertions(+), 31 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c6ab295e6dcb..30d15e64b993 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -98,16 +98,24 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
+#define BPF_CGROUP_RUN_SK_PROG(sk, type)				       \
 ({									       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled) {					       \
-		__ret = __cgroup_bpf_run_filter_sk(sk,			       \
-						 BPF_CGROUP_INET_SOCK_CREATE); \
+		__ret = __cgroup_bpf_run_filter_sk(sk, type);		       \
 	}								       \
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+
+#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)				       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
+
+#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk)				       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
+
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)				       \
 ({									       \
 	int __ret = 0;							       \
@@ -183,6 +191,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77afaf1ba556..c5ec89732a8d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -152,6 +152,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_BIND,
 	BPF_CGROUP_INET4_CONNECT,
 	BPF_CGROUP_INET6_CONNECT,
+	BPF_CGROUP_INET4_POST_BIND,
+	BPF_CGROUP_INET6_POST_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -948,6 +950,15 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
+	__u32 src_ip4;		/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_port;		/* Allows 4-byte read.
+				 * Stored in host byte order
+				 */
 };
 
 #define XDP_PACKET_HEADROOM 256
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cf1b29bc0ab8..0244973ee544 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1171,11 +1171,46 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
 
+/* Initially all BPF programs could be loaded w/o specifying
+ * expected_attach_type. Later for some of them specifying expected_attach_type
+ * at load time became required so that program could be validated properly.
+ * Programs of types that are allowed to be loaded both w/ and w/o (for
+ * backward compatibility) expected_attach_type, should have the default attach
+ * type assigned to expected_attach_type for the latter case, so that it can be
+ * validated later at attach time.
+ *
+ * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
+ * prog type requires it but has some attach types that have to be backward
+ * compatible.
+ */
+static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
+{
+	switch (attr->prog_type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+		/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
+		 * exist so checking for non-zero is the way to go here.
+		 */
+		if (!attr->expected_attach_type)
+			attr->expected_attach_type =
+				BPF_CGROUP_INET_SOCK_CREATE;
+		break;
+	}
+}
+
 static int
 bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 				enum bpf_attach_type expected_attach_type)
 {
 	switch (prog_type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+		switch (expected_attach_type) {
+		case BPF_CGROUP_INET_SOCK_CREATE:
+		case BPF_CGROUP_INET4_POST_BIND:
+		case BPF_CGROUP_INET6_POST_BIND:
+			return 0;
+		default:
+			return -EINVAL;
+		}
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		switch (expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
@@ -1195,6 +1230,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 					     enum bpf_attach_type attach_type)
 {
 	switch (prog->type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK:
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
 	default:
@@ -1240,6 +1276,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	    !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	bpf_prog_load_fixup_attach_type(attr);
 	if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
 		return -EINVAL;
 
@@ -1489,6 +1526,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET4_POST_BIND:
+	case BPF_CGROUP_INET6_POST_BIND:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 		break;
 	case BPF_CGROUP_INET4_BIND:
@@ -1557,6 +1596,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET4_POST_BIND:
+	case BPF_CGROUP_INET6_POST_BIND:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 		break;
 	case BPF_CGROUP_INET4_BIND:
@@ -1616,6 +1657,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_SOCK_CREATE:
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_INET4_POST_BIND:
+	case BPF_CGROUP_INET6_POST_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
 	case BPF_CGROUP_SOCK_OPS:
diff --git a/net/core/filter.c b/net/core/filter.c
index bdb9cadd4d27..d31aff93270d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4097,30 +4097,80 @@ static bool lwt_is_valid_access(int off, int size,
 	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+
+/* Attach type specific accesses */
+static bool __sock_filter_check_attach_type(int off,
+					    enum bpf_access_type access_type,
+					    enum bpf_attach_type attach_type)
+{
+	switch (off) {
+	case offsetof(struct bpf_sock, bound_dev_if):
+	case offsetof(struct bpf_sock, mark):
+	case offsetof(struct bpf_sock, priority):
+		switch (attach_type) {
+		case BPF_CGROUP_INET_SOCK_CREATE:
+			goto full_access;
+		default:
+			return false;
+		}
+	case bpf_ctx_range(struct bpf_sock, src_ip4):
+		switch (attach_type) {
+		case BPF_CGROUP_INET4_POST_BIND:
+			goto read_only;
+		default:
+			return false;
+		}
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+		switch (attach_type) {
+		case BPF_CGROUP_INET6_POST_BIND:
+			goto read_only;
+		default:
+			return false;
+		}
+	case bpf_ctx_range(struct bpf_sock, src_port):
+		switch (attach_type) {
+		case BPF_CGROUP_INET4_POST_BIND:
+		case BPF_CGROUP_INET6_POST_BIND:
+			goto read_only;
+		default:
+			return false;
+		}
+	}
+read_only:
+	return access_type == BPF_READ;
+full_access:
+	return true;
+}
+
+static bool __sock_filter_check_size(int off, int size,
+				     struct bpf_insn_access_aux *info)
+{
+	const int size_default = sizeof(__u32);
+
+	switch (off) {
+	case bpf_ctx_range(struct bpf_sock, src_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	}
+
+	return size == size_default;
+}
+
 static bool sock_filter_is_valid_access(int off, int size,
 					enum bpf_access_type type,
 					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
-	if (type == BPF_WRITE) {
-		switch (off) {
-		case offsetof(struct bpf_sock, bound_dev_if):
-		case offsetof(struct bpf_sock, mark):
-		case offsetof(struct bpf_sock, priority):
-			break;
-		default:
-			return false;
-		}
-	}
-
-	if (off < 0 || off + size > sizeof(struct bpf_sock))
+	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
-	/* The verifier guarantees that size > 0. */
 	if (off % size != 0)
 		return false;
-	if (size != sizeof(__u32))
+	if (!__sock_filter_check_attach_type(off, type,
+					     prog->expected_attach_type))
+		return false;
+	if (!__sock_filter_check_size(off, size, info))
 		return false;
-
 	return true;
 }
 
@@ -4728,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 					  struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
+	int off;
 
 	switch (si->off) {
 	case offsetof(struct bpf_sock, bound_dev_if):
@@ -4783,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
 		break;
+
+	case offsetof(struct bpf_sock, src_ip4):
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_rcv_saddr,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_rcv_saddr),
+				       target_size));
+		break;
+
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		off = si->off;
+		off -= offsetof(struct bpf_sock, src_ip6[0]);
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(
+				struct sock_common,
+				skc_v6_rcv_saddr.s6_addr32[0],
+				FIELD_SIZEOF(struct sock_common,
+					     skc_v6_rcv_saddr.s6_addr32[0]),
+				target_size) + off);
+#else
+		(void)off;
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
+	case offsetof(struct bpf_sock, src_port):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_num,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_num),
+				       target_size));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 488fe26ac8e5..142d4c35b493 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -519,12 +519,18 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		inet->inet_saddr = 0;  /* Use device */
 
 	/* Make sure we are allowed to bind here. */
-	if ((snum || !(inet->bind_address_no_port ||
-		       force_bind_address_no_port)) &&
-	    sk->sk_prot->get_port(sk, snum)) {
-		inet->inet_saddr = inet->inet_rcv_saddr = 0;
-		err = -EADDRINUSE;
-		goto out_release_sock;
+	if (snum || !(inet->bind_address_no_port ||
+		      force_bind_address_no_port)) {
+		if (sk->sk_prot->get_port(sk, snum)) {
+			inet->inet_saddr = inet->inet_rcv_saddr = 0;
+			err = -EADDRINUSE;
+			goto out_release_sock;
+		}
+		err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+		if (err) {
+			inet->inet_saddr = inet->inet_rcv_saddr = 0;
+			goto out_release_sock;
+		}
 	}
 
 	if (inet->inet_rcv_saddr)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 566cec0e0a44..41f50472679d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -412,13 +412,20 @@ int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		sk->sk_ipv6only = 1;
 
 	/* Make sure we are allowed to bind here. */
-	if ((snum || !(inet->bind_address_no_port ||
-		       force_bind_address_no_port)) &&
-	    sk->sk_prot->get_port(sk, snum)) {
-		sk->sk_ipv6only = saved_ipv6only;
-		inet_reset_saddr(sk);
-		err = -EADDRINUSE;
-		goto out;
+	if (snum || !(inet->bind_address_no_port ||
+		      force_bind_address_no_port)) {
+		if (sk->sk_prot->get_port(sk, snum)) {
+			sk->sk_ipv6only = saved_ipv6only;
+			inet_reset_saddr(sk);
+			err = -EADDRINUSE;
+			goto out;
+		}
+		err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+		if (err) {
+			sk->sk_ipv6only = saved_ipv6only;
+			inet_reset_saddr(sk);
+			goto out;
+		}
 	}
 
 	if (addr_type != IPV6_ADDR_ANY)

From 1d436885b23bf4474617914d7eb15e039c83ed99 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 30 Mar 2018 15:08:08 -0700
Subject: [PATCH 1559/1678] selftests/bpf: Selftest for sys_bind post-hooks.

Add selftest for attach types `BPF_CGROUP_INET4_POST_BIND` and
`BPF_CGROUP_INET6_POST_BIND`.

The main things tested are:
* prog load behaves as expected (valid/invalid accesses in prog);
* prog attach behaves as expected (load- vs attach-time attach types);
* `BPF_CGROUP_INET_SOCK_CREATE` can be attached in a backward compatible
  way;
* post-hooks return expected result and errno.

Example:
  # ./test_sock
  Test case: bind4 load with invalid access: src_ip6 .. [PASS]
  Test case: bind4 load with invalid access: mark .. [PASS]
  Test case: bind6 load with invalid access: src_ip4 .. [PASS]
  Test case: sock_create load with invalid access: src_port .. [PASS]
  Test case: sock_create load w/o expected_attach_type (compat mode) ..
  [PASS]
  Test case: sock_create load w/ expected_attach_type .. [PASS]
  Test case: attach type mismatch bind4 vs bind6 .. [PASS]
  Test case: attach type mismatch bind6 vs bind4 .. [PASS]
  Test case: attach type mismatch default vs bind4 .. [PASS]
  Test case: attach type mismatch bind6 vs sock_create .. [PASS]
  Test case: bind4 reject all .. [PASS]
  Test case: bind6 reject all .. [PASS]
  Test case: bind6 deny specific IP & port .. [PASS]
  Test case: bind4 allow specific IP & port .. [PASS]
  Test case: bind4 allow all .. [PASS]
  Test case: bind6 allow all .. [PASS]
  Summary: 16 PASSED, 0 FAILED

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h          |  11 +
 tools/testing/selftests/bpf/Makefile    |   4 +-
 tools/testing/selftests/bpf/test_sock.c | 479 ++++++++++++++++++++++++
 3 files changed, 493 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_sock.c

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 71051d01e8dd..9d07465023a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -152,6 +152,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_BIND,
 	BPF_CGROUP_INET4_CONNECT,
 	BPF_CGROUP_INET6_CONNECT,
+	BPF_CGROUP_INET4_POST_BIND,
+	BPF_CGROUP_INET6_POST_BIND,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -947,6 +949,15 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
+	__u32 src_ip4;		/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
+				 * Stored in network byte order.
+				 */
+	__u32 src_port;		/* Allows 4-byte read.
+				 * Stored in host byte order
+				 */
 };
 
 #define XDP_PACKET_HEADROOM 256
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c64d4ebc77ff..0a315ddabbf4 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,8 @@ urandom_read: urandom_read.c
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user test_sock_addr
+	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_sock test_sock_addr
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
 	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
@@ -52,6 +53,7 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
 $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 
 .PHONY: force
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000000..73bb20cfb9b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define CG_PATH		"/foo"
+#define MAX_INSNS	512
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_test {
+	const char *descr;
+	/* BPF prog properties */
+	struct bpf_insn	insns[MAX_INSNS];
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_attach_type attach_type;
+	/* Socket properties */
+	int domain;
+	int type;
+	/* Endpoint to bind() to */
+	const char *ip;
+	unsigned short port;
+	/* Expected test result */
+	enum {
+		LOAD_REJECT,
+		ATTACH_REJECT,
+		BIND_REJECT,
+		SUCCESS,
+	} result;
+};
+
+static struct sock_test tests[] = {
+	{
+		"bind4 load with invalid access: src_ip6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[0])),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"bind4 load with invalid access: mark",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, mark)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"bind6 load with invalid access: src_ip4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"sock_create load with invalid access: src_port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET_SOCK_CREATE,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"sock_create load w/o expected_attach_type (compat mode)",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		0,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		8097,
+		SUCCESS,
+	},
+	{
+		"sock_create load w/ expected_attach_type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET_SOCK_CREATE,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		8097,
+		SUCCESS,
+	},
+	{
+		"attach type mismatch bind4 vs bind6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"attach type mismatch bind6 vs bind4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"attach type mismatch default vs bind4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		0,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"attach type mismatch bind6 vs sock_create",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"bind4 reject all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		"0.0.0.0",
+		0,
+		BIND_REJECT,
+	},
+	{
+		"bind6 reject all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		"::",
+		0,
+		BIND_REJECT,
+	},
+	{
+		"bind6 deny specific IP & port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[3])),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		"::1",
+		8193,
+		BIND_REJECT,
+	},
+	{
+		"bind4 allow specific IP & port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		4098,
+		SUCCESS,
+	},
+	{
+		"bind4 allow all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		"0.0.0.0",
+		0,
+		SUCCESS,
+	},
+	{
+		"bind6 allow all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		"::",
+		0,
+		SUCCESS,
+	},
+};
+
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+	size_t len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static int load_sock_prog(const struct bpf_insn *prog,
+			  enum bpf_attach_type attach_type)
+{
+	struct bpf_load_program_attr attr;
+
+	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	attr.expected_attach_type = attach_type;
+	attr.insns = prog;
+	attr.insns_cnt = probe_prog_length(attr.insns);
+	attr.license = "GPL";
+
+	return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+static int attach_sock_prog(int cgfd, int progfd,
+			    enum bpf_attach_type attach_type)
+{
+	return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
+}
+
+static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in6 *addr6;
+	struct sockaddr_in *addr4;
+	int sockfd = -1;
+	socklen_t len;
+	int err = 0;
+
+	sockfd = socket(domain, type, 0);
+	if (sockfd < 0)
+		goto err;
+
+	memset(&addr, 0, sizeof(addr));
+
+	if (domain == AF_INET) {
+		len = sizeof(struct sockaddr_in);
+		addr4 = (struct sockaddr_in *)&addr;
+		addr4->sin_family = domain;
+		addr4->sin_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
+			goto err;
+	} else if (domain == AF_INET6) {
+		len = sizeof(struct sockaddr_in6);
+		addr6 = (struct sockaddr_in6 *)&addr;
+		addr6->sin6_family = domain;
+		addr6->sin6_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
+			goto err;
+	} else {
+		goto err;
+	}
+
+	if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(sockfd);
+	return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_test *test)
+{
+	int progfd = -1;
+	int err = 0;
+
+	printf("Test case: %s .. ", test->descr);
+	progfd = load_sock_prog(test->insns, test->expected_attach_type);
+	if (progfd < 0) {
+		if (test->result == LOAD_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+		if (test->result == ATTACH_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
+		/* sys_bind() may fail for different reasons, errno has to be
+		 * checked to confirm that BPF program rejected it.
+		 */
+		if (test->result == BIND_REJECT && errno == EPERM)
+			goto out;
+		else
+			goto err;
+	}
+
+
+	if (test->result != SUCCESS)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	/* Detaching w/o checking return code: best effort attempt. */
+	if (progfd != -1)
+		bpf_prog_detach(cgfd, test->attach_type);
+	close(progfd);
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
+
+static int run_tests(int cgfd)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(cgfd, &tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_tests(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}

From a5af1fb94ffcad1aff3bc7f8a78dae7ee266f31c Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Fri, 30 Mar 2018 01:00:34 +0200
Subject: [PATCH 1560/1678] dt-bindings: net: meson-dwmac: add support for the
 Meson8m2 SoC

The Meson8m2 SoC uses a similar (potentially even identical) register
layout for the dwmac glue as Meson8b and GXBB. Unfortunately there is no
documentation available.
Testing shows that both, RMII and RGMII PHYs are working if they are
configured as on Meson8b. Add a new compatible string to the
documentation so differences (if there are any) between Meson8m2 and the
other SoCs can be taken care of within the driver.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/meson-dwmac.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/meson-dwmac.txt b/Documentation/devicetree/bindings/net/meson-dwmac.txt
index 354dd9896bb5..61cada22ae6c 100644
--- a/Documentation/devicetree/bindings/net/meson-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/meson-dwmac.txt
@@ -9,6 +9,7 @@ Required properties on all platforms:
 - compatible:	Depending on the platform this should be one of:
 			- "amlogic,meson6-dwmac"
 			- "amlogic,meson8b-dwmac"
+			- "amlogic,meson8m2-dwmac"
 			- "amlogic,meson-gxbb-dwmac"
 		Additionally "snps,dwmac" and any applicable more
 		detailed version number described in net/stmmac.txt
@@ -19,13 +20,13 @@ Required properties on all platforms:
 	configuration (for example the PRG_ETHERNET register range
 	on Meson8b and newer)
 
-Required properties on Meson8b and newer:
+Required properties on Meson8b, Meson8m2, GXBB and newer:
 - clock-names:	Should contain the following:
 		- "stmmaceth" - see stmmac.txt
 		- "clkin0" - first parent clock of the internal mux
 		- "clkin1" - second parent clock of the internal mux
 
-Optional properties on Meson8b and newer:
+Optional properties on Meson8b, Meson8m2, GXBB and newer:
 - amlogic,tx-delay-ns:	The internal RGMII TX clock delay (provided
 			by this driver) in nanoseconds. Allowed values
 			are: 0ns, 2ns, 4ns, 6ns.

From 7676693c682564c0f8ffe055afbcfcc9e35ff247 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Fri, 30 Mar 2018 01:00:35 +0200
Subject: [PATCH 1561/1678] net: stmmac: dwmac-meson8b: Add support for the
 Meson8m2 SoC

The Meson8m2 SoC uses a similar (potentially even identical) register
layout as the Meson8b and GXBB SoCs for the dwmac glue.
Add a new compatible string and update the module description to
indicate support for these SoCs.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 2d5d4aea3bcb..7cb794094a70 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -1,5 +1,5 @@
 /*
- * Amlogic Meson8b and GXBB DWMAC glue layer
+ * Amlogic Meson8b, Meson8m2 and GXBB DWMAC glue layer
  *
  * Copyright (C) 2016 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
  *
@@ -318,6 +318,7 @@ err_remove_config_dt:
 
 static const struct of_device_id meson8b_dwmac_match[] = {
 	{ .compatible = "amlogic,meson8b-dwmac" },
+	{ .compatible = "amlogic,meson8m2-dwmac" },
 	{ .compatible = "amlogic,meson-gxbb-dwmac" },
 	{ }
 };
@@ -335,5 +336,5 @@ static struct platform_driver meson8b_dwmac_driver = {
 module_platform_driver(meson8b_dwmac_driver);
 
 MODULE_AUTHOR("Martin Blumenstingl <martin.blumenstingl@googlemail.com>");
-MODULE_DESCRIPTION("Amlogic Meson8b and GXBB DWMAC glue layer");
+MODULE_DESCRIPTION("Amlogic Meson8b, Meson8m2 and GXBB DWMAC glue layer");
 MODULE_LICENSE("GPL v2");

From f8ad1f3f07e06895fe3fc18b5efd2d4b4f22b9d7 Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:47 -0700
Subject: [PATCH 1562/1678] net: thunderx: move filter register related macro
 into proper place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ThunderX NIC has set of registers which allows to configure
filter policy for ingress packets. There are three possible regimes
of filtering multicasts, broadcasts and unicasts: accept all, reject all
and accept filter allowed only.

Current implementation has enum with all of them and two generic macro
for enabling filtering et all (CAM_ACCEPT) and enabling/disabling
broadcast packets, which also should be corrected in order to represent
register bits properly. All these values are private for driver and
there is no need to ‘publish’ them via header file.

This commit is to move filtering register manipulation values from
header file into source with explicit assignment of exact register
values to them to be used while register configuring.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 13 +++++++++++++
 drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 11 -----------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 91d34ea40e2c..0dd211605eb1 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -24,6 +24,19 @@
 #define DRV_NAME	"thunder_bgx"
 #define DRV_VERSION	"1.0"
 
+/* RX_DMAC_CTL configuration */
+enum MCAST_MODE {
+		MCAST_MODE_REJECT = 0x0,
+		MCAST_MODE_ACCEPT = 0x1,
+		MCAST_MODE_CAM_FILTER = 0x2,
+		RSVD = 0x3
+};
+
+#define BCAST_ACCEPT      BIT(0)
+#define CAM_ACCEPT        BIT(3)
+#define MCAST_MODE_MASK   0x3
+#define BGX_MCAST_MODE(x) (x << 1)
+
 struct lmac {
 	struct bgx		*bgx;
 	int			dmac;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 5a7567d31138..52439da62c97 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -205,17 +205,6 @@
 #define LMAC_INTR_LINK_UP	BIT(0)
 #define LMAC_INTR_LINK_DOWN	BIT(1)
 
-/*  RX_DMAC_CTL configuration*/
-enum MCAST_MODE {
-		MCAST_MODE_REJECT,
-		MCAST_MODE_ACCEPT,
-		MCAST_MODE_CAM_FILTER,
-		RSVD
-};
-
-#define BCAST_ACCEPT	1
-#define CAM_ACCEPT	1
-
 void octeon_mdiobus_force_mod_depencency(void);
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
 void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);

From 3a34ecfd9d3f3e0c1584a80349cc7c4312ff5e91 Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:48 -0700
Subject: [PATCH 1563/1678] net: thunderx: add MAC address filter tracking for
 LMAC

The ThunderX NIC has two Ethernet Interfaces (BGX) each of them could has
up to four Logical MACs configured. Each of BGX has 32 filters to be
configured for filtering ingress packets. The number of filters available
to particular LMAC is from 8 (if we have four LMACs configured per BGX)
up to 32 (in case of only one LMAC is configured per BGX).

At the same time the NIC could present up to 128 VFs to OS as network
interfaces, each of them kernel will configure with set of MAC addresses
for filtering. So to prevent dupes in BGX filter registers from different
network interfaces it is required to cache and track all filter
configuration requests prior to applying them onto BGX filter registers.

This commit is to update LMAC structures with control fields to
allocate/releasing filters tracking list along with implementing
dmac array allocate/release per LMAC.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/thunder/thunder_bgx.c | 44 +++++++++++++------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 0dd211605eb1..de90e6aa5a4f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -37,9 +37,18 @@ enum MCAST_MODE {
 #define MCAST_MODE_MASK   0x3
 #define BGX_MCAST_MODE(x) (x << 1)
 
+struct dmac_map {
+	u64                     vf_map;
+	u64                     dmac;
+};
+
 struct lmac {
 	struct bgx		*bgx;
-	int			dmac;
+	/* actual number of DMACs configured */
+	u8			dmacs_cfg;
+	/* overal number of possible DMACs could be configured per LMAC */
+	u8                      dmacs_count;
+	struct dmac_map         *dmacs; /* DMAC:VFs tracking filter array */
 	u8			mac[ETH_ALEN];
 	u8                      lmac_type;
 	u8                      lane_to_sds;
@@ -236,6 +245,19 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 }
 EXPORT_SYMBOL(bgx_set_lmac_mac);
 
+static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
+{
+	struct lmac *lmac = NULL;
+	u8  idx = 0;
+
+	lmac = &bgx->lmac[lmacid];
+	/* reset CAM filters */
+	for (idx = 0; idx < lmac->dmacs_count; idx++)
+		bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM +
+			      ((lmacid * lmac->dmacs_count) + idx) *
+			      sizeof(u64), 0);
+}
+
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
 	struct bgx *bgx = get_bgx(node, bgx_idx);
@@ -481,18 +503,6 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
 }
 EXPORT_SYMBOL(bgx_get_tx_stats);
 
-static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac)
-{
-	u64 offset;
-
-	while (bgx->lmac[lmac].dmac > 0) {
-		offset = ((bgx->lmac[lmac].dmac - 1) * sizeof(u64)) +
-			(lmac * MAX_DMAC_PER_LMAC * sizeof(u64));
-		bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, 0);
-		bgx->lmac[lmac].dmac--;
-	}
-}
-
 /* Configure BGX LMAC in internal loopback mode */
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
 				int lmac_idx, bool enable)
@@ -925,6 +935,11 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 		bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4);
 	}
 
+	/* actual number of filters available to exact LMAC */
+	lmac->dmacs_count = (RX_DMAC_COUNT / bgx->lmac_count);
+	lmac->dmacs = kcalloc(lmac->dmacs_count, sizeof(*lmac->dmacs),
+			      GFP_KERNEL);
+
 	/* Enable lmac */
 	bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
 
@@ -1011,7 +1026,8 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 	cfg &= ~CMR_EN;
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
 
-	bgx_flush_dmac_addrs(bgx, lmacid);
+	bgx_flush_dmac_cam_filter(bgx, lmacid);
+	kfree(lmac->dmacs);
 
 	if ((lmac->lmac_type != BGX_MODE_XFI) &&
 	    (lmac->lmac_type != BGX_MODE_XLAUI) &&

From ceb9ea21cc124f8653e4bb1b06fb6e051bd408fe Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:49 -0700
Subject: [PATCH 1564/1678] net: thunderx: add multicast filter management
 support

The ThunderX NIC could be partitioned to up to 128 VFs and thus
represented to system. Each VF is mapped to pair BGX:LMAC, and each of VF
is configured by kernel individually. Eventually the bunch of VFs could be
mapped onto same pair BGX:LMAC and thus could cause several multicast
filtering configuration requests to LMAC with the same MAC addresses.

This commit is to add ThunderX NIC BGX filtering manipulation routines.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/thunder/thunder_bgx.c | 144 ++++++++++++++++++
 .../net/ethernet/cavium/thunder/thunder_bgx.h |  10 +-
 2 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index de90e6aa5a4f..5d08d2aeb172 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -258,6 +258,150 @@ static void bgx_flush_dmac_cam_filter(struct bgx *bgx, int lmacid)
 			      sizeof(u64), 0);
 }
 
+static void bgx_lmac_remove_filters(struct lmac *lmac, u8 vf_id)
+{
+	int i = 0;
+
+	if (!lmac)
+		return;
+
+	/* We've got reset filters request from some of attached VF, while the
+	 * others might want to keep their configuration. So in this case lets
+	 * iterate over all of configured filters and decrease number of
+	 * referencies. if some addresses get zero refs remove them from list
+	 */
+	for (i = lmac->dmacs_cfg - 1; i >= 0; i--) {
+		lmac->dmacs[i].vf_map &= ~BIT_ULL(vf_id);
+		if (!lmac->dmacs[i].vf_map) {
+			lmac->dmacs_cfg--;
+			lmac->dmacs[i].dmac = 0;
+			lmac->dmacs[i].vf_map = 0;
+		}
+	}
+}
+
+static int bgx_lmac_save_filter(struct lmac *lmac, u64 dmac, u8 vf_id)
+{
+	u8 i = 0;
+
+	if (!lmac)
+		return -1;
+
+	/* At the same time we could have several VFs 'attached' to some
+	 * particular LMAC, and each VF is represented as network interface
+	 * for kernel. So from user perspective it should be possible to
+	 * manipulate with its' (VF) receive modes. However from PF
+	 * driver perspective we need to keep track of filter configurations
+	 * for different VFs to prevent filter values dupes
+	 */
+	for (i = 0; i < lmac->dmacs_cfg; i++) {
+		if (lmac->dmacs[i].dmac == dmac) {
+			lmac->dmacs[i].vf_map |= BIT_ULL(vf_id);
+			return -1;
+		}
+	}
+
+	if (!(lmac->dmacs_cfg < lmac->dmacs_count))
+		return -1;
+
+	/* keep it for further tracking */
+	lmac->dmacs[lmac->dmacs_cfg].dmac = dmac;
+	lmac->dmacs[lmac->dmacs_cfg].vf_map = BIT_ULL(vf_id);
+	lmac->dmacs_cfg++;
+	return 0;
+}
+
+static int bgx_set_dmac_cam_filter_mac(struct bgx *bgx, int lmacid,
+				       u64 cam_dmac, u8 idx)
+{
+	struct lmac *lmac = NULL;
+	u64 cfg = 0;
+
+	/* skip zero addresses as meaningless */
+	if (!cam_dmac || !bgx)
+		return -1;
+
+	lmac = &bgx->lmac[lmacid];
+
+	/* configure DCAM filtering for designated LMAC */
+	cfg = RX_DMACX_CAM_LMACID(lmacid & LMAC_ID_MASK) |
+		RX_DMACX_CAM_EN | cam_dmac;
+	bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM +
+		      ((lmacid * lmac->dmacs_count) + idx) * sizeof(u64), cfg);
+	return 0;
+}
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid,
+			     u64 cam_dmac, u8 vf_id)
+{
+	struct bgx *bgx = get_bgx(node, bgx_idx);
+	struct lmac *lmac = NULL;
+
+	if (!bgx)
+		return;
+
+	lmac = &bgx->lmac[lmacid];
+
+	if (!cam_dmac)
+		cam_dmac = ether_addr_to_u64(lmac->mac);
+
+	/* since we might have several VFs attached to particular LMAC
+	 * and kernel could call mcast config for each of them with the
+	 * same MAC, check if requested MAC is already in filtering list and
+	 * updare/prepare list of MACs to be applied later to HW filters
+	 */
+	bgx_lmac_save_filter(lmac, cam_dmac, vf_id);
+}
+EXPORT_SYMBOL(bgx_set_dmac_cam_filter);
+
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode)
+{
+	struct bgx *bgx = get_bgx(node, bgx_idx);
+	struct lmac *lmac = NULL;
+	u64 cfg = 0;
+	u8 i = 0;
+
+	if (!bgx)
+		return;
+
+	lmac = &bgx->lmac[lmacid];
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL);
+	if (mode & BGX_XCAST_BCAST_ACCEPT)
+		cfg |= BCAST_ACCEPT;
+	else
+		cfg &= ~BCAST_ACCEPT;
+
+	/* disable all MCASTs and DMAC filtering */
+	cfg &= ~(CAM_ACCEPT | BGX_MCAST_MODE(MCAST_MODE_MASK));
+
+	/* check requested bits and set filtergin mode appropriately */
+	if (mode & (BGX_XCAST_MCAST_ACCEPT)) {
+		cfg |= (BGX_MCAST_MODE(MCAST_MODE_ACCEPT));
+	} else if (mode & BGX_XCAST_MCAST_FILTER) {
+		cfg |= (BGX_MCAST_MODE(MCAST_MODE_CAM_FILTER) | CAM_ACCEPT);
+		for (i = 0; i < lmac->dmacs_cfg; i++)
+			bgx_set_dmac_cam_filter_mac(bgx, lmacid,
+						    lmac->dmacs[i].dmac, i);
+	}
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_set_xcast_mode);
+
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf_id)
+{
+	struct bgx *bgx = get_bgx(node, bgx_idx);
+
+	if (!bgx)
+		return;
+
+	bgx_lmac_remove_filters(&bgx->lmac[lmacid], vf_id);
+	bgx_flush_dmac_cam_filter(bgx, lmacid);
+	bgx_set_xcast_mode(node, bgx_idx, lmacid,
+			   (BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT));
+}
+EXPORT_SYMBOL(bgx_reset_xcast_mode);
+
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
 	struct bgx *bgx = get_bgx(node, bgx_idx);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 52439da62c97..cbdd20b9ee6f 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -30,6 +30,7 @@
 #define    DEFAULT_PAUSE_TIME			0xFFFF
 
 #define	   BGX_ID_MASK				0x3
+#define	   LMAC_ID_MASK				0x3
 
 #define    MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE	2
 
@@ -57,7 +58,7 @@
 #define BGX_CMRX_RX_FIFO_LEN		0x108
 #define BGX_CMR_RX_DMACX_CAM		0x200
 #define  RX_DMACX_CAM_EN			BIT_ULL(48)
-#define  RX_DMACX_CAM_LMACID(x)			(x << 49)
+#define  RX_DMACX_CAM_LMACID(x)			(((u64)x) << 49)
 #define  RX_DMAC_COUNT				32
 #define BGX_CMR_RX_STREERING		0x300
 #define  RX_TRAFFIC_STEER_RULE_COUNT		8
@@ -205,6 +206,13 @@
 #define LMAC_INTR_LINK_UP	BIT(0)
 #define LMAC_INTR_LINK_DOWN	BIT(1)
 
+#define BGX_XCAST_BCAST_ACCEPT  BIT(0)
+#define BGX_XCAST_MCAST_ACCEPT  BIT(1)
+#define BGX_XCAST_MCAST_FILTER  BIT(2)
+
+void bgx_set_dmac_cam_filter(int node, int bgx_idx, int lmacid, u64 mac, u8 vf);
+void bgx_reset_xcast_mode(int node, int bgx_idx, int lmacid, u8 vf);
+void bgx_set_xcast_mode(int node, int bgx_idx, int lmacid, u8 mode);
 void octeon_mdiobus_force_mod_depencency(void);
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
 void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);

From 0b849f58f27b974f204477a71c2239986770a22c Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:50 -0700
Subject: [PATCH 1565/1678] net: thunderx: add new messages for handle
 ndo_set_rx_mode callback

The kernel calls ndo_set_rx_mode() callback supplying it will all necessary
info, such as device state flags, multicast mac addresses list and so on.
Since we have only 128 bits to communicate with PF we need to initiate
several requests to PF with small/short operation each based on input data.

So this commit implements following PF messages codes along with new
data structures for them:
NIC_MBOX_MSG_RESET_XCAST to flush all filters configured for this
                          particular network interface (VF)
NIC_MBOX_MSG_ADD_MCAST   to add new MAC address to DMAC filter registers
                          for this particular network interface (VF)
NIC_MBOX_MSG_SET_XCAST   to apply filtering configuration to filter control
                          register

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nic.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 4cacce5d2b16..069289b4f968 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -403,6 +403,9 @@ struct nicvf {
 #define	NIC_MBOX_MSG_PTP_CFG		0x19	/* HW packet timestamp */
 #define	NIC_MBOX_MSG_CFG_DONE		0xF0	/* VF configuration done */
 #define	NIC_MBOX_MSG_SHUTDOWN		0xF1	/* VF is being shutdown */
+#define	NIC_MBOX_MSG_RESET_XCAST	0xF2    /* Reset DCAM filtering mode */
+#define	NIC_MBOX_MSG_ADD_MCAST		0xF3    /* Add MAC to DCAM filters */
+#define	NIC_MBOX_MSG_SET_XCAST		0xF4    /* Set MCAST/BCAST RX mode */
 
 struct nic_cfg_msg {
 	u8    msg;
@@ -556,6 +559,14 @@ struct set_ptp {
 	bool  enable;
 };
 
+struct xcast {
+	u8    msg;
+	union {
+		u8    mode;
+		u64   mac;
+	} data;
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
 	struct { u8 msg; }	msg;
@@ -576,6 +587,7 @@ union nic_mbx {
 	struct reset_stat_cfg	reset_stat;
 	struct pfc		pfc;
 	struct set_ptp		ptp;
+	struct xcast            xcast;
 };
 
 #define NIC_NODE_ID_MASK	0x03

From aba4a2633b020064677df77e92238b1a753942fe Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:51 -0700
Subject: [PATCH 1566/1678] net: thunderx: add XCAST messages handlers for PF

This commit is to add message handling for ndo_set_rx_mode()
callback at PF side.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/thunder/nic_main.c    | 45 +++++++++++++++++--
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 7ff66a8194e2..55af04fa03a7 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -21,6 +21,8 @@
 #define DRV_NAME	"nicpf"
 #define DRV_VERSION	"1.0"
 
+#define NIC_VF_PER_MBX_REG      64
+
 struct hw_info {
 	u8		bgx_cnt;
 	u8		chans_per_lmac;
@@ -1072,6 +1074,40 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 	case NIC_MBOX_MSG_PTP_CFG:
 		nic_config_timestamp(nic, vf, &mbx.ptp);
 		break;
+	case NIC_MBOX_MSG_RESET_XCAST:
+		if (vf >= nic->num_vf_en) {
+			ret = -1; /* NACK */
+			break;
+		}
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		bgx_reset_xcast_mode(nic->node, bgx, lmac,
+				     vf < NIC_VF_PER_MBX_REG ? vf :
+				     vf - NIC_VF_PER_MBX_REG);
+		break;
+
+	case NIC_MBOX_MSG_ADD_MCAST:
+		if (vf >= nic->num_vf_en) {
+			ret = -1; /* NACK */
+			break;
+		}
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
+					mbx.xcast.data.mac,
+					vf < NIC_VF_PER_MBX_REG ? vf :
+					vf - NIC_VF_PER_MBX_REG);
+		break;
+
+	case NIC_MBOX_MSG_SET_XCAST:
+		if (vf >= nic->num_vf_en) {
+			ret = -1; /* NACK */
+			break;
+		}
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+		break;
 	default:
 		dev_err(&nic->pdev->dev,
 			"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1094,7 +1130,7 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
 	struct nicpf *nic = (struct nicpf *)nic_irq;
 	int mbx;
 	u64 intr;
-	u8  vf, vf_per_mbx_reg = 64;
+	u8  vf;
 
 	if (irq == pci_irq_vector(nic->pdev, NIC_PF_INTR_ID_MBOX0))
 		mbx = 0;
@@ -1103,12 +1139,13 @@ static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
 
 	intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
 	dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
-	for (vf = 0; vf < vf_per_mbx_reg; vf++) {
+	for (vf = 0; vf < NIC_VF_PER_MBX_REG; vf++) {
 		if (intr & (1ULL << vf)) {
 			dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
-				vf + (mbx * vf_per_mbx_reg));
+				vf + (mbx * NIC_VF_PER_MBX_REG));
 
-			nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg));
+			nic_handle_mbx_intr(nic, vf +
+					    (mbx * NIC_VF_PER_MBX_REG));
 			nic_clear_mbx_intr(nic, vf, mbx);
 		}
 	}

From 1b6d55f23948df5ea2b9f59b766bed91fb609b99 Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:52 -0700
Subject: [PATCH 1567/1678] net: thunderx: add workqueue control structures for
 handle ndo_set_rx_mode request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kernel calls ndo_set_rx_mode() callback from atomic context which
causes messaging timeouts between VF and PF (as they’re implemented via
MSIx). So in order to handle ndo_set_rx_mode() we need to get rid of it.

This commit implements necessary workqueue related structures to let VF
queue kernel request processing in non-atomic context later.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nic.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 069289b4f968..5fc46c5a4f36 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -265,6 +265,22 @@ struct nicvf_drv_stats {
 
 struct cavium_ptp;
 
+struct xcast_addr {
+	struct list_head list;
+	u64              addr;
+};
+
+struct xcast_addr_list {
+	struct list_head list;
+	int              count;
+};
+
+struct nicvf_work {
+	struct delayed_work    work;
+	u8                     mode;
+	struct xcast_addr_list *mc;
+};
+
 struct nicvf {
 	struct nicvf		*pnicvf;
 	struct net_device	*netdev;
@@ -313,6 +329,7 @@ struct nicvf {
 	struct nicvf_pfc	pfc;
 	struct tasklet_struct	qs_err_task;
 	struct work_struct	reset_task;
+	struct nicvf_work       rx_mode_work;
 
 	/* PTP timestamp */
 	struct cavium_ptp	*ptp_clock;

From 37c3347eb247a79a3371d589175f646ea3a488de Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Date: Fri, 30 Mar 2018 04:59:53 -0700
Subject: [PATCH 1568/1678] net: thunderx: add ndo_set_rx_mode callback
 implementation for VF

The ndo_set_rx_mode() is called from atomic context which causes
messages response timeouts while VF to PF communication via MSIx.
To get rid of that we're copy passed mc list, parse flags and queue
handling of kernel request to ordered workqueue.

Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/cavium/thunder/nicvf_main.c  | 110 +++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 73fe3881414b..1e9a31fef729 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -21,6 +21,7 @@
 #include <linux/bpf_trace.h>
 #include <linux/filter.h>
 #include <linux/net_tstamp.h>
+#include <linux/workqueue.h>
 
 #include "nic_reg.h"
 #include "nic.h"
@@ -67,6 +68,9 @@ module_param(cpi_alg, int, 0444);
 MODULE_PARM_DESC(cpi_alg,
 		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
 
+/* workqueue for handling kernel ndo_set_rx_mode() calls */
+static struct workqueue_struct *nicvf_rx_mode_wq;
+
 static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
 {
 	if (nic->sqs_mode)
@@ -1919,6 +1923,100 @@ static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
 	}
 }
 
+static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
+{
+	struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
+						  work.work);
+	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
+	union nic_mbx mbx = {};
+	struct xcast_addr *xaddr, *next;
+
+	if (!vf_work)
+		return;
+
+	/* From the inside of VM code flow we have only 128 bits memory
+	 * available to send message to host's PF, so send all mc addrs
+	 * one by one, starting from flush command in case if kernel
+	 * requests to configure specific MAC filtering
+	 */
+
+	/* flush DMAC filters and reset RX mode */
+	mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
+	nicvf_send_msg_to_pf(nic, &mbx);
+
+	if (vf_work->mode & BGX_XCAST_MCAST_FILTER) {
+		/* once enabling filtering, we need to signal to PF to add
+		 * its' own LMAC to the filter to accept packets for it.
+		 */
+		mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+		mbx.xcast.data.mac = 0;
+		nicvf_send_msg_to_pf(nic, &mbx);
+	}
+
+	/* check if we have any specific MACs to be added to PF DMAC filter */
+	if (vf_work->mc) {
+		/* now go through kernel list of MACs and add them one by one */
+		list_for_each_entry_safe(xaddr, next,
+					 &vf_work->mc->list, list) {
+			mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
+			mbx.xcast.data.mac = xaddr->addr;
+			nicvf_send_msg_to_pf(nic, &mbx);
+
+			/* after receiving ACK from PF release memory */
+			list_del(&xaddr->list);
+			kfree(xaddr);
+			vf_work->mc->count--;
+		}
+		kfree(vf_work->mc);
+	}
+
+	/* and finally set rx mode for PF accordingly */
+	mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
+	mbx.xcast.data.mode = vf_work->mode;
+
+	nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_set_rx_mode(struct net_device *netdev)
+{
+	struct nicvf *nic = netdev_priv(netdev);
+	struct netdev_hw_addr *ha;
+	struct xcast_addr_list *mc_list = NULL;
+	u8 mode = 0;
+
+	if (netdev->flags & IFF_PROMISC) {
+		mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
+	} else {
+		if (netdev->flags & IFF_BROADCAST)
+			mode |= BGX_XCAST_BCAST_ACCEPT;
+
+		if (netdev->flags & IFF_ALLMULTI) {
+			mode |= BGX_XCAST_MCAST_ACCEPT;
+		} else if (netdev->flags & IFF_MULTICAST) {
+			mode |= BGX_XCAST_MCAST_FILTER;
+			/* here we need to copy mc addrs */
+			if (netdev_mc_count(netdev)) {
+				struct xcast_addr *xaddr;
+
+				mc_list = kmalloc(sizeof(*mc_list), GFP_ATOMIC);
+				INIT_LIST_HEAD(&mc_list->list);
+				netdev_hw_addr_list_for_each(ha, &netdev->mc) {
+					xaddr = kmalloc(sizeof(*xaddr),
+							GFP_ATOMIC);
+					xaddr->addr =
+						ether_addr_to_u64(ha->addr);
+					list_add_tail(&xaddr->list,
+						      &mc_list->list);
+					mc_list->count++;
+				}
+			}
+		}
+	}
+	nic->rx_mode_work.mc = mc_list;
+	nic->rx_mode_work.mode = mode;
+	queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 2 * HZ);
+}
+
 static const struct net_device_ops nicvf_netdev_ops = {
 	.ndo_open		= nicvf_open,
 	.ndo_stop		= nicvf_stop,
@@ -1931,6 +2029,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
 	.ndo_set_features       = nicvf_set_features,
 	.ndo_bpf		= nicvf_xdp,
 	.ndo_do_ioctl           = nicvf_ioctl,
+	.ndo_set_rx_mode        = nicvf_set_rx_mode,
 };
 
 static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -2071,6 +2170,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	INIT_WORK(&nic->reset_task, nicvf_reset_task);
 
+	INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(dev, "Failed to register netdevice\n");
@@ -2109,6 +2210,8 @@ static void nicvf_remove(struct pci_dev *pdev)
 	nic = netdev_priv(netdev);
 	pnetdev = nic->pnicvf->netdev;
 
+	cancel_delayed_work_sync(&nic->rx_mode_work.work);
+
 	/* Check if this Qset is assigned to different VF.
 	 * If yes, clean primary and all secondary Qsets.
 	 */
@@ -2140,12 +2243,17 @@ static struct pci_driver nicvf_driver = {
 static int __init nicvf_init_module(void)
 {
 	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
-
+	nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
+						   WQ_MEM_RECLAIM);
 	return pci_register_driver(&nicvf_driver);
 }
 
 static void __exit nicvf_cleanup_module(void)
 {
+	if (nicvf_rx_mode_wq) {
+		destroy_workqueue(nicvf_rx_mode_wq);
+		nicvf_rx_mode_wq = NULL;
+	}
 	pci_unregister_driver(&nicvf_driver);
 }
 

From f40aa23339e2d06b1a8daaece5a511bb1c4f704c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 30 Mar 2018 13:46:18 +0300
Subject: [PATCH 1569/1678] net: bridge: set min MTU on port events and allow
 user to set max

Recently the bridge was changed to automatically set maximum MTU on port
events (add/del/changemtu) when vlan filtering is enabled, but that
actually changes behaviour in a way which breaks some setups and can lead
to packet drops. In order to still allow that maximum to be set while being
compatible, we add the ability for the user to tune the bridge MTU up to
the maximum when vlan filtering is enabled, but that has to be done
explicitly and all port events (add/del/changemtu) lead to resetting that
MTU to the minimum as before.

Suggested-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c         |  2 +-
 net/bridge/br_device.c  |  3 ++-
 net/bridge/br_if.c      | 43 ++++++++++++++---------------------------
 net/bridge/br_private.h |  2 +-
 4 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 26e1616b2c90..565ff055813b 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 
 	switch (event) {
 	case NETDEV_CHANGEMTU:
-		dev_set_mtu(br->dev, br_mtu(br));
+		dev_set_mtu(br->dev, br_mtu(br, false));
 		break;
 
 	case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 278fc999d355..edb9967eb165 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -224,7 +224,8 @@ static void br_get_stats64(struct net_device *dev,
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	if (new_mtu > br_mtu(br))
+
+	if (new_mtu > br_mtu(br, br_vlan_enabled(dev)))
 		return -EINVAL;
 
 	dev->mtu = new_mtu;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 87b2afd455c7..7d5dc5a91084 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -424,41 +424,26 @@ int br_del_bridge(struct net *net, const char *name)
 	return ret;
 }
 
-static bool min_mtu(int a, int b)
-{
-	return a < b ? 1 : 0;
-}
-
-static bool max_mtu(int a, int b)
-{
-	return a > b ? 1 : 0;
-}
-
-/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
-static int __br_mtu(const struct net_bridge *br, bool (compare_fn)(int, int))
+/* MTU of the bridge pseudo-device: ETH_DATA_LEN if there are no ports, the
+ * minimum of the ports if @max is false or the maximum if it's true
+ */
+int br_mtu(const struct net_bridge *br, bool max)
 {
 	const struct net_bridge_port *p;
-	int mtu = 0;
+	int ret_mtu = 0;
 
 	ASSERT_RTNL();
 
-	if (list_empty(&br->port_list))
-		mtu = ETH_DATA_LEN;
-	else {
-		list_for_each_entry(p, &br->port_list, list) {
-			if (!mtu || compare_fn(p->dev->mtu, mtu))
-				mtu = p->dev->mtu;
+	list_for_each_entry(p, &br->port_list, list) {
+		if (!max) {
+			if (!ret_mtu || ret_mtu > p->dev->mtu)
+				ret_mtu = p->dev->mtu;
+		} else if (p->dev->mtu > ret_mtu) {
+			ret_mtu = p->dev->mtu;
 		}
 	}
-	return mtu;
-}
 
-int br_mtu(const struct net_bridge *br)
-{
-	if (br_vlan_enabled(br->dev))
-		return __br_mtu(br, max_mtu);
-	else
-		return __br_mtu(br, min_mtu);
+	return ret_mtu ? ret_mtu : ETH_DATA_LEN;
 }
 
 static void br_set_gso_limits(struct net_bridge *br)
@@ -612,7 +597,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	if (changed_addr)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
-	dev_set_mtu(br->dev, br_mtu(br));
+	dev_set_mtu(br->dev, br_mtu(br, false));
 	br_set_gso_limits(br);
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -659,7 +644,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	 */
 	del_nbp(p);
 
-	dev_set_mtu(br->dev, br_mtu(br));
+	dev_set_mtu(br->dev, br_mtu(br, false));
 	br_set_gso_limits(br);
 
 	spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 048d5b51813b..586f84b9670d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -578,7 +578,7 @@ int br_del_bridge(struct net *net, const char *name);
 int br_add_if(struct net_bridge *br, struct net_device *dev,
 	      struct netlink_ext_ack *extack);
 int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_mtu(const struct net_bridge *br);
+int br_mtu(const struct net_bridge *br, bool max);
 netdev_features_t br_features_recompute(struct net_bridge *br,
 					netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);

From 804b854d374e39f5f8bff9638fd274b9a9ca7d33 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 30 Mar 2018 13:46:19 +0300
Subject: [PATCH 1570/1678] net: bridge: disable bridge MTU auto tuning if it
 was set manually

As Roopa noted today the biggest source of problems when configuring
bridge and ports is that the bridge MTU keeps changing automatically on
port events (add/del/changemtu). That leads to inconsistent behaviour
and network config software needs to chase the MTU and fix it on each
such event. Let's improve on that situation and allow for the user to
set any MTU within ETH_MIN/MAX limits, but once manually configured it
is the user's responsibility to keep it correct afterwards.

In case the MTU isn't manually set - the behaviour reverts to the
previous and the bridge follows the minimum MTU.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c         |  2 +-
 net/bridge/br_device.c  |  5 ++---
 net/bridge/br_if.c      | 36 +++++++++++++++++++++---------------
 net/bridge/br_private.h |  3 ++-
 4 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 565ff055813b..671d13c10f6f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -52,7 +52,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 
 	switch (event) {
 	case NETDEV_CHANGEMTU:
-		dev_set_mtu(br->dev, br_mtu(br, false));
+		br_mtu_auto_adjust(br);
 		break;
 
 	case NETDEV_CHANGEADDR:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index edb9967eb165..e682a668ce57 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -225,11 +225,10 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	if (new_mtu > br_mtu(br, br_vlan_enabled(dev)))
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 
+	/* this flag will be cleared if the MTU was automatically adjusted */
+	br->mtu_set_by_user = true;
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	/* remember the MTU in the rtable for PMTU */
 	dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 7d5dc5a91084..82c1a6f430b3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -424,28 +424,34 @@ int br_del_bridge(struct net *net, const char *name)
 	return ret;
 }
 
-/* MTU of the bridge pseudo-device: ETH_DATA_LEN if there are no ports, the
- * minimum of the ports if @max is false or the maximum if it's true
- */
-int br_mtu(const struct net_bridge *br, bool max)
+/* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
+static int br_mtu_min(const struct net_bridge *br)
 {
 	const struct net_bridge_port *p;
 	int ret_mtu = 0;
 
-	ASSERT_RTNL();
-
-	list_for_each_entry(p, &br->port_list, list) {
-		if (!max) {
-			if (!ret_mtu || ret_mtu > p->dev->mtu)
-				ret_mtu = p->dev->mtu;
-		} else if (p->dev->mtu > ret_mtu) {
+	list_for_each_entry(p, &br->port_list, list)
+		if (!ret_mtu || ret_mtu > p->dev->mtu)
 			ret_mtu = p->dev->mtu;
-		}
-	}
 
 	return ret_mtu ? ret_mtu : ETH_DATA_LEN;
 }
 
+void br_mtu_auto_adjust(struct net_bridge *br)
+{
+	ASSERT_RTNL();
+
+	/* if the bridge MTU was manually configured don't mess with it */
+	if (br->mtu_set_by_user)
+		return;
+
+	/* change to the minimum MTU and clear the flag which was set by
+	 * the bridge ndo_change_mtu callback
+	 */
+	dev_set_mtu(br->dev, br_mtu_min(br));
+	br->mtu_set_by_user = false;
+}
+
 static void br_set_gso_limits(struct net_bridge *br)
 {
 	unsigned int gso_max_size = GSO_MAX_SIZE;
@@ -597,7 +603,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	if (changed_addr)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
 
-	dev_set_mtu(br->dev, br_mtu(br, false));
+	br_mtu_auto_adjust(br);
 	br_set_gso_limits(br);
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
@@ -644,7 +650,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	 */
 	del_nbp(p);
 
-	dev_set_mtu(br->dev, br_mtu(br, false));
+	br_mtu_auto_adjust(br);
 	br_set_gso_limits(br);
 
 	spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 586f84b9670d..a7cb3ece5031 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -410,6 +410,7 @@ struct net_bridge {
 	int offload_fwd_mark;
 #endif
 	bool				neigh_suppress_enabled;
+	bool				mtu_set_by_user;
 	struct hlist_head		fdb_list;
 };
 
@@ -578,7 +579,7 @@ int br_del_bridge(struct net *net, const char *name);
 int br_add_if(struct net_bridge *br, struct net_device *dev,
 	      struct netlink_ext_ack *extack);
 int br_del_if(struct net_bridge *br, struct net_device *dev);
-int br_mtu(const struct net_bridge *br, bool max);
+void br_mtu_auto_adjust(struct net_bridge *br);
 netdev_features_t br_features_recompute(struct net_bridge *br,
 					netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);

From 218527fe27adaebeb81eb770459eb335517e90ee Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:41 +0200
Subject: [PATCH 1571/1678] tipc: replace name table service range array with
 rb tree

The current design of the binding table has an unnecessary memory
consuming and complex data structure. It aggregates the service range
items into an array, which is expanded by a factor two every time it
becomes too small to hold a new item. Furthermore, the arrays never
shrink when the number of ranges diminishes.

We now replace this array with an RB tree that is holding the range
items as tree nodes, each range directly holding a list of bindings.

This, along with a few name changes, improves both readability and
volume of the code, as well as reducing memory consumption and hopefully
improving cache hit rate.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h       |    1 +
 net/tipc/link.c       |    2 +-
 net/tipc/name_table.c | 1016 +++++++++++++++++++----------------------
 net/tipc/name_table.h |    2 +-
 net/tipc/node.c       |    4 +-
 net/tipc/subscr.h     |    4 +-
 6 files changed, 469 insertions(+), 560 deletions(-)

diff --git a/net/tipc/core.h b/net/tipc/core.h
index d0f64ca62d02..8020a6c360ff 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -58,6 +58,7 @@
 #include <linux/etherdevice.h>
 #include <net/netns/generic.h>
 #include <linux/rhashtable.h>
+#include <net/genetlink.h>
 
 struct tipc_node;
 struct tipc_bearer;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1289b4ba404f..8f2a9496439b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1810,7 +1810,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 
 void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
 {
-	int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
+	int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE);
 
 	l->window = win;
 	l->backlog[TIPC_LOW_IMPORTANCE].limit      = max_t(u16, 50, win);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 4359605b1bec..e06c7a8907aa 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -44,52 +44,40 @@
 #include "addr.h"
 #include "node.h"
 #include "group.h"
-#include <net/genetlink.h>
-
-#define TIPC_NAMETBL_SIZE 1024		/* must be a power of 2 */
 
 /**
- * struct name_info - name sequence publication info
- * @node_list: list of publications on own node of this <type,lower,upper>
- * @all_publ: list of all publications of this <type,lower,upper>
+ * struct service_range - container for all bindings of a service range
+ * @lower: service range lower bound
+ * @upper: service range upper bound
+ * @tree_node: member of service range RB tree
+ * @local_publ: list of identical publications made from this node
+ *   Used by closest_first lookup and multicast lookup algorithm
+ * @all_publ: all publications identical to this one, whatever node and scope
+ *   Used by round-robin lookup algorithm
  */
-struct name_info {
+struct service_range {
+	u32 lower;
+	u32 upper;
+	struct rb_node tree_node;
 	struct list_head local_publ;
 	struct list_head all_publ;
 };
 
 /**
- * struct sub_seq - container for all published instances of a name sequence
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @info: pointer to name sequence publication info
- */
-struct sub_seq {
-	u32 lower;
-	u32 upper;
-	struct name_info *info;
-};
-
-/**
- * struct name_seq - container for all published instances of a name type
- * @type: 32 bit 'type' value for name sequence
- * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type';
- *        sub-sequences are sorted in ascending order
- * @alloc: number of sub-sequences currently in array
- * @first_free: array index of first unused sub-sequence entry
- * @ns_list: links to adjacent name sequences in hash chain
- * @subscriptions: list of subscriptions for this 'type'
- * @lock: spinlock controlling access to publication lists of all sub-sequences
+ * struct tipc_service - container for all published instances of a service type
+ * @type: 32 bit 'type' value for service
+ * @ranges: rb tree containing all service ranges for this service
+ * @service_list: links to adjacent name ranges in hash chain
+ * @subscriptions: list of subscriptions for this service type
+ * @lock: spinlock controlling access to pertaining service ranges/publications
  * @rcu: RCU callback head used for deferred freeing
  */
-struct name_seq {
+struct tipc_service {
 	u32 type;
-	struct sub_seq *sseqs;
-	u32 alloc;
-	u32 first_free;
-	struct hlist_node ns_list;
+	struct rb_root ranges;
+	struct hlist_node service_list;
 	struct list_head subscriptions;
-	spinlock_t lock;
+	spinlock_t lock; /* Covers service range list */
 	struct rcu_head rcu;
 };
 
@@ -99,17 +87,16 @@ static int hash(int x)
 }
 
 /**
- * publ_create - create a publication structure
+ * tipc_publ_create - create a publication structure
  */
-static struct publication *publ_create(u32 type, u32 lower, u32 upper,
-				       u32 scope, u32 node, u32 port,
-				       u32 key)
+static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
+					    u32 scope, u32 node, u32 port,
+					    u32 key)
 {
 	struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
-	if (publ == NULL) {
-		pr_warn("Publication creation failure, no memory\n");
+
+	if (!publ)
 		return NULL;
-	}
 
 	publ->type = type;
 	publ->lower = lower;
@@ -119,372 +106,298 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
 	publ->port = port;
 	publ->key = key;
 	INIT_LIST_HEAD(&publ->binding_sock);
+	INIT_LIST_HEAD(&publ->binding_node);
+	INIT_LIST_HEAD(&publ->local_publ);
+	INIT_LIST_HEAD(&publ->all_publ);
 	return publ;
 }
 
 /**
- * tipc_subseq_alloc - allocate a specified number of sub-sequence structures
- */
-static struct sub_seq *tipc_subseq_alloc(u32 cnt)
-{
-	return kcalloc(cnt, sizeof(struct sub_seq), GFP_ATOMIC);
-}
-
-/**
- * tipc_nameseq_create - create a name sequence structure for the specified 'type'
+ * tipc_service_create - create a service structure for the specified 'type'
  *
- * Allocates a single sub-sequence structure and sets it to all 0's.
+ * Allocates a single range structure and sets it to all 0's.
  */
-static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_head)
+static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
 {
-	struct name_seq *nseq = kzalloc(sizeof(*nseq), GFP_ATOMIC);
-	struct sub_seq *sseq = tipc_subseq_alloc(1);
+	struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC);
 
-	if (!nseq || !sseq) {
-		pr_warn("Name sequence creation failed, no memory\n");
-		kfree(nseq);
-		kfree(sseq);
+	if (!service) {
+		pr_warn("Service creation failed, no memory\n");
 		return NULL;
 	}
 
-	spin_lock_init(&nseq->lock);
-	nseq->type = type;
-	nseq->sseqs = sseq;
-	nseq->alloc = 1;
-	INIT_HLIST_NODE(&nseq->ns_list);
-	INIT_LIST_HEAD(&nseq->subscriptions);
-	hlist_add_head_rcu(&nseq->ns_list, seq_head);
-	return nseq;
+	spin_lock_init(&service->lock);
+	service->type = type;
+	service->ranges = RB_ROOT;
+	INIT_HLIST_NODE(&service->service_list);
+	INIT_LIST_HEAD(&service->subscriptions);
+	hlist_add_head_rcu(&service->service_list, hd);
+	return service;
 }
 
 /**
- * nameseq_find_subseq - find sub-sequence (if any) matching a name instance
+ * tipc_service_find_range - find service range matching a service instance
  *
- * Very time-critical, so binary searches through sub-sequence array.
+ * Very time-critical, so binary search through range rb tree
  */
-static struct sub_seq *nameseq_find_subseq(struct name_seq *nseq,
-					   u32 instance)
+static struct service_range *tipc_service_find_range(struct tipc_service *sc,
+						     u32 instance)
 {
-	struct sub_seq *sseqs = nseq->sseqs;
-	int low = 0;
-	int high = nseq->first_free - 1;
-	int mid;
+	struct rb_node *n = sc->ranges.rb_node;
+	struct service_range *sr;
 
-	while (low <= high) {
-		mid = (low + high) / 2;
-		if (instance < sseqs[mid].lower)
-			high = mid - 1;
-		else if (instance > sseqs[mid].upper)
-			low = mid + 1;
+	while (n) {
+		sr = container_of(n, struct service_range, tree_node);
+		if (sr->lower > instance)
+			n = n->rb_left;
+		else if (sr->upper < instance)
+			n = n->rb_right;
 		else
-			return &sseqs[mid];
+			return sr;
 	}
 	return NULL;
 }
 
-/**
- * nameseq_locate_subseq - determine position of name instance in sub-sequence
- *
- * Returns index in sub-sequence array of the entry that contains the specified
- * instance value; if no entry contains that value, returns the position
- * where a new entry for it would be inserted in the array.
- *
- * Note: Similar to binary search code for locating a sub-sequence.
- */
-static u32 nameseq_locate_subseq(struct name_seq *nseq, u32 instance)
+static struct service_range *tipc_service_create_range(struct tipc_service *sc,
+						       u32 lower, u32 upper)
 {
-	struct sub_seq *sseqs = nseq->sseqs;
-	int low = 0;
-	int high = nseq->first_free - 1;
-	int mid;
+	struct rb_node **n, *parent = NULL;
+	struct service_range *sr, *tmp;
 
-	while (low <= high) {
-		mid = (low + high) / 2;
-		if (instance < sseqs[mid].lower)
-			high = mid - 1;
-		else if (instance > sseqs[mid].upper)
-			low = mid + 1;
+	n = &sc->ranges.rb_node;
+	while (*n) {
+		tmp = container_of(*n, struct service_range, tree_node);
+		parent = *n;
+		tmp = container_of(parent, struct service_range, tree_node);
+		if (lower < tmp->lower)
+			n = &(*n)->rb_left;
+		else if (upper > tmp->upper)
+			n = &(*n)->rb_right;
 		else
-			return mid;
+			return NULL;
 	}
-	return low;
+	sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
+	if (!sr)
+		return NULL;
+	sr->lower = lower;
+	sr->upper = upper;
+	INIT_LIST_HEAD(&sr->local_publ);
+	INIT_LIST_HEAD(&sr->all_publ);
+	rb_link_node(&sr->tree_node, parent, n);
+	rb_insert_color(&sr->tree_node, &sc->ranges);
+	return sr;
 }
 
-/**
- * tipc_nameseq_insert_publ
- */
-static struct publication *tipc_nameseq_insert_publ(struct net *net,
-						    struct name_seq *nseq,
+static struct publication *tipc_service_insert_publ(struct net *net,
+						    struct tipc_service *sc,
 						    u32 type, u32 lower,
 						    u32 upper, u32 scope,
-						    u32 node, u32 port, u32 key)
+						    u32 node, u32 port,
+						    u32 key)
 {
-	struct tipc_subscription *s;
-	struct tipc_subscription *st;
-	struct publication *publ;
-	struct sub_seq *sseq;
-	struct name_info *info;
-	int created_subseq = 0;
+	struct tipc_subscription *sub, *tmp;
+	struct service_range *sr;
+	struct publication *p;
+	bool first = false;
 
-	sseq = nameseq_find_subseq(nseq, lower);
-	if (sseq) {
-
-		/* Lower end overlaps existing entry => need an exact match */
-		if ((sseq->lower != lower) || (sseq->upper != upper)) {
-			return NULL;
-		}
-
-		info = sseq->info;
-
-		/* Check if an identical publication already exists */
-		list_for_each_entry(publ, &info->all_publ, all_publ) {
-			if (publ->port == port && publ->key == key &&
-			    (!publ->node || publ->node == node))
-				return NULL;
-		}
-	} else {
-		u32 inspos;
-		struct sub_seq *freesseq;
-
-		/* Find where lower end should be inserted */
-		inspos = nameseq_locate_subseq(nseq, lower);
-
-		/* Fail if upper end overlaps into an existing entry */
-		if ((inspos < nseq->first_free) &&
-		    (upper >= nseq->sseqs[inspos].lower)) {
-			return NULL;
-		}
-
-		/* Ensure there is space for new sub-sequence */
-		if (nseq->first_free == nseq->alloc) {
-			struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
-
-			if (!sseqs) {
-				pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
-					type, lower, upper);
-				return NULL;
-			}
-			memcpy(sseqs, nseq->sseqs,
-			       nseq->alloc * sizeof(struct sub_seq));
-			kfree(nseq->sseqs);
-			nseq->sseqs = sseqs;
-			nseq->alloc *= 2;
-		}
-
-		info = kzalloc(sizeof(*info), GFP_ATOMIC);
-		if (!info) {
-			pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
-				type, lower, upper);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&info->local_publ);
-		INIT_LIST_HEAD(&info->all_publ);
-
-		/* Insert new sub-sequence */
-		sseq = &nseq->sseqs[inspos];
-		freesseq = &nseq->sseqs[nseq->first_free];
-		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq));
-		memset(sseq, 0, sizeof(*sseq));
-		nseq->first_free++;
-		sseq->lower = lower;
-		sseq->upper = upper;
-		sseq->info = info;
-		created_subseq = 1;
+	sr = tipc_service_find_range(sc, lower);
+	if (!sr) {
+		sr = tipc_service_create_range(sc, lower, upper);
+		if (!sr)
+			goto  err;
+		first = true;
 	}
 
-	/* Insert a publication */
-	publ = publ_create(type, lower, upper, scope, node, port, key);
-	if (!publ)
+	/* Lower end overlaps existing entry, but we need an exact match */
+	if (sr->lower != lower || sr->upper != upper)
 		return NULL;
 
-	list_add(&publ->all_publ, &info->all_publ);
+	/* Return if the publication already exists */
+	list_for_each_entry(p, &sr->all_publ, all_publ) {
+		if (p->key == key && (!p->node || p->node == node))
+			return NULL;
+	}
 
+	/* Create and insert publication */
+	p = tipc_publ_create(type, lower, upper, scope, node, port, key);
+	if (!p)
+		goto err;
 	if (in_own_node(net, node))
-		list_add(&publ->local_publ, &info->local_publ);
+		list_add(&p->local_publ, &sr->local_publ);
+	list_add(&p->all_publ, &sr->all_publ);
 
 	/* Any subscriptions waiting for notification?  */
-	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-		tipc_sub_report_overlap(s, publ->lower, publ->upper,
-					TIPC_PUBLISHED, publ->port,
-					publ->node, publ->scope,
-					created_subseq);
+	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+		tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED,
+					p->port, p->node, p->scope, first);
 	}
-	return publ;
+	return p;
+err:
+	pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper);
+	return NULL;
 }
 
 /**
- * tipc_nameseq_remove_publ
+ * tipc_service_remove_publ - remove a publication from a service
  *
  * NOTE: There may be cases where TIPC is asked to remove a publication
  * that is not in the name table.  For example, if another node issues a
- * publication for a name sequence that overlaps an existing name sequence
+ * publication for a name range that overlaps an existing name range
  * the publication will not be recorded, which means the publication won't
- * be found when the name sequence is later withdrawn by that node.
+ * be found when the name range is later withdrawn by that node.
  * A failed withdraw request simply returns a failure indication and lets the
  * caller issue any error or warning messages associated with such a problem.
  */
-static struct publication *tipc_nameseq_remove_publ(struct net *net,
-						    struct name_seq *nseq,
+static struct publication *tipc_service_remove_publ(struct net *net,
+						    struct tipc_service *sc,
 						    u32 inst, u32 node,
 						    u32 port, u32 key)
 {
-	struct publication *publ;
-	struct sub_seq *sseq = nameseq_find_subseq(nseq, inst);
-	struct name_info *info;
-	struct sub_seq *free;
-	struct tipc_subscription *s, *st;
-	int removed_subseq = 0;
+	struct tipc_subscription *sub, *tmp;
+	struct service_range *sr;
+	struct publication *p;
+	bool found = false;
+	bool last = false;
 
-	if (!sseq)
+	sr = tipc_service_find_range(sc, inst);
+	if (!sr)
 		return NULL;
 
-	info = sseq->info;
-
-	/* Locate publication, if it exists */
-	list_for_each_entry(publ, &info->all_publ, all_publ) {
-		if (publ->key == key && publ->port == port &&
-		    (!publ->node || publ->node == node))
-			goto found;
+	/* Find publication, if it exists */
+	list_for_each_entry(p, &sr->all_publ, all_publ) {
+		if (p->key != key || (node && node != p->node))
+			continue;
+		found = true;
+		break;
 	}
-	return NULL;
+	if (!found)
+		return NULL;
 
-found:
-	list_del(&publ->all_publ);
-	if (in_own_node(net, node))
-		list_del(&publ->local_publ);
+	list_del(&p->all_publ);
+	list_del(&p->local_publ);
 
-	/* Contract subseq list if no more publications for that subseq */
-	if (list_empty(&info->all_publ)) {
-		kfree(info);
-		free = &nseq->sseqs[nseq->first_free--];
-		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
-		removed_subseq = 1;
+	/* Remove service range item if this was its last publication */
+	if (list_empty(&sr->all_publ)) {
+		last = true;
+		rb_erase(&sr->tree_node, &sc->ranges);
+		kfree(sr);
 	}
 
 	/* Notify any waiting subscriptions */
-	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-		tipc_sub_report_overlap(s, publ->lower, publ->upper,
-					TIPC_WITHDRAWN, publ->port,
-					publ->node, publ->scope,
-					removed_subseq);
+	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
+		tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_WITHDRAWN,
+					p->port, p->node, p->scope, last);
 	}
-
-	return publ;
+	return p;
 }
 
 /**
- * tipc_nameseq_subscribe - attach a subscription, and optionally
- * issue the prescribed number of events if there is any sub-
- * sequence overlapping with the requested sequence
+ * tipc_service_subscribe - attach a subscription, and optionally
+ * issue the prescribed number of events if there is any service
+ * range overlapping with the requested range
  */
-static void tipc_nameseq_subscribe(struct name_seq *nseq,
+static void tipc_service_subscribe(struct tipc_service *service,
 				   struct tipc_subscription *sub)
 {
-	struct sub_seq *sseq = nseq->sseqs;
+	struct tipc_subscr *sb = &sub->evt.s;
+	struct service_range *sr;
 	struct tipc_name_seq ns;
-	struct tipc_subscr *s = &sub->evt.s;
-	bool no_status;
+	struct publication *p;
+	struct rb_node *n;
+	bool first;
 
-	ns.type = tipc_sub_read(s, seq.type);
-	ns.lower = tipc_sub_read(s, seq.lower);
-	ns.upper = tipc_sub_read(s, seq.upper);
-	no_status = tipc_sub_read(s, filter) & TIPC_SUB_NO_STATUS;
+	ns.type = tipc_sub_read(sb, seq.type);
+	ns.lower = tipc_sub_read(sb, seq.lower);
+	ns.upper = tipc_sub_read(sb, seq.upper);
 
 	tipc_sub_get(sub);
-	list_add(&sub->nameseq_list, &nseq->subscriptions);
+	list_add(&sub->service_list, &service->subscriptions);
 
-	if (no_status || !sseq)
+	if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS)
 		return;
 
-	while (sseq != &nseq->sseqs[nseq->first_free]) {
-		if (tipc_sub_check_overlap(&ns, sseq->lower, sseq->upper)) {
-			struct publication *crs;
-			struct name_info *info = sseq->info;
-			int must_report = 1;
+	for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
+		sr = container_of(n, struct service_range, tree_node);
+		if (sr->lower > ns.upper)
+			break;
+		if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
+			continue;
+		first = true;
 
-			list_for_each_entry(crs, &info->all_publ, all_publ) {
-				tipc_sub_report_overlap(sub, sseq->lower,
-							sseq->upper,
-							TIPC_PUBLISHED,
-							crs->port,
-							crs->node,
-							crs->scope,
-							must_report);
-				must_report = 0;
-			}
+		list_for_each_entry(p, &sr->all_publ, all_publ) {
+			tipc_sub_report_overlap(sub, sr->lower, sr->upper,
+						TIPC_PUBLISHED,	p->port,
+						p->node, p->scope, first);
+			first = false;
 		}
-		sseq++;
 	}
 }
 
-static struct name_seq *nametbl_find_seq(struct net *net, u32 type)
+static struct tipc_service *tipc_service_find(struct net *net, u32 type)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct hlist_head *seq_head;
-	struct name_seq *ns;
+	struct name_table *nt = tipc_name_table(net);
+	struct hlist_head *service_head;
+	struct tipc_service *service;
 
-	seq_head = &tn->nametbl->seq_hlist[hash(type)];
-	hlist_for_each_entry_rcu(ns, seq_head, ns_list) {
-		if (ns->type == type)
-			return ns;
+	service_head = &nt->services[hash(type)];
+	hlist_for_each_entry_rcu(service, service_head, service_list) {
+		if (service->type == type)
+			return service;
 	}
-
 	return NULL;
 };
 
 struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper, u32 scope,
-					     u32 node, u32 port, u32 key)
+					     u32 lower, u32 upper,
+					     u32 scope, u32 node,
+					     u32 port, u32 key)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct publication *publ;
-	struct name_seq *seq = nametbl_find_seq(net, type);
-	int index = hash(type);
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_service *sc;
+	struct publication *p;
 
 	if (scope > TIPC_NODE_SCOPE || lower > upper) {
-		pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
+		pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
 			 type, lower, upper, scope);
 		return NULL;
 	}
-
-	if (!seq)
-		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
-	if (!seq)
+	sc = tipc_service_find(net, type);
+	if (!sc)
+		sc = tipc_service_create(type, &nt->services[hash(type)]);
+	if (!sc)
 		return NULL;
 
-	spin_lock_bh(&seq->lock);
-	publ = tipc_nameseq_insert_publ(net, seq, type, lower, upper,
-					scope, node, port, key);
-	spin_unlock_bh(&seq->lock);
-	return publ;
+	spin_lock_bh(&sc->lock);
+	p = tipc_service_insert_publ(net, sc, type, lower, upper,
+				     scope, node, port, key);
+	spin_unlock_bh(&sc->lock);
+	return p;
 }
 
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 node, u32 port,
-					     u32 key)
+						 u32 lower, u32 node, u32 port,
+						 u32 key)
 {
-	struct publication *publ;
-	struct name_seq *seq = nametbl_find_seq(net, type);
+	struct tipc_service *sc = tipc_service_find(net, type);
+	struct publication *p = NULL;
 
-	if (!seq)
+	if (!sc)
 		return NULL;
 
-	spin_lock_bh(&seq->lock);
-	publ = tipc_nameseq_remove_publ(net, seq, lower, node, port, key);
-	if (!seq->first_free && list_empty(&seq->subscriptions)) {
-		hlist_del_init_rcu(&seq->ns_list);
-		kfree(seq->sseqs);
-		spin_unlock_bh(&seq->lock);
-		kfree_rcu(seq, rcu);
-		return publ;
+	spin_lock_bh(&sc->lock);
+	p = tipc_service_remove_publ(net, sc, lower, node, port, key);
+
+	/* Delete service item if this no more publications and subscriptions */
+	if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
+		hlist_del_init_rcu(&sc->service_list);
+		kfree_rcu(sc, rcu);
 	}
-	spin_unlock_bh(&seq->lock);
-	return publ;
+	spin_unlock_bh(&sc->lock);
+	return p;
 }
 
 /**
- * tipc_nametbl_translate - perform name translation
+ * tipc_nametbl_translate - perform service instance to socket translation
  *
  * On entry, 'destnode' is the search domain used during translation.
  *
@@ -492,7 +405,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
  * - if name translation is deferred to another node/cluster/zone,
  *   leaves 'destnode' unchanged (will be non-zero) and returns 0
  * - if name translation is attempted and succeeds, sets 'destnode'
- *   to publishing node and returns port reference (will be non-zero)
+ *   to publication node and returns port reference (will be non-zero)
  * - if name translation is attempted and fails, sets 'destnode' to 0
  *   and returns 0
  */
@@ -502,10 +415,9 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	struct tipc_net *tn = tipc_net(net);
 	bool legacy = tn->legacy_addr_format;
 	u32 self = tipc_own_addr(net);
-	struct sub_seq *sseq;
-	struct name_info *info;
-	struct publication *publ;
-	struct name_seq *seq;
+	struct service_range *sr;
+	struct tipc_service *sc;
+	struct publication *p;
 	u32 port = 0;
 	u32 node = 0;
 
@@ -513,49 +425,49 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 		return 0;
 
 	rcu_read_lock();
-	seq = nametbl_find_seq(net, type);
-	if (unlikely(!seq))
+	sc = tipc_service_find(net, type);
+	if (unlikely(!sc))
 		goto not_found;
-	spin_lock_bh(&seq->lock);
-	sseq = nameseq_find_subseq(seq, instance);
-	if (unlikely(!sseq))
+
+	spin_lock_bh(&sc->lock);
+	sr = tipc_service_find_range(sc, instance);
+	if (unlikely(!sr))
 		goto no_match;
-	info = sseq->info;
 
 	/* Closest-First Algorithm */
 	if (legacy && !*destnode) {
-		if (!list_empty(&info->local_publ)) {
-			publ = list_first_entry(&info->local_publ,
-						struct publication,
-						local_publ);
-			list_move_tail(&publ->local_publ,
-				       &info->local_publ);
+		if (!list_empty(&sr->local_publ)) {
+			p = list_first_entry(&sr->local_publ,
+					     struct publication,
+					     local_publ);
+			list_move_tail(&p->local_publ,
+				       &sr->local_publ);
 		} else {
-			publ = list_first_entry(&info->all_publ,
-						struct publication,
-						all_publ);
-			list_move_tail(&publ->all_publ,
-				       &info->all_publ);
+			p = list_first_entry(&sr->all_publ,
+					     struct publication,
+					     all_publ);
+			list_move_tail(&p->all_publ,
+				       &sr->all_publ);
 		}
 	}
 
 	/* Round-Robin Algorithm */
-	else if (*destnode == tipc_own_addr(net)) {
-		if (list_empty(&info->local_publ))
+	else if (*destnode == self) {
+		if (list_empty(&sr->local_publ))
 			goto no_match;
-		publ = list_first_entry(&info->local_publ, struct publication,
-					local_publ);
-		list_move_tail(&publ->local_publ, &info->local_publ);
+		p = list_first_entry(&sr->local_publ, struct publication,
+				     local_publ);
+		list_move_tail(&p->local_publ, &sr->local_publ);
 	} else {
-		publ = list_first_entry(&info->all_publ, struct publication,
-					all_publ);
-		list_move_tail(&publ->all_publ, &info->all_publ);
+		p = list_first_entry(&sr->all_publ, struct publication,
+				     all_publ);
+		list_move_tail(&p->all_publ, &sr->all_publ);
 	}
 
-	port = publ->port;
-	node = publ->node;
+	port = p->port;
+	node = p->node;
 no_match:
-	spin_unlock_bh(&seq->lock);
+	spin_unlock_bh(&sc->lock);
 not_found:
 	rcu_read_unlock();
 	*destnode = node;
@@ -567,34 +479,36 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 			 bool all)
 {
 	u32 self = tipc_own_addr(net);
-	struct publication *publ;
-	struct name_info *info;
-	struct name_seq *seq;
-	struct sub_seq *sseq;
+	struct service_range *sr;
+	struct tipc_service *sc;
+	struct publication *p;
 
 	*dstcnt = 0;
 	rcu_read_lock();
-	seq = nametbl_find_seq(net, type);
-	if (unlikely(!seq))
+	sc = tipc_service_find(net, type);
+	if (unlikely(!sc))
 		goto exit;
-	spin_lock_bh(&seq->lock);
-	sseq = nameseq_find_subseq(seq, instance);
-	if (likely(sseq)) {
-		info = sseq->info;
-		list_for_each_entry(publ, &info->all_publ, all_publ) {
-			if (publ->scope != scope)
-				continue;
-			if (publ->port == exclude && publ->node == self)
-				continue;
-			tipc_dest_push(dsts, publ->node, publ->port);
-			(*dstcnt)++;
-			if (all)
-				continue;
-			list_move_tail(&publ->all_publ, &info->all_publ);
-			break;
-		}
+
+	spin_lock_bh(&sc->lock);
+
+	sr = tipc_service_find_range(sc, instance);
+	if (!sr)
+		goto no_match;
+
+	list_for_each_entry(p, &sr->all_publ, all_publ) {
+		if (p->scope != scope)
+			continue;
+		if (p->port == exclude && p->node == self)
+			continue;
+		tipc_dest_push(dsts, p->node, p->port);
+		(*dstcnt)++;
+		if (all)
+			continue;
+		list_move_tail(&p->all_publ, &sr->all_publ);
+		break;
 	}
-	spin_unlock_bh(&seq->lock);
+no_match:
+	spin_unlock_bh(&sc->lock);
 exit:
 	rcu_read_unlock();
 	return !list_empty(dsts);
@@ -603,61 +517,64 @@ exit:
 void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
 			    u32 scope, bool exact, struct list_head *dports)
 {
-	struct sub_seq *sseq_stop;
-	struct name_info *info;
+	struct service_range *sr;
+	struct tipc_service *sc;
 	struct publication *p;
-	struct name_seq *seq;
-	struct sub_seq *sseq;
+	struct rb_node *n;
 
 	rcu_read_lock();
-	seq = nametbl_find_seq(net, type);
-	if (!seq)
+	sc = tipc_service_find(net, type);
+	if (!sc)
 		goto exit;
 
-	spin_lock_bh(&seq->lock);
-	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
-	sseq_stop = seq->sseqs + seq->first_free;
-	for (; sseq != sseq_stop; sseq++) {
-		if (sseq->lower > upper)
+	spin_lock_bh(&sc->lock);
+
+	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+		sr = container_of(n, struct service_range, tree_node);
+		if (sr->upper < lower)
+			continue;
+		if (sr->lower > upper)
 			break;
-		info = sseq->info;
-		list_for_each_entry(p, &info->local_publ, local_publ) {
+		list_for_each_entry(p, &sr->local_publ, local_publ) {
 			if (p->scope == scope || (!exact && p->scope < scope))
 				tipc_dest_push(dports, 0, p->port);
 		}
 	}
-	spin_unlock_bh(&seq->lock);
+	spin_unlock_bh(&sc->lock);
 exit:
 	rcu_read_unlock();
 }
 
 /* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
  * - Creates list of nodes that overlap the given multicast address
- * - Determines if any node local ports overlap
+ * - Determines if any node local destinations overlap
  */
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 				   u32 upper, struct tipc_nlist *nodes)
 {
-	struct sub_seq *sseq, *stop;
-	struct publication *publ;
-	struct name_info *info;
-	struct name_seq *seq;
+	struct service_range *sr;
+	struct tipc_service *sc;
+	struct publication *p;
+	struct rb_node *n;
 
 	rcu_read_lock();
-	seq = nametbl_find_seq(net, type);
-	if (!seq)
+	sc = tipc_service_find(net, type);
+	if (!sc)
 		goto exit;
 
-	spin_lock_bh(&seq->lock);
-	sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
-	stop = seq->sseqs + seq->first_free;
-	for (; sseq != stop && sseq->lower <= upper; sseq++) {
-		info = sseq->info;
-		list_for_each_entry(publ, &info->all_publ, all_publ) {
-			tipc_nlist_add(nodes, publ->node);
+	spin_lock_bh(&sc->lock);
+
+	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+		sr = container_of(n, struct service_range, tree_node);
+		if (sr->upper < lower)
+			continue;
+		if (sr->lower > upper)
+			break;
+		list_for_each_entry(p, &sr->all_publ, all_publ) {
+			tipc_nlist_add(nodes, p->node);
 		}
 	}
-	spin_unlock_bh(&seq->lock);
+	spin_unlock_bh(&sc->lock);
 exit:
 	rcu_read_unlock();
 }
@@ -667,89 +584,88 @@ exit:
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 			      u32 type, u32 scope)
 {
-	struct sub_seq *sseq, *stop;
-	struct name_info *info;
+	struct service_range *sr;
+	struct tipc_service *sc;
 	struct publication *p;
-	struct name_seq *seq;
+	struct rb_node *n;
 
 	rcu_read_lock();
-	seq = nametbl_find_seq(net, type);
-	if (!seq)
+	sc = tipc_service_find(net, type);
+	if (!sc)
 		goto exit;
 
-	spin_lock_bh(&seq->lock);
-	sseq = seq->sseqs;
-	stop = seq->sseqs + seq->first_free;
-	for (; sseq != stop; sseq++) {
-		info = sseq->info;
-		list_for_each_entry(p, &info->all_publ, all_publ) {
+	spin_lock_bh(&sc->lock);
+	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+		sr = container_of(n, struct service_range, tree_node);
+		list_for_each_entry(p, &sr->all_publ, all_publ) {
 			if (p->scope != scope)
 				continue;
 			tipc_group_add_member(grp, p->node, p->port, p->lower);
 		}
 	}
-	spin_unlock_bh(&seq->lock);
+	spin_unlock_bh(&sc->lock);
 exit:
 	rcu_read_unlock();
 }
 
-/*
- * tipc_nametbl_publish - add name publication to network name tables
+/* tipc_nametbl_publish - add service binding to name table
  */
 struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
-					 u32 upper, u32 scope, u32 port_ref,
+					 u32 upper, u32 scope, u32 port,
 					 u32 key)
 {
-	struct publication *publ;
-	struct sk_buff *buf = NULL;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_net *tn = tipc_net(net);
+	struct publication *p = NULL;
+	struct sk_buff *skb = NULL;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	if (tn->nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) {
-		pr_warn("Publication failed, local publication limit reached (%u)\n",
-			TIPC_MAX_PUBLICATIONS);
-		spin_unlock_bh(&tn->nametbl_lock);
-		return NULL;
+
+	if (nt->local_publ_count >= TIPC_MAX_PUBL) {
+		pr_warn("Bind failed, max limit %u reached\n", TIPC_MAX_PUBL);
+		goto exit;
 	}
 
-	publ = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
-					tipc_own_addr(net), port_ref, key);
-	if (likely(publ)) {
-		tn->nametbl->local_publ_count++;
-		buf = tipc_named_publish(net, publ);
+	p = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
+				     tipc_own_addr(net), port, key);
+	if (p) {
+		nt->local_publ_count++;
+		skb = tipc_named_publish(net, p);
 		/* Any pending external events? */
 		tipc_named_process_backlog(net);
 	}
+exit:
 	spin_unlock_bh(&tn->nametbl_lock);
 
-	if (buf)
-		tipc_node_broadcast(net, buf);
-	return publ;
+	if (skb)
+		tipc_node_broadcast(net, skb);
+	return p;
 }
 
 /**
- * tipc_nametbl_withdraw - withdraw name publication from network name tables
+ * tipc_nametbl_withdraw - withdraw a service binding
  */
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
-			  u32 key)
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
+			  u32 port, u32 key)
 {
-	struct publication *publ;
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_net *tn = tipc_net(net);
+	u32 self = tipc_own_addr(net);
 	struct sk_buff *skb = NULL;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct publication *p;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	publ = tipc_nametbl_remove_publ(net, type, lower, tipc_own_addr(net),
-					port, key);
-	if (likely(publ)) {
-		tn->nametbl->local_publ_count--;
-		skb = tipc_named_withdraw(net, publ);
+
+	p = tipc_nametbl_remove_publ(net, type, lower, self, port, key);
+	if (p) {
+		nt->local_publ_count--;
+		skb = tipc_named_withdraw(net, p);
 		/* Any pending external events? */
 		tipc_named_process_backlog(net);
-		list_del_init(&publ->binding_sock);
-		kfree_rcu(publ, rcu);
+		list_del_init(&p->binding_sock);
+		kfree_rcu(p, rcu);
 	} else {
-		pr_err("Unable to remove local publication\n"
-		       "(type=%u, lower=%u, port=%u, key=%u)\n",
+		pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
 		       type, lower, port, key);
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
@@ -766,27 +682,24 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 port,
  */
 void tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
+	struct name_table *nt = tipc_name_table(sub->net);
 	struct tipc_net *tn = tipc_net(sub->net);
 	struct tipc_subscr *s = &sub->evt.s;
 	u32 type = tipc_sub_read(s, seq.type);
-	int index = hash(type);
-	struct name_seq *seq;
-	struct tipc_name_seq ns;
+	struct tipc_service *sc;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	seq = nametbl_find_seq(sub->net, type);
-	if (!seq)
-		seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]);
-	if (seq) {
-		spin_lock_bh(&seq->lock);
-		tipc_nameseq_subscribe(seq, sub);
-		spin_unlock_bh(&seq->lock);
+	sc = tipc_service_find(sub->net, type);
+	if (!sc)
+		sc = tipc_service_create(type, &nt->services[hash(type)]);
+	if (sc) {
+		spin_lock_bh(&sc->lock);
+		tipc_service_subscribe(sc, sub);
+		spin_unlock_bh(&sc->lock);
 	} else {
-		ns.type = tipc_sub_read(s, seq.type);
-		ns.lower = tipc_sub_read(s, seq.lower);
-		ns.upper = tipc_sub_read(s, seq.upper);
-		pr_warn("Failed to create subscription for {%u,%u,%u}\n",
-			ns.type, ns.lower, ns.upper);
+		pr_warn("Failed to subscribe for {%u,%u,%u}\n", type,
+			tipc_sub_read(s, seq.lower),
+			tipc_sub_read(s, seq.upper));
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
 }
@@ -796,124 +709,122 @@ void tipc_nametbl_subscribe(struct tipc_subscription *sub)
  */
 void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 {
-	struct tipc_subscr *s = &sub->evt.s;
 	struct tipc_net *tn = tipc_net(sub->net);
-	struct name_seq *seq;
+	struct tipc_subscr *s = &sub->evt.s;
 	u32 type = tipc_sub_read(s, seq.type);
+	struct tipc_service *sc;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	seq = nametbl_find_seq(sub->net, type);
-	if (seq != NULL) {
-		spin_lock_bh(&seq->lock);
-		list_del_init(&sub->nameseq_list);
-		tipc_sub_put(sub);
-		if (!seq->first_free && list_empty(&seq->subscriptions)) {
-			hlist_del_init_rcu(&seq->ns_list);
-			kfree(seq->sseqs);
-			spin_unlock_bh(&seq->lock);
-			kfree_rcu(seq, rcu);
-		} else {
-			spin_unlock_bh(&seq->lock);
-		}
+	sc = tipc_service_find(sub->net, type);
+	if (!sc)
+		goto exit;
+
+	spin_lock_bh(&sc->lock);
+	list_del_init(&sub->service_list);
+	tipc_sub_put(sub);
+
+	/* Delete service item if no more publications and subscriptions */
+	if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
+		hlist_del_init_rcu(&sc->service_list);
+		kfree_rcu(sc, rcu);
 	}
+	spin_unlock_bh(&sc->lock);
+exit:
 	spin_unlock_bh(&tn->nametbl_lock);
 }
 
 int tipc_nametbl_init(struct net *net)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct name_table *tipc_nametbl;
+	struct tipc_net *tn = tipc_net(net);
+	struct name_table *nt;
 	int i;
 
-	tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC);
-	if (!tipc_nametbl)
+	nt = kzalloc(sizeof(*nt), GFP_ATOMIC);
+	if (!nt)
 		return -ENOMEM;
 
 	for (i = 0; i < TIPC_NAMETBL_SIZE; i++)
-		INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]);
+		INIT_HLIST_HEAD(&nt->services[i]);
 
-	INIT_LIST_HEAD(&tipc_nametbl->node_scope);
-	INIT_LIST_HEAD(&tipc_nametbl->cluster_scope);
-	tn->nametbl = tipc_nametbl;
+	INIT_LIST_HEAD(&nt->node_scope);
+	INIT_LIST_HEAD(&nt->cluster_scope);
+	tn->nametbl = nt;
 	spin_lock_init(&tn->nametbl_lock);
 	return 0;
 }
 
 /**
- * tipc_purge_publications - remove all publications for a given type
- *
- * tipc_nametbl_lock must be held when calling this function
+ *  tipc_service_delete - purge all publications for a service and delete it
  */
-static void tipc_purge_publications(struct net *net, struct name_seq *seq)
+static void tipc_service_delete(struct net *net, struct tipc_service *sc)
 {
-	struct publication *publ, *safe;
-	struct sub_seq *sseq;
-	struct name_info *info;
+	struct service_range *sr, *tmpr;
+	struct publication *p, *tmpb;
 
-	spin_lock_bh(&seq->lock);
-	sseq = seq->sseqs;
-	info = sseq->info;
-	list_for_each_entry_safe(publ, safe, &info->all_publ, all_publ) {
-		tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node,
-					 publ->port, publ->key);
-		kfree_rcu(publ, rcu);
+	spin_lock_bh(&sc->lock);
+	rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
+		list_for_each_entry_safe(p, tmpb,
+					 &sr->all_publ, all_publ) {
+			tipc_service_remove_publ(net, sc, p->lower, p->node,
+						 p->port, p->key);
+			kfree_rcu(p, rcu);
+		}
 	}
-	hlist_del_init_rcu(&seq->ns_list);
-	kfree(seq->sseqs);
-	spin_unlock_bh(&seq->lock);
-
-	kfree_rcu(seq, rcu);
+	hlist_del_init_rcu(&sc->service_list);
+	spin_unlock_bh(&sc->lock);
+	kfree_rcu(sc, rcu);
 }
 
 void tipc_nametbl_stop(struct net *net)
 {
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_net *tn = tipc_net(net);
+	struct hlist_head *service_head;
+	struct tipc_service *service;
 	u32 i;
-	struct name_seq *seq;
-	struct hlist_head *seq_head;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct name_table *tipc_nametbl = tn->nametbl;
 
 	/* Verify name table is empty and purge any lingering
 	 * publications, then release the name table
 	 */
 	spin_lock_bh(&tn->nametbl_lock);
 	for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
-		if (hlist_empty(&tipc_nametbl->seq_hlist[i]))
+		if (hlist_empty(&nt->services[i]))
 			continue;
-		seq_head = &tipc_nametbl->seq_hlist[i];
-		hlist_for_each_entry_rcu(seq, seq_head, ns_list) {
-			tipc_purge_publications(net, seq);
+		service_head = &nt->services[i];
+		hlist_for_each_entry_rcu(service, service_head, service_list) {
+			tipc_service_delete(net, service);
 		}
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
 
 	synchronize_net();
-	kfree(tipc_nametbl);
-
+	kfree(nt);
 }
 
 static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
-					struct name_seq *seq,
-					struct sub_seq *sseq, u32 *last_publ)
+					struct tipc_service *service,
+					struct service_range *sr,
+					u32 *last_key)
 {
-	void *hdr;
-	struct nlattr *attrs;
-	struct nlattr *publ;
 	struct publication *p;
+	struct nlattr *attrs;
+	struct nlattr *b;
+	void *hdr;
 
-	if (*last_publ) {
-		list_for_each_entry(p, &sseq->info->all_publ, all_publ)
-			if (p->key == *last_publ)
+	if (*last_key) {
+		list_for_each_entry(p, &sr->all_publ, all_publ)
+			if (p->key == *last_key)
 				break;
-		if (p->key != *last_publ)
+		if (p->key != *last_key)
 			return -EPIPE;
 	} else {
-		p = list_first_entry(&sseq->info->all_publ, struct publication,
+		p = list_first_entry(&sr->all_publ,
+				     struct publication,
 				     all_publ);
 	}
 
-	list_for_each_entry_from(p, &sseq->info->all_publ, all_publ) {
-		*last_publ = p->key;
+	list_for_each_entry_from(p, &sr->all_publ, all_publ) {
+		*last_key = p->key;
 
 		hdr = genlmsg_put(msg->skb, msg->portid, msg->seq,
 				  &tipc_genl_family, NLM_F_MULTI,
@@ -925,15 +836,15 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 		if (!attrs)
 			goto msg_full;
 
-		publ = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
-		if (!publ)
+		b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL);
+		if (!b)
 			goto attr_msg_full;
 
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, seq->type))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, service->type))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sseq->lower))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sr->lower))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sseq->upper))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sr->upper))
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope))
 			goto publ_msg_full;
@@ -944,16 +855,16 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
 			goto publ_msg_full;
 
-		nla_nest_end(msg->skb, publ);
+		nla_nest_end(msg->skb, b);
 		nla_nest_end(msg->skb, attrs);
 		genlmsg_end(msg->skb, hdr);
 	}
-	*last_publ = 0;
+	*last_key = 0;
 
 	return 0;
 
 publ_msg_full:
-	nla_nest_cancel(msg->skb, publ);
+	nla_nest_cancel(msg->skb, b);
 attr_msg_full:
 	nla_nest_cancel(msg->skb, attrs);
 msg_full:
@@ -962,39 +873,34 @@ msg_full:
 	return -EMSGSIZE;
 }
 
-static int __tipc_nl_subseq_list(struct tipc_nl_msg *msg, struct name_seq *seq,
-				 u32 *last_lower, u32 *last_publ)
+static int __tipc_nl_service_range_list(struct tipc_nl_msg *msg,
+					struct tipc_service *sc,
+					u32 *last_lower, u32 *last_key)
 {
-	struct sub_seq *sseq;
-	struct sub_seq *sseq_start;
+	struct service_range *sr;
+	struct rb_node *n;
 	int err;
 
-	if (*last_lower) {
-		sseq_start = nameseq_find_subseq(seq, *last_lower);
-		if (!sseq_start)
-			return -EPIPE;
-	} else {
-		sseq_start = seq->sseqs;
-	}
-
-	for (sseq = sseq_start; sseq != &seq->sseqs[seq->first_free]; sseq++) {
-		err = __tipc_nl_add_nametable_publ(msg, seq, sseq, last_publ);
+	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
+		sr = container_of(n, struct service_range, tree_node);
+		if (sr->lower < *last_lower)
+			continue;
+		err = __tipc_nl_add_nametable_publ(msg, sc, sr, last_key);
 		if (err) {
-			*last_lower = sseq->lower;
+			*last_lower = sr->lower;
 			return err;
 		}
 	}
 	*last_lower = 0;
-
 	return 0;
 }
 
-static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg,
-			    u32 *last_type, u32 *last_lower, u32 *last_publ)
+static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
+				u32 *last_type, u32 *last_lower, u32 *last_key)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct hlist_head *seq_head;
-	struct name_seq *seq = NULL;
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_service *service = NULL;
+	struct hlist_head *head;
 	int err;
 	int i;
 
@@ -1004,30 +910,31 @@ static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg,
 		i = 0;
 
 	for (; i < TIPC_NAMETBL_SIZE; i++) {
-		seq_head = &tn->nametbl->seq_hlist[i];
+		head = &tn->nametbl->services[i];
 
 		if (*last_type) {
-			seq = nametbl_find_seq(net, *last_type);
-			if (!seq)
+			service = tipc_service_find(net, *last_type);
+			if (!service)
 				return -EPIPE;
 		} else {
-			hlist_for_each_entry_rcu(seq, seq_head, ns_list)
+			hlist_for_each_entry_rcu(service, head, service_list)
 				break;
-			if (!seq)
+			if (!service)
 				continue;
 		}
 
-		hlist_for_each_entry_from_rcu(seq, ns_list) {
-			spin_lock_bh(&seq->lock);
-			err = __tipc_nl_subseq_list(msg, seq, last_lower,
-						    last_publ);
+		hlist_for_each_entry_from_rcu(service, service_list) {
+			spin_lock_bh(&service->lock);
+			err = __tipc_nl_service_range_list(msg, service,
+							   last_lower,
+							   last_key);
 
 			if (err) {
-				*last_type = seq->type;
-				spin_unlock_bh(&seq->lock);
+				*last_type = service->type;
+				spin_unlock_bh(&service->lock);
 				return err;
 			}
-			spin_unlock_bh(&seq->lock);
+			spin_unlock_bh(&service->lock);
 		}
 		*last_type = 0;
 	}
@@ -1036,13 +943,13 @@ static int tipc_nl_seq_list(struct net *net, struct tipc_nl_msg *msg,
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	int err;
-	int done = cb->args[3];
+	struct net *net = sock_net(skb->sk);
 	u32 last_type = cb->args[0];
 	u32 last_lower = cb->args[1];
-	u32 last_publ = cb->args[2];
-	struct net *net = sock_net(skb->sk);
+	u32 last_key = cb->args[2];
+	int done = cb->args[3];
 	struct tipc_nl_msg msg;
+	int err;
 
 	if (done)
 		return 0;
@@ -1052,7 +959,8 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	msg.seq = cb->nlh->nlmsg_seq;
 
 	rcu_read_lock();
-	err = tipc_nl_seq_list(net, &msg, &last_type, &last_lower, &last_publ);
+	err = tipc_nl_service_list(net, &msg, &last_type,
+				   &last_lower, &last_key);
 	if (!err) {
 		done = 1;
 	} else if (err != -EMSGSIZE) {
@@ -1068,7 +976,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	cb->args[0] = last_type;
 	cb->args[1] = last_lower;
-	cb->args[2] = last_publ;
+	cb->args[2] = last_key;
 	cb->args[3] = done;
 
 	return skb->len;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 34a4ccb907aa..1b03b8751707 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -97,7 +97,7 @@ struct publication {
  * @local_publ_count: number of publications issued by this node
  */
 struct name_table {
-	struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE];
+	struct hlist_head services[TIPC_NAMETBL_SIZE];
 	struct list_head node_scope;
 	struct list_head cluster_scope;
 	u32 local_publ_count;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 4fb4327311bb..85e777e00ec9 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -324,12 +324,12 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	if (flags & TIPC_NOTIFY_LINK_UP) {
 		tipc_mon_peer_up(net, addr, bearer_id);
 		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
-				     TIPC_NODE_SCOPE, link_id, addr);
+				     TIPC_NODE_SCOPE, link_id, link_id);
 	}
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
 		tipc_mon_peer_down(net, addr, bearer_id);
 		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
-				      link_id, addr);
+				      link_id, link_id);
 	}
 }
 
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 8b2d22b18f22..d793b4343885 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -40,7 +40,7 @@
 #include "topsrv.h"
 
 #define TIPC_MAX_SUBSCR         65535
-#define TIPC_MAX_PUBLICATIONS   65535
+#define TIPC_MAX_PUBL           65535
 
 struct tipc_subscription;
 struct tipc_conn;
@@ -58,7 +58,7 @@ struct tipc_subscription {
 	struct kref kref;
 	struct net *net;
 	struct timer_list timer;
-	struct list_head nameseq_list;
+	struct list_head service_list;
 	struct list_head sub_list;
 	struct tipc_event evt;
 	int conid;

From f20889f72bd531cad88fbb571755a52cabf43424 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:42 +0200
Subject: [PATCH 1572/1678] tipc: refactor name table translate function

The function tipc_nametbl_translate() function is ugly and hard to
follow. This can be improved somewhat by introducing a stack variable
for holding the publication list to be used and re-ordering the if-
clauses for selection of algorithm.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 61 ++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 36 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index e06c7a8907aa..4bdc580c533b 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -399,29 +399,32 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 /**
  * tipc_nametbl_translate - perform service instance to socket translation
  *
- * On entry, 'destnode' is the search domain used during translation.
+ * On entry, 'dnode' is the search domain used during translation.
  *
  * On exit:
- * - if name translation is deferred to another node/cluster/zone,
- *   leaves 'destnode' unchanged (will be non-zero) and returns 0
- * - if name translation is attempted and succeeds, sets 'destnode'
- *   to publication node and returns port reference (will be non-zero)
- * - if name translation is attempted and fails, sets 'destnode' to 0
- *   and returns 0
+ * - if translation is deferred to another node, leave 'dnode' unchanged and
+ *   return 0
+ * - if translation is attempted and succeeds, set 'dnode' to the publishing
+ *   node and return the published (non-zero) port number
+ * - if translation is attempted and fails, set 'dnode' to 0 and return 0
+ *
+ * Note that for legacy users (node configured with Z.C.N address format) the
+ * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
+ * we must look in the local binding list first
  */
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
-			   u32 *destnode)
+u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
 {
 	struct tipc_net *tn = tipc_net(net);
 	bool legacy = tn->legacy_addr_format;
 	u32 self = tipc_own_addr(net);
 	struct service_range *sr;
 	struct tipc_service *sc;
+	struct list_head *list;
 	struct publication *p;
 	u32 port = 0;
 	u32 node = 0;
 
-	if (!tipc_in_scope(legacy, *destnode, self))
+	if (!tipc_in_scope(legacy, *dnode, self))
 		return 0;
 
 	rcu_read_lock();
@@ -434,43 +437,29 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	if (unlikely(!sr))
 		goto no_match;
 
-	/* Closest-First Algorithm */
-	if (legacy && !*destnode) {
-		if (!list_empty(&sr->local_publ)) {
-			p = list_first_entry(&sr->local_publ,
-					     struct publication,
-					     local_publ);
-			list_move_tail(&p->local_publ,
-				       &sr->local_publ);
-		} else {
-			p = list_first_entry(&sr->all_publ,
-					     struct publication,
-					     all_publ);
-			list_move_tail(&p->all_publ,
-				       &sr->all_publ);
-		}
-	}
-
-	/* Round-Robin Algorithm */
-	else if (*destnode == self) {
-		if (list_empty(&sr->local_publ))
+	/* Select lookup algorithm: local, closest-first or round-robin */
+	if (*dnode == self) {
+		list = &sr->local_publ;
+		if (list_empty(list))
 			goto no_match;
-		p = list_first_entry(&sr->local_publ, struct publication,
-				     local_publ);
+		p = list_first_entry(list, struct publication, local_publ);
+		list_move_tail(&p->local_publ, &sr->local_publ);
+	} else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
+		list = &sr->local_publ;
+		p = list_first_entry(list, struct publication, local_publ);
 		list_move_tail(&p->local_publ, &sr->local_publ);
 	} else {
-		p = list_first_entry(&sr->all_publ, struct publication,
-				     all_publ);
+		list = &sr->all_publ;
+		p = list_first_entry(list, struct publication, all_publ);
 		list_move_tail(&p->all_publ, &sr->all_publ);
 	}
-
 	port = p->port;
 	node = p->node;
 no_match:
 	spin_unlock_bh(&sc->lock);
 not_found:
 	rcu_read_unlock();
-	*destnode = node;
+	*dnode = node;
 	return port;
 }
 

From 37922ea4a3105176357c8d565a9d982c4a08714a Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:43 +0200
Subject: [PATCH 1573/1678] tipc: permit overlapping service ranges in name
 table

With the new RB tree structure for service ranges it becomes possible to
solve an old problem; - we can now allow overlapping service ranges in
the table.

When inserting a new service range to the tree, we use 'lower' as primary
key, and when necessary 'upper' as secondary key.

Since there may now be multiple service ranges matching an indicated
'lower' value, we must also add the 'upper' value to the functions
used for removing publications, so that the correct, corresponding
range item can be found.

These changes guarantee that a well-formed publication/withdrawal item
from a peer node never will be rejected, and make it possible to
eliminate the problematic backlog functionality we currently have for
handling such cases.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 90 +++++++++++--------------------------------
 net/tipc/name_distr.h |  1 -
 net/tipc/name_table.c | 64 +++++++++++++++---------------
 net/tipc/name_table.h |  8 ++--
 net/tipc/net.c        |  2 +-
 net/tipc/node.c       |  2 +-
 net/tipc/socket.c     |  4 +-
 7 files changed, 60 insertions(+), 111 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 8240a85b0d0c..51b4b96f89db 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -204,12 +204,12 @@ void tipc_named_node_up(struct net *net, u32 dnode)
  */
 static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_net *tn = tipc_net(net);
 	struct publication *p;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	p = tipc_nametbl_remove_publ(net, publ->type, publ->lower,
-				     publ->node, publ->port, publ->key);
+	p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper,
+				     publ->node, publ->key);
 	if (p)
 		tipc_node_unsubscribe(net, &p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
@@ -261,81 +261,37 @@ void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr)
 static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 				u32 node, u32 dtype)
 {
-	struct publication *publ = NULL;
+	struct publication *p = NULL;
+	u32 lower = ntohl(i->lower);
+	u32 upper = ntohl(i->upper);
+	u32 type = ntohl(i->type);
+	u32 port = ntohl(i->port);
+	u32 key = ntohl(i->key);
 
 	if (dtype == PUBLICATION) {
-		publ = tipc_nametbl_insert_publ(net, ntohl(i->type),
-						ntohl(i->lower),
-						ntohl(i->upper),
-						TIPC_CLUSTER_SCOPE, node,
-						ntohl(i->port), ntohl(i->key));
-		if (publ) {
-			tipc_node_subscribe(net, &publ->binding_node, node);
+		p = tipc_nametbl_insert_publ(net, type, lower, upper,
+					     TIPC_CLUSTER_SCOPE, node,
+					     port, key);
+		if (p) {
+			tipc_node_subscribe(net, &p->binding_node, node);
 			return true;
 		}
 	} else if (dtype == WITHDRAWAL) {
-		publ = tipc_nametbl_remove_publ(net, ntohl(i->type),
-						ntohl(i->lower),
-						node, ntohl(i->port),
-						ntohl(i->key));
-		if (publ) {
-			tipc_node_unsubscribe(net, &publ->binding_node, node);
-			kfree_rcu(publ, rcu);
+		p = tipc_nametbl_remove_publ(net, type, lower,
+					     upper, node, key);
+		if (p) {
+			tipc_node_unsubscribe(net, &p->binding_node, node);
+			kfree_rcu(p, rcu);
 			return true;
 		}
+		pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
+				    type, lower, node);
 	} else {
 		pr_warn("Unrecognized name table message received\n");
 	}
 	return false;
 }
 
-/**
- * tipc_named_add_backlog - add a failed name table update to the backlog
- *
- */
-static void tipc_named_add_backlog(struct net *net, struct distr_item *i,
-				   u32 type, u32 node)
-{
-	struct distr_queue_item *e;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	unsigned long now = get_jiffies_64();
-
-	e = kzalloc(sizeof(*e), GFP_ATOMIC);
-	if (!e)
-		return;
-	e->dtype = type;
-	e->node = node;
-	e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout);
-	memcpy(e, i, sizeof(*i));
-	list_add_tail(&e->next, &tn->dist_queue);
-}
-
-/**
- * tipc_named_process_backlog - try to process any pending name table updates
- * from the network.
- */
-void tipc_named_process_backlog(struct net *net)
-{
-	struct distr_queue_item *e, *tmp;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	unsigned long now = get_jiffies_64();
-
-	list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) {
-		if (time_after(e->expires, now)) {
-			if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype))
-				continue;
-		} else {
-			pr_warn_ratelimited("Dropping name table update (%d) of {%u, %u, %u} from %x key=%u\n",
-					    e->dtype, ntohl(e->i.type),
-					    ntohl(e->i.lower),
-					    ntohl(e->i.upper),
-					    e->node, ntohl(e->i.key));
-		}
-		list_del(&e->next);
-		kfree(e);
-	}
-}
-
 /**
  * tipc_named_rcv - process name table update messages sent by another node
  */
@@ -358,12 +314,10 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq)
 		count = msg_data_sz(msg) / ITEM_SIZE;
 		node = msg_orignode(msg);
 		while (count--) {
-			if (!tipc_update_nametbl(net, item, node, mtype))
-				tipc_named_add_backlog(net, item, mtype, node);
+			tipc_update_nametbl(net, item, node, mtype);
 			item++;
 		}
 		kfree_skb(skb);
-		tipc_named_process_backlog(net);
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
 }
diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h
index 4753e628d7c4..63fc73e0fa6c 100644
--- a/net/tipc/name_distr.h
+++ b/net/tipc/name_distr.h
@@ -72,7 +72,6 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ);
 void tipc_named_node_up(struct net *net, u32 dnode);
 void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue);
 void tipc_named_reinit(struct net *net);
-void tipc_named_process_backlog(struct net *net);
 void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr);
 
 #endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 4bdc580c533b..b1fe20972aa9 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -171,10 +171,14 @@ static struct service_range *tipc_service_create_range(struct tipc_service *sc,
 		tmp = container_of(parent, struct service_range, tree_node);
 		if (lower < tmp->lower)
 			n = &(*n)->rb_left;
+		else if (lower > tmp->lower)
+			n = &(*n)->rb_right;
+		else if (upper < tmp->upper)
+			n = &(*n)->rb_left;
 		else if (upper > tmp->upper)
 			n = &(*n)->rb_right;
 		else
-			return NULL;
+			return tmp;
 	}
 	sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
 	if (!sr)
@@ -200,17 +204,11 @@ static struct publication *tipc_service_insert_publ(struct net *net,
 	struct publication *p;
 	bool first = false;
 
-	sr = tipc_service_find_range(sc, lower);
-	if (!sr) {
-		sr = tipc_service_create_range(sc, lower, upper);
-		if (!sr)
-			goto  err;
-		first = true;
-	}
+	sr = tipc_service_create_range(sc, lower, upper);
+	if (!sr)
+		goto  err;
 
-	/* Lower end overlaps existing entry, but we need an exact match */
-	if (sr->lower != lower || sr->upper != upper)
-		return NULL;
+	first = list_empty(&sr->all_publ);
 
 	/* Return if the publication already exists */
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
@@ -239,30 +237,32 @@ err:
 
 /**
  * tipc_service_remove_publ - remove a publication from a service
- *
- * NOTE: There may be cases where TIPC is asked to remove a publication
- * that is not in the name table.  For example, if another node issues a
- * publication for a name range that overlaps an existing name range
- * the publication will not be recorded, which means the publication won't
- * be found when the name range is later withdrawn by that node.
- * A failed withdraw request simply returns a failure indication and lets the
- * caller issue any error or warning messages associated with such a problem.
  */
 static struct publication *tipc_service_remove_publ(struct net *net,
 						    struct tipc_service *sc,
-						    u32 inst, u32 node,
-						    u32 port, u32 key)
+						    u32 lower, u32 upper,
+						    u32 node, u32 key)
 {
 	struct tipc_subscription *sub, *tmp;
 	struct service_range *sr;
 	struct publication *p;
 	bool found = false;
 	bool last = false;
+	struct rb_node *n;
 
-	sr = tipc_service_find_range(sc, inst);
+	sr = tipc_service_find_range(sc, lower);
 	if (!sr)
 		return NULL;
 
+	/* Find exact matching service range */
+	for (n = &sr->tree_node; n; n = rb_next(n)) {
+		sr = container_of(n, struct service_range, tree_node);
+		if (sr->upper == upper)
+			break;
+	}
+	if (!n || sr->lower != lower || sr->upper != upper)
+		return NULL;
+
 	/* Find publication, if it exists */
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
 		if (p->key != key || (node && node != p->node))
@@ -375,8 +375,8 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 }
 
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-						 u32 lower, u32 node, u32 port,
-						 u32 key)
+					     u32 lower, u32 upper,
+					     u32 node, u32 key)
 {
 	struct tipc_service *sc = tipc_service_find(net, type);
 	struct publication *p = NULL;
@@ -385,7 +385,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 		return NULL;
 
 	spin_lock_bh(&sc->lock);
-	p = tipc_service_remove_publ(net, sc, lower, node, port, key);
+	p = tipc_service_remove_publ(net, sc, lower, upper, node, key);
 
 	/* Delete service item if this no more publications and subscriptions */
 	if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
@@ -620,8 +620,6 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 	if (p) {
 		nt->local_publ_count++;
 		skb = tipc_named_publish(net, p);
-		/* Any pending external events? */
-		tipc_named_process_backlog(net);
 	}
 exit:
 	spin_unlock_bh(&tn->nametbl_lock);
@@ -635,7 +633,7 @@ exit:
  * tipc_nametbl_withdraw - withdraw a service binding
  */
 int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
-			  u32 port, u32 key)
+			  u32 upper, u32 key)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
@@ -645,17 +643,15 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	p = tipc_nametbl_remove_publ(net, type, lower, self, port, key);
+	p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key);
 	if (p) {
 		nt->local_publ_count--;
 		skb = tipc_named_withdraw(net, p);
-		/* Any pending external events? */
-		tipc_named_process_backlog(net);
 		list_del_init(&p->binding_sock);
 		kfree_rcu(p, rcu);
 	} else {
 		pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
-		       type, lower, port, key);
+		       type, lower, upper, key);
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
 
@@ -754,8 +750,8 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
 	rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
 		list_for_each_entry_safe(p, tmpb,
 					 &sr->all_publ, all_publ) {
-			tipc_service_remove_publ(net, sc, p->lower, p->node,
-						 p->port, p->key);
+			tipc_service_remove_publ(net, sc, p->lower, p->upper,
+						 p->node, p->key);
 			kfree_rcu(p, rcu);
 		}
 	}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 1b03b8751707..4b14fc28d9e2 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -116,16 +116,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
 			 struct list_head *dsts, int *dstcnt, u32 exclude,
 			 bool all);
 struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
-					 u32 upper, u32 scope, u32 port_ref,
+					 u32 upper, u32 scope, u32 port,
 					 u32 key);
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
+int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
 			  u32 key);
 struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 					     u32 lower, u32 upper, u32 scope,
 					     u32 node, u32 ref, u32 key);
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 node, u32 ref,
-					     u32 key);
+					     u32 lower, u32 upper,
+					     u32 node, u32 key);
 void tipc_nametbl_subscribe(struct tipc_subscription *s);
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 29538dc00857..856f9e97ea29 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -136,7 +136,7 @@ void tipc_net_stop(struct net *net)
 	if (!self)
 		return;
 
-	tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, 0, self);
+	tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self);
 	rtnl_lock();
 	tipc_bearer_stop(net);
 	tipc_node_stop(net);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 85e777e00ec9..c77dd2f3c589 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -329,7 +329,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
 		tipc_mon_peer_down(net, addr, bearer_id);
 		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
-				      link_id, link_id);
+				      addr, link_id);
 	}
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 275b666f6231..3e5eba30865e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2634,12 +2634,12 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 			if (publ->upper != seq->upper)
 				break;
 			tipc_nametbl_withdraw(net, publ->type, publ->lower,
-					      publ->port, publ->key);
+					      publ->upper, publ->key);
 			rc = 0;
 			break;
 		}
 		tipc_nametbl_withdraw(net, publ->type, publ->lower,
-				      publ->port, publ->key);
+				      publ->upper, publ->key);
 		rc = 0;
 	}
 	if (list_empty(&tsk->publications))

From 7a74d39cc2927302bc236397c1fdb1fe5be209ce Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:44 +0200
Subject: [PATCH 1574/1678] tipc: tipc: rename address types in user api

The three address type structs in the user API have names that in
reality reflect the specific, non-Linux environment where they were
originally created.

We now give them more intuitive names, in accordance with how TIPC is
described in the current documentation.

struct tipc_portid   -> struct tipc_socket_addr
struct tipc_name     -> struct tipc_service_addr
struct tipc_name_seq -> struct tipc_service_range

To avoid confusion, we also update some commmets and macro names to
 match the new terminology.

For compatibility, we add macros that map all old names to the new ones.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 57 ++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 4ac9f1f02b06..156224ac3d74 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -45,33 +45,33 @@
  * TIPC addressing primitives
  */
 
-struct tipc_portid {
+struct tipc_socket_addr {
 	__u32 ref;
 	__u32 node;
 };
 
-struct tipc_name {
+struct tipc_service_addr {
 	__u32 type;
 	__u32 instance;
 };
 
-struct tipc_name_seq {
+struct tipc_service_range {
 	__u32 type;
 	__u32 lower;
 	__u32 upper;
 };
 
 /*
- * Application-accessible port name types
+ * Application-accessible service types
  */
 
-#define TIPC_CFG_SRV		0	/* configuration service name type */
-#define TIPC_TOP_SRV		1	/* topology service name type */
-#define TIPC_LINK_STATE		2	/* link state name type */
-#define TIPC_RESERVED_TYPES	64	/* lowest user-publishable name type */
+#define TIPC_NODE_STATE		0	/* node state service type */
+#define TIPC_TOP_SRV		1	/* topology server service type */
+#define TIPC_LINK_STATE		2	/* link state service type */
+#define TIPC_RESERVED_TYPES	64	/* lowest user-allowed service type */
 
 /*
- * Publication scopes when binding port names and port name sequences
+ * Publication scopes when binding service / service range
  */
 enum tipc_scope {
 	TIPC_CLUSTER_SCOPE = 2, /* 0 can also be used */
@@ -108,28 +108,28 @@ enum tipc_scope {
  * TIPC topology subscription service definitions
  */
 
-#define TIPC_SUB_PORTS		0x01	/* filter for port availability */
-#define TIPC_SUB_SERVICE	0x02	/* filter for service availability */
-#define TIPC_SUB_CANCEL		0x04	/* cancel a subscription */
+#define TIPC_SUB_PORTS          0x01    /* filter: evt at each match */
+#define TIPC_SUB_SERVICE        0x02    /* filter: evt at first up/last down */
+#define TIPC_SUB_CANCEL         0x04    /* filter: cancel a subscription */
 
 #define TIPC_WAIT_FOREVER	(~0)	/* timeout for permanent subscription */
 
 struct tipc_subscr {
-	struct tipc_name_seq seq;	/* name sequence of interest */
+	struct tipc_service_range seq;	/* range of interest */
 	__u32 timeout;			/* subscription duration (in ms) */
 	__u32 filter;			/* bitmask of filter options */
 	char usr_handle[8];		/* available for subscriber use */
 };
 
 #define TIPC_PUBLISHED		1	/* publication event */
-#define TIPC_WITHDRAWN		2	/* withdraw event */
+#define TIPC_WITHDRAWN		2	/* withdrawal event */
 #define TIPC_SUBSCR_TIMEOUT	3	/* subscription timeout event */
 
 struct tipc_event {
 	__u32 event;			/* event type */
-	__u32 found_lower;		/* matching name seq instances */
-	__u32 found_upper;		/*    "      "    "     "      */
-	struct tipc_portid port;	/* associated port */
+	__u32 found_lower;		/* matching range */
+	__u32 found_upper;		/*    "      "    */
+	struct tipc_socket_addr port;	/* associated socket */
 	struct tipc_subscr s;		/* associated subscription */
 };
 
@@ -149,20 +149,20 @@ struct tipc_event {
 #define SOL_TIPC	271
 #endif
 
-#define TIPC_ADDR_NAMESEQ	1
-#define TIPC_ADDR_MCAST		1
-#define TIPC_ADDR_NAME		2
-#define TIPC_ADDR_ID		3
+#define TIPC_ADDR_MCAST         1
+#define TIPC_SERVICE_RANGE      1
+#define TIPC_SERVICE_ADDR       2
+#define TIPC_SOCKET_ADDR        3
 
 struct sockaddr_tipc {
 	unsigned short family;
 	unsigned char  addrtype;
 	signed   char  scope;
 	union {
-		struct tipc_portid id;
-		struct tipc_name_seq nameseq;
+		struct tipc_socket_addr id;
+		struct tipc_service_range nameseq;
 		struct {
-			struct tipc_name name;
+			struct tipc_service_addr name;
 			__u32 domain;
 		} name;
 	} addr;
@@ -230,8 +230,13 @@ struct tipc_sioc_ln_req {
 /* The macros and functions below are deprecated:
  */
 
+#define TIPC_CFG_SRV		0
 #define TIPC_ZONE_SCOPE         1
 
+#define TIPC_ADDR_NAMESEQ	1
+#define TIPC_ADDR_NAME		2
+#define TIPC_ADDR_ID		3
+
 #define TIPC_NODE_BITS          12
 #define TIPC_CLUSTER_BITS       12
 #define TIPC_ZONE_BITS          8
@@ -250,6 +255,10 @@ struct tipc_sioc_ln_req {
 
 #define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK)
 
+#define tipc_portid tipc_socket_addr
+#define tipc_name tipc_service_addr
+#define tipc_name_seq tipc_service_range
+
 static inline __u32 tipc_addr(unsigned int zone,
 			      unsigned int cluster,
 			      unsigned int node)

From 7494cfa6d36d1556f17baa012dd93833620783db Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Thu, 29 Mar 2018 23:20:45 +0200
Subject: [PATCH 1575/1678] tipc: avoid possible string overflow

gcc points out that the combined length of the fixed-length inputs to
l->name is larger than the destination buffer size:

net/tipc/link.c: In function 'tipc_link_create':
net/tipc/link.c:465:26: error: '%s' directive writing up to 32 bytes
into a region of size between 26 and 58 [-Werror=format-overflow=]
sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);

net/tipc/link.c:465:2: note: 'sprintf' output 11 or more bytes
(assuming 75) into a destination of size 60
sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);

A detailed analysis reveals that the theoretical maximum length of
a link name is:
max self_str + 1 + max if_name + 1 + max peer_str + 1 + max if_name =
16 + 1 + 15 + 1 + 16 + 1 + 15 = 65
Since we also need space for a trailing zero we now set MAX_LINK_NAME
to 68.

Just to be on the safe side we also replace the sprintf() call with
snprintf().

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
hash values")
Reported-by: Arnd Bergmann <arnd@arndb.de>

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 2 +-
 net/tipc/link.c           | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 156224ac3d74..bf6d28677cfe 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -216,7 +216,7 @@ struct tipc_group_req {
 #define TIPC_MAX_MEDIA_NAME	16
 #define TIPC_MAX_IF_NAME	16
 #define TIPC_MAX_BEARER_NAME	32
-#define TIPC_MAX_LINK_NAME	60
+#define TIPC_MAX_LINK_NAME	68
 
 #define SIOCGETLINKNAME		SIOCPROTOPRIVATE
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 8f2a9496439b..695acb783969 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -462,7 +462,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 			sprintf(peer_str, "%x", peer);
 	}
 	/* Peer i/f name will be completed by reset/activate message */
-	sprintf(l->name, "%s:%s-%s:unknown", self_str, if_name, peer_str);
+	snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown",
+		 self_str, if_name, peer_str);
 
 	strcpy(l->if_name, if_name);
 	l->addr = peer;

From ef817102586cca428d6fe0803cc232a3c929141f Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Fri, 30 Mar 2018 09:28:51 -0700
Subject: [PATCH 1576/1678] netdevsim: Change nsim_devlink_setup to return
 error to caller

Change nsim_devlink_setup to return any error back to the caller and
update nsim_init to handle it.

Requested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/devlink.c   | 12 +++++++-----
 drivers/net/netdevsim/netdev.c    |  6 +++++-
 drivers/net/netdevsim/netdevsim.h |  5 +++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netdevsim/devlink.c b/drivers/net/netdevsim/devlink.c
index bbdcf064ba10..27ae05c5fdaf 100644
--- a/drivers/net/netdevsim/devlink.c
+++ b/drivers/net/netdevsim/devlink.c
@@ -218,22 +218,22 @@ void nsim_devlink_teardown(struct netdevsim *ns)
 	}
 }
 
-void nsim_devlink_setup(struct netdevsim *ns)
+int nsim_devlink_setup(struct netdevsim *ns)
 {
 	struct net *net = nsim_to_net(ns);
 	bool *reg_devlink = net_generic(net, nsim_devlink_id);
 	struct devlink *devlink;
-	int err = -ENOMEM;
+	int err;
 
 	/* only one device per namespace controls devlink */
 	if (!*reg_devlink) {
 		ns->devlink = NULL;
-		return;
+		return 0;
 	}
 
 	devlink = devlink_alloc(&nsim_devlink_ops, 0);
 	if (!devlink)
-		return;
+		return -ENOMEM;
 
 	err = devlink_register(devlink, &ns->dev);
 	if (err)
@@ -247,12 +247,14 @@ void nsim_devlink_setup(struct netdevsim *ns)
 
 	*reg_devlink = false;
 
-	return;
+	return 0;
 
 err_dl_unregister:
 	devlink_unregister(devlink);
 err_devlink_free:
 	devlink_free(devlink);
+
+	return err;
 }
 
 /* Initialize per network namespace state */
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 8b30ab3ea2c2..ec68f38213d9 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -167,10 +167,14 @@ static int nsim_init(struct net_device *dev)
 
 	SET_NETDEV_DEV(dev, &ns->dev);
 
-	nsim_devlink_setup(ns);
+	err = nsim_devlink_setup(ns);
+	if (err)
+		goto err_unreg_dev;
 
 	return 0;
 
+err_unreg_dev:
+	device_unregister(&ns->dev);
 err_bpf_uninit:
 	nsim_bpf_uninit(ns);
 err_debugfs_destroy:
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index afb8cf90c0fd..3a8581af3b85 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -117,7 +117,7 @@ enum nsim_resource_id {
 	NSIM_RESOURCE_IPV6_FIB_RULES,
 };
 
-void nsim_devlink_setup(struct netdevsim *ns);
+int nsim_devlink_setup(struct netdevsim *ns);
 void nsim_devlink_teardown(struct netdevsim *ns);
 
 int nsim_devlink_init(void);
@@ -128,8 +128,9 @@ void nsim_fib_exit(void);
 u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max);
 int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val);
 #else
-static inline void nsim_devlink_setup(struct netdevsim *ns)
+static inline int nsim_devlink_setup(struct netdevsim *ns)
 {
+	return 0;
 }
 
 static inline void nsim_devlink_teardown(struct netdevsim *ns)

From c679f6a26dc3d450b76e94717850a14ff706cc58 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 30 Mar 2018 02:24:28 +0000
Subject: [PATCH 1577/1678] net: hns3: remove unnecessary pci_set_drvdata() and
 devm_kfree()

There is no need for explicit calls of devm_kfree(), as the allocated
memory will be freed during driver's detach.

The driver core clears the driver data to NULL after device_release.
Thus, it is not needed to manually clear the device driver data to NULL.

So remove the unnecessary pci_set_drvdata() and devm_kfree().

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index a31b4adf6e6a..8c55965a66ac 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1614,10 +1614,6 @@ static void hns3_remove(struct pci_dev *pdev)
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
 
 	hnae3_unregister_ae_dev(ae_dev);
-
-	devm_kfree(&pdev->dev, ae_dev);
-
-	pci_set_drvdata(pdev, NULL);
 }
 
 static struct pci_driver hns3_driver = {

From fc1dd36992bb041b4470120aecf8986910c56088 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 30 Mar 2018 19:38:27 +0300
Subject: [PATCH 1578/1678] net: Remove net_rwsem from {,
 un}register_netdevice_notifier()

These functions take net_rwsem, while wireless_nlevent_flush()
also takes it. But down_read() can't be taken recursive,
because of rw_semaphore design, which prevents it to be occupied
by only readers forever.

Since we take pernet_ops_rwsem in {,un}register_netdevice_notifier(),
net list can't change, so these down_read()/up_read() can be removed.

Fixes: f0b07bb151b0 "net: Introduce net_rwsem to protect net_namespace_list"
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 07da7add4845..8edb58829124 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1633,7 +1633,6 @@ int register_netdevice_notifier(struct notifier_block *nb)
 		goto unlock;
 	if (dev_boot_phase)
 		goto unlock;
-	down_read(&net_rwsem);
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
@@ -1647,7 +1646,6 @@ int register_netdevice_notifier(struct notifier_block *nb)
 			call_netdevice_notifier(nb, NETDEV_UP, dev);
 		}
 	}
-	up_read(&net_rwsem);
 
 unlock:
 	rtnl_unlock();
@@ -1671,7 +1669,6 @@ rollback:
 	}
 
 outroll:
-	up_read(&net_rwsem);
 	raw_notifier_chain_unregister(&netdev_chain, nb);
 	goto unlock;
 }
@@ -1704,7 +1701,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 	if (err)
 		goto unlock;
 
-	down_read(&net_rwsem);
 	for_each_net(net) {
 		for_each_netdev(net, dev) {
 			if (dev->flags & IFF_UP) {
@@ -1715,7 +1711,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
 		}
 	}
-	up_read(&net_rwsem);
 unlock:
 	rtnl_unlock();
 	up_write(&pernet_ops_rwsem);

From 554873e517115c4b7207259f1cadfd77d90b5395 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 30 Mar 2018 19:38:37 +0300
Subject: [PATCH 1579/1678] net: Do not take net_rwsem in
 __rtnl_link_unregister()

This function calls call_netdevice_notifier(), which also
may take net_rwsem. So, we can't use net_rwsem here.

This patch makes callers of this functions take pernet_ops_rwsem,
like register_netdevice_notifier() does. This will protect
the modifications of net_namespace_list, and allows notifiers
to take it (they won't have to care about context).

Since __rtnl_link_unregister() is used on module load
and unload (which are not frequent operations), this looks
for me better, than make all call_netdevice_notifier()
always executing in "protected net_namespace_list" context.

Also, this fixes the problem we had a deal in 328fbe747ad4
"Close race between {un, }register_netdevice_notifier and ...",
and guarantees __rtnl_link_unregister() does not skip
exitting net.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dummy.c      | 2 ++
 drivers/net/ifb.c        | 2 ++
 net/core/net_namespace.c | 1 +
 net/core/rtnetlink.c     | 6 +++---
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 30b1c8512049..0d15a12a4560 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -219,6 +219,7 @@ static int __init dummy_init_module(void)
 {
 	int i, err = 0;
 
+	down_write(&pernet_ops_rwsem);
 	rtnl_lock();
 	err = __rtnl_link_register(&dummy_link_ops);
 	if (err < 0)
@@ -233,6 +234,7 @@ static int __init dummy_init_module(void)
 
 out:
 	rtnl_unlock();
+	up_write(&pernet_ops_rwsem);
 
 	return err;
 }
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 0008da7e9d4c..5f2897ec0edc 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -330,6 +330,7 @@ static int __init ifb_init_module(void)
 {
 	int i, err;
 
+	down_write(&pernet_ops_rwsem);
 	rtnl_lock();
 	err = __rtnl_link_register(&ifb_link_ops);
 	if (err < 0)
@@ -344,6 +345,7 @@ static int __init ifb_init_module(void)
 
 out:
 	rtnl_unlock();
+	up_write(&pernet_ops_rwsem);
 
 	return err;
 }
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7fdf321d4997..a11e03f920d3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -51,6 +51,7 @@ static bool init_net_initialized;
  * outside.
  */
 DECLARE_RWSEM(pernet_ops_rwsem);
+EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
 
 #define MIN_PERNET_OPS_ID	\
 	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e86b28482ca7..45936922d7e2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -412,17 +412,17 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
  * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
  * @ops: struct rtnl_link_ops * to unregister
  *
- * The caller must hold the rtnl_mutex.
+ * The caller must hold the rtnl_mutex and guarantee net_namespace_list
+ * integrity (hold pernet_ops_rwsem for writing to close the race
+ * with setup_net() and cleanup_net()).
  */
 void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
 	struct net *net;
 
-	down_read(&net_rwsem);
 	for_each_net(net) {
 		__rtnl_kill_links(net, ops);
 	}
-	up_read(&net_rwsem);
 	list_del(&ops->list);
 }
 EXPORT_SYMBOL_GPL(__rtnl_link_unregister);

From 49b44aa23e58d292cc1be79a5c7883ac8c927676 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 30 Mar 2018 12:37:30 -0700
Subject: [PATCH 1580/1678] ethernet: hisilicon: hns: hns_dsaf_mac: Use generic
 eth_broadcast_addr

Rather than use an on-stack array to copy a broadcast address, use
the generic eth_broadcast_addr function to save a trivial amount of
object code.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index cac86e9ae0dd..9dcc5765f11f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -369,7 +369,6 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
 {
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
-	u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	struct dsaf_drv_mac_single_dest_entry mac_entry;
 
 	/* directy return ok in debug network mode */
@@ -377,7 +376,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
 		return 0;
 
 	if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
-		memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
+		eth_broadcast_addr(mac_entry.addr);
 		mac_entry.in_vlan_id = vlan_id;
 		mac_entry.in_port_num = mac_cb->mac_id;
 		mac_entry.port_num = port_num;
@@ -404,7 +403,6 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
 	u8 port_num;
-	u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	struct mac_entry_idx *uc_mac_entry;
 	struct dsaf_drv_mac_single_dest_entry mac_entry;
 
@@ -414,7 +412,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
 	uc_mac_entry = &mac_cb->addr_entry_idx[vmid];
 
 	if (!HNS_DSAF_IS_DEBUG(dsaf_dev))  {
-		memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
+		eth_broadcast_addr(mac_entry.addr);
 		mac_entry.in_vlan_id = uc_mac_entry->vlan_id;
 		mac_entry.in_port_num = mac_cb->mac_id;
 		ret = hns_mac_get_inner_port_num(mac_cb, vmid, &port_num);

From 3be9b5fdc6379faf6f23cd8539ef9a6235396c5f Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Fri, 30 Mar 2018 13:57:59 -0700
Subject: [PATCH 1581/1678] hv_netvsc: Clean up extra parameter from
 rndis_filter_receive_data()

The variables, msg and data, have the same value. This patch removes
the extra one.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/rndis_filter.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 4a4952363e8a..e2b68d9328a7 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -365,14 +365,15 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
 
 static int rndis_filter_receive_data(struct net_device *ndev,
 				     struct netvsc_device *nvdev,
-				     struct rndis_message *msg,
 				     struct vmbus_channel *channel,
-				     void *data, u32 data_buflen)
+				     struct rndis_message *msg,
+				     u32 data_buflen)
 {
 	struct rndis_packet *rndis_pkt = &msg->msg.pkt;
 	const struct ndis_tcp_ip_checksum_info *csum_info;
 	const struct ndis_pkt_8021q_info *vlan;
 	u32 data_offset;
+	void *data;
 
 	/* Remove the rndis header and pass it back up the stack */
 	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
@@ -393,14 +394,15 @@ static int rndis_filter_receive_data(struct net_device *ndev,
 
 	vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
 
+	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
+
+	data = (void *)msg + data_offset;
+
 	/*
 	 * Remove the rndis trailer padding from rndis packet message
 	 * rndis_pkt->data_len tell us the real data length, we only copy
 	 * the data packet to the stack, without the rndis trailer padding
 	 */
-	data = (void *)((unsigned long)data + data_offset);
-	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
-
 	return netvsc_recv_callback(ndev, nvdev, channel,
 				    data, rndis_pkt->data_len,
 				    csum_info, vlan);
@@ -419,8 +421,8 @@ int rndis_filter_receive(struct net_device *ndev,
 
 	switch (rndis_msg->ndis_msg_type) {
 	case RNDIS_MSG_PACKET:
-		return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
-						 channel, data, buflen);
+		return rndis_filter_receive_data(ndev, net_dev, channel,
+						 rndis_msg, buflen);
 	case RNDIS_MSG_INIT_C:
 	case RNDIS_MSG_QUERY_C:
 	case RNDIS_MSG_SET_C:

From eeb0a2a526d816253705977033e98fdfbab32387 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Sat, 31 Mar 2018 06:11:41 +0000
Subject: [PATCH 1582/1678] vlan: vlan_hw_filter_capable() can be static

Fixes the following sparse warning:

net/8021q/vlan_core.c:168:6: warning:
 symbol 'vlan_hw_filter_capable' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index c8d7abdc0463..4f60e86f4b8d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -165,7 +165,7 @@ struct vlan_vid_info {
 	int refcount;
 };
 
-bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
+static bool vlan_hw_filter_capable(const struct net_device *dev, __be16 proto)
 {
 	if (proto == htons(ETH_P_8021Q) &&
 	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)

From d4f52de02f04f0a7dfcb1d0228a2ff58b06aa230 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:06 -0400
Subject: [PATCH 1583/1678] bnxt_en: Update firmware interface to 1.9.1.15.

Minor changes, such as new extended port statistics.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  15 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |   4 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h |   5 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 289 ++++++++++++------
 4 files changed, 210 insertions(+), 103 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3ff5f65758a3..45d45321acb2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3577,9 +3577,13 @@ static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
 			    FUNC_DRV_RGTR_REQ_ENABLES_VER);
 
 	req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
-	req.ver_maj = DRV_VER_MAJ;
-	req.ver_min = DRV_VER_MIN;
-	req.ver_upd = DRV_VER_UPD;
+	req.flags = cpu_to_le32(FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE);
+	req.ver_maj_8b = DRV_VER_MAJ;
+	req.ver_min_8b = DRV_VER_MIN;
+	req.ver_upd_8b = DRV_VER_UPD;
+	req.ver_maj = cpu_to_le16(DRV_VER_MAJ);
+	req.ver_min = cpu_to_le16(DRV_VER_MIN);
+	req.ver_upd = cpu_to_le16(DRV_VER_UPD);
 
 	if (BNXT_PF(bp)) {
 		u32 data[8];
@@ -5418,10 +5422,9 @@ static int bnxt_hwrm_set_cache_line_size(struct bnxt *bp, int size)
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
 	req.fid = cpu_to_le16(0xffff);
 	req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_CACHE_LINESIZE);
-	req.cache_linesize = FUNC_QCFG_RESP_CACHE_LINESIZE_CACHE_LINESIZE_64;
+	req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64;
 	if (size == 128)
-		req.cache_linesize =
-			FUNC_QCFG_RESP_CACHE_LINESIZE_CACHE_LINESIZE_128;
+		req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128;
 
 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 960162c9386c..c0c548feea98 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -12,11 +12,11 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME		"bnxt_en"
-#define DRV_MODULE_VERSION	"1.9.0"
+#define DRV_MODULE_VERSION	"1.9.1"
 
 #define DRV_VER_MAJ	1
 #define DRV_VER_MIN	9
-#define DRV_VER_UPD	0
+#define DRV_VER_UPD	1
 
 #include <linux/interrupt.h>
 #include <linux/rhashtable.h>
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
index d2e0af960bf5..69efde785f23 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
@@ -1,7 +1,7 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
- * Copyright (c) 2016-2017 Broadcom Limited
+ * Copyright (c) 2016-2018 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,7 +34,8 @@ struct bnxt_cos2bw_cfg {
 };
 
 #define BNXT_LLQ(q_profile)	\
-	((q_profile) == QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS)
+	((q_profile) ==		\
+	 QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_ROCE)
 
 #define HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL	0x0300
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 82d17f8cc0db..0fe0ea8dce6c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -188,6 +188,7 @@ struct cmd_nums {
 	#define HWRM_STAT_CTX_FREE                        0xb1UL
 	#define HWRM_STAT_CTX_QUERY                       0xb2UL
 	#define HWRM_STAT_CTX_CLR_STATS                   0xb3UL
+	#define HWRM_PORT_QSTATS_EXT                      0xb4UL
 	#define HWRM_FW_RESET                             0xc0UL
 	#define HWRM_FW_QSTATUS                           0xc1UL
 	#define HWRM_FW_SET_TIME                          0xc8UL
@@ -199,6 +200,7 @@ struct cmd_nums {
 	#define HWRM_REJECT_FWD_RESP                      0xd1UL
 	#define HWRM_FWD_RESP                             0xd2UL
 	#define HWRM_FWD_ASYNC_EVENT_CMPL                 0xd3UL
+	#define HWRM_OEM_CMD                              0xd4UL
 	#define HWRM_TEMP_MONITOR_QUERY                   0xe0UL
 	#define HWRM_WOL_FILTER_ALLOC                     0xf0UL
 	#define HWRM_WOL_FILTER_FREE                      0xf1UL
@@ -271,6 +273,7 @@ struct cmd_nums {
 	#define HWRM_SELFTEST_EXEC                        0x201UL
 	#define HWRM_SELFTEST_IRQ                         0x202UL
 	#define HWRM_SELFTEST_RETRIEVE_SERDES_DATA        0x203UL
+	#define HWRM_PCIE_QSTATS                          0x204UL
 	#define HWRM_DBG_READ_DIRECT                      0xff10UL
 	#define HWRM_DBG_READ_INDIRECT                    0xff11UL
 	#define HWRM_DBG_WRITE_DIRECT                     0xff12UL
@@ -341,9 +344,9 @@ struct hwrm_err_output {
 #define HWRM_RESP_VALID_KEY 1
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 9
-#define HWRM_VERSION_UPDATE 0
-#define HWRM_VERSION_RSVD 0
-#define HWRM_VERSION_STR "1.9.0.0"
+#define HWRM_VERSION_UPDATE 1
+#define HWRM_VERSION_RSVD 15
+#define HWRM_VERSION_STR "1.9.1.15"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -616,30 +619,6 @@ struct hwrm_async_event_cmpl_link_speed_cfg_change {
 	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_ILLEGAL_LINK_SPEED_CFG           0x20000UL
 };
 
-/* hwrm_async_event_cmpl_pf_drvr_unload (size:128b/16B) */
-struct hwrm_async_event_cmpl_pf_drvr_unload {
-	__le16	type;
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK            0x3fUL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT             0
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT  0x2eUL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_LAST             ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT
-	__le16	event_id;
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_LAST          ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD
-	__le32	event_data2;
-	u8	opaque_v;
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V          0x1UL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_OPAQUE_MASK 0xfeUL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_OPAQUE_SFT 1
-	u8	timestamp_lo;
-	__le16	timestamp_hi;
-	__le32	event_data1;
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_SFT 0
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_PORT_MASK   0x70000UL
-	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_PORT_SFT    16
-};
-
 /* hwrm_async_event_cmpl_vf_cfg_change (size:128b/16B) */
 struct hwrm_async_event_cmpl_vf_cfg_change {
 	__le16	type;
@@ -854,6 +833,7 @@ struct hwrm_func_qcaps_output {
 	#define FUNC_QCAPS_RESP_FLAGS_NVGRE_TUN_FLAGS_SUPPORTED      0x2000UL
 	#define FUNC_QCAPS_RESP_FLAGS_GRE_TUN_FLAGS_SUPPORTED        0x4000UL
 	#define FUNC_QCAPS_RESP_FLAGS_MPLS_TUN_FLAGS_SUPPORTED       0x8000UL
+	#define FUNC_QCAPS_RESP_FLAGS_PCIE_STATS_SUPPORTED           0x10000UL
 	u8	mac_address[6];
 	__le16	max_rsscos_ctx;
 	__le16	max_cmpl_rings;
@@ -966,10 +946,14 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_EVB_MODE_VEB    0x1UL
 	#define FUNC_QCFG_RESP_EVB_MODE_VEPA   0x2UL
 	#define FUNC_QCFG_RESP_EVB_MODE_LAST  FUNC_QCFG_RESP_EVB_MODE_VEPA
-	u8	cache_linesize;
-	#define FUNC_QCFG_RESP_CACHE_LINESIZE_CACHE_LINESIZE_64  0x0UL
-	#define FUNC_QCFG_RESP_CACHE_LINESIZE_CACHE_LINESIZE_128 0x1UL
-	#define FUNC_QCFG_RESP_CACHE_LINESIZE_LAST              FUNC_QCFG_RESP_CACHE_LINESIZE_CACHE_LINESIZE_128
+	u8	options;
+	#define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_MASK    0x3UL
+	#define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SFT     0
+	#define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SIZE_64   0x0UL
+	#define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SIZE_128  0x1UL
+	#define FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_LAST     FUNC_QCFG_RESP_OPTIONS_CACHE_LINESIZE_SIZE_128
+	#define FUNC_QCFG_RESP_OPTIONS_RSVD_MASK              0xfcUL
+	#define FUNC_QCFG_RESP_OPTIONS_RSVD_SFT               2
 	__le16	alloc_vfs;
 	__le32	alloc_mcast_filters;
 	__le32	alloc_hw_ring_grps;
@@ -1124,10 +1108,14 @@ struct hwrm_func_cfg_input {
 	#define FUNC_CFG_REQ_EVB_MODE_VEB    0x1UL
 	#define FUNC_CFG_REQ_EVB_MODE_VEPA   0x2UL
 	#define FUNC_CFG_REQ_EVB_MODE_LAST  FUNC_CFG_REQ_EVB_MODE_VEPA
-	u8	cache_linesize;
-	#define FUNC_CFG_REQ_CACHE_LINESIZE_CACHE_LINESIZE_64  0x0UL
-	#define FUNC_CFG_REQ_CACHE_LINESIZE_CACHE_LINESIZE_128 0x1UL
-	#define FUNC_CFG_REQ_CACHE_LINESIZE_LAST              FUNC_CFG_REQ_CACHE_LINESIZE_CACHE_LINESIZE_128
+	u8	options;
+	#define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_MASK    0x3UL
+	#define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SFT     0
+	#define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64   0x0UL
+	#define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128  0x1UL
+	#define FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_LAST     FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128
+	#define FUNC_CFG_REQ_OPTIONS_RSVD_MASK              0xfcUL
+	#define FUNC_CFG_REQ_OPTIONS_RSVD_SFT               2
 	__le16	num_mcast_filters;
 };
 
@@ -1248,7 +1236,7 @@ struct hwrm_func_vf_vnic_ids_query_output {
 	u8	valid;
 };
 
-/* hwrm_func_drv_rgtr_input (size:832b/104B) */
+/* hwrm_func_drv_rgtr_input (size:896b/112B) */
 struct hwrm_func_drv_rgtr_input {
 	__le16	req_type;
 	__le16	cmpl_ring;
@@ -1256,8 +1244,9 @@ struct hwrm_func_drv_rgtr_input {
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE      0x1UL
-	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE     0x2UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_ALL_MODE       0x1UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_FWD_NONE_MODE      0x2UL
+	#define FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE     0x4UL
 	__le32	enables;
 	#define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE             0x1UL
 	#define FUNC_DRV_RGTR_REQ_ENABLES_VER                 0x2UL
@@ -1277,14 +1266,18 @@ struct hwrm_func_drv_rgtr_input {
 	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 0x74UL
 	#define FUNC_DRV_RGTR_REQ_OS_TYPE_UEFI      0x8000UL
 	#define FUNC_DRV_RGTR_REQ_OS_TYPE_LAST     FUNC_DRV_RGTR_REQ_OS_TYPE_UEFI
-	u8	ver_maj;
-	u8	ver_min;
-	u8	ver_upd;
+	u8	ver_maj_8b;
+	u8	ver_min_8b;
+	u8	ver_upd_8b;
 	u8	unused_0[3];
 	__le32	timestamp;
 	u8	unused_1[4];
 	__le32	vf_req_fwd[8];
 	__le32	async_event_fwd[8];
+	__le16	ver_maj;
+	__le16	ver_min;
+	__le16	ver_upd;
+	__le16	ver_patch;
 };
 
 /* hwrm_func_drv_rgtr_output (size:128b/16B) */
@@ -1379,7 +1372,7 @@ struct hwrm_func_drv_qver_input {
 	u8	unused_0[2];
 };
 
-/* hwrm_func_drv_qver_output (size:128b/16B) */
+/* hwrm_func_drv_qver_output (size:192b/24B) */
 struct hwrm_func_drv_qver_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -1398,11 +1391,15 @@ struct hwrm_func_drv_qver_output {
 	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 0x74UL
 	#define FUNC_DRV_QVER_RESP_OS_TYPE_UEFI      0x8000UL
 	#define FUNC_DRV_QVER_RESP_OS_TYPE_LAST     FUNC_DRV_QVER_RESP_OS_TYPE_UEFI
-	u8	ver_maj;
-	u8	ver_min;
-	u8	ver_upd;
+	u8	ver_maj_8b;
+	u8	ver_min_8b;
+	u8	ver_upd_8b;
 	u8	unused_0[2];
 	u8	valid;
+	__le16	ver_maj;
+	__le16	ver_min;
+	__le16	ver_upd;
+	__le16	ver_patch;
 };
 
 /* hwrm_func_resource_qcaps_input (size:192b/24B) */
@@ -1416,7 +1413,7 @@ struct hwrm_func_resource_qcaps_input {
 	u8	unused_0[6];
 };
 
-/* hwrm_func_resource_qcaps_output (size:384b/48B) */
+/* hwrm_func_resource_qcaps_output (size:448b/56B) */
 struct hwrm_func_resource_qcaps_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -1425,9 +1422,10 @@ struct hwrm_func_resource_qcaps_output {
 	__le16	max_vfs;
 	__le16	max_msix;
 	__le16	vf_reservation_strategy;
-	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MAXIMAL 0x0UL
-	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL 0x1UL
-	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_LAST   FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL
+	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MAXIMAL        0x0UL
+	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL        0x1UL
+	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL_STATIC 0x2UL
+	#define FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_LAST          FUNC_RESOURCE_QCAPS_RESP_VF_RESERVATION_STRATEGY_MINIMAL_STATIC
 	__le16	min_rsscos_ctx;
 	__le16	max_rsscos_ctx;
 	__le16	min_cmpl_rings;
@@ -1444,7 +1442,8 @@ struct hwrm_func_resource_qcaps_output {
 	__le16	max_stat_ctx;
 	__le16	min_hw_ring_grps;
 	__le16	max_hw_ring_grps;
-	u8	unused_0;
+	__le16	max_tx_scheduler_inputs;
+	u8	unused_0[7];
 	u8	valid;
 };
 
@@ -1627,6 +1626,16 @@ struct hwrm_port_phy_cfg_output {
 	u8	valid;
 };
 
+/* hwrm_port_phy_cfg_cmd_err (size:64b/8B) */
+struct hwrm_port_phy_cfg_cmd_err {
+	u8	code;
+	#define PORT_PHY_CFG_CMD_ERR_CODE_UNKNOWN       0x0UL
+	#define PORT_PHY_CFG_CMD_ERR_CODE_ILLEGAL_SPEED 0x1UL
+	#define PORT_PHY_CFG_CMD_ERR_CODE_RETRY         0x2UL
+	#define PORT_PHY_CFG_CMD_ERR_CODE_LAST         PORT_PHY_CFG_CMD_ERR_CODE_RETRY
+	u8	unused_0[7];
+};
+
 /* hwrm_port_phy_qcfg_input (size:192b/24B) */
 struct hwrm_port_phy_qcfg_input {
 	__le16	req_type;
@@ -2030,6 +2039,33 @@ struct hwrm_port_qstats_output {
 	u8	valid;
 };
 
+/* hwrm_port_qstats_ext_input (size:320b/40B) */
+struct hwrm_port_qstats_ext_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le16	port_id;
+	__le16	tx_stat_size;
+	__le16	rx_stat_size;
+	u8	unused_0[2];
+	__le64	tx_stat_host_addr;
+	__le64	rx_stat_host_addr;
+};
+
+/* hwrm_port_qstats_ext_output (size:128b/16B) */
+struct hwrm_port_qstats_ext_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le16	tx_stat_size;
+	__le16	rx_stat_size;
+	u8	unused_0[3];
+	u8	valid;
+};
+
 /* hwrm_port_lpbk_qstats_input (size:128b/16B) */
 struct hwrm_port_lpbk_qstats_input {
 	__le16	req_type;
@@ -2552,7 +2588,11 @@ struct hwrm_queue_qportcfg_input {
 	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX    0x1UL
 	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX
 	__le16	port_id;
-	u8	unused_0[2];
+	u8	drv_qmap_cap;
+	#define QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_DISABLED 0x0UL
+	#define QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_ENABLED  0x1UL
+	#define QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_LAST    QUEUE_QPORTCFG_REQ_DRV_QMAP_CAP_ENABLED
+	u8	unused_0;
 };
 
 /* hwrm_queue_qportcfg_output (size:256b/32B) */
@@ -2571,52 +2611,68 @@ struct hwrm_queue_qportcfg_output {
 	u8	queue_cos2bw_cfg_allowed;
 	u8	queue_id0;
 	u8	queue_id0_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id1;
 	u8	queue_id1_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id2;
 	u8	queue_id2_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id3;
 	u8	queue_id3_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id4;
 	u8	queue_id4_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id5;
 	u8	queue_id5_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id6;
 	u8	queue_id6_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN
 	u8	queue_id7;
 	u8	queue_id7_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY    0x0UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN  0xffUL
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST    QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY          0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_ROCE  0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY_ROCE_CNP 0x2UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN        0xffUL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN
 	u8	valid;
 };
 
@@ -5180,6 +5236,29 @@ struct hwrm_stat_ctx_clr_stats_output {
 	u8	valid;
 };
 
+/* hwrm_pcie_qstats_input (size:256b/32B) */
+struct hwrm_pcie_qstats_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le16	pcie_stat_size;
+	u8	unused_0[6];
+	__le64	pcie_stat_host_addr;
+};
+
+/* hwrm_pcie_qstats_output (size:128b/16B) */
+struct hwrm_pcie_qstats_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le16	pcie_stat_size;
+	u8	unused_0[5];
+	u8	valid;
+};
+
 /* tx_port_stats (size:3264b/408B) */
 struct tx_port_stats {
 	__le64	tx_64b_frames;
@@ -5305,6 +5384,30 @@ struct rx_port_stats {
 	__le64	rx_stat_err;
 };
 
+/* rx_port_stats_ext (size:320b/40B) */
+struct rx_port_stats_ext {
+	__le64	link_down_events;
+	__le64	continuous_pause_events;
+	__le64	resume_pause_events;
+	__le64	continuous_roce_pause_events;
+	__le64	resume_roce_pause_events;
+};
+
+/* pcie_ctx_hw_stats (size:768b/96B) */
+struct pcie_ctx_hw_stats {
+	__le64	pcie_pl_signal_integrity;
+	__le64	pcie_dl_signal_integrity;
+	__le64	pcie_tl_signal_integrity;
+	__le64	pcie_link_integrity;
+	__le64	pcie_tx_traffic_rate;
+	__le64	pcie_rx_traffic_rate;
+	__le64	pcie_tx_dllp_statistics;
+	__le64	pcie_rx_dllp_statistics;
+	__le64	pcie_equalization_time;
+	__le32	pcie_ltssm_histogram[4];
+	__le64	pcie_recovery_histogram;
+};
+
 /* hwrm_fw_reset_input (size:192b/24B) */
 struct hwrm_fw_reset_input {
 	__le16	req_type;
@@ -5313,14 +5416,15 @@ struct hwrm_fw_reset_input {
 	__le16	target_id;
 	__le64	resp_addr;
 	u8	embedded_proc_type;
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT    0x0UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT    0x1UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE    0x3UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST    0x4UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP      0x5UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP    0x6UL
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_LAST   FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT                 0x0UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT                 0x1UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL              0x2UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE                 0x3UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST                 0x4UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP                   0x5UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP                 0x6UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT 0x7UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_LAST                FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT
 	u8	selfrst_status;
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE    0x0UL
 	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP    0x1UL
@@ -6253,8 +6357,7 @@ struct hwrm_selftest_exec_input {
 	#define SELFTEST_EXEC_REQ_FLAGS_MEMORY_TEST              0x8UL
 	#define SELFTEST_EXEC_REQ_FLAGS_PCIE_SERDES_TEST         0x10UL
 	#define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_SERDES_TEST     0x20UL
-	u8	pcie_lane_num;
-	u8	unused_0[6];
+	u8	unused_0[7];
 };
 
 /* hwrm_selftest_exec_output (size:128b/16B) */

From 1d3ef13dd48da9177e417379644be9003bc459cc Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:07 -0400
Subject: [PATCH 1584/1678] bnxt_en: Adjust default rings for multi-port NICs.

Change the default ring logic to select default number of rings to be up to
8 per port if the default rings x NIC ports <= total CPUs.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 45d45321acb2..82a3aad27dd2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8393,9 +8393,15 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
 	if (sh)
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
 	dflt_rings = netif_get_num_default_rss_queues();
-	/* Reduce default rings to reduce memory usage on multi-port cards */
-	if (bp->port_count > 1)
-		dflt_rings = min_t(int, dflt_rings, 4);
+	/* Reduce default rings on multi-port cards so that total default
+	 * rings do not exceed CPU count.
+	 */
+	if (bp->port_count > 1) {
+		int max_rings =
+			max_t(int, num_online_cpus() / bp->port_count, 1);
+
+		dflt_rings = min_t(int, dflt_rings, max_rings);
+	}
 	rc = bnxt_get_dflt_rings(bp, &max_rx_rings, &max_tx_rings, sh);
 	if (rc)
 		return rc;

From abe93ad2e06e3c16562b5de2787e7442fa088895 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:08 -0400
Subject: [PATCH 1585/1678] bnxt_en: Use a dedicated VNIC mode for RDMA.

If the RDMA driver is registered, use a new VNIC mode that allows
RDMA traffic to be seen on the netdev in promiscuous mode.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 18 ++++++++++++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 82a3aad27dd2..62a1443c1a2f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4002,6 +4002,13 @@ static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx)
 	return rc;
 }
 
+static u32 bnxt_get_roce_vnic_mode(struct bnxt *bp)
+{
+	if (bp->flags & BNXT_FLAG_ROCE_MIRROR_CAP)
+		return VNIC_CFG_REQ_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE;
+	return VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE;
+}
+
 int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 {
 	unsigned int ring = 0, grp_idx;
@@ -4057,8 +4064,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 	if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan)
 		req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
 	if (!vnic_id && bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
-		req.flags |=
-			cpu_to_le32(VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE);
+		req.flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
 
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
@@ -4139,9 +4145,13 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (!rc) {
-		if (resp->flags &
-		    cpu_to_le32(VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP))
+		u32 flags = le32_to_cpu(resp->flags);
+
+		if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP)
 			bp->flags |= BNXT_FLAG_NEW_RSS_CAP;
+		if (flags &
+		    VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP)
+			bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP;
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index c0c548feea98..9290d3cdc646 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1151,6 +1151,7 @@ struct bnxt {
 	#define BNXT_FLAG_FW_DCBX_AGENT	0x800000
 	#define BNXT_FLAG_CHIP_NITRO_A0	0x1000000
 	#define BNXT_FLAG_DIM		0x2000000
+	#define BNXT_FLAG_ROCE_MIRROR_CAP	0x4000000
 	#define BNXT_FLAG_NEW_RM	0x8000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\

From 2373d8d6a7932d28b8e31ea2a70bf6c002d97ac8 Mon Sep 17 00:00:00 2001
From: Scott Branden <scott.branden@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:09 -0400
Subject: [PATCH 1586/1678] bnxt_en: fix clear flags in ethtool reset handling

Clear flags when reset command processed successfully for components
specified.

Fixes: 6502ad5963a5 ("bnxt_en: Add ETH_RESET_AP support")
Signed-off-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 1801582076be..7af01a565741 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2535,16 +2535,20 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 			return -EOPNOTSUPP;
 
 		rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP);
-		if (!rc)
+		if (!rc) {
 			netdev_info(dev, "Reset request successful. Reload driver to complete reset\n");
+			*flags = 0;
+		}
 	} else if (*flags == ETH_RESET_AP) {
 		/* This feature is not supported in older firmware versions */
 		if (bp->hwrm_spec_code < 0x10803)
 			return -EOPNOTSUPP;
 
 		rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_AP);
-		if (!rc)
+		if (!rc) {
 			netdev_info(dev, "Reset Application Processor request successful.\n");
+			*flags = 0;
+		}
 	} else {
 		rc = -EINVAL;
 	}

From 746df139646ea7fd11c26f88fd95a247d2a7c94b Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:10 -0400
Subject: [PATCH 1587/1678] bnxt_en: Add support for ndo_set_vf_trust

Trusted VFs are allowed to modify MAC address, even when PF
has assigned one.

Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  1 +
 .../net/ethernet/broadcom/bnxt/bnxt_sriov.c   | 41 +++++++++++++++----
 .../net/ethernet/broadcom/bnxt/bnxt_sriov.h   |  3 +-
 4 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 62a1443c1a2f..527ef269fed0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8206,6 +8206,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
 	.ndo_set_vf_rate	= bnxt_set_vf_bw,
 	.ndo_set_vf_link_state	= bnxt_set_vf_link_state,
 	.ndo_set_vf_spoofchk	= bnxt_set_vf_spoofchk,
+	.ndo_set_vf_trust	= bnxt_set_vf_trust,
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= bnxt_poll_controller,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 9290d3cdc646..ae7c69b44472 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -815,6 +815,7 @@ struct bnxt_vf_info {
 #define BNXT_VF_SPOOFCHK	0x2
 #define BNXT_VF_LINK_FORCED	0x4
 #define BNXT_VF_LINK_UP		0x8
+#define BNXT_VF_TRUST		0x10
 	u32	func_flags; /* func cfg flags */
 	u32	min_tx_rate;
 	u32	max_tx_rate;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index d87faad901fe..a3d368ee3072 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -1,7 +1,7 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
- * Copyright (c) 2016-2017 Broadcom Limited
+ * Copyright (c) 2016-2018 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -121,6 +121,23 @@ int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
 	return rc;
 }
 
+int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trusted)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_vf_info *vf;
+
+	if (bnxt_vf_ndo_prep(bp, vf_id))
+		return -EINVAL;
+
+	vf = &bp->pf.vf[vf_id];
+	if (trusted)
+		vf->flags |= BNXT_VF_TRUST;
+	else
+		vf->flags &= ~BNXT_VF_TRUST;
+
+	return 0;
+}
+
 int bnxt_get_vf_config(struct net_device *dev, int vf_id,
 		       struct ifla_vf_info *ivi)
 {
@@ -147,6 +164,7 @@ int bnxt_get_vf_config(struct net_device *dev, int vf_id,
 	else
 		ivi->qos = 0;
 	ivi->spoofchk = !!(vf->flags & BNXT_VF_SPOOFCHK);
+	ivi->trusted = !!(vf->flags & BNXT_VF_TRUST);
 	if (!(vf->flags & BNXT_VF_LINK_FORCED))
 		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
 	else if (vf->flags & BNXT_VF_LINK_UP)
@@ -886,18 +904,19 @@ exec_fwd_resp_exit:
 	return rc;
 }
 
-static int bnxt_vf_store_mac(struct bnxt *bp, struct bnxt_vf_info *vf)
+static int bnxt_vf_configure_mac(struct bnxt *bp, struct bnxt_vf_info *vf)
 {
 	u32 msg_size = sizeof(struct hwrm_func_vf_cfg_input);
 	struct hwrm_func_vf_cfg_input *req =
 		(struct hwrm_func_vf_cfg_input *)vf->hwrm_cmd_req_addr;
 
-	/* Only allow VF to set a valid MAC address if the PF assigned MAC
-	 * address is zero
+	/* Allow VF to set a valid MAC address, if trust is set to on or
+	 * if the PF assigned MAC address is zero
 	 */
 	if (req->enables & cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR)) {
 		if (is_valid_ether_addr(req->dflt_mac_addr) &&
-		    !is_valid_ether_addr(vf->mac_addr)) {
+		    ((vf->flags & BNXT_VF_TRUST) ||
+		     (!is_valid_ether_addr(vf->mac_addr)))) {
 			ether_addr_copy(vf->vf_mac_addr, req->dflt_mac_addr);
 			return bnxt_hwrm_exec_fwd_resp(bp, vf, msg_size);
 		}
@@ -913,11 +932,17 @@ static int bnxt_vf_validate_set_mac(struct bnxt *bp, struct bnxt_vf_info *vf)
 		(struct hwrm_cfa_l2_filter_alloc_input *)vf->hwrm_cmd_req_addr;
 	bool mac_ok = false;
 
-	/* VF MAC address must first match PF MAC address, if it is valid.
+	if (!is_valid_ether_addr((const u8 *)req->l2_addr))
+		return bnxt_hwrm_fwd_err_resp(bp, vf, msg_size);
+
+	/* Allow VF to set a valid MAC address, if trust is set to on.
+	 * Or VF MAC address must first match MAC address in PF's context.
 	 * Otherwise, it must match the VF MAC address if firmware spec >=
 	 * 1.2.2
 	 */
-	if (is_valid_ether_addr(vf->mac_addr)) {
+	if (vf->flags & BNXT_VF_TRUST) {
+		mac_ok = true;
+	} else if (is_valid_ether_addr(vf->mac_addr)) {
 		if (ether_addr_equal((const u8 *)req->l2_addr, vf->mac_addr))
 			mac_ok = true;
 	} else if (is_valid_ether_addr(vf->vf_mac_addr)) {
@@ -993,7 +1018,7 @@ static int bnxt_vf_req_validate_snd(struct bnxt *bp, struct bnxt_vf_info *vf)
 
 	switch (req_type) {
 	case HWRM_FUNC_VF_CFG:
-		rc = bnxt_vf_store_mac(bp, vf);
+		rc = bnxt_vf_configure_mac(bp, vf);
 		break;
 	case HWRM_CFA_L2_FILTER_ALLOC:
 		rc = bnxt_vf_validate_set_mac(bp, vf);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index dbc8d977fc5a..d10f6f6c7860 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -1,7 +1,7 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
- * Copyright (c) 2016-2017 Broadcom Limited
+ * Copyright (c) 2016-2018 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -17,6 +17,7 @@ int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int bnxt_set_vf_bw(struct net_device *, int, int, int);
 int bnxt_set_vf_link_state(struct net_device *, int, int);
 int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
+int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust);
 int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs);
 void bnxt_sriov_disable(struct bnxt *);
 void bnxt_hwrm_exec_fwd_req(struct bnxt *);

From 699efed00df0631e39a639b49e3b8e27e62e6c89 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:11 -0400
Subject: [PATCH 1588/1678] bnxt_en: Include additional hardware port
 statistics in ethtool -S.

Include additional hardware port statistics in ethtool -S, which
are useful for debugging.

Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 7af01a565741..859d4c0b5b31 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -181,6 +181,8 @@ static const struct {
 	BNXT_RX_STATS_ENTRY(rx_bytes),
 	BNXT_RX_STATS_ENTRY(rx_runt_bytes),
 	BNXT_RX_STATS_ENTRY(rx_runt_frames),
+	BNXT_RX_STATS_ENTRY(rx_stat_discard),
+	BNXT_RX_STATS_ENTRY(rx_stat_err),
 
 	BNXT_TX_STATS_ENTRY(tx_64b_frames),
 	BNXT_TX_STATS_ENTRY(tx_65b_127b_frames),
@@ -216,6 +218,9 @@ static const struct {
 	BNXT_TX_STATS_ENTRY(tx_eee_lpi_duration),
 	BNXT_TX_STATS_ENTRY(tx_total_collisions),
 	BNXT_TX_STATS_ENTRY(tx_bytes),
+	BNXT_TX_STATS_ENTRY(tx_xthol_frames),
+	BNXT_TX_STATS_ENTRY(tx_stat_discard),
+	BNXT_TX_STATS_ENTRY(tx_stat_error),
 };
 
 #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)

From 00db3cba35211cd7d458d378a5931fadfa86a17c Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:12 -0400
Subject: [PATCH 1589/1678] bnxt_en: Add extended port statistics support

Gather periodic extended port statistics, if the device is PF and
link is up.

Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 45 ++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  6 +++
 .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 32 +++++++++++++
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 527ef269fed0..2251327f8cdf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3059,12 +3059,21 @@ static void bnxt_free_stats(struct bnxt *bp)
 	u32 size, i;
 	struct pci_dev *pdev = bp->pdev;
 
+	bp->flags &= ~BNXT_FLAG_PORT_STATS;
+	bp->flags &= ~BNXT_FLAG_PORT_STATS_EXT;
+
 	if (bp->hw_rx_port_stats) {
 		dma_free_coherent(&pdev->dev, bp->hw_port_stats_size,
 				  bp->hw_rx_port_stats,
 				  bp->hw_rx_port_stats_map);
 		bp->hw_rx_port_stats = NULL;
-		bp->flags &= ~BNXT_FLAG_PORT_STATS;
+	}
+
+	if (bp->hw_rx_port_stats_ext) {
+		dma_free_coherent(&pdev->dev, sizeof(struct rx_port_stats_ext),
+				  bp->hw_rx_port_stats_ext,
+				  bp->hw_rx_port_stats_ext_map);
+		bp->hw_rx_port_stats_ext = NULL;
 	}
 
 	if (!bp->bnapi)
@@ -3120,6 +3129,21 @@ static int bnxt_alloc_stats(struct bnxt *bp)
 		bp->hw_tx_port_stats_map = bp->hw_rx_port_stats_map +
 					   sizeof(struct rx_port_stats) + 512;
 		bp->flags |= BNXT_FLAG_PORT_STATS;
+
+		/* Display extended statistics only if FW supports it */
+		if (bp->hwrm_spec_code < 0x10804 ||
+		    bp->hwrm_spec_code == 0x10900)
+			return 0;
+
+		bp->hw_rx_port_stats_ext =
+			dma_zalloc_coherent(&pdev->dev,
+					    sizeof(struct rx_port_stats_ext),
+					    &bp->hw_rx_port_stats_ext_map,
+					    GFP_KERNEL);
+		if (!bp->hw_rx_port_stats_ext)
+			return 0;
+
+		bp->flags |= BNXT_FLAG_PORT_STATS_EXT;
 	}
 	return 0;
 }
@@ -5340,6 +5364,21 @@ static int bnxt_hwrm_port_qstats(struct bnxt *bp)
 	return rc;
 }
 
+static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
+{
+	struct hwrm_port_qstats_ext_input req = {0};
+	struct bnxt_pf_info *pf = &bp->pf;
+
+	if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT))
+		return 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS_EXT, -1, -1);
+	req.port_id = cpu_to_le16(pf->port_id);
+	req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
+	req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map);
+	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+}
+
 static void bnxt_hwrm_free_tunnel_ports(struct bnxt *bp)
 {
 	if (bp->vxlan_port_cnt) {
@@ -7491,8 +7530,10 @@ static void bnxt_sp_task(struct work_struct *work)
 		bnxt_hwrm_tunnel_dst_port_free(
 			bp, TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE);
 	}
-	if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event))
+	if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) {
 		bnxt_hwrm_port_qstats(bp);
+		bnxt_hwrm_port_qstats_ext(bp);
+	}
 
 	if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) {
 		int rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index ae7c69b44472..78034b504baa 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1154,6 +1154,7 @@ struct bnxt {
 	#define BNXT_FLAG_DIM		0x2000000
 	#define BNXT_FLAG_ROCE_MIRROR_CAP	0x4000000
 	#define BNXT_FLAG_NEW_RM	0x8000000
+	#define BNXT_FLAG_PORT_STATS_EXT	0x10000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\
 					    BNXT_FLAG_RFS |		\
@@ -1273,8 +1274,10 @@ struct bnxt {
 
 	struct rx_port_stats	*hw_rx_port_stats;
 	struct tx_port_stats	*hw_tx_port_stats;
+	struct rx_port_stats_ext	*hw_rx_port_stats_ext;
 	dma_addr_t		hw_rx_port_stats_map;
 	dma_addr_t		hw_tx_port_stats_map;
+	dma_addr_t		hw_rx_port_stats_ext_map;
 	int			hw_port_stats_size;
 
 	u16			hwrm_max_req_len;
@@ -1385,6 +1388,9 @@ struct bnxt {
 	((offsetof(struct tx_port_stats, counter) +	\
 	  sizeof(struct rx_port_stats) + 512) / 8)
 
+#define BNXT_RX_STATS_EXT_OFFSET(counter)		\
+	(offsetof(struct rx_port_stats_ext, counter) / 8)
+
 #define I2C_DEV_ADDR_A0				0xa0
 #define I2C_DEV_ADDR_A2				0xa2
 #define SFP_EEPROM_SFF_8472_COMP_ADDR		0x5e
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 859d4c0b5b31..e184e4bbe544 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -137,6 +137,9 @@ reset_coalesce:
 #define BNXT_TX_STATS_ENTRY(counter)	\
 	{ BNXT_TX_STATS_OFFSET(counter), __stringify(counter) }
 
+#define BNXT_RX_STATS_EXT_ENTRY(counter)	\
+	{ BNXT_RX_STATS_EXT_OFFSET(counter), __stringify(counter) }
+
 static const struct {
 	long offset;
 	char string[ETH_GSTRING_LEN];
@@ -223,7 +226,19 @@ static const struct {
 	BNXT_TX_STATS_ENTRY(tx_stat_error),
 };
 
+static const struct {
+	long offset;
+	char string[ETH_GSTRING_LEN];
+} bnxt_port_stats_ext_arr[] = {
+	BNXT_RX_STATS_EXT_ENTRY(link_down_events),
+	BNXT_RX_STATS_EXT_ENTRY(continuous_pause_events),
+	BNXT_RX_STATS_EXT_ENTRY(resume_pause_events),
+	BNXT_RX_STATS_EXT_ENTRY(continuous_roce_pause_events),
+	BNXT_RX_STATS_EXT_ENTRY(resume_roce_pause_events),
+};
+
 #define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
+#define BNXT_NUM_PORT_STATS_EXT ARRAY_SIZE(bnxt_port_stats_ext_arr)
 
 static int bnxt_get_num_stats(struct bnxt *bp)
 {
@@ -232,6 +247,9 @@ static int bnxt_get_num_stats(struct bnxt *bp)
 	if (bp->flags & BNXT_FLAG_PORT_STATS)
 		num_stats += BNXT_NUM_PORT_STATS;
 
+	if (bp->flags & BNXT_FLAG_PORT_STATS_EXT)
+		num_stats += BNXT_NUM_PORT_STATS_EXT;
+
 	return num_stats;
 }
 
@@ -279,6 +297,14 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 					       bnxt_port_stats_arr[i].offset));
 		}
 	}
+	if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) {
+		__le64 *port_stats_ext = (__le64 *)bp->hw_rx_port_stats_ext;
+
+		for (i = 0; i < BNXT_NUM_PORT_STATS_EXT; i++, j++) {
+			buf[j] = le64_to_cpu(*(port_stats_ext +
+					    bnxt_port_stats_ext_arr[i].offset));
+		}
+	}
 }
 
 static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
@@ -339,6 +365,12 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 				buf += ETH_GSTRING_LEN;
 			}
 		}
+		if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) {
+			for (i = 0; i < BNXT_NUM_PORT_STATS_EXT; i++) {
+				strcpy(buf, bnxt_port_stats_ext_arr[i].string);
+				buf += ETH_GSTRING_LEN;
+			}
+		}
 		break;
 	case ETH_SS_TEST:
 		if (bp->num_tests)

From db4723b3cd2d836ae44382d16e6a4418ae8929dc Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:13 -0400
Subject: [PATCH 1590/1678] bnxt_en: Check max_tx_scheduler_inputs value from
 firmware.

When checking for the maximum pre-set TX channels for ethtool -l, we
need to check the current max_tx_scheduler_inputs parameter from firmware.
This parameter specifies the max input for the internal QoS nodes currently
available to this function.  The function's TX rings will be capped by this
parameter.  By adding this logic, we provide a more accurate pre-set max
TX channels to the user.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |  8 ++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         |  2 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 11 +++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2251327f8cdf..62b7d69cc8e5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5093,7 +5093,7 @@ func_qcfg_exit:
 	return rc;
 }
 
-static int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp)
+int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
 {
 	struct hwrm_func_resource_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_func_resource_qcaps_input req = {0};
@@ -5110,6 +5110,10 @@ static int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp)
 		goto hwrm_func_resc_qcaps_exit;
 	}
 
+	hw_resc->max_tx_sch_inputs = le16_to_cpu(resp->max_tx_scheduler_inputs);
+	if (!all)
+		goto hwrm_func_resc_qcaps_exit;
+
 	hw_resc->min_rsscos_ctxs = le16_to_cpu(resp->min_rsscos_ctx);
 	hw_resc->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
 	hw_resc->min_cp_rings = le16_to_cpu(resp->min_cmpl_rings);
@@ -5216,7 +5220,7 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	if (rc)
 		return rc;
 	if (bp->hwrm_spec_code >= 0x10803) {
-		rc = bnxt_hwrm_func_resc_qcaps(bp);
+		rc = bnxt_hwrm_func_resc_qcaps(bp, true);
 		if (!rc)
 			bp->flags |= BNXT_FLAG_NEW_RM;
 	}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 78034b504baa..22aa290c94a0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -786,6 +786,7 @@ struct bnxt_hw_resc {
 	u16	min_tx_rings;
 	u16	max_tx_rings;
 	u16	resv_tx_rings;
+	u16	max_tx_sch_inputs;
 	u16	min_rx_rings;
 	u16	max_rx_rings;
 	u16	resv_rx_rings;
@@ -1456,6 +1457,7 @@ int bnxt_hwrm_set_pause(struct bnxt *);
 int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
 int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
+int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
 int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index e184e4bbe544..8d8ccd67e0e2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -425,15 +425,26 @@ static void bnxt_get_channels(struct net_device *dev,
 			      struct ethtool_channels *channel)
 {
 	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 	int max_rx_rings, max_tx_rings, tcs;
+	int max_tx_sch_inputs;
+
+	/* Get the most up-to-date max_tx_sch_inputs. */
+	if (bp->flags & BNXT_FLAG_NEW_RM)
+		bnxt_hwrm_func_resc_qcaps(bp, false);
+	max_tx_sch_inputs = hw_resc->max_tx_sch_inputs;
 
 	bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true);
+	if (max_tx_sch_inputs)
+		max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs);
 	channel->max_combined = min_t(int, max_rx_rings, max_tx_rings);
 
 	if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) {
 		max_rx_rings = 0;
 		max_tx_rings = 0;
 	}
+	if (max_tx_sch_inputs)
+		max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs);
 
 	tcs = netdev_get_num_tc(dev);
 	if (tcs > 1)

From 596f9d55feebdf31c03172fcc82cdec62bb969ea Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:14 -0400
Subject: [PATCH 1591/1678] bnxt_en: Improve resource accounting for SRIOV.

When VFs are created, the current code subtracts the maximum VF
resources from the PF's pool.  This under-estimates the resources
remaining in the PF pool.  Instead, we should subtract the minimum
VF resources.  The VF minimum resources are guaranteed to the VFs
and only these should be subtracted from the PF's pool.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/bnxt/bnxt_sriov.c    | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index a3d368ee3072..4fa4761fbd1c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -510,18 +510,16 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	if (pf->active_vfs) {
-		u16 n = 1;
+		u16 n = pf->active_vfs;
 
-		if (pf->vf_resv_strategy != BNXT_VF_RESV_STRATEGY_MINIMAL)
-			n = pf->active_vfs;
-
-		hw_resc->max_tx_rings -= vf_tx_rings * n;
-		hw_resc->max_rx_rings -= vf_rx_rings * n;
-		hw_resc->max_hw_ring_grps -= vf_ring_grps * n;
-		hw_resc->max_cp_rings -= vf_cp_rings * n;
+		hw_resc->max_tx_rings -= le16_to_cpu(req.min_tx_rings) * n;
+		hw_resc->max_rx_rings -= le16_to_cpu(req.min_rx_rings) * n;
+		hw_resc->max_hw_ring_grps -= le16_to_cpu(req.min_hw_ring_grps) *
+					     n;
+		hw_resc->max_cp_rings -= le16_to_cpu(req.min_cmpl_rings) * n;
 		hw_resc->max_rsscos_ctxs -= pf->active_vfs;
-		hw_resc->max_stat_ctxs -= vf_stat_ctx * n;
-		hw_resc->max_vnics -= vf_vnics * n;
+		hw_resc->max_stat_ctxs -= le16_to_cpu(req.min_stat_ctx) * n;
+		hw_resc->max_vnics -= le16_to_cpu(req.min_vnics) * n;
 
 		rc = pf->active_vfs;
 	}

From 845adfe40c2a75e67ddae6639fc2b987338b7983 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:15 -0400
Subject: [PATCH 1592/1678] bnxt_en: Improve valid bit checking in firmware
 response message.

When firmware sends a DMA response to the driver, the last byte of the
message will be set to 1 to indicate that the whole response is valid.
The driver waits for the message to be valid before reading the message.

The firmware spec allows these response messages to increase in
length by adding new fields to the end of these messages.  The
older spec's valid location may become a new field in a newer
spec.  To guarantee compatibility, the driver should zero the valid
byte before interpreting the entire message so that any new fields not
implemented by the older spec will be read as zero.

For messages that are forwarded to VFs, we need to set the length
and re-instate the valid bit so the VF will see the valid response.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 21 ++++++++++++++-----
 .../net/ethernet/broadcom/bnxt/bnxt_sriov.c   |  2 ++
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 62b7d69cc8e5..6fcf4dc67d5e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3422,7 +3422,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	int i, intr_process, rc, tmo_count;
 	struct input *req = msg;
 	u32 *data = msg;
-	__le32 *resp_len, *valid;
+	__le32 *resp_len;
+	u8 *valid;
 	u16 cp_ring_id, len = 0;
 	struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
@@ -3474,6 +3475,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
 	i = 0;
 	tmo_count = timeout * 40;
+	resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
 	if (intr_process) {
 		/* Wait until hwrm response cmpl interrupt is processed */
 		while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID &&
@@ -3486,9 +3488,11 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 				   le16_to_cpu(req->req_type));
 			return -1;
 		}
+		len = (le32_to_cpu(*resp_len) & HWRM_RESP_LEN_MASK) >>
+		      HWRM_RESP_LEN_SFT;
+		valid = bp->hwrm_cmd_resp_addr + len - 1;
 	} else {
 		/* Check if response len is updated */
-		resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
 		for (i = 0; i < tmo_count; i++) {
 			len = (le32_to_cpu(*resp_len) & HWRM_RESP_LEN_MASK) >>
 			      HWRM_RESP_LEN_SFT;
@@ -3504,10 +3508,12 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			return -1;
 		}
 
-		/* Last word of resp contains valid bit */
-		valid = bp->hwrm_cmd_resp_addr + len - 4;
+		/* Last byte of resp contains valid bit */
+		valid = bp->hwrm_cmd_resp_addr + len - 1;
 		for (i = 0; i < 5; i++) {
-			if (le32_to_cpu(*valid) & HWRM_RESP_VALID_MASK)
+			/* make sure we read from updated DMA memory */
+			dma_rmb();
+			if (*valid)
 				break;
 			udelay(1);
 		}
@@ -3520,6 +3526,11 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 		}
 	}
 
+	/* Zero valid bit for compatibility.  Valid bit in an older spec
+	 * may become a new field in a newer spec.  We must make sure that
+	 * a new field not implemented by old spec will read zero.
+	 */
+	*valid = 0;
 	rc = le16_to_cpu(resp->error_code);
 	if (rc && !silent)
 		netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 4fa4761fbd1c..f952963d594e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -974,7 +974,9 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
 		memcpy(&phy_qcfg_resp, &bp->link_info.phy_qcfg_resp,
 		       sizeof(phy_qcfg_resp));
 		mutex_unlock(&bp->hwrm_cmd_lock);
+		phy_qcfg_resp.resp_len = cpu_to_le16(sizeof(phy_qcfg_resp));
 		phy_qcfg_resp.seq_id = phy_qcfg_req->seq_id;
+		phy_qcfg_resp.valid = 1;
 
 		if (vf->flags & BNXT_VF_LINK_UP) {
 			/* if physical link is down, force link up on VF */

From 9899bb59ff08a50aef033b4d388d223adca58a7f Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:16 -0400
Subject: [PATCH 1593/1678] bnxt_en: Improve ring allocation logic.

Currently, the driver code makes some assumptions about the group index
and the map index of rings.  This makes the code more difficult to
understand and less flexible.

Improve it by adding the grp_idx and map_idx fields explicitly to the
bnxt_ring_struct as a union.  The grp_idx is initialized for each tx ring
and rx agg ring during init. time.  We do the same for the map_idx for
each cmpl ring.

The grp_idx ties the tx ring to the ring group.  The map_idx is the
doorbell index of the ring.  With this new infrastructure, we can change
the ring index mapping scheme easily in the future.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 32 ++++++++++++-----------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  4 +++
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6fcf4dc67d5e..8c8ef6b6ca91 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2317,6 +2317,7 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
 			if (rc)
 				return rc;
 
+			ring->grp_idx = i;
 			rxr->rx_agg_bmap_size = bp->rx_agg_ring_mask + 1;
 			mem_size = rxr->rx_agg_bmap_size / 8;
 			rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL);
@@ -2389,6 +2390,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
 		if (rc)
 			return rc;
 
+		ring->grp_idx = txr->bnapi->index;
 		if (bp->tx_push_size) {
 			dma_addr_t mapping;
 
@@ -2458,6 +2460,7 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp)
 		rc = bnxt_alloc_ring(bp, ring);
 		if (rc)
 			return rc;
+		ring->map_idx = i;
 	}
 	return 0;
 }
@@ -4253,12 +4256,12 @@ static int bnxt_hwrm_ring_grp_free(struct bnxt *bp)
 
 static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
 				    struct bnxt_ring_struct *ring,
-				    u32 ring_type, u32 map_index,
-				    u32 stats_ctx_id)
+				    u32 ring_type, u32 map_index)
 {
 	int rc = 0, err = 0;
 	struct hwrm_ring_alloc_input req = {0};
 	struct hwrm_ring_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_ring_grp_info *grp_info;
 	u16 ring_id;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_ALLOC, -1, -1);
@@ -4280,10 +4283,10 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
 	case HWRM_RING_ALLOC_TX:
 		req.ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
 		/* Association of transmit ring with completion ring */
-		req.cmpl_ring_id =
-			cpu_to_le16(bp->grp_info[map_index].cp_fw_ring_id);
+		grp_info = &bp->grp_info[ring->grp_idx];
+		req.cmpl_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
 		req.length = cpu_to_le32(bp->tx_ring_mask + 1);
-		req.stat_ctx_id = cpu_to_le32(stats_ctx_id);
+		req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
 		req.queue_id = cpu_to_le16(ring->queue_id);
 		break;
 	case HWRM_RING_ALLOC_RX:
@@ -4370,10 +4373,11 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+		u32 map_idx = ring->map_idx;
 
-		cpr->cp_doorbell = bp->bar1 + i * 0x80;
-		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_CMPL, i,
-					      INVALID_STATS_CTX_ID);
+		cpr->cp_doorbell = bp->bar1 + map_idx * 0x80;
+		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_CMPL,
+					      map_idx);
 		if (rc)
 			goto err_out;
 		BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
@@ -4389,11 +4393,10 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
 	for (i = 0; i < bp->tx_nr_rings; i++) {
 		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
 		struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
-		u32 map_idx = txr->bnapi->index;
-		u16 fw_stats_ctx = bp->grp_info[map_idx].fw_stats_ctx;
+		u32 map_idx = i;
 
 		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_TX,
-					      map_idx, fw_stats_ctx);
+					      map_idx);
 		if (rc)
 			goto err_out;
 		txr->tx_doorbell = bp->bar1 + map_idx * 0x80;
@@ -4405,7 +4408,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
 		u32 map_idx = rxr->bnapi->index;
 
 		rc = hwrm_ring_alloc_send_msg(bp, ring, HWRM_RING_ALLOC_RX,
-					      map_idx, INVALID_STATS_CTX_ID);
+					      map_idx);
 		if (rc)
 			goto err_out;
 		rxr->rx_doorbell = bp->bar1 + map_idx * 0x80;
@@ -4418,13 +4421,12 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp)
 			struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
 			struct bnxt_ring_struct *ring =
 						&rxr->rx_agg_ring_struct;
-			u32 grp_idx = rxr->bnapi->index;
+			u32 grp_idx = ring->grp_idx;
 			u32 map_idx = grp_idx + bp->rx_nr_rings;
 
 			rc = hwrm_ring_alloc_send_msg(bp, ring,
 						      HWRM_RING_ALLOC_AGG,
-						      map_idx,
-						      INVALID_STATS_CTX_ID);
+						      map_idx);
 			if (rc)
 				goto err_out;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 22aa290c94a0..e0cd0cb529c0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -573,6 +573,10 @@ struct bnxt_ring_struct {
 	void			**vmem;
 
 	u16			fw_ring_id; /* Ring id filled by Chimp FW */
+	union {
+		u16		grp_idx;
+		u16		map_idx; /* Used by cmpl rings */
+	};
 	u8			queue_id;
 };
 

From 08654eb213a8066b30c41e22067a9f066b40c80f Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:17 -0400
Subject: [PATCH 1594/1678] bnxt_en: Change IRQ assignment for RDMA driver.

In the current code, the range of MSIX vectors allocated for the RDMA
driver is disjoint from the network driver.  This creates a problem
for the new firmware ring reservation scheme.  The new scheme requires
the reserved completion rings/MSIX vectors to be in a contiguous
range.

Change the logic to allocate RDMA MSIX vectors to be contiguous with
the vectors used by bnxt_en on new firmware using the new scheme.
The new function bnxt_get_num_msix() calculates the exact number of
vectors needed by both drivers.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 31 +++++++++++++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 30 +++++++++++++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h |  3 ++
 3 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8c8ef6b6ca91..041b800e6734 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4720,6 +4720,21 @@ static int bnxt_hwrm_reserve_rings(struct bnxt *bp, int tx, int rx, int grp,
 		return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, vnic);
 }
 
+static int bnxt_cp_rings_in_use(struct bnxt *bp)
+{
+	int cp = bp->cp_nr_rings;
+	int ulp_msix, ulp_base;
+
+	ulp_msix = bnxt_get_ulp_msix_num(bp);
+	if (ulp_msix) {
+		ulp_base = bnxt_get_ulp_msix_base(bp);
+		cp += ulp_msix;
+		if ((ulp_base + ulp_msix) > cp)
+			cp = ulp_base + ulp_msix;
+	}
+	return cp;
+}
+
 static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
 			   bool shared);
 
@@ -5893,12 +5908,24 @@ void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs)
 	bp->hw_resc.max_irqs = max_irqs;
 }
 
+static int bnxt_get_num_msix(struct bnxt *bp)
+{
+	if (!(bp->flags & BNXT_FLAG_NEW_RM))
+		return bnxt_get_max_func_irqs(bp);
+
+	return bnxt_cp_rings_in_use(bp);
+}
+
 static int bnxt_init_msix(struct bnxt *bp)
 {
-	int i, total_vecs, rc = 0, min = 1;
+	int i, total_vecs, max, rc = 0, min = 1;
 	struct msix_entry *msix_ent;
 
-	total_vecs = bnxt_get_max_func_irqs(bp);
+	total_vecs = bnxt_get_num_msix(bp);
+	max = bnxt_get_max_func_irqs(bp);
+	if (total_vecs > max)
+		total_vecs = max;
+
 	msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL);
 	if (!msix_ent)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 997e10e8b863..62636cd44331 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -116,6 +116,9 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 	if (!(bp->flags & BNXT_FLAG_USING_MSIX))
 		return -ENODEV;
 
+	if (edev->ulp_tbl[ulp_id].msix_requested)
+		return -EAGAIN;
+
 	max_cp_rings = bnxt_get_max_func_cp_rings(bp);
 	max_idx = min_t(int, bp->total_irqs, max_cp_rings);
 	avail_msix = max_idx - bp->cp_nr_rings;
@@ -124,7 +127,11 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 	if (avail_msix > num_msix)
 		avail_msix = num_msix;
 
-	idx = max_idx - avail_msix;
+	if (bp->flags & BNXT_FLAG_NEW_RM)
+		idx = bp->cp_nr_rings;
+	else
+		idx = max_idx - avail_msix;
+	edev->ulp_tbl[ulp_id].msix_base = idx;
 	for (i = 0; i < avail_msix; i++) {
 		ent[i].vector = bp->irq_tbl[idx + i].vector;
 		ent[i].ring_idx = idx + i;
@@ -154,6 +161,27 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 	return 0;
 }
 
+int bnxt_get_ulp_msix_num(struct bnxt *bp)
+{
+	if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) {
+		struct bnxt_en_dev *edev = bp->edev;
+
+		return edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested;
+	}
+	return 0;
+}
+
+int bnxt_get_ulp_msix_base(struct bnxt *bp)
+{
+	if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) {
+		struct bnxt_en_dev *edev = bp->edev;
+
+		if (edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested)
+			return edev->ulp_tbl[BNXT_ROCE_ULP].msix_base;
+	}
+	return 0;
+}
+
 void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id)
 {
 	ASSERT_RTNL();
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index d2471067dc37..c9fa7eb56a08 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -49,6 +49,7 @@ struct bnxt_ulp {
 	unsigned long	*async_events_bmap;
 	u16		max_async_event_id;
 	u16		msix_requested;
+	u16		msix_base;
 	atomic_t	ref_count;
 };
 
@@ -84,6 +85,8 @@ static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev, int ulp_id)
 	return false;
 }
 
+int bnxt_get_ulp_msix_num(struct bnxt *bp);
+int bnxt_get_ulp_msix_base(struct bnxt *bp);
 void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id);
 void bnxt_ulp_stop(struct bnxt *bp);
 void bnxt_ulp_start(struct bnxt *bp);

From e5811b8c09df9bc80eabc95339fceded23f16289 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:18 -0400
Subject: [PATCH 1595/1678] bnxt_en: Add IRQ remapping logic.

Add remapping logic so that bnxt_en can use any arbitrary MSIX vectors.
This will allow the driver to reserve one range of MSIX vectors to be
used by both bnxt_en and bnxt_re.  bnxt_en can now skip over the MSIX
vectors used by bnxt_re.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 59 ++++++++++++++++-------
 1 file changed, 42 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 041b800e6734..c380a0d5a2ec 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2444,8 +2444,10 @@ static void bnxt_free_cp_rings(struct bnxt *bp)
 
 static int bnxt_alloc_cp_rings(struct bnxt *bp)
 {
-	int i, rc;
+	int i, rc, ulp_base_vec, ulp_msix;
 
+	ulp_msix = bnxt_get_ulp_msix_num(bp);
+	ulp_base_vec = bnxt_get_ulp_msix_base(bp);
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr;
@@ -2460,7 +2462,11 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp)
 		rc = bnxt_alloc_ring(bp, ring);
 		if (rc)
 			return rc;
-		ring->map_idx = i;
+
+		if (ulp_msix && i >= ulp_base_vec)
+			ring->map_idx = i + ulp_msix;
+		else
+			ring->map_idx = i;
 	}
 	return 0;
 }
@@ -3384,6 +3390,15 @@ static void bnxt_disable_int(struct bnxt *bp)
 	}
 }
 
+static int bnxt_cp_num_to_irq_num(struct bnxt *bp, int n)
+{
+	struct bnxt_napi *bnapi = bp->bnapi[n];
+	struct bnxt_cp_ring_info *cpr;
+
+	cpr = &bnapi->cp_ring;
+	return cpr->cp_ring_struct.map_idx;
+}
+
 static void bnxt_disable_int_sync(struct bnxt *bp)
 {
 	int i;
@@ -3391,8 +3406,11 @@ static void bnxt_disable_int_sync(struct bnxt *bp)
 	atomic_inc(&bp->intr_sem);
 
 	bnxt_disable_int(bp);
-	for (i = 0; i < bp->cp_nr_rings; i++)
-		synchronize_irq(bp->irq_tbl[i].vector);
+	for (i = 0; i < bp->cp_nr_rings; i++) {
+		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
+
+		synchronize_irq(bp->irq_tbl[map_idx].vector);
+	}
 }
 
 static void bnxt_enable_int(struct bnxt *bp)
@@ -5824,6 +5842,7 @@ static void bnxt_setup_msix(struct bnxt *bp)
 	}
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
+		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
 		char *attr;
 
 		if (bp->flags & BNXT_FLAG_SHARED_RINGS)
@@ -5833,9 +5852,9 @@ static void bnxt_setup_msix(struct bnxt *bp)
 		else
 			attr = "tx";
 
-		snprintf(bp->irq_tbl[i].name, len, "%s-%s-%d", dev->name, attr,
-			 i);
-		bp->irq_tbl[i].handler = bnxt_msix;
+		snprintf(bp->irq_tbl[map_idx].name, len, "%s-%s-%d", dev->name,
+			 attr, i);
+		bp->irq_tbl[map_idx].handler = bnxt_msix;
 	}
 }
 
@@ -6059,7 +6078,9 @@ static void bnxt_free_irq(struct bnxt *bp)
 		return;
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
-		irq = &bp->irq_tbl[i];
+		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
+
+		irq = &bp->irq_tbl[map_idx];
 		if (irq->requested) {
 			if (irq->have_cpumask) {
 				irq_set_affinity_hint(irq->vector, NULL);
@@ -6078,14 +6099,25 @@ static int bnxt_request_irq(struct bnxt *bp)
 	int i, j, rc = 0;
 	unsigned long flags = 0;
 #ifdef CONFIG_RFS_ACCEL
-	struct cpu_rmap *rmap = bp->dev->rx_cpu_rmap;
+	struct cpu_rmap *rmap;
 #endif
 
+	rc = bnxt_setup_int_mode(bp);
+	if (rc) {
+		netdev_err(bp->dev, "bnxt_setup_int_mode err: %x\n",
+			   rc);
+		return rc;
+	}
+#ifdef CONFIG_RFS_ACCEL
+	rmap = bp->dev->rx_cpu_rmap;
+#endif
 	if (!(bp->flags & BNXT_FLAG_USING_MSIX))
 		flags = IRQF_SHARED;
 
 	for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
-		struct bnxt_irq *irq = &bp->irq_tbl[i];
+		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
+		struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
+
 #ifdef CONFIG_RFS_ACCEL
 		if (rmap && bp->bnapi[i]->rx_ring) {
 			rc = irq_cpu_rmap_add(rmap, irq->vector);
@@ -6805,13 +6837,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 		rc = bnxt_reserve_rings(bp);
 		if (rc)
 			return rc;
-
-		rc = bnxt_setup_int_mode(bp);
-		if (rc) {
-			netdev_err(bp->dev, "bnxt_setup_int_mode err: %x\n",
-				   rc);
-			return rc;
-		}
 	}
 	if ((bp->flags & BNXT_FLAG_RFS) &&
 	    !(bp->flags & BNXT_FLAG_USING_MSIX)) {

From 4e41dc5deb6e5c36ac5f2e49575485920037b2aa Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:19 -0400
Subject: [PATCH 1596/1678] bnxt_en: Refactor bnxt_need_reserve_rings().

Refactor bnxt_need_reserve_rings() slightly so that __bnxt_reserve_rings()
can call it and remove some duplicated code.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 57 ++++++++++-------------
 1 file changed, 25 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c380a0d5a2ec..a6237dd8cacd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4753,6 +4753,30 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp)
 	return cp;
 }
 
+static bool bnxt_need_reserve_rings(struct bnxt *bp)
+{
+	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+	int cp = bp->cp_nr_rings;
+	int rx = bp->rx_nr_rings;
+	int vnic = 1, grp = rx;
+
+	if (bp->hwrm_spec_code < 0x10601)
+		return false;
+
+	if (hw_resc->resv_tx_rings != bp->tx_nr_rings)
+		return true;
+
+	if (bp->flags & BNXT_FLAG_RFS)
+		vnic = rx + 1;
+	if (bp->flags & BNXT_FLAG_AGG_RINGS)
+		rx <<= 1;
+	if ((bp->flags & BNXT_FLAG_NEW_RM) &&
+	    (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp ||
+	     hw_resc->resv_hw_ring_grps != grp || hw_resc->resv_vnics != vnic))
+		return true;
+	return false;
+}
+
 static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
 			   bool shared);
 
@@ -4766,7 +4790,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 	bool sh = false;
 	int vnic = 1;
 
-	if (bp->hwrm_spec_code < 0x10601)
+	if (!bnxt_need_reserve_rings(bp))
 		return 0;
 
 	if (bp->flags & BNXT_FLAG_SHARED_RINGS)
@@ -4775,14 +4799,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 		vnic = rx + 1;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		rx <<= 1;
-
 	grp = bp->rx_nr_rings;
-	if (tx == hw_resc->resv_tx_rings &&
-	    (!(bp->flags & BNXT_FLAG_NEW_RM) ||
-	      (rx == hw_resc->resv_rx_rings &&
-	       grp == hw_resc->resv_hw_ring_grps &&
-	       cp == hw_resc->resv_cp_rings && vnic == hw_resc->resv_vnics)))
-		return 0;
 
 	rc = bnxt_hwrm_reserve_rings(bp, tx, rx, grp, cp, vnic);
 	if (rc)
@@ -4826,30 +4843,6 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 	return rc;
 }
 
-static bool bnxt_need_reserve_rings(struct bnxt *bp)
-{
-	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	int rx = bp->rx_nr_rings;
-	int vnic = 1;
-
-	if (bp->hwrm_spec_code < 0x10601)
-		return false;
-
-	if (hw_resc->resv_tx_rings != bp->tx_nr_rings)
-		return true;
-
-	if (bp->flags & BNXT_FLAG_RFS)
-		vnic = rx + 1;
-	if (bp->flags & BNXT_FLAG_AGG_RINGS)
-		rx <<= 1;
-	if ((bp->flags & BNXT_FLAG_NEW_RM) &&
-	    (hw_resc->resv_rx_rings != rx ||
-	     hw_resc->resv_cp_rings != bp->cp_nr_rings ||
-	     hw_resc->resv_vnics != vnic))
-		return true;
-	return false;
-}
-
 static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 				    int ring_grps, int cp_rings, int vnics)
 {

From fbcfc8e4674156cb7eb3d8054bd4332142d2cc58 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:20 -0400
Subject: [PATCH 1597/1678] bnxt_en: Reserve completion rings and MSIX for
 bnxt_re RDMA driver.

Add additional logic to reserve completion rings for the bnxt_re driver
when it requests MSIX vectors.  The function bnxt_cp_rings_in_use()
will return the total number of completion rings used by both drivers
that need to be reserved.  If the network interface in up, we will
close and open the NIC to reserve the new set of completion rings and
re-initialize the vectors.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 39 ++++++++++++++-----
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  3 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 39 +++++++++++++++----
 3 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a6237dd8cacd..6a1d18fc140f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4756,7 +4756,7 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp)
 static bool bnxt_need_reserve_rings(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	int cp = bp->cp_nr_rings;
+	int cp = bnxt_cp_rings_in_use(bp);
 	int rx = bp->rx_nr_rings;
 	int vnic = 1, grp = rx;
 
@@ -4783,9 +4783,9 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
 static int __bnxt_reserve_rings(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+	int cp = bnxt_cp_rings_in_use(bp);
 	int tx = bp->tx_nr_rings;
 	int rx = bp->rx_nr_rings;
-	int cp = bp->cp_nr_rings;
 	int grp, rx_rings, rc;
 	bool sh = false;
 	int vnic = 1;
@@ -5908,7 +5908,7 @@ void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max)
 	bp->hw_resc.max_cp_rings = max;
 }
 
-static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
+unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 
@@ -5920,6 +5920,26 @@ void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs)
 	bp->hw_resc.max_irqs = max_irqs;
 }
 
+int bnxt_get_avail_msix(struct bnxt *bp, int num)
+{
+	int max_cp = bnxt_get_max_func_cp_rings(bp);
+	int max_irq = bnxt_get_max_func_irqs(bp);
+	int total_req = bp->cp_nr_rings + num;
+	int max_idx, avail_msix;
+
+	max_idx = min_t(int, bp->total_irqs, max_cp);
+	avail_msix = max_idx - bp->cp_nr_rings;
+	if (!(bp->flags & BNXT_FLAG_NEW_RM) || avail_msix >= num)
+		return avail_msix;
+
+	if (max_irq < total_req) {
+		num = max_irq - bp->cp_nr_rings;
+		if (num <= 0)
+			return 0;
+	}
+	return num;
+}
+
 static int bnxt_get_num_msix(struct bnxt *bp)
 {
 	if (!(bp->flags & BNXT_FLAG_NEW_RM))
@@ -5930,7 +5950,7 @@ static int bnxt_get_num_msix(struct bnxt *bp)
 
 static int bnxt_init_msix(struct bnxt *bp)
 {
-	int i, total_vecs, max, rc = 0, min = 1;
+	int i, total_vecs, max, rc = 0, min = 1, ulp_msix;
 	struct msix_entry *msix_ent;
 
 	total_vecs = bnxt_get_num_msix(bp);
@@ -5951,7 +5971,8 @@ static int bnxt_init_msix(struct bnxt *bp)
 		min = 2;
 
 	total_vecs = pci_enable_msix_range(bp->pdev, msix_ent, min, total_vecs);
-	if (total_vecs < 0) {
+	ulp_msix = bnxt_get_ulp_msix_num(bp);
+	if (total_vecs < 0 || total_vecs < ulp_msix) {
 		rc = -ENODEV;
 		goto msix_setup_exit;
 	}
@@ -5964,7 +5985,7 @@ static int bnxt_init_msix(struct bnxt *bp)
 		bp->total_irqs = total_vecs;
 		/* Trim rings based upon num of vectors allocated */
 		rc = bnxt_trim_rings(bp, &bp->rx_nr_rings, &bp->tx_nr_rings,
-				     total_vecs, min == 1);
+				     total_vecs - ulp_msix, min == 1);
 		if (rc)
 			goto msix_setup_exit;
 
@@ -6028,9 +6049,8 @@ static void bnxt_clear_int_mode(struct bnxt *bp)
 	bp->flags &= ~BNXT_FLAG_USING_MSIX;
 }
 
-static int bnxt_reserve_rings(struct bnxt *bp)
+int bnxt_reserve_rings(struct bnxt *bp)
 {
-	int orig_cp = bp->hw_resc.resv_cp_rings;
 	int tcs = netdev_get_num_tc(bp->dev);
 	int rc;
 
@@ -6042,7 +6062,8 @@ static int bnxt_reserve_rings(struct bnxt *bp)
 		netdev_err(bp->dev, "ring reservation failure rc: %d\n", rc);
 		return rc;
 	}
-	if ((bp->flags & BNXT_FLAG_NEW_RM) && bp->cp_nr_rings > orig_cp) {
+	if ((bp->flags & BNXT_FLAG_NEW_RM) &&
+	    (bnxt_get_num_msix(bp) != bp->total_irqs)) {
 		bnxt_clear_int_mode(bp);
 		rc = bnxt_init_int_mode(bp);
 		if (rc)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index e0cd0cb529c0..3d55d3b56865 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1454,7 +1454,10 @@ unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
 void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
 unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
 void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max);
+unsigned int bnxt_get_max_func_irqs(struct bnxt *bp);
 void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max);
+int bnxt_get_avail_msix(struct bnxt *bp, int num);
+int bnxt_reserve_rings(struct bnxt *bp);
 void bnxt_tx_disable(struct bnxt *bp);
 void bnxt_tx_enable(struct bnxt *bp);
 int bnxt_hwrm_set_pause(struct bnxt *);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 62636cd44331..2583923df5a1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -108,6 +108,7 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 	struct bnxt *bp = netdev_priv(dev);
 	int max_idx, max_cp_rings;
 	int avail_msix, i, idx;
+	int rc = 0;
 
 	ASSERT_RTNL();
 	if (ulp_id != BNXT_ROCE_ULP)
@@ -120,26 +121,46 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 		return -EAGAIN;
 
 	max_cp_rings = bnxt_get_max_func_cp_rings(bp);
-	max_idx = min_t(int, bp->total_irqs, max_cp_rings);
-	avail_msix = max_idx - bp->cp_nr_rings;
+	avail_msix = bnxt_get_avail_msix(bp, num_msix);
 	if (!avail_msix)
 		return -ENOMEM;
 	if (avail_msix > num_msix)
 		avail_msix = num_msix;
 
-	if (bp->flags & BNXT_FLAG_NEW_RM)
+	if (bp->flags & BNXT_FLAG_NEW_RM) {
 		idx = bp->cp_nr_rings;
-	else
+	} else {
+		max_idx = min_t(int, bp->total_irqs, max_cp_rings);
 		idx = max_idx - avail_msix;
+	}
 	edev->ulp_tbl[ulp_id].msix_base = idx;
+	edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
+	if (bp->total_irqs < (idx + avail_msix)) {
+		if (netif_running(dev)) {
+			bnxt_close_nic(bp, true, false);
+			rc = bnxt_open_nic(bp, true, false);
+		} else {
+			rc = bnxt_reserve_rings(bp);
+		}
+	}
+	if (rc) {
+		edev->ulp_tbl[ulp_id].msix_requested = 0;
+		return -EAGAIN;
+	}
+
+	if (bp->flags & BNXT_FLAG_NEW_RM) {
+		struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
+
+		avail_msix = hw_resc->resv_cp_rings - bp->cp_nr_rings;
+		edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
+	}
 	for (i = 0; i < avail_msix; i++) {
 		ent[i].vector = bp->irq_tbl[idx + i].vector;
 		ent[i].ring_idx = idx + i;
 		ent[i].db_offset = (idx + i) * 0x80;
 	}
-	bnxt_set_max_func_irqs(bp, max_idx - avail_msix);
+	bnxt_set_max_func_irqs(bp, bnxt_get_max_func_irqs(bp) - avail_msix);
 	bnxt_set_max_func_cp_rings(bp, max_cp_rings - avail_msix);
-	edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
 	return avail_msix;
 }
 
@@ -157,7 +178,11 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 	msix_requested = edev->ulp_tbl[ulp_id].msix_requested;
 	bnxt_set_max_func_cp_rings(bp, max_cp_rings + msix_requested);
 	edev->ulp_tbl[ulp_id].msix_requested = 0;
-	bnxt_set_max_func_irqs(bp, bp->total_irqs);
+	bnxt_set_max_func_irqs(bp, bnxt_get_max_func_irqs(bp) + msix_requested);
+	if (netif_running(dev)) {
+		bnxt_close_nic(bp, true, false);
+		bnxt_open_nic(bp, true, false);
+	}
 	return 0;
 }
 

From ec86f14ea5064e36ee111297bdb376dda4cba264 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 31 Mar 2018 13:54:21 -0400
Subject: [PATCH 1598/1678] bnxt_en: Add ULP calls to stop and restart IRQs.

When the driver needs to re-initailize the IRQ vectors, we make the
new ulp_irq_stop() call to tell the RDMA driver to disable and free
the IRQ vectors.  After IRQ vectors have been re-initailized, we
make the ulp_irq_restart() call to tell the RDMA driver that
IRQs can be restarted.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  7 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 81 +++++++++++++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 19 +++--
 3 files changed, 90 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6a1d18fc140f..1991f0c7bc0e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6064,8 +6064,10 @@ int bnxt_reserve_rings(struct bnxt *bp)
 	}
 	if ((bp->flags & BNXT_FLAG_NEW_RM) &&
 	    (bnxt_get_num_msix(bp) != bp->total_irqs)) {
+		bnxt_ulp_irq_stop(bp);
 		bnxt_clear_int_mode(bp);
 		rc = bnxt_init_int_mode(bp);
+		bnxt_ulp_irq_restart(bp, rc);
 		if (rc)
 			return rc;
 	}
@@ -8575,16 +8577,15 @@ int bnxt_restore_pf_fw_resources(struct bnxt *bp)
 	int rc;
 
 	ASSERT_RTNL();
-	if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
-		return 0;
-
 	bnxt_hwrm_func_qcaps(bp);
 
 	if (netif_running(bp->dev))
 		__bnxt_close_nic(bp, true, false);
 
+	bnxt_ulp_irq_stop(bp);
 	bnxt_clear_int_mode(bp);
 	rc = bnxt_init_int_mode(bp);
+	bnxt_ulp_irq_restart(bp, rc);
 
 	if (netif_running(bp->dev)) {
 		if (rc)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 2583923df5a1..347e4f946eb2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -1,6 +1,6 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
- * Copyright (c) 2016 Broadcom Limited
+ * Copyright (c) 2016-2018 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -101,13 +101,27 @@ static int bnxt_unregister_dev(struct bnxt_en_dev *edev, int ulp_id)
 	return 0;
 }
 
+static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent)
+{
+	struct bnxt_en_dev *edev = bp->edev;
+	int num_msix, idx, i;
+
+	num_msix = edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested;
+	idx = edev->ulp_tbl[BNXT_ROCE_ULP].msix_base;
+	for (i = 0; i < num_msix; i++) {
+		ent[i].vector = bp->irq_tbl[idx + i].vector;
+		ent[i].ring_idx = idx + i;
+		ent[i].db_offset = (idx + i) * 0x80;
+	}
+}
+
 static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 			      struct bnxt_msix_entry *ent, int num_msix)
 {
 	struct net_device *dev = edev->net;
 	struct bnxt *bp = netdev_priv(dev);
 	int max_idx, max_cp_rings;
-	int avail_msix, i, idx;
+	int avail_msix, idx;
 	int rc = 0;
 
 	ASSERT_RTNL();
@@ -154,13 +168,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 		avail_msix = hw_resc->resv_cp_rings - bp->cp_nr_rings;
 		edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
 	}
-	for (i = 0; i < avail_msix; i++) {
-		ent[i].vector = bp->irq_tbl[idx + i].vector;
-		ent[i].ring_idx = idx + i;
-		ent[i].db_offset = (idx + i) * 0x80;
-	}
+	bnxt_fill_msix_vecs(bp, ent);
 	bnxt_set_max_func_irqs(bp, bnxt_get_max_func_irqs(bp) - avail_msix);
 	bnxt_set_max_func_cp_rings(bp, max_cp_rings - avail_msix);
+	edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED;
 	return avail_msix;
 }
 
@@ -174,11 +185,15 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 	if (ulp_id != BNXT_ROCE_ULP)
 		return -EINVAL;
 
+	if (!(edev->flags & BNXT_EN_FLAG_MSIX_REQUESTED))
+		return 0;
+
 	max_cp_rings = bnxt_get_max_func_cp_rings(bp);
 	msix_requested = edev->ulp_tbl[ulp_id].msix_requested;
 	bnxt_set_max_func_cp_rings(bp, max_cp_rings + msix_requested);
 	edev->ulp_tbl[ulp_id].msix_requested = 0;
 	bnxt_set_max_func_irqs(bp, bnxt_get_max_func_irqs(bp) + msix_requested);
+	edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED;
 	if (netif_running(dev)) {
 		bnxt_close_nic(bp, true, false);
 		bnxt_open_nic(bp, true, false);
@@ -340,6 +355,58 @@ void bnxt_ulp_shutdown(struct bnxt *bp)
 	}
 }
 
+void bnxt_ulp_irq_stop(struct bnxt *bp)
+{
+	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_ulp_ops *ops;
+
+	if (!edev || !(edev->flags & BNXT_EN_FLAG_MSIX_REQUESTED))
+		return;
+
+	if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) {
+		struct bnxt_ulp *ulp = &edev->ulp_tbl[BNXT_ROCE_ULP];
+
+		if (!ulp->msix_requested)
+			return;
+
+		ops = rtnl_dereference(ulp->ulp_ops);
+		if (!ops || !ops->ulp_irq_stop)
+			return;
+		ops->ulp_irq_stop(ulp->handle);
+	}
+}
+
+void bnxt_ulp_irq_restart(struct bnxt *bp, int err)
+{
+	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_ulp_ops *ops;
+
+	if (!edev || !(edev->flags & BNXT_EN_FLAG_MSIX_REQUESTED))
+		return;
+
+	if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) {
+		struct bnxt_ulp *ulp = &edev->ulp_tbl[BNXT_ROCE_ULP];
+		struct bnxt_msix_entry *ent = NULL;
+
+		if (!ulp->msix_requested)
+			return;
+
+		ops = rtnl_dereference(ulp->ulp_ops);
+		if (!ops || !ops->ulp_irq_restart)
+			return;
+
+		if (!err) {
+			ent = kcalloc(ulp->msix_requested, sizeof(*ent),
+				      GFP_KERNEL);
+			if (!ent)
+				return;
+			bnxt_fill_msix_vecs(bp, ent);
+		}
+		ops->ulp_irq_restart(ulp->handle, ent);
+		kfree(ent);
+	}
+}
+
 void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl)
 {
 	u16 event_id = le16_to_cpu(cmpl->event_id);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index c9fa7eb56a08..df48ac71729f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -1,6 +1,6 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
- * Copyright (c) 2016 Broadcom Limited
+ * Copyright (c) 2016-2018 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -20,6 +20,12 @@
 struct hwrm_async_event_cmpl;
 struct bnxt;
 
+struct bnxt_msix_entry {
+	u32	vector;
+	u32	ring_idx;
+	u32	db_offset;
+};
+
 struct bnxt_ulp_ops {
 	/* async_notifier() cannot sleep (in BH context) */
 	void (*ulp_async_notifier)(void *, struct hwrm_async_event_cmpl *);
@@ -27,12 +33,8 @@ struct bnxt_ulp_ops {
 	void (*ulp_start)(void *);
 	void (*ulp_sriov_config)(void *, int);
 	void (*ulp_shutdown)(void *);
-};
-
-struct bnxt_msix_entry {
-	u32	vector;
-	u32	ring_idx;
-	u32	db_offset;
+	void (*ulp_irq_stop)(void *);
+	void (*ulp_irq_restart)(void *, struct bnxt_msix_entry *);
 };
 
 struct bnxt_fw_msg {
@@ -61,6 +63,7 @@ struct bnxt_en_dev {
 	#define BNXT_EN_FLAG_ROCEV2_CAP		0x2
 	#define BNXT_EN_FLAG_ROCE_CAP		(BNXT_EN_FLAG_ROCEV1_CAP | \
 						 BNXT_EN_FLAG_ROCEV2_CAP)
+	#define BNXT_EN_FLAG_MSIX_REQUESTED	0x4
 	const struct bnxt_en_ops	*en_ops;
 	struct bnxt_ulp			ulp_tbl[BNXT_MAX_ULP];
 };
@@ -92,6 +95,8 @@ void bnxt_ulp_stop(struct bnxt *bp);
 void bnxt_ulp_start(struct bnxt *bp);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
 void bnxt_ulp_shutdown(struct bnxt *bp);
+void bnxt_ulp_irq_stop(struct bnxt *bp);
+void bnxt_ulp_irq_restart(struct bnxt *bp, int err);
 void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl);
 struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev);
 

From c22af22cbdc206a0273d0e6d773bd3dfc99d2b02 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:42 -0700
Subject: [PATCH 1599/1678] ipv6: frag: remove unused field

csum field in struct frag_queue is not used, remove it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 50a6f0ddb878..5c18836672e9 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -603,7 +603,6 @@ struct frag_queue {
 	struct in6_addr		daddr;
 
 	int			iif;
-	unsigned int		csum;
 	__u16			nhoffset;
 	u8			ecn;
 };

From 787bea7748a76130566f881c2342a0be4127d182 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:43 -0700
Subject: [PATCH 1600/1678] inet: frags: change inet_frags_init_net() return
 value

We will soon initialize one rhashtable per struct netns_frags
in inet_frags_init_net().

This patch changes the return value to eventually propagate an
error.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 |  3 ++-
 net/ieee802154/6lowpan/reassembly.c     | 11 ++++++++---
 net/ipv4/ip_fragment.c                  | 12 +++++++++---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 12 +++++++++---
 net/ipv6/reassembly.c                   | 11 +++++++++--
 5 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 351f0c3cdcd9..b1d62176f3b4 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -104,9 +104,10 @@ struct inet_frags {
 int inet_frags_init(struct inet_frags *);
 void inet_frags_fini(struct inet_frags *);
 
-static inline void inet_frags_init_net(struct netns_frags *nf)
+static inline int inet_frags_init_net(struct netns_frags *nf)
 {
 	atomic_set(&nf->mem, 0);
+	return 0;
 }
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
 
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 85bf86ad6b18..2aaab4bba429 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -581,14 +581,19 @@ static int __net_init lowpan_frags_init_net(struct net *net)
 {
 	struct netns_ieee802154_lowpan *ieee802154_lowpan =
 		net_ieee802154_lowpan(net);
+	int res;
 
 	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
 
-	inet_frags_init_net(&ieee802154_lowpan->frags);
-
-	return lowpan_frags_ns_sysctl_register(net);
+	res = inet_frags_init_net(&ieee802154_lowpan->frags);
+	if (res < 0)
+		return res;
+	res = lowpan_frags_ns_sysctl_register(net);
+	if (res < 0)
+		inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+	return res;
 }
 
 static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbf1b94942c0..e0b39d4ecbd4 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -846,6 +846,8 @@ static void __init ip4_frags_ctl_register(void)
 
 static int __net_init ipv4_frags_init_net(struct net *net)
 {
+	int res;
+
 	/* Fragment cache limits.
 	 *
 	 * The fragment memory accounting code, (tries to) account for
@@ -871,9 +873,13 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 
 	net->ipv4.frags.max_dist = 64;
 
-	inet_frags_init_net(&net->ipv4.frags);
-
-	return ip4_frags_ns_ctl_register(net);
+	res = inet_frags_init_net(&net->ipv4.frags);
+	if (res < 0)
+		return res;
+	res = ip4_frags_ns_ctl_register(net);
+	if (res < 0)
+		inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+	return res;
 }
 
 static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..6ff41569134a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -629,12 +629,18 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
 
 static int nf_ct_net_init(struct net *net)
 {
+	int res;
+
 	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
-	inet_frags_init_net(&net->nf_frag.frags);
-
-	return nf_ct_frag6_sysctl_register(net);
+	res = inet_frags_init_net(&net->nf_frag.frags);
+	if (res < 0)
+		return res;
+	res = nf_ct_frag6_sysctl_register(net);
+	if (res < 0)
+		inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+	return res;
 }
 
 static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 08a139f14d0f..a8f7a5f0251a 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -711,13 +711,20 @@ static void ip6_frags_sysctl_unregister(void)
 
 static int __net_init ipv6_frags_init_net(struct net *net)
 {
+	int res;
+
 	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
 
-	inet_frags_init_net(&net->ipv6.frags);
+	res = inet_frags_init_net(&net->ipv6.frags);
+	if (res < 0)
+		return res;
 
-	return ip6_frags_ns_sysctl_register(net);
+	res = ip6_frags_ns_sysctl_register(net);
+	if (res < 0)
+		inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+	return res;
 }
 
 static void __net_exit ipv6_frags_exit_net(struct net *net)

From 093ba72914b696521e4885756a68a3332782c8de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:44 -0700
Subject: [PATCH 1601/1678] inet: frags: add a pointer to struct netns_frags

In order to simplify the API, add a pointer to struct inet_frags.
This will allow us to make things less complex.

These functions no longer have a struct inet_frags parameter :

inet_frag_destroy(struct inet_frag_queue *q  /*, struct inet_frags *f */)
inet_frag_put(struct inet_frag_queue *q /*, struct inet_frags *f */)
inet_frag_kill(struct inet_frag_queue *q /*, struct inet_frags *f */)
inet_frags_exit_net(struct netns_frags *nf /*, struct inet_frags *f */)
ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 | 11 ++++++-----
 include/net/ipv6.h                      |  3 +--
 net/ieee802154/6lowpan/reassembly.c     | 13 +++++++------
 net/ipv4/inet_fragment.c                | 17 ++++++++++-------
 net/ipv4/ip_fragment.c                  |  9 +++++----
 net/ipv6/netfilter/nf_conntrack_reasm.c | 16 +++++++++-------
 net/ipv6/reassembly.c                   | 20 ++++++++++----------
 7 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b1d62176f3b4..69e531ed8189 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -10,6 +10,7 @@ struct netns_frags {
 	int			high_thresh;
 	int			low_thresh;
 	int			max_dist;
+	struct inet_frags	*f;
 };
 
 /**
@@ -109,20 +110,20 @@ static inline int inet_frags_init_net(struct netns_frags *nf)
 	atomic_set(&nf->mem, 0);
 	return 0;
 }
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
+void inet_frags_exit_net(struct netns_frags *nf);
 
-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
+void inet_frag_kill(struct inet_frag_queue *q);
+void inet_frag_destroy(struct inet_frag_queue *q);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash);
 
 void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 				   const char *prefix);
 
-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
+static inline void inet_frag_put(struct inet_frag_queue *q)
 {
 	if (refcount_dec_and_test(&q->refcnt))
-		inet_frag_destroy(q, f);
+		inet_frag_destroy(q);
 }
 
 static inline bool inet_frag_evicting(struct inet_frag_queue *q)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5c18836672e9..57b7fe43d2ab 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -607,8 +607,7 @@ struct frag_queue {
 	u8			ecn;
 };
 
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
-			   struct inet_frags *frags);
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
 
 static inline bool ipv6_addr_any(const struct in6_addr *a)
 {
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 2aaab4bba429..6badc055555b 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -94,10 +94,10 @@ static void lowpan_frag_expire(struct timer_list *t)
 	if (fq->q.flags & INET_FRAG_COMPLETE)
 		goto out;
 
-	inet_frag_kill(&fq->q, &lowpan_frags);
+	inet_frag_kill(&fq->q);
 out:
 	spin_unlock(&fq->q.lock);
-	inet_frag_put(&fq->q, &lowpan_frags);
+	inet_frag_put(&fq->q);
 }
 
 static inline struct lowpan_frag_queue *
@@ -230,7 +230,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
 	struct sk_buff *fp, *head = fq->q.fragments;
 	int sum_truesize;
 
-	inet_frag_kill(&fq->q, &lowpan_frags);
+	inet_frag_kill(&fq->q);
 
 	/* Make the one we just received the head. */
 	if (prev) {
@@ -438,7 +438,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
 		ret = lowpan_frag_queue(fq, skb, frag_type);
 		spin_unlock(&fq->q.lock);
 
-		inet_frag_put(&fq->q, &lowpan_frags);
+		inet_frag_put(&fq->q);
 		return ret;
 	}
 
@@ -586,13 +586,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
 	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+	ieee802154_lowpan->frags.f = &lowpan_frags;
 
 	res = inet_frags_init_net(&ieee802154_lowpan->frags);
 	if (res < 0)
 		return res;
 	res = lowpan_frags_ns_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+		inet_frags_exit_net(&ieee802154_lowpan->frags);
 	return res;
 }
 
@@ -602,7 +603,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
 		net_ieee802154_lowpan(net);
 
 	lowpan_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+	inet_frags_exit_net(&ieee802154_lowpan->frags);
 }
 
 static struct pernet_operations lowpan_frags_ops = {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index e8ec28999f5c..1ac69f65d0de 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -219,8 +219,9 @@ void inet_frags_fini(struct inet_frags *f)
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
 {
+	struct inet_frags *f =nf->f;
 	unsigned int seq;
 	int i;
 
@@ -264,33 +265,34 @@ __acquires(hb->chain_lock)
 	return hb;
 }
 
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static inline void fq_unlink(struct inet_frag_queue *fq)
 {
 	struct inet_frag_bucket *hb;
 
-	hb = get_frag_bucket_locked(fq, f);
+	hb = get_frag_bucket_locked(fq, fq->net->f);
 	hlist_del(&fq->list);
 	fq->flags |= INET_FRAG_COMPLETE;
 	spin_unlock(&hb->chain_lock);
 }
 
-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
 {
 	if (del_timer(&fq->timer))
 		refcount_dec(&fq->refcnt);
 
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
-		fq_unlink(fq, f);
+		fq_unlink(fq);
 		refcount_dec(&fq->refcnt);
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
 
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+void inet_frag_destroy(struct inet_frag_queue *q)
 {
 	struct sk_buff *fp;
 	struct netns_frags *nf;
 	unsigned int sum, sum_truesize = 0;
+	struct inet_frags *f;
 
 	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
 	WARN_ON(del_timer(&q->timer) != 0);
@@ -298,6 +300,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
 	/* Release all fragment data. */
 	fp = q->fragments;
 	nf = q->net;
+	f = nf->f;
 	while (fp) {
 		struct sk_buff *xp = fp->next;
 
@@ -333,7 +336,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 			refcount_inc(&qp->refcnt);
 			spin_unlock(&hb->chain_lock);
 			qp_in->flags |= INET_FRAG_COMPLETE;
-			inet_frag_put(qp_in, f);
+			inet_frag_put(qp_in);
 			return qp;
 		}
 	}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e0b39d4ecbd4..cd2b4c9419fc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -168,7 +168,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
 
 static void ipq_put(struct ipq *ipq)
 {
-	inet_frag_put(&ipq->q, &ip4_frags);
+	inet_frag_put(&ipq->q);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -176,7 +176,7 @@ static void ipq_put(struct ipq *ipq)
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	inet_frag_kill(&ipq->q, &ip4_frags);
+	inet_frag_kill(&ipq->q);
 }
 
 static bool frag_expire_skip_icmp(u32 user)
@@ -872,20 +872,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
 	net->ipv4.frags.timeout = IP_FRAG_TIME;
 
 	net->ipv4.frags.max_dist = 64;
+	net->ipv4.frags.f = &ip4_frags;
 
 	res = inet_frags_init_net(&net->ipv4.frags);
 	if (res < 0)
 		return res;
 	res = ip4_frags_ns_ctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+		inet_frags_exit_net(&net->ipv4.frags);
 	return res;
 }
 
 static void __net_exit ipv4_frags_exit_net(struct net *net)
 {
 	ip4_frags_ns_ctl_unregister(net);
-	inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+	inet_frags_exit_net(&net->ipv4.frags);
 }
 
 static struct pernet_operations ip4_frags_ops = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6ff41569134a..c4b40fdee838 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -178,7 +178,7 @@ static void nf_ct_frag6_expire(struct timer_list *t)
 	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
-	ip6_expire_frag_queue(net, fq, &nf_frags);
+	ip6_expire_frag_queue(net, fq);
 }
 
 /* Creation primitives. */
@@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 			 * this case. -DaveM
 			 */
 			pr_debug("end of fragment not rounded to 8 bytes.\n");
-			inet_frag_kill(&fq->q, &nf_frags);
+			inet_frag_kill(&fq->q);
 			return -EPROTO;
 		}
 		if (end > fq->q.len) {
@@ -357,7 +357,7 @@ found:
 	return 0;
 
 discard_fq:
-	inet_frag_kill(&fq->q, &nf_frags);
+	inet_frag_kill(&fq->q);
 err:
 	return -EINVAL;
 }
@@ -379,7 +379,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 	int    payload_len;
 	u8 ecn;
 
-	inet_frag_kill(&fq->q, &nf_frags);
+	inet_frag_kill(&fq->q);
 
 	WARN_ON(head == NULL);
 	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -622,7 +622,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 
 out_unlock:
 	spin_unlock_bh(&fq->q.lock);
-	inet_frag_put(&fq->q, &nf_frags);
+	inet_frag_put(&fq->q);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
@@ -634,19 +634,21 @@ static int nf_ct_net_init(struct net *net)
 	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->nf_frag.frags.f = &nf_frags;
+
 	res = inet_frags_init_net(&net->nf_frag.frags);
 	if (res < 0)
 		return res;
 	res = nf_ct_frag6_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+		inet_frags_exit_net(&net->nf_frag.frags);
 	return res;
 }
 
 static void nf_ct_net_exit(struct net *net)
 {
 	nf_ct_frags6_sysctl_unregister(net);
-	inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+	inet_frags_exit_net(&net->nf_frag.frags);
 }
 
 static struct pernet_operations nf_ct_net_ops = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a8f7a5f0251a..4855de6f673a 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -128,8 +128,7 @@ void ip6_frag_init(struct inet_frag_queue *q, const void *a)
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
-			   struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
 {
 	struct net_device *dev = NULL;
 
@@ -138,7 +137,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
 	if (fq->q.flags & INET_FRAG_COMPLETE)
 		goto out;
 
-	inet_frag_kill(&fq->q, frags);
+	inet_frag_kill(&fq->q);
 
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, fq->iif);
@@ -166,7 +165,7 @@ out_rcu_unlock:
 	rcu_read_unlock();
 out:
 	spin_unlock(&fq->q.lock);
-	inet_frag_put(&fq->q, frags);
+	inet_frag_put(&fq->q);
 }
 EXPORT_SYMBOL(ip6_expire_frag_queue);
 
@@ -179,7 +178,7 @@ static void ip6_frag_expire(struct timer_list *t)
 	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, ipv6.frags);
 
-	ip6_expire_frag_queue(net, fq, &ip6_frags);
+	ip6_expire_frag_queue(net, fq);
 }
 
 static struct frag_queue *
@@ -364,7 +363,7 @@ found:
 	return -1;
 
 discard_fq:
-	inet_frag_kill(&fq->q, &ip6_frags);
+	inet_frag_kill(&fq->q);
 err:
 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			IPSTATS_MIB_REASMFAILS);
@@ -391,7 +390,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	int sum_truesize;
 	u8 ecn;
 
-	inet_frag_kill(&fq->q, &ip6_frags);
+	inet_frag_kill(&fq->q);
 
 	ecn = ip_frag_ecn_table[fq->ecn];
 	if (unlikely(ecn == 0xff))
@@ -569,7 +568,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
 		spin_unlock(&fq->q.lock);
-		inet_frag_put(&fq->q, &ip6_frags);
+		inet_frag_put(&fq->q);
 		return ret;
 	}
 
@@ -716,6 +715,7 @@ static int __net_init ipv6_frags_init_net(struct net *net)
 	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->ipv6.frags.f = &ip6_frags;
 
 	res = inet_frags_init_net(&net->ipv6.frags);
 	if (res < 0)
@@ -723,14 +723,14 @@ static int __net_init ipv6_frags_init_net(struct net *net)
 
 	res = ip6_frags_ns_sysctl_register(net);
 	if (res < 0)
-		inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+		inet_frags_exit_net(&net->ipv6.frags);
 	return res;
 }
 
 static void __net_exit ipv6_frags_exit_net(struct net *net)
 {
 	ip6_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+	inet_frags_exit_net(&net->ipv6.frags);
 }
 
 static struct pernet_operations ip6_frags_ops = {

From 5b975bab23615cd0fdf67af6c9298eb01c4b9f61 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:45 -0700
Subject: [PATCH 1602/1678] inet: frags: refactor ipv6_frag_init()

We want to call inet_frags_init() earlier.

This is a prereq to "inet: frags: use rhashtables for reassembly units"

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4855de6f673a..f0071b113a92 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -742,18 +742,6 @@ int __init ipv6_frag_init(void)
 {
 	int ret;
 
-	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
-	if (ret)
-		goto out;
-
-	ret = ip6_frags_sysctl_register();
-	if (ret)
-		goto err_sysctl;
-
-	ret = register_pernet_subsys(&ip6_frags_ops);
-	if (ret)
-		goto err_pernet;
-
 	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = NULL;
@@ -762,8 +750,21 @@ int __init ipv6_frag_init(void)
 	ip6_frags.frag_expire = ip6_frag_expire;
 	ip6_frags.frags_cache_name = ip6_frag_cache_name;
 	ret = inet_frags_init(&ip6_frags);
+	if (ret)
+		goto out;
+
+	ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+	if (ret)
+		goto err_protocol;
+
+	ret = ip6_frags_sysctl_register();
+	if (ret)
+		goto err_sysctl;
+
+	ret = register_pernet_subsys(&ip6_frags_ops);
 	if (ret)
 		goto err_pernet;
+
 out:
 	return ret;
 
@@ -771,6 +772,8 @@ err_pernet:
 	ip6_frags_sysctl_unregister();
 err_sysctl:
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+err_protocol:
+	inet_frags_fini(&ip6_frags);
 	goto out;
 }
 

From 807f1844df4ac23594268fa9f41902d0549e92aa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:46 -0700
Subject: [PATCH 1603/1678] inet: frags: refactor lowpan_net_frag_init()

We want to call lowpan_net_frag_init() earlier.
Similar to commit "inet: frags: refactor ipv6_frag_init()"

This is a prereq to "inet: frags: use rhashtables for reassembly units"

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan/reassembly.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 6badc055555b..ddada12a044d 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -615,14 +615,6 @@ int __init lowpan_net_frag_init(void)
 {
 	int ret;
 
-	ret = lowpan_frags_sysctl_register();
-	if (ret)
-		return ret;
-
-	ret = register_pernet_subsys(&lowpan_frags_ops);
-	if (ret)
-		goto err_pernet;
-
 	lowpan_frags.hashfn = lowpan_hashfn;
 	lowpan_frags.constructor = lowpan_frag_init;
 	lowpan_frags.destructor = NULL;
@@ -632,11 +624,21 @@ int __init lowpan_net_frag_init(void)
 	lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
 	ret = inet_frags_init(&lowpan_frags);
 	if (ret)
-		goto err_pernet;
+		goto out;
 
+	ret = lowpan_frags_sysctl_register();
+	if (ret)
+		goto err_sysctl;
+
+	ret = register_pernet_subsys(&lowpan_frags_ops);
+	if (ret)
+		goto err_pernet;
+out:
 	return ret;
 err_pernet:
 	lowpan_frags_sysctl_unregister();
+err_sysctl:
+	inet_frags_fini(&lowpan_frags);
 	return ret;
 }
 

From 483a6e4fa055123142d8956866fe2aa9c98d546d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:47 -0700
Subject: [PATCH 1604/1678] inet: frags: refactor ipfrag_init()

We need to call inet_frags_init() before register_pernet_subsys(),
as a prereq for following patch ("inet: frags: use rhashtables for reassembly units")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cd2b4c9419fc..1a3bc85d6f5e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -896,8 +896,6 @@ static struct pernet_operations ip4_frags_ops = {
 
 void __init ipfrag_init(void)
 {
-	ip4_frags_ctl_register();
-	register_pernet_subsys(&ip4_frags_ops);
 	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
@@ -907,4 +905,6 @@ void __init ipfrag_init(void)
 	ip4_frags.frags_cache_name = ip_frag_cache_name;
 	if (inet_frags_init(&ip4_frags))
 		panic("IP: failed to allocate ip4_frags cache\n");
+	ip4_frags_ctl_register();
+	register_pernet_subsys(&ip4_frags_ops);
 }

From ae6da1f503abb5a5081f9f6c4a6881de97830f3e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:48 -0700
Subject: [PATCH 1605/1678] rhashtable: add schedule points

Rehashing and destroying large hash table takes a lot of time,
and happens in process context. It is safe to add cond_resched()
in rhashtable_rehash_table() and rhashtable_free_and_destroy()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 47de025b6245..2b2b79974b61 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -333,6 +333,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 		err = rhashtable_rehash_chain(ht, old_hash);
 		if (err)
 			return err;
+		cond_resched();
 	}
 
 	/* Publish the new table pointer. */
@@ -1112,6 +1113,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
 
+			cond_resched();
 			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL;

From 648700f76b03b7e8149d13cc2bdb3355035258a9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:49 -0700
Subject: [PATCH 1606/1678] inet: frags: use rhashtables for reassembly units

Some applications still rely on IP fragmentation, and to be fair linux
reassembly unit is not working under any serious load.

It uses static hash tables of 1024 buckets, and up to 128 items per bucket (!!!)

A work queue is supposed to garbage collect items when host is under memory
pressure, and doing a hash rebuild, changing seed used in hash computations.

This work queue blocks softirqs for up to 25 ms when doing a hash rebuild,
occurring every 5 seconds if host is under fire.

Then there is the problem of sharing this hash table for all netns.

It is time to switch to rhashtables, and allocate one of them per netns
to speedup netns dismantle, since this is a critical metric these days.

Lookup is now using RCU. A followup patch will even remove
the refcount hold/release left from prior implementation and save
a couple of atomic operations.

Before this patch, 16 cpus (16 RX queue NIC) could not handle more
than 1 Mpps frags DDOS.

After the patch, I reach 9 Mpps without any tuning, and can use up to 2GB
of storage for the fragments (exact number depends on frags being evicted
after timeout)

$ grep FRAG /proc/net/sockstat
FRAG: inuse 1966916 memory 2140004608

A followup patch will change the limits for 64bit arches.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Florian Westphal <fw@strlen.de>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt  |   7 +-
 include/net/inet_frag.h                 |  81 +++---
 include/net/ipv6.h                      |  16 +-
 net/ieee802154/6lowpan/6lowpan_i.h      |  26 +-
 net/ieee802154/6lowpan/reassembly.c     |  91 +++---
 net/ipv4/inet_fragment.c                | 354 +++++-------------------
 net/ipv4/ip_fragment.c                  | 112 ++++----
 net/ipv6/netfilter/nf_conntrack_reasm.c |  51 +---
 net/ipv6/reassembly.c                   | 110 ++++----
 9 files changed, 270 insertions(+), 578 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 33f35f049ad5..6f2a3670e44b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -134,13 +134,10 @@ min_adv_mss - INTEGER
 IP Fragmentation:
 
 ipfrag_high_thresh - INTEGER
-	Maximum memory used to reassemble IP fragments. When
-	ipfrag_high_thresh bytes of memory is allocated for this purpose,
-	the fragment handler will toss packets until ipfrag_low_thresh
-	is reached. This also serves as a maximum limit to namespaces
-	different from the initial one.
+	Maximum memory used to reassemble IP fragments.
 
 ipfrag_low_thresh - INTEGER
+	(Obsolete since linux-4.17)
 	Maximum memory used to reassemble IP fragments before the kernel
 	begins to remove incomplete fragment queues to free up resources.
 	The kernel still accepts new fragments for defragmentation.
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 69e531ed8189..3fec0d3a0d01 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -2,7 +2,11 @@
 #ifndef __NET_FRAG_H__
 #define __NET_FRAG_H__
 
+#include <linux/rhashtable.h>
+
 struct netns_frags {
+	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
+
 	/* Keep atomic mem on separate cachelines in structs that include it */
 	atomic_t		mem ____cacheline_aligned_in_smp;
 	/* sysctls */
@@ -26,12 +30,30 @@ enum {
 	INET_FRAG_COMPLETE	= BIT(2),
 };
 
+struct frag_v4_compare_key {
+	__be32		saddr;
+	__be32		daddr;
+	u32		user;
+	u32		vif;
+	__be16		id;
+	u16		protocol;
+};
+
+struct frag_v6_compare_key {
+	struct in6_addr	saddr;
+	struct in6_addr	daddr;
+	u32		user;
+	__be32		id;
+	u32		iif;
+};
+
 /**
  * struct inet_frag_queue - fragment queue
  *
- * @lock: spinlock protecting the queue
+ * @node: rhash node
+ * @key: keys identifying this frag.
  * @timer: queue expiration timer
- * @list: hash bucket list
+ * @lock: spinlock protecting this frag
  * @refcnt: reference count of the queue
  * @fragments: received fragments head
  * @fragments_tail: received fragments tail
@@ -41,12 +63,16 @@ enum {
  * @flags: fragment queue flags
  * @max_size: maximum received fragment size
  * @net: namespace that this frag belongs to
- * @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
+ * @rcu: rcu head for freeing deferall
  */
 struct inet_frag_queue {
-	spinlock_t		lock;
+	struct rhash_head	node;
+	union {
+		struct frag_v4_compare_key v4;
+		struct frag_v6_compare_key v6;
+	} key;
 	struct timer_list	timer;
-	struct hlist_node	list;
+	spinlock_t		lock;
 	refcount_t		refcnt;
 	struct sk_buff		*fragments;
 	struct sk_buff		*fragments_tail;
@@ -55,51 +81,20 @@ struct inet_frag_queue {
 	int			meat;
 	__u8			flags;
 	u16			max_size;
-	struct netns_frags	*net;
-	struct hlist_node	list_evictor;
-};
-
-#define INETFRAGS_HASHSZ	1024
-
-/* averaged:
- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
- *	       rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
- *	       struct frag_queue))
- */
-#define INETFRAGS_MAXDEPTH	128
-
-struct inet_frag_bucket {
-	struct hlist_head	chain;
-	spinlock_t		chain_lock;
+	struct netns_frags      *net;
+	struct rcu_head		rcu;
 };
 
 struct inet_frags {
-	struct inet_frag_bucket	hash[INETFRAGS_HASHSZ];
-
-	struct work_struct	frags_work;
-	unsigned int next_bucket;
-	unsigned long last_rebuild_jiffies;
-	bool rebuild;
-
-	/* The first call to hashfn is responsible to initialize
-	 * rnd. This is best done with net_get_random_once.
-	 *
-	 * rnd_seqlock is used to let hash insertion detect
-	 * when it needs to re-lookup the hash chain to use.
-	 */
-	u32			rnd;
-	seqlock_t		rnd_seqlock;
 	unsigned int		qsize;
 
-	unsigned int		(*hashfn)(const struct inet_frag_queue *);
-	bool			(*match)(const struct inet_frag_queue *q,
-					 const void *arg);
 	void			(*constructor)(struct inet_frag_queue *q,
 					       const void *arg);
 	void			(*destructor)(struct inet_frag_queue *);
 	void			(*frag_expire)(struct timer_list *t);
 	struct kmem_cache	*frags_cachep;
 	const char		*frags_cache_name;
+	struct rhashtable_params rhash_params;
 };
 
 int inet_frags_init(struct inet_frags *);
@@ -108,15 +103,13 @@ void inet_frags_fini(struct inet_frags *);
 static inline int inet_frags_init_net(struct netns_frags *nf)
 {
 	atomic_set(&nf->mem, 0);
-	return 0;
+	return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
 }
 void inet_frags_exit_net(struct netns_frags *nf);
 
 void inet_frag_kill(struct inet_frag_queue *q);
 void inet_frag_destroy(struct inet_frag_queue *q);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
-		struct inet_frags *f, void *key, unsigned int hash);
-
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
 void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 				   const char *prefix);
 
@@ -128,7 +121,7 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
 
 static inline bool inet_frag_evicting(struct inet_frag_queue *q)
 {
-	return !hlist_unhashed(&q->list_evictor);
+	return false;
 }
 
 /* Memory Tracking Functions. */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 57b7fe43d2ab..6fa9a2bc5896 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -579,17 +579,8 @@ enum ip6_defrag_users {
 	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
 };
 
-struct ip6_create_arg {
-	__be32 id;
-	u32 user;
-	const struct in6_addr *src;
-	const struct in6_addr *dst;
-	int iif;
-	u8 ecn;
-};
-
 void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
+extern const struct rhashtable_params ip6_rhash_params;
 
 /*
  *	Equivalent of ipv4 struct ip
@@ -597,11 +588,6 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
 struct frag_queue {
 	struct inet_frag_queue	q;
 
-	__be32			id;		/* fragment id		*/
-	u32			user;
-	struct in6_addr		saddr;
-	struct in6_addr		daddr;
-
 	int			iif;
 	__u16			nhoffset;
 	u8			ecn;
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index d8de3bcfb103..b8d95cb71c25 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -17,37 +17,19 @@ typedef unsigned __bitwise lowpan_rx_result;
 #define LOWPAN_DISPATCH_FRAG1           0xc0
 #define LOWPAN_DISPATCH_FRAGN           0xe0
 
-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
 	u16 tag;
 	u16 d_size;
-	const struct ieee802154_addr *src;
-	const struct ieee802154_addr *dst;
+	const struct ieee802154_addr src;
+	const struct ieee802154_addr dst;
 };
 
-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
  */
 struct lowpan_frag_queue {
 	struct inet_frag_queue	q;
-
-	u16			tag;
-	u16			d_size;
-	struct ieee802154_addr	saddr;
-	struct ieee802154_addr	daddr;
 };
 
-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
-	switch (a->mode) {
-	case IEEE802154_ADDR_LONG:
-		return (((__force u64)a->extended_addr) >> 32) ^
-			(((__force u64)a->extended_addr) & 0xffffffff);
-	case IEEE802154_ADDR_SHORT:
-		return (__force u32)(a->short_addr + (a->pan_id << 16));
-	default:
-		return 0;
-	}
-}
-
 int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
 void lowpan_net_frag_exit(void);
 int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index ddada12a044d..0fa0121f85d4 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
 static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
 			     struct sk_buff *prev, struct net_device *ldev);
 
-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
-				     const struct ieee802154_addr *saddr,
-				     const struct ieee802154_addr *daddr)
-{
-	net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
-	return jhash_3words(ieee802154_addr_hash(saddr),
-			    ieee802154_addr_hash(daddr),
-			    (__force u32)(tag + (d_size << 16)),
-			    lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
-	const struct lowpan_frag_queue *fq;
-
-	fq = container_of(q, struct lowpan_frag_queue, q);
-	return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
-	const struct lowpan_frag_queue *fq;
-	const struct lowpan_create_arg *arg = a;
-
-	fq = container_of(q, struct lowpan_frag_queue, q);
-	return	fq->tag == arg->tag && fq->d_size == arg->d_size &&
-		ieee802154_addr_equal(&fq->saddr, arg->src) &&
-		ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
 static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
 {
-	const struct lowpan_create_arg *arg = a;
+	const struct frag_lowpan_compare_key *key = a;
 	struct lowpan_frag_queue *fq;
 
 	fq = container_of(q, struct lowpan_frag_queue, q);
 
-	fq->tag = arg->tag;
-	fq->d_size = arg->d_size;
-	fq->saddr = *arg->src;
-	fq->daddr = *arg->dst;
+	BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+	memcpy(&q->key, key, sizeof(*key));
 }
 
 static void lowpan_frag_expire(struct timer_list *t)
@@ -105,21 +73,17 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
 	const struct ieee802154_addr *src,
 	const struct ieee802154_addr *dst)
 {
-	struct inet_frag_queue *q;
-	struct lowpan_create_arg arg;
-	unsigned int hash;
 	struct netns_ieee802154_lowpan *ieee802154_lowpan =
 		net_ieee802154_lowpan(net);
+	struct frag_lowpan_compare_key key = {
+		.tag = cb->d_tag,
+		.d_size = cb->d_size,
+		.src = *src,
+		.dst = *dst,
+	};
+	struct inet_frag_queue *q;
 
-	arg.tag = cb->d_tag;
-	arg.d_size = cb->d_size;
-	arg.src = src;
-	arg.dst = dst;
-
-	hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
-
-	q = inet_frag_find(&ieee802154_lowpan->frags,
-			   &lowpan_frags, &arg, hash);
+	q = inet_frag_find(&ieee802154_lowpan->frags, &key);
 	if (IS_ERR_OR_NULL(q)) {
 		inet_frag_maybe_warn_overflow(q, pr_fmt());
 		return NULL;
@@ -611,17 +575,46 @@ static struct pernet_operations lowpan_frags_ops = {
 	.exit = lowpan_frags_exit_net,
 };
 
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	return jhash2(data,
+		      sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct inet_frag_queue *fq = data;
+
+	return jhash2((const u32 *)&fq->key,
+		      sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+	const struct frag_lowpan_compare_key *key = arg->key;
+	const struct inet_frag_queue *fq = ptr;
+
+	return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+	.head_offset		= offsetof(struct inet_frag_queue, node),
+	.hashfn			= lowpan_key_hashfn,
+	.obj_hashfn		= lowpan_obj_hashfn,
+	.obj_cmpfn		= lowpan_obj_cmpfn,
+	.automatic_shrinking	= true,
+};
+
 int __init lowpan_net_frag_init(void)
 {
 	int ret;
 
-	lowpan_frags.hashfn = lowpan_hashfn;
 	lowpan_frags.constructor = lowpan_frag_init;
 	lowpan_frags.destructor = NULL;
 	lowpan_frags.qsize = sizeof(struct frag_queue);
-	lowpan_frags.match = lowpan_frag_match;
 	lowpan_frags.frag_expire = lowpan_frag_expire;
 	lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+	lowpan_frags.rhash_params = lowpan_rhash_params;
 	ret = inet_frags_init(&lowpan_frags);
 	if (ret)
 		goto out;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1ac69f65d0de..ebb8f411e0db 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
 
-#define INETFRAGS_EVICT_BUCKETS   128
-#define INETFRAGS_EVICT_MAX	  512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
  *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
 };
 EXPORT_SYMBOL(ip_frag_ecn_table);
 
-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
-	return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
-	return time_after(jiffies,
-	       f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
-	int i;
-
-	write_seqlock_bh(&f->rnd_seqlock);
-
-	if (!inet_frag_may_rebuild(f))
-		goto out;
-
-	get_random_bytes(&f->rnd, sizeof(u32));
-
-	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
-		struct inet_frag_bucket *hb;
-		struct inet_frag_queue *q;
-		struct hlist_node *n;
-
-		hb = &f->hash[i];
-		spin_lock(&hb->chain_lock);
-
-		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
-			unsigned int hval = inet_frag_hashfn(f, q);
-
-			if (hval != i) {
-				struct inet_frag_bucket *hb_dest;
-
-				hlist_del(&q->list);
-
-				/* Relink to new hash chain. */
-				hb_dest = &f->hash[hval];
-
-				/* This is the only place where we take
-				 * another chain_lock while already holding
-				 * one.  As this will not run concurrently,
-				 * we cannot deadlock on hb_dest lock below, if its
-				 * already locked it will be released soon since
-				 * other caller cannot be waiting for hb lock
-				 * that we've taken above.
-				 */
-				spin_lock_nested(&hb_dest->chain_lock,
-						 SINGLE_DEPTH_NESTING);
-				hlist_add_head(&q->list, &hb_dest->chain);
-				spin_unlock(&hb_dest->chain_lock);
-			}
-		}
-		spin_unlock(&hb->chain_lock);
-	}
-
-	f->rebuild = false;
-	f->last_rebuild_jiffies = jiffies;
-out:
-	write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
-	if (!hlist_unhashed(&q->list_evictor))
-		return false;
-
-	return q->net->low_thresh == 0 ||
-	       frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
-	struct inet_frag_queue *fq;
-	struct hlist_node *n;
-	unsigned int evicted = 0;
-	HLIST_HEAD(expired);
-
-	spin_lock(&hb->chain_lock);
-
-	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
-		if (!inet_fragq_should_evict(fq))
-			continue;
-
-		if (!del_timer(&fq->timer))
-			continue;
-
-		hlist_add_head(&fq->list_evictor, &expired);
-		++evicted;
-	}
-
-	spin_unlock(&hb->chain_lock);
-
-	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-		f->frag_expire(&fq->timer);
-
-	return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
-	unsigned int budget = INETFRAGS_EVICT_BUCKETS;
-	unsigned int i, evicted = 0;
-	struct inet_frags *f;
-
-	f = container_of(work, struct inet_frags, frags_work);
-
-	BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
-	local_bh_disable();
-
-	for (i = READ_ONCE(f->next_bucket); budget; --budget) {
-		evicted += inet_evict_bucket(f, &f->hash[i]);
-		i = (i + 1) & (INETFRAGS_HASHSZ - 1);
-		if (evicted > INETFRAGS_EVICT_MAX)
-			break;
-	}
-
-	f->next_bucket = i;
-
-	local_bh_enable();
-
-	if (f->rebuild && inet_frag_may_rebuild(f))
-		inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
-	if (unlikely(!work_pending(&f->frags_work)))
-		schedule_work(&f->frags_work);
-}
-
 int inet_frags_init(struct inet_frags *f)
 {
-	int i;
-
-	INIT_WORK(&f->frags_work, inet_frag_worker);
-
-	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
-		struct inet_frag_bucket *hb = &f->hash[i];
-
-		spin_lock_init(&hb->chain_lock);
-		INIT_HLIST_HEAD(&hb->chain);
-	}
-
-	seqlock_init(&f->rnd_seqlock);
-	f->last_rebuild_jiffies = 0;
 	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
 					    NULL);
 	if (!f->frags_cachep)
@@ -214,79 +59,69 @@ EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_fini(struct inet_frags *f)
 {
-	cancel_work_sync(&f->frags_work);
+	/* We must wait that all inet_frag_destroy_rcu() have completed. */
+	rcu_barrier();
+
 	kmem_cache_destroy(f->frags_cachep);
+	f->frags_cachep = NULL;
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
+static void inet_frags_free_cb(void *ptr, void *arg)
+{
+	struct inet_frag_queue *fq = ptr;
+
+	/* If we can not cancel the timer, it means this frag_queue
+	 * is already disappearing, we have nothing to do.
+	 * Otherwise, we own a refcount until the end of this function.
+	 */
+	if (!del_timer(&fq->timer))
+		return;
+
+	spin_lock_bh(&fq->lock);
+	if (!(fq->flags & INET_FRAG_COMPLETE)) {
+		fq->flags |= INET_FRAG_COMPLETE;
+		refcount_dec(&fq->refcnt);
+	}
+	spin_unlock_bh(&fq->lock);
+
+	inet_frag_put(fq);
+}
+
 void inet_frags_exit_net(struct netns_frags *nf)
 {
-	struct inet_frags *f =nf->f;
-	unsigned int seq;
-	int i;
+	nf->low_thresh = 0; /* prevent creation of new frags */
 
-	nf->low_thresh = 0;
-
-evict_again:
-	local_bh_disable();
-	seq = read_seqbegin(&f->rnd_seqlock);
-
-	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
-		inet_evict_bucket(f, &f->hash[i]);
-
-	local_bh_enable();
-	cond_resched();
-
-	if (read_seqretry(&f->rnd_seqlock, seq) ||
-	    sum_frag_mem_limit(nf))
-		goto evict_again;
+	rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
-{
-	struct inet_frag_bucket *hb;
-	unsigned int seq, hash;
-
- restart:
-	seq = read_seqbegin(&f->rnd_seqlock);
-
-	hash = inet_frag_hashfn(f, fq);
-	hb = &f->hash[hash];
-
-	spin_lock(&hb->chain_lock);
-	if (read_seqretry(&f->rnd_seqlock, seq)) {
-		spin_unlock(&hb->chain_lock);
-		goto restart;
-	}
-
-	return hb;
-}
-
-static inline void fq_unlink(struct inet_frag_queue *fq)
-{
-	struct inet_frag_bucket *hb;
-
-	hb = get_frag_bucket_locked(fq, fq->net->f);
-	hlist_del(&fq->list);
-	fq->flags |= INET_FRAG_COMPLETE;
-	spin_unlock(&hb->chain_lock);
-}
-
 void inet_frag_kill(struct inet_frag_queue *fq)
 {
 	if (del_timer(&fq->timer))
 		refcount_dec(&fq->refcnt);
 
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
-		fq_unlink(fq);
+		struct netns_frags *nf = fq->net;
+
+		fq->flags |= INET_FRAG_COMPLETE;
+		rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
 		refcount_dec(&fq->refcnt);
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
 
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+						 rcu);
+	struct inet_frags *f = q->net->f;
+
+	if (f->destructor)
+		f->destructor(q);
+	kmem_cache_free(f->frags_cachep, q);
+}
+
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
 	struct sk_buff *fp;
@@ -310,59 +145,20 @@ void inet_frag_destroy(struct inet_frag_queue *q)
 	}
 	sum = sum_truesize + f->qsize;
 
-	if (f->destructor)
-		f->destructor(q);
-	kmem_cache_free(f->frags_cachep, q);
+	call_rcu(&q->rcu, inet_frag_destroy_rcu);
 
 	sub_frag_mem_limit(nf, sum);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
-						struct inet_frag_queue *qp_in,
-						struct inet_frags *f,
-						void *arg)
-{
-	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
-	struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
-	/* With SMP race we have to recheck hash table, because
-	 * such entry could have been created on other cpu before
-	 * we acquired hash bucket lock.
-	 */
-	hlist_for_each_entry(qp, &hb->chain, list) {
-		if (qp->net == nf && f->match(qp, arg)) {
-			refcount_inc(&qp->refcnt);
-			spin_unlock(&hb->chain_lock);
-			qp_in->flags |= INET_FRAG_COMPLETE;
-			inet_frag_put(qp_in);
-			return qp;
-		}
-	}
-#endif
-	qp = qp_in;
-	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
-		refcount_inc(&qp->refcnt);
-
-	refcount_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &hb->chain);
-
-	spin_unlock(&hb->chain_lock);
-
-	return qp;
-}
-
 static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 					       struct inet_frags *f,
 					       void *arg)
 {
 	struct inet_frag_queue *q;
 
-	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
-		inet_frag_schedule_worker(f);
+	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
 		return NULL;
-	}
 
 	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
 	if (!q)
@@ -374,59 +170,52 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
-	refcount_set(&q->refcnt, 1);
+	refcount_set(&q->refcnt, 3);
 
 	return q;
 }
 
 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
-						struct inet_frags *f,
 						void *arg)
 {
+	struct inet_frags *f = nf->f;
 	struct inet_frag_queue *q;
+	int err;
 
 	q = inet_frag_alloc(nf, f, arg);
 	if (!q)
 		return NULL;
 
-	return inet_frag_intern(nf, q, f, arg);
+	mod_timer(&q->timer, jiffies + nf->timeout);
+
+	err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
+				     f->rhash_params);
+	if (err < 0) {
+		q->flags |= INET_FRAG_COMPLETE;
+		inet_frag_kill(q);
+		inet_frag_destroy(q);
+		return NULL;
+	}
+	return q;
 }
 
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
-				       struct inet_frags *f, void *key,
-				       unsigned int hash)
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 {
-	struct inet_frag_bucket *hb;
-	struct inet_frag_queue *q;
-	int depth = 0;
+	struct inet_frag_queue *fq;
 
-	if (frag_mem_limit(nf) > nf->low_thresh)
-		inet_frag_schedule_worker(f);
+	rcu_read_lock();
 
-	hash &= (INETFRAGS_HASHSZ - 1);
-	hb = &f->hash[hash];
-
-	spin_lock(&hb->chain_lock);
-	hlist_for_each_entry(q, &hb->chain, list) {
-		if (q->net == nf && f->match(q, key)) {
-			refcount_inc(&q->refcnt);
-			spin_unlock(&hb->chain_lock);
-			return q;
-		}
-		depth++;
+	fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+	if (fq) {
+		if (!refcount_inc_not_zero(&fq->refcnt))
+			fq = NULL;
+		rcu_read_unlock();
+		return fq;
 	}
-	spin_unlock(&hb->chain_lock);
+	rcu_read_unlock();
 
-	if (depth <= INETFRAGS_MAXDEPTH)
-		return inet_frag_create(nf, f, key);
-
-	if (inet_frag_may_rebuild(f)) {
-		if (!f->rebuild)
-			f->rebuild = true;
-		inet_frag_schedule_worker(f);
-	}
-
-	return ERR_PTR(-ENOBUFS);
+	return inet_frag_create(nf, key);
 }
 EXPORT_SYMBOL(inet_frag_find);
 
@@ -434,8 +223,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 				   const char *prefix)
 {
 	static const char msg[] = "inet_frag_find: Fragment hash bucket"
-		" list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
-		". Dropping fragment.\n";
+		" list length grew over limit. Dropping fragment.\n";
 
 	if (PTR_ERR(q) == -ENOBUFS)
 		net_dbg_ratelimited("%s%s", prefix, msg);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1a3bc85d6f5e..4021820db6f2 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -69,15 +69,9 @@ struct ipfrag_skb_cb
 struct ipq {
 	struct inet_frag_queue q;
 
-	u32		user;
-	__be32		saddr;
-	__be32		daddr;
-	__be16		id;
-	u8		protocol;
 	u8		ecn; /* RFC3168 support */
 	u16		max_df_size; /* largest frag with DF set seen */
 	int             iif;
-	int             vif;   /* L3 master device index */
 	unsigned int    rid;
 	struct inet_peer *peer;
 };
@@ -97,41 +91,6 @@ int ip_frag_mem(struct net *net)
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			 struct net_device *dev);
 
-struct ip4_create_arg {
-	struct iphdr *iph;
-	u32 user;
-	int vif;
-};
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
-	net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
-	return jhash_3words((__force u32)id << 16 | prot,
-			    (__force u32)saddr, (__force u32)daddr,
-			    ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
-	const struct ipq *ipq;
-
-	ipq = container_of(q, struct ipq, q);
-	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
-	const struct ipq *qp;
-	const struct ip4_create_arg *arg = a;
-
-	qp = container_of(q, struct ipq, q);
-	return	qp->id == arg->iph->id &&
-		qp->saddr == arg->iph->saddr &&
-		qp->daddr == arg->iph->daddr &&
-		qp->protocol == arg->iph->protocol &&
-		qp->user == arg->user &&
-		qp->vif == arg->vif;
-}
 
 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
@@ -140,17 +99,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 					       frags);
 	struct net *net = container_of(ipv4, struct net, ipv4);
 
-	const struct ip4_create_arg *arg = a;
+	const struct frag_v4_compare_key *key = a;
 
-	qp->protocol = arg->iph->protocol;
-	qp->id = arg->iph->id;
-	qp->ecn = ip4_frag_ecn(arg->iph->tos);
-	qp->saddr = arg->iph->saddr;
-	qp->daddr = arg->iph->daddr;
-	qp->vif = arg->vif;
-	qp->user = arg->user;
+	q->key.v4 = *key;
+	qp->ecn = 0;
 	qp->peer = q->net->max_dist ?
-		inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+		inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
 		NULL;
 }
 
@@ -234,7 +188,7 @@ static void ip_expire(struct timer_list *t)
 		/* Only an end host needs to send an ICMP
 		 * "Fragment Reassembly Timeout" message, per RFC792.
 		 */
-		if (frag_expire_skip_icmp(qp->user) &&
+		if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
 		    (skb_rtable(head)->rt_type != RTN_LOCAL))
 			goto out;
 
@@ -262,17 +216,17 @@ out_rcu_unlock:
 static struct ipq *ip_find(struct net *net, struct iphdr *iph,
 			   u32 user, int vif)
 {
+	struct frag_v4_compare_key key = {
+		.saddr = iph->saddr,
+		.daddr = iph->daddr,
+		.user = user,
+		.vif = vif,
+		.id = iph->id,
+		.protocol = iph->protocol,
+	};
 	struct inet_frag_queue *q;
-	struct ip4_create_arg arg;
-	unsigned int hash;
 
-	arg.iph = iph;
-	arg.user = user;
-	arg.vif = vif;
-
-	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
-	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
+	q = inet_frag_find(&net->ipv4.frags, &key);
 	if (IS_ERR_OR_NULL(q)) {
 		inet_frag_maybe_warn_overflow(q, pr_fmt());
 		return NULL;
@@ -656,7 +610,7 @@ out_nomem:
 	err = -ENOMEM;
 	goto out_fail;
 out_oversize:
-	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+	net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
 out_fail:
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 	return err;
@@ -894,15 +848,47 @@ static struct pernet_operations ip4_frags_ops = {
 	.exit = ipv4_frags_exit_net,
 };
 
+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	return jhash2(data,
+		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct inet_frag_queue *fq = data;
+
+	return jhash2((const u32 *)&fq->key.v4,
+		      sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+	const struct frag_v4_compare_key *key = arg->key;
+	const struct inet_frag_queue *fq = ptr;
+
+	return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+	.head_offset		= offsetof(struct inet_frag_queue, node),
+	.key_offset		= offsetof(struct inet_frag_queue, key),
+	.key_len		= sizeof(struct frag_v4_compare_key),
+	.hashfn			= ip4_key_hashfn,
+	.obj_hashfn		= ip4_obj_hashfn,
+	.obj_cmpfn		= ip4_obj_cmpfn,
+	.automatic_shrinking	= true,
+};
+
 void __init ipfrag_init(void)
 {
-	ip4_frags.hashfn = ip4_hashfn;
 	ip4_frags.constructor = ip4_frag_init;
 	ip4_frags.destructor = ip4_frag_free;
 	ip4_frags.qsize = sizeof(struct ipq);
-	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
 	ip4_frags.frags_cache_name = ip_frag_cache_name;
+	ip4_frags.rhash_params = ip4_rhash_params;
 	if (inet_frags_init(&ip4_frags))
 		panic("IP: failed to allocate ip4_frags cache\n");
 	ip4_frags_ctl_register();
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c4b40fdee838..0ad3df551d98 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -152,23 +152,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
 	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
 }
 
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
-				 const struct in6_addr *daddr)
-{
-	net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
-	return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
-			    (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
-	const struct frag_queue *nq;
-
-	nq = container_of(q, struct frag_queue, q);
-	return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
 static void nf_ct_frag6_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -182,26 +165,19 @@ static void nf_ct_frag6_expire(struct timer_list *t)
 }
 
 /* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
-					 u32 user, struct in6_addr *src,
-					 struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+				  const struct ipv6hdr *hdr, int iif)
 {
+	struct frag_v6_compare_key key = {
+		.id = id,
+		.saddr = hdr->saddr,
+		.daddr = hdr->daddr,
+		.user = user,
+		.iif = iif,
+	};
 	struct inet_frag_queue *q;
-	struct ip6_create_arg arg;
-	unsigned int hash;
 
-	arg.id = id;
-	arg.user = user;
-	arg.src = src;
-	arg.dst = dst;
-	arg.iif = iif;
-	arg.ecn = ecn;
-
-	local_bh_disable();
-	hash = nf_hash_frag(id, src, dst);
-
-	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
-	local_bh_enable();
+	q = inet_frag_find(&net->nf_frag.frags, &key);
 	if (IS_ERR_OR_NULL(q)) {
 		inet_frag_maybe_warn_overflow(q, pr_fmt());
 		return NULL;
@@ -593,8 +569,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
 	skb_orphan(skb);
-	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
-		     skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+	fq = fq_find(net, fhdr->identification, user, hdr,
+		     skb->dev ? skb->dev->ifindex : 0);
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		return -ENOMEM;
@@ -660,13 +636,12 @@ int nf_ct_frag6_init(void)
 {
 	int ret = 0;
 
-	nf_frags.hashfn = nf_hashfn;
 	nf_frags.constructor = ip6_frag_init;
 	nf_frags.destructor = NULL;
 	nf_frags.qsize = sizeof(struct frag_queue);
-	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	nf_frags.frags_cache_name = nf_frags_cache_name;
+	nf_frags.rhash_params = ip6_rhash_params;
 	ret = inet_frags_init(&nf_frags);
 	if (ret)
 		goto out;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f0071b113a92..3fc853e4492a 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -79,52 +79,13 @@ static struct inet_frags ip6_frags;
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev);
 
-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
-				    const struct in6_addr *daddr)
-{
-	net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
-	return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
-			    (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
-	const struct frag_queue *fq;
-
-	fq = container_of(q, struct frag_queue, q);
-	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
-	const struct frag_queue *fq;
-	const struct ip6_create_arg *arg = a;
-
-	fq = container_of(q, struct frag_queue, q);
-	return	fq->id == arg->id &&
-		fq->user == arg->user &&
-		ipv6_addr_equal(&fq->saddr, arg->src) &&
-		ipv6_addr_equal(&fq->daddr, arg->dst) &&
-		(arg->iif == fq->iif ||
-		 !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
-					       IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
 void ip6_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct frag_queue *fq = container_of(q, struct frag_queue, q);
-	const struct ip6_create_arg *arg = a;
+	const struct frag_v6_compare_key *key = a;
 
-	fq->id = arg->id;
-	fq->user = arg->user;
-	fq->saddr = *arg->src;
-	fq->daddr = *arg->dst;
-	fq->ecn = arg->ecn;
+	q->key.v6 = *key;
+	fq->ecn = 0;
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
@@ -182,23 +143,22 @@ static void ip6_frag_expire(struct timer_list *t)
 }
 
 static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
-	const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 {
+	struct frag_v6_compare_key key = {
+		.id = id,
+		.saddr = hdr->saddr,
+		.daddr = hdr->daddr,
+		.user = IP6_DEFRAG_LOCAL_DELIVER,
+		.iif = iif,
+	};
 	struct inet_frag_queue *q;
-	struct ip6_create_arg arg;
-	unsigned int hash;
 
-	arg.id = id;
-	arg.user = IP6_DEFRAG_LOCAL_DELIVER;
-	arg.src = src;
-	arg.dst = dst;
-	arg.iif = iif;
-	arg.ecn = ecn;
+	if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+					    IPV6_ADDR_LINKLOCAL)))
+		key.iif = 0;
 
-	hash = inet6_hash_frag(id, src, dst);
-
-	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
+	q = inet_frag_find(&net->ipv6.frags, &key);
 	if (IS_ERR_OR_NULL(q)) {
 		inet_frag_maybe_warn_overflow(q, pr_fmt());
 		return NULL;
@@ -530,6 +490,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_queue *fq;
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	int iif;
 
 	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
 		goto fail_hdr;
@@ -558,13 +519,14 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
-		     skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+	iif = skb->dev ? skb->dev->ifindex : 0;
+	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
 		int ret;
 
 		spin_lock(&fq->q.lock);
 
+		fq->iif = iif;
 		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
 		spin_unlock(&fq->q.lock);
@@ -738,17 +700,47 @@ static struct pernet_operations ip6_frags_ops = {
 	.exit = ipv6_frags_exit_net,
 };
 
+static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	return jhash2(data,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct inet_frag_queue *fq = data;
+
+	return jhash2((const u32 *)&fq->key.v6,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+	const struct frag_v6_compare_key *key = arg->key;
+	const struct inet_frag_queue *fq = ptr;
+
+	return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+const struct rhashtable_params ip6_rhash_params = {
+	.head_offset		= offsetof(struct inet_frag_queue, node),
+	.hashfn			= ip6_key_hashfn,
+	.obj_hashfn		= ip6_obj_hashfn,
+	.obj_cmpfn		= ip6_obj_cmpfn,
+	.automatic_shrinking	= true,
+};
+EXPORT_SYMBOL(ip6_rhash_params);
+
 int __init ipv6_frag_init(void)
 {
 	int ret;
 
-	ip6_frags.hashfn = ip6_hashfn;
 	ip6_frags.constructor = ip6_frag_init;
 	ip6_frags.destructor = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
-	ip6_frags.match = ip6_frag_match;
 	ip6_frags.frag_expire = ip6_frag_expire;
 	ip6_frags.frags_cache_name = ip6_frag_cache_name;
+	ip6_frags.rhash_params = ip6_rhash_params;
 	ret = inet_frags_init(&ip6_frags);
 	if (ret)
 		goto out;

From 6befe4a78b1553edb6eed3a78b4bcd9748526672 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:50 -0700
Subject: [PATCH 1607/1678] inet: frags: remove some helpers

Remove sum_frag_mem_limit(), ip_frag_mem() & ip6_frag_mem()

Also since we use rhashtable we can bring back the number of fragments
in "grep FRAG /proc/net/sockstat /proc/net/sockstat6" that was
removed in commit 434d305405ab ("inet: frag: don't account number
of fragment queues")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 5 -----
 include/net/ip.h        | 1 -
 include/net/ipv6.h      | 7 -------
 net/ipv4/ip_fragment.c  | 5 -----
 net/ipv4/proc.c         | 6 +++---
 net/ipv6/proc.c         | 5 +++--
 6 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 3fec0d3a0d01..4b5449df0aad 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -141,11 +141,6 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
 	atomic_add(i, &nf->mem);
 }
 
-static inline int sum_frag_mem_limit(struct netns_frags *nf)
-{
-	return atomic_read(&nf->mem);
-}
-
 /* RFC 3168 support :
  * We want to check ECN values of all fragments, do detect invalid combinations.
  * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
diff --git a/include/net/ip.h b/include/net/ip.h
index 36f8f7811093..ecffd843e7b8 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -588,7 +588,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s
 	return skb;
 }
 #endif
-int ip_frag_mem(struct net *net);
 
 /*
  *	Functions provided by ip_forward.c
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6fa9a2bc5896..37455e840347 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -379,13 +379,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
 	    idev->cnf.accept_ra;
 }
 
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int ip6_frag_mem(struct net *net)
-{
-	return sum_frag_mem_limit(&net->ipv6.frags);
-}
-#endif
-
 #define IPV6_FRAG_HIGH_THRESH	(4 * 1024*1024)	/* 4194304 */
 #define IPV6_FRAG_LOW_THRESH	(3 * 1024*1024)	/* 3145728 */
 #define IPV6_FRAG_TIMEOUT	(60 * HZ)	/* 60 seconds */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4021820db6f2..44f4fa306e22 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -83,11 +83,6 @@ static u8 ip4_frag_ecn(u8 tos)
 
 static struct inet_frags ip4_frags;
 
-int ip_frag_mem(struct net *net)
-{
-	return sum_frag_mem_limit(&net->ipv4.frags);
-}
-
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			 struct net_device *dev);
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index adfb75340275..aacfce0d7d82 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
 static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
-	unsigned int frag_mem;
 	int orphans, sockets;
 
 	orphans = percpu_counter_sum_positive(&tcp_orphan_count);
@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
 		   sock_prot_inuse_get(net, &raw_prot));
-	frag_mem = ip_frag_mem(net);
-	seq_printf(seq,  "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+	seq_printf(seq,  "FRAG: inuse %u memory %u\n",
+		   atomic_read(&net->ipv4.frags.rhashtable.nelems),
+		   frag_mem_limit(&net->ipv4.frags));
 	return 0;
 }
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 6e57028d2e91..8befeb91e071 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
-	unsigned int frag_mem = ip6_frag_mem(net);
 
 	seq_printf(seq, "TCP6: inuse %d\n",
 		       sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 			sock_prot_inuse_get(net, &udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(net, &rawv6_prot));
-	seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+	seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+		   atomic_read(&net->ipv6.frags.rhashtable.nelems),
+		   frag_mem_limit(&net->ipv6.frags));
 	return 0;
 }
 

From 399d1404be660d355192ff4df5ccc3f4159ec1e4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:51 -0700
Subject: [PATCH 1608/1678] inet: frags: get rif of inet_frag_evicting()

This refactors ip_expire() since one indentation level is removed.

Note: in the future, we should try hard to avoid the skb_clone()
since this is a serious performance cost.
Under DDOS, the ICMP message wont be sent because of rate limits.

Fact that ip6_expire_frag_queue() does not use skb_clone() is
disturbing too. Presumably IPv6 should have the same
issue than the one we fixed in commit ec4fbd64751d
("inet: frag: release spinlock before calling icmp_send()")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h |  5 ----
 net/ipv4/ip_fragment.c  | 61 ++++++++++++++++++++---------------------
 net/ipv6/reassembly.c   |  4 ---
 3 files changed, 30 insertions(+), 40 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 4b5449df0aad..0e8e159d88f7 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -119,11 +119,6 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
 		inet_frag_destroy(q);
 }
 
-static inline bool inet_frag_evicting(struct inet_frag_queue *q)
-{
-	return false;
-}
-
 /* Memory Tracking Functions. */
 
 static inline int frag_mem_limit(struct netns_frags *nf)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 44f4fa306e22..b844f517b75b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -143,8 +143,11 @@ static bool frag_expire_skip_icmp(u32 user)
 static void ip_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
-	struct ipq *qp;
+	struct sk_buff *clone, *head;
+	const struct iphdr *iph;
 	struct net *net;
+	struct ipq *qp;
+	int err;
 
 	qp = container_of(frag, struct ipq, q);
 	net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -158,45 +161,41 @@ static void ip_expire(struct timer_list *t)
 	ipq_kill(qp);
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 
-	if (!inet_frag_evicting(&qp->q)) {
-		struct sk_buff *clone, *head = qp->q.fragments;
-		const struct iphdr *iph;
-		int err;
+	head = qp->q.fragments;
 
-		__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
 
-		if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
-			goto out;
+	if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+		goto out;
 
-		head->dev = dev_get_by_index_rcu(net, qp->iif);
-		if (!head->dev)
-			goto out;
+	head->dev = dev_get_by_index_rcu(net, qp->iif);
+	if (!head->dev)
+		goto out;
 
 
-		/* skb has no dst, perform route lookup again */
-		iph = ip_hdr(head);
-		err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+	/* skb has no dst, perform route lookup again */
+	iph = ip_hdr(head);
+	err = ip_route_input_noref(head, iph->daddr, iph->saddr,
 					   iph->tos, head->dev);
-		if (err)
-			goto out;
+	if (err)
+		goto out;
 
-		/* Only an end host needs to send an ICMP
-		 * "Fragment Reassembly Timeout" message, per RFC792.
-		 */
-		if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
-		    (skb_rtable(head)->rt_type != RTN_LOCAL))
-			goto out;
+	/* Only an end host needs to send an ICMP
+	 * "Fragment Reassembly Timeout" message, per RFC792.
+	 */
+	if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+	    (skb_rtable(head)->rt_type != RTN_LOCAL))
+		goto out;
 
-		clone = skb_clone(head, GFP_ATOMIC);
+	clone = skb_clone(head, GFP_ATOMIC);
 
-		/* Send an ICMP "Fragment Reassembly Timeout" message. */
-		if (clone) {
-			spin_unlock(&qp->q.lock);
-			icmp_send(clone, ICMP_TIME_EXCEEDED,
-				  ICMP_EXC_FRAGTIME, 0);
-			consume_skb(clone);
-			goto out_rcu_unlock;
-		}
+	/* Send an ICMP "Fragment Reassembly Timeout" message. */
+	if (clone) {
+		spin_unlock(&qp->q.lock);
+		icmp_send(clone, ICMP_TIME_EXCEEDED,
+			  ICMP_EXC_FRAGTIME, 0);
+		consume_skb(clone);
+		goto out_rcu_unlock;
 	}
 out:
 	spin_unlock(&qp->q.lock);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3fc853e4492a..70acad126d04 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -106,10 +106,6 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
 		goto out_rcu_unlock;
 
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
-	if (inet_frag_evicting(&fq->q))
-		goto out_rcu_unlock;
-
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
 
 	/* Don't send error if the first segment did not arrive. */

From 2d44ed22e607f9a285b049de2263e3840673a260 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:52 -0700
Subject: [PATCH 1609/1678] inet: frags: remove inet_frag_maybe_warn_overflow()

This function is obsolete, after rhashtable addition to inet defrag.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 |  2 --
 net/ieee802154/6lowpan/reassembly.c     |  5 ++---
 net/ipv4/inet_fragment.c                | 11 -----------
 net/ipv4/ip_fragment.c                  |  5 ++---
 net/ipv6/netfilter/nf_conntrack_reasm.c |  5 ++---
 net/ipv6/reassembly.c                   |  5 ++---
 6 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 0e8e159d88f7..95e353e3305b 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -110,8 +110,6 @@ void inet_frags_exit_net(struct netns_frags *nf);
 void inet_frag_kill(struct inet_frag_queue *q);
 void inet_frag_destroy(struct inet_frag_queue *q);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
-				   const char *prefix);
 
 static inline void inet_frag_put(struct inet_frag_queue *q)
 {
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 0fa0121f85d4..1aec71a3f904 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -84,10 +84,9 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
 	struct inet_frag_queue *q;
 
 	q = inet_frag_find(&ieee802154_lowpan->frags, &key);
-	if (IS_ERR_OR_NULL(q)) {
-		inet_frag_maybe_warn_overflow(q, pr_fmt());
+	if (!q)
 		return NULL;
-	}
+
 	return container_of(q, struct lowpan_frag_queue, q);
 }
 
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index ebb8f411e0db..c9e35b81d093 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -218,14 +218,3 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 	return inet_frag_create(nf, key);
 }
 EXPORT_SYMBOL(inet_frag_find);
-
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
-				   const char *prefix)
-{
-	static const char msg[] = "inet_frag_find: Fragment hash bucket"
-		" list length grew over limit. Dropping fragment.\n";
-
-	if (PTR_ERR(q) == -ENOBUFS)
-		net_dbg_ratelimited("%s%s", prefix, msg);
-}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b844f517b75b..b0366224f314 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -221,10 +221,9 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
 	struct inet_frag_queue *q;
 
 	q = inet_frag_find(&net->ipv4.frags, &key);
-	if (IS_ERR_OR_NULL(q)) {
-		inet_frag_maybe_warn_overflow(q, pr_fmt());
+	if (!q)
 		return NULL;
-	}
+
 	return container_of(q, struct ipq, q);
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0ad3df551d98..d866412b8f6c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -178,10 +178,9 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
 	struct inet_frag_queue *q;
 
 	q = inet_frag_find(&net->nf_frag.frags, &key);
-	if (IS_ERR_OR_NULL(q)) {
-		inet_frag_maybe_warn_overflow(q, pr_fmt());
+	if (!q)
 		return NULL;
-	}
+
 	return container_of(q, struct frag_queue, q);
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 70acad126d04..2a77fda5e3bc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -155,10 +155,9 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
 		key.iif = 0;
 
 	q = inet_frag_find(&net->ipv6.frags, &key);
-	if (IS_ERR_OR_NULL(q)) {
-		inet_frag_maybe_warn_overflow(q, pr_fmt());
+	if (!q)
 		return NULL;
-	}
+
 	return container_of(q, struct frag_queue, q);
 }
 

From 3e67f106f619dcfaf6f4e2039599bdb69848c714 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:53 -0700
Subject: [PATCH 1610/1678] inet: frags: break the 2GB limit for frags storage

Some users are willing to provision huge amounts of memory to be able
to perform reassembly reasonnably well under pressure.

Current memory tracking is using one atomic_t and integers.

Switch to atomic_long_t so that 64bit arches can use more than 2GB,
without any cost for 32bit arches.

Note that this patch avoids an overflow error, if high_thresh was set
to ~2GB, since this test in inet_frag_alloc() was never true :

if (... || frag_mem_limit(nf) > nf->high_thresh)

Tested:

$ echo 16000000000 >/proc/sys/net/ipv4/ipfrag_high_thresh

<frag DDOS>

$ grep FRAG /proc/net/sockstat
FRAG: inuse 14705885 memory 16000002880

$ nstat -n ; sleep 1 ; nstat | grep Reas
IpReasmReqds                    3317150            0.0
IpReasmFails                    3317112            0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt  |  4 ++--
 include/net/inet_frag.h                 | 20 ++++++++++----------
 net/ieee802154/6lowpan/reassembly.c     | 10 +++++-----
 net/ipv4/ip_fragment.c                  | 10 +++++-----
 net/ipv4/proc.c                         |  2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 10 +++++-----
 net/ipv6/proc.c                         |  2 +-
 net/ipv6/reassembly.c                   |  6 +++---
 8 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 6f2a3670e44b..5dc1a040a2f1 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -133,10 +133,10 @@ min_adv_mss - INTEGER
 
 IP Fragmentation:
 
-ipfrag_high_thresh - INTEGER
+ipfrag_high_thresh - LONG INTEGER
 	Maximum memory used to reassemble IP fragments.
 
-ipfrag_low_thresh - INTEGER
+ipfrag_low_thresh - LONG INTEGER
 	(Obsolete since linux-4.17)
 	Maximum memory used to reassemble IP fragments before the kernel
 	begins to remove incomplete fragment queues to free up resources.
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 95e353e3305b..a52e7273e7a5 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -8,11 +8,11 @@ struct netns_frags {
 	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
 
 	/* Keep atomic mem on separate cachelines in structs that include it */
-	atomic_t		mem ____cacheline_aligned_in_smp;
+	atomic_long_t		mem ____cacheline_aligned_in_smp;
 	/* sysctls */
+	long			high_thresh;
+	long			low_thresh;
 	int			timeout;
-	int			high_thresh;
-	int			low_thresh;
 	int			max_dist;
 	struct inet_frags	*f;
 };
@@ -102,7 +102,7 @@ void inet_frags_fini(struct inet_frags *);
 
 static inline int inet_frags_init_net(struct netns_frags *nf)
 {
-	atomic_set(&nf->mem, 0);
+	atomic_long_set(&nf->mem, 0);
 	return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
 }
 void inet_frags_exit_net(struct netns_frags *nf);
@@ -119,19 +119,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
 
 /* Memory Tracking Functions. */
 
-static inline int frag_mem_limit(struct netns_frags *nf)
+static inline long frag_mem_limit(const struct netns_frags *nf)
 {
-	return atomic_read(&nf->mem);
+	return atomic_long_read(&nf->mem);
 }
 
-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
 {
-	atomic_sub(i, &nf->mem);
+	atomic_long_sub(val, &nf->mem);
 }
 
-static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
+static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
 {
-	atomic_add(i, &nf->mem);
+	atomic_long_add(val, &nf->mem);
 }
 
 /* RFC 3168 support :
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 1aec71a3f904..44f148a6bb57 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -411,23 +411,23 @@ err:
 }
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 	{
 		.procname	= "6lowpanfrag_high_thresh",
 		.data		= &init_net.ieee802154_lowpan.frags.high_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.ieee802154_lowpan.frags.low_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_low_thresh",
 		.data		= &init_net.ieee802154_lowpan.frags.low_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.ieee802154_lowpan.frags.high_thresh
 	},
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b0366224f314..053869f2c49b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -678,23 +678,23 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 EXPORT_SYMBOL(ip_check_defrag);
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ipfrag_high_thresh",
 		.data		= &init_net.ipv4.frags.high_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.ipv4.frags.low_thresh
 	},
 	{
 		.procname	= "ipfrag_low_thresh",
 		.data		= &init_net.ipv4.frags.low_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.ipv4.frags.high_thresh
 	},
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index aacfce0d7d82..a058de677e94 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -71,7 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
 		   sock_prot_inuse_get(net, &raw_prot));
-	seq_printf(seq,  "FRAG: inuse %u memory %u\n",
+	seq_printf(seq,  "FRAG: inuse %u memory %lu\n",
 		   atomic_read(&net->ipv4.frags.rhashtable.nelems),
 		   frag_mem_limit(&net->ipv4.frags));
 	return 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d866412b8f6c..603a39592859 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,7 @@ struct nf_ct_frag6_skb_cb
 static struct inet_frags nf_frags;
 
 #ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
 
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
@@ -76,18 +76,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_low_thresh",
 		.data		= &init_net.nf_frag.frags.low_thresh,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.nf_frag.frags.high_thresh
 	},
 	{
 		.procname	= "nf_conntrack_frag6_high_thresh",
 		.data		= &init_net.nf_frag.frags.high_thresh,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.nf_frag.frags.low_thresh
 	},
 	{ }
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 8befeb91e071..a85f7e0b14b1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -47,7 +47,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 			sock_prot_inuse_get(net, &udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(net, &rawv6_prot));
-	seq_printf(seq, "FRAG6: inuse %u memory %u\n",
+	seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
 		   atomic_read(&net->ipv6.frags.rhashtable.nelems),
 		   frag_mem_limit(&net->ipv6.frags));
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2a77fda5e3bc..905a8aee2671 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -552,15 +552,15 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ip6frag_high_thresh",
 		.data		= &init_net.ipv6.frags.high_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &init_net.ipv6.frags.low_thresh
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
 		.data		= &init_net.ipv6.frags.low_thresh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,

From 1eec5d5670084ee644597bd26c25e22c69b9f748 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:54 -0700
Subject: [PATCH 1611/1678] inet: frags: do not clone skb in ip_expire()

An skb_clone() was added in commit ec4fbd64751d ("inet: frag: release
spinlock before calling icmp_send()")

While fixing the bug at that time, it also added a very high cost
for DDOS frags, as the ICMP rate limit is applied after this
expensive operation (skb_clone() + consume_skb(), implying memory
allocations, copy, and freeing)

We can use skb_get(head) here, all we want is to make sure skb wont
be freed by another cpu.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 053869f2c49b..fb185d9a5cc7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -143,8 +143,8 @@ static bool frag_expire_skip_icmp(u32 user)
 static void ip_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
-	struct sk_buff *clone, *head;
 	const struct iphdr *iph;
+	struct sk_buff *head;
 	struct net *net;
 	struct ipq *qp;
 	int err;
@@ -187,16 +187,12 @@ static void ip_expire(struct timer_list *t)
 	    (skb_rtable(head)->rt_type != RTN_LOCAL))
 		goto out;
 
-	clone = skb_clone(head, GFP_ATOMIC);
+	skb_get(head);
+	spin_unlock(&qp->q.lock);
+	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+	kfree_skb(head);
+	goto out_rcu_unlock;
 
-	/* Send an ICMP "Fragment Reassembly Timeout" message. */
-	if (clone) {
-		spin_unlock(&qp->q.lock);
-		icmp_send(clone, ICMP_TIME_EXCEEDED,
-			  ICMP_EXC_FRAGTIME, 0);
-		consume_skb(clone);
-		goto out_rcu_unlock;
-	}
 out:
 	spin_unlock(&qp->q.lock);
 out_rcu_unlock:

From 05c0b86b9696802fd0ce5676a92a63f1b455bdf3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:55 -0700
Subject: [PATCH 1612/1678] ipv6: frags: rewrite ip6_expire_frag_queue()

Make it similar to IPv4 ip_expire(), and release the lock
before calling icmp functions.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 905a8aee2671..2127da130dc2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -92,7 +92,9 @@ EXPORT_SYMBOL(ip6_frag_init);
 void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
 {
 	struct net_device *dev = NULL;
+	struct sk_buff *head;
 
+	rcu_read_lock();
 	spin_lock(&fq->q.lock);
 
 	if (fq->q.flags & INET_FRAG_COMPLETE)
@@ -100,28 +102,34 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
 
 	inet_frag_kill(&fq->q);
 
-	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, fq->iif);
 	if (!dev)
-		goto out_rcu_unlock;
+		goto out;
 
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
 
 	/* Don't send error if the first segment did not arrive. */
-	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
-		goto out_rcu_unlock;
+	head = fq->q.fragments;
+	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+		goto out;
 
 	/* But use as source device on which LAST ARRIVED
 	 * segment was received. And do not use fq->dev
 	 * pointer directly, device might already disappeared.
 	 */
-	fq->q.fragments->dev = dev;
-	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
-	rcu_read_unlock();
+	head->dev = dev;
+	skb_get(head);
+	spin_unlock(&fq->q.lock);
+
+	icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+	kfree_skb(head);
+	goto out_rcu_unlock;
+
 out:
 	spin_unlock(&fq->q.lock);
+out_rcu_unlock:
+	rcu_read_unlock();
 	inet_frag_put(&fq->q);
 }
 EXPORT_SYMBOL(ip6_expire_frag_queue);

From e5d672a0780d9e7118caad4c171ec88b8299398d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:56 -0700
Subject: [PATCH 1613/1678] rhashtable: reorganize struct rhashtable layout

While under frags DDOS I noticed unfortunate false sharing between
@nelems and @params.automatic_shrinking

Move @nelems at the end of struct rhashtable so that first cache line
is shared between all cpus, because almost never dirtied.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 668a21f04b09..1f8ad121eb43 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -152,25 +152,25 @@ struct rhashtable_params {
 /**
  * struct rhashtable - Hash table handle
  * @tbl: Bucket table
- * @nelems: Number of elements in table
  * @key_len: Key length for hashfn
- * @p: Configuration parameters
  * @max_elems: Maximum number of elements in table
+ * @p: Configuration parameters
  * @rhlist: True if this is an rhltable
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
-	atomic_t			nelems;
 	unsigned int			key_len;
-	struct rhashtable_params	p;
 	unsigned int			max_elems;
+	struct rhashtable_params	p;
 	bool				rhlist;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
 	spinlock_t			lock;
+	atomic_t			nelems;
 };
 
 /**

From c2615cf5a761b32bf74e85bddc223dfff3d9b9f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:57 -0700
Subject: [PATCH 1614/1678] inet: frags: reorganize struct netns_frags

Put the read-mostly fields in a separate cache line
at the beginning of struct netns_frags, to reduce
false sharing noticed in inet_frag_kill()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index a52e7273e7a5..ed07e3786d98 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -5,16 +5,17 @@
 #include <linux/rhashtable.h>
 
 struct netns_frags {
-	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
-
-	/* Keep atomic mem on separate cachelines in structs that include it */
-	atomic_long_t		mem ____cacheline_aligned_in_smp;
 	/* sysctls */
 	long			high_thresh;
 	long			low_thresh;
 	int			timeout;
 	int			max_dist;
 	struct inet_frags	*f;
+
+	struct rhashtable       rhashtable ____cacheline_aligned_in_smp;
+
+	/* Keep atomic mem on separate cachelines in structs that include it */
+	atomic_long_t		mem ____cacheline_aligned_in_smp;
 };
 
 /**

From bf66337140c64c27fa37222b7abca7e49d63fb57 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:58 -0700
Subject: [PATCH 1615/1678] inet: frags: get rid of ipfrag_skb_cb/FRAG_CB

ip_defrag uses skb->cb[] to store the fragment offset, and unfortunately
this integer is currently in a different cache line than skb->next,
meaning that we use two cache lines per skb when finding the insertion point.

By aliasing skb->ip_defrag_offset and skb->dev, we pack all the fields
in a single cache line and save precious memory bandwidth.

Note that after the fast path added by Changli Gao in commit
d6bebca92c66 ("fragment: add fast path for in-order fragments")
this change wont help the fast path, since we still need
to access prev->len (2nd cache line), but will show great
benefits when slow path is entered, since we perform
a linear scan of a potentially long list.

Also, note that this potential long list is an attack vector,
we might consider also using an rb-tree there eventually.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 net/ipv4/ip_fragment.c | 35 ++++++++++++++---------------------
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47082f54ec1f..9065477ed255 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -672,6 +672,7 @@ struct sk_buff {
 				 * UDP receive path is one user.
 				 */
 				unsigned long		dev_scratch;
+				int			ip_defrag_offset;
 			};
 		};
 		struct rb_node	rbnode; /* used in netem & tcp stack */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fb185d9a5cc7..994fa70a910f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,14 +57,6 @@
  */
 static const char ip_frag_cache_name[] = "ip4-frags";
 
-struct ipfrag_skb_cb
-{
-	struct inet_skb_parm	h;
-	int			offset;
-};
-
-#define FRAG_CB(skb)	((struct ipfrag_skb_cb *)((skb)->cb))
-
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct inet_frag_queue q;
@@ -353,13 +345,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * this fragment, right?
 	 */
 	prev = qp->q.fragments_tail;
-	if (!prev || FRAG_CB(prev)->offset < offset) {
+	if (!prev || prev->ip_defrag_offset < offset) {
 		next = NULL;
 		goto found;
 	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
-		if (FRAG_CB(next)->offset >= offset)
+		if (next->ip_defrag_offset >= offset)
 			break;	/* bingo! */
 		prev = next;
 	}
@@ -370,7 +362,7 @@ found:
 	 * any overlaps are eliminated.
 	 */
 	if (prev) {
-		int i = (FRAG_CB(prev)->offset + prev->len) - offset;
+		int i = (prev->ip_defrag_offset + prev->len) - offset;
 
 		if (i > 0) {
 			offset += i;
@@ -387,8 +379,8 @@ found:
 
 	err = -ENOMEM;
 
-	while (next && FRAG_CB(next)->offset < end) {
-		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
+	while (next && next->ip_defrag_offset < end) {
+		int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
 
 		if (i < next->len) {
 			/* Eat head of the next overlapped fragment
@@ -396,7 +388,7 @@ found:
 			 */
 			if (!pskb_pull(next, i))
 				goto err;
-			FRAG_CB(next)->offset += i;
+			next->ip_defrag_offset += i;
 			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
@@ -420,7 +412,13 @@ found:
 		}
 	}
 
-	FRAG_CB(skb)->offset = offset;
+	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
+	dev = skb->dev;
+	if (dev)
+		qp->iif = dev->ifindex;
+	/* Makes sure compiler wont do silly aliasing games */
+	barrier();
+	skb->ip_defrag_offset = offset;
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
@@ -431,11 +429,6 @@ found:
 	else
 		qp->q.fragments = skb;
 
-	dev = skb->dev;
-	if (dev) {
-		qp->iif = dev->ifindex;
-		skb->dev = NULL;
-	}
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
@@ -511,7 +504,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	}
 
 	WARN_ON(!head);
-	WARN_ON(FRAG_CB(head)->offset != 0);
+	WARN_ON(head->ip_defrag_offset != 0);
 
 	/* Allocate a new buffer for the datagram. */
 	ihlen = ip_hdrlen(head);

From 219badfaade986a2c3d99abd4eb6d83f4f9ed2fb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:58:59 -0700
Subject: [PATCH 1616/1678] ipv6: frags: get rid of ip6frag_skb_cb/FRAG6_CB

ip6_frag_queue uses skb->cb[] to store the fragment offset, meaning that
we could use two cache lines per skb when finding the insertion point,
if for some reason inet6_skb_parm size is increased in the future.

By using skb->ip_defrag_offset instead of skb->cb[], we pack all
the fields in a single cache line, matching what we did for IPv4.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2127da130dc2..7b52efc63f6a 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -62,13 +62,6 @@
 
 static const char ip6_frag_cache_name[] = "ip6-frags";
 
-struct ip6frag_skb_cb {
-	struct inet6_skb_parm	h;
-	int			offset;
-};
-
-#define FRAG6_CB(skb)	((struct ip6frag_skb_cb *)((skb)->cb))
-
 static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
 {
 	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -250,13 +243,13 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * this fragment, right?
 	 */
 	prev = fq->q.fragments_tail;
-	if (!prev || FRAG6_CB(prev)->offset < offset) {
+	if (!prev || prev->ip_defrag_offset < offset) {
 		next = NULL;
 		goto found;
 	}
 	prev = NULL;
 	for (next = fq->q.fragments; next != NULL; next = next->next) {
-		if (FRAG6_CB(next)->offset >= offset)
+		if (next->ip_defrag_offset >= offset)
 			break;	/* bingo! */
 		prev = next;
 	}
@@ -271,14 +264,20 @@ found:
 
 	/* Check for overlap with preceding fragment. */
 	if (prev &&
-	    (FRAG6_CB(prev)->offset + prev->len) > offset)
+	    (prev->ip_defrag_offset + prev->len) > offset)
 		goto discard_fq;
 
 	/* Look for overlap with succeeding segment. */
-	if (next && FRAG6_CB(next)->offset < end)
+	if (next && next->ip_defrag_offset < end)
 		goto discard_fq;
 
-	FRAG6_CB(skb)->offset = offset;
+	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
+	dev = skb->dev;
+	if (dev)
+		fq->iif = dev->ifindex;
+	/* Makes sure compiler wont do silly aliasing games */
+	barrier();
+	skb->ip_defrag_offset = offset;
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
@@ -289,11 +288,6 @@ found:
 	else
 		fq->q.fragments = skb;
 
-	dev = skb->dev;
-	if (dev) {
-		fq->iif = dev->ifindex;
-		skb->dev = NULL;
-	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
@@ -380,7 +374,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	}
 
 	WARN_ON(head == NULL);
-	WARN_ON(FRAG6_CB(head)->offset != 0);
+	WARN_ON(head->ip_defrag_offset != 0);
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -

From f2d1c724fca176ddbd32a9397df49221afbcb227 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 12:59:00 -0700
Subject: [PATCH 1617/1678] inet: frags: get rid of
 nf_ct_frag6_skb_cb/NFCT_FRAG6_CB

nf_ct_frag6_queue() uses skb->cb[] to store the fragment offset,
meaning that we could use two cache lines per skb when finding
the insertion point, if for some reason inet6_skb_parm size
is increased in the future.

By using skb->ip_defrag_offset instead of skb->cb[] we pack all the fields
in a single cache line, matching what we did for IPv4.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 29 ++++++++++---------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 603a39592859..3622aac343ae 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -52,14 +52,6 @@
 
 static const char nf_frags_cache_name[] = "nf-frags";
 
-struct nf_ct_frag6_skb_cb
-{
-	struct inet6_skb_parm	h;
-	int			offset;
-};
-
-#define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
 static struct inet_frags nf_frags;
 
 #ifdef CONFIG_SYSCTL
@@ -270,13 +262,13 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * this fragment, right?
 	 */
 	prev = fq->q.fragments_tail;
-	if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+	if (!prev || prev->ip_defrag_offset < offset) {
 		next = NULL;
 		goto found;
 	}
 	prev = NULL;
 	for (next = fq->q.fragments; next != NULL; next = next->next) {
-		if (NFCT_FRAG6_CB(next)->offset >= offset)
+		if (next->ip_defrag_offset >= offset)
 			break;	/* bingo! */
 		prev = next;
 	}
@@ -292,14 +284,19 @@ found:
 
 	/* Check for overlap with preceding fragment. */
 	if (prev &&
-	    (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+	    (prev->ip_defrag_offset + prev->len) > offset)
 		goto discard_fq;
 
 	/* Look for overlap with succeeding segment. */
-	if (next && NFCT_FRAG6_CB(next)->offset < end)
+	if (next && next->ip_defrag_offset < end)
 		goto discard_fq;
 
-	NFCT_FRAG6_CB(skb)->offset = offset;
+	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
+	if (skb->dev)
+		fq->iif = skb->dev->ifindex;
+	/* Makes sure compiler wont do silly aliasing games */
+	barrier();
+	skb->ip_defrag_offset = offset;
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
@@ -310,10 +307,6 @@ found:
 	else
 		fq->q.fragments = skb;
 
-	if (skb->dev) {
-		fq->iif = skb->dev->ifindex;
-		skb->dev = NULL;
-	}
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
 	fq->ecn |= ecn;
@@ -357,7 +350,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 	inet_frag_kill(&fq->q);
 
 	WARN_ON(head == NULL);
-	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+	WARN_ON(head->ip_defrag_offset != 0);
 
 	ecn = ip_frag_ecn_table[fq->ecn];
 	if (unlikely(ecn == 0xff))

From dd0bed1665d6ca17efd747a90a0bb804b4bf2005 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:52 +0530
Subject: [PATCH 1618/1678] tls: support for Inline tls record

Facility to register Inline TLS drivers to net/tls. Setup
TLS_HW_RECORD prot to listen on offload device.

Cases handled
- Inline TLS device exists, setup prot for TLS_HW_RECORD
- Atleast one Inline TLS exists, sets TLS_HW_RECORD.
- If non-inline device establish connection, move to TLS_SW_TX

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h  |  32 ++++++++++++-
 net/tls/tls_main.c | 114 +++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 437a746300bf..3da8e13a6d96 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -56,6 +56,32 @@
 #define TLS_RECORD_TYPE_DATA		0x17
 
 #define TLS_AAD_SPACE_SIZE		13
+#define TLS_DEVICE_NAME_MAX		32
+
+/*
+ * This structure defines the routines for Inline TLS driver.
+ * The following routines are optional and filled with a
+ * null pointer if not defined.
+ *
+ * @name: Its the name of registered Inline tls device
+ * @dev_list: Inline tls device list
+ * int (*feature)(struct tls_device *device);
+ *     Called to return Inline TLS driver capability
+ *
+ * int (*hash)(struct tls_device *device, struct sock *sk);
+ *     This function sets Inline driver for listen and program
+ *     device specific functioanlity as required
+ *
+ * void (*unhash)(struct tls_device *device, struct sock *sk);
+ *     This function cleans listen state set by Inline TLS driver
+ */
+struct tls_device {
+	char name[TLS_DEVICE_NAME_MAX];
+	struct list_head dev_list;
+	int  (*feature)(struct tls_device *device);
+	int  (*hash)(struct tls_device *device, struct sock *sk);
+	void (*unhash)(struct tls_device *device, struct sock *sk);
+};
 
 struct tls_sw_context {
 	struct crypto_aead *aead_send;
@@ -114,7 +140,7 @@ struct tls_context {
 
 	void *priv_ctx;
 
-	u8 conf:2;
+	u8 conf:3;
 
 	struct cipher_context tx;
 	struct cipher_context rx;
@@ -135,6 +161,8 @@ struct tls_context {
 	int  (*getsockopt)(struct sock *sk, int level,
 			   int optname, char __user *optval,
 			   int __user *optlen);
+	int  (*hash)(struct sock *sk);
+	void (*unhash)(struct sock *sk);
 };
 
 int wait_on_pending_writer(struct sock *sk, long *timeo);
@@ -283,5 +311,7 @@ static inline struct tls_offload_context *tls_offload_ctx(
 
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
+void tls_register_device(struct tls_device *device);
+void tls_unregister_device(struct tls_device *device);
 
 #endif /* _TLS_OFFLOAD_H */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 6f5c1146da4a..0d379970960e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -38,6 +38,7 @@
 #include <linux/highmem.h>
 #include <linux/netdevice.h>
 #include <linux/sched/signal.h>
+#include <linux/inetdevice.h>
 
 #include <net/tls.h>
 
@@ -56,11 +57,14 @@ enum {
 	TLS_SW_TX,
 	TLS_SW_RX,
 	TLS_SW_RXTX,
+	TLS_HW_RECORD,
 	TLS_NUM_CONFIG,
 };
 
 static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
+static LIST_HEAD(device_list);
+static DEFINE_MUTEX(device_mutex);
 static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG];
 static struct proto_ops tls_sw_proto_ops;
 
@@ -241,8 +245,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	lock_sock(sk);
 	sk_proto_close = ctx->sk_proto_close;
 
+	if (ctx->conf == TLS_HW_RECORD)
+		goto skip_tx_cleanup;
+
 	if (ctx->conf == TLS_BASE) {
 		kfree(ctx);
+		ctx = NULL;
 		goto skip_tx_cleanup;
 	}
 
@@ -276,6 +284,11 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 skip_tx_cleanup:
 	release_sock(sk);
 	sk_proto_close(sk, timeout);
+	/* free ctx for TLS_HW_RECORD, used by tcp_set_state
+	 * for sk->sk_prot->unhash [tls_hw_unhash]
+	 */
+	if (ctx && ctx->conf == TLS_HW_RECORD)
+		kfree(ctx);
 }
 
 static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -493,6 +506,79 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
 	return do_tls_setsockopt(sk, optname, optval, optlen);
 }
 
+static struct tls_context *create_ctx(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tls_context *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	icsk->icsk_ulp_data = ctx;
+	return ctx;
+}
+
+static int tls_hw_prot(struct sock *sk)
+{
+	struct tls_context *ctx;
+	struct tls_device *dev;
+	int rc = 0;
+
+	mutex_lock(&device_mutex);
+	list_for_each_entry(dev, &device_list, dev_list) {
+		if (dev->feature && dev->feature(dev)) {
+			ctx = create_ctx(sk);
+			if (!ctx)
+				goto out;
+
+			ctx->hash = sk->sk_prot->hash;
+			ctx->unhash = sk->sk_prot->unhash;
+			ctx->sk_proto_close = sk->sk_prot->close;
+			ctx->conf = TLS_HW_RECORD;
+			update_sk_prot(sk, ctx);
+			rc = 1;
+			break;
+		}
+	}
+out:
+	mutex_unlock(&device_mutex);
+	return rc;
+}
+
+static void tls_hw_unhash(struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+	struct tls_device *dev;
+
+	mutex_lock(&device_mutex);
+	list_for_each_entry(dev, &device_list, dev_list) {
+		if (dev->unhash)
+			dev->unhash(dev, sk);
+	}
+	mutex_unlock(&device_mutex);
+	ctx->unhash(sk);
+}
+
+static int tls_hw_hash(struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+	struct tls_device *dev;
+	int err;
+
+	err = ctx->hash(sk);
+	mutex_lock(&device_mutex);
+	list_for_each_entry(dev, &device_list, dev_list) {
+		if (dev->hash)
+			err |= dev->hash(dev, sk);
+	}
+	mutex_unlock(&device_mutex);
+
+	if (err)
+		tls_hw_unhash(sk);
+	return err;
+}
+
 static void build_protos(struct proto *prot, struct proto *base)
 {
 	prot[TLS_BASE] = *base;
@@ -511,15 +597,22 @@ static void build_protos(struct proto *prot, struct proto *base)
 	prot[TLS_SW_RXTX] = prot[TLS_SW_TX];
 	prot[TLS_SW_RXTX].recvmsg	= tls_sw_recvmsg;
 	prot[TLS_SW_RXTX].close		= tls_sk_proto_close;
+
+	prot[TLS_HW_RECORD] = *base;
+	prot[TLS_HW_RECORD].hash	= tls_hw_hash;
+	prot[TLS_HW_RECORD].unhash	= tls_hw_unhash;
+	prot[TLS_HW_RECORD].close	= tls_sk_proto_close;
 }
 
 static int tls_init(struct sock *sk)
 {
 	int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
-	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tls_context *ctx;
 	int rc = 0;
 
+	if (tls_hw_prot(sk))
+		goto out;
+
 	/* The TLS ulp is currently supported only for TCP sockets
 	 * in ESTABLISHED state.
 	 * Supporting sockets in LISTEN state will require us
@@ -530,12 +623,11 @@ static int tls_init(struct sock *sk)
 		return -ENOTSUPP;
 
 	/* allocate tls context */
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	ctx = create_ctx(sk);
 	if (!ctx) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	icsk->icsk_ulp_data = ctx;
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
 	ctx->sk_proto_close = sk->sk_prot->close;
@@ -557,6 +649,22 @@ out:
 	return rc;
 }
 
+void tls_register_device(struct tls_device *device)
+{
+	mutex_lock(&device_mutex);
+	list_add_tail(&device->dev_list, &device_list);
+	mutex_unlock(&device_mutex);
+}
+EXPORT_SYMBOL(tls_register_device);
+
+void tls_unregister_device(struct tls_device *device)
+{
+	mutex_lock(&device_mutex);
+	list_del(&device->dev_list);
+	mutex_unlock(&device_mutex);
+}
+EXPORT_SYMBOL(tls_unregister_device);
+
 static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.name			= "tls",
 	.uid			= TCP_ULP_TLS,

From e0be6bea2583486ec4ed98e36437d82ea8190811 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:53 +0530
Subject: [PATCH 1619/1678] ethtool: enable Inline TLS in HW

Ethtool option enables TLS record offload on HW, user
configures the feature for netdev capable of Inline TLS.
This allows user to define custom sk_prot for Inline TLS sock

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 ++
 net/core/ethtool.c              | 1 +
 2 files changed, 3 insertions(+)

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index db84c516bcfb..35b79f47a13d 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -79,6 +79,7 @@ enum {
 	NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
 
 	NETIF_F_GRO_HW_BIT,		/* Hardware Generic receive offload */
+	NETIF_F_HW_TLS_RECORD_BIT,	/* Offload TLS record */
 
 	/*
 	 * Add your fresh new feature above and remember to update
@@ -145,6 +146,7 @@ enum {
 #define NETIF_F_HW_ESP		__NETIF_F(HW_ESP)
 #define NETIF_F_HW_ESP_TX_CSUM	__NETIF_F(HW_ESP_TX_CSUM)
 #define	NETIF_F_RX_UDP_TUNNEL_PORT  __NETIF_F(RX_UDP_TUNNEL_PORT)
+#define NETIF_F_HW_TLS_RECORD	__NETIF_F(HW_TLS_RECORD)
 
 #define for_each_netdev_feature(mask_addr, bit)	\
 	for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index eb55252ca1fb..03416e6dd5d7 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -108,6 +108,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_HW_ESP_BIT] =		 "esp-hw-offload",
 	[NETIF_F_HW_ESP_TX_CSUM_BIT] =	 "esp-tx-csum-hw-offload",
 	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
+	[NETIF_F_HW_TLS_RECORD_BIT] =	"tls-hw-record",
 };
 
 static const char

From e108708968e33da102dd38bd2dce37904842a319 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:54 +0530
Subject: [PATCH 1620/1678] cxgb4: Inline TLS FW Interface

Key area size in hw-config file. CPL struct for TLS request
and response. Work request for Inline TLS.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Reviewed-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h   | 122 ++++++++++++-
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h  |   2 +
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 165 +++++++++++++++++-
 3 files changed, 283 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 5e8f5ca8e3ee..fe2029e993a2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -82,13 +82,15 @@ enum {
 	CPL_RX_ISCSI_CMP      = 0x45,
 	CPL_TRACE_PKT_T5      = 0x48,
 	CPL_RX_ISCSI_DDP      = 0x49,
+	CPL_RX_TLS_CMP        = 0x4E,
 
 	CPL_RDMA_READ_REQ     = 0x60,
 
 	CPL_PASS_OPEN_REQ6    = 0x81,
 	CPL_ACT_OPEN_REQ6     = 0x83,
 
-	CPL_TX_TLS_PDU     =    0x88,
+	CPL_TX_TLS_PDU        = 0x88,
+	CPL_TX_TLS_SFO        = 0x89,
 	CPL_TX_SEC_PDU        = 0x8A,
 	CPL_TX_TLS_ACK        = 0x8B,
 
@@ -98,6 +100,7 @@ enum {
 	CPL_RX_MPS_PKT        = 0xAF,
 
 	CPL_TRACE_PKT         = 0xB0,
+	CPL_TLS_DATA          = 0xB1,
 	CPL_ISCSI_DATA	      = 0xB2,
 
 	CPL_FW4_MSG           = 0xC0,
@@ -155,6 +158,7 @@ enum {
 	ULP_MODE_RDMA          = 4,
 	ULP_MODE_TCPDDP	       = 5,
 	ULP_MODE_FCOE          = 6,
+	ULP_MODE_TLS           = 8,
 };
 
 enum {
@@ -1445,6 +1449,14 @@ struct cpl_tx_data {
 #define T6_TX_FORCE_V(x)	((x) << T6_TX_FORCE_S)
 #define T6_TX_FORCE_F		T6_TX_FORCE_V(1U)
 
+#define TX_SHOVE_S    14
+#define TX_SHOVE_V(x) ((x) << TX_SHOVE_S)
+
+#define TX_ULP_MODE_S    10
+#define TX_ULP_MODE_M    0x7
+#define TX_ULP_MODE_V(x) ((x) << TX_ULP_MODE_S)
+#define TX_ULP_MODE_G(x) (((x) >> TX_ULP_MODE_S) & TX_ULP_MODE_M)
+
 enum {
 	ULP_TX_MEM_READ = 2,
 	ULP_TX_MEM_WRITE = 3,
@@ -1455,12 +1467,21 @@ enum {
 	ULP_TX_SC_NOOP = 0x80,
 	ULP_TX_SC_IMM  = 0x81,
 	ULP_TX_SC_DSGL = 0x82,
-	ULP_TX_SC_ISGL = 0x83
+	ULP_TX_SC_ISGL = 0x83,
+	ULP_TX_SC_MEMRD = 0x86
 };
 
 #define ULPTX_CMD_S    24
 #define ULPTX_CMD_V(x) ((x) << ULPTX_CMD_S)
 
+#define ULPTX_LEN16_S    0
+#define ULPTX_LEN16_M    0xFF
+#define ULPTX_LEN16_V(x) ((x) << ULPTX_LEN16_S)
+
+#define ULP_TX_SC_MORE_S 23
+#define ULP_TX_SC_MORE_V(x) ((x) << ULP_TX_SC_MORE_S)
+#define ULP_TX_SC_MORE_F  ULP_TX_SC_MORE_V(1U)
+
 struct ulptx_sge_pair {
 	__be32 len[2];
 	__be64 addr[2];
@@ -2183,4 +2204,101 @@ struct cpl_srq_table_rpl {
 #define SRQT_IDX_V(x) ((x) << SRQT_IDX_S)
 #define SRQT_IDX_G(x) (((x) >> SRQT_IDX_S) & SRQT_IDX_M)
 
+struct cpl_tx_tls_sfo {
+	__be32 op_to_seg_len;
+	__be32 pld_len;
+	__be32 type_protover;
+	__be32 r1_lo;
+	__be32 seqno_numivs;
+	__be32 ivgen_hdrlen;
+	__be64 scmd1;
+};
+
+/* cpl_tx_tls_sfo macros */
+#define CPL_TX_TLS_SFO_OPCODE_S         24
+#define CPL_TX_TLS_SFO_OPCODE_V(x)      ((x) << CPL_TX_TLS_SFO_OPCODE_S)
+
+#define CPL_TX_TLS_SFO_DATA_TYPE_S      20
+#define CPL_TX_TLS_SFO_DATA_TYPE_V(x)   ((x) << CPL_TX_TLS_SFO_DATA_TYPE_S)
+
+#define CPL_TX_TLS_SFO_CPL_LEN_S        16
+#define CPL_TX_TLS_SFO_CPL_LEN_V(x)     ((x) << CPL_TX_TLS_SFO_CPL_LEN_S)
+
+#define CPL_TX_TLS_SFO_SEG_LEN_S        0
+#define CPL_TX_TLS_SFO_SEG_LEN_M        0xffff
+#define CPL_TX_TLS_SFO_SEG_LEN_V(x)     ((x) << CPL_TX_TLS_SFO_SEG_LEN_S)
+#define CPL_TX_TLS_SFO_SEG_LEN_G(x)     \
+	(((x) >> CPL_TX_TLS_SFO_SEG_LEN_S) & CPL_TX_TLS_SFO_SEG_LEN_M)
+
+#define CPL_TX_TLS_SFO_TYPE_S           24
+#define CPL_TX_TLS_SFO_TYPE_M           0xff
+#define CPL_TX_TLS_SFO_TYPE_V(x)        ((x) << CPL_TX_TLS_SFO_TYPE_S)
+#define CPL_TX_TLS_SFO_TYPE_G(x)        \
+	(((x) >> CPL_TX_TLS_SFO_TYPE_S) & CPL_TX_TLS_SFO_TYPE_M)
+
+#define CPL_TX_TLS_SFO_PROTOVER_S       8
+#define CPL_TX_TLS_SFO_PROTOVER_M       0xffff
+#define CPL_TX_TLS_SFO_PROTOVER_V(x)    ((x) << CPL_TX_TLS_SFO_PROTOVER_S)
+#define CPL_TX_TLS_SFO_PROTOVER_G(x)    \
+	(((x) >> CPL_TX_TLS_SFO_PROTOVER_S) & CPL_TX_TLS_SFO_PROTOVER_M)
+
+struct cpl_tls_data {
+	struct rss_header rsshdr;
+	union opcode_tid ot;
+	__be32 length_pkd;
+	__be32 seq;
+	__be32 r1;
+};
+
+#define CPL_TLS_DATA_OPCODE_S           24
+#define CPL_TLS_DATA_OPCODE_M           0xff
+#define CPL_TLS_DATA_OPCODE_V(x)        ((x) << CPL_TLS_DATA_OPCODE_S)
+#define CPL_TLS_DATA_OPCODE_G(x)        \
+	(((x) >> CPL_TLS_DATA_OPCODE_S) & CPL_TLS_DATA_OPCODE_M)
+
+#define CPL_TLS_DATA_TID_S              0
+#define CPL_TLS_DATA_TID_M              0xffffff
+#define CPL_TLS_DATA_TID_V(x)           ((x) << CPL_TLS_DATA_TID_S)
+#define CPL_TLS_DATA_TID_G(x)           \
+	(((x) >> CPL_TLS_DATA_TID_S) & CPL_TLS_DATA_TID_M)
+
+#define CPL_TLS_DATA_LENGTH_S           0
+#define CPL_TLS_DATA_LENGTH_M           0xffff
+#define CPL_TLS_DATA_LENGTH_V(x)        ((x) << CPL_TLS_DATA_LENGTH_S)
+#define CPL_TLS_DATA_LENGTH_G(x)        \
+	(((x) >> CPL_TLS_DATA_LENGTH_S) & CPL_TLS_DATA_LENGTH_M)
+
+struct cpl_rx_tls_cmp {
+	struct rss_header rsshdr;
+	union opcode_tid ot;
+	__be32 pdulength_length;
+	__be32 seq;
+	__be32 ddp_report;
+	__be32 r;
+	__be32 ddp_valid;
+};
+
+#define CPL_RX_TLS_CMP_OPCODE_S         24
+#define CPL_RX_TLS_CMP_OPCODE_M         0xff
+#define CPL_RX_TLS_CMP_OPCODE_V(x)      ((x) << CPL_RX_TLS_CMP_OPCODE_S)
+#define CPL_RX_TLS_CMP_OPCODE_G(x)      \
+	(((x) >> CPL_RX_TLS_CMP_OPCODE_S) & CPL_RX_TLS_CMP_OPCODE_M)
+
+#define CPL_RX_TLS_CMP_TID_S            0
+#define CPL_RX_TLS_CMP_TID_M            0xffffff
+#define CPL_RX_TLS_CMP_TID_V(x)         ((x) << CPL_RX_TLS_CMP_TID_S)
+#define CPL_RX_TLS_CMP_TID_G(x)         \
+	(((x) >> CPL_RX_TLS_CMP_TID_S) & CPL_RX_TLS_CMP_TID_M)
+
+#define CPL_RX_TLS_CMP_PDULENGTH_S      16
+#define CPL_RX_TLS_CMP_PDULENGTH_M      0xffff
+#define CPL_RX_TLS_CMP_PDULENGTH_V(x)   ((x) << CPL_RX_TLS_CMP_PDULENGTH_S)
+#define CPL_RX_TLS_CMP_PDULENGTH_G(x)   \
+	(((x) >> CPL_RX_TLS_CMP_PDULENGTH_S) & CPL_RX_TLS_CMP_PDULENGTH_M)
+
+#define CPL_RX_TLS_CMP_LENGTH_S         0
+#define CPL_RX_TLS_CMP_LENGTH_M         0xffff
+#define CPL_RX_TLS_CMP_LENGTH_V(x)      ((x) << CPL_RX_TLS_CMP_LENGTH_S)
+#define CPL_RX_TLS_CMP_LENGTH_G(x)      \
+	(((x) >> CPL_RX_TLS_CMP_LENGTH_S) & CPL_RX_TLS_CMP_LENGTH_M)
 #endif  /* __T4_MSG_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index a6df73398d17..276fdf214b75 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -2775,6 +2775,8 @@
 #define ULP_RX_LA_RDPTR_A 0x19240
 #define ULP_RX_LA_RDDATA_A 0x19244
 #define ULP_RX_LA_WRPTR_A 0x19248
+#define ULP_RX_TLS_KEY_LLIMIT_A 0x192ac
+#define ULP_RX_TLS_KEY_ULIMIT_A 0x192b0
 
 #define HPZ3_S    24
 #define HPZ3_V(x) ((x) << HPZ3_S)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 544757f6ab3a..e3d4751f21ac 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -105,6 +105,7 @@ enum fw_wr_opcodes {
 	FW_RI_INV_LSTAG_WR             = 0x1a,
 	FW_ISCSI_TX_DATA_WR	       = 0x45,
 	FW_PTP_TX_PKT_WR               = 0x46,
+	FW_TLSTX_DATA_WR	       = 0x68,
 	FW_CRYPTO_LOOKASIDE_WR         = 0X6d,
 	FW_LASTC2E_WR                  = 0x70,
 	FW_FILTER2_WR		       = 0x77
@@ -635,6 +636,30 @@ struct fw_ofld_connection_wr {
 #define FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL_F       \
 	FW_OFLD_CONNECTION_WR_CPLPASSACCEPTRPL_V(1U)
 
+enum fw_flowc_mnem_tcpstate {
+	FW_FLOWC_MNEM_TCPSTATE_CLOSED   = 0, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_LISTEN   = 1, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_SYNSENT  = 2, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_SYNRECEIVED = 3, /* illegal */
+	FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED = 4, /* default */
+	FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT = 5, /* got peer close already */
+	FW_FLOWC_MNEM_TCPSTATE_FINWAIT1 = 6, /* haven't gotten ACK for FIN and
+					      * will resend FIN - equiv ESTAB
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_CLOSING  = 7, /* haven't gotten ACK for FIN and
+					      * will resend FIN but have
+					      * received FIN
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_LASTACK  = 8, /* haven't gotten ACK for FIN and
+					      * will resend FIN but have
+					      * received FIN
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_FINWAIT2 = 9, /* sent FIN and got FIN + ACK,
+					      * waiting for FIN
+					      */
+	FW_FLOWC_MNEM_TCPSTATE_TIMEWAIT = 10, /* not expected */
+};
+
 enum fw_flowc_mnem {
 	FW_FLOWC_MNEM_PFNVFN,		/* PFN [15:8] VFN [7:0] */
 	FW_FLOWC_MNEM_CH,
@@ -651,6 +676,8 @@ enum fw_flowc_mnem {
 	FW_FLOWC_MNEM_DCBPRIO,
 	FW_FLOWC_MNEM_SND_SCALE,
 	FW_FLOWC_MNEM_RCV_SCALE,
+	FW_FLOWC_MNEM_ULD_MODE,
+	FW_FLOWC_MNEM_MAX,
 };
 
 struct fw_flowc_mnemval {
@@ -675,6 +702,14 @@ struct fw_ofld_tx_data_wr {
 	__be32 tunnel_to_proxy;
 };
 
+#define FW_OFLD_TX_DATA_WR_ALIGNPLD_S   30
+#define FW_OFLD_TX_DATA_WR_ALIGNPLD_V(x) ((x) << FW_OFLD_TX_DATA_WR_ALIGNPLD_S)
+#define FW_OFLD_TX_DATA_WR_ALIGNPLD_F   FW_OFLD_TX_DATA_WR_ALIGNPLD_V(1U)
+
+#define FW_OFLD_TX_DATA_WR_SHOVE_S      29
+#define FW_OFLD_TX_DATA_WR_SHOVE_V(x)   ((x) << FW_OFLD_TX_DATA_WR_SHOVE_S)
+#define FW_OFLD_TX_DATA_WR_SHOVE_F      FW_OFLD_TX_DATA_WR_SHOVE_V(1U)
+
 #define FW_OFLD_TX_DATA_WR_TUNNEL_S     19
 #define FW_OFLD_TX_DATA_WR_TUNNEL_V(x)  ((x) << FW_OFLD_TX_DATA_WR_TUNNEL_S)
 
@@ -691,10 +726,6 @@ struct fw_ofld_tx_data_wr {
 #define FW_OFLD_TX_DATA_WR_MORE_S       15
 #define FW_OFLD_TX_DATA_WR_MORE_V(x)    ((x) << FW_OFLD_TX_DATA_WR_MORE_S)
 
-#define FW_OFLD_TX_DATA_WR_SHOVE_S      14
-#define FW_OFLD_TX_DATA_WR_SHOVE_V(x)   ((x) << FW_OFLD_TX_DATA_WR_SHOVE_S)
-#define FW_OFLD_TX_DATA_WR_SHOVE_F      FW_OFLD_TX_DATA_WR_SHOVE_V(1U)
-
 #define FW_OFLD_TX_DATA_WR_ULPMODE_S    10
 #define FW_OFLD_TX_DATA_WR_ULPMODE_V(x) ((x) << FW_OFLD_TX_DATA_WR_ULPMODE_S)
 
@@ -1121,6 +1152,12 @@ enum fw_caps_config_iscsi {
 	FW_CAPS_CONFIG_ISCSI_TARGET_CNXOFLD = 0x00000008,
 };
 
+enum fw_caps_config_crypto {
+	FW_CAPS_CONFIG_CRYPTO_LOOKASIDE = 0x00000001,
+	FW_CAPS_CONFIG_TLS_INLINE = 0x00000002,
+	FW_CAPS_CONFIG_IPSEC_INLINE = 0x00000004,
+};
+
 enum fw_caps_config_fcoe {
 	FW_CAPS_CONFIG_FCOE_INITIATOR	= 0x00000001,
 	FW_CAPS_CONFIG_FCOE_TARGET	= 0x00000002,
@@ -1266,6 +1303,8 @@ enum fw_params_param_pfvf {
 	FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP = 0x31,
 	FW_PARAMS_PARAM_PFVF_HPFILTER_START = 0x32,
 	FW_PARAMS_PARAM_PFVF_HPFILTER_END = 0x33,
+	FW_PARAMS_PARAM_PFVF_TLS_START = 0x34,
+	FW_PARAMS_PARAM_PFVF_TLS_END = 0x35,
 	FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39,
 	FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A,
 };
@@ -3839,4 +3878,122 @@ struct fw_crypto_lookaside_wr {
 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \
 	 FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M)
 
+struct fw_tlstx_data_wr {
+	__be32 op_to_immdlen;
+	__be32 flowid_len16;
+	__be32 plen;
+	__be32 lsodisable_to_flags;
+	__be32 r5;
+	__be32 ctxloc_to_exp;
+	__be16 mfs;
+	__be16 adjustedplen_pkd;
+	__be16 expinplenmax_pkd;
+	u8   pdusinplenmax_pkd;
+	u8   r10;
+};
+
+#define FW_TLSTX_DATA_WR_OPCODE_S       24
+#define FW_TLSTX_DATA_WR_OPCODE_M       0xff
+#define FW_TLSTX_DATA_WR_OPCODE_V(x)    ((x) << FW_TLSTX_DATA_WR_OPCODE_S)
+#define FW_TLSTX_DATA_WR_OPCODE_G(x)    \
+	(((x) >> FW_TLSTX_DATA_WR_OPCODE_S) & FW_TLSTX_DATA_WR_OPCODE_M)
+
+#define FW_TLSTX_DATA_WR_COMPL_S        21
+#define FW_TLSTX_DATA_WR_COMPL_M        0x1
+#define FW_TLSTX_DATA_WR_COMPL_V(x)     ((x) << FW_TLSTX_DATA_WR_COMPL_S)
+#define FW_TLSTX_DATA_WR_COMPL_G(x)     \
+	(((x) >> FW_TLSTX_DATA_WR_COMPL_S) & FW_TLSTX_DATA_WR_COMPL_M)
+#define FW_TLSTX_DATA_WR_COMPL_F        FW_TLSTX_DATA_WR_COMPL_V(1U)
+
+#define FW_TLSTX_DATA_WR_IMMDLEN_S      0
+#define FW_TLSTX_DATA_WR_IMMDLEN_M      0xff
+#define FW_TLSTX_DATA_WR_IMMDLEN_V(x)   ((x) << FW_TLSTX_DATA_WR_IMMDLEN_S)
+#define FW_TLSTX_DATA_WR_IMMDLEN_G(x)   \
+	(((x) >> FW_TLSTX_DATA_WR_IMMDLEN_S) & FW_TLSTX_DATA_WR_IMMDLEN_M)
+
+#define FW_TLSTX_DATA_WR_FLOWID_S       8
+#define FW_TLSTX_DATA_WR_FLOWID_M       0xfffff
+#define FW_TLSTX_DATA_WR_FLOWID_V(x)    ((x) << FW_TLSTX_DATA_WR_FLOWID_S)
+#define FW_TLSTX_DATA_WR_FLOWID_G(x)    \
+	(((x) >> FW_TLSTX_DATA_WR_FLOWID_S) & FW_TLSTX_DATA_WR_FLOWID_M)
+
+#define FW_TLSTX_DATA_WR_LEN16_S        0
+#define FW_TLSTX_DATA_WR_LEN16_M        0xff
+#define FW_TLSTX_DATA_WR_LEN16_V(x)     ((x) << FW_TLSTX_DATA_WR_LEN16_S)
+#define FW_TLSTX_DATA_WR_LEN16_G(x)     \
+	(((x) >> FW_TLSTX_DATA_WR_LEN16_S) & FW_TLSTX_DATA_WR_LEN16_M)
+
+#define FW_TLSTX_DATA_WR_LSODISABLE_S   31
+#define FW_TLSTX_DATA_WR_LSODISABLE_M   0x1
+#define FW_TLSTX_DATA_WR_LSODISABLE_V(x) \
+	((x) << FW_TLSTX_DATA_WR_LSODISABLE_S)
+#define FW_TLSTX_DATA_WR_LSODISABLE_G(x) \
+	(((x) >> FW_TLSTX_DATA_WR_LSODISABLE_S) & FW_TLSTX_DATA_WR_LSODISABLE_M)
+#define FW_TLSTX_DATA_WR_LSODISABLE_F   FW_TLSTX_DATA_WR_LSODISABLE_V(1U)
+
+#define FW_TLSTX_DATA_WR_ALIGNPLD_S     30
+#define FW_TLSTX_DATA_WR_ALIGNPLD_M     0x1
+#define FW_TLSTX_DATA_WR_ALIGNPLD_V(x)  ((x) << FW_TLSTX_DATA_WR_ALIGNPLD_S)
+#define FW_TLSTX_DATA_WR_ALIGNPLD_G(x)  \
+	(((x) >> FW_TLSTX_DATA_WR_ALIGNPLD_S) & FW_TLSTX_DATA_WR_ALIGNPLD_M)
+#define FW_TLSTX_DATA_WR_ALIGNPLD_F     FW_TLSTX_DATA_WR_ALIGNPLD_V(1U)
+
+#define FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_S 29
+#define FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_M 0x1
+#define FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_V(x) \
+	((x) << FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_S)
+#define FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_G(x) \
+	(((x) >> FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_S) & \
+	FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_M)
+#define FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_F FW_TLSTX_DATA_WR_ALIGNPLDSHOVE_V(1U)
+
+#define FW_TLSTX_DATA_WR_FLAGS_S        0
+#define FW_TLSTX_DATA_WR_FLAGS_M        0xfffffff
+#define FW_TLSTX_DATA_WR_FLAGS_V(x)     ((x) << FW_TLSTX_DATA_WR_FLAGS_S)
+#define FW_TLSTX_DATA_WR_FLAGS_G(x)     \
+	(((x) >> FW_TLSTX_DATA_WR_FLAGS_S) & FW_TLSTX_DATA_WR_FLAGS_M)
+
+#define FW_TLSTX_DATA_WR_CTXLOC_S       30
+#define FW_TLSTX_DATA_WR_CTXLOC_M       0x3
+#define FW_TLSTX_DATA_WR_CTXLOC_V(x)    ((x) << FW_TLSTX_DATA_WR_CTXLOC_S)
+#define FW_TLSTX_DATA_WR_CTXLOC_G(x)    \
+	(((x) >> FW_TLSTX_DATA_WR_CTXLOC_S) & FW_TLSTX_DATA_WR_CTXLOC_M)
+
+#define FW_TLSTX_DATA_WR_IVDSGL_S       29
+#define FW_TLSTX_DATA_WR_IVDSGL_M       0x1
+#define FW_TLSTX_DATA_WR_IVDSGL_V(x)    ((x) << FW_TLSTX_DATA_WR_IVDSGL_S)
+#define FW_TLSTX_DATA_WR_IVDSGL_G(x)    \
+	(((x) >> FW_TLSTX_DATA_WR_IVDSGL_S) & FW_TLSTX_DATA_WR_IVDSGL_M)
+#define FW_TLSTX_DATA_WR_IVDSGL_F       FW_TLSTX_DATA_WR_IVDSGL_V(1U)
+
+#define FW_TLSTX_DATA_WR_KEYSIZE_S      24
+#define FW_TLSTX_DATA_WR_KEYSIZE_M      0x1f
+#define FW_TLSTX_DATA_WR_KEYSIZE_V(x)   ((x) << FW_TLSTX_DATA_WR_KEYSIZE_S)
+#define FW_TLSTX_DATA_WR_KEYSIZE_G(x)   \
+	(((x) >> FW_TLSTX_DATA_WR_KEYSIZE_S) & FW_TLSTX_DATA_WR_KEYSIZE_M)
+
+#define FW_TLSTX_DATA_WR_NUMIVS_S       14
+#define FW_TLSTX_DATA_WR_NUMIVS_M       0xff
+#define FW_TLSTX_DATA_WR_NUMIVS_V(x)    ((x) << FW_TLSTX_DATA_WR_NUMIVS_S)
+#define FW_TLSTX_DATA_WR_NUMIVS_G(x)    \
+	(((x) >> FW_TLSTX_DATA_WR_NUMIVS_S) & FW_TLSTX_DATA_WR_NUMIVS_M)
+
+#define FW_TLSTX_DATA_WR_EXP_S          0
+#define FW_TLSTX_DATA_WR_EXP_M          0x3fff
+#define FW_TLSTX_DATA_WR_EXP_V(x)       ((x) << FW_TLSTX_DATA_WR_EXP_S)
+#define FW_TLSTX_DATA_WR_EXP_G(x)       \
+	(((x) >> FW_TLSTX_DATA_WR_EXP_S) & FW_TLSTX_DATA_WR_EXP_M)
+
+#define FW_TLSTX_DATA_WR_ADJUSTEDPLEN_S 1
+#define FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(x) \
+	((x) << FW_TLSTX_DATA_WR_ADJUSTEDPLEN_S)
+
+#define FW_TLSTX_DATA_WR_EXPINPLENMAX_S 4
+#define FW_TLSTX_DATA_WR_EXPINPLENMAX_V(x) \
+	((x) << FW_TLSTX_DATA_WR_EXPINPLENMAX_S)
+
+#define FW_TLSTX_DATA_WR_PDUSINPLENMAX_S 2
+#define FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(x) \
+	((x) << FW_TLSTX_DATA_WR_PDUSINPLENMAX_S)
+
 #endif /* _T4FW_INTERFACE_H_ */

From e383f2483425c267654d7ba184843f79d39feb4a Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:55 +0530
Subject: [PATCH 1621/1678] cxgb4: LLD driver changes to support TLS

Read the Inline TLS capability from firmware.
Determine the area reserved for storing the keys
Dump the Inline TLS tx and rx records count.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Reviewed-by: Michael Werner <werner@chelsio.com>
Reviewed-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   |  32 ++++--
 .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h    |   7 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c      | 107 +++++++++++++++++-
 3 files changed, 131 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0072580e2c25..24d2865b8806 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4549,18 +4549,32 @@ static int adap_init0(struct adapter *adap)
 		adap->num_ofld_uld += 2;
 	}
 	if (caps_cmd.cryptocaps) {
-		/* Should query params here...TODO */
-		params[0] = FW_PARAM_PFVF(NCRYPTO_LOOKASIDE);
-		ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
-				      params, val);
-		if (ret < 0) {
-			if (ret != -EINVAL)
+		if (ntohs(caps_cmd.cryptocaps) &
+		    FW_CAPS_CONFIG_CRYPTO_LOOKASIDE) {
+			params[0] = FW_PARAM_PFVF(NCRYPTO_LOOKASIDE);
+			ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+					      2, params, val);
+			if (ret < 0) {
+				if (ret != -EINVAL)
+					goto bye;
+			} else {
+				adap->vres.ncrypto_fc = val[0];
+			}
+			adap->num_ofld_uld += 1;
+		}
+		if (ntohs(caps_cmd.cryptocaps) &
+		    FW_CAPS_CONFIG_TLS_INLINE) {
+			params[0] = FW_PARAM_PFVF(TLS_START);
+			params[1] = FW_PARAM_PFVF(TLS_END);
+			ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+					      2, params, val);
+			if (ret < 0)
 				goto bye;
-		} else {
-			adap->vres.ncrypto_fc = val[0];
+			adap->vres.key.start = val[0];
+			adap->vres.key.size = val[1] - val[0] + 1;
+			adap->num_uld += 1;
 		}
 		adap->params.crypto = ntohs(caps_cmd.cryptocaps);
-		adap->num_uld += 1;
 	}
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index b0ca06edaa7c..de9ad311dacd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -237,6 +237,7 @@ enum cxgb4_uld {
 	CXGB4_ULD_ISCSI,
 	CXGB4_ULD_ISCSIT,
 	CXGB4_ULD_CRYPTO,
+	CXGB4_ULD_TLS,
 	CXGB4_ULD_MAX
 };
 
@@ -289,6 +290,7 @@ struct cxgb4_virt_res {                      /* virtualized HW resources */
 	struct cxgb4_range qp;
 	struct cxgb4_range cq;
 	struct cxgb4_range ocq;
+	struct cxgb4_range key;
 	unsigned int ncrypto_fc;
 };
 
@@ -300,6 +302,9 @@ struct chcr_stats_debug {
 	atomic_t error;
 	atomic_t fallback;
 	atomic_t ipsec_cnt;
+	atomic_t tls_pdu_tx;
+	atomic_t tls_pdu_rx;
+	atomic_t tls_key;
 };
 
 #define OCQ_WIN_OFFSET(pdev, vres) \
@@ -382,6 +387,8 @@ struct cxgb4_uld_info {
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
+int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
+		       const void *src, unsigned int len);
 int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6e310a0da7c9..1a28df137e1f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1019,8 +1019,8 @@ EXPORT_SYMBOL(cxgb4_ring_tx_db);
 void cxgb4_inline_tx_skb(const struct sk_buff *skb,
 			 const struct sge_txq *q, void *pos)
 {
-	u64 *p;
 	int left = (void *)q->stat - pos;
+	u64 *p;
 
 	if (likely(skb->len <= left)) {
 		if (likely(!skb->data_len))
@@ -1735,15 +1735,13 @@ static void txq_stop_maperr(struct sge_uld_txq *q)
 /**
  *	ofldtxq_stop - stop an offload Tx queue that has become full
  *	@q: the queue to stop
- *	@skb: the packet causing the queue to become full
+ *	@wr: the Work Request causing the queue to become full
  *
  *	Stops an offload Tx queue that has become full and modifies the packet
  *	being written to request a wakeup.
  */
-static void ofldtxq_stop(struct sge_uld_txq *q, struct sk_buff *skb)
+static void ofldtxq_stop(struct sge_uld_txq *q, struct fw_wr_hdr *wr)
 {
-	struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data;
-
 	wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F);
 	q->q.stops++;
 	q->full = 1;
@@ -1804,7 +1802,7 @@ static void service_ofldq(struct sge_uld_txq *q)
 		credits = txq_avail(&q->q) - ndesc;
 		BUG_ON(credits < 0);
 		if (unlikely(credits < TXQ_STOP_THRES))
-			ofldtxq_stop(q, skb);
+			ofldtxq_stop(q, (struct fw_wr_hdr *)skb->data);
 
 		pos = (u64 *)&q->q.desc[q->q.pidx];
 		if (is_ofld_imm(skb))
@@ -2005,6 +2003,103 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(cxgb4_ofld_send);
 
+static void *inline_tx_header(const void *src,
+			      const struct sge_txq *q,
+			      void *pos, int length)
+{
+	int left = (void *)q->stat - pos;
+	u64 *p;
+
+	if (likely(length <= left)) {
+		memcpy(pos, src, length);
+		pos += length;
+	} else {
+		memcpy(pos, src, left);
+		memcpy(q->desc, src + left, length - left);
+		pos = (void *)q->desc + (length - left);
+	}
+	/* 0-pad to multiple of 16 */
+	p = PTR_ALIGN(pos, 8);
+	if ((uintptr_t)p & 8) {
+		*p = 0;
+		return p + 1;
+	}
+	return p;
+}
+
+/**
+ *      ofld_xmit_direct - copy a WR into offload queue
+ *      @q: the Tx offload queue
+ *      @src: location of WR
+ *      @len: WR length
+ *
+ *      Copy an immediate WR into an uncontended SGE offload queue.
+ */
+static int ofld_xmit_direct(struct sge_uld_txq *q, const void *src,
+			    unsigned int len)
+{
+	unsigned int ndesc;
+	int credits;
+	u64 *pos;
+
+	/* Use the lower limit as the cut-off */
+	if (len > MAX_IMM_OFLD_TX_DATA_WR_LEN) {
+		WARN_ON(1);
+		return NET_XMIT_DROP;
+	}
+
+	/* Don't return NET_XMIT_CN here as the current
+	 * implementation doesn't queue the request
+	 * using an skb when the following conditions not met
+	 */
+	if (!spin_trylock(&q->sendq.lock))
+		return NET_XMIT_DROP;
+
+	if (q->full || !skb_queue_empty(&q->sendq) ||
+	    q->service_ofldq_running) {
+		spin_unlock(&q->sendq.lock);
+		return NET_XMIT_DROP;
+	}
+	ndesc = flits_to_desc(DIV_ROUND_UP(len, 8));
+	credits = txq_avail(&q->q) - ndesc;
+	pos = (u64 *)&q->q.desc[q->q.pidx];
+
+	/* ofldtxq_stop modifies WR header in-situ */
+	inline_tx_header(src, &q->q, pos, len);
+	if (unlikely(credits < TXQ_STOP_THRES))
+		ofldtxq_stop(q, (struct fw_wr_hdr *)pos);
+	txq_advance(&q->q, ndesc);
+	cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
+
+	spin_unlock(&q->sendq.lock);
+	return NET_XMIT_SUCCESS;
+}
+
+int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
+		       const void *src, unsigned int len)
+{
+	struct sge_uld_txq_info *txq_info;
+	struct sge_uld_txq *txq;
+	struct adapter *adap;
+	int ret;
+
+	adap = netdev2adap(dev);
+
+	local_bh_disable();
+	txq_info = adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+	if (unlikely(!txq_info)) {
+		WARN_ON(true);
+		local_bh_enable();
+		return NET_XMIT_DROP;
+	}
+	txq = &txq_info->uldtxq[idx];
+
+	ret = ofld_xmit_direct(txq, src, len);
+	local_bh_enable();
+	return net_xmit_eval(ret);
+}
+EXPORT_SYMBOL(cxgb4_immdata_send);
+
 /**
  *	t4_crypto_send - send crypto packet
  *	@adap: the adapter

From 639d28a1a40785ba36eec57b4d5d6b21be67196f Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:56 +0530
Subject: [PATCH 1622/1678] crypto: chcr - Inline TLS Key Macros

Define macro for programming the TLS Key context

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.h | 42 +++++++++++++++++++++++
 drivers/crypto/chelsio/chcr_core.h | 55 +++++++++++++++++++++++++++++-
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index d1673a5d4bf5..f263cd42a84f 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -86,6 +86,39 @@
 	 KEY_CONTEXT_OPAD_PRESENT_M)
 #define KEY_CONTEXT_OPAD_PRESENT_F      KEY_CONTEXT_OPAD_PRESENT_V(1U)
 
+#define TLS_KEYCTX_RXFLIT_CNT_S 24
+#define TLS_KEYCTX_RXFLIT_CNT_V(x) ((x) << TLS_KEYCTX_RXFLIT_CNT_S)
+
+#define TLS_KEYCTX_RXPROT_VER_S 20
+#define TLS_KEYCTX_RXPROT_VER_M 0xf
+#define TLS_KEYCTX_RXPROT_VER_V(x) ((x) << TLS_KEYCTX_RXPROT_VER_S)
+
+#define TLS_KEYCTX_RXCIPH_MODE_S 16
+#define TLS_KEYCTX_RXCIPH_MODE_M 0xf
+#define TLS_KEYCTX_RXCIPH_MODE_V(x) ((x) << TLS_KEYCTX_RXCIPH_MODE_S)
+
+#define TLS_KEYCTX_RXAUTH_MODE_S 12
+#define TLS_KEYCTX_RXAUTH_MODE_M 0xf
+#define TLS_KEYCTX_RXAUTH_MODE_V(x) ((x) << TLS_KEYCTX_RXAUTH_MODE_S)
+
+#define TLS_KEYCTX_RXCIAU_CTRL_S 11
+#define TLS_KEYCTX_RXCIAU_CTRL_V(x) ((x) << TLS_KEYCTX_RXCIAU_CTRL_S)
+
+#define TLS_KEYCTX_RX_SEQCTR_S 9
+#define TLS_KEYCTX_RX_SEQCTR_M 0x3
+#define TLS_KEYCTX_RX_SEQCTR_V(x) ((x) << TLS_KEYCTX_RX_SEQCTR_S)
+
+#define TLS_KEYCTX_RX_VALID_S 8
+#define TLS_KEYCTX_RX_VALID_V(x) ((x) << TLS_KEYCTX_RX_VALID_S)
+
+#define TLS_KEYCTX_RXCK_SIZE_S 3
+#define TLS_KEYCTX_RXCK_SIZE_M 0x7
+#define TLS_KEYCTX_RXCK_SIZE_V(x) ((x) << TLS_KEYCTX_RXCK_SIZE_S)
+
+#define TLS_KEYCTX_RXMK_SIZE_S 0
+#define TLS_KEYCTX_RXMK_SIZE_M 0x7
+#define TLS_KEYCTX_RXMK_SIZE_V(x) ((x) << TLS_KEYCTX_RXMK_SIZE_S)
+
 #define CHCR_HASH_MAX_DIGEST_SIZE 64
 #define CHCR_MAX_SHA_DIGEST_SIZE 64
 
@@ -176,6 +209,15 @@
 		      KEY_CONTEXT_SALT_PRESENT_V(1) | \
 		      KEY_CONTEXT_CTX_LEN_V((ctx_len)))
 
+#define  FILL_KEY_CRX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \
+		htonl(TLS_KEYCTX_RXMK_SIZE_V(mk_size) | \
+		      TLS_KEYCTX_RXCK_SIZE_V(ck_size) | \
+		      TLS_KEYCTX_RX_VALID_V(1) | \
+		      TLS_KEYCTX_RX_SEQCTR_V(3) | \
+		      TLS_KEYCTX_RXAUTH_MODE_V(4) | \
+		      TLS_KEYCTX_RXCIPH_MODE_V(2) | \
+		      TLS_KEYCTX_RXFLIT_CNT_V((ctx_len)))
+
 #define FILL_WR_OP_CCTX_SIZE \
 		htonl( \
 			FW_CRYPTO_LOOKASIDE_WR_OPCODE_V( \
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 3c29ee09b8b5..77056a90c8e1 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -65,10 +65,58 @@ struct uld_ctx;
 struct _key_ctx {
 	__be32 ctx_hdr;
 	u8 salt[MAX_SALT];
-	__be64 reserverd;
+	__be64 iv_to_auth;
 	unsigned char key[0];
 };
 
+#define KEYCTX_TX_WR_IV_S  55
+#define KEYCTX_TX_WR_IV_M  0x1ffULL
+#define KEYCTX_TX_WR_IV_V(x) ((x) << KEYCTX_TX_WR_IV_S)
+#define KEYCTX_TX_WR_IV_G(x) \
+	(((x) >> KEYCTX_TX_WR_IV_S) & KEYCTX_TX_WR_IV_M)
+
+#define KEYCTX_TX_WR_AAD_S 47
+#define KEYCTX_TX_WR_AAD_M 0xffULL
+#define KEYCTX_TX_WR_AAD_V(x) ((x) << KEYCTX_TX_WR_AAD_S)
+#define KEYCTX_TX_WR_AAD_G(x) (((x) >> KEYCTX_TX_WR_AAD_S) & \
+				KEYCTX_TX_WR_AAD_M)
+
+#define KEYCTX_TX_WR_AADST_S 39
+#define KEYCTX_TX_WR_AADST_M 0xffULL
+#define KEYCTX_TX_WR_AADST_V(x) ((x) << KEYCTX_TX_WR_AADST_S)
+#define KEYCTX_TX_WR_AADST_G(x) \
+	(((x) >> KEYCTX_TX_WR_AADST_S) & KEYCTX_TX_WR_AADST_M)
+
+#define KEYCTX_TX_WR_CIPHER_S 30
+#define KEYCTX_TX_WR_CIPHER_M 0x1ffULL
+#define KEYCTX_TX_WR_CIPHER_V(x) ((x) << KEYCTX_TX_WR_CIPHER_S)
+#define KEYCTX_TX_WR_CIPHER_G(x) \
+	(((x) >> KEYCTX_TX_WR_CIPHER_S) & KEYCTX_TX_WR_CIPHER_M)
+
+#define KEYCTX_TX_WR_CIPHERST_S 23
+#define KEYCTX_TX_WR_CIPHERST_M 0x7f
+#define KEYCTX_TX_WR_CIPHERST_V(x) ((x) << KEYCTX_TX_WR_CIPHERST_S)
+#define KEYCTX_TX_WR_CIPHERST_G(x) \
+	(((x) >> KEYCTX_TX_WR_CIPHERST_S) & KEYCTX_TX_WR_CIPHERST_M)
+
+#define KEYCTX_TX_WR_AUTH_S 14
+#define KEYCTX_TX_WR_AUTH_M 0x1ff
+#define KEYCTX_TX_WR_AUTH_V(x) ((x) << KEYCTX_TX_WR_AUTH_S)
+#define KEYCTX_TX_WR_AUTH_G(x) \
+	(((x) >> KEYCTX_TX_WR_AUTH_S) & KEYCTX_TX_WR_AUTH_M)
+
+#define KEYCTX_TX_WR_AUTHST_S 7
+#define KEYCTX_TX_WR_AUTHST_M 0x7f
+#define KEYCTX_TX_WR_AUTHST_V(x) ((x) << KEYCTX_TX_WR_AUTHST_S)
+#define KEYCTX_TX_WR_AUTHST_G(x) \
+	(((x) >> KEYCTX_TX_WR_AUTHST_S) & KEYCTX_TX_WR_AUTHST_M)
+
+#define KEYCTX_TX_WR_AUTHIN_S 0
+#define KEYCTX_TX_WR_AUTHIN_M 0x7f
+#define KEYCTX_TX_WR_AUTHIN_V(x) ((x) << KEYCTX_TX_WR_AUTHIN_S)
+#define KEYCTX_TX_WR_AUTHIN_G(x) \
+	(((x) >> KEYCTX_TX_WR_AUTHIN_S) & KEYCTX_TX_WR_AUTHIN_M)
+
 struct chcr_wr {
 	struct fw_crypto_lookaside_wr wreq;
 	struct ulp_txpkt ulptx;
@@ -90,6 +138,11 @@ struct uld_ctx {
 	struct chcr_dev *dev;
 };
 
+struct sge_opaque_hdr {
+	void *dev;
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+};
+
 struct chcr_ipsec_req {
 	struct ulp_txpkt ulptx;
 	struct ulptx_idata sc_imm;

From a6779341a173aa8cedb5985e0c21c5d7c94c270a Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:57 +0530
Subject: [PATCH 1623/1678] crypto: chtls - structure and macro for Inline TLS

Define Inline TLS state, connection management info.
Supporting macros definition.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Reviewed-by: Michael Werner <werner@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls.h    | 482 ++++++++++++++++++++++++
 drivers/crypto/chelsio/chtls/chtls_cm.h | 203 ++++++++++
 2 files changed, 685 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls.h
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_cm.h

diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h
new file mode 100644
index 000000000000..f4b8f1ec0061
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CHTLS_H__
+#define __CHTLS_H__
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/authenc.h>
+#include <crypto/ctr.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
+#include <crypto/null.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aead.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/hash.h>
+#include <linux/tls.h>
+#include <net/tls.h>
+
+#include "t4fw_api.h"
+#include "t4_msg.h"
+#include "cxgb4.h"
+#include "cxgb4_uld.h"
+#include "l2t.h"
+#include "chcr_algo.h"
+#include "chcr_core.h"
+#include "chcr_crypto.h"
+
+#define MAX_IVS_PAGE			256
+#define TLS_KEY_CONTEXT_SZ		64
+#define CIPHER_BLOCK_SIZE		16
+#define GCM_TAG_SIZE			16
+#define KEY_ON_MEM_SZ			16
+#define AEAD_EXPLICIT_DATA_SIZE		8
+#define TLS_HEADER_LENGTH		5
+#define SCMD_CIPH_MODE_AES_GCM		2
+/* Any MFS size should work and come from openssl */
+#define TLS_MFS				16384
+
+#define RSS_HDR sizeof(struct rss_header)
+#define TLS_WR_CPL_LEN \
+	(sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo))
+
+enum {
+	CHTLS_KEY_CONTEXT_DSGL,
+	CHTLS_KEY_CONTEXT_IMM,
+	CHTLS_KEY_CONTEXT_DDR,
+};
+
+enum {
+	CHTLS_LISTEN_START,
+	CHTLS_LISTEN_STOP,
+};
+
+/* Flags for return value of CPL message handlers */
+enum {
+	CPL_RET_BUF_DONE =    1,   /* buffer processing done */
+	CPL_RET_BAD_MSG =     2,   /* bad CPL message */
+	CPL_RET_UNKNOWN_TID = 4    /* unexpected unknown TID */
+};
+
+#define TLS_RCV_ST_READ_HEADER		0xF0
+#define TLS_RCV_ST_READ_BODY		0xF1
+#define TLS_RCV_ST_READ_DONE		0xF2
+#define TLS_RCV_ST_READ_NB		0xF3
+
+#define LISTEN_INFO_HASH_SIZE 32
+#define RSPQ_HASH_BITS 5
+struct listen_info {
+	struct listen_info *next;  /* Link to next entry */
+	struct sock *sk;           /* The listening socket */
+	unsigned int stid;         /* The server TID */
+};
+
+enum {
+	T4_LISTEN_START_PENDING,
+	T4_LISTEN_STARTED
+};
+
+enum csk_flags {
+	CSK_CALLBACKS_CHKD,	/* socket callbacks have been sanitized */
+	CSK_ABORT_REQ_RCVD,	/* received one ABORT_REQ_RSS message */
+	CSK_TX_MORE_DATA,	/* sending ULP data; don't set SHOVE bit */
+	CSK_TX_WAIT_IDLE,	/* suspend Tx until in-flight data is ACKed */
+	CSK_ABORT_SHUTDOWN,	/* shouldn't send more abort requests */
+	CSK_ABORT_RPL_PENDING,	/* expecting an abort reply */
+	CSK_CLOSE_CON_REQUESTED,/* we've sent a close_conn_req */
+	CSK_TX_DATA_SENT,	/* sent a TX_DATA WR on this connection */
+	CSK_TX_FAILOVER,	/* Tx traffic failing over */
+	CSK_UPDATE_RCV_WND,	/* Need to update rcv window */
+	CSK_RST_ABORTED,	/* outgoing RST was aborted */
+	CSK_TLS_HANDSHK,	/* TLS Handshake */
+	CSK_CONN_INLINE,	/* Connection on HW */
+};
+
+struct listen_ctx {
+	struct sock *lsk;
+	struct chtls_dev *cdev;
+	struct sk_buff_head synq;
+	u32 state;
+};
+
+struct key_map {
+	unsigned long *addr;
+	unsigned int start;
+	unsigned int available;
+	unsigned int size;
+	spinlock_t lock; /* lock for key id request from map */
+} __packed;
+
+struct tls_scmd {
+	u32 seqno_numivs;
+	u32 ivgen_hdrlen;
+};
+
+struct chtls_dev {
+	struct tls_device tlsdev;
+	struct list_head list;
+	struct cxgb4_lld_info *lldi;
+	struct pci_dev *pdev;
+	struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
+	spinlock_t listen_lock; /* lock for listen list */
+	struct net_device **ports;
+	struct tid_info *tids;
+	unsigned int pfvf;
+	const unsigned short *mtus;
+
+	struct idr hwtid_idr;
+	struct idr stid_idr;
+
+	spinlock_t idr_lock ____cacheline_aligned_in_smp;
+
+	struct net_device *egr_dev[NCHAN * 2];
+	struct sk_buff *rspq_skb_cache[1 << RSPQ_HASH_BITS];
+	struct sk_buff *askb;
+
+	struct sk_buff_head deferq;
+	struct work_struct deferq_task;
+
+	struct list_head list_node;
+	struct list_head rcu_node;
+	struct list_head na_node;
+	unsigned int send_page_order;
+	struct key_map kmap;
+};
+
+struct chtls_hws {
+	struct sk_buff_head sk_recv_queue;
+	u8 txqid;
+	u8 ofld;
+	u16 type;
+	u16 rstate;
+	u16 keyrpl;
+	u16 pldlen;
+	u16 rcvpld;
+	u16 compute;
+	u16 expansion;
+	u16 keylen;
+	u16 pdus;
+	u16 adjustlen;
+	u16 ivsize;
+	u16 txleft;
+	u32 mfs;
+	s32 txkey;
+	s32 rxkey;
+	u32 fcplenmax;
+	u32 copied_seq;
+	u64 tx_seq_no;
+	struct tls_scmd scmd;
+	struct tls12_crypto_info_aes_gcm_128 crypto_info;
+};
+
+struct chtls_sock {
+	struct sock *sk;
+	struct chtls_dev *cdev;
+	struct l2t_entry *l2t_entry;    /* pointer to the L2T entry */
+	struct net_device *egress_dev;  /* TX_CHAN for act open retry */
+
+	struct sk_buff_head txq;
+	struct sk_buff *wr_skb_head;
+	struct sk_buff *wr_skb_tail;
+	struct sk_buff *ctrl_skb_cache;
+	struct sk_buff *txdata_skb_cache; /* abort path messages */
+	struct kref kref;
+	unsigned long flags;
+	u32 opt2;
+	u32 wr_credits;
+	u32 wr_unacked;
+	u32 wr_max_credits;
+	u32 wr_nondata;
+	u32 hwtid;               /* TCP Control Block ID */
+	u32 txq_idx;
+	u32 rss_qid;
+	u32 tid;
+	u32 idr;
+	u32 mss;
+	u32 ulp_mode;
+	u32 tx_chan;
+	u32 rx_chan;
+	u32 sndbuf;
+	u32 txplen_max;
+	u32 mtu_idx;           /* MTU table index */
+	u32 smac_idx;
+	u8 port_id;
+	u8 tos;
+	u16 resv2;
+	u32 delack_mode;
+	u32 delack_seq;
+
+	void *passive_reap_next;        /* placeholder for passive */
+	struct chtls_hws tlshws;
+	struct synq {
+		struct sk_buff *next;
+		struct sk_buff *prev;
+	} synq;
+	struct listen_ctx *listen_ctx;
+};
+
+struct tls_hdr {
+	u8  type;
+	u16 version;
+	u16 length;
+} __packed;
+
+struct tlsrx_cmp_hdr {
+	u8  type;
+	u16 version;
+	u16 length;
+
+	u64 tls_seq;
+	u16 reserved1;
+	u8  res_to_mac_error;
+} __packed;
+
+/* res_to_mac_error fields */
+#define TLSRX_HDR_PKT_INT_ERROR_S   4
+#define TLSRX_HDR_PKT_INT_ERROR_M   0x1
+#define TLSRX_HDR_PKT_INT_ERROR_V(x) \
+	((x) << TLSRX_HDR_PKT_INT_ERROR_S)
+#define TLSRX_HDR_PKT_INT_ERROR_G(x) \
+	(((x) >> TLSRX_HDR_PKT_INT_ERROR_S) & TLSRX_HDR_PKT_INT_ERROR_M)
+#define TLSRX_HDR_PKT_INT_ERROR_F   TLSRX_HDR_PKT_INT_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_SPP_ERROR_S        3
+#define TLSRX_HDR_PKT_SPP_ERROR_M        0x1
+#define TLSRX_HDR_PKT_SPP_ERROR_V(x)     ((x) << TLSRX_HDR_PKT_SPP_ERROR)
+#define TLSRX_HDR_PKT_SPP_ERROR_G(x)     \
+	(((x) >> TLSRX_HDR_PKT_SPP_ERROR_S) & TLSRX_HDR_PKT_SPP_ERROR_M)
+#define TLSRX_HDR_PKT_SPP_ERROR_F        TLSRX_HDR_PKT_SPP_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_CCDX_ERROR_S       2
+#define TLSRX_HDR_PKT_CCDX_ERROR_M       0x1
+#define TLSRX_HDR_PKT_CCDX_ERROR_V(x)    ((x) << TLSRX_HDR_PKT_CCDX_ERROR_S)
+#define TLSRX_HDR_PKT_CCDX_ERROR_G(x)    \
+	(((x) >> TLSRX_HDR_PKT_CCDX_ERROR_S) & TLSRX_HDR_PKT_CCDX_ERROR_M)
+#define TLSRX_HDR_PKT_CCDX_ERROR_F       TLSRX_HDR_PKT_CCDX_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_PAD_ERROR_S        1
+#define TLSRX_HDR_PKT_PAD_ERROR_M        0x1
+#define TLSRX_HDR_PKT_PAD_ERROR_V(x)     ((x) << TLSRX_HDR_PKT_PAD_ERROR_S)
+#define TLSRX_HDR_PKT_PAD_ERROR_G(x)     \
+	(((x) >> TLSRX_HDR_PKT_PAD_ERROR_S) & TLSRX_HDR_PKT_PAD_ERROR_M)
+#define TLSRX_HDR_PKT_PAD_ERROR_F        TLSRX_HDR_PKT_PAD_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_MAC_ERROR_S        0
+#define TLSRX_HDR_PKT_MAC_ERROR_M        0x1
+#define TLSRX_HDR_PKT_MAC_ERROR_V(x)     ((x) << TLSRX_HDR_PKT_MAC_ERROR)
+#define TLSRX_HDR_PKT_MAC_ERROR_G(x)     \
+	(((x) >> S_TLSRX_HDR_PKT_MAC_ERROR_S) & TLSRX_HDR_PKT_MAC_ERROR_M)
+#define TLSRX_HDR_PKT_MAC_ERROR_F        TLSRX_HDR_PKT_MAC_ERROR_V(1U)
+
+#define TLSRX_HDR_PKT_ERROR_M           0x1F
+
+struct ulp_mem_rw {
+	__be32 cmd;
+	__be32 len16;             /* command length */
+	__be32 dlen;              /* data length in 32-byte units */
+	__be32 lock_addr;
+};
+
+struct tls_key_wr {
+	__be32 op_to_compl;
+	__be32 flowid_len16;
+	__be32 ftid;
+	u8   reneg_to_write_rx;
+	u8   protocol;
+	__be16 mfs;
+};
+
+struct tls_key_req {
+	struct tls_key_wr wr;
+	struct ulp_mem_rw req;
+	struct ulptx_idata sc_imm;
+};
+
+/*
+ * This lives in skb->cb and is used to chain WRs in a linked list.
+ */
+struct wr_skb_cb {
+	struct l2t_skb_cb l2t;          /* reserve space for l2t CB */
+	struct sk_buff *next_wr;        /* next write request */
+};
+
+/* Per-skb backlog handler.  Run when a socket's backlog is processed. */
+struct blog_skb_cb {
+	void (*backlog_rcv)(struct sock *sk, struct sk_buff *skb);
+	struct chtls_dev *cdev;
+};
+
+/*
+ * Similar to tcp_skb_cb but with ULP elements added to support TLS,
+ * etc.
+ */
+struct ulp_skb_cb {
+	struct wr_skb_cb wr;		/* reserve space for write request */
+	u16 flags;			/* TCP-like flags */
+	u8 psh;
+	u8 ulp_mode;			/* ULP mode/submode of sk_buff */
+	u32 seq;			/* TCP sequence number */
+	union { /* ULP-specific fields */
+		struct {
+			u8  type;
+			u8  ofld;
+			u8  iv;
+		} tls;
+	} ulp;
+};
+
+#define ULP_SKB_CB(skb) ((struct ulp_skb_cb *)&((skb)->cb[0]))
+#define BLOG_SKB_CB(skb) ((struct blog_skb_cb *)(skb)->cb)
+
+/*
+ * Flags for ulp_skb_cb.flags.
+ */
+enum {
+	ULPCB_FLAG_NEED_HDR  = 1 << 0,	/* packet needs a TX_DATA_WR header */
+	ULPCB_FLAG_NO_APPEND = 1 << 1,	/* don't grow this skb */
+	ULPCB_FLAG_BARRIER   = 1 << 2,	/* set TX_WAIT_IDLE after sending */
+	ULPCB_FLAG_HOLD      = 1 << 3,	/* skb not ready for Tx yet */
+	ULPCB_FLAG_COMPL     = 1 << 4,	/* request WR completion */
+	ULPCB_FLAG_URG       = 1 << 5,	/* urgent data */
+	ULPCB_FLAG_TLS_ND    = 1 << 6, /* payload of zero length */
+	ULPCB_FLAG_NO_HDR    = 1 << 7, /* not a ofld wr */
+};
+
+/* The ULP mode/submode of an skbuff */
+#define skb_ulp_mode(skb)  (ULP_SKB_CB(skb)->ulp_mode)
+#define TCP_PAGE(sk)   (sk->sk_frag.page)
+#define TCP_OFF(sk)    (sk->sk_frag.offset)
+
+static inline struct chtls_dev *to_chtls_dev(struct tls_device *tlsdev)
+{
+	return container_of(tlsdev, struct chtls_dev, tlsdev);
+}
+
+static inline void csk_set_flag(struct chtls_sock *csk,
+				enum csk_flags flag)
+{
+	__set_bit(flag, &csk->flags);
+}
+
+static inline void csk_reset_flag(struct chtls_sock *csk,
+				  enum csk_flags flag)
+{
+	__clear_bit(flag, &csk->flags);
+}
+
+static inline bool csk_conn_inline(const struct chtls_sock *csk)
+{
+	return test_bit(CSK_CONN_INLINE, &csk->flags);
+}
+
+static inline int csk_flag(const struct sock *sk, enum csk_flags flag)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	if (!csk_conn_inline(csk))
+		return 0;
+	return test_bit(flag, &csk->flags);
+}
+
+static inline int csk_flag_nochk(const struct chtls_sock *csk,
+				 enum csk_flags flag)
+{
+	return test_bit(flag, &csk->flags);
+}
+
+static inline void *cplhdr(struct sk_buff *skb)
+{
+	return skb->data;
+}
+
+static inline int is_neg_adv(unsigned int status)
+{
+	return status == CPL_ERR_RTX_NEG_ADVICE ||
+	       status == CPL_ERR_KEEPALV_NEG_ADVICE ||
+	       status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
+static inline void process_cpl_msg(void (*fn)(struct sock *, struct sk_buff *),
+				   struct sock *sk,
+				   struct sk_buff *skb)
+{
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
+	bh_lock_sock(sk);
+	if (unlikely(sock_owned_by_user(sk))) {
+		BLOG_SKB_CB(skb)->backlog_rcv = fn;
+		__sk_add_backlog(sk, skb);
+	} else {
+		fn(sk, skb);
+	}
+	bh_unlock_sock(sk);
+}
+
+static inline void chtls_sock_free(struct kref *ref)
+{
+	struct chtls_sock *csk = container_of(ref, struct chtls_sock,
+					      kref);
+	kfree(csk);
+}
+
+static inline void __chtls_sock_put(const char *fn, struct chtls_sock *csk)
+{
+	kref_put(&csk->kref, chtls_sock_free);
+}
+
+static inline void __chtls_sock_get(const char *fn,
+				    struct chtls_sock *csk)
+{
+	kref_get(&csk->kref);
+}
+
+static inline void send_or_defer(struct sock *sk, struct tcp_sock *tp,
+				 struct sk_buff *skb, int through_l2t)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	if (through_l2t) {
+		/* send through L2T */
+		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
+	} else {
+		/* send directly */
+		cxgb4_ofld_send(csk->egress_dev, skb);
+	}
+}
+
+typedef int (*chtls_handler_func)(struct chtls_dev *, struct sk_buff *);
+extern chtls_handler_func chtls_handlers[NUM_CPL_CMDS];
+void chtls_install_cpl_ops(struct sock *sk);
+int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi);
+void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk);
+int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk);
+void chtls_close(struct sock *sk, long timeout);
+int chtls_disconnect(struct sock *sk, int flags);
+void chtls_shutdown(struct sock *sk, int how);
+void chtls_destroy_sock(struct sock *sk);
+int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int chtls_recvmsg(struct sock *sk, struct msghdr *msg,
+		  size_t len, int nonblock, int flags, int *addr_len);
+int chtls_sendpage(struct sock *sk, struct page *page,
+		   int offset, size_t size, int flags);
+int send_tx_flowc_wr(struct sock *sk, int compl,
+		     u32 snd_nxt, u32 rcv_nxt);
+void chtls_tcp_push(struct sock *sk, int flags);
+int chtls_push_frames(struct chtls_sock *csk, int comp);
+int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val);
+int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode);
+void skb_entail(struct sock *sk, struct sk_buff *skb, int flags);
+unsigned int keyid_to_addr(int start_addr, int keyid);
+void free_tls_keyid(struct sock *sk);
+#endif
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.h b/drivers/crypto/chelsio/chtls/chtls_cm.h
new file mode 100644
index 000000000000..78eb3afa3a80
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CHTLS_CM_H__
+#define __CHTLS_CM_H__
+
+/*
+ * TCB settings
+ */
+/* 3:0 */
+#define TCB_ULP_TYPE_W    0
+#define TCB_ULP_TYPE_S    0
+#define TCB_ULP_TYPE_M    0xfULL
+#define TCB_ULP_TYPE_V(x) ((x) << TCB_ULP_TYPE_S)
+
+/* 11:4 */
+#define TCB_ULP_RAW_W    0
+#define TCB_ULP_RAW_S    4
+#define TCB_ULP_RAW_M    0xffULL
+#define TCB_ULP_RAW_V(x) ((x) << TCB_ULP_RAW_S)
+
+#define TF_TLS_KEY_SIZE_S    7
+#define TF_TLS_KEY_SIZE_V(x) ((x) << TF_TLS_KEY_SIZE_S)
+
+#define TF_TLS_CONTROL_S     2
+#define TF_TLS_CONTROL_V(x) ((x) << TF_TLS_CONTROL_S)
+
+#define TF_TLS_ACTIVE_S      1
+#define TF_TLS_ACTIVE_V(x) ((x) << TF_TLS_ACTIVE_S)
+
+#define TF_TLS_ENABLE_S      0
+#define TF_TLS_ENABLE_V(x) ((x) << TF_TLS_ENABLE_S)
+
+#define TF_RX_QUIESCE_S    15
+#define TF_RX_QUIESCE_V(x) ((x) << TF_RX_QUIESCE_S)
+
+/*
+ * Max receive window supported by HW in bytes.  Only a small part of it can
+ * be set through option0, the rest needs to be set through RX_DATA_ACK.
+ */
+#define MAX_RCV_WND ((1U << 27) - 1)
+#define MAX_MSS     65536
+
+/*
+ * Min receive window.  We want it to be large enough to accommodate receive
+ * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
+ */
+#define MIN_RCV_WND (24 * 1024U)
+#define LOOPBACK(x)     (((x) & htonl(0xff000000)) == htonl(0x7f000000))
+
+/* ulp_mem_io + ulptx_idata + payload + padding */
+#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8)
+
+/* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */
+#define TX_HEADER_LEN \
+	(sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr))
+#define TX_TLSHDR_LEN \
+	(sizeof(struct fw_tlstx_data_wr) + sizeof(struct cpl_tx_tls_sfo) + \
+	 sizeof(struct sge_opaque_hdr))
+#define TXDATA_SKB_LEN 128
+
+enum {
+	CPL_TX_TLS_SFO_TYPE_CCS,
+	CPL_TX_TLS_SFO_TYPE_ALERT,
+	CPL_TX_TLS_SFO_TYPE_HANDSHAKE,
+	CPL_TX_TLS_SFO_TYPE_DATA,
+	CPL_TX_TLS_SFO_TYPE_HEARTBEAT,
+};
+
+enum {
+	TLS_HDR_TYPE_CCS = 20,
+	TLS_HDR_TYPE_ALERT,
+	TLS_HDR_TYPE_HANDSHAKE,
+	TLS_HDR_TYPE_RECORD,
+	TLS_HDR_TYPE_HEARTBEAT,
+};
+
+typedef void (*defer_handler_t)(struct chtls_dev *dev, struct sk_buff *skb);
+extern struct request_sock_ops chtls_rsk_ops;
+
+struct deferred_skb_cb {
+	defer_handler_t handler;
+	struct chtls_dev *dev;
+};
+
+#define DEFERRED_SKB_CB(skb) ((struct deferred_skb_cb *)(skb)->cb)
+#define failover_flowc_wr_len offsetof(struct fw_flowc_wr, mnemval[3])
+#define WR_SKB_CB(skb) ((struct wr_skb_cb *)(skb)->cb)
+#define ACCEPT_QUEUE(sk) (&inet_csk(sk)->icsk_accept_queue.rskq_accept_head)
+
+#define SND_WSCALE(tp) ((tp)->rx_opt.snd_wscale)
+#define RCV_WSCALE(tp) ((tp)->rx_opt.rcv_wscale)
+#define USER_MSS(tp) ((tp)->rx_opt.user_mss)
+#define TS_RECENT_STAMP(tp) ((tp)->rx_opt.ts_recent_stamp)
+#define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok)
+#define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok)
+#define SACK_OK(tp) ((tp)->rx_opt.sack_ok)
+#define INC_ORPHAN_COUNT(sk) percpu_counter_inc((sk)->sk_prot->orphan_count)
+
+/* TLS SKB */
+#define skb_ulp_tls_inline(skb)      (ULP_SKB_CB(skb)->ulp.tls.ofld)
+#define skb_ulp_tls_iv_imm(skb)      (ULP_SKB_CB(skb)->ulp.tls.iv)
+
+void chtls_defer_reply(struct sk_buff *skb, struct chtls_dev *dev,
+		       defer_handler_t handler);
+
+/*
+ * Returns true if the socket is in one of the supplied states.
+ */
+static inline unsigned int sk_in_state(const struct sock *sk,
+				       unsigned int states)
+{
+	return states & (1 << sk->sk_state);
+}
+
+static void chtls_rsk_destructor(struct request_sock *req)
+{
+	/* do nothing */
+}
+
+static inline void chtls_init_rsk_ops(struct proto *chtls_tcp_prot,
+				      struct request_sock_ops *chtls_tcp_ops,
+				      struct proto *tcp_prot, int family)
+{
+	memset(chtls_tcp_ops, 0, sizeof(*chtls_tcp_ops));
+	chtls_tcp_ops->family = family;
+	chtls_tcp_ops->obj_size = sizeof(struct tcp_request_sock);
+	chtls_tcp_ops->destructor = chtls_rsk_destructor;
+	chtls_tcp_ops->slab = tcp_prot->rsk_prot->slab;
+	chtls_tcp_prot->rsk_prot = chtls_tcp_ops;
+}
+
+static inline void chtls_reqsk_free(struct request_sock *req)
+{
+	if (req->rsk_listener)
+		sock_put(req->rsk_listener);
+	kmem_cache_free(req->rsk_ops->slab, req);
+}
+
+#define DECLARE_TASK_FUNC(task, task_param) \
+		static void task(struct work_struct *task_param)
+
+static inline void sk_wakeup_sleepers(struct sock *sk, bool interruptable)
+{
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq)) {
+		if (interruptable)
+			wake_up_interruptible(sk_sleep(sk));
+		else
+			wake_up_all(sk_sleep(sk));
+	}
+	rcu_read_unlock();
+}
+
+static inline void chtls_set_req_port(struct request_sock *oreq,
+				      __be16 source, __be16 dest)
+{
+	inet_rsk(oreq)->ir_rmt_port = source;
+	inet_rsk(oreq)->ir_num = ntohs(dest);
+}
+
+static inline void chtls_set_req_addr(struct request_sock *oreq,
+				      __be32 local_ip, __be32 peer_ip)
+{
+	inet_rsk(oreq)->ir_loc_addr = local_ip;
+	inet_rsk(oreq)->ir_rmt_addr = peer_ip;
+}
+
+static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb)
+{
+	skb_dst_set(skb, NULL);
+	__skb_unlink(skb, &sk->sk_receive_queue);
+	__kfree_skb(skb);
+}
+
+static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb)
+{
+	skb_dst_set(skb, NULL);
+	__skb_unlink(skb, &sk->sk_receive_queue);
+	kfree_skb(skb);
+}
+
+static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb)
+{
+	WR_SKB_CB(skb)->next_wr = NULL;
+
+	skb_get(skb);
+
+	if (!csk->wr_skb_head)
+		csk->wr_skb_head = skb;
+	else
+		WR_SKB_CB(csk->wr_skb_tail)->next_wr = skb;
+	csk->wr_skb_tail = skb;
+}
+#endif

From a089439478734a6a0aa2eabbc03113e0c34db282 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:58 +0530
Subject: [PATCH 1624/1678] crypto: chtls - Register chtls with net tls

Register chtls as Inline TLS driver, chtls is ULD to cxgb4.
Setsockopt to program (tx/rx) keys on chip.
Support AES GCM of key size 128.
Support both Inline Rx and Tx.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Reviewed-by: Casey Leedom <leedom@chelsio.com>
Reviewed-by: Michael Werner <werner@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_main.c | 575 ++++++++++++++++++++++
 1 file changed, 575 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_main.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
new file mode 100644
index 000000000000..04b316f86ebd
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gupta@chelsio.com)
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/hash.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+#include <net/tls.h>
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+#define DRV_NAME "chtls"
+
+/*
+ * chtls device management
+ * maintains a list of the chtls devices
+ */
+static LIST_HEAD(cdev_list);
+static DEFINE_MUTEX(cdev_mutex);
+static DEFINE_MUTEX(cdev_list_lock);
+
+static DEFINE_MUTEX(notify_mutex);
+static RAW_NOTIFIER_HEAD(listen_notify_list);
+static struct proto chtls_cpl_prot;
+struct request_sock_ops chtls_rsk_ops;
+static uint send_page_order = (14 - PAGE_SHIFT < 0) ? 0 : 14 - PAGE_SHIFT;
+
+static void register_listen_notifier(struct notifier_block *nb)
+{
+	mutex_lock(&notify_mutex);
+	raw_notifier_chain_register(&listen_notify_list, nb);
+	mutex_unlock(&notify_mutex);
+}
+
+static void unregister_listen_notifier(struct notifier_block *nb)
+{
+	mutex_lock(&notify_mutex);
+	raw_notifier_chain_unregister(&listen_notify_list, nb);
+	mutex_unlock(&notify_mutex);
+}
+
+static int listen_notify_handler(struct notifier_block *this,
+				 unsigned long event, void *data)
+{
+	struct chtls_dev *cdev;
+	struct sock *sk;
+	int ret;
+
+	sk = data;
+	ret =  NOTIFY_DONE;
+
+	switch (event) {
+	case CHTLS_LISTEN_START:
+	case CHTLS_LISTEN_STOP:
+		mutex_lock(&cdev_list_lock);
+		list_for_each_entry(cdev, &cdev_list, list) {
+			if (event == CHTLS_LISTEN_START)
+				ret = chtls_listen_start(cdev, sk);
+			else
+				chtls_listen_stop(cdev, sk);
+		}
+		mutex_unlock(&cdev_list_lock);
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block listen_notifier = {
+	.notifier_call = listen_notify_handler
+};
+
+static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	if (likely(skb_transport_header(skb) != skb_network_header(skb)))
+		return tcp_v4_do_rcv(sk, skb);
+	BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
+	return 0;
+}
+
+static int chtls_start_listen(struct sock *sk)
+{
+	int err;
+
+	if (sk->sk_protocol != IPPROTO_TCP)
+		return -EPROTONOSUPPORT;
+
+	if (sk->sk_family == PF_INET &&
+	    LOOPBACK(inet_sk(sk)->inet_rcv_saddr))
+		return -EADDRNOTAVAIL;
+
+	sk->sk_backlog_rcv = listen_backlog_rcv;
+	mutex_lock(&notify_mutex);
+	err = raw_notifier_call_chain(&listen_notify_list,
+				      CHTLS_LISTEN_START, sk);
+	mutex_unlock(&notify_mutex);
+	return err;
+}
+
+static void chtls_stop_listen(struct sock *sk)
+{
+	if (sk->sk_protocol != IPPROTO_TCP)
+		return;
+
+	mutex_lock(&notify_mutex);
+	raw_notifier_call_chain(&listen_notify_list,
+				CHTLS_LISTEN_STOP, sk);
+	mutex_unlock(&notify_mutex);
+}
+
+static int chtls_inline_feature(struct tls_device *dev)
+{
+	struct net_device *netdev;
+	struct chtls_dev *cdev;
+	int i;
+
+	cdev = to_chtls_dev(dev);
+
+	for (i = 0; i < cdev->lldi->nports; i++) {
+		netdev = cdev->ports[i];
+		if (netdev->features & NETIF_F_HW_TLS_RECORD)
+			return 1;
+	}
+	return 0;
+}
+
+static int chtls_create_hash(struct tls_device *dev, struct sock *sk)
+{
+	if (sk->sk_state == TCP_LISTEN)
+		return chtls_start_listen(sk);
+	return 0;
+}
+
+static void chtls_destroy_hash(struct tls_device *dev, struct sock *sk)
+{
+	if (sk->sk_state == TCP_LISTEN)
+		chtls_stop_listen(sk);
+}
+
+static void chtls_register_dev(struct chtls_dev *cdev)
+{
+	struct tls_device *tlsdev = &cdev->tlsdev;
+
+	strlcpy(tlsdev->name, "chtls", TLS_DEVICE_NAME_MAX);
+	strlcat(tlsdev->name, cdev->lldi->ports[0]->name,
+		TLS_DEVICE_NAME_MAX);
+	tlsdev->feature = chtls_inline_feature;
+	tlsdev->hash = chtls_create_hash;
+	tlsdev->unhash = chtls_destroy_hash;
+	tls_register_device(&cdev->tlsdev);
+}
+
+static void chtls_unregister_dev(struct chtls_dev *cdev)
+{
+	tls_unregister_device(&cdev->tlsdev);
+}
+
+static void process_deferq(struct work_struct *task_param)
+{
+	struct chtls_dev *cdev = container_of(task_param,
+				struct chtls_dev, deferq_task);
+	struct sk_buff *skb;
+
+	spin_lock_bh(&cdev->deferq.lock);
+	while ((skb = __skb_dequeue(&cdev->deferq)) != NULL) {
+		spin_unlock_bh(&cdev->deferq.lock);
+		DEFERRED_SKB_CB(skb)->handler(cdev, skb);
+		spin_lock_bh(&cdev->deferq.lock);
+	}
+	spin_unlock_bh(&cdev->deferq.lock);
+}
+
+static int chtls_get_skb(struct chtls_dev *cdev)
+{
+	cdev->askb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
+	if (!cdev->askb)
+		return -ENOMEM;
+
+	skb_put(cdev->askb, sizeof(struct tcphdr));
+	skb_reset_transport_header(cdev->askb);
+	memset(cdev->askb->data, 0, cdev->askb->len);
+	return 0;
+}
+
+static void *chtls_uld_add(const struct cxgb4_lld_info *info)
+{
+	struct cxgb4_lld_info *lldi;
+	struct chtls_dev *cdev;
+	int i, j;
+
+	cdev = kzalloc(sizeof(*cdev) + info->nports *
+		      (sizeof(struct net_device *)), GFP_KERNEL);
+	if (!cdev)
+		goto out;
+
+	lldi = kzalloc(sizeof(*lldi), GFP_KERNEL);
+	if (!lldi)
+		goto out_lldi;
+
+	if (chtls_get_skb(cdev))
+		goto out_skb;
+
+	*lldi = *info;
+	cdev->lldi = lldi;
+	cdev->pdev = lldi->pdev;
+	cdev->tids = lldi->tids;
+	cdev->ports = (struct net_device **)(cdev + 1);
+	cdev->ports = lldi->ports;
+	cdev->mtus = lldi->mtus;
+	cdev->tids = lldi->tids;
+	cdev->pfvf = FW_VIID_PFN_G(cxgb4_port_viid(lldi->ports[0]))
+			<< FW_VIID_PFN_S;
+
+	for (i = 0; i < (1 << RSPQ_HASH_BITS); i++) {
+		unsigned int size = 64 - sizeof(struct rsp_ctrl) - 8;
+
+		cdev->rspq_skb_cache[i] = __alloc_skb(size,
+						      gfp_any(), 0,
+						      lldi->nodeid);
+		if (unlikely(!cdev->rspq_skb_cache[i]))
+			goto out_rspq_skb;
+	}
+
+	idr_init(&cdev->hwtid_idr);
+	INIT_WORK(&cdev->deferq_task, process_deferq);
+	spin_lock_init(&cdev->listen_lock);
+	spin_lock_init(&cdev->idr_lock);
+	cdev->send_page_order = min_t(uint, get_order(32768),
+				      send_page_order);
+
+	if (lldi->vr->key.size)
+		if (chtls_init_kmap(cdev, lldi))
+			goto out_rspq_skb;
+
+	mutex_lock(&cdev_mutex);
+	list_add_tail(&cdev->list, &cdev_list);
+	mutex_unlock(&cdev_mutex);
+
+	return cdev;
+out_rspq_skb:
+	for (j = 0; j <= i; j++)
+		kfree_skb(cdev->rspq_skb_cache[j]);
+	kfree_skb(cdev->askb);
+out_skb:
+	kfree(lldi);
+out_lldi:
+	kfree(cdev);
+out:
+	return NULL;
+}
+
+static void chtls_free_uld(struct chtls_dev *cdev)
+{
+	int i;
+
+	chtls_unregister_dev(cdev);
+	kvfree(cdev->kmap.addr);
+	idr_destroy(&cdev->hwtid_idr);
+	for (i = 0; i < (1 << RSPQ_HASH_BITS); i++)
+		kfree_skb(cdev->rspq_skb_cache[i]);
+	kfree(cdev->lldi);
+	if (cdev->askb)
+		kfree_skb(cdev->askb);
+	kfree(cdev);
+}
+
+static void chtls_free_all_uld(void)
+{
+	struct chtls_dev *cdev, *tmp;
+
+	mutex_lock(&cdev_mutex);
+	list_for_each_entry_safe(cdev, tmp, &cdev_list, list)
+		chtls_free_uld(cdev);
+	mutex_unlock(&cdev_mutex);
+}
+
+static int chtls_uld_state_change(void *handle, enum cxgb4_state new_state)
+{
+	struct chtls_dev *cdev = handle;
+
+	switch (new_state) {
+	case CXGB4_STATE_UP:
+		chtls_register_dev(cdev);
+		break;
+	case CXGB4_STATE_DOWN:
+		break;
+	case CXGB4_STATE_START_RECOVERY:
+		break;
+	case CXGB4_STATE_DETACH:
+		mutex_lock(&cdev_mutex);
+		list_del(&cdev->list);
+		mutex_unlock(&cdev_mutex);
+		chtls_free_uld(cdev);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
+					  const __be64 *rsp,
+					  u32 pktshift)
+{
+	struct sk_buff *skb;
+
+	/* Allocate space for cpl_pass_accpet_req which will be synthesized by
+	 * driver. Once driver synthesizes cpl_pass_accpet_req the skb will go
+	 * through the regular cpl_pass_accept_req processing in TOM.
+	 */
+	skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req)
+			- pktshift, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
+	__skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req)
+		   - pktshift);
+	/* For now we will copy  cpl_rx_pkt in the skb */
+	skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_rx_pkt));
+	skb_copy_to_linear_data_offset(skb, sizeof(struct cpl_pass_accept_req)
+				       , gl->va + pktshift,
+				       gl->tot_len - pktshift);
+
+	return skb;
+}
+
+static int chtls_recv_packet(struct chtls_dev *cdev,
+			     const struct pkt_gl *gl, const __be64 *rsp)
+{
+	unsigned int opcode = *(u8 *)rsp;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = copy_gl_to_skb_pkt(gl, rsp, cdev->lldi->sge_pktshift);
+	if (!skb)
+		return -ENOMEM;
+
+	ret = chtls_handlers[opcode](cdev, skb);
+	if (ret & CPL_RET_BUF_DONE)
+		kfree_skb(skb);
+
+	return 0;
+}
+
+static int chtls_recv_rsp(struct chtls_dev *cdev, const __be64 *rsp)
+{
+	unsigned long rspq_bin;
+	unsigned int opcode;
+	struct sk_buff *skb;
+	unsigned int len;
+	int ret;
+
+	len = 64 - sizeof(struct rsp_ctrl) - 8;
+	opcode = *(u8 *)rsp;
+
+	rspq_bin = hash_ptr((void *)rsp, RSPQ_HASH_BITS);
+	skb = cdev->rspq_skb_cache[rspq_bin];
+	if (skb && !skb_is_nonlinear(skb) &&
+	    !skb_shared(skb) && !skb_cloned(skb)) {
+		refcount_inc(&skb->users);
+		if (refcount_read(&skb->users) == 2) {
+			__skb_trim(skb, 0);
+			if (skb_tailroom(skb) >= len)
+				goto copy_out;
+		}
+		refcount_dec(&skb->users);
+	}
+	skb = alloc_skb(len, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+copy_out:
+	__skb_put(skb, len);
+	skb_copy_to_linear_data(skb, rsp, len);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	ret = chtls_handlers[opcode](cdev, skb);
+
+	if (ret & CPL_RET_BUF_DONE)
+		kfree_skb(skb);
+	return 0;
+}
+
+static void chtls_recv(struct chtls_dev *cdev,
+		       struct sk_buff **skbs, const __be64 *rsp)
+{
+	struct sk_buff *skb = *skbs;
+	unsigned int opcode;
+	int ret;
+
+	opcode = *(u8 *)rsp;
+
+	__skb_push(skb, sizeof(struct rss_header));
+	skb_copy_to_linear_data(skb, rsp, sizeof(struct rss_header));
+
+	ret = chtls_handlers[opcode](cdev, skb);
+	if (ret & CPL_RET_BUF_DONE)
+		kfree_skb(skb);
+}
+
+static int chtls_uld_rx_handler(void *handle, const __be64 *rsp,
+				const struct pkt_gl *gl)
+{
+	struct chtls_dev *cdev = handle;
+	unsigned int opcode;
+	struct sk_buff *skb;
+
+	opcode = *(u8 *)rsp;
+
+	if (unlikely(opcode == CPL_RX_PKT)) {
+		if (chtls_recv_packet(cdev, gl, rsp) < 0)
+			goto nomem;
+		return 0;
+	}
+
+	if (!gl)
+		return chtls_recv_rsp(cdev, rsp);
+
+#define RX_PULL_LEN 128
+	skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN);
+	if (unlikely(!skb))
+		goto nomem;
+	chtls_recv(cdev, &skb, rsp);
+	return 0;
+
+nomem:
+	return -ENOMEM;
+}
+
+static int do_chtls_getsockopt(struct sock *sk, char __user *optval,
+			       int __user *optlen)
+{
+	struct tls_crypto_info crypto_info;
+
+	crypto_info.version = TLS_1_2_VERSION;
+	if (copy_to_user(optval, &crypto_info, sizeof(struct tls_crypto_info)))
+		return -EFAULT;
+	return 0;
+}
+
+static int chtls_getsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, int __user *optlen)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (level != SOL_TLS)
+		return ctx->getsockopt(sk, level, optname, optval, optlen);
+
+	return do_chtls_getsockopt(sk, optval, optlen);
+}
+
+static int do_chtls_setsockopt(struct sock *sk, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct tls_crypto_info *crypto_info, tmp_crypto_info;
+	struct chtls_sock *csk;
+	int keylen;
+	int rc = 0;
+
+	csk = rcu_dereference_sk_user_data(sk);
+
+	if (!optval || optlen < sizeof(*crypto_info)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info));
+	if (rc) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	/* check version */
+	if (tmp_crypto_info.version != TLS_1_2_VERSION) {
+		rc = -ENOTSUPP;
+		goto out;
+	}
+
+	crypto_info = (struct tls_crypto_info *)&csk->tlshws.crypto_info;
+
+	switch (tmp_crypto_info.cipher_type) {
+	case TLS_CIPHER_AES_GCM_128: {
+		rc = copy_from_user(crypto_info, optval,
+				    sizeof(struct
+					   tls12_crypto_info_aes_gcm_128));
+
+		if (rc) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+		keylen = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
+		rc = chtls_setkey(csk, keylen, optname);
+		break;
+	}
+	default:
+		rc = -EINVAL;
+		goto out;
+	}
+out:
+	return rc;
+}
+
+static int chtls_setsockopt(struct sock *sk, int level, int optname,
+			    char __user *optval, unsigned int optlen)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (level != SOL_TLS)
+		return ctx->setsockopt(sk, level, optname, optval, optlen);
+
+	return do_chtls_setsockopt(sk, optname, optval, optlen);
+}
+
+static struct cxgb4_uld_info chtls_uld_info = {
+	.name = DRV_NAME,
+	.nrxq = MAX_ULD_QSETS,
+	.ntxq = MAX_ULD_QSETS,
+	.rxq_size = 1024,
+	.add = chtls_uld_add,
+	.state_change = chtls_uld_state_change,
+	.rx_handler = chtls_uld_rx_handler,
+};
+
+void chtls_install_cpl_ops(struct sock *sk)
+{
+	sk->sk_prot = &chtls_cpl_prot;
+}
+
+static void __init chtls_init_ulp_ops(void)
+{
+	chtls_cpl_prot			= tcp_prot;
+	chtls_init_rsk_ops(&chtls_cpl_prot, &chtls_rsk_ops,
+			   &tcp_prot, PF_INET);
+	chtls_cpl_prot.close		= chtls_close;
+	chtls_cpl_prot.disconnect	= chtls_disconnect;
+	chtls_cpl_prot.destroy		= chtls_destroy_sock;
+	chtls_cpl_prot.shutdown		= chtls_shutdown;
+	chtls_cpl_prot.setsockopt	= chtls_setsockopt;
+	chtls_cpl_prot.getsockopt	= chtls_getsockopt;
+}
+
+static int __init chtls_register(void)
+{
+	chtls_init_ulp_ops();
+	register_listen_notifier(&listen_notifier);
+	cxgb4_register_uld(CXGB4_ULD_TLS, &chtls_uld_info);
+	return 0;
+}
+
+static void __exit chtls_unregister(void)
+{
+	unregister_listen_notifier(&listen_notifier);
+	chtls_free_all_uld();
+	cxgb4_unregister_uld(CXGB4_ULD_TLS);
+}
+
+module_init(chtls_register);
+module_exit(chtls_unregister);
+
+MODULE_DESCRIPTION("Chelsio TLS Inline driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chelsio Communications");
+MODULE_VERSION(DRV_VERSION);

From cc35c88ae4db219611e204375d6a4248bc0e84d6 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:41:59 +0530
Subject: [PATCH 1625/1678] crypto : chtls - CPL handler definition

Exchange messages with hardware to program the TLS session
CPL handlers for messages received from chip.

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: Michael Werner <werner@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_cm.c | 2126 +++++++++++++++++++++++
 net/ipv4/tcp_minisocks.c                |    1 +
 2 files changed, 2127 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_cm.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
new file mode 100644
index 000000000000..82a473a0cefa
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -0,0 +1,2126 @@
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gupta@chelsio.com)
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sched/signal.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/if_vlan.h>
+#include <net/tcp.h>
+#include <net/dst.h>
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+/*
+ * State transitions and actions for close.  Note that if we are in SYN_SENT
+ * we remain in that state as we cannot control a connection while it's in
+ * SYN_SENT; such connections are allowed to establish and are then aborted.
+ */
+static unsigned char new_state[16] = {
+	/* current state:     new state:      action: */
+	/* (Invalid)       */ TCP_CLOSE,
+	/* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+	/* TCP_SYN_SENT    */ TCP_SYN_SENT,
+	/* TCP_SYN_RECV    */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
+	/* TCP_FIN_WAIT1   */ TCP_FIN_WAIT1,
+	/* TCP_FIN_WAIT2   */ TCP_FIN_WAIT2,
+	/* TCP_TIME_WAIT   */ TCP_CLOSE,
+	/* TCP_CLOSE       */ TCP_CLOSE,
+	/* TCP_CLOSE_WAIT  */ TCP_LAST_ACK | TCP_ACTION_FIN,
+	/* TCP_LAST_ACK    */ TCP_LAST_ACK,
+	/* TCP_LISTEN      */ TCP_CLOSE,
+	/* TCP_CLOSING     */ TCP_CLOSING,
+};
+
+static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev)
+{
+	struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC);
+
+	if (!csk)
+		return NULL;
+
+	csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC);
+	if (!csk->txdata_skb_cache) {
+		kfree(csk);
+		return NULL;
+	}
+
+	kref_init(&csk->kref);
+	csk->cdev = cdev;
+	skb_queue_head_init(&csk->txq);
+	csk->wr_skb_head = NULL;
+	csk->wr_skb_tail = NULL;
+	csk->mss = MAX_MSS;
+	csk->tlshws.ofld = 1;
+	csk->tlshws.txkey = -1;
+	csk->tlshws.rxkey = -1;
+	csk->tlshws.mfs = TLS_MFS;
+	skb_queue_head_init(&csk->tlshws.sk_recv_queue);
+	return csk;
+}
+
+static void chtls_sock_release(struct kref *ref)
+{
+	struct chtls_sock *csk =
+		container_of(ref, struct chtls_sock, kref);
+
+	kfree(csk);
+}
+
+static struct net_device *chtls_ipv4_netdev(struct chtls_dev *cdev,
+					    struct sock *sk)
+{
+	struct net_device *ndev = cdev->ports[0];
+
+	if (likely(!inet_sk(sk)->inet_rcv_saddr))
+		return ndev;
+
+	ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr);
+	if (!ndev)
+		return NULL;
+
+	if (is_vlan_dev(ndev))
+		return vlan_dev_real_dev(ndev);
+	return ndev;
+}
+
+static void assign_rxopt(struct sock *sk, unsigned int opt)
+{
+	const struct chtls_dev *cdev;
+	struct chtls_sock *csk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tp = tcp_sk(sk);
+
+	cdev = csk->cdev;
+	tp->tcp_header_len           = sizeof(struct tcphdr);
+	tp->rx_opt.mss_clamp         = cdev->mtus[TCPOPT_MSS_G(opt)] - 40;
+	tp->mss_cache                = tp->rx_opt.mss_clamp;
+	tp->rx_opt.tstamp_ok         = TCPOPT_TSTAMP_G(opt);
+	tp->rx_opt.snd_wscale        = TCPOPT_SACK_G(opt);
+	tp->rx_opt.wscale_ok         = TCPOPT_WSCALE_OK_G(opt);
+	SND_WSCALE(tp)               = TCPOPT_SND_WSCALE_G(opt);
+	if (!tp->rx_opt.wscale_ok)
+		tp->rx_opt.rcv_wscale = 0;
+	if (tp->rx_opt.tstamp_ok) {
+		tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+		tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED;
+	} else if (csk->opt2 & TSTAMPS_EN_F) {
+		csk->opt2 &= ~TSTAMPS_EN_F;
+		csk->mtu_idx = TCPOPT_MSS_G(opt);
+	}
+}
+
+static void chtls_purge_receive_queue(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		skb_dst_set(skb, (void *)NULL);
+		kfree_skb(skb);
+	}
+}
+
+static void chtls_purge_write_queue(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&csk->txq))) {
+		sk->sk_wmem_queued -= skb->truesize;
+		__kfree_skb(skb);
+	}
+}
+
+static void chtls_purge_recv_queue(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct chtls_hws *tlsk = &csk->tlshws;
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) {
+		skb_dst_set(skb, NULL);
+		kfree_skb(skb);
+	}
+}
+
+static void abort_arp_failure(void *handle, struct sk_buff *skb)
+{
+	struct cpl_abort_req *req = cplhdr(skb);
+	struct chtls_dev *cdev;
+
+	cdev = (struct chtls_dev *)handle;
+	req->cmd = CPL_ABORT_NO_RST;
+	cxgb4_ofld_send(cdev->lldi->ports[0], skb);
+}
+
+static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len)
+{
+	if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) {
+		__skb_trim(skb, 0);
+		refcount_add(2, &skb->users);
+	} else {
+		skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
+	}
+	return skb;
+}
+
+static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb)
+{
+	struct cpl_abort_req *req;
+	struct chtls_sock *csk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tp = tcp_sk(sk);
+
+	if (!skb)
+		skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req));
+
+	req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
+	INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid);
+	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
+	req->rsvd0 = htonl(tp->snd_nxt);
+	req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT);
+	req->cmd = mode;
+	t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure);
+	send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST);
+}
+
+static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) ||
+		     !csk->cdev)) {
+		if (sk->sk_state == TCP_SYN_RECV)
+			csk_set_flag(csk, CSK_RST_ABORTED);
+		goto out;
+	}
+
+	if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
+			WARN_ONCE(1, "send tx flowc error");
+		csk_set_flag(csk, CSK_TX_DATA_SENT);
+	}
+
+	csk_set_flag(csk, CSK_ABORT_RPL_PENDING);
+	chtls_purge_write_queue(sk);
+
+	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
+	if (sk->sk_state != TCP_SYN_RECV)
+		chtls_send_abort(sk, mode, skb);
+	else
+		goto out;
+
+	return;
+out:
+	if (skb)
+		kfree_skb(skb);
+}
+
+static void release_tcp_port(struct sock *sk)
+{
+	if (inet_csk(sk)->icsk_bind_hash)
+		inet_put_port(sk);
+}
+
+static void tcp_uncork(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tp->nonagle & TCP_NAGLE_CORK) {
+		tp->nonagle &= ~TCP_NAGLE_CORK;
+		chtls_tcp_push(sk, 0);
+	}
+}
+
+static void chtls_close_conn(struct sock *sk)
+{
+	struct cpl_close_con_req *req;
+	struct chtls_sock *csk;
+	struct sk_buff *skb;
+	unsigned int tid;
+	unsigned int len;
+
+	len = roundup(sizeof(struct cpl_close_con_req), 16);
+	csk = rcu_dereference_sk_user_data(sk);
+	tid = csk->tid;
+
+	skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
+	req = (struct cpl_close_con_req *)__skb_put(skb, len);
+	memset(req, 0, len);
+	req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) |
+			      FW_WR_IMMDLEN_V(sizeof(*req) -
+					      sizeof(req->wr)));
+	req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
+			       FW_WR_FLOWID_V(tid));
+
+	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
+
+	tcp_uncork(sk);
+	skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
+	if (sk->sk_state != TCP_SYN_SENT)
+		chtls_push_frames(csk, 1);
+}
+
+/*
+ * Perform a state transition during close and return the actions indicated
+ * for the transition.  Do not make this function inline, the main reason
+ * it exists at all is to avoid multiple inlining of tcp_set_state.
+ */
+static int make_close_transition(struct sock *sk)
+{
+	int next = (int)new_state[sk->sk_state];
+
+	tcp_set_state(sk, next & TCP_STATE_MASK);
+	return next & TCP_ACTION_FIN;
+}
+
+void chtls_close(struct sock *sk, long timeout)
+{
+	int data_lost, prev_state;
+	struct chtls_sock *csk;
+
+	csk = rcu_dereference_sk_user_data(sk);
+
+	lock_sock(sk);
+	sk->sk_shutdown |= SHUTDOWN_MASK;
+
+	data_lost = skb_queue_len(&sk->sk_receive_queue);
+	data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
+	chtls_purge_recv_queue(sk);
+	chtls_purge_receive_queue(sk);
+
+	if (sk->sk_state == TCP_CLOSE) {
+		goto wait;
+	} else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
+		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
+		release_tcp_port(sk);
+		goto unlock;
+	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+		sk->sk_prot->disconnect(sk, 0);
+	} else if (make_close_transition(sk)) {
+		chtls_close_conn(sk);
+	}
+wait:
+	if (timeout)
+		sk_stream_wait_close(sk, timeout);
+
+unlock:
+	prev_state = sk->sk_state;
+	sock_hold(sk);
+	sock_orphan(sk);
+
+	release_sock(sk);
+
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
+	    !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
+		struct sk_buff *skb;
+
+		skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
+		if (skb)
+			chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);
+	}
+
+	if (sk->sk_state == TCP_CLOSE)
+		inet_csk_destroy_sock(sk);
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	sock_put(sk);
+}
+
+/*
+ * Wait until a socket enters on of the given states.
+ */
+static int wait_for_states(struct sock *sk, unsigned int states)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct socket_wq _sk_wq;
+	long current_timeo;
+	int err = 0;
+
+	current_timeo = 200;
+
+	/*
+	 * We want this to work even when there's no associated struct socket.
+	 * In that case we provide a temporary wait_queue_head_t.
+	 */
+	if (!sk->sk_wq) {
+		init_waitqueue_head(&_sk_wq.wait);
+		_sk_wq.fasync_list = NULL;
+		init_rcu_head_on_stack(&_sk_wq.rcu);
+		RCU_INIT_POINTER(sk->sk_wq, &_sk_wq);
+	}
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	while (!sk_in_state(sk, states)) {
+		if (!current_timeo) {
+			err = -EBUSY;
+			break;
+		}
+		if (signal_pending(current)) {
+			err = sock_intr_errno(current_timeo);
+			break;
+		}
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		release_sock(sk);
+		if (!sk_in_state(sk, states))
+			current_timeo = schedule_timeout(current_timeo);
+		__set_current_state(TASK_RUNNING);
+		lock_sock(sk);
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	if (rcu_dereference(sk->sk_wq) == &_sk_wq)
+		sk->sk_wq = NULL;
+	return err;
+}
+
+int chtls_disconnect(struct sock *sk, int flags)
+{
+	struct chtls_sock *csk;
+	struct tcp_sock *tp;
+	int err;
+
+	tp = tcp_sk(sk);
+	csk = rcu_dereference_sk_user_data(sk);
+	chtls_purge_recv_queue(sk);
+	chtls_purge_receive_queue(sk);
+	chtls_purge_write_queue(sk);
+
+	if (sk->sk_state != TCP_CLOSE) {
+		sk->sk_err = ECONNRESET;
+		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
+		err = wait_for_states(sk, TCPF_CLOSE);
+		if (err)
+			return err;
+	}
+	chtls_purge_recv_queue(sk);
+	chtls_purge_receive_queue(sk);
+	tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale);
+	return tcp_disconnect(sk, flags);
+}
+
+#define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \
+				 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)
+void chtls_shutdown(struct sock *sk, int how)
+{
+	if ((how & SEND_SHUTDOWN) &&
+	    sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) &&
+	    make_close_transition(sk))
+		chtls_close_conn(sk);
+}
+
+void chtls_destroy_sock(struct sock *sk)
+{
+	struct chtls_sock *csk;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	chtls_purge_recv_queue(sk);
+	csk->ulp_mode = ULP_MODE_NONE;
+	chtls_purge_write_queue(sk);
+	free_tls_keyid(sk);
+	kref_put(&csk->kref, chtls_sock_release);
+	sk->sk_prot = &tcp_prot;
+	sk->sk_prot->destroy(sk);
+}
+
+static void reset_listen_child(struct sock *child)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(child);
+	struct sk_buff *skb;
+
+	skb = alloc_ctrl_skb(csk->txdata_skb_cache,
+			     sizeof(struct cpl_abort_req));
+
+	chtls_send_reset(child, CPL_ABORT_SEND_RST, skb);
+	sock_orphan(child);
+	INC_ORPHAN_COUNT(child);
+	if (child->sk_state == TCP_CLOSE)
+		inet_csk_destroy_sock(child);
+}
+
+static void chtls_disconnect_acceptq(struct sock *listen_sk)
+{
+	struct request_sock **pprev;
+
+	pprev = ACCEPT_QUEUE(listen_sk);
+	while (*pprev) {
+		struct request_sock *req = *pprev;
+
+		if (req->rsk_ops == &chtls_rsk_ops) {
+			struct sock *child = req->sk;
+
+			*pprev = req->dl_next;
+			sk_acceptq_removed(listen_sk);
+			reqsk_put(req);
+			sock_hold(child);
+			local_bh_disable();
+			bh_lock_sock(child);
+			release_tcp_port(child);
+			reset_listen_child(child);
+			bh_unlock_sock(child);
+			local_bh_enable();
+			sock_put(child);
+		} else {
+			pprev = &req->dl_next;
+		}
+	}
+}
+
+static int listen_hashfn(const struct sock *sk)
+{
+	return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
+}
+
+static struct listen_info *listen_hash_add(struct chtls_dev *cdev,
+					   struct sock *sk,
+					   unsigned int stid)
+{
+	struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL);
+
+	if (p) {
+		int key = listen_hashfn(sk);
+
+		p->sk = sk;
+		p->stid = stid;
+		spin_lock(&cdev->listen_lock);
+		p->next = cdev->listen_hash_tab[key];
+		cdev->listen_hash_tab[key] = p;
+		spin_unlock(&cdev->listen_lock);
+	}
+	return p;
+}
+
+static int listen_hash_find(struct chtls_dev *cdev,
+			    struct sock *sk)
+{
+	struct listen_info *p;
+	int stid = -1;
+	int key;
+
+	key = listen_hashfn(sk);
+
+	spin_lock(&cdev->listen_lock);
+	for (p = cdev->listen_hash_tab[key]; p; p = p->next)
+		if (p->sk == sk) {
+			stid = p->stid;
+			break;
+		}
+	spin_unlock(&cdev->listen_lock);
+	return stid;
+}
+
+static int listen_hash_del(struct chtls_dev *cdev,
+			   struct sock *sk)
+{
+	struct listen_info *p, **prev;
+	int stid = -1;
+	int key;
+
+	key = listen_hashfn(sk);
+	prev = &cdev->listen_hash_tab[key];
+
+	spin_lock(&cdev->listen_lock);
+	for (p = *prev; p; prev = &p->next, p = p->next)
+		if (p->sk == sk) {
+			stid = p->stid;
+			*prev = p->next;
+			kfree(p);
+			break;
+		}
+	spin_unlock(&cdev->listen_lock);
+	return stid;
+}
+
+static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent)
+{
+	struct request_sock *req;
+	struct chtls_sock *csk;
+
+	csk = rcu_dereference_sk_user_data(child);
+	req = csk->passive_reap_next;
+
+	reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req);
+	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
+	chtls_reqsk_free(req);
+	csk->passive_reap_next = NULL;
+}
+
+static void chtls_reset_synq(struct listen_ctx *listen_ctx)
+{
+	struct sock *listen_sk = listen_ctx->lsk;
+
+	while (!skb_queue_empty(&listen_ctx->synq)) {
+		struct chtls_sock *csk =
+			container_of((struct synq *)__skb_dequeue
+				(&listen_ctx->synq), struct chtls_sock, synq);
+		struct sock *child = csk->sk;
+
+		cleanup_syn_rcv_conn(child, listen_sk);
+		sock_hold(child);
+		local_bh_disable();
+		bh_lock_sock(child);
+		release_tcp_port(child);
+		reset_listen_child(child);
+		bh_unlock_sock(child);
+		local_bh_enable();
+		sock_put(child);
+	}
+}
+
+int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
+{
+	struct net_device *ndev;
+	struct listen_ctx *ctx;
+	struct adapter *adap;
+	struct port_info *pi;
+	int stid;
+	int ret;
+
+	if (sk->sk_family != PF_INET)
+		return -EAGAIN;
+
+	rcu_read_lock();
+	ndev = chtls_ipv4_netdev(cdev, sk);
+	rcu_read_unlock();
+	if (!ndev)
+		return -EBADF;
+
+	pi = netdev_priv(ndev);
+	adap = pi->adapter;
+	if (!(adap->flags & FULL_INIT_DONE))
+		return -EBADF;
+
+	if (listen_hash_find(cdev, sk) >= 0)   /* already have it */
+		return -EADDRINUSE;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	__module_get(THIS_MODULE);
+	ctx->lsk = sk;
+	ctx->cdev = cdev;
+	ctx->state = T4_LISTEN_START_PENDING;
+	skb_queue_head_init(&ctx->synq);
+
+	stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx);
+	if (stid < 0)
+		goto free_ctx;
+
+	sock_hold(sk);
+	if (!listen_hash_add(cdev, sk, stid))
+		goto free_stid;
+
+	ret = cxgb4_create_server(ndev, stid,
+				  inet_sk(sk)->inet_rcv_saddr,
+				  inet_sk(sk)->inet_sport, 0,
+				  cdev->lldi->rxq_ids[0]);
+	if (ret > 0)
+		ret = net_xmit_errno(ret);
+	if (ret)
+		goto del_hash;
+	return 0;
+del_hash:
+	listen_hash_del(cdev, sk);
+free_stid:
+	cxgb4_free_stid(cdev->tids, stid, sk->sk_family);
+	sock_put(sk);
+free_ctx:
+	kfree(ctx);
+	module_put(THIS_MODULE);
+	return -EBADF;
+}
+
+void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
+{
+	struct listen_ctx *listen_ctx;
+	int stid;
+
+	stid = listen_hash_del(cdev, sk);
+	if (stid < 0)
+		return;
+
+	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
+	chtls_reset_synq(listen_ctx);
+
+	cxgb4_remove_server(cdev->lldi->ports[0], stid,
+			    cdev->lldi->rxq_ids[0], 0);
+	chtls_disconnect_acceptq(sk);
+}
+
+static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR;
+	unsigned int stid = GET_TID(rpl);
+	struct listen_ctx *listen_ctx;
+
+	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
+	if (!listen_ctx)
+		return CPL_RET_BUF_DONE;
+
+	if (listen_ctx->state == T4_LISTEN_START_PENDING) {
+		listen_ctx->state = T4_LISTEN_STARTED;
+		return CPL_RET_BUF_DONE;
+	}
+
+	if (rpl->status != CPL_ERR_NONE) {
+		pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
+			rpl->status, stid);
+		return CPL_RET_BUF_DONE;
+	}
+	cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
+	sock_put(listen_ctx->lsk);
+	kfree(listen_ctx);
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR;
+	struct listen_ctx *listen_ctx;
+	unsigned int stid;
+	void *data;
+
+	stid = GET_TID(rpl);
+	data = lookup_stid(cdev->tids, stid);
+	listen_ctx = (struct listen_ctx *)data;
+
+	if (rpl->status != CPL_ERR_NONE) {
+		pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
+			rpl->status, stid);
+		return CPL_RET_BUF_DONE;
+	}
+
+	cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
+	sock_put(listen_ctx->lsk);
+	kfree(listen_ctx);
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static void chtls_release_resources(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct chtls_dev *cdev = csk->cdev;
+	unsigned int tid = csk->tid;
+	struct tid_info *tids;
+
+	if (!cdev)
+		return;
+
+	tids = cdev->tids;
+	kfree_skb(csk->txdata_skb_cache);
+	csk->txdata_skb_cache = NULL;
+
+	if (csk->l2t_entry) {
+		cxgb4_l2t_release(csk->l2t_entry);
+		csk->l2t_entry = NULL;
+	}
+
+	cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family);
+	sock_put(sk);
+}
+
+static void chtls_conn_done(struct sock *sk)
+{
+	if (sock_flag(sk, SOCK_DEAD))
+		chtls_purge_receive_queue(sk);
+	sk_wakeup_sleepers(sk, 0);
+	tcp_done(sk);
+}
+
+static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
+{
+	/*
+	 * If the server is still open we clean up the child connection,
+	 * otherwise the server already did the clean up as it was purging
+	 * its SYN queue and the skb was just sitting in its backlog.
+	 */
+	if (likely(parent->sk_state == TCP_LISTEN)) {
+		cleanup_syn_rcv_conn(child, parent);
+		/* Without the below call to sock_orphan,
+		 * we leak the socket resource with syn_flood test
+		 * as inet_csk_destroy_sock will not be called
+		 * in tcp_done since SOCK_DEAD flag is not set.
+		 * Kernel handles this differently where new socket is
+		 * created only after 3 way handshake is done.
+		 */
+		sock_orphan(child);
+		percpu_counter_inc((child)->sk_prot->orphan_count);
+		chtls_release_resources(child);
+		chtls_conn_done(child);
+	} else {
+		if (csk_flag(child, CSK_RST_ABORTED)) {
+			chtls_release_resources(child);
+			chtls_conn_done(child);
+		}
+	}
+}
+
+static void pass_open_abort(struct sock *child, struct sock *parent,
+			    struct sk_buff *skb)
+{
+	do_abort_syn_rcv(child, parent);
+	kfree_skb(skb);
+}
+
+static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb)
+{
+	pass_open_abort(skb->sk, lsk, skb);
+}
+
+static void chtls_pass_open_arp_failure(struct sock *sk,
+					struct sk_buff *skb)
+{
+	const struct request_sock *oreq;
+	struct chtls_sock *csk;
+	struct chtls_dev *cdev;
+	struct sock *parent;
+	void *data;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	cdev = csk->cdev;
+
+	/*
+	 * If the connection is being aborted due to the parent listening
+	 * socket going away there's nothing to do, the ABORT_REQ will close
+	 * the connection.
+	 */
+	if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	oreq = csk->passive_reap_next;
+	data = lookup_stid(cdev->tids, oreq->ts_recent);
+	parent = ((struct listen_ctx *)data)->lsk;
+
+	bh_lock_sock(parent);
+	if (!sock_owned_by_user(parent)) {
+		pass_open_abort(sk, parent, skb);
+	} else {
+		BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort;
+		__sk_add_backlog(parent, skb);
+	}
+	bh_unlock_sock(parent);
+}
+
+static void chtls_accept_rpl_arp_failure(void *handle,
+					 struct sk_buff *skb)
+{
+	struct sock *sk = (struct sock *)handle;
+
+	sock_hold(sk);
+	process_cpl_msg(chtls_pass_open_arp_failure, sk, skb);
+	sock_put(sk);
+}
+
+static unsigned int chtls_select_mss(const struct chtls_sock *csk,
+				     unsigned int pmtu,
+				     struct cpl_pass_accept_req *req)
+{
+	struct chtls_dev *cdev;
+	struct dst_entry *dst;
+	unsigned int tcpoptsz;
+	unsigned int iphdrsz;
+	unsigned int mtu_idx;
+	struct tcp_sock *tp;
+	unsigned int mss;
+	struct sock *sk;
+
+	mss = ntohs(req->tcpopt.mss);
+	sk = csk->sk;
+	dst = __sk_dst_get(sk);
+	cdev = csk->cdev;
+	tp = tcp_sk(sk);
+	tcpoptsz = 0;
+
+	iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
+	if (req->tcpopt.tstamp)
+		tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);
+
+	tp->advmss = dst_metric_advmss(dst);
+	if (USER_MSS(tp) && tp->advmss > USER_MSS(tp))
+		tp->advmss = USER_MSS(tp);
+	if (tp->advmss > pmtu - iphdrsz)
+		tp->advmss = pmtu - iphdrsz;
+	if (mss && tp->advmss > mss)
+		tp->advmss = mss;
+
+	tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus,
+					    iphdrsz + tcpoptsz,
+					    tp->advmss - tcpoptsz,
+					    8, &mtu_idx);
+	tp->advmss -= iphdrsz;
+
+	inet_csk(sk)->icsk_pmtu_cookie = pmtu;
+	return mtu_idx;
+}
+
+static unsigned int select_rcv_wnd(struct chtls_sock *csk)
+{
+	unsigned int rcvwnd;
+	unsigned int wnd;
+	struct sock *sk;
+
+	sk = csk->sk;
+	wnd = tcp_full_space(sk);
+
+	if (wnd < MIN_RCV_WND)
+		wnd = MIN_RCV_WND;
+
+	rcvwnd = MAX_RCV_WND;
+
+	csk_set_flag(csk, CSK_UPDATE_RCV_WND);
+	return min(wnd, rcvwnd);
+}
+
+static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp)
+{
+	int wscale = 0;
+
+	if (space > MAX_RCV_WND)
+		space = MAX_RCV_WND;
+	if (win_clamp && win_clamp < space)
+		space = win_clamp;
+
+	if (wscale_ok) {
+		while (wscale < 14 && (65535 << wscale) < space)
+			wscale++;
+	}
+	return wscale;
+}
+
+static void chtls_pass_accept_rpl(struct sk_buff *skb,
+				  struct cpl_pass_accept_req *req,
+				  unsigned int tid)
+
+{
+	struct cpl_t5_pass_accept_rpl *rpl5;
+	struct cxgb4_lld_info *lldi;
+	const struct tcphdr *tcph;
+	const struct tcp_sock *tp;
+	struct chtls_sock *csk;
+	unsigned int len;
+	struct sock *sk;
+	u32 opt2, hlen;
+	u64 opt0;
+
+	sk = skb->sk;
+	tp = tcp_sk(sk);
+	csk = sk->sk_user_data;
+	csk->tid = tid;
+	lldi = csk->cdev->lldi;
+	len = roundup(sizeof(*rpl5), 16);
+
+	rpl5 = __skb_put_zero(skb, len);
+	INIT_TP_WR(rpl5, tid);
+
+	OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+						     csk->tid));
+	csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)),
+					req);
+	opt0 = TCAM_BYPASS_F |
+	       WND_SCALE_V((tp)->rx_opt.rcv_wscale) |
+	       MSS_IDX_V(csk->mtu_idx) |
+	       L2T_IDX_V(csk->l2t_entry->idx) |
+	       NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) |
+	       TX_CHAN_V(csk->tx_chan) |
+	       SMAC_SEL_V(csk->smac_idx) |
+	       DSCP_V(csk->tos >> 2) |
+	       ULP_MODE_V(ULP_MODE_TLS) |
+	       RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M));
+
+	opt2 = RX_CHANNEL_V(0) |
+		RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid);
+
+	if (!is_t5(lldi->adapter_type))
+		opt2 |= RX_FC_DISABLE_F;
+	if (req->tcpopt.tstamp)
+		opt2 |= TSTAMPS_EN_F;
+	if (req->tcpopt.sack)
+		opt2 |= SACK_EN_F;
+	hlen = ntohl(req->hdr_len);
+
+	tcph = (struct tcphdr *)((u8 *)(req + 1) +
+			T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen));
+	if (tcph->ece && tcph->cwr)
+		opt2 |= CCTRL_ECN_V(1);
+	opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO);
+	opt2 |= T5_ISS_F;
+	opt2 |= T5_OPT_2_VALID_F;
+	rpl5->opt0 = cpu_to_be64(opt0);
+	rpl5->opt2 = cpu_to_be32(opt2);
+	rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1);
+	set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id);
+	t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure);
+	cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
+}
+
+static void inet_inherit_port(struct inet_hashinfo *hash_info,
+			      struct sock *lsk, struct sock *newsk)
+{
+	local_bh_disable();
+	__inet_inherit_port(lsk, newsk);
+	local_bh_enable();
+}
+
+static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb->protocol) {
+		kfree_skb(skb);
+		return 0;
+	}
+	BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
+	return 0;
+}
+
+static struct sock *chtls_recv_sock(struct sock *lsk,
+				    struct request_sock *oreq,
+				    void *network_hdr,
+				    const struct cpl_pass_accept_req *req,
+				    struct chtls_dev *cdev)
+{
+	const struct tcphdr *tcph;
+	struct inet_sock *newinet;
+	const struct iphdr *iph;
+	struct net_device *ndev;
+	struct chtls_sock *csk;
+	struct dst_entry *dst;
+	struct neighbour *n;
+	struct tcp_sock *tp;
+	struct sock *newsk;
+	u16 port_id;
+	int rxq_idx;
+	int step;
+
+	iph = (const struct iphdr *)network_hdr;
+	newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);
+	if (!newsk)
+		goto free_oreq;
+
+	dst = inet_csk_route_child_sock(lsk, newsk, oreq);
+	if (!dst)
+		goto free_sk;
+
+	tcph = (struct tcphdr *)(iph + 1);
+	n = dst_neigh_lookup(dst, &iph->saddr);
+	if (!n)
+		goto free_sk;
+
+	ndev = n->dev;
+	if (!ndev)
+		goto free_dst;
+	port_id = cxgb4_port_idx(ndev);
+
+	csk = chtls_sock_create(cdev);
+	if (!csk)
+		goto free_dst;
+
+	csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
+	if (!csk->l2t_entry)
+		goto free_csk;
+
+	newsk->sk_user_data = csk;
+	newsk->sk_backlog_rcv = chtls_backlog_rcv;
+
+	tp = tcp_sk(newsk);
+	newinet = inet_sk(newsk);
+
+	newinet->inet_daddr = iph->saddr;
+	newinet->inet_rcv_saddr = iph->daddr;
+	newinet->inet_saddr = iph->daddr;
+
+	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
+	sk_setup_caps(newsk, dst);
+	csk->sk = newsk;
+	csk->passive_reap_next = oreq;
+	csk->tx_chan = cxgb4_port_chan(ndev);
+	csk->port_id = port_id;
+	csk->egress_dev = ndev;
+	csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+	csk->ulp_mode = ULP_MODE_TLS;
+	step = cdev->lldi->nrxq / cdev->lldi->nchan;
+	csk->rss_qid = cdev->lldi->rxq_ids[port_id * step];
+	rxq_idx = port_id * step;
+	csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
+			port_id * step;
+	csk->sndbuf = newsk->sk_sndbuf;
+	csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi->adapter_type,
+					 cxgb4_port_viid(ndev));
+	tp->rcv_wnd = select_rcv_wnd(csk);
+	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
+					   WSCALE_OK(tp),
+					   tp->window_clamp);
+	neigh_release(n);
+	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
+	csk_set_flag(csk, CSK_CONN_INLINE);
+	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */
+
+	return newsk;
+free_csk:
+	chtls_sock_release(&csk->kref);
+free_dst:
+	dst_release(dst);
+free_sk:
+	inet_csk_prepare_forced_close(newsk);
+	tcp_done(newsk);
+free_oreq:
+	chtls_reqsk_free(oreq);
+	return NULL;
+}
+
+/*
+ * Populate a TID_RELEASE WR.  The skb must be already propely sized.
+ */
+static  void mk_tid_release(struct sk_buff *skb,
+			    unsigned int chan, unsigned int tid)
+{
+	struct cpl_tid_release *req;
+	unsigned int len;
+
+	len = roundup(sizeof(struct cpl_tid_release), 16);
+	req = (struct cpl_tid_release *)__skb_put(skb, len);
+	memset(req, 0, len);
+	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
+	INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid);
+}
+
+static int chtls_get_module(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (!try_module_get(icsk->icsk_ulp_ops->owner))
+		return -1;
+
+	return 0;
+}
+
+static void chtls_pass_accept_request(struct sock *sk,
+				      struct sk_buff *skb)
+{
+	struct cpl_t5_pass_accept_rpl *rpl;
+	struct cpl_pass_accept_req *req;
+	struct listen_ctx *listen_ctx;
+	struct request_sock *oreq;
+	struct sk_buff *reply_skb;
+	struct chtls_sock *csk;
+	struct chtls_dev *cdev;
+	struct tcphdr *tcph;
+	struct sock *newsk;
+	struct ethhdr *eh;
+	struct iphdr *iph;
+	void *network_hdr;
+	unsigned int stid;
+	unsigned int len;
+	unsigned int tid;
+
+	req = cplhdr(skb) + RSS_HDR;
+	tid = GET_TID(req);
+	cdev = BLOG_SKB_CB(skb)->cdev;
+	newsk = lookup_tid(cdev->tids, tid);
+	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
+	if (newsk) {
+		pr_info("tid (%d) already in use\n", tid);
+		return;
+	}
+
+	len = roundup(sizeof(*rpl), 16);
+	reply_skb = alloc_skb(len, GFP_ATOMIC);
+	if (!reply_skb) {
+		cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family);
+		kfree_skb(skb);
+		return;
+	}
+
+	if (sk->sk_state != TCP_LISTEN)
+		goto reject;
+
+	if (inet_csk_reqsk_queue_is_full(sk))
+		goto reject;
+
+	if (sk_acceptq_is_full(sk))
+		goto reject;
+
+	oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);
+	if (!oreq)
+		goto reject;
+
+	oreq->rsk_rcv_wnd = 0;
+	oreq->rsk_window_clamp = 0;
+	oreq->cookie_ts = 0;
+	oreq->mss = 0;
+	oreq->ts_recent = 0;
+
+	eh = (struct ethhdr *)(req + 1);
+	iph = (struct iphdr *)(eh + 1);
+	if (iph->version != 0x4)
+		goto free_oreq;
+
+	network_hdr = (void *)(eh + 1);
+	tcph = (struct tcphdr *)(iph + 1);
+
+	tcp_rsk(oreq)->tfo_listener = false;
+	tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq);
+	chtls_set_req_port(oreq, tcph->source, tcph->dest);
+	inet_rsk(oreq)->ecn_ok = 0;
+	chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
+	if (req->tcpopt.wsf <= 14) {
+		inet_rsk(oreq)->wscale_ok = 1;
+		inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
+	}
+	inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if;
+
+	newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
+	if (!newsk)
+		goto reject;
+
+	if (chtls_get_module(newsk))
+		goto reject;
+	inet_csk_reqsk_queue_added(sk);
+	reply_skb->sk = newsk;
+	chtls_install_cpl_ops(newsk);
+	cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family);
+	csk = rcu_dereference_sk_user_data(newsk);
+	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
+	csk->listen_ctx = listen_ctx;
+	__skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq);
+	chtls_pass_accept_rpl(reply_skb, req, tid);
+	kfree_skb(skb);
+	return;
+
+free_oreq:
+	chtls_reqsk_free(oreq);
+reject:
+	mk_tid_release(reply_skb, 0, tid);
+	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
+	kfree_skb(skb);
+}
+
+/*
+ * Handle a CPL_PASS_ACCEPT_REQ message.
+ */
+static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR;
+	struct listen_ctx *ctx;
+	unsigned int stid;
+	unsigned int tid;
+	struct sock *lsk;
+	void *data;
+
+	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
+	tid = GET_TID(req);
+
+	data = lookup_stid(cdev->tids, stid);
+	if (!data)
+		return 1;
+
+	ctx = (struct listen_ctx *)data;
+	lsk = ctx->lsk;
+
+	if (unlikely(tid >= cdev->tids->ntids)) {
+		pr_info("passive open TID %u too large\n", tid);
+		return 1;
+	}
+
+	BLOG_SKB_CB(skb)->cdev = cdev;
+	process_cpl_msg(chtls_pass_accept_request, lsk, skb);
+	return 0;
+}
+
+/*
+ * Completes some final bits of initialization for just established connections
+ * and changes their state to TCP_ESTABLISHED.
+ *
+ * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
+ */
+static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->pushed_seq = snd_isn;
+	tp->write_seq = snd_isn;
+	tp->snd_nxt = snd_isn;
+	tp->snd_una = snd_isn;
+	inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
+	assign_rxopt(sk, opt);
+
+	if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
+		tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10);
+
+	smp_mb();
+	tcp_set_state(sk, TCP_ESTABLISHED);
+}
+
+static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *abort_skb;
+
+	abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
+	if (abort_skb)
+		chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb);
+}
+
+static struct sock *reap_list;
+static DEFINE_SPINLOCK(reap_list_lock);
+
+/*
+ * Process the reap list.
+ */
+DECLARE_TASK_FUNC(process_reap_list, task_param)
+{
+	spin_lock_bh(&reap_list_lock);
+	while (reap_list) {
+		struct sock *sk = reap_list;
+		struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+		reap_list = csk->passive_reap_next;
+		csk->passive_reap_next = NULL;
+		spin_unlock(&reap_list_lock);
+		sock_hold(sk);
+
+		bh_lock_sock(sk);
+		chtls_abort_conn(sk, NULL);
+		sock_orphan(sk);
+		if (sk->sk_state == TCP_CLOSE)
+			inet_csk_destroy_sock(sk);
+		bh_unlock_sock(sk);
+		sock_put(sk);
+		spin_lock(&reap_list_lock);
+	}
+	spin_unlock_bh(&reap_list_lock);
+}
+
+static DECLARE_WORK(reap_task, process_reap_list);
+
+static void add_to_reap_list(struct sock *sk)
+{
+	struct chtls_sock *csk = sk->sk_user_data;
+
+	local_bh_disable();
+	bh_lock_sock(sk);
+	release_tcp_port(sk); /* release the port immediately */
+
+	spin_lock(&reap_list_lock);
+	csk->passive_reap_next = reap_list;
+	reap_list = sk;
+	if (!csk->passive_reap_next)
+		schedule_work(&reap_task);
+	spin_unlock(&reap_list_lock);
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void add_pass_open_to_parent(struct sock *child, struct sock *lsk,
+				    struct chtls_dev *cdev)
+{
+	struct request_sock *oreq;
+	struct chtls_sock *csk;
+
+	if (lsk->sk_state != TCP_LISTEN)
+		return;
+
+	csk = child->sk_user_data;
+	oreq = csk->passive_reap_next;
+	csk->passive_reap_next = NULL;
+
+	reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq);
+	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
+
+	if (sk_acceptq_is_full(lsk)) {
+		chtls_reqsk_free(oreq);
+		add_to_reap_list(child);
+	} else {
+		refcount_set(&oreq->rsk_refcnt, 1);
+		inet_csk_reqsk_queue_add(lsk, oreq, child);
+		lsk->sk_data_ready(lsk);
+	}
+}
+
+static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb)
+{
+	struct sock *child = skb->sk;
+
+	skb->sk = NULL;
+	add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev);
+	kfree_skb(skb);
+}
+
+static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR;
+	struct chtls_sock *csk;
+	struct sock *lsk, *sk;
+	unsigned int hwtid;
+
+	hwtid = GET_TID(req);
+	sk = lookup_tid(cdev->tids, hwtid);
+	if (!sk)
+		return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
+
+	bh_lock_sock(sk);
+	if (unlikely(sock_owned_by_user(sk))) {
+		kfree_skb(skb);
+	} else {
+		unsigned int stid;
+		void *data;
+
+		csk = sk->sk_user_data;
+		csk->wr_max_credits = 64;
+		csk->wr_credits = 64;
+		csk->wr_unacked = 0;
+		make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt));
+		stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
+		sk->sk_state_change(sk);
+		if (unlikely(sk->sk_socket))
+			sk_wake_async(sk, 0, POLL_OUT);
+
+		data = lookup_stid(cdev->tids, stid);
+		lsk = ((struct listen_ctx *)data)->lsk;
+
+		bh_lock_sock(lsk);
+		if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) {
+			/* removed from synq */
+			bh_unlock_sock(lsk);
+			kfree_skb(skb);
+			goto unlock;
+		}
+
+		if (likely(!sock_owned_by_user(lsk))) {
+			kfree_skb(skb);
+			add_pass_open_to_parent(sk, lsk, cdev);
+		} else {
+			skb->sk = sk;
+			BLOG_SKB_CB(skb)->cdev = cdev;
+			BLOG_SKB_CB(skb)->backlog_rcv =
+				bl_add_pass_open_to_parent;
+			__sk_add_backlog(lsk, skb);
+		}
+		bh_unlock_sock(lsk);
+	}
+unlock:
+	bh_unlock_sock(sk);
+	return 0;
+}
+
+/*
+ * Handle receipt of an urgent pointer.
+ */
+static void handle_urg_ptr(struct sock *sk, u32 urg_seq)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	urg_seq--;
+	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
+		return;	/* duplicate pointer */
+
+	sk_send_sigurg(sk);
+	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
+	    !sock_flag(sk, SOCK_URGINLINE) &&
+	    tp->copied_seq != tp->rcv_nxt) {
+		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+		tp->copied_seq++;
+		if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len)
+			chtls_free_skb(sk, skb);
+	}
+
+	tp->urg_data = TCP_URG_NOTYET;
+	tp->urg_seq = urg_seq;
+}
+
+static void check_sk_callbacks(struct chtls_sock *csk)
+{
+	struct sock *sk = csk->sk;
+
+	if (unlikely(sk->sk_user_data &&
+		     !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD)))
+		csk_set_flag(csk, CSK_CALLBACKS_CHKD);
+}
+
+/*
+ * Handles Rx data that arrives in a state where the socket isn't accepting
+ * new data.
+ */
+static void handle_excess_rx(struct sock *sk, struct sk_buff *skb)
+{
+	if (!csk_flag(sk, CSK_ABORT_SHUTDOWN))
+		chtls_abort_conn(sk, skb);
+
+	kfree_skb(skb);
+}
+
+static void chtls_recv_data(struct sock *sk, struct sk_buff *skb)
+{
+	struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR;
+	struct chtls_sock *csk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tp = tcp_sk(sk);
+
+	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
+		handle_excess_rx(sk, skb);
+		return;
+	}
+
+	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
+	ULP_SKB_CB(skb)->psh = hdr->psh;
+	skb_ulp_mode(skb) = ULP_MODE_NONE;
+
+	skb_reset_transport_header(skb);
+	__skb_pull(skb, sizeof(*hdr) + RSS_HDR);
+	if (!skb->data_len)
+		__skb_trim(skb, ntohs(hdr->len));
+
+	if (unlikely(hdr->urg))
+		handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg));
+	if (unlikely(tp->urg_data == TCP_URG_NOTYET &&
+		     tp->urg_seq - tp->rcv_nxt < skb->len))
+		tp->urg_data = TCP_URG_VALID |
+			       skb->data[tp->urg_seq - tp->rcv_nxt];
+
+	if (unlikely(hdr->dack_mode != csk->delack_mode)) {
+		csk->delack_mode = hdr->dack_mode;
+		csk->delack_seq = tp->rcv_nxt;
+	}
+
+	tcp_hdr(skb)->fin = 0;
+	tp->rcv_nxt += skb->len;
+
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		check_sk_callbacks(csk);
+		sk->sk_data_ready(sk);
+	}
+}
+
+static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR;
+	unsigned int hwtid = GET_TID(req);
+	struct sock *sk;
+
+	sk = lookup_tid(cdev->tids, hwtid);
+	skb_dst_set(skb, NULL);
+	process_cpl_msg(chtls_recv_data, sk, skb);
+	return 0;
+}
+
+static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb)
+{
+	struct cpl_tls_data *hdr = cplhdr(skb);
+	struct chtls_sock *csk;
+	struct chtls_hws *tlsk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tlsk = &csk->tlshws;
+	tp = tcp_sk(sk);
+
+	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
+		handle_excess_rx(sk, skb);
+		return;
+	}
+
+	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
+	ULP_SKB_CB(skb)->flags = 0;
+	skb_ulp_mode(skb) = ULP_MODE_TLS;
+
+	skb_reset_transport_header(skb);
+	__skb_pull(skb, sizeof(*hdr));
+	if (!skb->data_len)
+		__skb_trim(skb,
+			   CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)));
+
+	if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq -
+		     tp->rcv_nxt < skb->len))
+		tp->urg_data = TCP_URG_VALID |
+			       skb->data[tp->urg_seq - tp->rcv_nxt];
+
+	tcp_hdr(skb)->fin = 0;
+	tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd));
+	__skb_queue_tail(&tlsk->sk_recv_queue, skb);
+}
+
+static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_tls_data *req = cplhdr(skb);
+	unsigned int hwtid = GET_TID(req);
+	struct sock *sk;
+
+	sk = lookup_tid(cdev->tids, hwtid);
+	skb_dst_set(skb, NULL);
+	process_cpl_msg(chtls_recv_pdu, sk, skb);
+	return 0;
+}
+
+static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen)
+{
+	struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb);
+
+	skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length);
+	tls_cmp_hdr->length = ntohs((__force __be16)nlen);
+}
+
+static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb)
+{
+	struct cpl_rx_tls_cmp *cmp_cpl = cplhdr(skb);
+	struct sk_buff *skb_rec;
+	struct chtls_sock *csk;
+	struct chtls_hws *tlsk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tlsk = &csk->tlshws;
+	tp = tcp_sk(sk);
+
+	ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq);
+	ULP_SKB_CB(skb)->flags = 0;
+
+	skb_reset_transport_header(skb);
+	__skb_pull(skb, sizeof(*cmp_cpl));
+	if (!skb->data_len)
+		__skb_trim(skb, CPL_RX_TLS_CMP_LENGTH_G
+				(ntohl(cmp_cpl->pdulength_length)));
+
+	tp->rcv_nxt +=
+		CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length));
+
+	skb_rec = __skb_dequeue(&tlsk->sk_recv_queue);
+	if (!skb_rec) {
+		ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_ND;
+		__skb_queue_tail(&sk->sk_receive_queue, skb);
+	} else {
+		chtls_set_hdrlen(skb, tlsk->pldlen);
+		tlsk->pldlen = 0;
+		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		__skb_queue_tail(&sk->sk_receive_queue, skb_rec);
+	}
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		check_sk_callbacks(csk);
+		sk->sk_data_ready(sk);
+	}
+}
+
+static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_rx_tls_cmp *req = cplhdr(skb);
+	unsigned int hwtid = GET_TID(req);
+	struct sock *sk;
+
+	sk = lookup_tid(cdev->tids, hwtid);
+	skb_dst_set(skb, NULL);
+	process_cpl_msg(chtls_rx_hdr, sk, skb);
+
+	return 0;
+}
+
+static void chtls_timewait(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->rcv_nxt++;
+	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->srtt_us = 0;
+	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+}
+
+static void chtls_peer_close(struct sock *sk, struct sk_buff *skb)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	sk->sk_shutdown |= RCV_SHUTDOWN;
+	sock_set_flag(sk, SOCK_DONE);
+
+	switch (sk->sk_state) {
+	case TCP_SYN_RECV:
+	case TCP_ESTABLISHED:
+		tcp_set_state(sk, TCP_CLOSE_WAIT);
+		break;
+	case TCP_FIN_WAIT1:
+		tcp_set_state(sk, TCP_CLOSING);
+		break;
+	case TCP_FIN_WAIT2:
+		chtls_release_resources(sk);
+		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
+			chtls_conn_done(sk);
+		else
+			chtls_timewait(sk);
+		break;
+	default:
+		pr_info("cpl_peer_close in bad state %d\n", sk->sk_state);
+	}
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_state_change(sk);
+		/* Do not send POLL_HUP for half duplex close. */
+
+		if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+		    sk->sk_state == TCP_CLOSE)
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+		else
+			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	}
+}
+
+static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb)
+{
+	struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR;
+	struct chtls_sock *csk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tp = tcp_sk(sk);
+
+	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
+
+	switch (sk->sk_state) {
+	case TCP_CLOSING:
+		chtls_release_resources(sk);
+		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
+			chtls_conn_done(sk);
+		else
+			chtls_timewait(sk);
+		break;
+	case TCP_LAST_ACK:
+		chtls_release_resources(sk);
+		chtls_conn_done(sk);
+		break;
+	case TCP_FIN_WAIT1:
+		tcp_set_state(sk, TCP_FIN_WAIT2);
+		sk->sk_shutdown |= SEND_SHUTDOWN;
+
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_state_change(sk);
+		else if (tcp_sk(sk)->linger2 < 0 &&
+			 !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN))
+			chtls_abort_conn(sk, skb);
+		break;
+	default:
+		pr_info("close_con_rpl in bad state %d\n", sk->sk_state);
+	}
+	kfree_skb(skb);
+}
+
+static struct sk_buff *get_cpl_skb(struct sk_buff *skb,
+				   size_t len, gfp_t gfp)
+{
+	if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) {
+		WARN_ONCE(skb->len < len, "skb alloc error");
+		__skb_trim(skb, len);
+		skb_get(skb);
+	} else {
+		skb = alloc_skb(len, gfp);
+		if (skb)
+			__skb_put(skb, len);
+	}
+	return skb;
+}
+
+static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid,
+			     int cmd)
+{
+	struct cpl_abort_rpl *rpl = cplhdr(skb);
+
+	INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid);
+	rpl->cmd = cmd;
+}
+
+static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct sk_buff *reply_skb;
+
+	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
+			      GFP_KERNEL | __GFP_NOFAIL);
+	__skb_put(reply_skb, sizeof(struct cpl_abort_rpl));
+	set_abort_rpl_wr(reply_skb, GET_TID(req),
+			 (req->status & CPL_ABORT_NO_RST));
+	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1);
+	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
+	kfree_skb(skb);
+}
+
+static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
+			   struct chtls_dev *cdev, int status, int queue)
+{
+	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct sk_buff *reply_skb;
+	struct chtls_sock *csk;
+
+	csk = rcu_dereference_sk_user_data(sk);
+
+	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
+			      GFP_KERNEL);
+
+	if (!reply_skb) {
+		req->status = (queue << 1);
+		send_defer_abort_rpl(cdev, skb);
+		return;
+	}
+
+	set_abort_rpl_wr(reply_skb, GET_TID(req), status);
+	kfree_skb(skb);
+
+	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
+	if (csk_conn_inline(csk)) {
+		struct l2t_entry *e = csk->l2t_entry;
+
+		if (e && sk->sk_state != TCP_SYN_RECV) {
+			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
+			return;
+		}
+	}
+	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
+}
+
+/*
+ * Add an skb to the deferred skb queue for processing from process context.
+ */
+static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
+			   defer_handler_t handler)
+{
+	DEFERRED_SKB_CB(skb)->handler = handler;
+	spin_lock_bh(&cdev->deferq.lock);
+	__skb_queue_tail(&cdev->deferq, skb);
+	if (skb_queue_len(&cdev->deferq) == 1)
+		schedule_work(&cdev->deferq_task);
+	spin_unlock_bh(&cdev->deferq.lock);
+}
+
+static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
+				 struct chtls_dev *cdev,
+				 int status, int queue)
+{
+	struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
+	struct sk_buff *reply_skb;
+	struct chtls_sock *csk;
+	unsigned int tid;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tid = GET_TID(req);
+
+	reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any());
+	if (!reply_skb) {
+		req->status = (queue << 1) | status;
+		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
+		return;
+	}
+
+	set_abort_rpl_wr(reply_skb, tid, status);
+	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
+	if (csk_conn_inline(csk)) {
+		struct l2t_entry *e = csk->l2t_entry;
+
+		if (e && sk->sk_state != TCP_SYN_RECV) {
+			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
+			return;
+		}
+	}
+	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
+	kfree_skb(skb);
+}
+
+/*
+ * This is run from a listener's backlog to abort a child connection in
+ * SYN_RCV state (i.e., one on the listener's SYN queue).
+ */
+static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
+{
+	struct chtls_sock *csk;
+	struct sock *child;
+	int queue;
+
+	child = skb->sk;
+	csk = rcu_dereference_sk_user_data(child);
+	queue = csk->txq_idx;
+
+	skb->sk	= NULL;
+	do_abort_syn_rcv(child, lsk);
+	send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
+		       CPL_ABORT_NO_RST, queue);
+}
+
+static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	const struct request_sock *oreq;
+	struct listen_ctx *listen_ctx;
+	struct chtls_sock *csk;
+	struct chtls_dev *cdev;
+	struct sock *psk;
+	void *ctx;
+
+	csk = sk->sk_user_data;
+	oreq = csk->passive_reap_next;
+	cdev = csk->cdev;
+
+	if (!oreq)
+		return -1;
+
+	ctx = lookup_stid(cdev->tids, oreq->ts_recent);
+	if (!ctx)
+		return -1;
+
+	listen_ctx = (struct listen_ctx *)ctx;
+	psk = listen_ctx->lsk;
+
+	bh_lock_sock(psk);
+	if (!sock_owned_by_user(psk)) {
+		int queue = csk->txq_idx;
+
+		do_abort_syn_rcv(sk, psk);
+		send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
+	} else {
+		skb->sk = sk;
+		BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
+		__sk_add_backlog(psk, skb);
+	}
+	bh_unlock_sock(psk);
+	return 0;
+}
+
+static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
+{
+	const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
+	struct chtls_sock *csk = sk->sk_user_data;
+	int rst_status = CPL_ABORT_NO_RST;
+	int queue = csk->txq_idx;
+
+	if (is_neg_adv(req->status)) {
+		if (sk->sk_state == TCP_SYN_RECV)
+			chtls_set_tcb_tflag(sk, 0, 0);
+
+		kfree_skb(skb);
+		return;
+	}
+
+	csk_reset_flag(csk, CSK_ABORT_REQ_RCVD);
+
+	if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) &&
+	    !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
+			WARN_ONCE(1, "send_tx_flowc error");
+		csk_set_flag(csk, CSK_TX_DATA_SENT);
+	}
+
+	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
+
+	if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
+		sk->sk_err = ETIMEDOUT;
+
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_error_report(sk);
+
+		if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
+			return;
+
+		chtls_release_resources(sk);
+		chtls_conn_done(sk);
+	}
+
+	chtls_send_abort_rpl(sk, skb, csk->cdev, rst_status, queue);
+}
+
+static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)
+{
+	struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR;
+	struct chtls_sock *csk;
+	struct chtls_dev *cdev;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	cdev = csk->cdev;
+
+	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
+		csk_reset_flag(csk, CSK_ABORT_RPL_PENDING);
+		if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) {
+			if (sk->sk_state == TCP_SYN_SENT) {
+				cxgb4_remove_tid(cdev->tids,
+						 csk->port_id,
+						 GET_TID(rpl),
+						 sk->sk_family);
+				sock_put(sk);
+			}
+			chtls_release_resources(sk);
+			chtls_conn_done(sk);
+		}
+	}
+	kfree_skb(skb);
+}
+
+static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR;
+	void (*fn)(struct sock *sk, struct sk_buff *skb);
+	unsigned int hwtid = GET_TID(req);
+	struct sock *sk;
+	u8 opcode;
+
+	opcode = ((const struct rss_header *)cplhdr(skb))->opcode;
+
+	sk = lookup_tid(cdev->tids, hwtid);
+	if (!sk)
+		goto rel_skb;
+
+	switch (opcode) {
+	case CPL_PEER_CLOSE:
+		fn = chtls_peer_close;
+		break;
+	case CPL_CLOSE_CON_RPL:
+		fn = chtls_close_con_rpl;
+		break;
+	case CPL_ABORT_REQ_RSS:
+		fn = chtls_abort_req_rss;
+		break;
+	case CPL_ABORT_RPL_RSS:
+		fn = chtls_abort_rpl_rss;
+		break;
+	default:
+		goto rel_skb;
+	}
+
+	process_cpl_msg(fn, sk, skb);
+	return 0;
+
+rel_skb:
+	kfree_skb(skb);
+	return 0;
+}
+
+static struct sk_buff *dequeue_wr(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct sk_buff *skb = csk->wr_skb_head;
+
+	if (likely(skb)) {
+	/* Don't bother clearing the tail */
+		csk->wr_skb_head = WR_SKB_CB(skb)->next_wr;
+		WR_SKB_CB(skb)->next_wr = NULL;
+	}
+	return skb;
+}
+
+static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
+{
+	struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR;
+	struct chtls_sock *csk = sk->sk_user_data;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 credits = hdr->credits;
+	u32 snd_una;
+
+	snd_una = ntohl(hdr->snd_una);
+	csk->wr_credits += credits;
+
+	if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits)
+		csk->wr_unacked = csk->wr_max_credits - csk->wr_credits;
+
+	while (credits) {
+		struct sk_buff *pskb = csk->wr_skb_head;
+		u32 csum;
+
+		if (unlikely(!pskb)) {
+			if (csk->wr_nondata)
+				csk->wr_nondata -= credits;
+			break;
+		}
+		csum = (__force u32)pskb->csum;
+		if (unlikely(credits < csum)) {
+			pskb->csum = (__force __wsum)(csum - credits);
+			break;
+		}
+		dequeue_wr(sk);
+		credits -= csum;
+		kfree_skb(pskb);
+	}
+	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) {
+		if (unlikely(before(snd_una, tp->snd_una))) {
+			kfree_skb(skb);
+			return;
+		}
+
+		if (tp->snd_una != snd_una) {
+			tp->snd_una = snd_una;
+			tp->rcv_tstamp = tcp_time_stamp(tp);
+			if (tp->snd_una == tp->snd_nxt &&
+			    !csk_flag_nochk(csk, CSK_TX_FAILOVER))
+				csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
+		}
+	}
+
+	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) {
+		unsigned int fclen16 = roundup(failover_flowc_wr_len, 16);
+
+		csk->wr_credits -= fclen16;
+		csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
+		csk_reset_flag(csk, CSK_TX_FAILOVER);
+	}
+	if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0))
+		sk->sk_write_space(sk);
+
+	kfree_skb(skb);
+}
+
+static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb)
+{
+	struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR;
+	unsigned int hwtid = GET_TID(rpl);
+	struct sock *sk;
+
+	sk = lookup_tid(cdev->tids, hwtid);
+	process_cpl_msg(chtls_rx_ack, sk, skb);
+
+	return 0;
+}
+
+chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = {
+	[CPL_PASS_OPEN_RPL]     = chtls_pass_open_rpl,
+	[CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl,
+	[CPL_PASS_ACCEPT_REQ]   = chtls_pass_accept_req,
+	[CPL_PASS_ESTABLISH]    = chtls_pass_establish,
+	[CPL_RX_DATA]           = chtls_rx_data,
+	[CPL_TLS_DATA]          = chtls_rx_pdu,
+	[CPL_RX_TLS_CMP]        = chtls_rx_cmp,
+	[CPL_PEER_CLOSE]        = chtls_conn_cpl,
+	[CPL_CLOSE_CON_RPL]     = chtls_conn_cpl,
+	[CPL_ABORT_REQ_RSS]     = chtls_conn_cpl,
+	[CPL_ABORT_RPL_RSS]     = chtls_conn_cpl,
+	[CPL_FW4_ACK]           = chtls_wr_ack,
+};
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e7e36433cdb5..57b5468b5139 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -332,6 +332,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	tcp_update_metrics(sk);
 	tcp_done(sk);
 }
+EXPORT_SYMBOL(tcp_time_wait);
 
 void tcp_twsk_destructor(struct sock *sk)
 {

From 36bedb3f2e5b81832b5895363ed3fedb9ff1e8d0 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:42:00 +0530
Subject: [PATCH 1626/1678] crypto: chtls - Inline TLS record Tx

TLS handler for record transmit.
Create Inline TLS work request and post to FW.
Create Inline TLS record CPLs for hardware

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: Michael Werner <werner@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_io.c   | 1222 +++++++++++++++++++++
 drivers/crypto/chelsio/chtls/chtls_main.c |    2 +
 2 files changed, 1224 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_io.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
new file mode 100644
index 000000000000..6974d3ed0ff5
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -0,0 +1,1222 @@
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gupta@chelsio.com)
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sched/signal.h>
+#include <net/tcp.h>
+#include <net/busy_poll.h>
+#include <crypto/aes.h>
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+static bool is_tls_tx(struct chtls_sock *csk)
+{
+	return csk->tlshws.txkey >= 0;
+}
+
+static int data_sgl_len(const struct sk_buff *skb)
+{
+	unsigned int cnt;
+
+	cnt = skb_shinfo(skb)->nr_frags;
+	return sgl_len(cnt) * 8;
+}
+
+static int nos_ivs(struct sock *sk, unsigned int size)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	return DIV_ROUND_UP(size, csk->tlshws.mfs);
+}
+
+static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
+{
+	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
+	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
+
+	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
+	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
+		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
+		return 1;
+	}
+	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
+	return 0;
+}
+
+static int max_ivs_size(struct sock *sk, int size)
+{
+	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
+}
+
+static int ivs_size(struct sock *sk, const struct sk_buff *skb)
+{
+	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
+		 CIPHER_BLOCK_SIZE) : 0;
+}
+
+static int flowc_wr_credits(int nparams, int *flowclenp)
+{
+	int flowclen16, flowclen;
+
+	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
+	flowclen16 = DIV_ROUND_UP(flowclen, 16);
+	flowclen = flowclen16 * 16;
+
+	if (flowclenp)
+		*flowclenp = flowclen;
+
+	return flowclen16;
+}
+
+static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
+					   struct fw_flowc_wr *flowc,
+					   int flowclen)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct sk_buff *skb;
+
+	skb = alloc_skb(flowclen, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	memcpy(__skb_put(skb, flowclen), flowc, flowclen);
+	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
+
+	return skb;
+}
+
+static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
+			 int flowclen)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	int flowclen16 = flowclen / 16;
+	struct sk_buff *skb;
+	int ret;
+
+	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
+		skb = create_flowc_wr_skb(sk, flowc, flowclen);
+		if (!skb)
+			return -ENOMEM;
+
+		skb_entail(sk, skb,
+			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
+		return 0;
+	}
+
+	ret = cxgb4_immdata_send(csk->egress_dev,
+				 csk->txq_idx,
+				 flowc, flowclen);
+	if (!ret)
+		return flowclen16;
+	skb = create_flowc_wr_skb(sk, flowc, flowclen);
+	if (!skb)
+		return -ENOMEM;
+	send_or_defer(sk, tp, skb, 0);
+	return flowclen16;
+}
+
+static u8 tcp_state_to_flowc_state(u8 state)
+{
+	switch (state) {
+	case TCP_ESTABLISHED:
+		return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
+	case TCP_CLOSE_WAIT:
+		return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
+	case TCP_FIN_WAIT1:
+		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
+	case TCP_CLOSING:
+		return FW_FLOWC_MNEM_TCPSTATE_CLOSING;
+	case TCP_LAST_ACK:
+		return FW_FLOWC_MNEM_TCPSTATE_LASTACK;
+	case TCP_FIN_WAIT2:
+		return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
+	}
+
+	return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
+}
+
+int send_tx_flowc_wr(struct sock *sk, int compl,
+		     u32 snd_nxt, u32 rcv_nxt)
+{
+	struct flowc_packed {
+		struct fw_flowc_wr fc;
+		struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
+	} __packed sflowc;
+	int nparams, paramidx, flowclen16, flowclen;
+	struct fw_flowc_wr *flowc;
+	struct chtls_sock *csk;
+	struct tcp_sock *tp;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	tp = tcp_sk(sk);
+	memset(&sflowc, 0, sizeof(sflowc));
+	flowc = &sflowc.fc;
+
+#define FLOWC_PARAM(__m, __v) \
+	do { \
+		flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \
+		flowc->mnemval[paramidx].val = cpu_to_be32(__v); \
+		paramidx++; \
+	} while (0)
+
+	paramidx = 0;
+
+	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
+	FLOWC_PARAM(CH, csk->tx_chan);
+	FLOWC_PARAM(PORT, csk->tx_chan);
+	FLOWC_PARAM(IQID, csk->rss_qid);
+	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
+	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
+	FLOWC_PARAM(SNDBUF, csk->sndbuf);
+	FLOWC_PARAM(MSS, tp->mss_cache);
+	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));
+
+	if (SND_WSCALE(tp))
+		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));
+
+	if (csk->ulp_mode == ULP_MODE_TLS)
+		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);
+
+	if (csk->tlshws.fcplenmax)
+		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);
+
+	nparams = paramidx;
+#undef FLOWC_PARAM
+
+	flowclen16 = flowc_wr_credits(nparams, &flowclen);
+	flowc->op_to_nparams =
+		cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
+			    FW_WR_COMPL_V(compl) |
+			    FW_FLOWC_WR_NPARAMS_V(nparams));
+	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
+					  FW_WR_FLOWID_V(csk->tid));
+
+	return send_flowc_wr(sk, flowc, flowclen);
+}
+
+/* Copy IVs to WR */
+static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
+
+{
+	struct chtls_sock *csk;
+	unsigned char *iv_loc;
+	struct chtls_hws *hws;
+	unsigned char *ivs;
+	u16 number_of_ivs;
+	struct page *page;
+	int err = 0;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	hws = &csk->tlshws;
+	number_of_ivs = nos_ivs(sk, skb->len);
+
+	if (number_of_ivs > MAX_IVS_PAGE) {
+		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
+		return -ENOMEM;
+	}
+
+	/* generate the  IVs */
+	ivs = kmalloc(number_of_ivs * CIPHER_BLOCK_SIZE, GFP_ATOMIC);
+	if (!ivs)
+		return -ENOMEM;
+	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
+
+	if (skb_ulp_tls_iv_imm(skb)) {
+		/* send the IVs as immediate data in the WR */
+		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
+						CIPHER_BLOCK_SIZE);
+		if (iv_loc)
+			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
+
+		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
+	} else {
+		/* Send the IVs as sgls */
+		/* Already accounted IV DSGL for credits */
+		skb_shinfo(skb)->nr_frags--;
+		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
+		if (!page) {
+			pr_info("%s : Page allocation for IVs failed\n",
+				__func__);
+			err = -ENOMEM;
+			goto out;
+		}
+		memcpy(page_address(page), ivs, number_of_ivs *
+		       CIPHER_BLOCK_SIZE);
+		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
+				   number_of_ivs * CIPHER_BLOCK_SIZE);
+		hws->ivsize = 0;
+	}
+out:
+	kfree(ivs);
+	return err;
+}
+
+/* Copy Key to WR */
+static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
+{
+	struct ulptx_sc_memrd *sc_memrd;
+	struct chtls_sock *csk;
+	struct chtls_dev *cdev;
+	struct ulptx_idata *sc;
+	struct chtls_hws *hws;
+	u32 immdlen;
+	int kaddr;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	hws = &csk->tlshws;
+	cdev = csk->cdev;
+
+	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
+	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
+	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
+	if (sc) {
+		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
+		sc->len = htonl(0);
+		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
+		sc_memrd->cmd_to_len =
+				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
+				ULP_TX_SC_MORE_V(1) |
+				ULPTX_LEN16_V(hws->keylen >> 4));
+		sc_memrd->addr = htonl(kaddr);
+	}
+}
+
+static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
+{
+	return hws->tx_seq_no++;
+}
+
+static bool is_sg_request(const struct sk_buff *skb)
+{
+	return skb->peeked ||
+		(skb->len > MAX_IMM_ULPTX_WR_LEN);
+}
+
+/*
+ * Returns true if an sk_buff carries urgent data.
+ */
+static bool skb_urgent(struct sk_buff *skb)
+{
+	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
+}
+
+/* TLS content type for CPL SFO */
+static unsigned char tls_content_type(unsigned char content_type)
+{
+	switch (content_type) {
+	case TLS_HDR_TYPE_CCS:
+		return CPL_TX_TLS_SFO_TYPE_CCS;
+	case TLS_HDR_TYPE_ALERT:
+		return CPL_TX_TLS_SFO_TYPE_ALERT;
+	case TLS_HDR_TYPE_HANDSHAKE:
+		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
+	case TLS_HDR_TYPE_HEARTBEAT:
+		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
+	}
+	return CPL_TX_TLS_SFO_TYPE_DATA;
+}
+
+static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
+			   int dlen, int tls_immd, u32 credits,
+			   int expn, int pdus)
+{
+	struct fw_tlstx_data_wr *req_wr;
+	struct cpl_tx_tls_sfo *req_cpl;
+	unsigned int wr_ulp_mode_force;
+	struct tls_scmd *updated_scmd;
+	unsigned char data_type;
+	struct chtls_sock *csk;
+	struct net_device *dev;
+	struct chtls_hws *hws;
+	struct tls_scmd *scmd;
+	struct adapter *adap;
+	unsigned char *req;
+	int immd_len;
+	int iv_imm;
+	int len;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	iv_imm = skb_ulp_tls_iv_imm(skb);
+	dev = csk->egress_dev;
+	adap = netdev2adap(dev);
+	hws = &csk->tlshws;
+	scmd = &hws->scmd;
+	len = dlen + expn;
+
+	dlen = (dlen < hws->mfs) ? dlen : hws->mfs;
+	atomic_inc(&adap->chcr_stats.tls_pdu_tx);
+
+	updated_scmd = scmd;
+	updated_scmd->seqno_numivs &= 0xffffff80;
+	updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus);
+	hws->scmd = *updated_scmd;
+
+	req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo));
+	req_cpl = (struct cpl_tx_tls_sfo *)req;
+	req = (unsigned char *)__skb_push(skb, (sizeof(struct
+				fw_tlstx_data_wr)));
+
+	req_wr = (struct fw_tlstx_data_wr *)req;
+	immd_len = (tls_immd ? dlen : 0);
+	req_wr->op_to_immdlen =
+		htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
+		FW_TLSTX_DATA_WR_COMPL_V(1) |
+		FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
+	req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
+				     FW_TLSTX_DATA_WR_LEN16_V(credits));
+	wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS);
+
+	if (is_sg_request(skb))
+		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
+			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
+			FW_OFLD_TX_DATA_WR_SHOVE_F);
+
+	req_wr->lsodisable_to_flags =
+			htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
+			      FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) |
+			      T6_TX_FORCE_F | wr_ulp_mode_force |
+			      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
+					 skb_queue_empty(&csk->txq)));
+
+	req_wr->ctxloc_to_exp =
+			htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
+			      FW_TLSTX_DATA_WR_EXP_V(expn) |
+			      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
+			      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
+			      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));
+
+	/* Fill in the length */
+	req_wr->plen = htonl(len);
+	req_wr->mfs = htons(hws->mfs);
+	req_wr->adjustedplen_pkd =
+		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
+	req_wr->expinplenmax_pkd =
+		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
+	req_wr->pdusinplenmax_pkd =
+		FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
+	req_wr->r10 = 0;
+
+	data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
+	req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
+				       CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) |
+				       CPL_TX_TLS_SFO_CPL_LEN_V(2) |
+				       CPL_TX_TLS_SFO_SEG_LEN_V(dlen));
+	req_cpl->pld_len = htonl(len - expn);
+
+	req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V
+		((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
+		TLS_HDR_TYPE_HEARTBEAT : 0) |
+		CPL_TX_TLS_SFO_PROTOVER_V(0));
+
+	/* create the s-command */
+	req_cpl->r1_lo = 0;
+	req_cpl->seqno_numivs  = cpu_to_be32(hws->scmd.seqno_numivs);
+	req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
+	req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
+}
+
+/*
+ * Calculate the TLS data expansion size
+ */
+static int chtls_expansion_size(struct sock *sk, int data_len,
+				int fullpdu,
+				unsigned short *pducnt)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct chtls_hws *hws = &csk->tlshws;
+	struct tls_scmd *scmd = &hws->scmd;
+	int fragsize = hws->mfs;
+	int expnsize = 0;
+	int fragleft;
+	int fragcnt;
+	int expppdu;
+
+	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
+	    SCMD_CIPH_MODE_AES_GCM) {
+		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
+			  TLS_HEADER_LENGTH;
+
+		if (fullpdu) {
+			*pducnt = data_len / (expppdu + fragsize);
+			if (*pducnt > 32)
+				*pducnt = 32;
+			else if (!*pducnt)
+				*pducnt = 1;
+			expnsize = (*pducnt) * expppdu;
+			return expnsize;
+		}
+		fragcnt = (data_len / fragsize);
+		expnsize =  fragcnt * expppdu;
+		fragleft = data_len % fragsize;
+		if (fragleft > 0)
+			expnsize += expppdu;
+	}
+	return expnsize;
+}
+
+/* WR with IV, KEY and CPL SFO added */
+static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
+			       int tls_tx_imm, int tls_len, u32 credits)
+{
+	unsigned short pdus_per_ulp = 0;
+	struct chtls_sock *csk;
+	struct chtls_hws *hws;
+	int expn_sz;
+	int pdus;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	hws = &csk->tlshws;
+	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
+	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
+	if (!hws->compute) {
+		hws->expansion = chtls_expansion_size(sk,
+						      hws->fcplenmax,
+						      1, &pdus_per_ulp);
+		hws->pdus = pdus_per_ulp;
+		hws->adjustlen = hws->pdus *
+			((hws->expansion / hws->pdus) + hws->mfs);
+		hws->compute = 1;
+	}
+	if (tls_copy_ivs(sk, skb))
+		return;
+	tls_copy_tx_key(sk, skb);
+	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
+	hws->tx_seq_no += (pdus - 1);
+}
+
+static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
+			    unsigned int immdlen, int len,
+			    u32 credits, u32 compl)
+{
+	struct fw_ofld_tx_data_wr *req;
+	unsigned int wr_ulp_mode_force;
+	struct chtls_sock *csk;
+	unsigned int opcode;
+
+	csk = rcu_dereference_sk_user_data(sk);
+	opcode = FW_OFLD_TX_DATA_WR;
+
+	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
+	req->op_to_immdlen = htonl(WR_OP_V(opcode) |
+				FW_WR_COMPL_V(compl) |
+				FW_WR_IMMDLEN_V(immdlen));
+	req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
+				FW_WR_LEN16_V(credits));
+
+	wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode);
+	if (is_sg_request(skb))
+		wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F |
+			((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 :
+				FW_OFLD_TX_DATA_WR_SHOVE_F);
+
+	req->tunnel_to_proxy = htonl(wr_ulp_mode_force |
+			FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) |
+			FW_OFLD_TX_DATA_WR_SHOVE_V((!csk_flag
+					(sk, CSK_TX_MORE_DATA)) &&
+					 skb_queue_empty(&csk->txq)));
+	req->plen = htonl(len);
+}
+
+static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
+			 bool size)
+{
+	int wr_size;
+
+	wr_size = TLS_WR_CPL_LEN;
+	wr_size += KEY_ON_MEM_SZ;
+	wr_size += ivs_size(csk->sk, skb);
+
+	if (size)
+		return wr_size;
+
+	/* frags counted for IV dsgl */
+	if (!skb_ulp_tls_iv_imm(skb))
+		skb_shinfo(skb)->nr_frags++;
+
+	return wr_size;
+}
+
+static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
+{
+	int length = skb->len;
+
+	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
+		return false;
+
+	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
+		/* Check TLS header len for Immediate */
+		if (csk->ulp_mode == ULP_MODE_TLS &&
+		    skb_ulp_tls_inline(skb))
+			length += chtls_wr_size(csk, skb, true);
+		else
+			length += sizeof(struct fw_ofld_tx_data_wr);
+
+		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
+	}
+	return true;
+}
+
+static unsigned int calc_tx_flits(const struct sk_buff *skb,
+				  unsigned int immdlen)
+{
+	unsigned int flits, cnt;
+
+	flits = immdlen / 8;   /* headers */
+	cnt = skb_shinfo(skb)->nr_frags;
+	if (skb_tail_pointer(skb) != skb_transport_header(skb))
+		cnt++;
+	return flits + sgl_len(cnt);
+}
+
+static void arp_failure_discard(void *handle, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+}
+
+int chtls_push_frames(struct chtls_sock *csk, int comp)
+{
+	struct chtls_hws *hws = &csk->tlshws;
+	struct tcp_sock *tp;
+	struct sk_buff *skb;
+	int total_size = 0;
+	struct sock *sk;
+	int wr_size;
+
+	wr_size = sizeof(struct fw_ofld_tx_data_wr);
+	sk = csk->sk;
+	tp = tcp_sk(sk);
+
+	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
+		return 0;
+
+	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
+		return 0;
+
+	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
+	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
+		skb_queue_len(&csk->txq) > 1)) {
+		unsigned int credit_len = skb->len;
+		unsigned int credits_needed;
+		unsigned int completion = 0;
+		int tls_len = skb->len;/* TLS data len before IV/key */
+		unsigned int immdlen;
+		int len = skb->len;    /* length [ulp bytes] inserted by hw */
+		int flowclen16 = 0;
+		int tls_tx_imm = 0;
+
+		immdlen = skb->len;
+		if (!is_ofld_imm(csk, skb)) {
+			immdlen = skb_transport_offset(skb);
+			if (skb_ulp_tls_inline(skb))
+				wr_size = chtls_wr_size(csk, skb, false);
+			credit_len = 8 * calc_tx_flits(skb, immdlen);
+		} else {
+			if (skb_ulp_tls_inline(skb)) {
+				wr_size = chtls_wr_size(csk, skb, false);
+				tls_tx_imm = 1;
+			}
+		}
+		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
+			credit_len += wr_size;
+		credits_needed = DIV_ROUND_UP(credit_len, 16);
+		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
+			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
+						      tp->rcv_nxt);
+			if (flowclen16 <= 0)
+				break;
+			csk->wr_credits -= flowclen16;
+			csk->wr_unacked += flowclen16;
+			csk->wr_nondata += flowclen16;
+			csk_set_flag(csk, CSK_TX_DATA_SENT);
+		}
+
+		if (csk->wr_credits < credits_needed) {
+			if (skb_ulp_tls_inline(skb) &&
+			    !skb_ulp_tls_iv_imm(skb))
+				skb_shinfo(skb)->nr_frags--;
+			break;
+		}
+
+		__skb_unlink(skb, &csk->txq);
+		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
+				      CPL_PRIORITY_DATA);
+		if (hws->ofld)
+			hws->txqid = (skb->queue_mapping >> 1);
+		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
+		csk->wr_credits -= credits_needed;
+		csk->wr_unacked += credits_needed;
+		csk->wr_nondata = 0;
+		enqueue_wr(csk, skb);
+
+		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
+			if ((comp && csk->wr_unacked == credits_needed) ||
+			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
+			    csk->wr_unacked >= csk->wr_max_credits / 2) {
+				completion = 1;
+				csk->wr_unacked = 0;
+			}
+			if (skb_ulp_tls_inline(skb))
+				make_tlstx_data_wr(sk, skb, tls_tx_imm,
+						   tls_len, credits_needed);
+			else
+				make_tx_data_wr(sk, skb, immdlen, len,
+						credits_needed, completion);
+			tp->snd_nxt += len;
+			tp->lsndtime = tcp_time_stamp(tp);
+			if (completion)
+				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
+		} else {
+			struct cpl_close_con_req *req = cplhdr(skb);
+			unsigned int cmd  = CPL_OPCODE_G(ntohl
+					     (OPCODE_TID(req)));
+
+			if (cmd == CPL_CLOSE_CON_REQ)
+				csk_set_flag(csk,
+					     CSK_CLOSE_CON_REQUESTED);
+
+			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
+			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
+				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
+				csk->wr_unacked = 0;
+			}
+		}
+		total_size += skb->truesize;
+		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
+			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
+		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
+	}
+	sk->sk_wmem_queued -= total_size;
+	return total_size;
+}
+
+static void mark_urg(struct tcp_sock *tp, int flags,
+		     struct sk_buff *skb)
+{
+	if (unlikely(flags & MSG_OOB)) {
+		tp->snd_up = tp->write_seq;
+		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
+					 ULPCB_FLAG_BARRIER |
+					 ULPCB_FLAG_NO_APPEND |
+					 ULPCB_FLAG_NEED_HDR;
+	}
+}
+
+/*
+ * Returns true if a connection should send more data to TCP engine
+ */
+static bool should_push(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct chtls_dev *cdev = csk->cdev;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/*
+	 * If we've released our offload resources there's nothing to do ...
+	 */
+	if (!cdev)
+		return false;
+
+	/*
+	 * If there aren't any work requests in flight, or there isn't enough
+	 * data in flight, or Nagle is off then send the current TX_DATA
+	 * otherwise hold it and wait to accumulate more data.
+	 */
+	return csk->wr_credits == csk->wr_max_credits ||
+		(tp->nonagle & TCP_NAGLE_OFF);
+}
+
+/*
+ * Returns true if a TCP socket is corked.
+ */
+static bool corked(const struct tcp_sock *tp, int flags)
+{
+	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
+}
+
+/*
+ * Returns true if a send should try to push new data.
+ */
+static bool send_should_push(struct sock *sk, int flags)
+{
+	return should_push(sk) && !corked(tcp_sk(sk), flags);
+}
+
+void chtls_tcp_push(struct sock *sk, int flags)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	int qlen = skb_queue_len(&csk->txq);
+
+	if (likely(qlen)) {
+		struct sk_buff *skb = skb_peek_tail(&csk->txq);
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		mark_urg(tp, flags, skb);
+
+		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
+		    corked(tp, flags)) {
+			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
+			return;
+		}
+
+		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
+		if (qlen == 1 &&
+		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
+		     should_push(sk)))
+			chtls_push_frames(csk, 1);
+	}
+}
+
+/*
+ * Calculate the size for a new send sk_buff.  It's maximum size so we can
+ * pack lots of data into it, unless we plan to send it immediately, in which
+ * case we size it more tightly.
+ *
+ * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
+ * arise in normal cases and when it does we are just wasting memory.
+ */
+static int select_size(struct sock *sk, int io_len, int flags, int len)
+{
+	const int pgbreak = SKB_MAX_HEAD(len);
+
+	/*
+	 * If the data wouldn't fit in the main body anyway, put only the
+	 * header in the main body so it can use immediate data and place all
+	 * the payload in page fragments.
+	 */
+	if (io_len > pgbreak)
+		return 0;
+
+	/*
+	 * If we will be accumulating payload get a large main body.
+	 */
+	if (!send_should_push(sk, flags))
+		return pgbreak;
+
+	return io_len;
+}
+
+void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	ULP_SKB_CB(skb)->seq = tp->write_seq;
+	ULP_SKB_CB(skb)->flags = flags;
+	__skb_queue_tail(&csk->txq, skb);
+	sk->sk_wmem_queued += skb->truesize;
+
+	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
+		put_page(TCP_PAGE(sk));
+		TCP_PAGE(sk) = NULL;
+		TCP_OFF(sk) = 0;
+	}
+}
+
+static struct sk_buff *get_tx_skb(struct sock *sk, int size)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
+	if (likely(skb)) {
+		skb_reserve(skb, TX_HEADER_LEN);
+		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
+		skb_reset_transport_header(skb);
+	}
+	return skb;
+}
+
+static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct sk_buff *skb;
+
+	skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN +
+			KEY_ON_MEM_SZ + max_ivs_size(sk, size)),
+			sk->sk_allocation);
+	if (likely(skb)) {
+		skb_reserve(skb, (TX_TLSHDR_LEN +
+			    KEY_ON_MEM_SZ + max_ivs_size(sk, size)));
+		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
+		skb_reset_transport_header(skb);
+		ULP_SKB_CB(skb)->ulp.tls.ofld = 1;
+		ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type;
+	}
+	return skb;
+}
+
+static void tx_skb_finalize(struct sk_buff *skb)
+{
+	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
+
+	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
+		cb->flags = ULPCB_FLAG_NEED_HDR;
+	cb->flags |= ULPCB_FLAG_NO_APPEND;
+}
+
+static void push_frames_if_head(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+
+	if (skb_queue_len(&csk->txq) == 1)
+		chtls_push_frames(csk, 1);
+}
+
+static int chtls_skb_copy_to_page_nocache(struct sock *sk,
+					  struct iov_iter *from,
+					  struct sk_buff *skb,
+					  struct page *page,
+					  int off, int copy)
+{
+	int err;
+
+	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
+				       off, copy, skb->len);
+	if (err)
+		return err;
+
+	skb->len             += copy;
+	skb->data_len        += copy;
+	skb->truesize        += copy;
+	sk->sk_wmem_queued   += copy;
+	return 0;
+}
+
+/* Read TLS header to find content type and data length */
+static u16 tls_header_read(struct tls_hdr *thdr, struct iov_iter *from)
+{
+	if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr))
+		return -EFAULT;
+	return (__force u16)cpu_to_be16(thdr->length);
+}
+
+int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct chtls_dev *cdev = csk->cdev;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int mss, flags, err;
+	int recordsz = 0;
+	int copied = 0;
+	int hdrlen = 0;
+	long timeo;
+
+	lock_sock(sk);
+	flags = msg->msg_flags;
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
+		err = sk_stream_wait_connect(sk, &timeo);
+		if (err)
+			goto out_err;
+	}
+
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+	err = -EPIPE;
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		goto out_err;
+
+	mss = csk->mss;
+	csk_set_flag(csk, CSK_TX_MORE_DATA);
+
+	while (msg_data_left(msg)) {
+		int copy = 0;
+
+		skb = skb_peek_tail(&csk->txq);
+		if (skb) {
+			copy = mss - skb->len;
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+
+		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
+			struct tls_hdr hdr;
+
+			recordsz = tls_header_read(&hdr, &msg->msg_iter);
+			size -= TLS_HEADER_LENGTH;
+			hdrlen += TLS_HEADER_LENGTH;
+			csk->tlshws.txleft = recordsz;
+			csk->tlshws.type = hdr.type;
+			if (skb)
+				ULP_SKB_CB(skb)->ulp.tls.type = hdr.type;
+		}
+
+		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
+		    copy <= 0) {
+new_buf:
+			if (skb) {
+				tx_skb_finalize(skb);
+				push_frames_if_head(sk);
+			}
+
+			if (is_tls_tx(csk)) {
+				skb = get_record_skb(sk,
+						     select_size(sk,
+								 recordsz,
+								 flags,
+								 TX_TLSHDR_LEN),
+								 false);
+			} else {
+				skb = get_tx_skb(sk,
+						 select_size(sk, size, flags,
+							     TX_HEADER_LEN));
+			}
+			if (unlikely(!skb))
+				goto wait_for_memory;
+
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			copy = mss;
+		}
+		if (copy > size)
+			copy = size;
+
+		if (skb_tailroom(skb) > 0) {
+			copy = min(copy, skb_tailroom(skb));
+			if (is_tls_tx(csk))
+				copy = min_t(int, copy, csk->tlshws.txleft);
+			err = skb_add_data_nocache(sk, skb,
+						   &msg->msg_iter, copy);
+			if (err)
+				goto do_fault;
+		} else {
+			int i = skb_shinfo(skb)->nr_frags;
+			struct page *page = TCP_PAGE(sk);
+			int pg_size = PAGE_SIZE;
+			int off = TCP_OFF(sk);
+			bool merge;
+
+			if (page)
+				pg_size <<= compound_order(page);
+
+			if (off < pg_size &&
+			    skb_can_coalesce(skb, i, page, off)) {
+				merge = 1;
+				goto copy;
+			}
+			merge = 0;
+			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
+			    MAX_SKB_FRAGS))
+				goto new_buf;
+
+			if (page && off == pg_size) {
+				put_page(page);
+				TCP_PAGE(sk) = page = NULL;
+				pg_size = PAGE_SIZE;
+			}
+
+			if (!page) {
+				gfp_t gfp = sk->sk_allocation;
+				int order = cdev->send_page_order;
+
+				if (order) {
+					page = alloc_pages(gfp | __GFP_COMP |
+							   __GFP_NOWARN |
+							   __GFP_NORETRY,
+							   order);
+					if (page)
+						pg_size <<=
+							compound_order(page);
+				}
+				if (!page) {
+					page = alloc_page(gfp);
+					pg_size = PAGE_SIZE;
+				}
+				if (!page)
+					goto wait_for_memory;
+				off = 0;
+			}
+copy:
+			if (copy > pg_size - off)
+				copy = pg_size - off;
+			if (is_tls_tx(csk))
+				copy = min_t(int, copy, csk->tlshws.txleft);
+
+			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
+							     skb, page,
+							     off, copy);
+			if (unlikely(err)) {
+				if (!TCP_PAGE(sk)) {
+					TCP_PAGE(sk) = page;
+					TCP_OFF(sk) = 0;
+				}
+				goto do_fault;
+			}
+			/* Update the skb. */
+			if (merge) {
+				skb_shinfo(skb)->frags[i - 1].size += copy;
+			} else {
+				skb_fill_page_desc(skb, i, page, off, copy);
+				if (off + copy < pg_size) {
+					/* space left keep page */
+					get_page(page);
+					TCP_PAGE(sk) = page;
+				} else {
+					TCP_PAGE(sk) = NULL;
+				}
+			}
+			TCP_OFF(sk) = off + copy;
+		}
+		if (unlikely(skb->len == mss))
+			tx_skb_finalize(skb);
+		tp->write_seq += copy;
+		copied += copy;
+		size -= copy;
+
+		if (is_tls_tx(csk))
+			csk->tlshws.txleft -= copy;
+
+		if (corked(tp, flags) &&
+		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
+			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
+
+		if (size == 0)
+			goto out;
+
+		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
+			push_frames_if_head(sk);
+		continue;
+wait_for_memory:
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err)
+			goto do_error;
+	}
+out:
+	csk_reset_flag(csk, CSK_TX_MORE_DATA);
+	if (copied)
+		chtls_tcp_push(sk, flags);
+done:
+	release_sock(sk);
+	return copied + hdrlen;
+do_fault:
+	if (!skb->len) {
+		__skb_unlink(skb, &csk->txq);
+		sk->sk_wmem_queued -= skb->truesize;
+		__kfree_skb(skb);
+	}
+do_error:
+	if (copied)
+		goto out;
+out_err:
+	if (csk_conn_inline(csk))
+		csk_reset_flag(csk, CSK_TX_MORE_DATA);
+	copied = sk_stream_error(sk, flags, err);
+	goto done;
+}
+
+int chtls_sendpage(struct sock *sk, struct page *page,
+		   int offset, size_t size, int flags)
+{
+	struct chtls_sock *csk;
+	int mss, err, copied;
+	struct tcp_sock *tp;
+	long timeo;
+
+	tp = tcp_sk(sk);
+	copied = 0;
+	csk = rcu_dereference_sk_user_data(sk);
+	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+	err = sk_stream_wait_connect(sk, &timeo);
+	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+	    err != 0)
+		goto out_err;
+
+	mss = csk->mss;
+	csk_set_flag(csk, CSK_TX_MORE_DATA);
+
+	while (size > 0) {
+		struct sk_buff *skb = skb_peek_tail(&csk->txq);
+		int copy, i;
+
+		copy = mss - skb->len;
+		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
+		    copy <= 0) {
+new_buf:
+
+			if (is_tls_tx(csk)) {
+				skb = get_record_skb(sk,
+						     select_size(sk, size,
+								 flags,
+								 TX_TLSHDR_LEN),
+						     true);
+			} else {
+				skb = get_tx_skb(sk, 0);
+			}
+			if (!skb)
+				goto do_error;
+			copy = mss;
+		}
+		if (copy > size)
+			copy = size;
+
+		i = skb_shinfo(skb)->nr_frags;
+		if (skb_can_coalesce(skb, i, page, offset)) {
+			skb_shinfo(skb)->frags[i - 1].size += copy;
+		} else if (i < MAX_SKB_FRAGS) {
+			get_page(page);
+			skb_fill_page_desc(skb, i, page, offset, copy);
+		} else {
+			tx_skb_finalize(skb);
+			push_frames_if_head(sk);
+			goto new_buf;
+		}
+
+		skb->len += copy;
+		if (skb->len == mss)
+			tx_skb_finalize(skb);
+		skb->data_len += copy;
+		skb->truesize += copy;
+		sk->sk_wmem_queued += copy;
+		tp->write_seq += copy;
+		copied += copy;
+		offset += copy;
+		size -= copy;
+
+		if (corked(tp, flags) &&
+		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
+			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;
+
+		if (!size)
+			break;
+
+		if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
+			push_frames_if_head(sk);
+		continue;
+
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+	}
+out:
+	csk_reset_flag(csk, CSK_TX_MORE_DATA);
+	if (copied)
+		chtls_tcp_push(sk, flags);
+done:
+	release_sock(sk);
+	return copied;
+
+do_error:
+	if (copied)
+		goto out;
+
+out_err:
+	if (csk_conn_inline(csk))
+		csk_reset_flag(csk, CSK_TX_MORE_DATA);
+	copied = sk_stream_error(sk, flags, err);
+	goto done;
+}
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index 04b316f86ebd..e5e543a45542 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -547,6 +547,8 @@ static void __init chtls_init_ulp_ops(void)
 	chtls_cpl_prot.disconnect	= chtls_disconnect;
 	chtls_cpl_prot.destroy		= chtls_destroy_sock;
 	chtls_cpl_prot.shutdown		= chtls_shutdown;
+	chtls_cpl_prot.sendmsg		= chtls_sendmsg;
+	chtls_cpl_prot.sendpage		= chtls_sendpage;
 	chtls_cpl_prot.setsockopt	= chtls_setsockopt;
 	chtls_cpl_prot.getsockopt	= chtls_getsockopt;
 }

From b647993fca1460937bd05f07c87a8234054a68f7 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:42:01 +0530
Subject: [PATCH 1627/1678] crypto: chtls - Inline TLS record Rx

handler for record receive. plain text copied to user
buffer

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: Michael Werner <werner@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_io.c   | 602 +++++++++++++++++++++-
 drivers/crypto/chelsio/chtls/chtls_main.c |   1 +
 2 files changed, 602 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 6974d3ed0ff5..5a75be43950f 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -30,6 +30,11 @@ static bool is_tls_tx(struct chtls_sock *csk)
 	return csk->tlshws.txkey >= 0;
 }
 
+static bool is_tls_rx(struct chtls_sock *csk)
+{
+	return csk->tlshws.rxkey >= 0;
+}
+
 static int data_sgl_len(const struct sk_buff *skb)
 {
 	unsigned int cnt;
@@ -106,10 +111,12 @@ static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
 {
 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	int flowclen16 = flowclen / 16;
 	struct sk_buff *skb;
+	int flowclen16;
 	int ret;
 
+	flowclen16 = flowclen / 16;
+
 	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
 		skb = create_flowc_wr_skb(sk, flowc, flowclen);
 		if (!skb)
@@ -1220,3 +1227,596 @@ out_err:
 	copied = sk_stream_error(sk, flags, err);
 	goto done;
 }
+
+static void chtls_select_window(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int wnd = tp->rcv_wnd;
+
+	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
+	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
+
+	if (wnd > MAX_RCV_WND)
+		wnd = MAX_RCV_WND;
+
+/*
+ * Check if we need to grow the receive window in response to an increase in
+ * the socket's receive buffer size.  Some applications increase the buffer
+ * size dynamically and rely on the window to grow accordingly.
+ */
+
+	if (wnd > tp->rcv_wnd) {
+		tp->rcv_wup -= wnd - tp->rcv_wnd;
+		tp->rcv_wnd = wnd;
+		/* Mark the receive window as updated */
+		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
+	}
+}
+
+/*
+ * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
+ * to return without sending the message in case we cannot allocate
+ * an sk_buff.  Returns the number of credits sent.
+ */
+static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
+{
+	struct cpl_rx_data_ack *req;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
+	if (!skb)
+		return 0;
+	__skb_put(skb, sizeof(*req));
+	req = (struct cpl_rx_data_ack *)skb->head;
+
+	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
+	INIT_TP_WR(req, csk->tid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
+						    csk->tid));
+	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
+				       RX_FORCE_ACK_F);
+	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
+	return credits;
+}
+
+#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
+			     TCPF_FIN_WAIT1 | \
+			     TCPF_FIN_WAIT2)
+
+/*
+ * Called after some received data has been read.  It returns RX credits
+ * to the HW for the amount of data processed.
+ */
+static void chtls_cleanup_rbuf(struct sock *sk, int copied)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct tcp_sock *tp;
+	int must_send;
+	u32 credits;
+	u32 thres;
+
+	thres = 15 * 1024;
+
+	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
+		return;
+
+	chtls_select_window(sk);
+	tp = tcp_sk(sk);
+	credits = tp->copied_seq - tp->rcv_wup;
+	if (unlikely(!credits))
+		return;
+
+/*
+ * For coalescing to work effectively ensure the receive window has
+ * at least 16KB left.
+ */
+	must_send = credits + 16384 >= tp->rcv_wnd;
+
+	if (must_send || credits >= thres)
+		tp->rcv_wup += send_rx_credits(csk, credits);
+}
+
+static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			    int nonblock, int flags, int *addr_len)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct net_device *dev = csk->egress_dev;
+	struct chtls_hws *hws = &csk->tlshws;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct adapter *adap;
+	unsigned long avail;
+	int buffers_freed;
+	int copied = 0;
+	int request;
+	int target;
+	long timeo;
+
+	adap = netdev2adap(dev);
+	buffers_freed = 0;
+
+	timeo = sock_rcvtimeo(sk, nonblock);
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	request = len;
+
+	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
+		chtls_cleanup_rbuf(sk, copied);
+
+	do {
+		struct sk_buff *skb;
+		u32 offset = 0;
+
+		if (unlikely(tp->urg_data &&
+			     tp->urg_seq == tp->copied_seq)) {
+			if (copied)
+				break;
+			if (signal_pending(current)) {
+				copied = timeo ? sock_intr_errno(timeo) :
+					-EAGAIN;
+				break;
+			}
+		}
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb)
+			goto found_ok_skb;
+		if (csk->wr_credits &&
+		    skb_queue_len(&csk->txq) &&
+		    chtls_push_frames(csk, csk->wr_credits ==
+				      csk->wr_max_credits))
+			sk->sk_write_space(sk);
+
+		if (copied >= target && !sk->sk_backlog.tail)
+			break;
+
+		if (copied) {
+			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
+			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
+			    signal_pending(current))
+				break;
+
+			if (!timeo)
+				break;
+		} else {
+			if (sock_flag(sk, SOCK_DONE))
+				break;
+			if (sk->sk_err) {
+				copied = sock_error(sk);
+				break;
+			}
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+			if (sk->sk_state == TCP_CLOSE) {
+				copied = -ENOTCONN;
+				break;
+			}
+			if (!timeo) {
+				copied = -EAGAIN;
+				break;
+			}
+			if (signal_pending(current)) {
+				copied = sock_intr_errno(timeo);
+				break;
+			}
+		}
+		if (sk->sk_backlog.tail) {
+			release_sock(sk);
+			lock_sock(sk);
+			chtls_cleanup_rbuf(sk, copied);
+			continue;
+		}
+
+		if (copied >= target)
+			break;
+		chtls_cleanup_rbuf(sk, copied);
+		sk_wait_data(sk, &timeo, NULL);
+			continue;
+found_ok_skb:
+		if (!skb->len) {
+			skb_dst_set(skb, NULL);
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			kfree_skb(skb);
+
+			if (!copied && !timeo) {
+				copied = -EAGAIN;
+				break;
+			}
+
+			if (copied < target) {
+				release_sock(sk);
+				lock_sock(sk);
+				continue;
+			}
+			break;
+		}
+		offset = hws->copied_seq;
+		avail = skb->len - offset;
+		if (len < avail)
+			avail = len;
+
+		if (unlikely(tp->urg_data)) {
+			u32 urg_offset = tp->urg_seq - tp->copied_seq;
+
+			if (urg_offset < avail) {
+				if (urg_offset) {
+					avail = urg_offset;
+				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
+					/* First byte is urgent, skip */
+					tp->copied_seq++;
+					offset++;
+					avail--;
+					if (!avail)
+						goto skip_copy;
+				}
+			}
+		}
+		if (hws->rstate == TLS_RCV_ST_READ_BODY) {
+			if (skb_copy_datagram_msg(skb, offset,
+						  msg, avail)) {
+				if (!copied) {
+					copied = -EFAULT;
+					break;
+				}
+			}
+		} else {
+			struct tlsrx_cmp_hdr *tls_hdr_pkt =
+				(struct tlsrx_cmp_hdr *)skb->data;
+
+			if ((tls_hdr_pkt->res_to_mac_error &
+			    TLSRX_HDR_PKT_ERROR_M))
+				tls_hdr_pkt->type = 0x7F;
+
+			/* CMP pld len is for recv seq */
+			hws->rcvpld = skb->hdr_len;
+			if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
+				if (!copied) {
+					copied = -EFAULT;
+					break;
+				}
+			}
+		}
+		copied += avail;
+		len -= avail;
+		hws->copied_seq += avail;
+skip_copy:
+		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
+			tp->urg_data = 0;
+
+		if (hws->rstate == TLS_RCV_ST_READ_BODY &&
+		    (avail + offset) >= skb->len) {
+			if (likely(skb))
+				chtls_free_skb(sk, skb);
+			buffers_freed++;
+			hws->rstate = TLS_RCV_ST_READ_HEADER;
+			atomic_inc(&adap->chcr_stats.tls_pdu_rx);
+			tp->copied_seq += hws->rcvpld;
+			hws->copied_seq = 0;
+			if (copied >= target &&
+			    !skb_peek(&sk->sk_receive_queue))
+				break;
+		} else {
+			if (likely(skb)) {
+				if (ULP_SKB_CB(skb)->flags &
+				    ULPCB_FLAG_TLS_ND)
+					hws->rstate =
+						TLS_RCV_ST_READ_HEADER;
+				else
+					hws->rstate =
+						TLS_RCV_ST_READ_BODY;
+				chtls_free_skb(sk, skb);
+			}
+			buffers_freed++;
+			tp->copied_seq += avail;
+			hws->copied_seq = 0;
+		}
+	} while (len > 0);
+
+	if (buffers_freed)
+		chtls_cleanup_rbuf(sk, copied);
+	release_sock(sk);
+	return copied;
+}
+
+/*
+ * Peek at data in a socket's receive buffer.
+ */
+static int peekmsg(struct sock *sk, struct msghdr *msg,
+		   size_t len, int nonblock, int flags)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 peek_seq, offset;
+	struct sk_buff *skb;
+	int copied = 0;
+	size_t avail;          /* amount of available data in current skb */
+	long timeo;
+
+	lock_sock(sk);
+	timeo = sock_rcvtimeo(sk, nonblock);
+	peek_seq = tp->copied_seq;
+
+	do {
+		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
+			if (copied)
+				break;
+			if (signal_pending(current)) {
+				copied = timeo ? sock_intr_errno(timeo) :
+				-EAGAIN;
+				break;
+			}
+		}
+
+		skb_queue_walk(&sk->sk_receive_queue, skb) {
+			offset = peek_seq - ULP_SKB_CB(skb)->seq;
+			if (offset < skb->len)
+				goto found_ok_skb;
+		}
+
+		/* empty receive queue */
+		if (copied)
+			break;
+		if (sock_flag(sk, SOCK_DONE))
+			break;
+		if (sk->sk_err) {
+			copied = sock_error(sk);
+			break;
+		}
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			break;
+		if (sk->sk_state == TCP_CLOSE) {
+			copied = -ENOTCONN;
+			break;
+		}
+		if (!timeo) {
+			copied = -EAGAIN;
+			break;
+		}
+		if (signal_pending(current)) {
+			copied = sock_intr_errno(timeo);
+			break;
+		}
+
+		if (sk->sk_backlog.tail) {
+			/* Do not sleep, just process backlog. */
+			release_sock(sk);
+			lock_sock(sk);
+		} else {
+			sk_wait_data(sk, &timeo, NULL);
+		}
+
+		if (unlikely(peek_seq != tp->copied_seq)) {
+			if (net_ratelimit())
+				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
+					current->comm, current->pid);
+			peek_seq = tp->copied_seq;
+		}
+		continue;
+
+found_ok_skb:
+		avail = skb->len - offset;
+		if (len < avail)
+			avail = len;
+		/*
+		 * Do we have urgent data here?  We need to skip over the
+		 * urgent byte.
+		 */
+		if (unlikely(tp->urg_data)) {
+			u32 urg_offset = tp->urg_seq - peek_seq;
+
+			if (urg_offset < avail) {
+				/*
+				 * The amount of data we are preparing to copy
+				 * contains urgent data.
+				 */
+				if (!urg_offset) { /* First byte is urgent */
+					if (!sock_flag(sk, SOCK_URGINLINE)) {
+						peek_seq++;
+						offset++;
+						avail--;
+					}
+					if (!avail)
+						continue;
+				} else {
+					/* stop short of the urgent data */
+					avail = urg_offset;
+				}
+			}
+		}
+
+		/*
+		 * If MSG_TRUNC is specified the data is discarded.
+		 */
+		if (likely(!(flags & MSG_TRUNC)))
+			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
+				if (!copied) {
+					copied = -EFAULT;
+					break;
+				}
+			}
+		peek_seq += avail;
+		copied += avail;
+		len -= avail;
+	} while (len > 0);
+
+	release_sock(sk);
+	return copied;
+}
+
+int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		  int nonblock, int flags, int *addr_len)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct chtls_sock *csk;
+	struct chtls_hws *hws;
+	unsigned long avail;    /* amount of available data in current skb */
+	int buffers_freed;
+	int copied = 0;
+	int request;
+	long timeo;
+	int target;             /* Read at least this many bytes */
+
+	buffers_freed = 0;
+
+	if (unlikely(flags & MSG_OOB))
+		return tcp_prot.recvmsg(sk, msg, len, nonblock, flags,
+					addr_len);
+
+	if (unlikely(flags & MSG_PEEK))
+		return peekmsg(sk, msg, len, nonblock, flags);
+
+	if (sk_can_busy_loop(sk) &&
+	    skb_queue_empty(&sk->sk_receive_queue) &&
+	    sk->sk_state == TCP_ESTABLISHED)
+		sk_busy_loop(sk, nonblock);
+
+	lock_sock(sk);
+	csk = rcu_dereference_sk_user_data(sk);
+	hws = &csk->tlshws;
+
+	if (is_tls_rx(csk))
+		return chtls_pt_recvmsg(sk, msg, len, nonblock,
+					flags, addr_len);
+
+	timeo = sock_rcvtimeo(sk, nonblock);
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	request = len;
+
+	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
+		chtls_cleanup_rbuf(sk, copied);
+
+	do {
+		struct sk_buff *skb;
+		u32 offset;
+
+		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
+			if (copied)
+				break;
+			if (signal_pending(current)) {
+				copied = timeo ? sock_intr_errno(timeo) :
+					-EAGAIN;
+				break;
+			}
+		}
+
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb)
+			goto found_ok_skb;
+
+		if (csk->wr_credits &&
+		    skb_queue_len(&csk->txq) &&
+		    chtls_push_frames(csk, csk->wr_credits ==
+				      csk->wr_max_credits))
+			sk->sk_write_space(sk);
+
+		if (copied >= target && !sk->sk_backlog.tail)
+			break;
+
+		if (copied) {
+			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
+			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
+			    signal_pending(current))
+				break;
+		} else {
+			if (sock_flag(sk, SOCK_DONE))
+				break;
+			if (sk->sk_err) {
+				copied = sock_error(sk);
+				break;
+			}
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+			if (sk->sk_state == TCP_CLOSE) {
+				copied = -ENOTCONN;
+				break;
+			}
+			if (!timeo) {
+				copied = -EAGAIN;
+				break;
+			}
+			if (signal_pending(current)) {
+				copied = sock_intr_errno(timeo);
+				break;
+			}
+		}
+
+		if (sk->sk_backlog.tail) {
+			release_sock(sk);
+			lock_sock(sk);
+			chtls_cleanup_rbuf(sk, copied);
+			continue;
+		}
+
+		if (copied >= target)
+			break;
+		chtls_cleanup_rbuf(sk, copied);
+		sk_wait_data(sk, &timeo, NULL);
+		continue;
+
+found_ok_skb:
+		if (!skb->len) {
+			chtls_kfree_skb(sk, skb);
+			if (!copied && !timeo) {
+				copied = -EAGAIN;
+				break;
+			}
+
+			if (copied < target)
+				continue;
+
+			break;
+		}
+
+		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
+		avail = skb->len - offset;
+		if (len < avail)
+			avail = len;
+
+		if (unlikely(tp->urg_data)) {
+			u32 urg_offset = tp->urg_seq - tp->copied_seq;
+
+			if (urg_offset < avail) {
+				if (urg_offset) {
+					avail = urg_offset;
+				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
+					tp->copied_seq++;
+					offset++;
+					avail--;
+					if (!avail)
+						goto skip_copy;
+				}
+			}
+		}
+
+		if (likely(!(flags & MSG_TRUNC))) {
+			if (skb_copy_datagram_msg(skb, offset,
+						  msg, avail)) {
+				if (!copied) {
+					copied = -EFAULT;
+					break;
+				}
+			}
+		}
+
+		tp->copied_seq += avail;
+		copied += avail;
+		len -= avail;
+
+skip_copy:
+		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
+			tp->urg_data = 0;
+
+		if (avail + offset >= skb->len) {
+			if (likely(skb))
+				chtls_free_skb(sk, skb);
+			buffers_freed++;
+
+			if  (copied >= target &&
+			     !skb_peek(&sk->sk_receive_queue))
+				break;
+		}
+	} while (len > 0);
+
+	if (buffers_freed)
+		chtls_cleanup_rbuf(sk, copied);
+
+	release_sock(sk);
+	return copied;
+}
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index e5e543a45542..007c45c38fc7 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -549,6 +549,7 @@ static void __init chtls_init_ulp_ops(void)
 	chtls_cpl_prot.shutdown		= chtls_shutdown;
 	chtls_cpl_prot.sendmsg		= chtls_sendmsg;
 	chtls_cpl_prot.sendpage		= chtls_sendpage;
+	chtls_cpl_prot.recvmsg		= chtls_recvmsg;
 	chtls_cpl_prot.setsockopt	= chtls_setsockopt;
 	chtls_cpl_prot.getsockopt	= chtls_getsockopt;
 }

From d25f2f71f653b78e8bd055cfe8fd70ef8f514f2b Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:42:02 +0530
Subject: [PATCH 1628/1678] crypto: chtls - Program the TLS session Key

Initialize the space reserved for storing the TLS keys,
get and free the location where key is stored for the TLS
connection.
Program the Tx and Rx key as received from user in
struct tls12_crypto_info_aes_gcm_128 and understood by hardware.
added socket option TLS_RX

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_hw.c | 412 ++++++++++++++++++++++++
 1 file changed, 412 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/chtls_hw.c

diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c
new file mode 100644
index 000000000000..54a13aa99121
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/chtls_hw.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2018 Chelsio Communications, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Atul Gupta (atul.gupta@chelsio.com)
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/tls.h>
+#include <net/tls.h>
+
+#include "chtls.h"
+#include "chtls_cm.h"
+
+static void __set_tcb_field_direct(struct chtls_sock *csk,
+				   struct cpl_set_tcb_field *req, u16 word,
+				   u64 mask, u64 val, u8 cookie, int no_reply)
+{
+	struct ulptx_idata *sc;
+
+	INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, csk->tid);
+	req->wr.wr_mid |= htonl(FW_WR_FLOWID_V(csk->tid));
+	req->reply_ctrl = htons(NO_REPLY_V(no_reply) |
+				QUEUENO_V(csk->rss_qid));
+	req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie));
+	req->mask = cpu_to_be64(mask);
+	req->val = cpu_to_be64(val);
+	sc = (struct ulptx_idata *)(req + 1);
+	sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
+	sc->len = htonl(0);
+}
+
+static void __set_tcb_field(struct sock *sk, struct sk_buff *skb, u16 word,
+			    u64 mask, u64 val, u8 cookie, int no_reply)
+{
+	struct cpl_set_tcb_field *req;
+	struct chtls_sock *csk;
+	struct ulptx_idata *sc;
+	unsigned int wrlen;
+
+	wrlen = roundup(sizeof(*req) + sizeof(*sc), 16);
+	csk = rcu_dereference_sk_user_data(sk);
+
+	req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen);
+	__set_tcb_field_direct(csk, req, word, mask, val, cookie, no_reply);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id);
+}
+
+/*
+ * Send control message to HW, message go as immediate data and packet
+ * is freed immediately.
+ */
+static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val)
+{
+	struct cpl_set_tcb_field *req;
+	unsigned int credits_needed;
+	struct chtls_sock *csk;
+	struct ulptx_idata *sc;
+	struct sk_buff *skb;
+	unsigned int wrlen;
+	int ret;
+
+	wrlen = roundup(sizeof(*req) + sizeof(*sc), 16);
+
+	skb = alloc_skb(wrlen, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	credits_needed = DIV_ROUND_UP(wrlen, 16);
+	csk = rcu_dereference_sk_user_data(sk);
+
+	__set_tcb_field(sk, skb, word, mask, val, 0, 1);
+	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
+	csk->wr_credits -= credits_needed;
+	csk->wr_unacked += credits_needed;
+	enqueue_wr(csk, skb);
+	ret = cxgb4_ofld_send(csk->egress_dev, skb);
+	if (ret < 0)
+		kfree_skb(skb);
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * Set one of the t_flags bits in the TCB.
+ */
+int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val)
+{
+	return chtls_set_tcb_field(sk, 1, 1ULL << bit_pos,
+				   val << bit_pos);
+}
+
+static int chtls_set_tcb_keyid(struct sock *sk, int keyid)
+{
+	return chtls_set_tcb_field(sk, 31, 0xFFFFFFFFULL, keyid);
+}
+
+static int chtls_set_tcb_seqno(struct sock *sk)
+{
+	return chtls_set_tcb_field(sk, 28, ~0ULL, 0);
+}
+
+static int chtls_set_tcb_quiesce(struct sock *sk, int val)
+{
+	return chtls_set_tcb_field(sk, 1, (1ULL << TF_RX_QUIESCE_S),
+				   TF_RX_QUIESCE_V(val));
+}
+
+/* TLS Key bitmap processing */
+int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi)
+{
+	unsigned int num_key_ctx, bsize;
+	int ksize;
+
+	num_key_ctx = (lldi->vr->key.size / TLS_KEY_CONTEXT_SZ);
+	bsize = BITS_TO_LONGS(num_key_ctx);
+
+	cdev->kmap.size = num_key_ctx;
+	cdev->kmap.available = bsize;
+	ksize = sizeof(*cdev->kmap.addr) * bsize;
+	cdev->kmap.addr = kvzalloc(ksize, GFP_KERNEL);
+	if (!cdev->kmap.addr)
+		return -ENOMEM;
+
+	cdev->kmap.start = lldi->vr->key.start;
+	spin_lock_init(&cdev->kmap.lock);
+	return 0;
+}
+
+static int get_new_keyid(struct chtls_sock *csk, u32 optname)
+{
+	struct net_device *dev = csk->egress_dev;
+	struct chtls_dev *cdev = csk->cdev;
+	struct chtls_hws *hws;
+	struct adapter *adap;
+	int keyid;
+
+	adap = netdev2adap(dev);
+	hws = &csk->tlshws;
+
+	spin_lock_bh(&cdev->kmap.lock);
+	keyid = find_first_zero_bit(cdev->kmap.addr, cdev->kmap.size);
+	if (keyid < cdev->kmap.size) {
+		__set_bit(keyid, cdev->kmap.addr);
+		if (optname == TLS_RX)
+			hws->rxkey = keyid;
+		else
+			hws->txkey = keyid;
+		atomic_inc(&adap->chcr_stats.tls_key);
+	} else {
+		keyid = -1;
+	}
+	spin_unlock_bh(&cdev->kmap.lock);
+	return keyid;
+}
+
+void free_tls_keyid(struct sock *sk)
+{
+	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
+	struct net_device *dev = csk->egress_dev;
+	struct chtls_dev *cdev = csk->cdev;
+	struct chtls_hws *hws;
+	struct adapter *adap;
+
+	if (!cdev->kmap.addr)
+		return;
+
+	adap = netdev2adap(dev);
+	hws = &csk->tlshws;
+
+	spin_lock_bh(&cdev->kmap.lock);
+	if (hws->rxkey >= 0) {
+		__clear_bit(hws->rxkey, cdev->kmap.addr);
+		atomic_dec(&adap->chcr_stats.tls_key);
+		hws->rxkey = -1;
+	}
+	if (hws->txkey >= 0) {
+		__clear_bit(hws->txkey, cdev->kmap.addr);
+		atomic_dec(&adap->chcr_stats.tls_key);
+		hws->txkey = -1;
+	}
+	spin_unlock_bh(&cdev->kmap.lock);
+}
+
+unsigned int keyid_to_addr(int start_addr, int keyid)
+{
+	return (start_addr + (keyid * TLS_KEY_CONTEXT_SZ)) >> 5;
+}
+
+static void chtls_rxkey_ivauth(struct _key_ctx *kctx)
+{
+	kctx->iv_to_auth = cpu_to_be64(KEYCTX_TX_WR_IV_V(6ULL) |
+				  KEYCTX_TX_WR_AAD_V(1ULL) |
+				  KEYCTX_TX_WR_AADST_V(5ULL) |
+				  KEYCTX_TX_WR_CIPHER_V(14ULL) |
+				  KEYCTX_TX_WR_CIPHERST_V(0ULL) |
+				  KEYCTX_TX_WR_AUTH_V(14ULL) |
+				  KEYCTX_TX_WR_AUTHST_V(16ULL) |
+				  KEYCTX_TX_WR_AUTHIN_V(16ULL));
+}
+
+static int chtls_key_info(struct chtls_sock *csk,
+			  struct _key_ctx *kctx,
+			  u32 keylen, u32 optname)
+{
+	unsigned char key[CHCR_KEYCTX_CIPHER_KEY_SIZE_256];
+	struct tls12_crypto_info_aes_gcm_128 *gcm_ctx;
+	unsigned char ghash_h[AEAD_H_SIZE];
+	struct crypto_cipher *cipher;
+	int ck_size, key_ctx_size;
+	int ret;
+
+	gcm_ctx = (struct tls12_crypto_info_aes_gcm_128 *)
+		  &csk->tlshws.crypto_info;
+
+	key_ctx_size = sizeof(struct _key_ctx) +
+		       roundup(keylen, 16) + AEAD_H_SIZE;
+
+	if (keylen == AES_KEYSIZE_128) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+	} else if (keylen == AES_KEYSIZE_192) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+	} else if (keylen == AES_KEYSIZE_256) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+	} else {
+		pr_err("GCM: Invalid key length %d\n", keylen);
+		return -EINVAL;
+	}
+	memcpy(key, gcm_ctx->key, keylen);
+
+	/* Calculate the H = CIPH(K, 0 repeated 16 times).
+	 * It will go in key context
+	 */
+	cipher = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(cipher)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = crypto_cipher_setkey(cipher, key, keylen);
+	if (ret)
+		goto out1;
+
+	memset(ghash_h, 0, AEAD_H_SIZE);
+	crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h);
+	csk->tlshws.keylen = key_ctx_size;
+
+	/* Copy the Key context */
+	if (optname == TLS_RX) {
+		int key_ctx;
+
+		key_ctx = ((key_ctx_size >> 4) << 3);
+		kctx->ctx_hdr = FILL_KEY_CRX_HDR(ck_size,
+						 CHCR_KEYCTX_MAC_KEY_SIZE_128,
+						 0, 0, key_ctx);
+		chtls_rxkey_ivauth(kctx);
+	} else {
+		kctx->ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
+						 CHCR_KEYCTX_MAC_KEY_SIZE_128,
+						 0, 0, key_ctx_size >> 4);
+	}
+
+	memcpy(kctx->salt, gcm_ctx->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	memcpy(kctx->key, gcm_ctx->key, keylen);
+	memcpy(kctx->key + keylen, ghash_h, AEAD_H_SIZE);
+	/* erase key info from driver */
+	memset(gcm_ctx->key, 0, keylen);
+
+out1:
+	crypto_free_cipher(cipher);
+out:
+	return ret;
+}
+
+static void chtls_set_scmd(struct chtls_sock *csk)
+{
+	struct chtls_hws *hws = &csk->tlshws;
+
+	hws->scmd.seqno_numivs =
+		SCMD_SEQ_NO_CTRL_V(3) |
+		SCMD_PROTO_VERSION_V(0) |
+		SCMD_ENC_DEC_CTRL_V(0) |
+		SCMD_CIPH_AUTH_SEQ_CTRL_V(1) |
+		SCMD_CIPH_MODE_V(2) |
+		SCMD_AUTH_MODE_V(4) |
+		SCMD_HMAC_CTRL_V(0) |
+		SCMD_IV_SIZE_V(4) |
+		SCMD_NUM_IVS_V(1);
+
+	hws->scmd.ivgen_hdrlen =
+		SCMD_IV_GEN_CTRL_V(1) |
+		SCMD_KEY_CTX_INLINE_V(0) |
+		SCMD_TLS_FRAG_ENABLE_V(1);
+}
+
+int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname)
+{
+	struct tls_key_req *kwr;
+	struct chtls_dev *cdev;
+	struct _key_ctx *kctx;
+	int wrlen, klen, len;
+	struct sk_buff *skb;
+	struct sock *sk;
+	int keyid;
+	int kaddr;
+	int ret;
+
+	cdev = csk->cdev;
+	sk = csk->sk;
+
+	klen = roundup((keylen + AEAD_H_SIZE) + sizeof(*kctx), 32);
+	wrlen = roundup(sizeof(*kwr), 16);
+	len = klen + wrlen;
+
+	/* Flush out-standing data before new key takes effect */
+	if (optname == TLS_TX) {
+		lock_sock(sk);
+		if (skb_queue_len(&csk->txq))
+			chtls_push_frames(csk, 0);
+		release_sock(sk);
+	}
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	keyid = get_new_keyid(csk, optname);
+	if (keyid < 0) {
+		ret = -ENOSPC;
+		goto out_nokey;
+	}
+
+	kaddr = keyid_to_addr(cdev->kmap.start, keyid);
+	kwr = (struct tls_key_req *)__skb_put_zero(skb, len);
+	kwr->wr.op_to_compl =
+		cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F |
+		      FW_WR_ATOMIC_V(1U));
+	kwr->wr.flowid_len16 =
+		cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16) |
+			    FW_WR_FLOWID_V(csk->tid)));
+	kwr->wr.protocol = 0;
+	kwr->wr.mfs = htons(TLS_MFS);
+	kwr->wr.reneg_to_write_rx = optname;
+
+	/* ulptx command */
+	kwr->req.cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) |
+			    T5_ULP_MEMIO_ORDER_V(1) |
+			    T5_ULP_MEMIO_IMM_V(1));
+	kwr->req.len16 = cpu_to_be32((csk->tid << 8) |
+			      DIV_ROUND_UP(len - sizeof(kwr->wr), 16));
+	kwr->req.dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN_V(klen >> 5));
+	kwr->req.lock_addr = cpu_to_be32(ULP_MEMIO_ADDR_V(kaddr));
+
+	/* sub command */
+	kwr->sc_imm.cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM));
+	kwr->sc_imm.len = cpu_to_be32(klen);
+
+	/* key info */
+	kctx = (struct _key_ctx *)(kwr + 1);
+	ret = chtls_key_info(csk, kctx, keylen, optname);
+	if (ret)
+		goto out_notcb;
+
+	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->tlshws.txqid);
+	csk->wr_credits -= DIV_ROUND_UP(len, 16);
+	csk->wr_unacked += DIV_ROUND_UP(len, 16);
+	enqueue_wr(csk, skb);
+	cxgb4_ofld_send(csk->egress_dev, skb);
+
+	chtls_set_scmd(csk);
+	/* Clear quiesce for Rx key */
+	if (optname == TLS_RX) {
+		ret = chtls_set_tcb_keyid(sk, keyid);
+		if (ret)
+			goto out_notcb;
+		ret = chtls_set_tcb_field(sk, 0,
+					  TCB_ULP_RAW_V(TCB_ULP_RAW_M),
+					  TCB_ULP_RAW_V((TF_TLS_KEY_SIZE_V(1) |
+					  TF_TLS_CONTROL_V(1) |
+					  TF_TLS_ACTIVE_V(1) |
+					  TF_TLS_ENABLE_V(1))));
+		if (ret)
+			goto out_notcb;
+		ret = chtls_set_tcb_seqno(sk);
+		if (ret)
+			goto out_notcb;
+		ret = chtls_set_tcb_quiesce(sk, 0);
+		if (ret)
+			goto out_notcb;
+		csk->tlshws.rxkey = keyid;
+	} else {
+		csk->tlshws.tx_seq_no = 0;
+		csk->tlshws.txkey = keyid;
+	}
+
+	return ret;
+out_notcb:
+	free_tls_keyid(sk);
+out_nokey:
+	kfree_skb(skb);
+	return ret;
+}

From bd7f48576660022b0779c5c7bf384ec1e8ca83c8 Mon Sep 17 00:00:00 2001
From: Atul Gupta <atul.gupta@chelsio.com>
Date: Sat, 31 Mar 2018 21:42:03 +0530
Subject: [PATCH 1629/1678] crypto: chtls - Makefile Kconfig

Entry for Inline TLS as another driver dependent on cxgb4 and chcr

Signed-off-by: Atul Gupta <atul.gupta@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/Kconfig        | 11 +++++++++++
 drivers/crypto/chelsio/Makefile       |  1 +
 drivers/crypto/chelsio/chtls/Makefile |  4 ++++
 3 files changed, 16 insertions(+)
 create mode 100644 drivers/crypto/chelsio/chtls/Makefile

diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 5ae9f8706f17..930d82d991f2 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -29,3 +29,14 @@ config CHELSIO_IPSEC_INLINE
         default n
         ---help---
           Enable support for IPSec Tx Inline.
+
+config CRYPTO_DEV_CHELSIO_TLS
+        tristate "Chelsio Crypto Inline TLS Driver"
+        depends on CHELSIO_T4
+        depends on TLS
+        select CRYPTO_DEV_CHELSIO
+        ---help---
+          Support Chelsio Inline TLS with Chelsio crypto accelerator.
+
+          To compile this driver as a module, choose M here: the module
+          will be called chtls.
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
index eaecaf1ebcf3..639e5718dff4 100644
--- a/drivers/crypto/chelsio/Makefile
+++ b/drivers/crypto/chelsio/Makefile
@@ -3,3 +3,4 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o
 chcr-objs :=  chcr_core.o chcr_algo.o
 chcr-$(CONFIG_CHELSIO_IPSEC_INLINE) += chcr_ipsec.o
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls/
diff --git a/drivers/crypto/chelsio/chtls/Makefile b/drivers/crypto/chelsio/chtls/Makefile
new file mode 100644
index 000000000000..df1379570a8e
--- /dev/null
+++ b/drivers/crypto/chelsio/chtls/Makefile
@@ -0,0 +1,4 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 -Idrivers/crypto/chelsio/
+
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO_TLS) += chtls.o
+chtls-objs := chtls_main.o chtls_cm.o chtls_io.o chtls_hw.o

From 8c59c264e5e17670c0ad2063fa40e3091b549151 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Mon, 26 Feb 2018 12:11:07 +0530
Subject: [PATCH 1630/1678] Bluetooth: Fix data type of appearence

It should be __le16 instead of __u16 since its part of
mgmt API.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 72a456bbbcd5..e7303eee65cd 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -600,7 +600,7 @@ struct mgmt_rp_read_ext_info {
 
 #define MGMT_OP_SET_APPEARANCE		0x0043
 struct mgmt_cp_set_appearance {
-	__u16	appearance;
+	__le16	appearance;
 } __packed;
 #define MGMT_SET_APPEARANCE_SIZE	2
 

From fb2d466be9b336e9c801978e6a75d06059ac124a Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Mon, 5 Mar 2018 19:02:35 +0100
Subject: [PATCH 1631/1678] Bluetooth: hci_bcm: use gpiod cansleep version

Some GPIO controller drivers request sleepable context and so can't
be accessed from IRQ context. Using gpiod_set/get_value accessors
with such controller leads to a kernel warning since they are
reserved for atomic context (according to the documentation).

Use the postfixed _cansleep version instead, indicating that context
is safe for sleeping if necessary. Note that this is the case here
since we never toggle the gpio neither from IRQ nor from a spinlocked
section.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 40b9fb247010..467e2f5cb7e3 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -908,13 +908,13 @@ static inline int bcm_apple_get_resources(struct bcm_device *dev)
 
 static int bcm_gpio_set_device_wakeup(struct bcm_device *dev, bool awake)
 {
-	gpiod_set_value(dev->device_wakeup, awake);
+	gpiod_set_value_cansleep(dev->device_wakeup, awake);
 	return 0;
 }
 
 static int bcm_gpio_set_shutdown(struct bcm_device *dev, bool powered)
 {
-	gpiod_set_value(dev->shutdown, powered);
+	gpiod_set_value_cansleep(dev->shutdown, powered);
 	return 0;
 }
 

From f3863f1d7a579a8d8d7741a777ef863674a4d0c8 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 7 Mar 2018 22:39:03 +0100
Subject: [PATCH 1632/1678] Bluetooth: hci_bcm: Use default baud rate if
 missing shutdown GPIO

In case the shutdown GPIO is not wired up, it is impossible to reset the
Bluetooth controller to its original state. This include the initial
default baud rate which leads to issues when reloading the module or
when something unexpected happens. To avoid any kind of runtime
deadlocks, stick with the initial default baud rate.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/hci_bcm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 467e2f5cb7e3..ff7535e85dea 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -1146,6 +1146,12 @@ static int bcm_serdev_probe(struct serdev_device *serdev)
 	if (err)
 		return err;
 
+	if (!bcmdev->shutdown) {
+		dev_warn(&serdev->dev,
+			 "No reset resource, using default baud rate\n");
+		bcmdev->oper_speed = bcmdev->init_speed;
+	}
+
 	err = bcm_gpio_set_power(bcmdev, false);
 	if (err)
 		dev_err(&serdev->dev, "Failed to power down\n");

From a5c2f4c1c5292362c8f78e63f82c527cea955bbd Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Mon, 12 Mar 2018 10:20:04 +0100
Subject: [PATCH 1633/1678] Bluetooth: btmrvl: Delete an unnecessary variable
 initialisation in btmrvl_sdio_register_dev()

The local variable "ret" will be set to an appropriate value a bit later.
Thus omit the explicit initialisation at the beginning.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmrvl_sdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 7dbb4463b539..cd27d2f48bf2 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -920,7 +920,7 @@ static int btmrvl_sdio_register_dev(struct btmrvl_sdio_card *card)
 {
 	struct sdio_func *func;
 	u8 reg;
-	int ret = 0;
+	int ret;
 
 	if (!card || !card->func) {
 		BT_ERR("Error: card or function is NULL!");

From 9376e4a5a318e1dbf1c53a31cfbe919e0ba926b0 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Mon, 12 Mar 2018 11:15:59 +0100
Subject: [PATCH 1634/1678] Bluetooth: btmrvl: Delete an unnecessary variable
 initialisation in btmrvl_sdio_card_to_host()

The variable "payload" will eventually be set to an appropriate pointer
a bit later. Thus omit the explicit initialisation at the beginning.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmrvl_sdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index cd27d2f48bf2..6f99b9f3d57f 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -689,7 +689,7 @@ static int btmrvl_sdio_card_to_host(struct btmrvl_private *priv)
 	int ret, num_blocks, blksz;
 	struct sk_buff *skb = NULL;
 	u32 type;
-	u8 *payload = NULL;
+	u8 *payload;
 	struct hci_dev *hdev = priv->btmrvl_dev.hcidev;
 	struct btmrvl_sdio_card *card = priv->btmrvl_dev.card;
 

From 9bef22fb00cbfc6400c52c265c6c5ebcb6bfc1d1 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 20 Mar 2018 09:31:04 +0100
Subject: [PATCH 1635/1678] Bluetooth: hci_ll: Use skb_put_u8 instead of struct
 hcill_cmd

The struct hcill_cmd to create an skb with a single u8 is pointless. So
just use skb_put_u8 instead.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/hci_ll.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 2f30dcad96bd..7c55a9f77808 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -82,10 +82,6 @@ enum hcill_states_e {
 	HCILL_AWAKE_TO_ASLEEP
 };
 
-struct hcill_cmd {
-	u8 cmd;
-} __packed;
-
 struct ll_device {
 	struct hci_uart hu;
 	struct serdev_device *serdev;
@@ -113,7 +109,6 @@ static int send_hcill_cmd(u8 cmd, struct hci_uart *hu)
 	int err = 0;
 	struct sk_buff *skb = NULL;
 	struct ll_struct *ll = hu->priv;
-	struct hcill_cmd *hcill_packet;
 
 	BT_DBG("hu %p cmd 0x%x", hu, cmd);
 
@@ -126,8 +121,7 @@ static int send_hcill_cmd(u8 cmd, struct hci_uart *hu)
 	}
 
 	/* prepare packet */
-	hcill_packet = skb_put(skb, 1);
-	hcill_packet->cmd = cmd;
+	skb_put_u8(skb, cmd);
 
 	/* send packet */
 	skb_queue_tail(&ll->txq, skb);

From a41e0796396eeceff673af4a38feaee149c6ff86 Mon Sep 17 00:00:00 2001
From: Vicente Bergas <vicencb@gmail.com>
Date: Tue, 20 Mar 2018 19:41:10 +0100
Subject: [PATCH 1636/1678] Bluetooth: btusb: Add USB ID 7392:a611 for Edimax
 EW-7611ULB

This WiFi/Bluetooth USB dongle uses a Realtek chipset, so, use btrtl for it.

Product information:
https://wikidevi.com/wiki/Edimax_EW-7611ULB

From /sys/kernel/debug/usb/devices
T:  Bus=02 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#=  3 Spd=480  MxCh= 0
D:  Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=7392 ProdID=a611 Rev= 2.00
S:  Manufacturer=Realtek
S:  Product=Edimax Wi-Fi N150 Bluetooth4.0 USB Adapter
S:  SerialNumber=00e04c000001
C:* #Ifs= 3 Cfg#= 1 Atr=e0 MxPwr=500mA
A:  FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=01
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
I:* If#= 2 Alt= 0 #EPs= 6 Cls=ff(vend.) Sub=ff Prot=ff Driver=rtl8723bu
E:  Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=87(I) Atr=03(Int.) MxPS=  64 Ivl=500us
E:  Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=09(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms

Tested-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Vicente Bergas <vicencb@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 5cd868ea28ed..c701443de3e7 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -368,6 +368,9 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x13d3, 0x3459), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3494), .driver_info = BTUSB_REALTEK },
 
+	/* Additional Realtek 8723BU Bluetooth devices */
+	{ USB_DEVICE(0x7392, 0xa611), .driver_info = BTUSB_REALTEK },
+
 	/* Additional Realtek 8821AE Bluetooth devices */
 	{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
 	{ USB_DEVICE(0x13d3, 0x3414), .driver_info = BTUSB_REALTEK },

From e09070c51b280567695022237e57c428e548b355 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Mar 2018 21:28:07 +0100
Subject: [PATCH 1637/1678] Bluetooth: hci_bcm: Add irq_polarity module option

Add irq_polarity module option for easier troubleshooting of irq-polarity
issues.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index ff7535e85dea..50c8523f8653 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -126,6 +126,10 @@ struct bcm_data {
 static DEFINE_MUTEX(bcm_device_lock);
 static LIST_HEAD(bcm_device_list);
 
+static int irq_polarity = -1;
+module_param(irq_polarity, int, 0444);
+MODULE_PARM_DESC(irq_polarity, "IRQ polarity 0: active-high 1: active-low");
+
 static inline void host_set_baudrate(struct hci_uart *hu, unsigned int speed)
 {
 	if (hu->serdev)
@@ -989,11 +993,17 @@ static int bcm_acpi_probe(struct bcm_device *dev)
 	}
 	acpi_dev_free_resource_list(&resources);
 
-	dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table);
-	if (dmi_id) {
-		dev_warn(dev->dev, "%s: Overwriting IRQ polarity to active low",
-			    dmi_id->ident);
-		dev->irq_active_low = true;
+	if (irq_polarity != -1) {
+		dev->irq_active_low = irq_polarity;
+		dev_warn(dev->dev, "Overwriting IRQ polarity to active %s by module-param\n",
+			 dev->irq_active_low ? "low" : "high");
+	} else {
+		dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table);
+		if (dmi_id) {
+			dev_warn(dev->dev, "%s: Overwriting IRQ polarity to active low",
+				 dmi_id->ident);
+			dev->irq_active_low = true;
+		}
 	}
 
 	return 0;

From bb5208b314c5127b716b2ee4f55803a8bb73b750 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Mar 2018 21:28:08 +0100
Subject: [PATCH 1638/1678] Bluetooth: hci_bcm: Treat Interrupt ACPI resources
 as always being active-low

Older devices with a serdev attached bcm bt hci, use an Interrupt ACPI
resource to describe the IRQ (rather then a GpioInt resource).

These device seem to all claim the IRQ is active-high and seem to all need
a DMI quirk to treat it as active-low. Instead simply always assume that
Interrupt resource specified IRQs are always active-low.

This fixes the bt device not being able to wake the host from runtime-
suspend on the: Asus T100TAM, Asus T200TA, Lenovo Yoga2 and the Toshiba
Encore, without the need to add 4 new DMI quirks for these models.

This also allows us to remove 2 DMI quirks for the Asus T100TA and Asus
T100CHI series. Likely the 2 remaining quirks can also be removed but I
could not find a DSDT of these devices to verify this.

Cc: stable@vger.kernel.org
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=198953
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1554835
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 50c8523f8653..ce8c629c0eac 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -799,22 +799,6 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = {
 #ifdef CONFIG_ACPI
 /* IRQ polarity of some chipsets are not defined correctly in ACPI table. */
 static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = {
-	{
-		.ident = "Asus T100TA",
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR,
-					"ASUSTeK COMPUTER INC."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
-		},
-	},
-	{
-		.ident = "Asus T100CHI",
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR,
-					"ASUSTeK COMPUTER INC."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
-		},
-	},
 	{	/* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
 		.ident = "Lenovo ThinkPad 8",
 		.matches = {
@@ -842,7 +826,9 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
 	switch (ares->type) {
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		irq = &ares->data.extended_irq;
-		dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW;
+		if (irq->polarity != ACPI_ACTIVE_LOW)
+			dev_info(dev->dev, "ACPI Interrupt resource is active-high, this is usually wrong, treating the IRQ as active-low\n");
+		dev->irq_active_low = true;
 		break;
 
 	case ACPI_RESOURCE_TYPE_GPIO:

From 4063cafa3b24ff04635bdedc97cd3e4320415065 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Mar 2018 21:28:09 +0100
Subject: [PATCH 1639/1678] Bluetooth: hci_bcm: Add 6 new ACPI HIDs

Add 6 new ACPI HIDs to enable bluetooth on devices using these HIDs,
I've tested the following HIDs / devices:

BCM2E74: Jumper ezPad mini 3
BCM2E83: Acer Iconia Tab8 w1-810
BCM2E90: Meegopad T08
BCM2EAA: Chuwi Vi8 plus (CWI519)

The reporter of Red Hat bugzilla 1554835 has tested:
BCM2E84: Lenovo Yoga2

The reporter of kernel bugzilla 274481 has tested:
BCM2E38: Toshiba Encore

Note the Lenovo Yoga2 and Toshiba Encore also needs the earlier patch to
treat all Interrupt ACPI resources as active low.

Cc: stable@vger.kernel.org
Buglink: https://bugzilla.kernel.org/attachment.cgi?id=274481
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1554835
Reported-and-tested-by: Robert R. Howell <rhowell@uwyo.edu>
Reported-and-tested-by: Christian Herzog <daduke@daduke.org>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index ce8c629c0eac..fe45b8869cc1 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -1076,6 +1076,7 @@ static const struct hci_uart_proto bcm_proto = {
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id bcm_acpi_match[] = {
 	{ "BCM2E1A", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
+	{ "BCM2E38", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E39", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E3A", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E3D", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
@@ -1088,12 +1089,17 @@ static const struct acpi_device_id bcm_acpi_match[] = {
 	{ "BCM2E67", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E71", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E72", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
+	{ "BCM2E74", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E7B", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E7C", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E7E", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
+	{ "BCM2E83", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
+	{ "BCM2E84", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
+	{ "BCM2E90", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
 	{ "BCM2E95", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
 	{ "BCM2E96", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
 	{ "BCM2EA4", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
+	{ "BCM2EAA", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, bcm_acpi_match);

From 9644e6b98cda0485d12b6b1cf72658855e57c878 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Mar 2018 21:28:10 +0100
Subject: [PATCH 1640/1678] Bluetooth: hci_bcm: Remove duplication in
 gpio-mappings declaration

We declare the same set of const acpi_gpio_params twice with different
names, besides the needless duplication this naming leads to a sortof
double indirection which also makes it harder to see how the mapping is
actually setup.

This commit renames the first set to have generic names, which better
describe the contents of the mapping and drops the second set.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index fe45b8869cc1..e4371d0edbcf 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -774,25 +774,21 @@ unlock:
 }
 #endif
 
-static const struct acpi_gpio_params int_last_device_wakeup_gpios = { 0, 0, false };
-static const struct acpi_gpio_params int_last_shutdown_gpios = { 1, 0, false };
-static const struct acpi_gpio_params int_last_host_wakeup_gpios = { 2, 0, false };
+static const struct acpi_gpio_params first_gpio = { 0, 0, false };
+static const struct acpi_gpio_params second_gpio = { 1, 0, false };
+static const struct acpi_gpio_params third_gpio = { 2, 0, false };
 
 static const struct acpi_gpio_mapping acpi_bcm_int_last_gpios[] = {
-	{ "device-wakeup-gpios", &int_last_device_wakeup_gpios, 1 },
-	{ "shutdown-gpios", &int_last_shutdown_gpios, 1 },
-	{ "host-wakeup-gpios", &int_last_host_wakeup_gpios, 1 },
+	{ "device-wakeup-gpios", &first_gpio, 1 },
+	{ "shutdown-gpios", &second_gpio, 1 },
+	{ "host-wakeup-gpios", &third_gpio, 1 },
 	{ },
 };
 
-static const struct acpi_gpio_params int_first_host_wakeup_gpios = { 0, 0, false };
-static const struct acpi_gpio_params int_first_device_wakeup_gpios = { 1, 0, false };
-static const struct acpi_gpio_params int_first_shutdown_gpios = { 2, 0, false };
-
 static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = {
-	{ "device-wakeup-gpios", &int_first_device_wakeup_gpios, 1 },
-	{ "shutdown-gpios", &int_first_shutdown_gpios, 1 },
-	{ "host-wakeup-gpios", &int_first_host_wakeup_gpios, 1 },
+	{ "host-wakeup-gpios", &first_gpio, 1 },
+	{ "device-wakeup-gpios", &second_gpio, 1 },
+	{ "shutdown-gpios", &third_gpio, 1 },
 	{ },
 };
 

From a4de1567be322f4fca75e47bb7bd4cd8e3d79657 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 16 Mar 2018 21:28:11 +0100
Subject: [PATCH 1641/1678] Bluetooth: hci_bcm: Do not tie GPIO pin order to a
 specific ACPI HID

Since I've been doing a lot of work on Linux Bay Trail / Cherry Trail
support, I've gathered a collection of ACPI DSDTs from about 50 such
machines.

Looking at these DSTDs many have an ACPI device entry describing a bcm
bluetooth device (often disabled in the DSDT), quite a few of these ACPI
device entries have a resource-table where the order does not match with
the order currently associated with the HID of that entry in the
bcm_acpi_match table.

Looking at the Windows .inf files, there is nothing indicating a specific
order there, so I believe that there is no 1:1 mapping between the ACPI
HID and the order in which the resources are listed.

Therefor this commit replaces the hardcoded mapping based on ACPI HID,
with code which actually checks in which order the resources are listed
and bases the gpio-mapping on that.

This should ensure that we always pick the right mapping and this will
make adding new ACPI HIDs to the driver easier.

This has been tested on the following devices:
-Asus T100CHI           BCM2E39 / brcmfmac43241b4-sdio / BCM4324B3-37.4M.hcd
-Asus T100TA            BCM2E39 / brcmfmac43241b4-sdio / BCM4324B3-37.4M.hcd
-Asus T200TA            BCM2E65 / brcmfmac43340-sdio   / BCM43341B0-37.4M.hcd
-Jumper ezPad mini 3    BCM2E74 / brcmfmac43430a0-sdio / BCM4343A0-26M.hcd
-Acer Iconia Tab8 w1-8  BCM2E83 / brcmfmac4330-sdio    / BCM4330B1-26M.hcd
-Chuwi Vi8 plus(CWI519) BCM2EAA / brcmfmac43430-sdio   / BCM43430A1-26M.hcd

Which together cover all 3 combinations of using an Interrupt resource /
GpioInt resource as first resource / GpioInt resource as last resource.

Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 91 ++++++++++++++++++++++---------------
 1 file changed, 55 insertions(+), 36 deletions(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index e4371d0edbcf..79a0ec57d485 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -98,6 +98,8 @@ struct bcm_device {
 	int			(*set_shutdown)(struct bcm_device *, bool);
 #ifdef CONFIG_ACPI
 	acpi_handle		btlp, btpu, btpd;
+	int			gpio_count;
+	int			gpio_int_idx;
 #endif
 
 	struct clk		*clk;
@@ -829,8 +831,11 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
 
 	case ACPI_RESOURCE_TYPE_GPIO:
 		gpio = &ares->data.gpio;
-		if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT)
+		if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT) {
+			dev->gpio_int_idx = dev->gpio_count;
 			dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW;
+		}
+		dev->gpio_count++;
 		break;
 
 	case ACPI_RESOURCE_TYPE_SERIAL_BUS:
@@ -948,20 +953,11 @@ static int bcm_acpi_probe(struct bcm_device *dev)
 	LIST_HEAD(resources);
 	const struct dmi_system_id *dmi_id;
 	const struct acpi_gpio_mapping *gpio_mapping = acpi_bcm_int_last_gpios;
-	const struct acpi_device_id *id;
 	struct resource_entry *entry;
 	int ret;
 
-	/* Retrieve GPIO data */
-	id = acpi_match_device(dev->dev->driver->acpi_match_table, dev->dev);
-	if (id)
-		gpio_mapping = (const struct acpi_gpio_mapping *) id->driver_data;
-
-	ret = devm_acpi_dev_add_driver_gpios(dev->dev, gpio_mapping);
-	if (ret)
-		return ret;
-
 	/* Retrieve UART ACPI info */
+	dev->gpio_int_idx = -1;
 	ret = acpi_dev_get_resources(ACPI_COMPANION(dev->dev),
 				     &resources, bcm_resource, dev);
 	if (ret < 0)
@@ -975,6 +971,29 @@ static int bcm_acpi_probe(struct bcm_device *dev)
 	}
 	acpi_dev_free_resource_list(&resources);
 
+	/* If the DSDT uses an Interrupt resource for the IRQ, then there are
+	 * only 2 GPIO resources, we use the irq-last mapping for this, since
+	 * we already have an irq the 3th / last mapping will not be used.
+	 */
+	if (dev->irq)
+		gpio_mapping = acpi_bcm_int_last_gpios;
+	else if (dev->gpio_int_idx == 0)
+		gpio_mapping = acpi_bcm_int_first_gpios;
+	else if (dev->gpio_int_idx == 2)
+		gpio_mapping = acpi_bcm_int_last_gpios;
+	else
+		dev_warn(dev->dev, "Unexpected ACPI gpio_int_idx: %d\n",
+			 dev->gpio_int_idx);
+
+	/* Warn if our expectations are not met. */
+	if (dev->gpio_count != (dev->irq ? 2 : 3))
+		dev_warn(dev->dev, "Unexpected number of ACPI GPIOs: %d\n",
+			 dev->gpio_count);
+
+	ret = devm_acpi_dev_add_driver_gpios(dev->dev, gpio_mapping);
+	if (ret)
+		return ret;
+
 	if (irq_polarity != -1) {
 		dev->irq_active_low = irq_polarity;
 		dev_warn(dev->dev, "Overwriting IRQ polarity to active %s by module-param\n",
@@ -1071,31 +1090,31 @@ static const struct hci_uart_proto bcm_proto = {
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id bcm_acpi_match[] = {
-	{ "BCM2E1A", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E38", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E39", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E3A", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E3D", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E3F", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E40", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E54", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E55", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E64", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E65", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E67", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E71", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E72", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E74", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E7B", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E7C", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E7E", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
-	{ "BCM2E83", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
-	{ "BCM2E84", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E90", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
-	{ "BCM2E95", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
-	{ "BCM2E96", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
-	{ "BCM2EA4", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
-	{ "BCM2EAA", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
+	{ "BCM2E1A" },
+	{ "BCM2E38" },
+	{ "BCM2E39" },
+	{ "BCM2E3A" },
+	{ "BCM2E3D" },
+	{ "BCM2E3F" },
+	{ "BCM2E40" },
+	{ "BCM2E54" },
+	{ "BCM2E55" },
+	{ "BCM2E64" },
+	{ "BCM2E65" },
+	{ "BCM2E67" },
+	{ "BCM2E71" },
+	{ "BCM2E72" },
+	{ "BCM2E74" },
+	{ "BCM2E7B" },
+	{ "BCM2E7C" },
+	{ "BCM2E7E" },
+	{ "BCM2E83" },
+	{ "BCM2E84" },
+	{ "BCM2E90" },
+	{ "BCM2E95" },
+	{ "BCM2E96" },
+	{ "BCM2EA4" },
+	{ "BCM2EAA" },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, bcm_acpi_match);

From 6112150261247d9a7d85dda476143985f44f8859 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 21 Mar 2018 13:53:18 +0100
Subject: [PATCH 1642/1678] Bluetooth: hci_bcm: Add ACPI HIDs found in Windows
 .inf files and DSTDs

Now that we need just an ACPI HID in the table, and the driver auto-
configures itself otherwise, we can easily add a bunch of known ACPI HIDs.

This avoids having to add these 1 by 1 as devices with one are encountered
by users.

This commit may seem as if it simply adds all IDs between BCM2E00-BCM2EAC,
but that is not true, all these IDs were found in actual .inf files and
the range is not entirely continuous, the following IDs are not added:
BCM2E6A, BCM2E6C, BCM2E8F and BCM2E91 because I did not see these in any
.inf files. As for the large amount of IDs this seems to be caused by
Broadcom using a separate ID for every bluetooth module using their
chips. E.g. BCM2EA6 seems to be specifically for the Raspberry Pi 3.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 141 ++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 79a0ec57d485..4ceaf1f4a4e7 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -1090,31 +1090,172 @@ static const struct hci_uart_proto bcm_proto = {
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id bcm_acpi_match[] = {
+	{ "BCM2E00" },
+	{ "BCM2E01" },
+	{ "BCM2E02" },
+	{ "BCM2E03" },
+	{ "BCM2E04" },
+	{ "BCM2E05" },
+	{ "BCM2E06" },
+	{ "BCM2E07" },
+	{ "BCM2E08" },
+	{ "BCM2E09" },
+	{ "BCM2E0A" },
+	{ "BCM2E0B" },
+	{ "BCM2E0C" },
+	{ "BCM2E0D" },
+	{ "BCM2E0E" },
+	{ "BCM2E0F" },
+	{ "BCM2E10" },
+	{ "BCM2E11" },
+	{ "BCM2E12" },
+	{ "BCM2E13" },
+	{ "BCM2E14" },
+	{ "BCM2E15" },
+	{ "BCM2E16" },
+	{ "BCM2E17" },
+	{ "BCM2E18" },
+	{ "BCM2E19" },
 	{ "BCM2E1A" },
+	{ "BCM2E1B" },
+	{ "BCM2E1C" },
+	{ "BCM2E1D" },
+	{ "BCM2E1F" },
+	{ "BCM2E20" },
+	{ "BCM2E21" },
+	{ "BCM2E22" },
+	{ "BCM2E23" },
+	{ "BCM2E24" },
+	{ "BCM2E25" },
+	{ "BCM2E26" },
+	{ "BCM2E27" },
+	{ "BCM2E28" },
+	{ "BCM2E29" },
+	{ "BCM2E2A" },
+	{ "BCM2E2B" },
+	{ "BCM2E2C" },
+	{ "BCM2E2D" },
+	{ "BCM2E2E" },
+	{ "BCM2E2F" },
+	{ "BCM2E30" },
+	{ "BCM2E31" },
+	{ "BCM2E32" },
+	{ "BCM2E33" },
+	{ "BCM2E34" },
+	{ "BCM2E35" },
+	{ "BCM2E36" },
+	{ "BCM2E37" },
 	{ "BCM2E38" },
 	{ "BCM2E39" },
 	{ "BCM2E3A" },
+	{ "BCM2E3B" },
+	{ "BCM2E3C" },
 	{ "BCM2E3D" },
+	{ "BCM2E3E" },
 	{ "BCM2E3F" },
 	{ "BCM2E40" },
+	{ "BCM2E41" },
+	{ "BCM2E42" },
+	{ "BCM2E43" },
+	{ "BCM2E44" },
+	{ "BCM2E45" },
+	{ "BCM2E46" },
+	{ "BCM2E47" },
+	{ "BCM2E48" },
+	{ "BCM2E49" },
+	{ "BCM2E4A" },
+	{ "BCM2E4B" },
+	{ "BCM2E4C" },
+	{ "BCM2E4D" },
+	{ "BCM2E4E" },
+	{ "BCM2E4F" },
+	{ "BCM2E50" },
+	{ "BCM2E51" },
+	{ "BCM2E52" },
+	{ "BCM2E53" },
 	{ "BCM2E54" },
 	{ "BCM2E55" },
+	{ "BCM2E56" },
+	{ "BCM2E57" },
+	{ "BCM2E58" },
+	{ "BCM2E59" },
+	{ "BCM2E5A" },
+	{ "BCM2E5B" },
+	{ "BCM2E5C" },
+	{ "BCM2E5D" },
+	{ "BCM2E5E" },
+	{ "BCM2E5F" },
+	{ "BCM2E60" },
+	{ "BCM2E61" },
+	{ "BCM2E62" },
+	{ "BCM2E63" },
 	{ "BCM2E64" },
 	{ "BCM2E65" },
+	{ "BCM2E66" },
 	{ "BCM2E67" },
+	{ "BCM2E68" },
+	{ "BCM2E69" },
+	{ "BCM2E6B" },
+	{ "BCM2E6D" },
+	{ "BCM2E6E" },
+	{ "BCM2E6F" },
+	{ "BCM2E70" },
 	{ "BCM2E71" },
 	{ "BCM2E72" },
+	{ "BCM2E73" },
 	{ "BCM2E74" },
+	{ "BCM2E75" },
+	{ "BCM2E76" },
+	{ "BCM2E77" },
+	{ "BCM2E78" },
+	{ "BCM2E79" },
+	{ "BCM2E7A" },
 	{ "BCM2E7B" },
 	{ "BCM2E7C" },
+	{ "BCM2E7D" },
 	{ "BCM2E7E" },
+	{ "BCM2E7F" },
+	{ "BCM2E80" },
+	{ "BCM2E81" },
+	{ "BCM2E82" },
 	{ "BCM2E83" },
 	{ "BCM2E84" },
+	{ "BCM2E85" },
+	{ "BCM2E86" },
+	{ "BCM2E87" },
+	{ "BCM2E88" },
+	{ "BCM2E89" },
+	{ "BCM2E8A" },
+	{ "BCM2E8B" },
+	{ "BCM2E8C" },
+	{ "BCM2E8D" },
+	{ "BCM2E8E" },
 	{ "BCM2E90" },
+	{ "BCM2E92" },
+	{ "BCM2E93" },
+	{ "BCM2E94" },
 	{ "BCM2E95" },
 	{ "BCM2E96" },
+	{ "BCM2E97" },
+	{ "BCM2E98" },
+	{ "BCM2E99" },
+	{ "BCM2E9A" },
+	{ "BCM2E9B" },
+	{ "BCM2E9C" },
+	{ "BCM2E9D" },
+	{ "BCM2EA0" },
+	{ "BCM2EA1" },
+	{ "BCM2EA2" },
+	{ "BCM2EA3" },
 	{ "BCM2EA4" },
+	{ "BCM2EA5" },
+	{ "BCM2EA6" },
+	{ "BCM2EA7" },
+	{ "BCM2EA8" },
+	{ "BCM2EA9" },
 	{ "BCM2EAA" },
+	{ "BCM2EAB" },
+	{ "BCM2EAC" },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, bcm_acpi_match);

From f9d7c8fd2630d1d15dbc23e6ff6f9f0b54194ee4 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 24 Mar 2018 10:19:52 +0100
Subject: [PATCH 1643/1678] Bluetooth: hci_ll: Convert to use h4_recv_buf
 helper

The HCILL or eHCILL protocol from TI is actually an H:4 protocol with a
few extra events and thus can also use the h4_recv_buf helper. Instead
of open coding the same funtionality add the extra events to the packet
description table and use h4_recv_buf.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/Kconfig  |   1 +
 drivers/bluetooth/hci_ll.c | 214 ++++++++++++-------------------------
 2 files changed, 70 insertions(+), 145 deletions(-)

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 149a38ee1fce..b83d12ac378e 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -147,6 +147,7 @@ config BT_HCIUART_ATH3K
 config BT_HCIUART_LL
 	bool "HCILL protocol support"
 	depends on BT_HCIUART_SERDEV
+	select BT_HCIUART_H4
 	help
 	  HCILL (HCI Low Level) is a serial protocol for communication
 	  between Bluetooth device and host. This protocol is required for
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 7c55a9f77808..27e414b4e3a2 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -67,13 +67,6 @@
 #define HCILL_WAKE_UP_IND	0x32
 #define HCILL_WAKE_UP_ACK	0x33
 
-/* HCILL receiver States */
-#define HCILL_W4_PACKET_TYPE	0
-#define HCILL_W4_EVENT_HDR	1
-#define HCILL_W4_ACL_HDR	2
-#define HCILL_W4_SCO_HDR	3
-#define HCILL_W4_DATA		4
-
 /* HCILL states */
 enum hcill_states_e {
 	HCILL_ASLEEP,
@@ -91,8 +84,6 @@ struct ll_device {
 };
 
 struct ll_struct {
-	unsigned long rx_state;
-	unsigned long rx_count;
 	struct sk_buff *rx_skb;
 	struct sk_buff_head txq;
 	spinlock_t hcill_lock;		/* HCILL state lock	*/
@@ -373,155 +364,88 @@ static int ll_enqueue(struct hci_uart *hu, struct sk_buff *skb)
 	return 0;
 }
 
-static inline int ll_check_data_len(struct hci_dev *hdev, struct ll_struct *ll, int len)
+static int ll_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
 {
-	int room = skb_tailroom(ll->rx_skb);
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct ll_struct *ll = hu->priv;
 
-	BT_DBG("len %d room %d", len, room);
-
-	if (!len) {
-		hci_recv_frame(hdev, ll->rx_skb);
-	} else if (len > room) {
-		BT_ERR("Data length is too large");
-		kfree_skb(ll->rx_skb);
-	} else {
-		ll->rx_state = HCILL_W4_DATA;
-		ll->rx_count = len;
-		return len;
+	switch (hci_skb_pkt_type(skb)) {
+	case HCILL_GO_TO_SLEEP_IND:
+		BT_DBG("HCILL_GO_TO_SLEEP_IND packet");
+		ll_device_want_to_sleep(hu);
+		break;
+	case HCILL_GO_TO_SLEEP_ACK:
+		/* shouldn't happen */
+		bt_dev_err(hdev, "received HCILL_GO_TO_SLEEP_ACK in state %ld",
+			   ll->hcill_state);
+		break;
+	case HCILL_WAKE_UP_IND:
+		BT_DBG("HCILL_WAKE_UP_IND packet");
+		ll_device_want_to_wakeup(hu);
+		break;
+	case HCILL_WAKE_UP_ACK:
+		BT_DBG("HCILL_WAKE_UP_ACK packet");
+		ll_device_woke_up(hu);
+		break;
 	}
 
-	ll->rx_state = HCILL_W4_PACKET_TYPE;
-	ll->rx_skb   = NULL;
-	ll->rx_count = 0;
-
+	kfree_skb(skb);
 	return 0;
 }
 
+#define LL_RECV_SLEEP_IND \
+	.type = HCILL_GO_TO_SLEEP_IND, \
+	.hlen = 0, \
+	.loff = 0, \
+	.lsize = 0, \
+	.maxlen = 0
+
+#define LL_RECV_SLEEP_ACK \
+	.type = HCILL_GO_TO_SLEEP_ACK, \
+	.hlen = 0, \
+	.loff = 0, \
+	.lsize = 0, \
+	.maxlen = 0
+
+#define LL_RECV_WAKE_IND \
+	.type = HCILL_WAKE_UP_IND, \
+	.hlen = 0, \
+	.loff = 0, \
+	.lsize = 0, \
+	.maxlen = 0
+
+#define LL_RECV_WAKE_ACK \
+	.type = HCILL_WAKE_UP_ACK, \
+	.hlen = 0, \
+	.loff = 0, \
+	.lsize = 0, \
+	.maxlen = 0
+
+static const struct h4_recv_pkt ll_recv_pkts[] = {
+	{ H4_RECV_ACL,       .recv = hci_recv_frame },
+	{ H4_RECV_SCO,       .recv = hci_recv_frame },
+	{ H4_RECV_EVENT,     .recv = hci_recv_frame },
+	{ LL_RECV_SLEEP_IND, .recv = ll_recv_frame  },
+	{ LL_RECV_SLEEP_ACK, .recv = ll_recv_frame  },
+	{ LL_RECV_WAKE_IND,  .recv = ll_recv_frame  },
+	{ LL_RECV_WAKE_ACK,  .recv = ll_recv_frame  },
+};
+
 /* Recv data */
 static int ll_recv(struct hci_uart *hu, const void *data, int count)
 {
 	struct ll_struct *ll = hu->priv;
-	const char *ptr;
-	struct hci_event_hdr *eh;
-	struct hci_acl_hdr   *ah;
-	struct hci_sco_hdr   *sh;
-	int len, type, dlen;
 
-	BT_DBG("hu %p count %d rx_state %ld rx_count %ld", hu, count, ll->rx_state, ll->rx_count);
+	if (!test_bit(HCI_UART_REGISTERED, &hu->flags))
+		return -EUNATCH;
 
-	ptr = data;
-	while (count) {
-		if (ll->rx_count) {
-			len = min_t(unsigned int, ll->rx_count, count);
-			skb_put_data(ll->rx_skb, ptr, len);
-			ll->rx_count -= len; count -= len; ptr += len;
-
-			if (ll->rx_count)
-				continue;
-
-			switch (ll->rx_state) {
-			case HCILL_W4_DATA:
-				BT_DBG("Complete data");
-				hci_recv_frame(hu->hdev, ll->rx_skb);
-
-				ll->rx_state = HCILL_W4_PACKET_TYPE;
-				ll->rx_skb = NULL;
-				continue;
-
-			case HCILL_W4_EVENT_HDR:
-				eh = hci_event_hdr(ll->rx_skb);
-
-				BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen);
-
-				ll_check_data_len(hu->hdev, ll, eh->plen);
-				continue;
-
-			case HCILL_W4_ACL_HDR:
-				ah = hci_acl_hdr(ll->rx_skb);
-				dlen = __le16_to_cpu(ah->dlen);
-
-				BT_DBG("ACL header: dlen %d", dlen);
-
-				ll_check_data_len(hu->hdev, ll, dlen);
-				continue;
-
-			case HCILL_W4_SCO_HDR:
-				sh = hci_sco_hdr(ll->rx_skb);
-
-				BT_DBG("SCO header: dlen %d", sh->dlen);
-
-				ll_check_data_len(hu->hdev, ll, sh->dlen);
-				continue;
-			}
-		}
-
-		/* HCILL_W4_PACKET_TYPE */
-		switch (*ptr) {
-		case HCI_EVENT_PKT:
-			BT_DBG("Event packet");
-			ll->rx_state = HCILL_W4_EVENT_HDR;
-			ll->rx_count = HCI_EVENT_HDR_SIZE;
-			type = HCI_EVENT_PKT;
-			break;
-
-		case HCI_ACLDATA_PKT:
-			BT_DBG("ACL packet");
-			ll->rx_state = HCILL_W4_ACL_HDR;
-			ll->rx_count = HCI_ACL_HDR_SIZE;
-			type = HCI_ACLDATA_PKT;
-			break;
-
-		case HCI_SCODATA_PKT:
-			BT_DBG("SCO packet");
-			ll->rx_state = HCILL_W4_SCO_HDR;
-			ll->rx_count = HCI_SCO_HDR_SIZE;
-			type = HCI_SCODATA_PKT;
-			break;
-
-		/* HCILL signals */
-		case HCILL_GO_TO_SLEEP_IND:
-			BT_DBG("HCILL_GO_TO_SLEEP_IND packet");
-			ll_device_want_to_sleep(hu);
-			ptr++; count--;
-			continue;
-
-		case HCILL_GO_TO_SLEEP_ACK:
-			/* shouldn't happen */
-			BT_ERR("received HCILL_GO_TO_SLEEP_ACK (in state %ld)", ll->hcill_state);
-			ptr++; count--;
-			continue;
-
-		case HCILL_WAKE_UP_IND:
-			BT_DBG("HCILL_WAKE_UP_IND packet");
-			ll_device_want_to_wakeup(hu);
-			ptr++; count--;
-			continue;
-
-		case HCILL_WAKE_UP_ACK:
-			BT_DBG("HCILL_WAKE_UP_ACK packet");
-			ll_device_woke_up(hu);
-			ptr++; count--;
-			continue;
-
-		default:
-			BT_ERR("Unknown HCI packet type %2.2x", (__u8)*ptr);
-			hu->hdev->stat.err_rx++;
-			ptr++; count--;
-			continue;
-		}
-
-		ptr++; count--;
-
-		/* Allocate packet */
-		ll->rx_skb = bt_skb_alloc(HCI_MAX_FRAME_SIZE, GFP_ATOMIC);
-		if (!ll->rx_skb) {
-			BT_ERR("Can't allocate mem for new packet");
-			ll->rx_state = HCILL_W4_PACKET_TYPE;
-			ll->rx_count = 0;
-			return -ENOMEM;
-		}
-
-		hci_skb_pkt_type(ll->rx_skb) = type;
+	ll->rx_skb = h4_recv_buf(hu->hdev, ll->rx_skb, data, count,
+				 ll_recv_pkts, ARRAY_SIZE(ll_recv_pkts));
+	if (IS_ERR(ll->rx_skb)) {
+		int err = PTR_ERR(ll->rx_skb);
+		bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err);
+		ll->rx_skb = NULL;
+		return err;
 	}
 
 	return count;

From 07eb96a5a7b083c988a2c7b0663e958e392f18c7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 24 Mar 2018 10:19:53 +0100
Subject: [PATCH 1644/1678] Bluetooth: bpa10x: Use separate h4_recv_buf helper

When adding the alignment and padding support for H:4 packet processing
for the Nokia driver, it broke the h4_recv_buf usage within bpa10x
driver. To fix this use a separate helper function and placing it into a
dedicated h4_recv.h header file.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/Kconfig   |   3 +-
 drivers/bluetooth/bpa10x.c  |   2 +-
 drivers/bluetooth/h4_recv.h | 160 ++++++++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+), 3 deletions(-)
 create mode 100644 drivers/bluetooth/h4_recv.h

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index b83d12ac378e..4e7594f3d072 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -243,8 +243,7 @@ config BT_HCIBCM203X
 
 config BT_HCIBPA10X
 	tristate "HCI BPA10x USB driver"
-	depends on USB && BT_HCIUART
-	select BT_HCIUART_H4
+	depends on USB
 	help
 	  Bluetooth HCI BPA10x USB driver.
 	  This driver provides support for the Digianswer BPA 100/105 Bluetooth
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 801ea4ca65e4..c6f7cc57db14 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -35,7 +35,7 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
-#include "hci_uart.h"
+#include "h4_recv.h"
 
 #define VERSION "0.11"
 
diff --git a/drivers/bluetooth/h4_recv.h b/drivers/bluetooth/h4_recv.h
new file mode 100644
index 000000000000..b432651f8236
--- /dev/null
+++ b/drivers/bluetooth/h4_recv.h
@@ -0,0 +1,160 @@
+/*
+ *
+ *  Generic Bluetooth HCI UART driver
+ *
+ *  Copyright (C) 2015-2018  Intel Corporation
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <asm/unaligned.h>
+
+struct h4_recv_pkt {
+	u8  type;	/* Packet type */
+	u8  hlen;	/* Header length */
+	u8  loff;	/* Data length offset in header */
+	u8  lsize;	/* Data length field size */
+	u16 maxlen;	/* Max overall packet length */
+	int (*recv)(struct hci_dev *hdev, struct sk_buff *skb);
+};
+
+#define H4_RECV_ACL \
+	.type = HCI_ACLDATA_PKT, \
+	.hlen = HCI_ACL_HDR_SIZE, \
+	.loff = 2, \
+	.lsize = 2, \
+	.maxlen = HCI_MAX_FRAME_SIZE \
+
+#define H4_RECV_SCO \
+	.type = HCI_SCODATA_PKT, \
+	.hlen = HCI_SCO_HDR_SIZE, \
+	.loff = 2, \
+	.lsize = 1, \
+	.maxlen = HCI_MAX_SCO_SIZE
+
+#define H4_RECV_EVENT \
+	.type = HCI_EVENT_PKT, \
+	.hlen = HCI_EVENT_HDR_SIZE, \
+	.loff = 1, \
+	.lsize = 1, \
+	.maxlen = HCI_MAX_EVENT_SIZE
+
+static inline struct sk_buff *h4_recv_buf(struct hci_dev *hdev,
+					  struct sk_buff *skb,
+					  const unsigned char *buffer,
+					  int count,
+					  const struct h4_recv_pkt *pkts,
+					  int pkts_count)
+{
+	while (count) {
+		int i, len;
+
+		if (!count)
+			break;
+
+		if (!skb) {
+			for (i = 0; i < pkts_count; i++) {
+				if (buffer[0] != (&pkts[i])->type)
+					continue;
+
+				skb = bt_skb_alloc((&pkts[i])->maxlen,
+						   GFP_ATOMIC);
+				if (!skb)
+					return ERR_PTR(-ENOMEM);
+
+				hci_skb_pkt_type(skb) = (&pkts[i])->type;
+				hci_skb_expect(skb) = (&pkts[i])->hlen;
+				break;
+			}
+
+			/* Check for invalid packet type */
+			if (!skb)
+				return ERR_PTR(-EILSEQ);
+
+			count -= 1;
+			buffer += 1;
+		}
+
+		len = min_t(uint, hci_skb_expect(skb) - skb->len, count);
+		skb_put_data(skb, buffer, len);
+
+		count -= len;
+		buffer += len;
+
+		/* Check for partial packet */
+		if (skb->len < hci_skb_expect(skb))
+			continue;
+
+		for (i = 0; i < pkts_count; i++) {
+			if (hci_skb_pkt_type(skb) == (&pkts[i])->type)
+				break;
+		}
+
+		if (i >= pkts_count) {
+			kfree_skb(skb);
+			return ERR_PTR(-EILSEQ);
+		}
+
+		if (skb->len == (&pkts[i])->hlen) {
+			u16 dlen;
+
+			switch ((&pkts[i])->lsize) {
+			case 0:
+				/* No variable data length */
+				dlen = 0;
+				break;
+			case 1:
+				/* Single octet variable length */
+				dlen = skb->data[(&pkts[i])->loff];
+				hci_skb_expect(skb) += dlen;
+
+				if (skb_tailroom(skb) < dlen) {
+					kfree_skb(skb);
+					return ERR_PTR(-EMSGSIZE);
+				}
+				break;
+			case 2:
+				/* Double octet variable length */
+				dlen = get_unaligned_le16(skb->data +
+							  (&pkts[i])->loff);
+				hci_skb_expect(skb) += dlen;
+
+				if (skb_tailroom(skb) < dlen) {
+					kfree_skb(skb);
+					return ERR_PTR(-EMSGSIZE);
+				}
+				break;
+			default:
+				/* Unsupported variable length */
+				kfree_skb(skb);
+				return ERR_PTR(-EILSEQ);
+			}
+
+			if (!dlen) {
+				/* No more data, complete frame */
+				(&pkts[i])->recv(hdev, skb);
+				skb = NULL;
+			}
+		} else {
+			/* Complete frame */
+			(&pkts[i])->recv(hdev, skb);
+			skb = NULL;
+		}
+	}
+
+	return skb;
+}

From 6f6f1eced8c325d5ef64451556947f606f9fac7a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 24 Mar 2018 10:19:54 +0100
Subject: [PATCH 1645/1678] Bluetooth: Remove unused btuart_cs driver

With patch 279c936153199 the btuart_cs driver has been deprecated in
favor of serial_cs + hci_uart combination.

  static struct pcmcia_device_id btuart_ids[] = {
         /* don't use this driver. Use serial_cs + hci_uart instead */
         PCMCIA_DEVICE_NULL
  };

Intead of keeping it around, just remove it since it is not even
assigned to any PCMCIA identifiers anymore.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/Kconfig     |  16 -
 drivers/bluetooth/Makefile    |   1 -
 drivers/bluetooth/btuart_cs.c | 675 ----------------------------------
 3 files changed, 692 deletions(-)
 delete mode 100644 drivers/bluetooth/btuart_cs.c

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 4e7594f3d072..010f5f579e68 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -305,22 +305,6 @@ config BT_HCIBLUECARD
 	  Say Y here to compile support for HCI BlueCard devices into the
 	  kernel or say M to compile it as module (bluecard_cs).
 
-config BT_HCIBTUART
-	tristate "HCI UART (PC Card) device driver"
-	depends on PCMCIA
-	help
-	  Bluetooth HCI UART (PC Card) driver.
-	  This driver provides support for Bluetooth PCMCIA devices with
-	  an UART interface:
-	     Xircom CreditCard Bluetooth Adapter
-	     Xircom RealPort2 Bluetooth Adapter
-	     Sphinx PICO Card
-	     H-Soft blue+Card
-	     Cyber-blue Compact Flash Card
-
-	  Say Y here to compile support for HCI UART devices into the
-	  kernel or say M to compile it as module (btuart_cs).
-
 config BT_HCIVHCI
 	tristate "HCI VHCI (Virtual HCI device) driver"
 	help
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 03cfc1b20c4a..ec16c55eb6e9 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_BT_HCIBFUSB)	+= bfusb.o
 obj-$(CONFIG_BT_HCIDTL1)	+= dtl1_cs.o
 obj-$(CONFIG_BT_HCIBT3C)	+= bt3c_cs.o
 obj-$(CONFIG_BT_HCIBLUECARD)	+= bluecard_cs.o
-obj-$(CONFIG_BT_HCIBTUART)	+= btuart_cs.o
 
 obj-$(CONFIG_BT_HCIBTUSB)	+= btusb.o
 obj-$(CONFIG_BT_HCIBTSDIO)	+= btsdio.o
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
deleted file mode 100644
index 310e9c2e09b6..000000000000
--- a/drivers/bluetooth/btuart_cs.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- *
- *  Driver for Bluetooth PCMCIA cards with HCI UART interface
- *
- *  Copyright (C) 2001-2002  Marcel Holtmann <marcel@holtmann.org>
- *
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation;
- *
- *  Software distributed under the License is distributed on an "AS
- *  IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
- *  implied. See the License for the specific language governing
- *  rights and limitations under the License.
- *
- *  The initial developer of the original code is David A. Hinds
- *  <dahinds@users.sourceforge.net>.  Portions created by David A. Hinds
- *  are Copyright (C) 1999 David A. Hinds.  All Rights Reserved.
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/ioport.h>
-#include <linux/spinlock.h>
-#include <linux/moduleparam.h>
-
-#include <linux/skbuff.h>
-#include <linux/string.h>
-#include <linux/serial.h>
-#include <linux/serial_reg.h>
-#include <linux/bitops.h>
-#include <linux/io.h>
-
-#include <pcmcia/cistpl.h>
-#include <pcmcia/ciscode.h>
-#include <pcmcia/ds.h>
-#include <pcmcia/cisreg.h>
-
-#include <net/bluetooth/bluetooth.h>
-#include <net/bluetooth/hci_core.h>
-
-
-
-/* ======================== Module parameters ======================== */
-
-
-MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth driver for Bluetooth PCMCIA cards with HCI UART interface");
-MODULE_LICENSE("GPL");
-
-
-
-/* ======================== Local structures ======================== */
-
-
-struct btuart_info {
-	struct pcmcia_device *p_dev;
-
-	struct hci_dev *hdev;
-
-	spinlock_t lock;	/* For serializing operations */
-
-	struct sk_buff_head txq;
-	unsigned long tx_state;
-
-	unsigned long rx_state;
-	unsigned long rx_count;
-	struct sk_buff *rx_skb;
-};
-
-
-static int btuart_config(struct pcmcia_device *link);
-static void btuart_release(struct pcmcia_device *link);
-
-static void btuart_detach(struct pcmcia_device *p_dev);
-
-
-/* Maximum baud rate */
-#define SPEED_MAX  115200
-
-/* Default baud rate: 57600, 115200, 230400 or 460800 */
-#define DEFAULT_BAUD_RATE  115200
-
-
-/* Transmit states  */
-#define XMIT_SENDING	1
-#define XMIT_WAKEUP	2
-#define XMIT_WAITING	8
-
-/* Receiver states */
-#define RECV_WAIT_PACKET_TYPE	0
-#define RECV_WAIT_EVENT_HEADER	1
-#define RECV_WAIT_ACL_HEADER	2
-#define RECV_WAIT_SCO_HEADER	3
-#define RECV_WAIT_DATA		4
-
-
-
-/* ======================== Interrupt handling ======================== */
-
-
-static int btuart_write(unsigned int iobase, int fifo_size, __u8 *buf, int len)
-{
-	int actual = 0;
-
-	/* Tx FIFO should be empty */
-	if (!(inb(iobase + UART_LSR) & UART_LSR_THRE))
-		return 0;
-
-	/* Fill FIFO with current frame */
-	while ((fifo_size-- > 0) && (actual < len)) {
-		/* Transmit next byte */
-		outb(buf[actual], iobase + UART_TX);
-		actual++;
-	}
-
-	return actual;
-}
-
-
-static void btuart_write_wakeup(struct btuart_info *info)
-{
-	if (!info) {
-		BT_ERR("Unknown device");
-		return;
-	}
-
-	if (test_and_set_bit(XMIT_SENDING, &(info->tx_state))) {
-		set_bit(XMIT_WAKEUP, &(info->tx_state));
-		return;
-	}
-
-	do {
-		unsigned int iobase = info->p_dev->resource[0]->start;
-		register struct sk_buff *skb;
-		int len;
-
-		clear_bit(XMIT_WAKEUP, &(info->tx_state));
-
-		if (!pcmcia_dev_present(info->p_dev))
-			return;
-
-		skb = skb_dequeue(&(info->txq));
-		if (!skb)
-			break;
-
-		/* Send frame */
-		len = btuart_write(iobase, 16, skb->data, skb->len);
-		set_bit(XMIT_WAKEUP, &(info->tx_state));
-
-		if (len == skb->len) {
-			kfree_skb(skb);
-		} else {
-			skb_pull(skb, len);
-			skb_queue_head(&(info->txq), skb);
-		}
-
-		info->hdev->stat.byte_tx += len;
-
-	} while (test_bit(XMIT_WAKEUP, &(info->tx_state)));
-
-	clear_bit(XMIT_SENDING, &(info->tx_state));
-}
-
-
-static void btuart_receive(struct btuart_info *info)
-{
-	unsigned int iobase;
-	int boguscount = 0;
-
-	if (!info) {
-		BT_ERR("Unknown device");
-		return;
-	}
-
-	iobase = info->p_dev->resource[0]->start;
-
-	do {
-		info->hdev->stat.byte_rx++;
-
-		/* Allocate packet */
-		if (!info->rx_skb) {
-			info->rx_state = RECV_WAIT_PACKET_TYPE;
-			info->rx_count = 0;
-			info->rx_skb = bt_skb_alloc(HCI_MAX_FRAME_SIZE, GFP_ATOMIC);
-			if (!info->rx_skb) {
-				BT_ERR("Can't allocate mem for new packet");
-				return;
-			}
-		}
-
-		if (info->rx_state == RECV_WAIT_PACKET_TYPE) {
-
-			hci_skb_pkt_type(info->rx_skb) = inb(iobase + UART_RX);
-
-			switch (hci_skb_pkt_type(info->rx_skb)) {
-
-			case HCI_EVENT_PKT:
-				info->rx_state = RECV_WAIT_EVENT_HEADER;
-				info->rx_count = HCI_EVENT_HDR_SIZE;
-				break;
-
-			case HCI_ACLDATA_PKT:
-				info->rx_state = RECV_WAIT_ACL_HEADER;
-				info->rx_count = HCI_ACL_HDR_SIZE;
-				break;
-
-			case HCI_SCODATA_PKT:
-				info->rx_state = RECV_WAIT_SCO_HEADER;
-				info->rx_count = HCI_SCO_HDR_SIZE;
-				break;
-
-			default:
-				/* Unknown packet */
-				BT_ERR("Unknown HCI packet with type 0x%02x received",
-				       hci_skb_pkt_type(info->rx_skb));
-				info->hdev->stat.err_rx++;
-
-				kfree_skb(info->rx_skb);
-				info->rx_skb = NULL;
-				break;
-
-			}
-
-		} else {
-
-			skb_put_u8(info->rx_skb, inb(iobase + UART_RX));
-			info->rx_count--;
-
-			if (info->rx_count == 0) {
-
-				int dlen;
-				struct hci_event_hdr *eh;
-				struct hci_acl_hdr *ah;
-				struct hci_sco_hdr *sh;
-
-
-				switch (info->rx_state) {
-
-				case RECV_WAIT_EVENT_HEADER:
-					eh = hci_event_hdr(info->rx_skb);
-					info->rx_state = RECV_WAIT_DATA;
-					info->rx_count = eh->plen;
-					break;
-
-				case RECV_WAIT_ACL_HEADER:
-					ah = hci_acl_hdr(info->rx_skb);
-					dlen = __le16_to_cpu(ah->dlen);
-					info->rx_state = RECV_WAIT_DATA;
-					info->rx_count = dlen;
-					break;
-
-				case RECV_WAIT_SCO_HEADER:
-					sh = hci_sco_hdr(info->rx_skb);
-					info->rx_state = RECV_WAIT_DATA;
-					info->rx_count = sh->dlen;
-					break;
-
-				case RECV_WAIT_DATA:
-					hci_recv_frame(info->hdev, info->rx_skb);
-					info->rx_skb = NULL;
-					break;
-
-				}
-
-			}
-
-		}
-
-		/* Make sure we don't stay here too long */
-		if (boguscount++ > 16)
-			break;
-
-	} while (inb(iobase + UART_LSR) & UART_LSR_DR);
-}
-
-
-static irqreturn_t btuart_interrupt(int irq, void *dev_inst)
-{
-	struct btuart_info *info = dev_inst;
-	unsigned int iobase;
-	int boguscount = 0;
-	int iir, lsr;
-	irqreturn_t r = IRQ_NONE;
-
-	if (!info || !info->hdev)
-		/* our irq handler is shared */
-		return IRQ_NONE;
-
-	iobase = info->p_dev->resource[0]->start;
-
-	spin_lock(&(info->lock));
-
-	iir = inb(iobase + UART_IIR) & UART_IIR_ID;
-	while (iir) {
-		r = IRQ_HANDLED;
-
-		/* Clear interrupt */
-		lsr = inb(iobase + UART_LSR);
-
-		switch (iir) {
-		case UART_IIR_RLSI:
-			BT_ERR("RLSI");
-			break;
-		case UART_IIR_RDI:
-			/* Receive interrupt */
-			btuart_receive(info);
-			break;
-		case UART_IIR_THRI:
-			if (lsr & UART_LSR_THRE) {
-				/* Transmitter ready for data */
-				btuart_write_wakeup(info);
-			}
-			break;
-		default:
-			BT_ERR("Unhandled IIR=%#x", iir);
-			break;
-		}
-
-		/* Make sure we don't stay here too long */
-		if (boguscount++ > 100)
-			break;
-
-		iir = inb(iobase + UART_IIR) & UART_IIR_ID;
-
-	}
-
-	spin_unlock(&(info->lock));
-
-	return r;
-}
-
-
-static void btuart_change_speed(struct btuart_info *info,
-				unsigned int speed)
-{
-	unsigned long flags;
-	unsigned int iobase;
-	int fcr;		/* FIFO control reg */
-	int lcr;		/* Line control reg */
-	int divisor;
-
-	if (!info) {
-		BT_ERR("Unknown device");
-		return;
-	}
-
-	iobase = info->p_dev->resource[0]->start;
-
-	spin_lock_irqsave(&(info->lock), flags);
-
-	/* Turn off interrupts */
-	outb(0, iobase + UART_IER);
-
-	divisor = SPEED_MAX / speed;
-
-	fcr = UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT;
-
-	/* 
-	 * Use trigger level 1 to avoid 3 ms. timeout delay at 9600 bps, and
-	 * almost 1,7 ms at 19200 bps. At speeds above that we can just forget
-	 * about this timeout since it will always be fast enough. 
-	 */
-
-	if (speed < 38400)
-		fcr |= UART_FCR_TRIGGER_1;
-	else
-		fcr |= UART_FCR_TRIGGER_14;
-
-	/* Bluetooth cards use 8N1 */
-	lcr = UART_LCR_WLEN8;
-
-	outb(UART_LCR_DLAB | lcr, iobase + UART_LCR);	/* Set DLAB */
-	outb(divisor & 0xff, iobase + UART_DLL);	/* Set speed */
-	outb(divisor >> 8, iobase + UART_DLM);
-	outb(lcr, iobase + UART_LCR);	/* Set 8N1  */
-	outb(fcr, iobase + UART_FCR);	/* Enable FIFO's */
-
-	/* Turn on interrupts */
-	outb(UART_IER_RLSI | UART_IER_RDI | UART_IER_THRI, iobase + UART_IER);
-
-	spin_unlock_irqrestore(&(info->lock), flags);
-}
-
-
-
-/* ======================== HCI interface ======================== */
-
-
-static int btuart_hci_flush(struct hci_dev *hdev)
-{
-	struct btuart_info *info = hci_get_drvdata(hdev);
-
-	/* Drop TX queue */
-	skb_queue_purge(&(info->txq));
-
-	return 0;
-}
-
-
-static int btuart_hci_open(struct hci_dev *hdev)
-{
-	return 0;
-}
-
-
-static int btuart_hci_close(struct hci_dev *hdev)
-{
-	btuart_hci_flush(hdev);
-
-	return 0;
-}
-
-
-static int btuart_hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
-{
-	struct btuart_info *info = hci_get_drvdata(hdev);
-
-	switch (hci_skb_pkt_type(skb)) {
-	case HCI_COMMAND_PKT:
-		hdev->stat.cmd_tx++;
-		break;
-	case HCI_ACLDATA_PKT:
-		hdev->stat.acl_tx++;
-		break;
-	case HCI_SCODATA_PKT:
-		hdev->stat.sco_tx++;
-		break;
-	}
-
-	/* Prepend skb with frame type */
-	memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1);
-	skb_queue_tail(&(info->txq), skb);
-
-	btuart_write_wakeup(info);
-
-	return 0;
-}
-
-
-
-/* ======================== Card services HCI interaction ======================== */
-
-
-static int btuart_open(struct btuart_info *info)
-{
-	unsigned long flags;
-	unsigned int iobase = info->p_dev->resource[0]->start;
-	struct hci_dev *hdev;
-
-	spin_lock_init(&(info->lock));
-
-	skb_queue_head_init(&(info->txq));
-
-	info->rx_state = RECV_WAIT_PACKET_TYPE;
-	info->rx_count = 0;
-	info->rx_skb = NULL;
-
-	/* Initialize HCI device */
-	hdev = hci_alloc_dev();
-	if (!hdev) {
-		BT_ERR("Can't allocate HCI device");
-		return -ENOMEM;
-	}
-
-	info->hdev = hdev;
-
-	hdev->bus = HCI_PCCARD;
-	hci_set_drvdata(hdev, info);
-	SET_HCIDEV_DEV(hdev, &info->p_dev->dev);
-
-	hdev->open  = btuart_hci_open;
-	hdev->close = btuart_hci_close;
-	hdev->flush = btuart_hci_flush;
-	hdev->send  = btuart_hci_send_frame;
-
-	spin_lock_irqsave(&(info->lock), flags);
-
-	/* Reset UART */
-	outb(0, iobase + UART_MCR);
-
-	/* Turn off interrupts */
-	outb(0, iobase + UART_IER);
-
-	/* Initialize UART */
-	outb(UART_LCR_WLEN8, iobase + UART_LCR);	/* Reset DLAB */
-	outb((UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2), iobase + UART_MCR);
-
-	/* Turn on interrupts */
-	// outb(UART_IER_RLSI | UART_IER_RDI | UART_IER_THRI, iobase + UART_IER);
-
-	spin_unlock_irqrestore(&(info->lock), flags);
-
-	btuart_change_speed(info, DEFAULT_BAUD_RATE);
-
-	/* Timeout before it is safe to send the first HCI packet */
-	msleep(1000);
-
-	/* Register HCI device */
-	if (hci_register_dev(hdev) < 0) {
-		BT_ERR("Can't register HCI device");
-		info->hdev = NULL;
-		hci_free_dev(hdev);
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-
-static int btuart_close(struct btuart_info *info)
-{
-	unsigned long flags;
-	unsigned int iobase = info->p_dev->resource[0]->start;
-	struct hci_dev *hdev = info->hdev;
-
-	if (!hdev)
-		return -ENODEV;
-
-	btuart_hci_close(hdev);
-
-	spin_lock_irqsave(&(info->lock), flags);
-
-	/* Reset UART */
-	outb(0, iobase + UART_MCR);
-
-	/* Turn off interrupts */
-	outb(0, iobase + UART_IER);
-
-	spin_unlock_irqrestore(&(info->lock), flags);
-
-	hci_unregister_dev(hdev);
-	hci_free_dev(hdev);
-
-	return 0;
-}
-
-static int btuart_probe(struct pcmcia_device *link)
-{
-	struct btuart_info *info;
-
-	/* Create new info device */
-	info = devm_kzalloc(&link->dev, sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	info->p_dev = link;
-	link->priv = info;
-
-	link->config_flags |= CONF_ENABLE_IRQ | CONF_AUTO_SET_VPP |
-		CONF_AUTO_SET_IO;
-
-	return btuart_config(link);
-}
-
-
-static void btuart_detach(struct pcmcia_device *link)
-{
-	btuart_release(link);
-}
-
-static int btuart_check_config(struct pcmcia_device *p_dev, void *priv_data)
-{
-	int *try = priv_data;
-
-	if (!try)
-		p_dev->io_lines = 16;
-
-	if ((p_dev->resource[0]->end != 8) || (p_dev->resource[0]->start == 0))
-		return -EINVAL;
-
-	p_dev->resource[0]->end = 8;
-	p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
-	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
-
-	return pcmcia_request_io(p_dev);
-}
-
-static int btuart_check_config_notpicky(struct pcmcia_device *p_dev,
-					void *priv_data)
-{
-	static unsigned int base[5] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, 0x0 };
-	int j;
-
-	if (p_dev->io_lines > 3)
-		return -ENODEV;
-
-	p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
-	p_dev->resource[0]->flags |= IO_DATA_PATH_WIDTH_8;
-	p_dev->resource[0]->end = 8;
-
-	for (j = 0; j < 5; j++) {
-		p_dev->resource[0]->start = base[j];
-		p_dev->io_lines = base[j] ? 16 : 3;
-		if (!pcmcia_request_io(p_dev))
-			return 0;
-	}
-	return -ENODEV;
-}
-
-static int btuart_config(struct pcmcia_device *link)
-{
-	struct btuart_info *info = link->priv;
-	int i;
-	int try;
-
-	/* First pass: look for a config entry that looks normal.
-	 * Two tries: without IO aliases, then with aliases
-	 */
-	for (try = 0; try < 2; try++)
-		if (!pcmcia_loop_config(link, btuart_check_config, &try))
-			goto found_port;
-
-	/* Second pass: try to find an entry that isn't picky about
-	 * its base address, then try to grab any standard serial port
-	 * address, and finally try to get any free port.
-	 */
-	if (!pcmcia_loop_config(link, btuart_check_config_notpicky, NULL))
-		goto found_port;
-
-	BT_ERR("No usable port range found");
-	goto failed;
-
-found_port:
-	i = pcmcia_request_irq(link, btuart_interrupt);
-	if (i != 0)
-		goto failed;
-
-	i = pcmcia_enable_device(link);
-	if (i != 0)
-		goto failed;
-
-	if (btuart_open(info) != 0)
-		goto failed;
-
-	return 0;
-
-failed:
-	btuart_release(link);
-	return -ENODEV;
-}
-
-
-static void btuart_release(struct pcmcia_device *link)
-{
-	struct btuart_info *info = link->priv;
-
-	btuart_close(info);
-
-	pcmcia_disable_device(link);
-}
-
-static const struct pcmcia_device_id btuart_ids[] = {
-	/* don't use this driver. Use serial_cs + hci_uart instead */
-	PCMCIA_DEVICE_NULL
-};
-MODULE_DEVICE_TABLE(pcmcia, btuart_ids);
-
-static struct pcmcia_driver btuart_driver = {
-	.owner		= THIS_MODULE,
-	.name		= "btuart_cs",
-	.probe		= btuart_probe,
-	.remove		= btuart_detach,
-	.id_table	= btuart_ids,
-};
-module_pcmcia_driver(btuart_driver);

From 9270aa0ddcf33527977bf4562ea3434c79af0d09 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:50 +0300
Subject: [PATCH 1646/1678] mlxsw: spectrum_acl: Fix flex actions header ifndef
 define construct

Fix copy&paste error in flex actions header ifndef define construct

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
index 2726192836ad..bd6d552d95b9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
@@ -33,8 +33,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
+#ifndef _MLXSW_SPECTRUM_ACL_FLEX_ACTIONS_H
+#define _MLXSW_SPECTRUM_ACL_FLEX_ACTIONS_H
 
 #include "spectrum.h"
 

From c8276dd250e9a9833f8ff1da1cd53e6f7e6825e2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:51 +0300
Subject: [PATCH 1647/1678] mlxsw: spectrum_kvdl: Fix handling of
 resource_size_param

Current code uses global variables, adjusts them and passes pointer down
to devlink. With every other mlxsw_core instance, the previously passed
pointer values are rewritten. Fix this by de-globalize the variables.

Fixes: 7f47b19bd744 ("mlxsw: spectrum_kvdl: Add support for per part occupancy")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_kvdl.c   | 47 ++++++-------------
 1 file changed, 14 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 85503e93b93f..9e61518c4945 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -420,67 +420,48 @@ static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
 	.occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
 };
 
-static struct devlink_resource_size_params mlxsw_sp_kvdl_single_size_params = {
-	.size_min = 0,
-	.size_granularity = 1,
-	.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
-};
-
-static struct devlink_resource_size_params mlxsw_sp_kvdl_chunks_size_params = {
-	.size_min = 0,
-	.size_granularity = MLXSW_SP_CHUNK_MAX,
-	.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
-};
-
-static struct devlink_resource_size_params mlxsw_sp_kvdl_large_chunks_size_params = {
-	.size_min = 0,
-	.size_granularity = MLXSW_SP_LARGE_CHUNK_MAX,
-	.unit = DEVLINK_RESOURCE_UNIT_ENTRY,
-};
-
-static void
-mlxsw_sp_kvdl_resource_size_params_prepare(struct devlink *devlink)
+int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	static struct devlink_resource_size_params size_params;
 	u32 kvdl_max_size;
+	int err;
 
 	kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
 			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
 			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
 
-	mlxsw_sp_kvdl_single_size_params.size_max = kvdl_max_size;
-	mlxsw_sp_kvdl_chunks_size_params.size_max = kvdl_max_size;
-	mlxsw_sp_kvdl_large_chunks_size_params.size_max = kvdl_max_size;
-}
-
-int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
-{
-	int err;
-
-	mlxsw_sp_kvdl_resource_size_params_prepare(devlink);
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, 1,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
 					MLXSW_SP_KVDL_SINGLE_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&mlxsw_sp_kvdl_single_size_params,
+					&size_params,
 					&mlxsw_sp_kvdl_single_ops);
 	if (err)
 		return err;
 
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP_CHUNK_MAX,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
 					MLXSW_SP_KVDL_CHUNKS_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&mlxsw_sp_kvdl_chunks_size_params,
+					&size_params,
 					&mlxsw_sp_kvdl_chunks_ops);
 	if (err)
 		return err;
 
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP_LARGE_CHUNK_MAX,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
 					MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&mlxsw_sp_kvdl_large_chunks_size_params,
+					&size_params,
 					&mlxsw_sp_kvdl_chunks_large_ops);
 	return err;
 }

From f9b9120119ca70ccdb1c900ac061425e4784c4de Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:52 +0300
Subject: [PATCH 1648/1678] mlxsw: Constify devlink_resource_ops

devlink_resource_ops should be const as the arg of register function is
also const.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4aa84442e357..0e9ed41ce8bc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3815,7 +3815,7 @@ static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
 	return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
 }
 
-static struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
+static const struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
 	.occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 9e61518c4945..201825c0019b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -408,15 +408,15 @@ static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
+static const struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
 	.occ_get = mlxsw_sp_kvdl_single_occ_get,
 };
 
-static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
+static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
 	.occ_get = mlxsw_sp_kvdl_chunks_occ_get,
 };
 
-static struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
+static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
 	.occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
 };
 

From 4f8768befdea9d8004361f180ca559f22ce5346d Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:53 +0300
Subject: [PATCH 1649/1678] mlxsw: spectrum: Change KVD linear parts from list
 to array

The parts info is array. The parts copy this info array, yet they are a
list. So make the indexing according to the id and change the list of
parts into array of parts. This helps to eliminate lookups and
constructs like mlxsw_sp_kvdl_part_update() (took me some non-trivial
time to figure out what is going on there).
Alongside with that, introduce a helper macro to define the parts infos.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_kvdl.c   | 239 +++++++-----------
 1 file changed, 94 insertions(+), 145 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 201825c0019b..7b28f65d6407 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -55,24 +55,47 @@
 #define MLXSW_SP_KVDL_LARGE_CHUNKS_END \
 	(MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1)
 
-#define MLXSW_SP_CHUNK_MAX 32
-#define MLXSW_SP_LARGE_CHUNK_MAX 512
+#define MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE 1
+#define MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE 32
+#define MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
 
 struct mlxsw_sp_kvdl_part_info {
 	unsigned int part_index;
 	unsigned int start_index;
 	unsigned int end_index;
 	unsigned int alloc_size;
+	enum mlxsw_sp_resource_id resource_id;
 };
 
+enum mlxsw_sp_kvdl_part_id {
+	MLXSW_SP_KVDL_PART_ID_SINGLE,
+	MLXSW_SP_KVDL_PART_ID_CHUNKS,
+	MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS,
+};
+
+#define MLXSW_SP_KVDL_PART_INFO(id)				\
+[MLXSW_SP_KVDL_PART_ID_##id] = {				\
+	.start_index = MLXSW_SP_KVDL_##id##_BASE,		\
+	.end_index = MLXSW_SP_KVDL_##id##_END,			\
+	.alloc_size = MLXSW_SP_KVDL_##id##_ALLOC_SIZE,		\
+	.resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,	\
+}
+
+static const struct mlxsw_sp_kvdl_part_info mlxsw_sp_kvdl_parts_info[] = {
+	MLXSW_SP_KVDL_PART_INFO(SINGLE),
+	MLXSW_SP_KVDL_PART_INFO(CHUNKS),
+	MLXSW_SP_KVDL_PART_INFO(LARGE_CHUNKS),
+};
+
+#define MLXSW_SP_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp_kvdl_parts_info)
+
 struct mlxsw_sp_kvdl_part {
-	struct list_head list;
-	struct mlxsw_sp_kvdl_part_info *info;
+	struct mlxsw_sp_kvdl_part_info info;
 	unsigned long usage[0];	/* Entries */
 };
 
 struct mlxsw_sp_kvdl {
-	struct list_head parts_list;
+	struct mlxsw_sp_kvdl_part *parts[MLXSW_SP_KVDL_PARTS_INFO_LEN];
 };
 
 static struct mlxsw_sp_kvdl_part *
@@ -80,11 +103,13 @@ mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl,
 			      unsigned int alloc_size)
 {
 	struct mlxsw_sp_kvdl_part *part, *min_part = NULL;
+	int i;
 
-	list_for_each_entry(part, &kvdl->parts_list, list) {
-		if (alloc_size <= part->info->alloc_size &&
+	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
+		part = kvdl->parts[i];
+		if (alloc_size <= part->info.alloc_size &&
 		    (!min_part ||
-		     part->info->alloc_size <= min_part->info->alloc_size))
+		     part->info.alloc_size <= min_part->info.alloc_size))
 			min_part = part;
 	}
 
@@ -95,10 +120,12 @@ static struct mlxsw_sp_kvdl_part *
 mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index)
 {
 	struct mlxsw_sp_kvdl_part *part;
+	int i;
 
-	list_for_each_entry(part, &kvdl->parts_list, list) {
-		if (kvdl_index >= part->info->start_index &&
-		    kvdl_index <= part->info->end_index)
+	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
+		part = kvdl->parts[i];
+		if (kvdl_index >= part->info.start_index &&
+		    kvdl_index <= part->info.end_index)
 			return part;
 	}
 
@@ -122,7 +149,7 @@ mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info,
 static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
 				    u32 *p_kvdl_index)
 {
-	const struct mlxsw_sp_kvdl_part_info *info = part->info;
+	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
 	unsigned int entry_index, nr_entries;
 
 	nr_entries = (info->end_index - info->start_index + 1) /
@@ -132,8 +159,7 @@ static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
 		return -ENOBUFS;
 	__set_bit(entry_index, part->usage);
 
-	*p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(part->info,
-							entry_index);
+	*p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(info, entry_index);
 
 	return 0;
 }
@@ -141,10 +167,10 @@ static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
 static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part,
 				    u32 kvdl_index)
 {
+	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
 	unsigned int entry_index;
 
-	entry_index = mlxsw_sp_kvdl_index_entry_index(part->info,
-						      kvdl_index);
+	entry_index = mlxsw_sp_kvdl_index_entry_index(info, kvdl_index);
 	__clear_bit(entry_index, part->usage);
 }
 
@@ -183,74 +209,30 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
 	if (IS_ERR(part))
 		return PTR_ERR(part);
 
-	*p_alloc_size = part->info->alloc_size;
+	*p_alloc_size = part->info.alloc_size;
 
 	return 0;
 }
 
-enum mlxsw_sp_kvdl_part_id {
-	MLXSW_SP_KVDL_PART_SINGLE,
-	MLXSW_SP_KVDL_PART_CHUNKS,
-	MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
-};
+static void mlxsw_sp_kvdl_part_update(struct mlxsw_sp_kvdl_part *part,
+				      struct mlxsw_sp_kvdl_part *part_prev,
+				      unsigned int size)
+{
 
-static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = {
-	{
-		.part_index	= MLXSW_SP_KVDL_PART_SINGLE,
-		.start_index	= MLXSW_SP_KVDL_SINGLE_BASE,
-		.end_index	= MLXSW_SP_KVDL_SINGLE_END,
-		.alloc_size	= 1,
-	},
-	{
-		.part_index	= MLXSW_SP_KVDL_PART_CHUNKS,
-		.start_index	= MLXSW_SP_KVDL_CHUNKS_BASE,
-		.end_index	= MLXSW_SP_KVDL_CHUNKS_END,
-		.alloc_size	= MLXSW_SP_CHUNK_MAX,
-	},
-	{
-		.part_index	= MLXSW_SP_KVDL_PART_LARGE_CHUNKS,
-		.start_index	= MLXSW_SP_KVDL_LARGE_CHUNKS_BASE,
-		.end_index	= MLXSW_SP_KVDL_LARGE_CHUNKS_END,
-		.alloc_size	= MLXSW_SP_LARGE_CHUNK_MAX,
-	},
-};
+	if (!part_prev) {
+		part->info.end_index = size - 1;
+	} else {
+		part->info.start_index = part_prev->info.end_index + 1;
+		part->info.end_index = part->info.start_index + size - 1;
+	}
+}
 
 static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	list_for_each_entry(part, &mlxsw_sp->kvdl->parts_list, list) {
-		if (part->info->part_index == part_index)
-			return part;
-	}
-
-	return NULL;
-}
-
-static void
-mlxsw_sp_kvdl_part_update(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_kvdl_part *part, unsigned int size)
-{
-	struct mlxsw_sp_kvdl_part_info *info = part->info;
-
-	if (list_is_last(&part->list, &mlxsw_sp->kvdl->parts_list)) {
-		info->end_index = size - 1;
-	} else  {
-		struct mlxsw_sp_kvdl_part *last_part;
-
-		last_part = list_next_entry(part, list);
-		info->start_index = last_part->info->end_index + 1;
-		info->end_index = info->start_index + size - 1;
-	}
-}
-
-static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
-				   unsigned int part_index)
+mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+			const struct mlxsw_sp_kvdl_part_info *info,
+			struct mlxsw_sp_kvdl_part *part_prev)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-	const struct mlxsw_sp_kvdl_part_info *info;
-	enum mlxsw_sp_resource_id resource_id;
 	struct mlxsw_sp_kvdl_part *part;
 	bool need_update = true;
 	unsigned int nr_entries;
@@ -258,23 +240,8 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
 	u64 resource_size;
 	int err;
 
-	info = &kvdl_parts_info[part_index];
-
-	switch (part_index) {
-	case MLXSW_SP_KVDL_PART_SINGLE:
-		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE;
-		break;
-	case MLXSW_SP_KVDL_PART_CHUNKS:
-		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS;
-		break;
-	case MLXSW_SP_KVDL_PART_LARGE_CHUNKS:
-		resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	err = devlink_resource_size_get(devlink, resource_id, &resource_size);
+	err = devlink_resource_size_get(devlink, info->resource_id,
+					&resource_size);
 	if (err) {
 		need_update = false;
 		resource_size = info->end_index - info->start_index + 1;
@@ -284,86 +251,77 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
 	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
 	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
 	if (!part)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	part->info = kmemdup(info, sizeof(*part->info), GFP_KERNEL);
-	if (!part->info)
-		goto err_part_info_alloc;
+	memcpy(&part->info, info, sizeof(part->info));
 
-	list_add(&part->list, &mlxsw_sp->kvdl->parts_list);
 	if (need_update)
-		mlxsw_sp_kvdl_part_update(mlxsw_sp, part, resource_size);
-	return 0;
-
-err_part_info_alloc:
-	kfree(part);
-	return -ENOMEM;
+		mlxsw_sp_kvdl_part_update(part, part_prev, resource_size);
+	return part;
 }
 
-static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp,
-				    unsigned int part_index)
+static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp_kvdl_part *part)
 {
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, part_index);
-	if (!part)
-		return;
-
-	list_del(&part->list);
-	kfree(part->info);
 	kfree(part);
 }
 
 static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
+	const struct mlxsw_sp_kvdl_part_info *info;
+	struct mlxsw_sp_kvdl_part *part_prev = NULL;
 	int err, i;
 
-	INIT_LIST_HEAD(&mlxsw_sp->kvdl->parts_list);
-
-	for (i = 0; i < ARRAY_SIZE(kvdl_parts_info); i++) {
-		err = mlxsw_sp_kvdl_part_init(mlxsw_sp, i);
-		if (err)
+	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
+		info = &mlxsw_sp_kvdl_parts_info[i];
+		kvdl->parts[i] = mlxsw_sp_kvdl_part_init(mlxsw_sp, info,
+							 part_prev);
+		if (IS_ERR(kvdl->parts[i])) {
+			err = PTR_ERR(kvdl->parts[i]);
 			goto err_kvdl_part_init;
+		}
+		part_prev = kvdl->parts[i];
 	}
-
 	return 0;
 
 err_kvdl_part_init:
 	for (i--; i >= 0; i--)
-		mlxsw_sp_kvdl_part_fini(mlxsw_sp, i);
+		mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
 	return err;
 }
 
 static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 	int i;
 
-	for (i = ARRAY_SIZE(kvdl_parts_info) - 1; i >= 0; i--)
-		mlxsw_sp_kvdl_part_fini(mlxsw_sp, i);
+	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
+		mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
 }
 
 static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part)
 {
+	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
 	unsigned int nr_entries;
 	int bit = -1;
 	u64 occ = 0;
 
-	nr_entries = (part->info->end_index -
-		      part->info->start_index + 1) /
-		      part->info->alloc_size;
+	nr_entries = (info->end_index -
+		      info->start_index + 1) /
+		      info->alloc_size;
 	while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
 		< nr_entries)
-		occ += part->info->alloc_size;
+		occ += info->alloc_size;
 	return occ;
 }
 
 u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_sp_kvdl_part *part;
 	u64 occ = 0;
+	int i;
 
-	list_for_each_entry(part, &mlxsw_sp->kvdl->parts_list, list)
-		occ += mlxsw_sp_kvdl_part_occ(part);
+	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
+		occ += mlxsw_sp_kvdl_part_occ(mlxsw_sp->kvdl->parts[i]);
 
 	return occ;
 }
@@ -374,10 +332,7 @@ static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_kvdl_part *part;
 
-	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_SINGLE);
-	if (!part)
-		return -EINVAL;
-
+	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE];
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
@@ -387,10 +342,7 @@ static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_kvdl_part *part;
 
-	part = mlxsw_sp_kvdl_part_find(mlxsw_sp, MLXSW_SP_KVDL_PART_CHUNKS);
-	if (!part)
-		return -EINVAL;
-
+	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS];
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
@@ -400,11 +352,7 @@ static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_kvdl_part *part;
 
-	part = mlxsw_sp_kvdl_part_find(mlxsw_sp,
-				       MLXSW_SP_KVDL_PART_LARGE_CHUNKS);
-	if (!part)
-		return -EINVAL;
-
+	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS];
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
@@ -431,7 +379,8 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
 			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
 
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, 1,
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE,
 					  DEVLINK_RESOURCE_UNIT_ENTRY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
 					MLXSW_SP_KVDL_SINGLE_SIZE,
@@ -443,7 +392,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 		return err;
 
 	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_CHUNK_MAX,
+					  MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE,
 					  DEVLINK_RESOURCE_UNIT_ENTRY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
 					MLXSW_SP_KVDL_CHUNKS_SIZE,
@@ -455,7 +404,7 @@ int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
 		return err;
 
 	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_LARGE_CHUNK_MAX,
+					  MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
 					  DEVLINK_RESOURCE_UNIT_ENTRY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
 					MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,

From 72779c97cf8f8f446521b48b12f305b6cf952bec Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:54 +0300
Subject: [PATCH 1650/1678] mlxsw: remove kvd_hash_granularity from config
 profile struct

This should not be part of the struct, as the struct fields
are tightly coupled with the FW command payload of the same name.
Just use the "granularity" define directly, as in other places.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.h     | 1 -
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 5ddafd74dc00..fd30eaf40475 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -256,7 +256,6 @@ struct mlxsw_config_profile {
 	u16	adaptive_routing_group_cap;
 	u8	arn;
 	u32	kvd_linear_size;
-	u16	kvd_hash_granularity;
 	u8	kvd_hash_single_parts;
 	u8	kvd_hash_double_parts;
 	u8	resource_query_enable;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 0e9ed41ce8bc..d503cdbeae29 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3794,7 +3794,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.used_max_pkey			= 1,
 	.max_pkey			= 0,
 	.used_kvd_split_data		= 1,
-	.kvd_hash_granularity		= MLXSW_SP_KVD_GRANULARITY,
 	.kvd_hash_single_parts		= 59,
 	.kvd_hash_double_parts		= 41,
 	.kvd_linear_size		= MLXSW_SP_KVD_LINEAR_SIZE,
@@ -3902,7 +3901,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 	double_size *= profile->kvd_hash_double_parts;
 	double_size /= profile->kvd_hash_double_parts +
 		       profile->kvd_hash_single_parts;
-	double_size = rounddown(double_size, profile->kvd_hash_granularity);
+	double_size = rounddown(double_size, MLXSW_SP_KVD_GRANULARITY);
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_HASH_DOUBLE,
 					double_size,
 					MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
@@ -3962,7 +3961,7 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
 		double_size /= profile->kvd_hash_double_parts +
 			       profile->kvd_hash_single_parts;
 		*p_double_size = rounddown(double_size,
-					   profile->kvd_hash_granularity);
+					   MLXSW_SP_KVD_GRANULARITY);
 	}
 
 	err = devlink_resource_size_get(devlink,

From 64f4588821776ce08c1ff0271630fcaada138705 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:55 +0300
Subject: [PATCH 1651/1678] mlxsw: core: Fix arg name of MLXSW_CORE_RES_VALID
 and MLXSW_CORE_RES_GET

First arg of these helpers should be "mlxsw_core".

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index fd30eaf40475..0d6452699364 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -325,14 +325,14 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
 bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
 			  enum mlxsw_res_id res_id);
 
-#define MLXSW_CORE_RES_VALID(res, short_res_id)			\
-	mlxsw_core_res_valid(res, MLXSW_RES_ID_##short_res_id)
+#define MLXSW_CORE_RES_VALID(mlxsw_core, short_res_id)			\
+	mlxsw_core_res_valid(mlxsw_core, MLXSW_RES_ID_##short_res_id)
 
 u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
 		       enum mlxsw_res_id res_id);
 
-#define MLXSW_CORE_RES_GET(res, short_res_id)			\
-	mlxsw_core_res_get(res, MLXSW_RES_ID_##short_res_id)
+#define MLXSW_CORE_RES_GET(mlxsw_core, short_res_id)			\
+	mlxsw_core_res_get(mlxsw_core, MLXSW_RES_ID_##short_res_id)
 
 #define MLXSW_BUS_F_TXRX	BIT(0)
 

From 110d2d2141d9671d4b4467ead06d1dc7288d0e9a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:56 +0300
Subject: [PATCH 1652/1678] mlxsw: Move "used_kvd_sizes" check to
 mlxsw_pci_config_profile

The check should be done directly in mlxsw_pci_config_profile, as for
other profile items. Also, be consistent in naming with the rest and
rename to "used_kvd_sizes".

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.h     | 3 +--
 drivers/net/ethernet/mellanox/mlxsw/pci.c      | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 ++---
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 0d6452699364..ff9daa09341d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -235,8 +235,7 @@ struct mlxsw_config_profile {
 		used_max_pkey:1,
 		used_ar_sec:1,
 		used_adaptive_routing_group_cap:1,
-		used_kvd_split_data:1; /* indicate for the kvd's values */
-
+		used_kvd_sizes:1;
 	u8	max_vepa_channels;
 	u16	max_mid;
 	u16	max_pgt;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index e30c6ce3dcb4..5ab068aec033 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1164,7 +1164,7 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
 		mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
 			mbox, profile->adaptive_routing_group_cap);
 	}
-	if (MLXSW_RES_VALID(res, KVD_SIZE)) {
+	if (profile->used_kvd_sizes && MLXSW_RES_VALID(res, KVD_SIZE)) {
 		err = mlxsw_pci_profile_get_kvd_sizes(mlxsw_pci, profile, res);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d503cdbeae29..12062aab13c5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3793,7 +3793,7 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.max_ib_mc			= 0,
 	.used_max_pkey			= 1,
 	.max_pkey			= 0,
-	.used_kvd_split_data		= 1,
+	.used_kvd_sizes			= 1,
 	.kvd_hash_single_parts		= 59,
 	.kvd_hash_double_parts		= 41,
 	.kvd_linear_size		= MLXSW_SP_KVD_LINEAR_SIZE,
@@ -3934,8 +3934,7 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
 	int err;
 
 	if (!MLXSW_CORE_RES_VALID(mlxsw_core, KVD_SINGLE_MIN_SIZE) ||
-	    !MLXSW_CORE_RES_VALID(mlxsw_core, KVD_DOUBLE_MIN_SIZE) ||
-	    !profile->used_kvd_split_data)
+	    !MLXSW_CORE_RES_VALID(mlxsw_core, KVD_DOUBLE_MIN_SIZE))
 		return -EIO;
 
 	/* The hash part is what left of the kvd without the

From ad3f20b28602fd46b3a27b5a61be270a3794cc36 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:57 +0300
Subject: [PATCH 1653/1678] mlxsw: Move "resources_query_enable" out of
 mlxsw_config_profile

As struct mlxsw_config_profile is mapped to the payload of the FW
command of the same name, resources_query_enable flag does not belong
there. Move it to struct mlxsw_driver.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c     | 5 +++--
 drivers/net/ethernet/mellanox/mlxsw/core.h     | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/pci.c      | 9 +++------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/switchib.c | 1 -
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 -
 6 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 3529b545675d..93ea56620a24 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1008,6 +1008,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	const char *device_kind = mlxsw_bus_info->device_kind;
 	struct mlxsw_core *mlxsw_core;
 	struct mlxsw_driver *mlxsw_driver;
+	struct mlxsw_res *res;
 	size_t alloc_size;
 	int err;
 
@@ -1032,8 +1033,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	mlxsw_core->bus_priv = bus_priv;
 	mlxsw_core->bus_info = mlxsw_bus_info;
 
-	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
-			      &mlxsw_core->res);
+	res = mlxsw_driver->res_query_enabled ? &mlxsw_core->res : NULL;
+	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, res);
 	if (err)
 		goto err_bus_init;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index ff9daa09341d..092d39399f3c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -257,7 +257,6 @@ struct mlxsw_config_profile {
 	u32	kvd_linear_size;
 	u8	kvd_hash_single_parts;
 	u8	kvd_hash_double_parts;
-	u8	resource_query_enable;
 	struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
 };
 
@@ -314,6 +313,7 @@ struct mlxsw_driver {
 			     u64 *p_linear_size);
 	u8 txhdr_len;
 	const struct mlxsw_config_profile *profile;
+	bool res_query_enabled;
 };
 
 int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 5ab068aec033..3a9381977d6d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1015,16 +1015,14 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
 }
 
 static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
-				     struct mlxsw_res *res,
-				     u8 query_enabled)
+				     struct mlxsw_res *res)
 {
 	int index, i;
 	u64 data;
 	u16 id;
 	int err;
 
-	/* Not all the versions support resources query */
-	if (!query_enabled)
+	if (!res)
 		return 0;
 
 	mlxsw_cmd_mbox_zero(mbox);
@@ -1376,8 +1374,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 	if (err)
 		goto err_boardinfo;
 
-	err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res,
-					profile->resource_query_enable);
+	err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res);
 	if (err)
 		goto err_query_resources;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 12062aab13c5..b831af38e0a1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3803,7 +3803,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
 			.type		= MLXSW_PORT_SWID_TYPE_ETH,
 		}
 	},
-	.resource_query_enable		= 1,
 };
 
 static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
@@ -4002,6 +4001,7 @@ static struct mlxsw_driver mlxsw_sp_driver = {
 	.kvd_sizes_get			= mlxsw_sp_kvd_sizes_get,
 	.txhdr_len			= MLXSW_TXHDR_LEN,
 	.profile			= &mlxsw_sp_config_profile,
+	.res_query_enabled		= true,
 };
 
 bool mlxsw_sp_port_dev_check(const struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
index ab7a29846bfa..c698ec4fd9d4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -510,7 +510,6 @@ static const struct mlxsw_config_profile mlxsw_sib_config_profile = {
 			.type		= MLXSW_PORT_SWID_TYPE_IB,
 		}
 	},
-	.resource_query_enable		= 0,
 };
 
 static struct mlxsw_driver mlxsw_sib_driver = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index c87b0934a405..a655c5850aa6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -1706,7 +1706,6 @@ static const struct mlxsw_config_profile mlxsw_sx_config_profile = {
 			.type		= MLXSW_PORT_SWID_TYPE_IB,
 		}
 	},
-	.resource_query_enable		= 0,
 };
 
 static struct mlxsw_driver mlxsw_sx_driver = {

From 88d2fbcda145f5b256a254635c7420a2a645eca0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:58 +0300
Subject: [PATCH 1654/1678] mlxsw: spectrum: Pass mlxsw_core as arg of
 mlxsw_sp_kvdl_resources_register()

Pass struct mlxsw_core instead of devlink since it is nicer within mlxsw
code and we need both structs in mlxsw_sp_kvdl_resources_register()
anyway.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h      | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b831af38e0a1..53fffd09d133 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3892,7 +3892,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 	if (err)
 		return err;
 
-	err = mlxsw_sp_kvdl_resources_register(devlink);
+	err = mlxsw_sp_kvdl_resources_register(mlxsw_core);
 	if  (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 21bee8f19894..c4d47e456737 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -443,7 +443,7 @@ int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
 				   unsigned int entry_count,
 				   unsigned int *p_alloc_size);
 u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_kvdl_resources_register(struct devlink *devlink);
+int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
 
 struct mlxsw_sp_acl_rule_info {
 	unsigned int priority;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 7b28f65d6407..8796db44dcc3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -368,9 +368,9 @@ static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
 	.occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
 };
 
-int mlxsw_sp_kvdl_resources_register(struct devlink *devlink)
+int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
 {
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 	static struct devlink_resource_size_params size_params;
 	u32 kvdl_max_size;
 	int err;

From 59441fef2b10c651261be08aec558e90fbd594f8 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sun, 1 Apr 2018 17:34:59 +0300
Subject: [PATCH 1655/1678] mlxsw: spectrum: Don't use resource ID of 0

In commit 145307460ba9 ("devlink: Remove top_hierarchy arg to
devlink_resource_register"), the "top_hierarchy" parameter to
devlink_resource_register() was removed in favor of using the parameter
"parent_resource_id" exclusively to determine who the parent is. The
root node's resource ID for this purpose is
DEVLINK_RESOURCE_ID_PARENT_TOP with the value 0. It is therefore
problematic that the resource MLXSW_SP_RESOURCE_KVD has also ID of 0.

Fix this by numbering driver-specific resources from 1.

Fixes: 145307460ba9 ("devlink: Remove top_hierarchy arg to devlink_resource_register")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index c4d47e456737..82820ba43728 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -75,7 +75,7 @@
 #define MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS "large_chunks"
 
 enum mlxsw_sp_resource_id {
-	MLXSW_SP_RESOURCE_KVD,
+	MLXSW_SP_RESOURCE_KVD = 1,
 	MLXSW_SP_RESOURCE_KVD_LINEAR,
 	MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
 	MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,

From ce5a453c4421ccd68a98fd987b6878923c549be2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 27 Mar 2018 14:21:14 -0700
Subject: [PATCH 1656/1678] net/mlx4_en: CHECKSUM_COMPLETE support for
 fragments

Refine the RX check summing handling to propagate the
hardware provided checksum so that we do not have to
compute it later in software.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 05787efef492..5c613c6663da 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -821,14 +821,12 @@ xdp_drop_no_cnt:
 		skb_record_rx_queue(skb, cq_ring);
 
 		if (likely(dev->features & NETIF_F_RXCSUM)) {
-			if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
-						      MLX4_CQE_STATUS_UDP)) {
+			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+						       MLX4_CQE_STATUS_UDP)) &&
+			    (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+			    cqe->checksum == cpu_to_be16(0xffff)) {
 				bool l2_tunnel;
 
-				if (!((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
-				      cqe->checksum == cpu_to_be16(0xffff)))
-					goto csum_none;
-
 				l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
 					(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 				ip_summed = CHECKSUM_UNNECESSARY;

From ec1258903a13d0255695a07280ae5e32303aa6c5 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Fri, 30 Mar 2018 20:34:33 +0300
Subject: [PATCH 1657/1678] ip6_gre: remove redundant 'tunnel' setting in
 ip6erspan_tap_init()

'tunnel' was already set at the start of ip6erspan_tap_init().

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 22e86557aca4..f8a103bdbd60 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1762,7 +1762,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
 		dev->mtu -= 8;
 
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
-	tunnel = netdev_priv(dev);
 	ip6gre_tnl_link_config(tunnel, 1);
 
 	return 0;

From c07255020551cadae8bf903f2c5e1fcbad731bac Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 1 Apr 2018 14:04:58 -0400
Subject: [PATCH 1658/1678] Revert "net: usb: asix88179_178a: de-duplicate
 code"

This reverts commit cf29bded91f9153208c4c2145d602cd82430ad1a.

It causes regressions.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 115 ++++++++++++++++++++++++---------
 1 file changed, 85 insertions(+), 30 deletions(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index fb1b78d4b9ef..a6ef75907ae9 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1223,7 +1223,7 @@ static int ax88179_led_setting(struct usbnet *dev)
 	return 0;
 }
 
-static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
+static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	u8 buf[5];
 	u16 *tmp16;
@@ -1231,11 +1231,12 @@ static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
 	struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
 	struct ethtool_eee eee_data;
 
+	usbnet_get_endpoints(dev, intf);
+
 	tmp16 = (u16 *)buf;
 	tmp = (u8 *)buf;
 
-	if (!do_reset)
-		memset(ax179_data, 0, sizeof(*ax179_data));
+	memset(ax179_data, 0, sizeof(*ax179_data));
 
 	/* Power up ethernet PHY */
 	*tmp16 = 0;
@@ -1248,13 +1249,9 @@ static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
 	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
 	msleep(100);
 
-	if (do_reset)
-		ax88179_auto_detach(dev, 0);
-
 	ax88179_read_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN,
 			 ETH_ALEN, dev->net->dev_addr);
-	if (!do_reset)
-		memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
+	memcpy(dev->net->perm_addr, dev->net->dev_addr, ETH_ALEN);
 
 	/* RX bulk configuration */
 	memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
@@ -1269,21 +1266,19 @@ static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
 	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PAUSE_WATERLVL_HIGH,
 			  1, 1, tmp);
 
-	if (!do_reset) {
-		dev->net->netdev_ops = &ax88179_netdev_ops;
-		dev->net->ethtool_ops = &ax88179_ethtool_ops;
-		dev->net->needed_headroom = 8;
-		dev->net->max_mtu = 4088;
+	dev->net->netdev_ops = &ax88179_netdev_ops;
+	dev->net->ethtool_ops = &ax88179_ethtool_ops;
+	dev->net->needed_headroom = 8;
+	dev->net->max_mtu = 4088;
 
-		/* Initialize MII structure */
-		dev->mii.dev = dev->net;
-		dev->mii.mdio_read = ax88179_mdio_read;
-		dev->mii.mdio_write = ax88179_mdio_write;
-		dev->mii.phy_id_mask = 0xff;
-		dev->mii.reg_num_mask = 0xff;
-		dev->mii.phy_id = 0x03;
-		dev->mii.supports_gmii = 1;
-	}
+	/* Initialize MII structure */
+	dev->mii.dev = dev->net;
+	dev->mii.mdio_read = ax88179_mdio_read;
+	dev->mii.mdio_write = ax88179_mdio_write;
+	dev->mii.phy_id_mask = 0xff;
+	dev->mii.reg_num_mask = 0xff;
+	dev->mii.phy_id = 0x03;
+	dev->mii.supports_gmii = 1;
 
 	dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			      NETIF_F_RXCSUM;
@@ -1335,13 +1330,6 @@ static int ax88179_link_bind_or_reset(struct usbnet *dev, bool do_reset)
 	return 0;
 }
 
-static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
-{
-	usbnet_get_endpoints(dev, intf);
-
-	return ax88179_link_bind_or_reset(dev, false);
-}
-
 static void ax88179_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
 	u16 tmp16;
@@ -1470,7 +1458,74 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 
 static int ax88179_link_reset(struct usbnet *dev)
 {
-	return ax88179_link_bind_or_reset(dev, true);
+	struct ax88179_data *ax179_data = (struct ax88179_data *)dev->data;
+	u8 tmp[5], link_sts;
+	u16 mode, tmp16, delay = HZ / 10;
+	u32 tmp32 = 0x40000000;
+	unsigned long jtimeout;
+
+	jtimeout = jiffies + delay;
+	while (tmp32 & 0x40000000) {
+		mode = 0;
+		ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2, &mode);
+		ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_CTL, 2, 2,
+				  &ax179_data->rxctl);
+
+		/*link up, check the usb device control TX FIFO full or empty*/
+		ax88179_read_cmd(dev, 0x81, 0x8c, 0, 4, &tmp32);
+
+		if (time_after(jiffies, jtimeout))
+			return 0;
+	}
+
+	mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN |
+	       AX_MEDIUM_RXFLOW_CTRLEN;
+
+	ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS,
+			 1, 1, &link_sts);
+
+	ax88179_read_cmd(dev, AX_ACCESS_PHY, AX88179_PHY_ID,
+			 GMII_PHY_PHYSR, 2, &tmp16);
+
+	if (!(tmp16 & GMII_PHY_PHYSR_LINK)) {
+		return 0;
+	} else if (GMII_PHY_PHYSR_GIGA == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
+		mode |= AX_MEDIUM_GIGAMODE | AX_MEDIUM_EN_125MHZ;
+		if (dev->net->mtu > 1500)
+			mode |= AX_MEDIUM_JUMBO_EN;
+
+		if (link_sts & AX_USB_SS)
+			memcpy(tmp, &AX88179_BULKIN_SIZE[0], 5);
+		else if (link_sts & AX_USB_HS)
+			memcpy(tmp, &AX88179_BULKIN_SIZE[1], 5);
+		else
+			memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
+	} else if (GMII_PHY_PHYSR_100 == (tmp16 & GMII_PHY_PHYSR_SMASK)) {
+		mode |= AX_MEDIUM_PS;
+
+		if (link_sts & (AX_USB_SS | AX_USB_HS))
+			memcpy(tmp, &AX88179_BULKIN_SIZE[2], 5);
+		else
+			memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
+	} else {
+		memcpy(tmp, &AX88179_BULKIN_SIZE[3], 5);
+	}
+
+	/* RX bulk configuration */
+	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_RX_BULKIN_QCTRL, 5, 5, tmp);
+
+	dev->rx_urb_size = (1024 * (tmp[3] + 2));
+
+	if (tmp16 & GMII_PHY_PHYSR_FULL)
+		mode |= AX_MEDIUM_FULL_DUPLEX;
+	ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE,
+			  2, 2, &mode);
+
+	ax179_data->eee_enabled = ax88179_chk_eee(dev);
+
+	netif_carrier_on(dev->net);
+
+	return 0;
 }
 
 static int ax88179_reset(struct usbnet *dev)

From 694aba690de062cf27b28a5e56e7a5a7185b0a1c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 13:16:25 -0700
Subject: [PATCH 1659/1678] ipv4: factorize sk_wmem_alloc updates done by
 __ip_append_data()

While testing my inet defrag changes, I found that the senders
could spend ~20% of cpu cycles in skb_set_owner_w() updating
sk->sk_wmem_alloc for every fragment they cook.

The solution to this problem is to use alloc_skb() instead
of sock_wmalloc() and manually perform a single sk_wmem_alloc change.

Similar change for IPv6 is provided in following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 66340ab750e6..94cacae76aca 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -876,6 +876,7 @@ static int __ip_append_data(struct sock *sk,
 	unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
 	int csummode = CHECKSUM_NONE;
 	struct rtable *rt = (struct rtable *)cork->dst;
+	unsigned int wmem_alloc_delta = 0;
 	u32 tskey = 0;
 
 	skb = skb_peek_tail(queue);
@@ -971,11 +972,10 @@ alloc_new_skb:
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
-				if (refcount_read(&sk->sk_wmem_alloc) <=
+				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
 				    2 * sk->sk_sndbuf)
-					skb = sock_wmalloc(sk,
-							   alloclen + hh_len + 15, 1,
-							   sk->sk_allocation);
+					skb = alloc_skb(alloclen + hh_len + 15,
+							sk->sk_allocation);
 				if (unlikely(!skb))
 					err = -ENOBUFS;
 			}
@@ -1033,6 +1033,11 @@ alloc_new_skb:
 			/*
 			 * Put the packet on the pending queue.
 			 */
+			if (!skb->destructor) {
+				skb->destructor = sock_wfree;
+				skb->sk = sk;
+				wmem_alloc_delta += skb->truesize;
+			}
 			__skb_queue_tail(queue, skb);
 			continue;
 		}
@@ -1079,12 +1084,13 @@ alloc_new_skb:
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
-			refcount_add(copy, &sk->sk_wmem_alloc);
+			wmem_alloc_delta += copy;
 		}
 		offset += copy;
 		length -= copy;
 	}
 
+	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 	return 0;
 
 error_efault:
@@ -1092,6 +1098,7 @@ error_efault:
 error:
 	cork->length -= length;
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
+	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 	return err;
 }
 

From 1f4c6eb24029689a40dceae561e31ff6926d7f0d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 31 Mar 2018 13:16:26 -0700
Subject: [PATCH 1660/1678] ipv6: factorize sk_wmem_alloc updates done by
 __ip6_append_data()

While testing my inet defrag changes, I found that the senders
could spend ~20% of cpu cycles in skb_set_owner_w() updating
sk->sk_wmem_alloc for every fragment they cook, competing
with TX completion of prior skbs possibly happening on another cpus.

The solution to this problem is to use alloc_skb() instead
of sock_wmalloc() and manually perform a single sk_wmem_alloc change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2c7f09c3c39e..323d7a354ffb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1259,6 +1259,7 @@ static int __ip6_append_data(struct sock *sk,
 	struct ipv6_txoptions *opt = v6_cork->opt;
 	int csummode = CHECKSUM_NONE;
 	unsigned int maxnonfragsize, headersize;
+	unsigned int wmem_alloc_delta = 0;
 
 	skb = skb_peek_tail(queue);
 	if (!skb) {
@@ -1411,11 +1412,10 @@ alloc_new_skb:
 						(flags & MSG_DONTWAIT), &err);
 			} else {
 				skb = NULL;
-				if (refcount_read(&sk->sk_wmem_alloc) <=
+				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
 				    2 * sk->sk_sndbuf)
-					skb = sock_wmalloc(sk,
-							   alloclen + hh_len, 1,
-							   sk->sk_allocation);
+					skb = alloc_skb(alloclen + hh_len,
+							sk->sk_allocation);
 				if (unlikely(!skb))
 					err = -ENOBUFS;
 			}
@@ -1474,6 +1474,11 @@ alloc_new_skb:
 			/*
 			 * Put the packet on the pending queue
 			 */
+			if (!skb->destructor) {
+				skb->destructor = sock_wfree;
+				skb->sk = sk;
+				wmem_alloc_delta += skb->truesize;
+			}
 			__skb_queue_tail(queue, skb);
 			continue;
 		}
@@ -1520,12 +1525,13 @@ alloc_new_skb:
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
-			refcount_add(copy, &sk->sk_wmem_alloc);
+			wmem_alloc_delta += copy;
 		}
 		offset += copy;
 		length -= copy;
 	}
 
+	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 	return 0;
 
 error_efault:
@@ -1533,6 +1539,7 @@ error_efault:
 error:
 	cork->length -= length;
 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
 	return err;
 }
 

From 6e80e55bd37a90b412f168b1667ffa7d2debd46b Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sun, 1 Apr 2018 00:22:08 +0300
Subject: [PATCH 1661/1678] sh_eth: add sh_eth_cpu_data::no_xdfar flag

The commit 6ded286555c2 ("sh_eth: Fix RX recovery on R-Car in case of RX
ring underrun") added a check for an bad RDFAR offset in sh_eth_rx(), so
that the code could work on the R-Car Ether controllers which don't have
this register (and TDFAR), then the commit 3365711df02 ("sh_eth: WARN on
access to a register not implemented in a particular chip") replaced
offset 0 with 0xffff. Adding/checking the 'no_xdfar' bit field in the
'struct sh_eth_cpu_data' instead results in less object code...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 5 +++--
 drivers/net/ethernet/renesas/sh_eth.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 306558ef36b5..2bff03345b72 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -702,6 +702,7 @@ static struct sh_eth_cpu_data rcar_gen1_data = {
 	.mpr		= 1,
 	.tpauser	= 1,
 	.hw_swap	= 1,
+	.no_xdfar	= 1,
 };
 
 /* R-Car Gen2 and RZ/G1 */
@@ -735,6 +736,7 @@ static struct sh_eth_cpu_data rcar_gen2_data = {
 	.mpr		= 1,
 	.tpauser	= 1,
 	.hw_swap	= 1,
+	.no_xdfar	= 1,
 	.rmiimode	= 1,
 	.magic		= 1,
 };
@@ -1615,8 +1617,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 	/* If we don't need to check status, don't. -KDU */
 	if (!(sh_eth_read(ndev, EDRRR) & EDRRR_R)) {
 		/* fix the values for the next receiving if RDE is set */
-		if (intr_status & EESR_RDE &&
-		    mdp->reg_offset[RDFAR] != SH_ETH_OFFSET_INVALID) {
+		if (intr_status & EESR_RDE && !mdp->cd->no_xdfar) {
 			u32 count = (sh_eth_read(ndev, RDFAR) -
 				     sh_eth_read(ndev, RDLAR)) >> 4;
 
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index a0416e04306a..a5b792ce2ae7 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -508,6 +508,7 @@ struct sh_eth_cpu_data {
 	unsigned rpadir:1;	/* E-DMAC have RPADIR */
 	unsigned no_trimd:1;	/* E-DMAC DO NOT have TRIMD */
 	unsigned no_ade:1;	/* E-DMAC DO NOT have ADE bit in EESR */
+	unsigned no_xdfar:1;	/* E-DMAC DOES NOT have RDFAR/TDFAR */
 	unsigned xdfar_rw:1;	/* E-DMAC has writeable RDFAR/TDFAR */
 	unsigned hw_checksum:1;	/* E-DMAC has CSMR */
 	unsigned select_mii:1;	/* EtherC have RMII_MII (MII select register) */

From e14549a50a6c5e3320bb941440b1c3ae4812ea69 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sun, 1 Apr 2018 00:23:51 +0300
Subject: [PATCH 1662/1678] sh_eth: kill useless check in __sh_eth_get_regs()

Iff TSU registers exist on a given [G]Ether controller, they always include
the CAM entry table registers (TSU_ADR{H|L}<n>), thus the check for invalid
TSU_ADRH0 offset in __sh_eth_get_regs() is useless...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 2bff03345b72..b6b90a6314e3 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2153,22 +2153,17 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
 		add_tsu_reg(TSU_POST2);
 		add_tsu_reg(TSU_POST3);
 		add_tsu_reg(TSU_POST4);
-		if (mdp->reg_offset[TSU_ADRH0] != SH_ETH_OFFSET_INVALID) {
-			/* This is the start of a table, not just a single
-			 * register.
-			 */
-			if (buf) {
-				unsigned int i;
+		/* This is the start of a table, not just a single register. */
+		if (buf) {
+			unsigned int i;
 
-				mark_reg_valid(TSU_ADRH0);
-				for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES * 2; i++)
-					*buf++ = ioread32(
-						mdp->tsu_addr +
-						mdp->reg_offset[TSU_ADRH0] +
-						i * 4);
-			}
-			len += SH_ETH_TSU_CAM_ENTRIES * 2;
+			mark_reg_valid(TSU_ADRH0);
+			for (i = 0; i < SH_ETH_TSU_CAM_ENTRIES * 2; i++)
+				*buf++ = ioread32(mdp->tsu_addr +
+						  mdp->reg_offset[TSU_ADRH0] +
+						  i * 4);
 		}
+		len += SH_ETH_TSU_CAM_ENTRIES * 2;
 	}
 
 #undef mark_reg_valid

From 45a42bc9cc65b9ab33411bee454b55bd8a00c977 Mon Sep 17 00:00:00 2001
From: Ian W MORRISON <ianwmorrison@gmail.com>
Date: Tue, 27 Mar 2018 09:09:28 +1100
Subject: [PATCH 1663/1678] Bluetooth: hci_bcm: Remove DMI quirk for the MINIX
 Z83-4

As Interrupt resource specified IRQs are now assumed to be always
active-low the DMI quirk for the MINIX Z83-4 is no longer required.

Signed-off-by: Ian W MORRISON <ianwmorrison@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 4ceaf1f4a4e7..441f5e1deb11 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -804,13 +804,6 @@ static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = {
 			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
 		},
 	},
-	{
-		.ident = "MINIX Z83-4",
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MINIX"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
-		},
-	},
 	{ }
 };
 

From f9b95db0165ae81c99fe1893e586aab34751ee51 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 28 Mar 2018 13:22:14 +0000
Subject: [PATCH 1664/1678] Bluetooth: btrsi: remove unused including
 <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btrsi.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/bluetooth/btrsi.c b/drivers/bluetooth/btrsi.c
index 5034325e417c..60d1419590ba 100644
--- a/drivers/bluetooth/btrsi.c
+++ b/drivers/bluetooth/btrsi.c
@@ -13,7 +13,6 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <net/bluetooth/bluetooth.h>

From 96e58d368fa6c9419fa1fd724a50b7ed900a78d2 Mon Sep 17 00:00:00 2001
From: Vic Wei <vwei@codeaurora.org>
Date: Wed, 28 Mar 2018 08:28:47 -0700
Subject: [PATCH 1665/1678] Bluetooth: Set HCI_QUIRK_SIMULTANEOUS_DISCOVERY for
 BTUSB_QCA_ROME

QCA Rome controllers can do both LE scan and BR/EDR inquiry at once.

Signed-off-by: Vic Wei <vwei@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index c701443de3e7..c8c8b0b8d333 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -3063,6 +3063,7 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_QCA_ROME) {
 		data->setup_on_usb = btusb_setup_qca;
 		hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
+		set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
 	}
 
 #ifdef CONFIG_BT_HCIBTUSB_RTL

From 9ea471320e1302be0fac67c14a7ab7982609fea7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 30 Mar 2018 16:05:06 -0500
Subject: [PATCH 1666/1678] Bluetooth: Mark expected switch fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/mgmt.c        | 1 +
 net/bluetooth/rfcomm/sock.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 6e9fc86d8daf..8a80d48d89c4 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4801,6 +4801,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 		case MGMT_LTK_P256_DEBUG:
 			authenticated = 0x00;
 			type = SMP_LTK_P256_DEBUG;
+			/* fall through */
 		default:
 			continue;
 		}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 93a3b219db09..d606e9212291 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -221,6 +221,7 @@ static void __rfcomm_sock_close(struct sock *sk)
 	case BT_CONFIG:
 	case BT_CONNECTED:
 		rfcomm_dlc_close(d, 0);
+		/* fall through */
 
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);

From ec1d8ccb07deaf30fd0508af6755364ac47dc08d Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 30 Mar 2018 09:44:00 +0800
Subject: [PATCH 1667/1678] vlan: also check phy_driver ts_info for vlan's real
 device

Just like function ethtool_get_ts_info(), we should also consider the
phy_driver ts_info call back. For example, driver dp83640.

Fixes: 37dd9255b2f6 ("vlan: Pass ethtool get_ts_info queries to real device.")
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f7e83f6d2e64..236452ebbd9e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -29,6 +29,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/phy.h>
 #include <net/arp.h>
 #include <net/switchdev.h>
 
@@ -665,8 +666,11 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
 {
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
+	struct phy_device *phydev = vlan->real_dev->phydev;
 
-	if (ops->get_ts_info) {
+	if (phydev && phydev->drv && phydev->drv->ts_info) {
+		 return phydev->drv->ts_info(phydev, info);
+	} else if (ops->get_ts_info) {
 		return ops->get_ts_info(vlan->real_dev, info);
 	} else {
 		info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |

From da01ec4ee5e69933641e6017d6461d0ce9dbfac6 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 30 Mar 2018 10:11:21 +0800
Subject: [PATCH 1668/1678] net: sched: do not emit messages while holding
 spinlock

move messages emitting out of sch_tree_lock to avoid holding
this lock too long.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 1ea9846cc6ce..2a4ab7caf553 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1337,6 +1337,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_opt *hopt;
 	u64 rate64, ceil64;
+	int warn = 0;
 
 	/* extract all subattrs from opt attr */
 	if (!opt)
@@ -1499,13 +1500,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		cl->quantum = min_t(u64, quantum, INT_MAX);
 
 		if (!hopt->quantum && cl->quantum < 1000) {
-			pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
-				cl->common.classid);
+			warn = -1;
 			cl->quantum = 1000;
 		}
 		if (!hopt->quantum && cl->quantum > 200000) {
-			pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
-				cl->common.classid);
+			warn = 1;
 			cl->quantum = 200000;
 		}
 		if (hopt->quantum)
@@ -1519,6 +1518,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
 	sch_tree_unlock(sch);
 
+	if (warn)
+		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
+			    cl->common.classid, (warn == -1 ? "small" : "big"));
+
 	qdisc_class_hash_grow(sch, &q->clhash);
 
 	*arg = (unsigned long)cl;

From 6095e590d277ecec54805c4999335406f08855fe Mon Sep 17 00:00:00 2001
From: John Allen <jallen@linux.vnet.ibm.com>
Date: Fri, 30 Mar 2018 13:44:21 -0500
Subject: [PATCH 1669/1678] ibmvnic: Disable irqs before exiting reset from
 closed state

When the driver is closed, all the associated irqs are disabled. In the
event that the driver exits a reset in the closed state, we should be
consistent with the state we are in directly after a close. So before we
exit the reset routine, all irqs should be disabled as well. This will
prevent the irqs from being enabled twice in this case and reporting a
number of noisy warning traces.

Signed-off-by: John Allen <jallen@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0389a7a52152..b492af6affc3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1167,19 +1167,11 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
 	}
 }
 
-static void ibmvnic_cleanup(struct net_device *netdev)
+static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+	struct net_device *netdev = adapter->netdev;
 	int i;
 
-	/* ensure that transmissions are stopped if called by do_reset */
-	if (adapter->resetting)
-		netif_tx_disable(netdev);
-	else
-		netif_tx_stop_all_queues(netdev);
-
-	ibmvnic_napi_disable(adapter);
-
 	if (adapter->tx_scrq) {
 		for (i = 0; i < adapter->req_tx_queues; i++)
 			if (adapter->tx_scrq[i]->irq) {
@@ -1198,6 +1190,21 @@ static void ibmvnic_cleanup(struct net_device *netdev)
 			}
 		}
 	}
+}
+
+static void ibmvnic_cleanup(struct net_device *netdev)
+{
+	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+	/* ensure that transmissions are stopped if called by do_reset */
+	if (adapter->resetting)
+		netif_tx_disable(netdev);
+	else
+		netif_tx_stop_all_queues(netdev);
+
+	ibmvnic_napi_disable(adapter);
+	ibmvnic_disable_irqs(adapter);
+
 	clean_rx_pools(adapter);
 	clean_tx_pools(adapter);
 }
@@ -1772,6 +1779,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		}
 	}
 
+	ibmvnic_disable_irqs(adapter);
 	adapter->state = VNIC_CLOSED;
 
 	if (reset_state == VNIC_CLOSED)

From 48bfc39791b8b4a25f165e711f18b9c1617cefbc Mon Sep 17 00:00:00 2001
From: Tal Gilboa <talgi@mellanox.com>
Date: Fri, 30 Mar 2018 15:50:08 -0700
Subject: [PATCH 1670/1678] net/mlx5e: Set EQE based as default TX interrupt
 moderation mode

The default TX moderation mode was mistakenly set to CQE based. The
intention was to add a control ability in order to improve some specific
use-cases. In general, we prefer to use EQE based moderation as it gives
much better numbers for the common cases.

CQE based causes a degradation in the common case since it resets the
moderation timer on CQE generation. This causes an issue when TSO is
well utilized (large TSO sessions). The timer is set to 16us so traffic
of ~64KB TSO sessions per second would mean timer reset (CQE per TSO
session -> long time between CQEs). In this case we quickly reach the
tcp_limit_output_bytes (256KB by default) and cause a halt in TX traffic.

By setting EQE based moderation we make sure timer would expire after
16us regardless of the packet rate.
This fixes an up to 40% packet rate and up to 23% bandwidth degradtions.

Fixes: 0088cbbc4b66 ("net/mlx5e: Enable CQE based moderation on TX CQ")
Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c71f4f10283b..0aab3afc6885 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4137,7 +4137,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 			    struct mlx5e_params *params,
 			    u16 max_channels, u16 mtu)
 {
-	u8 cq_period_mode = 0;
+	u8 rx_cq_period_mode;
 
 	params->sw_mtu = mtu;
 	params->hard_mtu = MLX5E_ETH_HARD_MTU;
@@ -4173,12 +4173,12 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
 	params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
 	/* CQ moderation params */
-	cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+	rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
 			MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 			MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
-	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
-	mlx5e_set_tx_cq_mode_params(params, cq_period_mode);
+	mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
+	mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 
 	/* TX inline */
 	params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);

From afb133637071be6deeb8b3d0e55593ffbf63c527 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Sat, 31 Mar 2018 23:42:03 +0800
Subject: [PATCH 1671/1678] sky2: Increase D3 delay to sky2 stops working after
 suspend

The sky2 ethernet stops working after system resume from suspend:
[ 582.852065] sky2 0000:04:00.0: Refused to change power state, currently in D3

The current 150ms delay is not enough, change it to 200ms can solve the
issue.

BugLink: https://bugs.launchpad.net/bugs/1758507
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/sky2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 9b77db7c13d0..697d9b374f5e 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -5087,7 +5087,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	INIT_WORK(&hw->restart_work, sky2_restart);
 
 	pci_set_drvdata(pdev, hw);
-	pdev->d3_delay = 150;
+	pdev->d3_delay = 200;
 
 	return 0;
 

From 781492abce1bd3ef3fb01271772d6b77e260d193 Mon Sep 17 00:00:00 2001
From: Frans Meulenbroeks <fransmeulenbroeks@gmail.com>
Date: Sun, 1 Apr 2018 22:52:55 +0200
Subject: [PATCH 1672/1678] fix typo in command value in
 drivers/net/phy/mdio-bitbang.

mdio-bitbang mentioned 10 for both read and write.
However mdio read opcode is 10 and write opcode is 01
Fixed comment.

Signed-off-by: Frans Meulenbroeks <fransmeulenbroeks@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bitbang.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
index 61a543c788cc..403b085f0a89 100644
--- a/drivers/net/phy/mdio-bitbang.c
+++ b/drivers/net/phy/mdio-bitbang.c
@@ -113,7 +113,7 @@ static void mdiobb_cmd(struct mdiobb_ctrl *ctrl, int op, u8 phy, u8 reg)
 	for (i = 0; i < 32; i++)
 		mdiobb_send_bit(ctrl, 1);
 
-	/* send the start bit (01) and the read opcode (10) or write (10).
+	/* send the start bit (01) and the read opcode (10) or write (01).
 	   Clause 45 operation uses 00 for the start and 11, 10 for
 	   read/write */
 	mdiobb_send_bit(ctrl, 0);

From 6174a30df1b902e1fedbd728f5343937e83e64e6 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 1 Apr 2018 22:40:35 +0800
Subject: [PATCH 1673/1678] route: check sysctl_fib_multipath_use_neigh earlier
 than hash

Prior to this patch, when one packet is hashed into path [1]
(hash <= nh_upper_bound) and it's neigh is dead, it will try
path [2]. However, if path [2]'s neigh is alive but it's
hash > nh_upper_bound, it will not return this alive path.
This packet will never be sent even if path [2] is alive.

 3.3.3.1/24:
  nexthop via 1.1.1.254 dev eth1 weight 1 <--[1] (dead neigh)
  nexthop via 2.2.2.254 dev eth2 weight 1 <--[2]

With sysctl_fib_multipath_use_neigh set is supposed to find an
available path respecting to the l3/l4 hash. But if there is
no available route with this hash, it should at least return
an alive route even with other hash.

This patch is to fix it by processing fib_multipath_use_neigh
earlier than the hash check, so that it will at least return
an alive route if there is when fib_multipath_use_neigh is
enabled. It's also compatible with before when there are alive
routes with the l3/l4 hash.

Fixes: a6db4494d218 ("net: ipv4: Consider failed nexthops in multipath routes")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e7c602c600ac..c27122f01b87 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1746,18 +1746,20 @@ void fib_select_multipath(struct fib_result *res, int hash)
 	bool first = false;
 
 	for_nexthops(fi) {
+		if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+			if (!fib_good_nh(nh))
+				continue;
+			if (!first) {
+				res->nh_sel = nhsel;
+				first = true;
+			}
+		}
+
 		if (hash > atomic_read(&nh->nh_upper_bound))
 			continue;
 
-		if (!net->ipv4.sysctl_fib_multipath_use_neigh ||
-		    fib_good_nh(nh)) {
-			res->nh_sel = nhsel;
-			return;
-		}
-		if (!first) {
-			res->nh_sel = nhsel;
-			first = true;
-		}
+		res->nh_sel = nhsel;
+		return;
 	} endfor_nexthops(fi);
 }
 #endif

From 16a1c0646e55c3345bce8e4edfc06ad119d27c04 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 1 Apr 2018 10:26:29 -0700
Subject: [PATCH 1674/1678] net: bgmac: Correctly annotate register space

All the members: base, idm_base and nicpm_base should be annotated with
__iomem since they are pointers to register space. This fixes a bunch of
sparse reported warnings.

Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support")
Fixes: dd5c5d037f5e ("net: ethernet: bgmac: add NS2 support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 4040d846da8e..40d02fec2747 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -479,9 +479,9 @@ struct bgmac_rx_header {
 struct bgmac {
 	union {
 		struct {
-			void *base;
-			void *idm_base;
-			void *nicpm_base;
+			void __iomem *base;
+			void __iomem *idm_base;
+			void __iomem *nicpm_base;
 		} plat;
 		struct {
 			struct bcma_device *core;

From 60d6e6f0b9e422dd01aeda39257ee0428e5e2a3f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 1 Apr 2018 10:26:30 -0700
Subject: [PATCH 1675/1678] net: bgmac: Fix endian access in
 bgmac_dma_tx_ring_free()

bgmac_dma_tx_ring_free() assigns the ctl1 word which is a litle endian
32-bit word without using proper accessors, fix this, and because a
length cannot be negative, use unsigned int while at it.

Fixes: 9cde94506eac ("bgmac: implement scatter/gather support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bgmac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 2326cc219c46..e6ea8e61f96d 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -533,7 +533,8 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 	int i;
 
 	for (i = 0; i < BGMAC_TX_RING_SLOTS; i++) {
-		int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+		u32 ctl1 = le32_to_cpu(dma_desc[i].ctl1);
+		unsigned int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
 		slot = &ring->slots[i];
 		dev_kfree_skb(slot->skb);

From 6e00f7dd5e4edc2443f030b226f66fe4f1267667 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 1 Apr 2018 21:57:59 -0700
Subject: [PATCH 1676/1678] ipv6: frags: fix
 /proc/sys/net/ipv6/ip6frag_low_thresh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I forgot to change ip6frag_low_thresh proc_handler
from proc_dointvec_minmax to proc_doulongvec_minmax

Fixes: 3e67f106f619 ("inet: frags: break the 2GB limit for frags storage")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7b52efc63f6a..70e4a578b2fb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -564,7 +564,7 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 		.data		= &init_net.ipv6.frags.low_thresh,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &zero,
 		.extra2		= &init_net.ipv6.frags.high_thresh
 	},

From 4a188a63afdffb5f62cccc508589c19e5297ed05 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Mon, 2 Apr 2018 11:23:43 +0800
Subject: [PATCH 1677/1678] net: mvneta: split rxq/txq init and txq deinit into
 SW and HW parts

This is to prepare the suspend/resume improvement in next patch. The
SW parts can be optimized out during resume.

As for rxq handling during suspend, we'd like to drop packets by
calling mvneta_rxq_drop_pkts() which is both SW and HW operation,
so we don't split rxq deinit.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 85 +++++++++++++++++++++------
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index b26bcdf4cd03..d4fbad235a79 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2797,10 +2797,8 @@ static void mvneta_rx_reset(struct mvneta_port *pp)
 
 /* Rx/Tx queue initialization/cleanup methods */
 
-/* Create a specified RX queue */
-static int mvneta_rxq_init(struct mvneta_port *pp,
-			   struct mvneta_rx_queue *rxq)
-
+static int mvneta_rxq_sw_init(struct mvneta_port *pp,
+			      struct mvneta_rx_queue *rxq)
 {
 	rxq->size = pp->rx_ring_size;
 
@@ -2813,6 +2811,12 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 
 	rxq->last_desc = rxq->size - 1;
 
+	return 0;
+}
+
+static void mvneta_rxq_hw_init(struct mvneta_port *pp,
+			       struct mvneta_rx_queue *rxq)
+{
 	/* Set Rx descriptors queue starting address */
 	mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
 	mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
@@ -2836,6 +2840,20 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 		mvneta_rxq_short_pool_set(pp, rxq);
 		mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
 	}
+}
+
+/* Create a specified RX queue */
+static int mvneta_rxq_init(struct mvneta_port *pp,
+			   struct mvneta_rx_queue *rxq)
+
+{
+	int ret;
+
+	ret = mvneta_rxq_sw_init(pp, rxq);
+	if (ret < 0)
+		return ret;
+
+	mvneta_rxq_hw_init(pp, rxq);
 
 	return 0;
 }
@@ -2858,9 +2876,8 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 	rxq->descs_phys        = 0;
 }
 
-/* Create and initialize a tx queue */
-static int mvneta_txq_init(struct mvneta_port *pp,
-			   struct mvneta_tx_queue *txq)
+static int mvneta_txq_sw_init(struct mvneta_port *pp,
+			      struct mvneta_tx_queue *txq)
 {
 	int cpu;
 
@@ -2873,7 +2890,6 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 	txq->tx_stop_threshold = txq->size - MVNETA_MAX_SKB_DESCS;
 	txq->tx_wake_threshold = txq->tx_stop_threshold / 2;
 
-
 	/* Allocate memory for TX descriptors */
 	txq->descs = dma_alloc_coherent(pp->dev->dev.parent,
 					txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2883,14 +2899,6 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 
 	txq->last_desc = txq->size - 1;
 
-	/* Set maximum bandwidth for enabled TXQs */
-	mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0x03ffffff);
-	mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0x3fffffff);
-
-	/* Set Tx descriptors queue starting address */
-	mvreg_write(pp, MVNETA_TXQ_BASE_ADDR_REG(txq->id), txq->descs_phys);
-	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), txq->size);
-
 	txq->tx_skb = kmalloc_array(txq->size, sizeof(*txq->tx_skb),
 				    GFP_KERNEL);
 	if (!txq->tx_skb) {
@@ -2911,7 +2919,6 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 				  txq->descs, txq->descs_phys);
 		return -ENOMEM;
 	}
-	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 
 	/* Setup XPS mapping */
 	if (txq_number > 1)
@@ -2924,9 +2931,38 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 	return 0;
 }
 
+static void mvneta_txq_hw_init(struct mvneta_port *pp,
+			       struct mvneta_tx_queue *txq)
+{
+	/* Set maximum bandwidth for enabled TXQs */
+	mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0x03ffffff);
+	mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0x3fffffff);
+
+	/* Set Tx descriptors queue starting address */
+	mvreg_write(pp, MVNETA_TXQ_BASE_ADDR_REG(txq->id), txq->descs_phys);
+	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), txq->size);
+
+	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
+}
+
+/* Create and initialize a tx queue */
+static int mvneta_txq_init(struct mvneta_port *pp,
+			   struct mvneta_tx_queue *txq)
+{
+	int ret;
+
+	ret = mvneta_txq_sw_init(pp, txq);
+	if (ret < 0)
+		return ret;
+
+	mvneta_txq_hw_init(pp, txq);
+
+	return 0;
+}
+
 /* Free allocated resources when mvneta_txq_init() fails to allocate memory*/
-static void mvneta_txq_deinit(struct mvneta_port *pp,
-			      struct mvneta_tx_queue *txq)
+static void mvneta_txq_sw_deinit(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
 {
 	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
 
@@ -2947,7 +2983,11 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 	txq->last_desc         = 0;
 	txq->next_desc_to_proc = 0;
 	txq->descs_phys        = 0;
+}
 
+static void mvneta_txq_hw_deinit(struct mvneta_port *pp,
+				 struct mvneta_tx_queue *txq)
+{
 	/* Set minimum bandwidth for disabled TXQs */
 	mvreg_write(pp, MVETH_TXQ_TOKEN_CFG_REG(txq->id), 0);
 	mvreg_write(pp, MVETH_TXQ_TOKEN_COUNT_REG(txq->id), 0);
@@ -2957,6 +2997,13 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 	mvreg_write(pp, MVNETA_TXQ_SIZE_REG(txq->id), 0);
 }
 
+static void mvneta_txq_deinit(struct mvneta_port *pp,
+			      struct mvneta_tx_queue *txq)
+{
+	mvneta_txq_sw_deinit(pp, txq);
+	mvneta_txq_hw_deinit(pp, txq);
+}
+
 /* Cleanup all Tx queues */
 static void mvneta_cleanup_txqs(struct mvneta_port *pp)
 {

From 1799cdd287ff09c6d34347409b0d4f464e197aea Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Mon, 2 Apr 2018 11:24:59 +0800
Subject: [PATCH 1678/1678] net: mvneta: improve suspend/resume

Current suspend/resume implementation reuses the mvneta_open() and
mvneta_close(), but it could be optimized to take only necessary
actions during suspend/resume.

One obvious problem of current implementation is: after hundreds of
system suspend/resume cycles, the resume of mvneta could fail due to
fragmented dma coherent memory. After this patch, the non-necessary
memory alloc/free is optimized out.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 69 ++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d4fbad235a79..17a904cc6a5e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4571,16 +4571,45 @@ static int mvneta_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM_SLEEP
 static int mvneta_suspend(struct device *device)
 {
+	int queue;
 	struct net_device *dev = dev_get_drvdata(device);
 	struct mvneta_port *pp = netdev_priv(dev);
 
+	if (!netif_running(dev))
+		goto clean_exit;
+
+	if (!pp->neta_armada3700) {
+		spin_lock(&pp->lock);
+		pp->is_stopped = true;
+		spin_unlock(&pp->lock);
+
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
+	}
+
 	rtnl_lock();
-	if (netif_running(dev))
-		mvneta_stop(dev);
+	mvneta_stop_dev(pp);
 	rtnl_unlock();
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
+
+		mvneta_rxq_drop_pkts(pp, rxq);
+	}
+
+	for (queue = 0; queue < txq_number; queue++) {
+		struct mvneta_tx_queue *txq = &pp->txqs[queue];
+
+		mvneta_txq_hw_deinit(pp, txq);
+	}
+
+clean_exit:
 	netif_device_detach(dev);
 	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
+
 	return 0;
 }
 
@@ -4589,7 +4618,7 @@ static int mvneta_resume(struct device *device)
 	struct platform_device *pdev = to_platform_device(device);
 	struct net_device *dev = dev_get_drvdata(device);
 	struct mvneta_port *pp = netdev_priv(dev);
-	int err;
+	int err, queue;
 
 	clk_prepare_enable(pp->clk);
 	if (!IS_ERR(pp->clk_bus))
@@ -4611,12 +4640,38 @@ static int mvneta_resume(struct device *device)
 	}
 
 	netif_device_attach(dev);
-	rtnl_lock();
-	if (netif_running(dev)) {
-		mvneta_open(dev);
-		mvneta_set_rx_mode(dev);
+
+	if (!netif_running(dev))
+		return 0;
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
+
+		rxq->next_desc_to_proc = 0;
+		mvneta_rxq_hw_init(pp, rxq);
 	}
+
+	for (queue = 0; queue < txq_number; queue++) {
+		struct mvneta_tx_queue *txq = &pp->txqs[queue];
+
+		txq->next_desc_to_proc = 0;
+		mvneta_txq_hw_init(pp, txq);
+	}
+
+	if (!pp->neta_armada3700) {
+		spin_lock(&pp->lock);
+		pp->is_stopped = false;
+		spin_unlock(&pp->lock);
+		cpuhp_state_add_instance_nocalls(online_hpstate,
+						 &pp->node_online);
+		cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						 &pp->node_dead);
+	}
+
+	rtnl_lock();
+	mvneta_start_dev(pp);
 	rtnl_unlock();
+	mvneta_set_rx_mode(dev);
 
 	return 0;
 }